diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,47898 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 7978, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.166666666666667e-06, + "loss": 1.3223, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 8.333333333333334e-06, + "loss": 4.127, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.25e-05, + "loss": 3.8926, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.6666666666666667e-05, + "loss": 4.0557, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2.0833333333333333e-05, + "loss": 0.2629, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 2.5e-05, + "loss": 2.4141, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 2.9166666666666666e-05, + "loss": 2.4883, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 3.3333333333333335e-05, + "loss": 2.0317, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 3.75e-05, + "loss": 1.8884, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 4.1666666666666665e-05, + "loss": 2.0662, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.5833333333333334e-05, + "loss": 2.0518, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 1.8203, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 5.416666666666667e-05, + "loss": 2.2256, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 5.833333333333333e-05, + "loss": 2.2903, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 6.25e-05, + "loss": 2.3442, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 6.666666666666667e-05, + "loss": 1.8438, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 7.083333333333334e-05, + "loss": 2.0557, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 7.5e-05, + "loss": 2.0825, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 7.916666666666666e-05, + "loss": 2.2671, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 8.333333333333333e-05, + "loss": 2.1284, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 8.75e-05, + "loss": 2.3442, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 9.166666666666667e-05, + "loss": 2.2158, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 9.583333333333334e-05, + "loss": 2.1316, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001, + "loss": 2.0645, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010416666666666667, + "loss": 2.7192, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010833333333333334, + "loss": 1.793, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011250000000000001, + "loss": 1.9534, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011666666666666667, + "loss": 1.8796, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012083333333333333, + "loss": 1.896, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 0.000125, + "loss": 2.5396, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012916666666666667, + "loss": 1.7197, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013333333333333334, + "loss": 2.1172, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001375, + "loss": 2.1733, + "step": 33 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014166666666666668, + "loss": 2.103, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014583333333333335, + "loss": 1.3877, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015, + "loss": 1.9116, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015416666666666668, + "loss": 1.9229, + "step": 37 + }, + { + "epoch": 0.0, + "learning_rate": 0.00015833333333333332, + "loss": 2.0737, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016250000000000002, + "loss": 2.2156, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016666666666666666, + "loss": 2.1489, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017083333333333333, + "loss": 1.9587, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 0.000175, + "loss": 1.5857, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017916666666666667, + "loss": 2.7075, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018333333333333334, + "loss": 0.2922, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001875, + "loss": 1.582, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019166666666666667, + "loss": 1.9648, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019583333333333334, + "loss": 2.2886, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002, + "loss": 1.9338, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020416666666666668, + "loss": 1.926, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020833333333333335, + "loss": 1.9324, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002125, + "loss": 2.1191, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021666666666666668, + "loss": 2.1582, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022083333333333333, + "loss": 1.9312, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022500000000000002, + "loss": 2.0001, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022916666666666666, + "loss": 1.7617, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023333333333333333, + "loss": 2.0029, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002375, + "loss": 1.9915, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 0.00024166666666666667, + "loss": 2.064, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002458333333333333, + "loss": 1.9434, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 0.00025, + "loss": 2.0388, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 0.00025416666666666665, + "loss": 2.1049, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 0.00025833333333333334, + "loss": 1.8016, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 0.00026250000000000004, + "loss": 1.8149, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002666666666666667, + "loss": 1.9885, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002708333333333333, + "loss": 1.8438, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 0.000275, + "loss": 2.1909, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 0.00027916666666666666, + "loss": 1.5735, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 0.00028333333333333335, + "loss": 1.9688, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002875, + "loss": 2.2919, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002916666666666667, + "loss": 1.8003, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029583333333333333, + "loss": 2.2383, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003, + "loss": 2.2507, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 0.00030416666666666667, + "loss": 1.9038, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 0.00030833333333333337, + "loss": 1.938, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003125, + "loss": 2.3398, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 0.00031666666666666665, + "loss": 2.0679, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 0.00032083333333333334, + "loss": 0.0775, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 0.00032500000000000004, + "loss": 1.8989, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003291666666666667, + "loss": 1.9902, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003333333333333333, + "loss": 1.8933, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003375, + "loss": 1.7324, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 0.00034166666666666666, + "loss": 2.0344, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 0.00034583333333333335, + "loss": 1.9165, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 0.00035, + "loss": 1.9918, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003541666666666667, + "loss": 1.7848, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 0.00035833333333333333, + "loss": 0.0726, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003625, + "loss": 1.8579, + "step": 87 + }, + { + "epoch": 0.01, + "learning_rate": 0.00036666666666666667, + "loss": 1.6489, + "step": 88 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037083333333333337, + "loss": 1.8225, + "step": 89 + }, + { + "epoch": 0.01, + "learning_rate": 0.000375, + "loss": 1.8835, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037916666666666665, + "loss": 0.0617, + "step": 91 + }, + { + "epoch": 0.01, + "learning_rate": 0.00038333333333333334, + "loss": 1.8457, + "step": 92 + }, + { + "epoch": 0.01, + "learning_rate": 0.00038750000000000004, + "loss": 1.6519, + "step": 93 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003916666666666667, + "loss": 1.6523, + "step": 94 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003958333333333333, + "loss": 1.8003, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004, + "loss": 1.9146, + "step": 96 + }, + { + "epoch": 0.01, + "learning_rate": 0.00040416666666666666, + "loss": 1.8191, + "step": 97 + }, + { + "epoch": 0.01, + "learning_rate": 0.00040833333333333336, + "loss": 1.8691, + "step": 98 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004125, + "loss": 1.9321, + "step": 99 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004166666666666667, + "loss": 1.6443, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 0.00042083333333333333, + "loss": 1.9814, + "step": 101 + }, + { + "epoch": 0.01, + "learning_rate": 0.000425, + "loss": 2.0627, + "step": 102 + }, + { + "epoch": 0.01, + "learning_rate": 0.00042916666666666667, + "loss": 1.7913, + "step": 103 + }, + { + "epoch": 0.01, + "learning_rate": 0.00043333333333333337, + "loss": 1.8848, + "step": 104 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004375, + "loss": 1.7351, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 0.00044166666666666665, + "loss": 1.7881, + "step": 106 + }, + { + "epoch": 0.01, + "learning_rate": 0.00044583333333333335, + "loss": 1.5391, + "step": 107 + }, + { + "epoch": 0.01, + "learning_rate": 0.00045000000000000004, + "loss": 2.3174, + "step": 108 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004541666666666667, + "loss": 1.9868, + "step": 109 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004583333333333333, + "loss": 2.1895, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004625, + "loss": 1.7886, + "step": 111 + }, + { + "epoch": 0.01, + "learning_rate": 0.00046666666666666666, + "loss": 0.0535, + "step": 112 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047083333333333336, + "loss": 1.9224, + "step": 113 + }, + { + "epoch": 0.01, + "learning_rate": 0.000475, + "loss": 1.7793, + "step": 114 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004791666666666667, + "loss": 1.8384, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 0.00048333333333333334, + "loss": 1.5626, + "step": 116 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004875, + "loss": 1.9985, + "step": 117 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004916666666666666, + "loss": 1.7812, + "step": 118 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004958333333333334, + "loss": 1.9089, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005, + "loss": 2.0747, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005041666666666667, + "loss": 1.9824, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005083333333333333, + "loss": 1.5085, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005124999999999999, + "loss": 1.7146, + "step": 123 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005166666666666667, + "loss": 1.8816, + "step": 124 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005208333333333334, + "loss": 2.1106, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005250000000000001, + "loss": 0.0483, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005291666666666667, + "loss": 1.5497, + "step": 127 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005333333333333334, + "loss": 1.8591, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005375, + "loss": 1.7373, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005416666666666666, + "loss": 1.938, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005458333333333333, + "loss": 1.7251, + "step": 131 + }, + { + "epoch": 0.02, + "learning_rate": 0.00055, + "loss": 1.6508, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005541666666666667, + "loss": 1.8608, + "step": 133 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005583333333333333, + "loss": 2.1145, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005625000000000001, + "loss": 2.1765, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005666666666666667, + "loss": 1.7561, + "step": 136 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005708333333333333, + "loss": 1.7012, + "step": 137 + }, + { + "epoch": 0.02, + "learning_rate": 0.000575, + "loss": 1.9019, + "step": 138 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005791666666666667, + "loss": 1.7783, + "step": 139 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005833333333333334, + "loss": 2.1406, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005875, + "loss": 1.8555, + "step": 141 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005916666666666667, + "loss": 2.0938, + "step": 142 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005958333333333333, + "loss": 1.9917, + "step": 143 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006, + "loss": 0.0415, + "step": 144 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006041666666666666, + "loss": 1.6831, + "step": 145 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006083333333333333, + "loss": 1.8027, + "step": 146 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006125000000000001, + "loss": 1.7866, + "step": 147 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006166666666666667, + "loss": 2.0618, + "step": 148 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006208333333333334, + "loss": 1.9504, + "step": 149 + }, + { + "epoch": 0.02, + "learning_rate": 0.000625, + "loss": 2.1223, + "step": 150 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006291666666666667, + "loss": 1.6636, + "step": 151 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006333333333333333, + "loss": 1.6157, + "step": 152 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006374999999999999, + "loss": 2.0474, + "step": 153 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006416666666666667, + "loss": 1.9028, + "step": 154 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006458333333333334, + "loss": 2.0981, + "step": 155 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006500000000000001, + "loss": 1.6831, + "step": 156 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006541666666666667, + "loss": 1.7529, + "step": 157 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006583333333333334, + "loss": 2.0774, + "step": 158 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006625, + "loss": 1.8049, + "step": 159 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006666666666666666, + "loss": 1.9082, + "step": 160 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006708333333333333, + "loss": 1.9712, + "step": 161 + }, + { + "epoch": 0.02, + "learning_rate": 0.000675, + "loss": 1.5513, + "step": 162 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006791666666666667, + "loss": 1.8667, + "step": 163 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006833333333333333, + "loss": 1.8022, + "step": 164 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006875, + "loss": 1.9116, + "step": 165 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006916666666666667, + "loss": 1.634, + "step": 166 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006958333333333334, + "loss": 1.897, + "step": 167 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007, + "loss": 2.1929, + "step": 168 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007041666666666667, + "loss": 1.8545, + "step": 169 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007083333333333334, + "loss": 1.9246, + "step": 170 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007125, + "loss": 1.6248, + "step": 171 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007166666666666667, + "loss": 1.9287, + "step": 172 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007208333333333333, + "loss": 1.7083, + "step": 173 + }, + { + "epoch": 0.02, + "learning_rate": 0.000725, + "loss": 1.7501, + "step": 174 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007291666666666666, + "loss": 1.7725, + "step": 175 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007333333333333333, + "loss": 2.123, + "step": 176 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007375000000000001, + "loss": 1.7395, + "step": 177 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007416666666666667, + "loss": 2.0637, + "step": 178 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007458333333333334, + "loss": 1.8148, + "step": 179 + }, + { + "epoch": 0.02, + "learning_rate": 0.00075, + "loss": 1.9844, + "step": 180 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007541666666666667, + "loss": 2.0479, + "step": 181 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007583333333333333, + "loss": 1.7937, + "step": 182 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007624999999999999, + "loss": 1.9899, + "step": 183 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007666666666666667, + "loss": 1.9817, + "step": 184 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007708333333333334, + "loss": 1.7769, + "step": 185 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007750000000000001, + "loss": 1.7415, + "step": 186 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007791666666666667, + "loss": 1.9185, + "step": 187 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007833333333333334, + "loss": 1.7571, + "step": 188 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007875, + "loss": 1.6211, + "step": 189 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007916666666666666, + "loss": 0.0876, + "step": 190 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007958333333333333, + "loss": 1.7236, + "step": 191 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008, + "loss": 2.0649, + "step": 192 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008041666666666667, + "loss": 1.7312, + "step": 193 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008083333333333333, + "loss": 1.8079, + "step": 194 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008125000000000001, + "loss": 1.5933, + "step": 195 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008166666666666667, + "loss": 1.7231, + "step": 196 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008208333333333334, + "loss": 2.1333, + "step": 197 + }, + { + "epoch": 0.02, + "learning_rate": 0.000825, + "loss": 1.6195, + "step": 198 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008291666666666667, + "loss": 1.9077, + "step": 199 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008333333333333334, + "loss": 1.9771, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008375, + "loss": 1.6016, + "step": 201 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008416666666666667, + "loss": 1.6119, + "step": 202 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008458333333333333, + "loss": 1.7527, + "step": 203 + }, + { + "epoch": 0.03, + "learning_rate": 0.00085, + "loss": 1.8054, + "step": 204 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008541666666666666, + "loss": 1.2957, + "step": 205 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008583333333333333, + "loss": 1.9165, + "step": 206 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008625000000000001, + "loss": 1.8892, + "step": 207 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008666666666666667, + "loss": 1.6238, + "step": 208 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008708333333333334, + "loss": 1.6209, + "step": 209 + }, + { + "epoch": 0.03, + "learning_rate": 0.000875, + "loss": 1.6309, + "step": 210 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008791666666666667, + "loss": 1.4824, + "step": 211 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008833333333333333, + "loss": 0.1401, + "step": 212 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008874999999999999, + "loss": 1.7722, + "step": 213 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008916666666666667, + "loss": 1.9277, + "step": 214 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008958333333333334, + "loss": 1.8037, + "step": 215 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009000000000000001, + "loss": 1.6921, + "step": 216 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009041666666666667, + "loss": 1.853, + "step": 217 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009083333333333334, + "loss": 1.5697, + "step": 218 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009125, + "loss": 0.1616, + "step": 219 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009166666666666666, + "loss": 1.7329, + "step": 220 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009208333333333333, + "loss": 1.5236, + "step": 221 + }, + { + "epoch": 0.03, + "learning_rate": 0.000925, + "loss": 1.8635, + "step": 222 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009291666666666667, + "loss": 1.9521, + "step": 223 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009333333333333333, + "loss": 1.9097, + "step": 224 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009375, + "loss": 1.871, + "step": 225 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009416666666666667, + "loss": 1.5881, + "step": 226 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009458333333333334, + "loss": 1.6685, + "step": 227 + }, + { + "epoch": 0.03, + "learning_rate": 0.00095, + "loss": 1.4482, + "step": 228 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009541666666666667, + "loss": 1.6606, + "step": 229 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009583333333333334, + "loss": 1.4314, + "step": 230 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009625, + "loss": 1.7632, + "step": 231 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009666666666666667, + "loss": 1.6663, + "step": 232 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009708333333333333, + "loss": 1.939, + "step": 233 + }, + { + "epoch": 0.03, + "learning_rate": 0.000975, + "loss": 1.7214, + "step": 234 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009791666666666666, + "loss": 0.0847, + "step": 235 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009833333333333332, + "loss": 1.6309, + "step": 236 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009875, + "loss": 1.6685, + "step": 237 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009916666666666667, + "loss": 1.6516, + "step": 238 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009958333333333334, + "loss": 1.6868, + "step": 239 + }, + { + "epoch": 0.03, + "learning_rate": 0.001, + "loss": 1.6003, + "step": 240 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999999958791928, + "loss": 2.0054, + "step": 241 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999998351677183, + "loss": 1.9526, + "step": 242 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999996291273914, + "loss": 1.8474, + "step": 243 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999993406709813, + "loss": 1.7935, + "step": 244 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999989697985357, + "loss": 1.6775, + "step": 245 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999985165101157, + "loss": 1.686, + "step": 246 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999979808057958, + "loss": 1.6436, + "step": 247 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999973626856644, + "loss": 1.7859, + "step": 248 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999966621498234, + "loss": 1.7202, + "step": 249 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999958791983882, + "loss": 1.9204, + "step": 250 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999995013831488, + "loss": 0.0677, + "step": 251 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999940660492654, + "loss": 0.0599, + "step": 252 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999930358518766, + "loss": 1.708, + "step": 253 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999919232394914, + "loss": 1.689, + "step": 254 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999990728212293, + "loss": 1.3828, + "step": 255 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999989450770479, + "loss": 1.4258, + "step": 256 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999880909142592, + "loss": 1.7661, + "step": 257 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999866486438583, + "loss": 1.832, + "step": 258 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999851239595138, + "loss": 1.6843, + "step": 259 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999835168614769, + "loss": 1.6294, + "step": 260 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999818273500125, + "loss": 1.6528, + "step": 261 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999800554253994, + "loss": 1.6147, + "step": 262 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999782010879296, + "loss": 1.5176, + "step": 263 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999762643379086, + "loss": 1.5469, + "step": 264 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999742451756556, + "loss": 1.5005, + "step": 265 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999721436015037, + "loss": 1.3838, + "step": 266 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999969959615799, + "loss": 1.4685, + "step": 267 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999676932189016, + "loss": 1.5271, + "step": 268 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999653444111853, + "loss": 1.5613, + "step": 269 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999629131930369, + "loss": 1.6445, + "step": 270 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999603995648572, + "loss": 1.7358, + "step": 271 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999578035270607, + "loss": 1.3657, + "step": 272 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999955125080075, + "loss": 1.7734, + "step": 273 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999523642243422, + "loss": 1.7703, + "step": 274 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999949520960317, + "loss": 1.5586, + "step": 275 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999465952884679, + "loss": 1.5767, + "step": 276 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999435872092772, + "loss": 1.582, + "step": 277 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999940496723241, + "loss": 1.4988, + "step": 278 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999373238308684, + "loss": 1.7017, + "step": 279 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999340685326826, + "loss": 1.6499, + "step": 280 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999307308292203, + "loss": 1.8, + "step": 281 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999273107210314, + "loss": 1.7495, + "step": 282 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999238082086797, + "loss": 1.5054, + "step": 283 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999202232927424, + "loss": 1.7073, + "step": 284 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999165559738108, + "loss": 1.4745, + "step": 285 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999912806252489, + "loss": 1.6069, + "step": 286 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999089741293953, + "loss": 1.3877, + "step": 287 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999050596051612, + "loss": 1.3289, + "step": 288 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999010626804323, + "loss": 1.5205, + "step": 289 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999896983355867, + "loss": 1.4475, + "step": 290 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998928216321376, + "loss": 1.8267, + "step": 291 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998885775099307, + "loss": 1.5017, + "step": 292 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998842509899456, + "loss": 1.4438, + "step": 293 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998798420728949, + "loss": 1.457, + "step": 294 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998753507595062, + "loss": 1.5503, + "step": 295 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998707770505191, + "loss": 1.4534, + "step": 296 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999866120946688, + "loss": 1.5859, + "step": 297 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998613824487802, + "loss": 1.5491, + "step": 298 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998565615575768, + "loss": 1.4456, + "step": 299 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998516582738722, + "loss": 1.4233, + "step": 300 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999846672598475, + "loss": 1.5161, + "step": 301 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998416045322064, + "loss": 1.6433, + "step": 302 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998364540759026, + "loss": 1.6572, + "step": 303 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998312212304119, + "loss": 1.2319, + "step": 304 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998259059965972, + "loss": 1.3123, + "step": 305 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998205083753342, + "loss": 1.8081, + "step": 306 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998150283675135, + "loss": 1.4368, + "step": 307 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998094659740373, + "loss": 1.224, + "step": 308 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999803821195823, + "loss": 1.2954, + "step": 309 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997980940338012, + "loss": 1.4701, + "step": 310 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997922844889155, + "loss": 1.3218, + "step": 311 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997863925621238, + "loss": 1.6989, + "step": 312 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997804182543973, + "loss": 1.5339, + "step": 313 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997743615667205, + "loss": 1.4172, + "step": 314 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999768222500092, + "loss": 1.4321, + "step": 315 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997620010555235, + "loss": 1.2856, + "step": 316 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997556972340404, + "loss": 1.4012, + "step": 317 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997493110366824, + "loss": 1.4788, + "step": 318 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997428424645015, + "loss": 1.5518, + "step": 319 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997362915185643, + "loss": 1.5645, + "step": 320 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997296581999503, + "loss": 1.3779, + "step": 321 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999722942509753, + "loss": 1.4329, + "step": 322 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997161444490795, + "loss": 1.2688, + "step": 323 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997092640190502, + "loss": 1.2819, + "step": 324 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997023012207995, + "loss": 1.3628, + "step": 325 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996952560554747, + "loss": 1.4634, + "step": 326 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996881285242371, + "loss": 1.4856, + "step": 327 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996809186282618, + "loss": 1.4377, + "step": 328 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999673626368737, + "loss": 1.6587, + "step": 329 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999666251746865, + "loss": 1.314, + "step": 330 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996587947638611, + "loss": 1.4458, + "step": 331 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996512554209544, + "loss": 1.5623, + "step": 332 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996436337193879, + "loss": 1.5623, + "step": 333 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996359296604177, + "loss": 1.3721, + "step": 334 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996281432453138, + "loss": 1.3765, + "step": 335 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996202744753594, + "loss": 1.3271, + "step": 336 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999612323351852, + "loss": 1.397, + "step": 337 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996042898761017, + "loss": 1.542, + "step": 338 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999596174049433, + "loss": 1.3748, + "step": 339 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995879758731836, + "loss": 1.5161, + "step": 340 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995796953487046, + "loss": 1.5959, + "step": 341 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995713324773613, + "loss": 1.4731, + "step": 342 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995628872605316, + "loss": 1.3839, + "step": 343 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995543596996081, + "loss": 1.2695, + "step": 344 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999545749795996, + "loss": 1.3162, + "step": 345 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999537057551115, + "loss": 1.4072, + "step": 346 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995282829663975, + "loss": 1.2915, + "step": 347 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995194260432898, + "loss": 1.3657, + "step": 348 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995104867832518, + "loss": 1.3098, + "step": 349 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999501465187757, + "loss": 1.519, + "step": 350 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994923612582929, + "loss": 1.4912, + "step": 351 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994831749963596, + "loss": 1.3411, + "step": 352 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994739064034712, + "loss": 1.3555, + "step": 353 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999464555481156, + "loss": 1.2939, + "step": 354 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999455122230955, + "loss": 0.0867, + "step": 355 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999445606654423, + "loss": 1.2161, + "step": 356 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994360087531285, + "loss": 0.0579, + "step": 357 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994263285286538, + "loss": 1.2944, + "step": 358 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994165659825945, + "loss": 1.4592, + "step": 359 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009994067211165595, + "loss": 1.1677, + "step": 360 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993967939321718, + "loss": 1.396, + "step": 361 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993867844310675, + "loss": 1.1058, + "step": 362 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993766926148966, + "loss": 1.3843, + "step": 363 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993665184853226, + "loss": 1.4592, + "step": 364 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993562620440226, + "loss": 1.1797, + "step": 365 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999345923292687, + "loss": 1.3883, + "step": 366 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993355022330202, + "loss": 1.4988, + "step": 367 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993249988667398, + "loss": 1.2104, + "step": 368 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993144131955768, + "loss": 1.3317, + "step": 369 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993037452212766, + "loss": 1.2798, + "step": 370 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992929949455972, + "loss": 1.3218, + "step": 371 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992821623703108, + "loss": 1.2412, + "step": 372 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992712474972028, + "loss": 1.2795, + "step": 373 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992602503280726, + "loss": 1.2668, + "step": 374 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992491708647325, + "loss": 0.0533, + "step": 375 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992380091090093, + "loss": 1.215, + "step": 376 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999226765062742, + "loss": 1.1584, + "step": 377 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999215438727785, + "loss": 1.2629, + "step": 378 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992040301060043, + "loss": 1.182, + "step": 379 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999192539199281, + "loss": 1.302, + "step": 380 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991809660095093, + "loss": 0.0514, + "step": 381 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991693105385962, + "loss": 1.1018, + "step": 382 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991575727884632, + "loss": 1.3418, + "step": 383 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991457527610452, + "loss": 1.3536, + "step": 384 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991338504582905, + "loss": 1.1804, + "step": 385 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991218658821608, + "loss": 1.2324, + "step": 386 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991097990346316, + "loss": 1.1973, + "step": 387 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999097649917692, + "loss": 1.3335, + "step": 388 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990854185333445, + "loss": 1.2549, + "step": 389 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990731048836053, + "loss": 1.0865, + "step": 390 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999060708970504, + "loss": 1.208, + "step": 391 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990482307960838, + "loss": 1.3105, + "step": 392 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990356703624018, + "loss": 1.3601, + "step": 393 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999023027671528, + "loss": 1.0736, + "step": 394 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990103027255463, + "loss": 1.2974, + "step": 395 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989974955265546, + "loss": 1.1685, + "step": 396 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989846060766635, + "loss": 1.4841, + "step": 397 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989716343779982, + "loss": 1.1438, + "step": 398 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989585804326961, + "loss": 1.063, + "step": 399 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989454442429094, + "loss": 1.1113, + "step": 400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989322258108031, + "loss": 1.2773, + "step": 401 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989189251385562, + "loss": 1.1787, + "step": 402 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998905542228361, + "loss": 1.4731, + "step": 403 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988920770824237, + "loss": 0.0479, + "step": 404 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988785297029633, + "loss": 1.2544, + "step": 405 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988649000922131, + "loss": 1.3342, + "step": 406 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988511882524198, + "loss": 1.1631, + "step": 407 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988373941858435, + "loss": 0.9307, + "step": 408 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988235178947577, + "loss": 1.1365, + "step": 409 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099880955938145, + "loss": 1.2656, + "step": 410 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998795518648221, + "loss": 1.1172, + "step": 411 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998781395697385, + "loss": 1.323, + "step": 412 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987671905312702, + "loss": 1.2374, + "step": 413 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987529031522178, + "loss": 1.1912, + "step": 414 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987385335625829, + "loss": 1.5164, + "step": 415 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987240817647342, + "loss": 1.1277, + "step": 416 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987095477610537, + "loss": 1.3643, + "step": 417 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998694931553937, + "loss": 1.2125, + "step": 418 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986802331457933, + "loss": 1.1604, + "step": 419 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986654525390457, + "loss": 1.3206, + "step": 420 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986505897361302, + "loss": 1.0945, + "step": 421 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986356447394967, + "loss": 1.3306, + "step": 422 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986206175516088, + "loss": 1.1538, + "step": 423 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986055081749434, + "loss": 1.0852, + "step": 424 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985903166119907, + "loss": 1.271, + "step": 425 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985750428652554, + "loss": 0.0459, + "step": 426 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985596869372545, + "loss": 1.1234, + "step": 427 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985442488305194, + "loss": 1.1619, + "step": 428 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985287285475947, + "loss": 1.0275, + "step": 429 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998513126091039, + "loss": 1.0642, + "step": 430 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984974414634236, + "loss": 0.0456, + "step": 431 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984816746673342, + "loss": 0.9827, + "step": 432 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984658257053693, + "loss": 1.1487, + "step": 433 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984498945801417, + "loss": 1.2043, + "step": 434 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984338812942773, + "loss": 1.269, + "step": 435 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984177858504155, + "loss": 1.1458, + "step": 436 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984016082512093, + "loss": 0.991, + "step": 437 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009983853484993252, + "loss": 1.1412, + "step": 438 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983690065974438, + "loss": 1.0627, + "step": 439 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983525825482582, + "loss": 1.157, + "step": 440 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998336076354476, + "loss": 1.0964, + "step": 441 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998319488018818, + "loss": 1.0618, + "step": 442 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983028175440181, + "loss": 1.1675, + "step": 443 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982860649328242, + "loss": 1.1512, + "step": 444 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998269230187998, + "loss": 0.0434, + "step": 445 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982523133123145, + "loss": 1.0127, + "step": 446 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982353143085617, + "loss": 1.1509, + "step": 447 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982182331795416, + "loss": 1.2061, + "step": 448 + }, + { + "epoch": 0.06, + "learning_rate": 0.00099820106992807, + "loss": 1.0491, + "step": 449 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981838245569758, + "loss": 1.1091, + "step": 450 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981664970691018, + "loss": 1.1204, + "step": 451 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981490874673038, + "loss": 0.0424, + "step": 452 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981315957544518, + "loss": 0.9866, + "step": 453 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981140219334288, + "loss": 1.1033, + "step": 454 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980963660071317, + "loss": 1.0938, + "step": 455 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980786279784703, + "loss": 1.1982, + "step": 456 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998060807850369, + "loss": 1.0563, + "step": 457 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980429056257647, + "loss": 0.8628, + "step": 458 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980249213076085, + "loss": 1.0812, + "step": 459 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980068548988649, + "loss": 1.1807, + "step": 460 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979887064025114, + "loss": 0.9387, + "step": 461 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979704758215399, + "loss": 1.2581, + "step": 462 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997952163158955, + "loss": 0.9902, + "step": 463 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979337684177755, + "loss": 0.0413, + "step": 464 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979152916010335, + "loss": 1.0525, + "step": 465 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978967327117744, + "loss": 1.0327, + "step": 466 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978780917530572, + "loss": 1.012, + "step": 467 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978593687279547, + "loss": 0.8512, + "step": 468 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997840563639553, + "loss": 1.5005, + "step": 469 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997821676490952, + "loss": 0.0407, + "step": 470 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978027072852648, + "loss": 0.0407, + "step": 471 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977836560256179, + "loss": 1.2268, + "step": 472 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997764522715152, + "loss": 1.0901, + "step": 473 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977453073570202, + "loss": 0.9287, + "step": 474 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977260099543906, + "loss": 1.1189, + "step": 475 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977066305104434, + "loss": 0.9897, + "step": 476 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976871690283735, + "loss": 0.9998, + "step": 477 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976676255113884, + "loss": 1.259, + "step": 478 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976479999627096, + "loss": 0.9282, + "step": 479 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976282923855719, + "loss": 0.9941, + "step": 480 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976085027832242, + "loss": 1.01, + "step": 481 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975886311589278, + "loss": 1.1167, + "step": 482 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975686775159586, + "loss": 1.0452, + "step": 483 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997548641857606, + "loss": 1.0179, + "step": 484 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975285241871714, + "loss": 1.0415, + "step": 485 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997508324507972, + "loss": 0.9438, + "step": 486 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974880428233366, + "loss": 1.2441, + "step": 487 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974676791366086, + "loss": 1.2214, + "step": 488 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974472334511445, + "loss": 1.2759, + "step": 489 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974267057703146, + "loss": 1.0214, + "step": 490 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974060960975023, + "loss": 1.1372, + "step": 491 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973854044361046, + "loss": 0.994, + "step": 492 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973646307895325, + "loss": 1.1084, + "step": 493 + }, + { + "epoch": 0.06, + "learning_rate": 0.00099734377516121, + "loss": 0.884, + "step": 494 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973228375545749, + "loss": 1.116, + "step": 495 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973018179730783, + "loss": 1.0356, + "step": 496 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972807164201848, + "loss": 0.0412, + "step": 497 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972595328993728, + "loss": 1.1167, + "step": 498 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997238267414134, + "loss": 0.9502, + "step": 499 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972169199679735, + "loss": 0.9351, + "step": 500 + }, + { + "epoch": 0.06, + "learning_rate": 0.00099719549056441, + "loss": 0.9344, + "step": 501 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971739792069762, + "loss": 1.0518, + "step": 502 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971523858992174, + "loss": 0.9856, + "step": 503 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997130710644693, + "loss": 0.9089, + "step": 504 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997108953446976, + "loss": 0.9895, + "step": 505 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970871143096525, + "loss": 1.0176, + "step": 506 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997065193236322, + "loss": 0.887, + "step": 507 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970431902305984, + "loss": 0.9613, + "step": 508 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997021105296108, + "loss": 1.0724, + "step": 509 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969989384364915, + "loss": 0.9077, + "step": 510 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969766896554023, + "loss": 1.0305, + "step": 511 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969543589565084, + "loss": 1.0232, + "step": 512 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969319463434898, + "loss": 1.1052, + "step": 513 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969094518200413, + "loss": 1.0364, + "step": 514 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968868753898706, + "loss": 0.8582, + "step": 515 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996864217056699, + "loss": 0.9165, + "step": 516 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968414768242615, + "loss": 0.9827, + "step": 517 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968186546963063, + "loss": 1.1433, + "step": 518 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996795750676595, + "loss": 0.8981, + "step": 519 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967727647689033, + "loss": 0.809, + "step": 520 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967496969770198, + "loss": 0.9922, + "step": 521 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996726547304747, + "loss": 0.0834, + "step": 522 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967033157559004, + "loss": 0.9431, + "step": 523 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966800023343096, + "loss": 0.8623, + "step": 524 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966566070438172, + "loss": 0.8716, + "step": 525 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966331298882798, + "loss": 0.8468, + "step": 526 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996609570871567, + "loss": 0.8716, + "step": 527 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996585929997562, + "loss": 1.0408, + "step": 528 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965622072701614, + "loss": 1.1611, + "step": 529 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996538402693276, + "loss": 0.8606, + "step": 530 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965145162708292, + "loss": 0.9196, + "step": 531 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964905480067585, + "loss": 0.8282, + "step": 532 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964664979050144, + "loss": 0.9237, + "step": 533 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964423659695612, + "loss": 0.8632, + "step": 534 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964181522043767, + "loss": 0.8751, + "step": 535 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996393856613452, + "loss": 1.0089, + "step": 536 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963694792007919, + "loss": 0.8801, + "step": 537 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963450199704145, + "loss": 1.1189, + "step": 538 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963204789263517, + "loss": 0.1014, + "step": 539 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962958560726482, + "loss": 0.7834, + "step": 540 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962711514133631, + "loss": 0.9054, + "step": 541 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996246364952568, + "loss": 0.9358, + "step": 542 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962214966943493, + "loss": 0.9166, + "step": 543 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961965466428054, + "loss": 0.8441, + "step": 544 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961715148020492, + "loss": 0.7448, + "step": 545 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961464011762067, + "loss": 0.967, + "step": 546 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961212057694173, + "loss": 0.8617, + "step": 547 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960959285858339, + "loss": 0.3604, + "step": 548 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960705696296236, + "loss": 1.4731, + "step": 549 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996045128904966, + "loss": 0.7899, + "step": 550 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960196064160542, + "loss": 0.8646, + "step": 551 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959940021670958, + "loss": 0.8923, + "step": 552 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959683161623106, + "loss": 0.9165, + "step": 553 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995942548405933, + "loss": 0.7933, + "step": 554 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959166989022099, + "loss": 0.8956, + "step": 555 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958907676554025, + "loss": 0.9802, + "step": 556 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995864754669785, + "loss": 0.9521, + "step": 557 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995838659949645, + "loss": 0.8789, + "step": 558 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995812483499284, + "loss": 0.7645, + "step": 559 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957862253230166, + "loss": 0.9594, + "step": 560 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995759885425171, + "loss": 0.9314, + "step": 561 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957334638100888, + "loss": 0.8412, + "step": 562 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957069604821253, + "loss": 0.9204, + "step": 563 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995680375445649, + "loss": 0.6946, + "step": 564 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956537087050421, + "loss": 0.8702, + "step": 565 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956269602647, + "loss": 0.9398, + "step": 566 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956001301290316, + "loss": 0.8048, + "step": 567 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955732183024595, + "loss": 0.8748, + "step": 568 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955462247894199, + "loss": 1.791, + "step": 569 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955191495943617, + "loss": 0.8732, + "step": 570 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954919927217482, + "loss": 0.8567, + "step": 571 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954647541760553, + "loss": 0.8839, + "step": 572 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995437433961773, + "loss": 0.7505, + "step": 573 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995410032083405, + "loss": 0.8604, + "step": 574 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009953825485454672, + "loss": 0.8147, + "step": 575 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009953549833524905, + "loss": 0.9001, + "step": 576 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995327336509018, + "loss": 0.858, + "step": 577 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952996080196072, + "loss": 0.885, + "step": 578 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952717978888282, + "loss": 0.8464, + "step": 579 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952439061212655, + "loss": 0.749, + "step": 580 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952159327215162, + "loss": 0.8544, + "step": 581 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951878776941912, + "loss": 0.8839, + "step": 582 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951597410439153, + "loss": 0.797, + "step": 583 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951315227753259, + "loss": 0.8123, + "step": 584 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009951032228930744, + "loss": 0.8385, + "step": 585 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995074841401826, + "loss": 0.833, + "step": 586 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950463783062577, + "loss": 0.8967, + "step": 587 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950178336110625, + "loss": 0.7747, + "step": 588 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949892073209446, + "loss": 0.8344, + "step": 589 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949604994406228, + "loss": 0.7568, + "step": 590 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949317099748293, + "loss": 0.8228, + "step": 591 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949028389283092, + "loss": 0.9071, + "step": 592 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948738863058213, + "loss": 0.7562, + "step": 593 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948448521121384, + "loss": 0.8135, + "step": 594 + }, + { + "epoch": 0.07, + "learning_rate": 0.000994815736352046, + "loss": 0.7341, + "step": 595 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947865390303435, + "loss": 0.7385, + "step": 596 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947572601518432, + "loss": 1.0645, + "step": 597 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947278997213711, + "loss": 0.8505, + "step": 598 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946984577437674, + "loss": 0.6611, + "step": 599 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946689342238849, + "loss": 0.6714, + "step": 600 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946393291665895, + "loss": 0.7489, + "step": 601 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946096425767616, + "loss": 0.0578, + "step": 602 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945798744592942, + "loss": 0.8787, + "step": 603 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945500248190943, + "loss": 0.7007, + "step": 604 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994520093661082, + "loss": 0.7874, + "step": 605 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994490080990191, + "loss": 0.6423, + "step": 606 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994459986811368, + "loss": 0.8314, + "step": 607 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009944298111295737, + "loss": 0.7676, + "step": 608 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943995539497823, + "loss": 0.7705, + "step": 609 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943692152769809, + "loss": 0.0612, + "step": 610 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943387951161702, + "loss": 0.7032, + "step": 611 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943082934723646, + "loss": 0.9469, + "step": 612 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942777103505917, + "loss": 0.8187, + "step": 613 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942470457558928, + "loss": 0.9288, + "step": 614 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942162996933218, + "loss": 0.6572, + "step": 615 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941854721679474, + "loss": 0.722, + "step": 616 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941545631848502, + "loss": 0.7384, + "step": 617 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941235727491259, + "loss": 0.7482, + "step": 618 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994092500865882, + "loss": 0.6576, + "step": 619 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009940613475402404, + "loss": 0.8513, + "step": 620 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009940301127773362, + "loss": 0.6309, + "step": 621 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939987965823177, + "loss": 0.6951, + "step": 622 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939673989603473, + "loss": 0.657, + "step": 623 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939359199165998, + "loss": 0.5944, + "step": 624 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939043594562643, + "loss": 0.8179, + "step": 625 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938727175845428, + "loss": 0.8198, + "step": 626 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993840994306651, + "loss": 0.6672, + "step": 627 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938091896278178, + "loss": 0.7524, + "step": 628 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937773035532857, + "loss": 0.6716, + "step": 629 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993745336088311, + "loss": 0.5999, + "step": 630 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993713287238162, + "loss": 0.8099, + "step": 631 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936811570081224, + "loss": 0.6697, + "step": 632 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936489454034877, + "loss": 0.8048, + "step": 633 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936166524295673, + "loss": 0.6462, + "step": 634 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935842780916847, + "loss": 0.7137, + "step": 635 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935518223951757, + "loss": 0.5479, + "step": 636 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935192853453903, + "loss": 0.6101, + "step": 637 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934866669476915, + "loss": 0.8945, + "step": 638 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934539672074563, + "loss": 0.6177, + "step": 639 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934211861300742, + "loss": 0.6194, + "step": 640 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933883237209487, + "loss": 0.9369, + "step": 641 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933553799854966, + "loss": 0.6594, + "step": 642 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933223549291482, + "loss": 0.5992, + "step": 643 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993289248557347, + "loss": 0.6891, + "step": 644 + }, + { + "epoch": 0.08, + "learning_rate": 0.00099325606087555, + "loss": 0.7119, + "step": 645 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009932227918892276, + "loss": 0.6395, + "step": 646 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009931894416038635, + "loss": 0.562, + "step": 647 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009931560100249553, + "loss": 0.6277, + "step": 648 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993122497158013, + "loss": 0.7152, + "step": 649 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009930889030085612, + "loss": 0.7003, + "step": 650 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009930552275821368, + "loss": 0.5974, + "step": 651 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993021470884291, + "loss": 0.7859, + "step": 652 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929876329205875, + "loss": 0.7692, + "step": 653 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929537136966046, + "loss": 0.5959, + "step": 654 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929197132179327, + "loss": 0.6725, + "step": 655 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009928856314901763, + "loss": 0.656, + "step": 656 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009928514685189533, + "loss": 0.6824, + "step": 657 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009928172243098949, + "loss": 0.6903, + "step": 658 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009927828988686453, + "loss": 0.6143, + "step": 659 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009927484922008629, + "loss": 0.7892, + "step": 660 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009927140043122188, + "loss": 0.7964, + "step": 661 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926794352083977, + "loss": 0.5833, + "step": 662 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926447848950977, + "loss": 0.6472, + "step": 663 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926100533780304, + "loss": 0.7209, + "step": 664 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009925752406629205, + "loss": 0.8071, + "step": 665 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009925403467555063, + "loss": 0.6554, + "step": 666 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009925053716615397, + "loss": 0.7891, + "step": 667 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009924703153867856, + "loss": 0.5647, + "step": 668 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992435177937022, + "loss": 0.5863, + "step": 669 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009923999593180414, + "loss": 0.0529, + "step": 670 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009923646595356482, + "loss": 0.7268, + "step": 671 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009923292785956616, + "loss": 0.7675, + "step": 672 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992293816503913, + "loss": 0.6976, + "step": 673 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922582732662484, + "loss": 0.5105, + "step": 674 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922226488885257, + "loss": 0.7208, + "step": 675 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921869433766172, + "loss": 0.6841, + "step": 676 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921511567364085, + "loss": 0.7794, + "step": 677 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009921152889737985, + "loss": 0.619, + "step": 678 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009920793400946988, + "loss": 0.6909, + "step": 679 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009920433101050356, + "loss": 0.9028, + "step": 680 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009920071990107474, + "loss": 0.4569, + "step": 681 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919710068177865, + "loss": 0.5696, + "step": 682 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919347335321186, + "loss": 0.8322, + "step": 683 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918983791597227, + "loss": 0.5846, + "step": 684 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918619437065913, + "loss": 0.5913, + "step": 685 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918254271787299, + "loss": 0.5651, + "step": 686 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917888295821577, + "loss": 0.6622, + "step": 687 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917521509229075, + "loss": 0.5518, + "step": 688 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917153912070245, + "loss": 0.5869, + "step": 689 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009916785504405683, + "loss": 0.5817, + "step": 690 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009916416286296114, + "loss": 0.6609, + "step": 691 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009916046257802398, + "loss": 0.4675, + "step": 692 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009915675418985526, + "loss": 0.5707, + "step": 693 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009915303769906622, + "loss": 0.4331, + "step": 694 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914931310626952, + "loss": 0.7073, + "step": 695 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914558041207904, + "loss": 0.6759, + "step": 696 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914183961711005, + "loss": 0.618, + "step": 697 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991380907219792, + "loss": 0.6208, + "step": 698 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913433372730437, + "loss": 0.7748, + "step": 699 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913056863370487, + "loss": 0.0491, + "step": 700 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991267954418013, + "loss": 0.5248, + "step": 701 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991230141522156, + "loss": 0.5968, + "step": 702 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009911922476557107, + "loss": 0.7867, + "step": 703 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991154272824923, + "loss": 0.5016, + "step": 704 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009911162170360523, + "loss": 0.7037, + "step": 705 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910780802953718, + "loss": 0.678, + "step": 706 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910398626091672, + "loss": 0.6472, + "step": 707 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910015639837384, + "loss": 0.5229, + "step": 708 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990963184425398, + "loss": 0.5422, + "step": 709 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009909247239404725, + "loss": 0.5376, + "step": 710 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009908861825353007, + "loss": 0.552, + "step": 711 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009908475602162365, + "loss": 0.7676, + "step": 712 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009908088569896454, + "loss": 0.7411, + "step": 713 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990770072861907, + "loss": 0.6343, + "step": 714 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009907312078394146, + "loss": 0.6885, + "step": 715 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990692261928574, + "loss": 0.5884, + "step": 716 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009906532351358047, + "loss": 0.0521, + "step": 717 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009906141274675397, + "loss": 0.5806, + "step": 718 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009905749389302254, + "loss": 0.5978, + "step": 719 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009905356695303212, + "loss": 0.6398, + "step": 720 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009904963192743, + "loss": 0.6042, + "step": 721 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009904568881686478, + "loss": 0.5896, + "step": 722 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009904173762198642, + "loss": 0.4543, + "step": 723 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009903777834344622, + "loss": 0.7919, + "step": 724 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009903381098189678, + "loss": 0.4966, + "step": 725 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009902983553799207, + "loss": 0.6486, + "step": 726 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009902585201238734, + "loss": 0.0516, + "step": 727 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009902186040573924, + "loss": 0.7756, + "step": 728 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009901786071870567, + "loss": 0.704, + "step": 729 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009901385295194595, + "loss": 0.7562, + "step": 730 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009900983710612067, + "loss": 0.7064, + "step": 731 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009900581318189178, + "loss": 0.7601, + "step": 732 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009900178117992257, + "loss": 0.5723, + "step": 733 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009899774110087758, + "loss": 0.5294, + "step": 734 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989936929454228, + "loss": 0.874, + "step": 735 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989896367142255, + "loss": 0.6027, + "step": 736 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898557240795424, + "loss": 0.7151, + "step": 737 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898150002727898, + "loss": 0.6012, + "step": 738 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897741957287098, + "loss": 0.5143, + "step": 739 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897333104540282, + "loss": 0.754, + "step": 740 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989692344455484, + "loss": 0.658, + "step": 741 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009896512977398302, + "loss": 0.6863, + "step": 742 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009896101703138322, + "loss": 0.5546, + "step": 743 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009895689621842695, + "loss": 0.6515, + "step": 744 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009895276733579341, + "loss": 0.582, + "step": 745 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989486303841632, + "loss": 0.7549, + "step": 746 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009894448536421822, + "loss": 0.6365, + "step": 747 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989403322766417, + "loss": 0.5587, + "step": 748 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893617112211822, + "loss": 0.5292, + "step": 749 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893200190133364, + "loss": 0.47, + "step": 750 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989278246149752, + "loss": 0.5106, + "step": 751 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009892363926373145, + "loss": 0.5386, + "step": 752 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989194458482923, + "loss": 0.4763, + "step": 753 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989152443693489, + "loss": 0.7772, + "step": 754 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009891103482759383, + "loss": 0.6663, + "step": 755 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009890681722372096, + "loss": 0.5859, + "step": 756 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009890259155842546, + "loss": 0.8274, + "step": 757 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988983578324039, + "loss": 0.5955, + "step": 758 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988941160463541, + "loss": 0.6124, + "step": 759 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888986620097523, + "loss": 0.6027, + "step": 760 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888560829696786, + "loss": 0.6119, + "step": 761 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888134233503377, + "loss": 0.762, + "step": 762 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009887706831587615, + "loss": 0.7119, + "step": 763 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988727862401995, + "loss": 0.6958, + "step": 764 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009886849610870965, + "loss": 0.5985, + "step": 765 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009886419792211375, + "loss": 0.0513, + "step": 766 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885989168112027, + "loss": 0.7419, + "step": 767 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885557738643902, + "loss": 0.6583, + "step": 768 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885125503878115, + "loss": 0.63, + "step": 769 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988469246388591, + "loss": 0.6124, + "step": 770 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009884258618738666, + "loss": 0.4539, + "step": 771 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009883823968507895, + "loss": 0.6968, + "step": 772 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009883388513265243, + "loss": 0.7157, + "step": 773 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009882952253082488, + "loss": 0.5875, + "step": 774 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009882515188031535, + "loss": 0.5625, + "step": 775 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009882077318184434, + "loss": 0.6571, + "step": 776 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988163864361335, + "loss": 0.6577, + "step": 777 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009881199164390598, + "loss": 0.4906, + "step": 778 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009880758880588616, + "loss": 0.6361, + "step": 779 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988031779227998, + "loss": 0.7323, + "step": 780 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009879875899537391, + "loss": 0.7021, + "step": 781 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987943320243369, + "loss": 0.6533, + "step": 782 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009878989701041846, + "loss": 0.5173, + "step": 783 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009878545395434965, + "loss": 0.6166, + "step": 784 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987810028568628, + "loss": 0.6057, + "step": 785 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987765437186916, + "loss": 0.5055, + "step": 786 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987720765405711, + "loss": 0.5747, + "step": 787 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009876760132323757, + "loss": 0.5754, + "step": 788 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009876311806742873, + "loss": 0.5115, + "step": 789 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009875862677388352, + "loss": 0.6588, + "step": 790 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009875412744334226, + "loss": 0.5066, + "step": 791 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874962007654661, + "loss": 0.7047, + "step": 792 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874510467423952, + "loss": 0.7911, + "step": 793 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874058123716526, + "loss": 0.5764, + "step": 794 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009873604976606944, + "loss": 0.6101, + "step": 795 + }, + { + "epoch": 0.1, + "learning_rate": 0.00098731510261699, + "loss": 0.6746, + "step": 796 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009872696272480222, + "loss": 0.6471, + "step": 797 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009872240715612863, + "loss": 0.6848, + "step": 798 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009871784355642917, + "loss": 0.5504, + "step": 799 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009871327192645604, + "loss": 0.6233, + "step": 800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009870869226696285, + "loss": 0.7566, + "step": 801 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987041045787044, + "loss": 0.7509, + "step": 802 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009869950886243694, + "loss": 0.6266, + "step": 803 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009869490511891798, + "loss": 0.7794, + "step": 804 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009869029334890638, + "loss": 0.5414, + "step": 805 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009868567355316227, + "loss": 0.4448, + "step": 806 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009868104573244718, + "loss": 0.5345, + "step": 807 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009867640988752392, + "loss": 0.6251, + "step": 808 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986717660191566, + "loss": 0.7649, + "step": 809 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986671141281107, + "loss": 0.613, + "step": 810 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009866245421515298, + "loss": 0.7483, + "step": 811 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986577862810516, + "loss": 0.597, + "step": 812 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009865311032657591, + "loss": 0.6497, + "step": 813 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009864842635249672, + "loss": 0.5511, + "step": 814 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009864373435958607, + "loss": 0.6693, + "step": 815 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009863903434861735, + "loss": 0.0508, + "step": 816 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009863432632036529, + "loss": 0.563, + "step": 817 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986296102756059, + "loss": 0.8492, + "step": 818 + }, + { + "epoch": 0.1, + "learning_rate": 0.000986248862151166, + "loss": 0.6343, + "step": 819 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009862015413967598, + "loss": 0.0508, + "step": 820 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009861541405006408, + "loss": 0.7087, + "step": 821 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009861066594706225, + "loss": 0.5841, + "step": 822 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009860590983145306, + "loss": 0.4662, + "step": 823 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009860114570402056, + "loss": 0.5746, + "step": 824 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009859637356554993, + "loss": 0.6549, + "step": 825 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009859159341682787, + "loss": 0.634, + "step": 826 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009858680525864224, + "loss": 0.6753, + "step": 827 + }, + { + "epoch": 0.1, + "learning_rate": 0.000985820090917823, + "loss": 0.6257, + "step": 828 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009857720491703863, + "loss": 0.516, + "step": 829 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009857239273520307, + "loss": 0.5455, + "step": 830 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009856757254706888, + "loss": 0.6936, + "step": 831 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009856274435343054, + "loss": 0.679, + "step": 832 + }, + { + "epoch": 0.1, + "learning_rate": 0.000985579081550839, + "loss": 0.4371, + "step": 833 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009855306395282616, + "loss": 0.4534, + "step": 834 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009854821174745574, + "loss": 0.4688, + "step": 835 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009854335153977247, + "loss": 0.5604, + "step": 836 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009853848333057748, + "loss": 0.6177, + "step": 837 + }, + { + "epoch": 0.11, + "learning_rate": 0.000985336071206732, + "loss": 0.4196, + "step": 838 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009852872291086338, + "loss": 0.6304, + "step": 839 + }, + { + "epoch": 0.11, + "learning_rate": 0.000985238307019531, + "loss": 0.4905, + "step": 840 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009851893049474874, + "loss": 0.682, + "step": 841 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009851402229005804, + "loss": 0.6494, + "step": 842 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009850910608869, + "loss": 0.4883, + "step": 843 + }, + { + "epoch": 0.11, + "learning_rate": 0.00098504181891455, + "loss": 0.527, + "step": 844 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009849924969916467, + "loss": 0.5477, + "step": 845 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009849430951263203, + "loss": 0.5591, + "step": 846 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009848936133267138, + "loss": 0.5527, + "step": 847 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009848440516009832, + "loss": 0.6525, + "step": 848 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009847944099572977, + "loss": 0.5664, + "step": 849 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009847446884038403, + "loss": 0.7172, + "step": 850 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009846948869488067, + "loss": 0.6871, + "step": 851 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009846450056004053, + "loss": 0.5328, + "step": 852 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009845950443668585, + "loss": 0.835, + "step": 853 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009845450032564015, + "loss": 1.0186, + "step": 854 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009844948822772827, + "loss": 0.502, + "step": 855 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009844446814377638, + "loss": 0.6454, + "step": 856 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009843944007461192, + "loss": 0.6858, + "step": 857 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009843440402106367, + "loss": 0.4691, + "step": 858 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009842935998396179, + "loss": 0.6594, + "step": 859 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009842430796413767, + "loss": 0.5483, + "step": 860 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009841924796242403, + "loss": 0.5109, + "step": 861 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009841417997965493, + "loss": 0.6204, + "step": 862 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009840910401666574, + "loss": 0.5729, + "step": 863 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009840402007429317, + "loss": 0.5562, + "step": 864 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009839892815337516, + "loss": 0.6046, + "step": 865 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009839382825475107, + "loss": 0.7278, + "step": 866 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009838872037926153, + "loss": 0.6818, + "step": 867 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009838360452774843, + "loss": 0.6017, + "step": 868 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009837848070105508, + "loss": 0.63, + "step": 869 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009837334890002605, + "loss": 0.6298, + "step": 870 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009836820912550721, + "loss": 0.7139, + "step": 871 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009836306137834575, + "loss": 0.5371, + "step": 872 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983579056593902, + "loss": 0.5752, + "step": 873 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983527419694904, + "loss": 0.5001, + "step": 874 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009834757030949747, + "loss": 0.6259, + "step": 875 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009834239068026388, + "loss": 0.6865, + "step": 876 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983372030826434, + "loss": 0.5968, + "step": 877 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983320075174911, + "loss": 0.5977, + "step": 878 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009832680398566339, + "loss": 0.6681, + "step": 879 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009832159248801798, + "loss": 0.6924, + "step": 880 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983163730254139, + "loss": 0.5078, + "step": 881 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009831114559871147, + "loss": 0.6982, + "step": 882 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009830591020877234, + "loss": 0.5389, + "step": 883 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009830066685645946, + "loss": 0.0498, + "step": 884 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009829541554263715, + "loss": 0.5243, + "step": 885 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009829015626817094, + "loss": 0.5779, + "step": 886 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982848890339278, + "loss": 0.6997, + "step": 887 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009827961384077583, + "loss": 0.5499, + "step": 888 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009827433068958467, + "loss": 0.6063, + "step": 889 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009826903958122506, + "loss": 0.6799, + "step": 890 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009826374051656922, + "loss": 0.5376, + "step": 891 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009825843349649055, + "loss": 0.6322, + "step": 892 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009825311852186386, + "loss": 0.7278, + "step": 893 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982477955935652, + "loss": 0.8173, + "step": 894 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009824246471247199, + "loss": 0.5519, + "step": 895 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009823712587946289, + "loss": 0.5765, + "step": 896 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009823177909541795, + "loss": 0.488, + "step": 897 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009822642436121845, + "loss": 0.5541, + "step": 898 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009822106167774707, + "loss": 0.6033, + "step": 899 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009821569104588774, + "loss": 0.7223, + "step": 900 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982103124665257, + "loss": 0.5009, + "step": 901 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009820492594054752, + "loss": 0.632, + "step": 902 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009819953146884108, + "loss": 0.6974, + "step": 903 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009819412905229554, + "loss": 0.6093, + "step": 904 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009818871869180144, + "loss": 0.4677, + "step": 905 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009818330038825053, + "loss": 0.4812, + "step": 906 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009817787414253595, + "loss": 0.5826, + "step": 907 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009817243995555213, + "loss": 0.5493, + "step": 908 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009816699782819475, + "loss": 0.4825, + "step": 909 + }, + { + "epoch": 0.11, + "learning_rate": 0.000981615477613609, + "loss": 0.6909, + "step": 910 + }, + { + "epoch": 0.11, + "learning_rate": 0.000981560897559489, + "loss": 0.6649, + "step": 911 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009815062381285842, + "loss": 0.7512, + "step": 912 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009814514993299044, + "loss": 0.0495, + "step": 913 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009813966811724718, + "loss": 0.7384, + "step": 914 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009813417836653227, + "loss": 0.6602, + "step": 915 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009812868068175057, + "loss": 0.4802, + "step": 916 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009812317506380829, + "loss": 0.7205, + "step": 917 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009811766151361292, + "loss": 0.6869, + "step": 918 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009811214003207327, + "loss": 0.4608, + "step": 919 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009810661062009949, + "loss": 0.572, + "step": 920 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009810107327860298, + "loss": 0.5305, + "step": 921 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009809552800849647, + "loss": 0.6355, + "step": 922 + }, + { + "epoch": 0.12, + "learning_rate": 0.00098089974810694, + "loss": 0.0495, + "step": 923 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009808441368611095, + "loss": 0.6537, + "step": 924 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009807884463566392, + "loss": 0.7336, + "step": 925 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980732676602709, + "loss": 0.5905, + "step": 926 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009806768276085114, + "loss": 0.5912, + "step": 927 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009806208993832523, + "loss": 0.4753, + "step": 928 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009805648919361503, + "loss": 0.5264, + "step": 929 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009805088052764375, + "loss": 0.5049, + "step": 930 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009804526394133586, + "loss": 0.4501, + "step": 931 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009803963943561715, + "loss": 0.7543, + "step": 932 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009803400701141475, + "loss": 0.5737, + "step": 933 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009802836666965701, + "loss": 0.6755, + "step": 934 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009802271841127367, + "loss": 0.796, + "step": 935 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009801706223719578, + "loss": 0.5917, + "step": 936 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009801139814835562, + "loss": 0.5474, + "step": 937 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009800572614568682, + "loss": 0.5186, + "step": 938 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980000462301243, + "loss": 0.5221, + "step": 939 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979943584026043, + "loss": 0.7611, + "step": 940 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009798866266406438, + "loss": 0.4612, + "step": 941 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009798295901544338, + "loss": 0.6235, + "step": 942 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009797724745768141, + "loss": 0.5256, + "step": 943 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009797152799171994, + "loss": 0.6855, + "step": 944 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009796580061850173, + "loss": 0.5387, + "step": 945 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009796006533897082, + "loss": 0.6053, + "step": 946 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979543221540726, + "loss": 0.6003, + "step": 947 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009794857106475367, + "loss": 0.8309, + "step": 948 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009794281207196208, + "loss": 0.6488, + "step": 949 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009793704517664702, + "loss": 0.5557, + "step": 950 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009793127037975911, + "loss": 0.6016, + "step": 951 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979254876822502, + "loss": 0.8694, + "step": 952 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009791969708507348, + "loss": 0.7107, + "step": 953 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979138985891834, + "loss": 0.5187, + "step": 954 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009790809219553578, + "loss": 0.5726, + "step": 955 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009790227790508765, + "loss": 0.6095, + "step": 956 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009789645571879747, + "loss": 0.5736, + "step": 957 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009789062563762484, + "loss": 0.6115, + "step": 958 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009788478766253079, + "loss": 0.4757, + "step": 959 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978789417944776, + "loss": 0.7664, + "step": 960 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009787308803442887, + "loss": 0.7067, + "step": 961 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009786722638334946, + "loss": 0.6371, + "step": 962 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009786135684220559, + "loss": 0.5815, + "step": 963 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009785547941196472, + "loss": 0.645, + "step": 964 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009784959409359566, + "loss": 0.4613, + "step": 965 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978437008880685, + "loss": 0.6266, + "step": 966 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009783779979635463, + "loss": 0.5507, + "step": 967 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009783189081942673, + "loss": 0.4736, + "step": 968 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009782597395825879, + "loss": 0.5681, + "step": 969 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009782004921382612, + "loss": 0.7103, + "step": 970 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009781411658710529, + "loss": 0.6252, + "step": 971 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978081760790742, + "loss": 0.6156, + "step": 972 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009780222769071203, + "loss": 0.0504, + "step": 973 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009779627142299928, + "loss": 0.6716, + "step": 974 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977903072769177, + "loss": 0.6052, + "step": 975 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009778433525345041, + "loss": 0.6484, + "step": 976 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009777835535358177, + "loss": 0.6761, + "step": 977 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977723675782975, + "loss": 0.4625, + "step": 978 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977663719285845, + "loss": 0.6151, + "step": 979 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009776036840543113, + "loss": 0.7039, + "step": 980 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009775435700982694, + "loss": 0.5845, + "step": 981 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009774833774276277, + "loss": 0.4999, + "step": 982 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977423106052308, + "loss": 0.4675, + "step": 983 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009773627559822454, + "loss": 0.4772, + "step": 984 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977302327227387, + "loss": 0.5905, + "step": 985 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009772418197976937, + "loss": 0.4164, + "step": 986 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009771812337031392, + "loss": 0.5106, + "step": 987 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009771205689537096, + "loss": 0.672, + "step": 988 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009770598255594049, + "loss": 0.6716, + "step": 989 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009769990035302372, + "loss": 0.0926, + "step": 990 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009769381028762322, + "loss": 0.6617, + "step": 991 + }, + { + "epoch": 0.12, + "learning_rate": 0.000976877123607428, + "loss": 0.5433, + "step": 992 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009768160657338762, + "loss": 0.6593, + "step": 993 + }, + { + "epoch": 0.12, + "learning_rate": 0.000976754929265641, + "loss": 0.6871, + "step": 994 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009766937142127998, + "loss": 0.4918, + "step": 995 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009766324205854426, + "loss": 0.6613, + "step": 996 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009765710483936728, + "loss": 0.4896, + "step": 997 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009765095976476063, + "loss": 0.4904, + "step": 998 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009764480683573722, + "loss": 0.626, + "step": 999 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009763864605331127, + "loss": 0.5262, + "step": 1000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009763247741849828, + "loss": 0.4789, + "step": 1001 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009762630093231499, + "loss": 0.6538, + "step": 1002 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009762011659577954, + "loss": 0.7125, + "step": 1003 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009761392440991128, + "loss": 0.6144, + "step": 1004 + }, + { + "epoch": 0.13, + "learning_rate": 0.000976077243757309, + "loss": 0.5652, + "step": 1005 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009760151649426034, + "loss": 0.7332, + "step": 1006 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009759530076652289, + "loss": 0.639, + "step": 1007 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009758907719354308, + "loss": 0.7562, + "step": 1008 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009758284577634675, + "loss": 0.7245, + "step": 1009 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009757660651596108, + "loss": 0.6514, + "step": 1010 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009757035941341445, + "loss": 0.6031, + "step": 1011 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009756410446973661, + "loss": 0.1797, + "step": 1012 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009755784168595859, + "loss": 0.6075, + "step": 1013 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009755157106311266, + "loss": 0.5746, + "step": 1014 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009754529260223246, + "loss": 0.7286, + "step": 1015 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009753900630435286, + "loss": 0.7177, + "step": 1016 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009753271217051005, + "loss": 0.5144, + "step": 1017 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009752641020174151, + "loss": 0.541, + "step": 1018 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009752010039908601, + "loss": 0.5723, + "step": 1019 + }, + { + "epoch": 0.13, + "learning_rate": 0.000975137827635836, + "loss": 0.6801, + "step": 1020 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009750745729627565, + "loss": 0.5798, + "step": 1021 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009750112399820477, + "loss": 0.5546, + "step": 1022 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009749478287041492, + "loss": 0.0645, + "step": 1023 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009748843391395131, + "loss": 0.5143, + "step": 1024 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009748207712986046, + "loss": 0.58, + "step": 1025 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009747571251919016, + "loss": 0.5962, + "step": 1026 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009746934008298951, + "loss": 0.7697, + "step": 1027 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009746295982230891, + "loss": 0.4849, + "step": 1028 + }, + { + "epoch": 0.13, + "learning_rate": 0.000974565717382, + "loss": 0.6845, + "step": 1029 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009745017583171576, + "loss": 0.5757, + "step": 1030 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009744377210391045, + "loss": 0.7008, + "step": 1031 + }, + { + "epoch": 0.13, + "learning_rate": 0.000974373605558396, + "loss": 0.6746, + "step": 1032 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009743094118856004, + "loss": 0.6873, + "step": 1033 + }, + { + "epoch": 0.13, + "learning_rate": 0.000974245140031299, + "loss": 0.433, + "step": 1034 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009741807900060858, + "loss": 0.8115, + "step": 1035 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009741163618205678, + "loss": 0.666, + "step": 1036 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009740518554853647, + "loss": 0.6205, + "step": 1037 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009739872710111093, + "loss": 0.6484, + "step": 1038 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009739226084084473, + "loss": 0.463, + "step": 1039 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009738578676880372, + "loss": 0.5911, + "step": 1040 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009737930488605502, + "loss": 0.5986, + "step": 1041 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009737281519366707, + "loss": 0.6166, + "step": 1042 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009736631769270957, + "loss": 0.7261, + "step": 1043 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009735981238425352, + "loss": 0.7484, + "step": 1044 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009735329926937121, + "loss": 0.5377, + "step": 1045 + }, + { + "epoch": 0.13, + "learning_rate": 0.000973467783491362, + "loss": 0.7867, + "step": 1046 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009734024962462335, + "loss": 0.6046, + "step": 1047 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009733371309690883, + "loss": 0.644, + "step": 1048 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009732716876707004, + "loss": 0.7469, + "step": 1049 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009732061663618571, + "loss": 0.6846, + "step": 1050 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009731405670533584, + "loss": 0.6133, + "step": 1051 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009730748897560172, + "loss": 0.7065, + "step": 1052 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009730091344806593, + "loss": 0.7437, + "step": 1053 + }, + { + "epoch": 0.13, + "learning_rate": 0.000972943301238123, + "loss": 0.5068, + "step": 1054 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009728773900392603, + "loss": 0.5316, + "step": 1055 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009728114008949349, + "loss": 0.7004, + "step": 1056 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009727453338160244, + "loss": 0.5496, + "step": 1057 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009726791888134183, + "loss": 0.514, + "step": 1058 + }, + { + "epoch": 0.13, + "learning_rate": 0.00097261296589802, + "loss": 0.5695, + "step": 1059 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009725466650807448, + "loss": 0.5759, + "step": 1060 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009724802863725213, + "loss": 0.5841, + "step": 1061 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009724138297842909, + "loss": 0.6938, + "step": 1062 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009723472953270077, + "loss": 0.5188, + "step": 1063 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009722806830116388, + "loss": 0.6514, + "step": 1064 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009722139928491641, + "loss": 0.5887, + "step": 1065 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009721472248505762, + "loss": 0.6449, + "step": 1066 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009720803790268807, + "loss": 0.5049, + "step": 1067 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009720134553890959, + "loss": 0.5784, + "step": 1068 + }, + { + "epoch": 0.13, + "learning_rate": 0.000971946453948253, + "loss": 0.6501, + "step": 1069 + }, + { + "epoch": 0.13, + "learning_rate": 0.000971879374715396, + "loss": 0.4288, + "step": 1070 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009718122177015815, + "loss": 0.6654, + "step": 1071 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009717449829178798, + "loss": 0.8065, + "step": 1072 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009716776703753725, + "loss": 0.837, + "step": 1073 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009716102800851554, + "loss": 0.5041, + "step": 1074 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009715428120583366, + "loss": 0.6781, + "step": 1075 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009714752663060369, + "loss": 0.7502, + "step": 1076 + }, + { + "epoch": 0.13, + "learning_rate": 0.00097140764283939, + "loss": 0.5538, + "step": 1077 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009713399416695425, + "loss": 0.5607, + "step": 1078 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009712721628076536, + "loss": 0.7292, + "step": 1079 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009712043062648955, + "loss": 0.5121, + "step": 1080 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009711363720524533, + "loss": 0.6895, + "step": 1081 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009710683601815246, + "loss": 0.631, + "step": 1082 + }, + { + "epoch": 0.14, + "learning_rate": 0.00097100027066332, + "loss": 0.6848, + "step": 1083 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009709321035090628, + "loss": 0.0639, + "step": 1084 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009708638587299893, + "loss": 0.6538, + "step": 1085 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009707955363373482, + "loss": 0.5105, + "step": 1086 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009707271363424015, + "loss": 0.5819, + "step": 1087 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009706586587564235, + "loss": 0.5272, + "step": 1088 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009705901035907017, + "loss": 0.6122, + "step": 1089 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009705214708565361, + "loss": 0.4752, + "step": 1090 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009704527605652396, + "loss": 0.594, + "step": 1091 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009703839727281378, + "loss": 0.5521, + "step": 1092 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009703151073565695, + "loss": 0.6467, + "step": 1093 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009702461644618854, + "loss": 0.5217, + "step": 1094 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009701771440554499, + "loss": 0.6371, + "step": 1095 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009701080461486398, + "loss": 0.6172, + "step": 1096 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009700388707528445, + "loss": 0.7501, + "step": 1097 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009699696178794662, + "loss": 0.734, + "step": 1098 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009699002875399204, + "loss": 0.5795, + "step": 1099 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009698308797456348, + "loss": 0.5101, + "step": 1100 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009697613945080499, + "loss": 0.5088, + "step": 1101 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009696918318386192, + "loss": 0.5908, + "step": 1102 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009696221917488091, + "loss": 0.5587, + "step": 1103 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009695524742500982, + "loss": 0.6519, + "step": 1104 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009694826793539784, + "loss": 0.5193, + "step": 1105 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009694128070719541, + "loss": 0.7501, + "step": 1106 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009693428574155425, + "loss": 0.5936, + "step": 1107 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009692728303962734, + "loss": 0.7341, + "step": 1108 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009692027260256899, + "loss": 0.8007, + "step": 1109 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009691325443153472, + "loss": 0.541, + "step": 1110 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009690622852768135, + "loss": 0.7039, + "step": 1111 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009689919489216699, + "loss": 0.6384, + "step": 1112 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009689215352615099, + "loss": 0.4988, + "step": 1113 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009688510443079401, + "loss": 0.5534, + "step": 1114 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009687804760725796, + "loss": 0.5873, + "step": 1115 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009687098305670605, + "loss": 0.5414, + "step": 1116 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009686391078030272, + "loss": 0.542, + "step": 1117 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009685683077921372, + "loss": 0.0617, + "step": 1118 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009684974305460607, + "loss": 0.5884, + "step": 1119 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009684264760764806, + "loss": 0.7794, + "step": 1120 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009683554443950924, + "loss": 0.6517, + "step": 1121 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009682843355136042, + "loss": 0.5323, + "step": 1122 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009682131494437374, + "loss": 0.7045, + "step": 1123 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009681418861972257, + "loss": 0.787, + "step": 1124 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009680705457858154, + "loss": 0.6761, + "step": 1125 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009679991282212658, + "loss": 0.8365, + "step": 1126 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009679276335153488, + "loss": 0.775, + "step": 1127 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009678560616798491, + "loss": 0.51, + "step": 1128 + }, + { + "epoch": 0.14, + "learning_rate": 0.000967784412726564, + "loss": 0.67, + "step": 1129 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009677126866673037, + "loss": 0.6517, + "step": 1130 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009676408835138906, + "loss": 0.6212, + "step": 1131 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009675690032781604, + "loss": 0.6029, + "step": 1132 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009674970459719614, + "loss": 0.6145, + "step": 1133 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009674250116071544, + "loss": 0.5652, + "step": 1134 + }, + { + "epoch": 0.14, + "learning_rate": 0.000967352900195613, + "loss": 0.4795, + "step": 1135 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009672807117492235, + "loss": 0.6215, + "step": 1136 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009672084462798848, + "loss": 0.6464, + "step": 1137 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009671361037995087, + "loss": 0.4897, + "step": 1138 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009670636843200194, + "loss": 0.6553, + "step": 1139 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009669911878533541, + "loss": 0.4761, + "step": 1140 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009669186144114626, + "loss": 0.6873, + "step": 1141 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009668459640063072, + "loss": 0.6621, + "step": 1142 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009667732366498633, + "loss": 0.4723, + "step": 1143 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009667004323541185, + "loss": 0.4813, + "step": 1144 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009666275511310731, + "loss": 0.5229, + "step": 1145 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009665545929927406, + "loss": 0.6783, + "step": 1146 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009664815579511469, + "loss": 0.6248, + "step": 1147 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009664084460183303, + "loss": 0.5809, + "step": 1148 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009663352572063422, + "loss": 0.6166, + "step": 1149 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009662619915272463, + "loss": 0.5787, + "step": 1150 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009661886489931193, + "loss": 0.6353, + "step": 1151 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009661152296160504, + "loss": 0.5317, + "step": 1152 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009660417334081412, + "loss": 0.8156, + "step": 1153 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009659681603815067, + "loss": 0.7102, + "step": 1154 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009658945105482739, + "loss": 0.6243, + "step": 1155 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009658207839205826, + "loss": 0.7863, + "step": 1156 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009657469805105855, + "loss": 0.6943, + "step": 1157 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009656731003304475, + "loss": 0.0602, + "step": 1158 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009655991433923468, + "loss": 0.6649, + "step": 1159 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009655251097084736, + "loss": 0.6975, + "step": 1160 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009654509992910312, + "loss": 0.5117, + "step": 1161 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009653768121522355, + "loss": 0.5725, + "step": 1162 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009653025483043145, + "loss": 0.6849, + "step": 1163 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009652282077595098, + "loss": 0.5597, + "step": 1164 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009651537905300747, + "loss": 0.4869, + "step": 1165 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009650792966282759, + "loss": 0.7346, + "step": 1166 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009650047260663921, + "loss": 0.6653, + "step": 1167 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009649300788567152, + "loss": 0.5137, + "step": 1168 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009648553550115493, + "loss": 0.4642, + "step": 1169 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009647805545432113, + "loss": 0.0589, + "step": 1170 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009647056774640308, + "loss": 0.5402, + "step": 1171 + }, + { + "epoch": 0.15, + "learning_rate": 0.00096463072378635, + "loss": 0.5378, + "step": 1172 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009645556935225237, + "loss": 0.5739, + "step": 1173 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009644805866849192, + "loss": 0.5166, + "step": 1174 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009644054032859165, + "loss": 0.4891, + "step": 1175 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009643301433379084, + "loss": 0.7733, + "step": 1176 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009642548068533001, + "loss": 0.6162, + "step": 1177 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009641793938445093, + "loss": 0.5667, + "step": 1178 + }, + { + "epoch": 0.15, + "learning_rate": 0.000964103904323967, + "loss": 0.5333, + "step": 1179 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009640283383041158, + "loss": 0.6667, + "step": 1180 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009639526957974117, + "loss": 0.6084, + "step": 1181 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009638769768163228, + "loss": 0.5304, + "step": 1182 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009638011813733303, + "loss": 0.5125, + "step": 1183 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009637253094809276, + "loss": 0.6528, + "step": 1184 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009636493611516208, + "loss": 0.6185, + "step": 1185 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009635733363979286, + "loss": 0.4907, + "step": 1186 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009634972352323825, + "loss": 0.5654, + "step": 1187 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009634210576675265, + "loss": 0.5839, + "step": 1188 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009633448037159167, + "loss": 0.6686, + "step": 1189 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009632684733901227, + "loss": 0.4595, + "step": 1190 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009631920667027259, + "loss": 0.6989, + "step": 1191 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009631155836663205, + "loss": 0.6013, + "step": 1192 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009630390242935139, + "loss": 0.6136, + "step": 1193 + }, + { + "epoch": 0.15, + "learning_rate": 0.000962962388596925, + "loss": 0.7688, + "step": 1194 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009628856765891861, + "loss": 0.485, + "step": 1195 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009628088882829416, + "loss": 0.5585, + "step": 1196 + }, + { + "epoch": 0.15, + "learning_rate": 0.000962732023690849, + "loss": 0.6169, + "step": 1197 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009626550828255779, + "loss": 0.6659, + "step": 1198 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009625780656998107, + "loss": 0.6771, + "step": 1199 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009625009723262421, + "loss": 0.7874, + "step": 1200 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009624238027175798, + "loss": 0.4923, + "step": 1201 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009623465568865439, + "loss": 0.5264, + "step": 1202 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009622692348458668, + "loss": 0.6283, + "step": 1203 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009621918366082938, + "loss": 0.5878, + "step": 1204 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009621143621865826, + "loss": 0.682, + "step": 1205 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009620368115935034, + "loss": 0.5931, + "step": 1206 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009619591848418392, + "loss": 0.5479, + "step": 1207 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009618814819443853, + "loss": 0.6337, + "step": 1208 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009618037029139496, + "loss": 0.5181, + "step": 1209 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009617258477633527, + "loss": 0.4884, + "step": 1210 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009616479165054276, + "loss": 0.6788, + "step": 1211 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009615699091530199, + "loss": 0.5308, + "step": 1212 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009614918257189876, + "loss": 0.4655, + "step": 1213 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009614136662162016, + "loss": 0.5762, + "step": 1214 + }, + { + "epoch": 0.15, + "learning_rate": 0.000961335430657545, + "loss": 0.5352, + "step": 1215 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009612571190559135, + "loss": 0.4886, + "step": 1216 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009611787314242155, + "loss": 0.4384, + "step": 1217 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009611002677753716, + "loss": 0.635, + "step": 1218 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009610217281223153, + "loss": 0.5009, + "step": 1219 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009609431124779927, + "loss": 0.5806, + "step": 1220 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009608644208553616, + "loss": 0.71, + "step": 1221 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009607856532673934, + "loss": 0.6531, + "step": 1222 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009607068097270715, + "loss": 0.6431, + "step": 1223 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009606278902473917, + "loss": 0.5284, + "step": 1224 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009605488948413626, + "loss": 0.4249, + "step": 1225 + }, + { + "epoch": 0.15, + "learning_rate": 0.000960469823522005, + "loss": 0.6569, + "step": 1226 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009603906763023526, + "loss": 0.5908, + "step": 1227 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009603114531954514, + "loss": 0.6927, + "step": 1228 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009602321542143599, + "loss": 0.5292, + "step": 1229 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009601527793721491, + "loss": 0.4578, + "step": 1230 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009600733286819025, + "loss": 0.058, + "step": 1231 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009599938021567163, + "loss": 0.5813, + "step": 1232 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009599141998096989, + "loss": 0.6016, + "step": 1233 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009598345216539714, + "loss": 0.692, + "step": 1234 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009597547677026672, + "loss": 0.6377, + "step": 1235 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009596749379689325, + "loss": 0.672, + "step": 1236 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009595950324659259, + "loss": 0.8998, + "step": 1237 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009595150512068181, + "loss": 0.5098, + "step": 1238 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009594349942047928, + "loss": 0.5591, + "step": 1239 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009593548614730458, + "loss": 0.678, + "step": 1240 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009592746530247859, + "loss": 0.6315, + "step": 1241 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009591943688732337, + "loss": 0.4865, + "step": 1242 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009591140090316228, + "loss": 0.6829, + "step": 1243 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009590335735131991, + "loss": 0.499, + "step": 1244 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009589530623312209, + "loss": 0.5721, + "step": 1245 + }, + { + "epoch": 0.16, + "learning_rate": 0.000958872475498959, + "loss": 0.625, + "step": 1246 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009587918130296969, + "loss": 0.4911, + "step": 1247 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009587110749367301, + "loss": 0.4934, + "step": 1248 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009586302612333672, + "loss": 0.6321, + "step": 1249 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009585493719329285, + "loss": 0.8044, + "step": 1250 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009584684070487475, + "loss": 0.4834, + "step": 1251 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009583873665941697, + "loss": 0.645, + "step": 1252 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009583062505825532, + "loss": 0.5762, + "step": 1253 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009582250590272686, + "loss": 0.6, + "step": 1254 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009581437919416986, + "loss": 0.4315, + "step": 1255 + }, + { + "epoch": 0.16, + "learning_rate": 0.000958062449339239, + "loss": 0.5643, + "step": 1256 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009579810312332975, + "loss": 0.9166, + "step": 1257 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009578995376372944, + "loss": 0.6659, + "step": 1258 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009578179685646626, + "loss": 0.5972, + "step": 1259 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009577363240288473, + "loss": 0.458, + "step": 1260 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009576546040433062, + "loss": 0.468, + "step": 1261 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009575728086215092, + "loss": 0.6393, + "step": 1262 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009574909377769391, + "loss": 0.6548, + "step": 1263 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009574089915230906, + "loss": 0.5863, + "step": 1264 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009573269698734711, + "loss": 0.621, + "step": 1265 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009572448728416005, + "loss": 0.5775, + "step": 1266 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009571627004410113, + "loss": 0.5865, + "step": 1267 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009570804526852477, + "loss": 0.6304, + "step": 1268 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009569981295878672, + "loss": 0.5728, + "step": 1269 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009569157311624388, + "loss": 0.5801, + "step": 1270 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009568332574225449, + "loss": 0.6973, + "step": 1271 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009567507083817797, + "loss": 0.5078, + "step": 1272 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009566680840537498, + "loss": 0.4688, + "step": 1273 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009565853844520746, + "loss": 0.5497, + "step": 1274 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009565026095903855, + "loss": 0.644, + "step": 1275 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009564197594823263, + "loss": 0.8943, + "step": 1276 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009563368341415537, + "loss": 0.0568, + "step": 1277 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009562538335817364, + "loss": 0.5256, + "step": 1278 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009561707578165555, + "loss": 0.6365, + "step": 1279 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009560876068597046, + "loss": 0.6284, + "step": 1280 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009560043807248896, + "loss": 0.4534, + "step": 1281 + }, + { + "epoch": 0.16, + "learning_rate": 0.000955921079425829, + "loss": 0.4451, + "step": 1282 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009558377029762532, + "loss": 0.6152, + "step": 1283 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009557542513899058, + "loss": 0.5315, + "step": 1284 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009556707246805421, + "loss": 0.6851, + "step": 1285 + }, + { + "epoch": 0.16, + "learning_rate": 0.00095558712286193, + "loss": 0.4797, + "step": 1286 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009555034459478496, + "loss": 0.5809, + "step": 1287 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009554196939520938, + "loss": 0.5868, + "step": 1288 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009553358668884678, + "loss": 0.6069, + "step": 1289 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009552519647707885, + "loss": 0.4987, + "step": 1290 + }, + { + "epoch": 0.16, + "learning_rate": 0.000955167987612886, + "loss": 0.6744, + "step": 1291 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009550839354286024, + "loss": 0.5667, + "step": 1292 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009549998082317921, + "loss": 0.0564, + "step": 1293 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009549156060363222, + "loss": 0.5596, + "step": 1294 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009548313288560719, + "loss": 0.5593, + "step": 1295 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009547469767049327, + "loss": 0.5471, + "step": 1296 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009546625495968085, + "loss": 0.6033, + "step": 1297 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009545780475456159, + "loss": 0.4569, + "step": 1298 + }, + { + "epoch": 0.16, + "learning_rate": 0.000954493470565283, + "loss": 0.4398, + "step": 1299 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009544088186697515, + "loss": 0.7947, + "step": 1300 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009543240918729743, + "loss": 0.6957, + "step": 1301 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009542392901889174, + "loss": 0.5514, + "step": 1302 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009541544136315585, + "loss": 0.5723, + "step": 1303 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009540694622148884, + "loss": 0.531, + "step": 1304 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009539844359529095, + "loss": 0.5173, + "step": 1305 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009538993348596372, + "loss": 0.599, + "step": 1306 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009538141589490987, + "loss": 0.4545, + "step": 1307 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009537289082353336, + "loss": 0.6393, + "step": 1308 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009536435827323942, + "loss": 0.6761, + "step": 1309 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009535581824543449, + "loss": 0.5511, + "step": 1310 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009534727074152625, + "loss": 0.5288, + "step": 1311 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009533871576292357, + "loss": 0.5508, + "step": 1312 + }, + { + "epoch": 0.16, + "learning_rate": 0.000953301533110366, + "loss": 0.7521, + "step": 1313 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009532158338727675, + "loss": 0.0553, + "step": 1314 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009531300599305656, + "loss": 0.5109, + "step": 1315 + }, + { + "epoch": 0.16, + "learning_rate": 0.000953044211297899, + "loss": 0.6849, + "step": 1316 + }, + { + "epoch": 0.17, + "learning_rate": 0.000952958287988918, + "loss": 0.5917, + "step": 1317 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009528722900177859, + "loss": 0.5631, + "step": 1318 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009527862173986779, + "loss": 0.4768, + "step": 1319 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009527000701457812, + "loss": 0.4818, + "step": 1320 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009526138482732961, + "loss": 0.6005, + "step": 1321 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009525275517954343, + "loss": 0.554, + "step": 1322 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009524411807264206, + "loss": 0.442, + "step": 1323 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009523547350804916, + "loss": 0.5742, + "step": 1324 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009522682148718962, + "loss": 0.5659, + "step": 1325 + }, + { + "epoch": 0.17, + "learning_rate": 0.000952181620114896, + "loss": 0.6058, + "step": 1326 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009520949508237647, + "loss": 0.7731, + "step": 1327 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009520082070127877, + "loss": 0.5463, + "step": 1328 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009519213886962635, + "loss": 0.6114, + "step": 1329 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009518344958885026, + "loss": 0.6456, + "step": 1330 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009517475286038277, + "loss": 0.561, + "step": 1331 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009516604868565737, + "loss": 0.5096, + "step": 1332 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009515733706610878, + "loss": 0.7144, + "step": 1333 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009514861800317301, + "loss": 0.6564, + "step": 1334 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009513989149828718, + "loss": 0.4612, + "step": 1335 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009513115755288973, + "loss": 0.7357, + "step": 1336 + }, + { + "epoch": 0.17, + "learning_rate": 0.000951224161684203, + "loss": 0.538, + "step": 1337 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009511366734631974, + "loss": 0.6615, + "step": 1338 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009510491108803014, + "loss": 0.5573, + "step": 1339 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009509614739499482, + "loss": 0.5209, + "step": 1340 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009508737626865832, + "loss": 0.5623, + "step": 1341 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009507859771046641, + "loss": 0.4768, + "step": 1342 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009506981172186606, + "loss": 0.6641, + "step": 1343 + }, + { + "epoch": 0.17, + "learning_rate": 0.000950610183043055, + "loss": 0.6318, + "step": 1344 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009505221745923415, + "loss": 0.6152, + "step": 1345 + }, + { + "epoch": 0.17, + "learning_rate": 0.000950434091881027, + "loss": 0.5286, + "step": 1346 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009503459349236303, + "loss": 0.5483, + "step": 1347 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009502577037346824, + "loss": 0.5126, + "step": 1348 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009501693983287269, + "loss": 0.5923, + "step": 1349 + }, + { + "epoch": 0.17, + "learning_rate": 0.000950081018720319, + "loss": 0.4856, + "step": 1350 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009499925649240268, + "loss": 0.557, + "step": 1351 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009499040369544303, + "loss": 0.5096, + "step": 1352 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009498154348261216, + "loss": 0.7493, + "step": 1353 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009497267585537055, + "loss": 0.53, + "step": 1354 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009496380081517985, + "loss": 0.467, + "step": 1355 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009495491836350294, + "loss": 0.54, + "step": 1356 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009494602850180397, + "loss": 0.4635, + "step": 1357 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009493713123154826, + "loss": 0.4496, + "step": 1358 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009492822655420235, + "loss": 0.7744, + "step": 1359 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009491931447123403, + "loss": 0.5144, + "step": 1360 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009491039498411231, + "loss": 0.6035, + "step": 1361 + }, + { + "epoch": 0.17, + "learning_rate": 0.000949014680943074, + "loss": 0.5271, + "step": 1362 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009489253380329075, + "loss": 0.5262, + "step": 1363 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009488359211253501, + "loss": 0.6656, + "step": 1364 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009487464302351405, + "loss": 0.5035, + "step": 1365 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009486568653770298, + "loss": 0.5352, + "step": 1366 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009485672265657812, + "loss": 0.6743, + "step": 1367 + }, + { + "epoch": 0.17, + "learning_rate": 0.00094847751381617, + "loss": 0.5503, + "step": 1368 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009483877271429839, + "loss": 0.7668, + "step": 1369 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009482978665610227, + "loss": 0.506, + "step": 1370 + }, + { + "epoch": 0.17, + "learning_rate": 0.000948207932085098, + "loss": 0.4181, + "step": 1371 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009481179237300339, + "loss": 0.6068, + "step": 1372 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009480278415106671, + "loss": 0.4315, + "step": 1373 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009479376854418458, + "loss": 0.6323, + "step": 1374 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009478474555384305, + "loss": 0.8311, + "step": 1375 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009477571518152943, + "loss": 0.536, + "step": 1376 + }, + { + "epoch": 0.17, + "learning_rate": 0.000947666774287322, + "loss": 0.5979, + "step": 1377 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009475763229694105, + "loss": 0.6562, + "step": 1378 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009474857978764697, + "loss": 0.4803, + "step": 1379 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009473951990234205, + "loss": 0.6525, + "step": 1380 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009473045264251969, + "loss": 0.6617, + "step": 1381 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009472137800967444, + "loss": 0.4751, + "step": 1382 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009471229600530209, + "loss": 0.0549, + "step": 1383 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009470320663089968, + "loss": 0.4799, + "step": 1384 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009469410988796541, + "loss": 0.7004, + "step": 1385 + }, + { + "epoch": 0.17, + "learning_rate": 0.000946850057779987, + "loss": 0.5112, + "step": 1386 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009467589430250024, + "loss": 0.6147, + "step": 1387 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009466677546297186, + "loss": 0.4591, + "step": 1388 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009465764926091666, + "loss": 0.6921, + "step": 1389 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009464851569783894, + "loss": 0.8169, + "step": 1390 + }, + { + "epoch": 0.17, + "learning_rate": 0.000946393747752442, + "loss": 0.6029, + "step": 1391 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009463022649463914, + "loss": 0.5906, + "step": 1392 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009462107085753171, + "loss": 0.5653, + "step": 1393 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009461190786543103, + "loss": 0.6768, + "step": 1394 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009460273751984749, + "loss": 0.536, + "step": 1395 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009459355982229266, + "loss": 0.4558, + "step": 1396 + }, + { + "epoch": 0.18, + "learning_rate": 0.000945843747742793, + "loss": 0.6531, + "step": 1397 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009457518237732139, + "loss": 0.5349, + "step": 1398 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009456598263293417, + "loss": 0.6594, + "step": 1399 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009455677554263403, + "loss": 0.4923, + "step": 1400 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009454756110793861, + "loss": 0.5155, + "step": 1401 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009453833933036674, + "loss": 0.4896, + "step": 1402 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009452911021143847, + "loss": 0.7019, + "step": 1403 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009451987375267505, + "loss": 0.0541, + "step": 1404 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009451062995559894, + "loss": 0.5505, + "step": 1405 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009450137882173384, + "loss": 0.4766, + "step": 1406 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009449212035260462, + "loss": 0.5135, + "step": 1407 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009448285454973737, + "loss": 0.51, + "step": 1408 + }, + { + "epoch": 0.18, + "learning_rate": 0.000944735814146594, + "loss": 0.4595, + "step": 1409 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009446430094889922, + "loss": 0.6337, + "step": 1410 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009445501315398656, + "loss": 0.5343, + "step": 1411 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009444571803145234, + "loss": 0.4307, + "step": 1412 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009443641558282871, + "loss": 0.4332, + "step": 1413 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009442710580964897, + "loss": 0.5842, + "step": 1414 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009441778871344772, + "loss": 0.5557, + "step": 1415 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009440846429576071, + "loss": 0.8113, + "step": 1416 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009439913255812488, + "loss": 0.4657, + "step": 1417 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009438979350207844, + "loss": 0.6071, + "step": 1418 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009438044712916073, + "loss": 0.5343, + "step": 1419 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009437109344091235, + "loss": 0.5481, + "step": 1420 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009436173243887509, + "loss": 0.5219, + "step": 1421 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009435236412459194, + "loss": 0.4915, + "step": 1422 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009434298849960712, + "loss": 0.0536, + "step": 1423 + }, + { + "epoch": 0.18, + "learning_rate": 0.00094333605565466, + "loss": 0.6768, + "step": 1424 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009432421532371522, + "loss": 0.4509, + "step": 1425 + }, + { + "epoch": 0.18, + "learning_rate": 0.000943148177759026, + "loss": 0.5349, + "step": 1426 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009430541292357713, + "loss": 0.4517, + "step": 1427 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009429600076828906, + "loss": 0.5247, + "step": 1428 + }, + { + "epoch": 0.18, + "learning_rate": 0.000942865813115898, + "loss": 0.4797, + "step": 1429 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009427715455503199, + "loss": 0.5653, + "step": 1430 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009426772050016946, + "loss": 0.5389, + "step": 1431 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009425827914855725, + "loss": 0.7224, + "step": 1432 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009424883050175159, + "loss": 0.5907, + "step": 1433 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009423937456130994, + "loss": 0.6455, + "step": 1434 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009422991132879093, + "loss": 0.0531, + "step": 1435 + }, + { + "epoch": 0.18, + "learning_rate": 0.000942204408057544, + "loss": 0.5776, + "step": 1436 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009421096299376143, + "loss": 0.5348, + "step": 1437 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009420147789437423, + "loss": 0.6985, + "step": 1438 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009419198550915628, + "loss": 0.4391, + "step": 1439 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009418248583967222, + "loss": 0.4604, + "step": 1440 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009417297888748788, + "loss": 0.5219, + "step": 1441 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009416346465417036, + "loss": 0.5972, + "step": 1442 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009415394314128786, + "loss": 0.4495, + "step": 1443 + }, + { + "epoch": 0.18, + "learning_rate": 0.000941444143504099, + "loss": 0.5081, + "step": 1444 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009413487828310706, + "loss": 0.6532, + "step": 1445 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009412533494095124, + "loss": 0.5077, + "step": 1446 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009411578432551546, + "loss": 0.5778, + "step": 1447 + }, + { + "epoch": 0.18, + "learning_rate": 0.00094106226438374, + "loss": 0.6997, + "step": 1448 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009409666128110231, + "loss": 0.718, + "step": 1449 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009408708885527699, + "loss": 0.5255, + "step": 1450 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009407750916247592, + "loss": 0.5085, + "step": 1451 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009406792220427814, + "loss": 0.5294, + "step": 1452 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009405832798226391, + "loss": 0.6182, + "step": 1453 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009404872649801462, + "loss": 0.0527, + "step": 1454 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009403911775311294, + "loss": 0.6698, + "step": 1455 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009402950174914269, + "loss": 0.6017, + "step": 1456 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009401987848768889, + "loss": 0.6066, + "step": 1457 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009401024797033778, + "loss": 0.7206, + "step": 1458 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009400061019867678, + "loss": 0.6249, + "step": 1459 + }, + { + "epoch": 0.18, + "learning_rate": 0.000939909651742945, + "loss": 0.6366, + "step": 1460 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009398131289878076, + "loss": 0.6927, + "step": 1461 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009397165337372653, + "loss": 0.5435, + "step": 1462 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009396198660072406, + "loss": 0.7397, + "step": 1463 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009395231258136673, + "loss": 0.5975, + "step": 1464 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009394263131724911, + "loss": 0.6458, + "step": 1465 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009393294280996702, + "loss": 0.6482, + "step": 1466 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009392324706111741, + "loss": 0.6925, + "step": 1467 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009391354407229847, + "loss": 0.6848, + "step": 1468 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009390383384510956, + "loss": 0.4257, + "step": 1469 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009389411638115124, + "loss": 0.8191, + "step": 1470 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009388439168202525, + "loss": 0.5589, + "step": 1471 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009387465974933454, + "loss": 0.0519, + "step": 1472 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009386492058468327, + "loss": 0.5511, + "step": 1473 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009385517418967673, + "loss": 0.5759, + "step": 1474 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009384542056592147, + "loss": 0.5172, + "step": 1475 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009383565971502519, + "loss": 0.6306, + "step": 1476 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009382589163859681, + "loss": 0.5765, + "step": 1477 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009381611633824639, + "loss": 0.4484, + "step": 1478 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009380633381558525, + "loss": 0.6602, + "step": 1479 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009379654407222584, + "loss": 0.5265, + "step": 1480 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009378674710978184, + "loss": 0.5802, + "step": 1481 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009377694292986811, + "loss": 0.554, + "step": 1482 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009376713153410068, + "loss": 0.5575, + "step": 1483 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009375731292409679, + "loss": 0.4939, + "step": 1484 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009374748710147487, + "loss": 0.5962, + "step": 1485 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009373765406785453, + "loss": 0.4119, + "step": 1486 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009372781382485657, + "loss": 0.7457, + "step": 1487 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009371796637410299, + "loss": 0.6031, + "step": 1488 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009370811171721696, + "loss": 0.5575, + "step": 1489 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009369824985582283, + "loss": 0.4395, + "step": 1490 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009368838079154618, + "loss": 0.4852, + "step": 1491 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009367850452601372, + "loss": 0.5291, + "step": 1492 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009366862106085341, + "loss": 0.595, + "step": 1493 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009365873039769435, + "loss": 0.4481, + "step": 1494 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009364883253816683, + "loss": 0.551, + "step": 1495 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009363892748390235, + "loss": 0.6022, + "step": 1496 + }, + { + "epoch": 0.19, + "learning_rate": 0.000936290152365336, + "loss": 0.6709, + "step": 1497 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009361909579769439, + "loss": 0.5883, + "step": 1498 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009360916916901981, + "loss": 0.5326, + "step": 1499 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009359923535214608, + "loss": 0.4615, + "step": 1500 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009358929434871057, + "loss": 0.5529, + "step": 1501 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009357934616035193, + "loss": 0.6987, + "step": 1502 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009356939078870994, + "loss": 0.7266, + "step": 1503 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009355942823542554, + "loss": 0.5562, + "step": 1504 + }, + { + "epoch": 0.19, + "learning_rate": 0.000935494585021409, + "loss": 0.6453, + "step": 1505 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009353948159049933, + "loss": 0.5494, + "step": 1506 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009352949750214537, + "loss": 0.5287, + "step": 1507 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009351950623872472, + "loss": 0.5111, + "step": 1508 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009350950780188425, + "loss": 0.4427, + "step": 1509 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009349950219327205, + "loss": 0.5168, + "step": 1510 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009348948941453734, + "loss": 0.6724, + "step": 1511 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009347946946733055, + "loss": 0.5542, + "step": 1512 + }, + { + "epoch": 0.19, + "learning_rate": 0.000934694423533033, + "loss": 0.6335, + "step": 1513 + }, + { + "epoch": 0.19, + "learning_rate": 0.000934594080741084, + "loss": 0.5789, + "step": 1514 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009344936663139978, + "loss": 0.5348, + "step": 1515 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009343931802683265, + "loss": 0.4823, + "step": 1516 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009342926226206329, + "loss": 0.6554, + "step": 1517 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009341919933874924, + "loss": 0.5302, + "step": 1518 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009340912925854918, + "loss": 0.5988, + "step": 1519 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009339905202312302, + "loss": 0.5604, + "step": 1520 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009338896763413179, + "loss": 0.6373, + "step": 1521 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009337887609323772, + "loss": 0.579, + "step": 1522 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009336877740210423, + "loss": 0.6472, + "step": 1523 + }, + { + "epoch": 0.19, + "learning_rate": 0.000933586715623959, + "loss": 0.5867, + "step": 1524 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009334855857577851, + "loss": 0.479, + "step": 1525 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009333843844391898, + "loss": 0.9474, + "step": 1526 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009332831116848548, + "loss": 0.564, + "step": 1527 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009331817675114727, + "loss": 0.6307, + "step": 1528 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009330803519357486, + "loss": 0.545, + "step": 1529 + }, + { + "epoch": 0.19, + "learning_rate": 0.000932978864974399, + "loss": 0.4666, + "step": 1530 + }, + { + "epoch": 0.19, + "learning_rate": 0.000932877306644152, + "loss": 0.5873, + "step": 1531 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009327756769617479, + "loss": 0.4772, + "step": 1532 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009326739759439387, + "loss": 0.6613, + "step": 1533 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009325722036074875, + "loss": 0.5155, + "step": 1534 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009324703599691702, + "loss": 0.5634, + "step": 1535 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009323684450457737, + "loss": 0.562, + "step": 1536 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009322664588540968, + "loss": 0.7212, + "step": 1537 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009321644014109501, + "loss": 0.5271, + "step": 1538 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009320622727331563, + "loss": 0.5647, + "step": 1539 + }, + { + "epoch": 0.19, + "learning_rate": 0.000931960072837549, + "loss": 0.5182, + "step": 1540 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009318578017409744, + "loss": 0.6093, + "step": 1541 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009317554594602901, + "loss": 0.536, + "step": 1542 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009316530460123651, + "loss": 0.5763, + "step": 1543 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009315505614140808, + "loss": 0.5106, + "step": 1544 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009314480056823298, + "loss": 0.6124, + "step": 1545 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009313453788340164, + "loss": 0.5051, + "step": 1546 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009312426808860573, + "loss": 0.4912, + "step": 1547 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009311399118553798, + "loss": 0.5392, + "step": 1548 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009310370717589242, + "loss": 0.8046, + "step": 1549 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009309341606136414, + "loss": 0.6233, + "step": 1550 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009308311784364949, + "loss": 0.5648, + "step": 1551 + }, + { + "epoch": 0.19, + "learning_rate": 0.000930728125244459, + "loss": 0.6122, + "step": 1552 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009306250010545208, + "loss": 0.5787, + "step": 1553 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009305218058836777, + "loss": 0.4167, + "step": 1554 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009304185397489403, + "loss": 0.4808, + "step": 1555 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009303152026673298, + "loss": 0.4683, + "step": 1556 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009302117946558796, + "loss": 0.4865, + "step": 1557 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009301083157316349, + "loss": 0.5798, + "step": 1558 + }, + { + "epoch": 0.2, + "learning_rate": 0.000930004765911652, + "loss": 0.62, + "step": 1559 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009299011452129995, + "loss": 0.67, + "step": 1560 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009297974536527573, + "loss": 0.53, + "step": 1561 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009296936912480172, + "loss": 0.6058, + "step": 1562 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009295898580158826, + "loss": 0.4821, + "step": 1563 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009294859539734683, + "loss": 0.5001, + "step": 1564 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009293819791379016, + "loss": 0.6562, + "step": 1565 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009292779335263204, + "loss": 0.4928, + "step": 1566 + }, + { + "epoch": 0.2, + "learning_rate": 0.000929173817155875, + "loss": 0.5839, + "step": 1567 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009290696300437272, + "loss": 0.5859, + "step": 1568 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009289653722070501, + "loss": 0.6339, + "step": 1569 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009288610436630289, + "loss": 0.5673, + "step": 1570 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009287566444288606, + "loss": 0.5939, + "step": 1571 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009286521745217534, + "loss": 0.4991, + "step": 1572 + }, + { + "epoch": 0.2, + "learning_rate": 0.000928547633958927, + "loss": 0.4623, + "step": 1573 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009284430227576135, + "loss": 0.5845, + "step": 1574 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009283383409350559, + "loss": 0.6709, + "step": 1575 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009282335885085094, + "loss": 0.5274, + "step": 1576 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009281287654952403, + "loss": 0.8302, + "step": 1577 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009280238719125269, + "loss": 0.5778, + "step": 1578 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009279189077776593, + "loss": 0.5201, + "step": 1579 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009278138731079386, + "loss": 0.6333, + "step": 1580 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009277087679206783, + "loss": 0.4254, + "step": 1581 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009276035922332027, + "loss": 0.6949, + "step": 1582 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009274983460628485, + "loss": 0.5187, + "step": 1583 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009273930294269635, + "loss": 0.7378, + "step": 1584 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009272876423429074, + "loss": 0.5288, + "step": 1585 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009271821848280513, + "loss": 0.5054, + "step": 1586 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009270766568997779, + "loss": 0.5936, + "step": 1587 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009269710585754818, + "loss": 0.662, + "step": 1588 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009268653898725689, + "loss": 0.5411, + "step": 1589 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009267596508084568, + "loss": 0.484, + "step": 1590 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009266538414005749, + "loss": 0.5737, + "step": 1591 + }, + { + "epoch": 0.2, + "learning_rate": 0.000926547961666364, + "loss": 0.6486, + "step": 1592 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009264420116232761, + "loss": 0.6699, + "step": 1593 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009263359912887755, + "loss": 0.7415, + "step": 1594 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009262299006803378, + "loss": 0.5831, + "step": 1595 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009261237398154501, + "loss": 0.4941, + "step": 1596 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009260175087116111, + "loss": 0.4736, + "step": 1597 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009259112073863312, + "loss": 0.6881, + "step": 1598 + }, + { + "epoch": 0.2, + "learning_rate": 0.000925804835857132, + "loss": 0.5234, + "step": 1599 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009256983941415475, + "loss": 0.4498, + "step": 1600 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009255918822571223, + "loss": 0.5929, + "step": 1601 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009254853002214132, + "loss": 0.5177, + "step": 1602 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009253786480519881, + "loss": 0.576, + "step": 1603 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009252719257664271, + "loss": 0.6047, + "step": 1604 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009251651333823213, + "loss": 0.054, + "step": 1605 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009250582709172735, + "loss": 0.5662, + "step": 1606 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009249513383888981, + "loss": 0.5204, + "step": 1607 + }, + { + "epoch": 0.2, + "learning_rate": 0.000924844335814821, + "loss": 0.631, + "step": 1608 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009247372632126798, + "loss": 0.7469, + "step": 1609 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009246301206001233, + "loss": 0.5701, + "step": 1610 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009245229079948123, + "loss": 0.714, + "step": 1611 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009244156254144186, + "loss": 0.5836, + "step": 1612 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009243082728766263, + "loss": 0.575, + "step": 1613 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009242008503991302, + "loss": 0.6564, + "step": 1614 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009240933579996372, + "loss": 0.5175, + "step": 1615 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009239857956958653, + "loss": 0.4225, + "step": 1616 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009238781635055443, + "loss": 0.7589, + "step": 1617 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009237704614464157, + "loss": 0.0519, + "step": 1618 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009236626895362319, + "loss": 0.4551, + "step": 1619 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009235548477927575, + "loss": 0.5857, + "step": 1620 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009234469362337681, + "loss": 0.4445, + "step": 1621 + }, + { + "epoch": 0.2, + "learning_rate": 0.000923338954877051, + "loss": 0.5363, + "step": 1622 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009232309037404054, + "loss": 0.4723, + "step": 1623 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009231227828416411, + "loss": 0.5148, + "step": 1624 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009230145921985802, + "loss": 0.4255, + "step": 1625 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009229063318290558, + "loss": 0.4758, + "step": 1626 + }, + { + "epoch": 0.2, + "learning_rate": 0.000922798001750913, + "loss": 0.0527, + "step": 1627 + }, + { + "epoch": 0.2, + "learning_rate": 0.000922689601982008, + "loss": 0.5287, + "step": 1628 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009225811325402085, + "loss": 0.5614, + "step": 1629 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009224725934433937, + "loss": 0.0524, + "step": 1630 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009223639847094544, + "loss": 0.5261, + "step": 1631 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009222553063562931, + "loss": 0.4844, + "step": 1632 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009221465584018231, + "loss": 0.662, + "step": 1633 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009220377408639698, + "loss": 0.4705, + "step": 1634 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009219288537606698, + "loss": 0.8004, + "step": 1635 + }, + { + "epoch": 0.21, + "learning_rate": 0.000921819897109871, + "loss": 0.4448, + "step": 1636 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009217108709295334, + "loss": 0.5111, + "step": 1637 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009216017752376278, + "loss": 0.0519, + "step": 1638 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009214926100521367, + "loss": 0.5522, + "step": 1639 + }, + { + "epoch": 0.21, + "learning_rate": 0.000921383375391054, + "loss": 0.437, + "step": 1640 + }, + { + "epoch": 0.21, + "learning_rate": 0.000921274071272385, + "loss": 0.5017, + "step": 1641 + }, + { + "epoch": 0.21, + "learning_rate": 0.000921164697714147, + "loss": 0.6349, + "step": 1642 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009210552547343678, + "loss": 0.6415, + "step": 1643 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009209457423510874, + "loss": 0.6401, + "step": 1644 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009208361605823567, + "loss": 0.0519, + "step": 1645 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009207265094462387, + "loss": 0.4943, + "step": 1646 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009206167889608072, + "loss": 0.7776, + "step": 1647 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009205069991441479, + "loss": 0.051, + "step": 1648 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009203971400143574, + "loss": 0.5321, + "step": 1649 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009202872115895442, + "loss": 0.6797, + "step": 1650 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009201772138878281, + "loss": 0.5428, + "step": 1651 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009200671469273402, + "loss": 0.6003, + "step": 1652 + }, + { + "epoch": 0.21, + "learning_rate": 0.000919957010726223, + "loss": 0.5367, + "step": 1653 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009198468053026307, + "loss": 0.5887, + "step": 1654 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009197365306747285, + "loss": 0.5653, + "step": 1655 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009196261868606935, + "loss": 0.5519, + "step": 1656 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009195157738787136, + "loss": 0.5601, + "step": 1657 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009194052917469886, + "loss": 0.571, + "step": 1658 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009192947404837295, + "loss": 0.5275, + "step": 1659 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009191841201071588, + "loss": 0.5869, + "step": 1660 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009190734306355102, + "loss": 0.6268, + "step": 1661 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009189626720870288, + "loss": 0.5939, + "step": 1662 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009188518444799713, + "loss": 0.577, + "step": 1663 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009187409478326059, + "loss": 0.4994, + "step": 1664 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009186299821632116, + "loss": 0.6838, + "step": 1665 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009185189474900793, + "loss": 0.4855, + "step": 1666 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009184078438315111, + "loss": 0.5618, + "step": 1667 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009182966712058203, + "loss": 0.5278, + "step": 1668 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009181854296313319, + "loss": 0.421, + "step": 1669 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009180741191263822, + "loss": 0.5415, + "step": 1670 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009179627397093185, + "loss": 0.6277, + "step": 1671 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009178512913984997, + "loss": 0.7073, + "step": 1672 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009177397742122966, + "loss": 0.4784, + "step": 1673 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009176281881690902, + "loss": 0.5017, + "step": 1674 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009175165332872739, + "loss": 0.4269, + "step": 1675 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009174048095852517, + "loss": 0.5992, + "step": 1676 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009172930170814397, + "loss": 0.5933, + "step": 1677 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009171811557942644, + "loss": 0.5941, + "step": 1678 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009170692257421647, + "loss": 0.5837, + "step": 1679 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009169572269435897, + "loss": 0.4717, + "step": 1680 + }, + { + "epoch": 0.21, + "learning_rate": 0.000916845159417001, + "loss": 0.4526, + "step": 1681 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009167330231808706, + "loss": 0.5096, + "step": 1682 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009166208182536822, + "loss": 0.4994, + "step": 1683 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009165085446539307, + "loss": 0.6576, + "step": 1684 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009163962024001227, + "loss": 0.499, + "step": 1685 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009162837915107758, + "loss": 0.6899, + "step": 1686 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009161713120044186, + "loss": 0.0517, + "step": 1687 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009160587638995917, + "loss": 0.6239, + "step": 1688 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009159461472148465, + "loss": 0.5941, + "step": 1689 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009158334619687459, + "loss": 0.5037, + "step": 1690 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009157207081798642, + "loss": 0.5887, + "step": 1691 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009156078858667866, + "loss": 0.6207, + "step": 1692 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009154949950481102, + "loss": 0.4558, + "step": 1693 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009153820357424427, + "loss": 0.4866, + "step": 1694 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009152690079684036, + "loss": 0.5071, + "step": 1695 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009151559117446236, + "loss": 0.5449, + "step": 1696 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009150427470897445, + "loss": 0.582, + "step": 1697 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009149295140224194, + "loss": 0.6422, + "step": 1698 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009148162125613131, + "loss": 0.5394, + "step": 1699 + }, + { + "epoch": 0.21, + "learning_rate": 0.000914702842725101, + "loss": 0.725, + "step": 1700 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009145894045324701, + "loss": 0.5503, + "step": 1701 + }, + { + "epoch": 0.21, + "learning_rate": 0.000914475898002119, + "loss": 0.4528, + "step": 1702 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009143623231527571, + "loss": 0.6364, + "step": 1703 + }, + { + "epoch": 0.21, + "learning_rate": 0.000914248680003105, + "loss": 0.5347, + "step": 1704 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009141349685718949, + "loss": 0.5708, + "step": 1705 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009140211888778704, + "loss": 0.5118, + "step": 1706 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009139073409397856, + "loss": 0.4308, + "step": 1707 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009137934247764065, + "loss": 0.5687, + "step": 1708 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009136794404065102, + "loss": 0.7124, + "step": 1709 + }, + { + "epoch": 0.21, + "learning_rate": 0.000913565387848885, + "loss": 0.5923, + "step": 1710 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009134512671223304, + "loss": 0.6205, + "step": 1711 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009133370782456572, + "loss": 0.6698, + "step": 1712 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009132228212376874, + "loss": 0.5165, + "step": 1713 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009131084961172544, + "loss": 0.6536, + "step": 1714 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009129941029032025, + "loss": 0.5057, + "step": 1715 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009128796416143873, + "loss": 0.6499, + "step": 1716 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009127651122696759, + "loss": 0.5008, + "step": 1717 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009126505148879465, + "loss": 0.5076, + "step": 1718 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009125358494880882, + "loss": 0.5183, + "step": 1719 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009124211160890016, + "loss": 0.4377, + "step": 1720 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009123063147095988, + "loss": 0.6486, + "step": 1721 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009121914453688024, + "loss": 0.5352, + "step": 1722 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009120765080855468, + "loss": 0.5616, + "step": 1723 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009119615028787771, + "loss": 0.6666, + "step": 1724 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009118464297674503, + "loss": 0.4502, + "step": 1725 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009117312887705338, + "loss": 0.4817, + "step": 1726 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009116160799070065, + "loss": 0.6177, + "step": 1727 + }, + { + "epoch": 0.22, + "learning_rate": 0.000911500803195859, + "loss": 0.498, + "step": 1728 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009113854586560921, + "loss": 0.6824, + "step": 1729 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009112700463067187, + "loss": 0.5056, + "step": 1730 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009111545661667623, + "loss": 0.4966, + "step": 1731 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009110390182552578, + "loss": 0.6578, + "step": 1732 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009109234025912511, + "loss": 0.6534, + "step": 1733 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009108077191937997, + "loss": 0.4249, + "step": 1734 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009106919680819716, + "loss": 0.5173, + "step": 1735 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009105761492748466, + "loss": 0.4297, + "step": 1736 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009104602627915151, + "loss": 0.5226, + "step": 1737 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009103443086510792, + "loss": 0.6411, + "step": 1738 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009102282868726517, + "loss": 0.5381, + "step": 1739 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009101121974753568, + "loss": 0.5352, + "step": 1740 + }, + { + "epoch": 0.22, + "learning_rate": 0.00090999604047833, + "loss": 0.5514, + "step": 1741 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009098798159007174, + "loss": 0.5748, + "step": 1742 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009097635237616767, + "loss": 0.5719, + "step": 1743 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009096471640803765, + "loss": 0.5728, + "step": 1744 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009095307368759968, + "loss": 0.379, + "step": 1745 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009094142421677285, + "loss": 0.6588, + "step": 1746 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009092976799747736, + "loss": 0.6067, + "step": 1747 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009091810503163456, + "loss": 0.7541, + "step": 1748 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009090643532116683, + "loss": 0.4622, + "step": 1749 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009089475886799776, + "loss": 0.5068, + "step": 1750 + }, + { + "epoch": 0.22, + "learning_rate": 0.00090883075674052, + "loss": 0.5424, + "step": 1751 + }, + { + "epoch": 0.22, + "learning_rate": 0.000908713857412553, + "loss": 0.0513, + "step": 1752 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009085968907153455, + "loss": 0.4042, + "step": 1753 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009084798566681775, + "loss": 0.4408, + "step": 1754 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009083627552903397, + "loss": 0.5451, + "step": 1755 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009082455866011345, + "loss": 0.5773, + "step": 1756 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009081283506198748, + "loss": 0.5063, + "step": 1757 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009080110473658851, + "loss": 0.5839, + "step": 1758 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009078936768585007, + "loss": 0.4617, + "step": 1759 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009077762391170681, + "loss": 0.5026, + "step": 1760 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009076587341609446, + "loss": 0.4744, + "step": 1761 + }, + { + "epoch": 0.22, + "learning_rate": 0.000907541162009499, + "loss": 0.5879, + "step": 1762 + }, + { + "epoch": 0.22, + "learning_rate": 0.000907423522682111, + "loss": 0.5271, + "step": 1763 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009073058161981713, + "loss": 0.434, + "step": 1764 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009071880425770818, + "loss": 0.5631, + "step": 1765 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009070702018382553, + "loss": 0.5903, + "step": 1766 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009069522940011158, + "loss": 0.5066, + "step": 1767 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009068343190850983, + "loss": 0.7264, + "step": 1768 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009067162771096489, + "loss": 0.3712, + "step": 1769 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009065981680942248, + "loss": 0.5786, + "step": 1770 + }, + { + "epoch": 0.22, + "learning_rate": 0.000906479992058294, + "loss": 0.0522, + "step": 1771 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009063617490213358, + "loss": 0.6545, + "step": 1772 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009062434390028406, + "loss": 0.6016, + "step": 1773 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009061250620223096, + "loss": 0.4541, + "step": 1774 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009060066180992553, + "loss": 0.5338, + "step": 1775 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009058881072532007, + "loss": 0.4325, + "step": 1776 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009057695295036805, + "loss": 0.4965, + "step": 1777 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009056508848702402, + "loss": 0.5067, + "step": 1778 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009055321733724361, + "loss": 0.4049, + "step": 1779 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009054133950298358, + "loss": 0.6248, + "step": 1780 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009052945498620177, + "loss": 0.0521, + "step": 1781 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009051756378885714, + "loss": 0.4999, + "step": 1782 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009050566591290974, + "loss": 0.6401, + "step": 1783 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009049376136032073, + "loss": 0.0518, + "step": 1784 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009048185013305236, + "loss": 0.5146, + "step": 1785 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009046993223306799, + "loss": 0.4925, + "step": 1786 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009045800766233205, + "loss": 0.6403, + "step": 1787 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009044607642281013, + "loss": 0.5576, + "step": 1788 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009043413851646887, + "loss": 0.4806, + "step": 1789 + }, + { + "epoch": 0.22, + "learning_rate": 0.00090422193945276, + "loss": 0.5482, + "step": 1790 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009041024271120041, + "loss": 0.0515, + "step": 1791 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009039828481621202, + "loss": 0.5978, + "step": 1792 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009038632026228189, + "loss": 0.5646, + "step": 1793 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009037434905138217, + "loss": 0.6462, + "step": 1794 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009036237118548609, + "loss": 0.5165, + "step": 1795 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009035038666656798, + "loss": 0.5118, + "step": 1796 + }, + { + "epoch": 0.23, + "learning_rate": 0.000903383954966033, + "loss": 0.4723, + "step": 1797 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009032639767756858, + "loss": 0.6158, + "step": 1798 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009031439321144142, + "loss": 0.5724, + "step": 1799 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009030238210020058, + "loss": 0.5025, + "step": 1800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009029036434582585, + "loss": 0.5083, + "step": 1801 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009027833995029816, + "loss": 0.4801, + "step": 1802 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009026630891559951, + "loss": 0.5807, + "step": 1803 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009025427124371301, + "loss": 0.5375, + "step": 1804 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009024222693662286, + "loss": 0.4376, + "step": 1805 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009023017599631433, + "loss": 0.5475, + "step": 1806 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009021811842477383, + "loss": 0.5409, + "step": 1807 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009020605422398883, + "loss": 0.4633, + "step": 1808 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009019398339594789, + "loss": 0.4664, + "step": 1809 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009018190594264068, + "loss": 0.0506, + "step": 1810 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009016982186605796, + "loss": 0.5272, + "step": 1811 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009015773116819155, + "loss": 0.4983, + "step": 1812 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009014563385103441, + "loss": 0.5542, + "step": 1813 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009013352991658057, + "loss": 0.4508, + "step": 1814 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009012141936682514, + "loss": 0.4967, + "step": 1815 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009010930220376435, + "loss": 0.0509, + "step": 1816 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009009717842939545, + "loss": 0.5548, + "step": 1817 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009008504804571689, + "loss": 0.5282, + "step": 1818 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009007291105472811, + "loss": 0.051, + "step": 1819 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009006076745842969, + "loss": 0.5782, + "step": 1820 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009004861725882329, + "loss": 0.4257, + "step": 1821 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009003646045791165, + "loss": 0.665, + "step": 1822 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009002429705769859, + "loss": 0.5685, + "step": 1823 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009001212706018907, + "loss": 0.5051, + "step": 1824 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008999995046738904, + "loss": 0.5714, + "step": 1825 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008998776728130567, + "loss": 0.6807, + "step": 1826 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008997557750394707, + "loss": 0.6306, + "step": 1827 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008996338113732256, + "loss": 0.5573, + "step": 1828 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008995117818344248, + "loss": 0.7041, + "step": 1829 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008993896864431826, + "loss": 0.6202, + "step": 1830 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008992675252196242, + "loss": 0.8269, + "step": 1831 + }, + { + "epoch": 0.23, + "learning_rate": 0.000899145298183886, + "loss": 0.6689, + "step": 1832 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008990230053561147, + "loss": 0.6364, + "step": 1833 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008989006467564683, + "loss": 0.426, + "step": 1834 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008987782224051153, + "loss": 0.5311, + "step": 1835 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008986557323222353, + "loss": 0.6853, + "step": 1836 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008985331765280185, + "loss": 0.4771, + "step": 1837 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008984105550426662, + "loss": 0.5121, + "step": 1838 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008982878678863903, + "loss": 0.5396, + "step": 1839 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008981651150794136, + "loss": 0.4442, + "step": 1840 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008980422966419697, + "loss": 0.4344, + "step": 1841 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008979194125943031, + "loss": 0.4918, + "step": 1842 + }, + { + "epoch": 0.23, + "learning_rate": 0.000897796462956669, + "loss": 0.5122, + "step": 1843 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008976734477493335, + "loss": 0.629, + "step": 1844 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008975503669925735, + "loss": 0.5018, + "step": 1845 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008974272207066767, + "loss": 0.424, + "step": 1846 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008973040089119415, + "loss": 0.5634, + "step": 1847 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008971807316286772, + "loss": 0.0505, + "step": 1848 + }, + { + "epoch": 0.23, + "learning_rate": 0.000897057388877204, + "loss": 0.4725, + "step": 1849 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008969339806778527, + "loss": 0.6495, + "step": 1850 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008968105070509649, + "loss": 0.4594, + "step": 1851 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008966869680168929, + "loss": 0.4789, + "step": 1852 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008965633635960003, + "loss": 0.6367, + "step": 1853 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008964396938086609, + "loss": 0.5248, + "step": 1854 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008963159586752594, + "loss": 0.4185, + "step": 1855 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008961921582161914, + "loss": 0.5452, + "step": 1856 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008960682924518633, + "loss": 0.639, + "step": 1857 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008959443614026919, + "loss": 0.5219, + "step": 1858 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008958203650891055, + "loss": 0.5242, + "step": 1859 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008956963035315423, + "loss": 0.6188, + "step": 1860 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008955721767504519, + "loss": 0.588, + "step": 1861 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008954479847662942, + "loss": 0.5305, + "step": 1862 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008953237275995401, + "loss": 0.5801, + "step": 1863 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008951994052706714, + "loss": 0.5749, + "step": 1864 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008950750178001801, + "loss": 0.3922, + "step": 1865 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008949505652085694, + "loss": 0.5933, + "step": 1866 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008948260475163533, + "loss": 0.5327, + "step": 1867 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008947014647440561, + "loss": 0.614, + "step": 1868 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008945768169122132, + "loss": 0.5104, + "step": 1869 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008944521040413705, + "loss": 0.6284, + "step": 1870 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008943273261520848, + "loss": 0.5751, + "step": 1871 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008942024832649234, + "loss": 0.4937, + "step": 1872 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008940775754004644, + "loss": 0.5573, + "step": 1873 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008939526025792968, + "loss": 0.4355, + "step": 1874 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008938275648220202, + "loss": 0.4141, + "step": 1875 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008937024621492448, + "loss": 0.6173, + "step": 1876 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008935772945815914, + "loss": 0.5179, + "step": 1877 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008934520621396919, + "loss": 0.6475, + "step": 1878 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008933267648441885, + "loss": 0.5135, + "step": 1879 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008932014027157343, + "loss": 0.5039, + "step": 1880 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008930759757749929, + "loss": 0.5557, + "step": 1881 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008929504840426388, + "loss": 0.5558, + "step": 1882 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008928249275393571, + "loss": 0.4713, + "step": 1883 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008926993062858437, + "loss": 0.4325, + "step": 1884 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008925736203028049, + "loss": 0.6329, + "step": 1885 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008924478696109577, + "loss": 0.4675, + "step": 1886 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008923220542310301, + "loss": 0.5529, + "step": 1887 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008921961741837603, + "loss": 0.4991, + "step": 1888 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008920702294898976, + "loss": 0.5214, + "step": 1889 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008919442201702016, + "loss": 0.5637, + "step": 1890 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008918181462454429, + "loss": 0.5676, + "step": 1891 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008916920077364024, + "loss": 0.5408, + "step": 1892 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008915658046638718, + "loss": 0.5483, + "step": 1893 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008914395370486534, + "loss": 0.4774, + "step": 1894 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008913132049115603, + "loss": 0.8521, + "step": 1895 + }, + { + "epoch": 0.24, + "learning_rate": 0.000891186808273416, + "loss": 0.5951, + "step": 1896 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008910603471550549, + "loss": 0.4761, + "step": 1897 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008909338215773218, + "loss": 0.0518, + "step": 1898 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008908072315610723, + "loss": 0.5693, + "step": 1899 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008906805771271723, + "loss": 0.0518, + "step": 1900 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008905538582964985, + "loss": 0.5476, + "step": 1901 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008904270750899386, + "loss": 0.6503, + "step": 1902 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008903002275283902, + "loss": 0.5498, + "step": 1903 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008901733156327623, + "loss": 0.4637, + "step": 1904 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008900463394239738, + "loss": 0.548, + "step": 1905 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008899192989229543, + "loss": 0.5278, + "step": 1906 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008897921941506444, + "loss": 0.576, + "step": 1907 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008896650251279952, + "loss": 0.5311, + "step": 1908 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008895377918759679, + "loss": 0.4908, + "step": 1909 + }, + { + "epoch": 0.24, + "learning_rate": 0.000889410494415535, + "loss": 0.5198, + "step": 1910 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008892831327676792, + "loss": 0.45, + "step": 1911 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008891557069533936, + "loss": 0.6166, + "step": 1912 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008890282169936821, + "loss": 0.7622, + "step": 1913 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008889006629095593, + "loss": 0.4478, + "step": 1914 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008887730447220503, + "loss": 0.4731, + "step": 1915 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008886453624521905, + "loss": 0.61, + "step": 1916 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008885176161210262, + "loss": 0.5439, + "step": 1917 + }, + { + "epoch": 0.24, + "learning_rate": 0.000888389805749614, + "loss": 0.5286, + "step": 1918 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008882619313590213, + "loss": 0.5873, + "step": 1919 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008881339929703258, + "loss": 0.4283, + "step": 1920 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008880059906046159, + "loss": 0.6442, + "step": 1921 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008878779242829907, + "loss": 0.5176, + "step": 1922 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008877497940265594, + "loss": 0.7172, + "step": 1923 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008876215998564423, + "loss": 0.5916, + "step": 1924 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008874933417937697, + "loss": 0.5649, + "step": 1925 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008873650198596828, + "loss": 0.3606, + "step": 1926 + }, + { + "epoch": 0.24, + "learning_rate": 0.000887236634075333, + "loss": 0.4585, + "step": 1927 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008871081844618827, + "loss": 0.5835, + "step": 1928 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008869796710405043, + "loss": 0.4242, + "step": 1929 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008868510938323811, + "loss": 0.4971, + "step": 1930 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008867224528587068, + "loss": 0.4905, + "step": 1931 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008865937481406857, + "loss": 0.5599, + "step": 1932 + }, + { + "epoch": 0.24, + "learning_rate": 0.000886464979699532, + "loss": 0.4967, + "step": 1933 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008863361475564713, + "loss": 0.7499, + "step": 1934 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008862072517327393, + "loss": 0.4957, + "step": 1935 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008860782922495822, + "loss": 0.6858, + "step": 1936 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008859492691282565, + "loss": 0.5938, + "step": 1937 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008858201823900295, + "loss": 0.5404, + "step": 1938 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008856910320561788, + "loss": 0.6438, + "step": 1939 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008855618181479925, + "loss": 0.5388, + "step": 1940 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008854325406867695, + "loss": 0.4932, + "step": 1941 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008853031996938186, + "loss": 0.5417, + "step": 1942 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008851737951904594, + "loss": 0.5414, + "step": 1943 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008850443271980222, + "loss": 0.4589, + "step": 1944 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008849147957378472, + "loss": 0.5419, + "step": 1945 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008847852008312856, + "loss": 0.5219, + "step": 1946 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008846555424996987, + "loss": 0.6095, + "step": 1947 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008845258207644584, + "loss": 0.4997, + "step": 1948 + }, + { + "epoch": 0.24, + "learning_rate": 0.000884396035646947, + "loss": 0.5797, + "step": 1949 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008842661871685573, + "loss": 0.4948, + "step": 1950 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008841362753506925, + "loss": 0.4602, + "step": 1951 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008840063002147662, + "loss": 0.6107, + "step": 1952 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008838762617822029, + "loss": 0.605, + "step": 1953 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008837461600744365, + "loss": 0.4519, + "step": 1954 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008836159951129126, + "loss": 0.4156, + "step": 1955 + }, + { + "epoch": 0.25, + "learning_rate": 0.000883485766919086, + "loss": 0.5273, + "step": 1956 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008833554755144228, + "loss": 0.5308, + "step": 1957 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008832251209203994, + "loss": 0.448, + "step": 1958 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008830947031585022, + "loss": 0.5015, + "step": 1959 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008829642222502282, + "loss": 0.5183, + "step": 1960 + }, + { + "epoch": 0.25, + "learning_rate": 0.000882833678217085, + "loss": 0.5284, + "step": 1961 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008827030710805905, + "loss": 0.538, + "step": 1962 + }, + { + "epoch": 0.25, + "learning_rate": 0.000882572400862273, + "loss": 0.3646, + "step": 1963 + }, + { + "epoch": 0.25, + "learning_rate": 0.000882441667583671, + "loss": 0.496, + "step": 1964 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008823108712663338, + "loss": 0.5344, + "step": 1965 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008821800119318206, + "loss": 0.502, + "step": 1966 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008820490896017015, + "loss": 0.4606, + "step": 1967 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008819181042975565, + "loss": 0.4769, + "step": 1968 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008817870560409763, + "loss": 0.4505, + "step": 1969 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008816559448535619, + "loss": 0.5635, + "step": 1970 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008815247707569247, + "loss": 0.5099, + "step": 1971 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008813935337726862, + "loss": 0.5518, + "step": 1972 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008812622339224789, + "loss": 0.4662, + "step": 1973 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008811308712279448, + "loss": 0.5894, + "step": 1974 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008809994457107371, + "loss": 0.5577, + "step": 1975 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008808679573925187, + "loss": 0.4551, + "step": 1976 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008807364062949632, + "loss": 0.4484, + "step": 1977 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008806047924397545, + "loss": 0.3859, + "step": 1978 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008804731158485868, + "loss": 0.5159, + "step": 1979 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008803413765431647, + "loss": 0.4716, + "step": 1980 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008802095745452031, + "loss": 0.0506, + "step": 1981 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008800777098764271, + "loss": 0.6703, + "step": 1982 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008799457825585723, + "loss": 0.5804, + "step": 1983 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008798137926133847, + "loss": 0.423, + "step": 1984 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008796817400626202, + "loss": 0.494, + "step": 1985 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008795496249280458, + "loss": 0.5912, + "step": 1986 + }, + { + "epoch": 0.25, + "learning_rate": 0.000879417447231438, + "loss": 0.5642, + "step": 1987 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008792852069945841, + "loss": 0.4414, + "step": 1988 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008791529042392813, + "loss": 0.5382, + "step": 1989 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008790205389873376, + "loss": 0.4036, + "step": 1990 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008788881112605712, + "loss": 0.4675, + "step": 1991 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008787556210808101, + "loss": 0.5317, + "step": 1992 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008786230684698933, + "loss": 0.5961, + "step": 1993 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008784904534496694, + "loss": 0.5199, + "step": 1994 + }, + { + "epoch": 0.25, + "learning_rate": 0.000878357776041998, + "loss": 0.5504, + "step": 1995 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008782250362687483, + "loss": 0.5636, + "step": 1996 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008780922341518004, + "loss": 0.4314, + "step": 1997 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008779593697130441, + "loss": 0.4603, + "step": 1998 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008778264429743801, + "loss": 0.6467, + "step": 1999 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008776934539577186, + "loss": 0.6802, + "step": 2000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008775604026849808, + "loss": 0.5192, + "step": 2001 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008774272891780977, + "loss": 0.5413, + "step": 2002 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008772941134590108, + "loss": 0.5618, + "step": 2003 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008771608755496716, + "loss": 0.5311, + "step": 2004 + }, + { + "epoch": 0.25, + "learning_rate": 0.000877027575472042, + "loss": 0.5252, + "step": 2005 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008768942132480946, + "loss": 0.4827, + "step": 2006 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008767607888998111, + "loss": 0.5858, + "step": 2007 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008766273024491847, + "loss": 0.418, + "step": 2008 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008764937539182181, + "loss": 0.5851, + "step": 2009 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008763601433289243, + "loss": 0.6395, + "step": 2010 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008762264707033267, + "loss": 0.4818, + "step": 2011 + }, + { + "epoch": 0.25, + "learning_rate": 0.000876092736063459, + "loss": 0.5975, + "step": 2012 + }, + { + "epoch": 0.25, + "learning_rate": 0.000875958939431365, + "loss": 0.4856, + "step": 2013 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008758250808290983, + "loss": 0.4823, + "step": 2014 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008756911602787235, + "loss": 0.6399, + "step": 2015 + }, + { + "epoch": 0.25, + "learning_rate": 0.000875557177802315, + "loss": 0.4747, + "step": 2016 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008754231334219574, + "loss": 0.5199, + "step": 2017 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008752890271597454, + "loss": 0.5807, + "step": 2018 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008751548590377842, + "loss": 0.6212, + "step": 2019 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008750206290781891, + "loss": 0.5021, + "step": 2020 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008748863373030853, + "loss": 0.4744, + "step": 2021 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008747519837346085, + "loss": 0.6571, + "step": 2022 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008746175683949047, + "loss": 0.4071, + "step": 2023 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008744830913061297, + "loss": 0.5771, + "step": 2024 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008743485524904496, + "loss": 0.5654, + "step": 2025 + }, + { + "epoch": 0.25, + "learning_rate": 0.000874213951970041, + "loss": 0.6378, + "step": 2026 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008740792897670901, + "loss": 0.5499, + "step": 2027 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008739445659037939, + "loss": 0.5874, + "step": 2028 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008738097804023591, + "loss": 0.5223, + "step": 2029 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008736749332850026, + "loss": 0.4125, + "step": 2030 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008735400245739516, + "loss": 0.5302, + "step": 2031 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008734050542914436, + "loss": 0.4983, + "step": 2032 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008732700224597259, + "loss": 0.5698, + "step": 2033 + }, + { + "epoch": 0.25, + "learning_rate": 0.000873134929101056, + "loss": 0.5531, + "step": 2034 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008729997742377018, + "loss": 0.4771, + "step": 2035 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008728645578919412, + "loss": 0.5515, + "step": 2036 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008727292800860622, + "loss": 0.5117, + "step": 2037 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008725939408423629, + "loss": 0.538, + "step": 2038 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008724585401831516, + "loss": 0.6324, + "step": 2039 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008723230781307467, + "loss": 0.4451, + "step": 2040 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008721875547074766, + "loss": 0.4474, + "step": 2041 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008720519699356804, + "loss": 0.4866, + "step": 2042 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008719163238377062, + "loss": 0.4707, + "step": 2043 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008717806164359133, + "loss": 0.4506, + "step": 2044 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008716448477526706, + "loss": 0.4996, + "step": 2045 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008715090178103568, + "loss": 0.4159, + "step": 2046 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008713731266313616, + "loss": 0.6976, + "step": 2047 + }, + { + "epoch": 0.26, + "learning_rate": 0.000871237174238084, + "loss": 0.5499, + "step": 2048 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008711011606529333, + "loss": 0.5569, + "step": 2049 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008709650858983289, + "loss": 0.475, + "step": 2050 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008708289499967005, + "loss": 0.5586, + "step": 2051 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008706927529704876, + "loss": 0.4252, + "step": 2052 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008705564948421398, + "loss": 0.4301, + "step": 2053 + }, + { + "epoch": 0.26, + "learning_rate": 0.000870420175634117, + "loss": 0.6125, + "step": 2054 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008702837953688887, + "loss": 0.5972, + "step": 2055 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008701473540689351, + "loss": 0.5427, + "step": 2056 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008700108517567459, + "loss": 0.4797, + "step": 2057 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008698742884548212, + "loss": 0.5651, + "step": 2058 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008697376641856712, + "loss": 0.418, + "step": 2059 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008696009789718156, + "loss": 0.6622, + "step": 2060 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008694642328357849, + "loss": 0.5814, + "step": 2061 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008693274258001192, + "loss": 0.4753, + "step": 2062 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008691905578873684, + "loss": 0.5737, + "step": 2063 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008690536291200932, + "loss": 0.5271, + "step": 2064 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008689166395208636, + "loss": 0.4048, + "step": 2065 + }, + { + "epoch": 0.26, + "learning_rate": 0.00086877958911226, + "loss": 0.5292, + "step": 2066 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008686424779168729, + "loss": 0.4491, + "step": 2067 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008685053059573023, + "loss": 0.5074, + "step": 2068 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008683680732561588, + "loss": 0.53, + "step": 2069 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008682307798360628, + "loss": 0.4608, + "step": 2070 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008680934257196446, + "loss": 0.452, + "step": 2071 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008679560109295446, + "loss": 0.3823, + "step": 2072 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008678185354884132, + "loss": 0.5085, + "step": 2073 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008676809994189108, + "loss": 0.4778, + "step": 2074 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008675434027437077, + "loss": 0.4859, + "step": 2075 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008674057454854843, + "loss": 0.4596, + "step": 2076 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008672680276669312, + "loss": 0.4423, + "step": 2077 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008671302493107485, + "loss": 0.4856, + "step": 2078 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008669924104396465, + "loss": 0.5154, + "step": 2079 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008668545110763458, + "loss": 0.3793, + "step": 2080 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008667165512435762, + "loss": 0.433, + "step": 2081 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008665785309640784, + "loss": 0.3708, + "step": 2082 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008664404502606022, + "loss": 0.6379, + "step": 2083 + }, + { + "epoch": 0.26, + "learning_rate": 0.000866302309155908, + "loss": 0.4597, + "step": 2084 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008661641076727659, + "loss": 0.6405, + "step": 2085 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008660258458339558, + "loss": 0.6937, + "step": 2086 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008658875236622679, + "loss": 0.4093, + "step": 2087 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008657491411805023, + "loss": 0.4944, + "step": 2088 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008656106984114685, + "loss": 0.0522, + "step": 2089 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008654721953779865, + "loss": 0.6057, + "step": 2090 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008653336321028862, + "loss": 0.4926, + "step": 2091 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008651950086090071, + "loss": 0.4908, + "step": 2092 + }, + { + "epoch": 0.26, + "learning_rate": 0.000865056324919199, + "loss": 0.48, + "step": 2093 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008649175810563214, + "loss": 0.4515, + "step": 2094 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008647787770432438, + "loss": 0.8413, + "step": 2095 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008646399129028455, + "loss": 0.609, + "step": 2096 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008645009886580158, + "loss": 0.5398, + "step": 2097 + }, + { + "epoch": 0.26, + "learning_rate": 0.000864362004331654, + "loss": 0.6162, + "step": 2098 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008642229599466691, + "loss": 0.5283, + "step": 2099 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008640838555259801, + "loss": 0.7936, + "step": 2100 + }, + { + "epoch": 0.26, + "learning_rate": 0.000863944691092516, + "loss": 0.6002, + "step": 2101 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008638054666692155, + "loss": 0.566, + "step": 2102 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008636661822790274, + "loss": 0.5166, + "step": 2103 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008635268379449101, + "loss": 0.5584, + "step": 2104 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008633874336898321, + "loss": 0.5288, + "step": 2105 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008632479695367719, + "loss": 0.4742, + "step": 2106 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008631084455087174, + "loss": 0.7123, + "step": 2107 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008629688616286668, + "loss": 0.4697, + "step": 2108 + }, + { + "epoch": 0.26, + "learning_rate": 0.000862829217919628, + "loss": 0.6072, + "step": 2109 + }, + { + "epoch": 0.26, + "learning_rate": 0.000862689514404619, + "loss": 0.4656, + "step": 2110 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008625497511066671, + "loss": 0.598, + "step": 2111 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008624099280488103, + "loss": 0.6152, + "step": 2112 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008622700452540952, + "loss": 0.5001, + "step": 2113 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008621301027455797, + "loss": 0.6161, + "step": 2114 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008619901005463307, + "loss": 0.581, + "step": 2115 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008618500386794248, + "loss": 0.5658, + "step": 2116 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008617099171679488, + "loss": 0.6108, + "step": 2117 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008615697360349994, + "loss": 0.5618, + "step": 2118 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008614294953036831, + "loss": 0.4473, + "step": 2119 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008612891949971158, + "loss": 0.5164, + "step": 2120 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008611488351384235, + "loss": 0.5109, + "step": 2121 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008610084157507423, + "loss": 0.3759, + "step": 2122 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008608679368572176, + "loss": 0.3928, + "step": 2123 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008607273984810051, + "loss": 0.5461, + "step": 2124 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008605868006452699, + "loss": 0.5046, + "step": 2125 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008604461433731872, + "loss": 0.4399, + "step": 2126 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008603054266879417, + "loss": 0.4211, + "step": 2127 + }, + { + "epoch": 0.27, + "learning_rate": 0.000860164650612728, + "loss": 0.5768, + "step": 2128 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008600238151707509, + "loss": 0.5736, + "step": 2129 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008598829203852242, + "loss": 0.5303, + "step": 2130 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008597419662793721, + "loss": 0.5938, + "step": 2131 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008596009528764283, + "loss": 0.5507, + "step": 2132 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008594598801996367, + "loss": 0.5018, + "step": 2133 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008593187482722502, + "loss": 0.7045, + "step": 2134 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008591775571175322, + "loss": 0.4169, + "step": 2135 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008590363067587553, + "loss": 0.0531, + "step": 2136 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008588949972192024, + "loss": 0.4917, + "step": 2137 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008587536285221655, + "loss": 0.6194, + "step": 2138 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008586122006909473, + "loss": 0.4506, + "step": 2139 + }, + { + "epoch": 0.27, + "learning_rate": 0.000858470713748859, + "loss": 0.5397, + "step": 2140 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008583291677192227, + "loss": 0.6067, + "step": 2141 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008581875626253696, + "loss": 0.4999, + "step": 2142 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008580458984906408, + "loss": 0.4752, + "step": 2143 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008579041753383872, + "loss": 0.4746, + "step": 2144 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008577623931919691, + "loss": 0.5765, + "step": 2145 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008576205520747571, + "loss": 0.5688, + "step": 2146 + }, + { + "epoch": 0.27, + "learning_rate": 0.000857478652010131, + "loss": 0.6179, + "step": 2147 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008573366930214806, + "loss": 0.6282, + "step": 2148 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008571946751322051, + "loss": 0.5896, + "step": 2149 + }, + { + "epoch": 0.27, + "learning_rate": 0.000857052598365714, + "loss": 0.4735, + "step": 2150 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008569104627454259, + "loss": 0.6399, + "step": 2151 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008567682682947693, + "loss": 0.5465, + "step": 2152 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008566260150371826, + "loss": 0.4504, + "step": 2153 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008564837029961137, + "loss": 0.0519, + "step": 2154 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008563413321950201, + "loss": 0.5204, + "step": 2155 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008561989026573692, + "loss": 0.5585, + "step": 2156 + }, + { + "epoch": 0.27, + "learning_rate": 0.000856056414406638, + "loss": 0.4142, + "step": 2157 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008559138674663132, + "loss": 0.5157, + "step": 2158 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008557712618598909, + "loss": 0.4268, + "step": 2159 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008556285976108775, + "loss": 0.6051, + "step": 2160 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008554858747427883, + "loss": 0.4838, + "step": 2161 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008553430932791488, + "loss": 0.5905, + "step": 2162 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008552002532434942, + "loss": 0.578, + "step": 2163 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008550573546593686, + "loss": 0.4613, + "step": 2164 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008549143975503269, + "loss": 0.4257, + "step": 2165 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008547713819399325, + "loss": 0.6449, + "step": 2166 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008546283078517596, + "loss": 0.4585, + "step": 2167 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008544851753093909, + "loss": 0.4365, + "step": 2168 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008543419843364197, + "loss": 0.6631, + "step": 2169 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008541987349564479, + "loss": 0.6094, + "step": 2170 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008540554271930884, + "loss": 0.5493, + "step": 2171 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008539120610699622, + "loss": 0.5366, + "step": 2172 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008537686366107012, + "loss": 0.4254, + "step": 2173 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008536251538389461, + "loss": 0.5372, + "step": 2174 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008534816127783476, + "loss": 0.5597, + "step": 2175 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008533380134525659, + "loss": 0.6615, + "step": 2176 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008531943558852708, + "loss": 0.5327, + "step": 2177 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008530506401001417, + "loss": 0.5464, + "step": 2178 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008529068661208676, + "loss": 0.562, + "step": 2179 + }, + { + "epoch": 0.27, + "learning_rate": 0.000852763033971147, + "loss": 0.4805, + "step": 2180 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008526191436746881, + "loss": 0.4507, + "step": 2181 + }, + { + "epoch": 0.27, + "learning_rate": 0.000852475195255209, + "loss": 0.342, + "step": 2182 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008523311887364365, + "loss": 0.4727, + "step": 2183 + }, + { + "epoch": 0.27, + "learning_rate": 0.000852187124142108, + "loss": 0.6846, + "step": 2184 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008520430014959697, + "loss": 0.5382, + "step": 2185 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008518988208217779, + "loss": 0.4321, + "step": 2186 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008517545821432979, + "loss": 0.561, + "step": 2187 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008516102854843053, + "loss": 0.5968, + "step": 2188 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008514659308685846, + "loss": 0.5013, + "step": 2189 + }, + { + "epoch": 0.27, + "learning_rate": 0.00085132151831993, + "loss": 0.489, + "step": 2190 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008511770478621457, + "loss": 0.6342, + "step": 2191 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008510325195190447, + "loss": 0.5795, + "step": 2192 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008508879333144502, + "loss": 0.5846, + "step": 2193 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008507432892721945, + "loss": 0.5992, + "step": 2194 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008505985874161199, + "loss": 0.5665, + "step": 2195 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008504538277700777, + "loss": 0.4543, + "step": 2196 + }, + { + "epoch": 0.28, + "learning_rate": 0.000850309010357929, + "loss": 0.0536, + "step": 2197 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008501641352035443, + "loss": 0.5293, + "step": 2198 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008500192023308038, + "loss": 0.4493, + "step": 2199 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008498742117635972, + "loss": 0.4719, + "step": 2200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008497291635258235, + "loss": 0.4796, + "step": 2201 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008495840576413916, + "loss": 0.7146, + "step": 2202 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008494388941342191, + "loss": 0.4771, + "step": 2203 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008492936730282342, + "loss": 0.4631, + "step": 2204 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008491483943473734, + "loss": 0.5836, + "step": 2205 + }, + { + "epoch": 0.28, + "learning_rate": 0.000849003058115584, + "loss": 0.5134, + "step": 2206 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008488576643568218, + "loss": 0.4362, + "step": 2207 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008487122130950523, + "loss": 0.4142, + "step": 2208 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008485667043542506, + "loss": 0.6069, + "step": 2209 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008484211381584015, + "loss": 0.5537, + "step": 2210 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008482755145314986, + "loss": 0.5349, + "step": 2211 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008481298334975456, + "loss": 0.5107, + "step": 2212 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008479840950805553, + "loss": 0.5928, + "step": 2213 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008478382993045504, + "loss": 0.4998, + "step": 2214 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008476924461935625, + "loss": 0.4915, + "step": 2215 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008475465357716328, + "loss": 0.4462, + "step": 2216 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008474005680628124, + "loss": 0.0612, + "step": 2217 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008472545430911612, + "loss": 0.6654, + "step": 2218 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008471084608807489, + "loss": 0.4286, + "step": 2219 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008469623214556548, + "loss": 0.5433, + "step": 2220 + }, + { + "epoch": 0.28, + "learning_rate": 0.000846816124839967, + "loss": 0.4932, + "step": 2221 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008466698710577836, + "loss": 0.5522, + "step": 2222 + }, + { + "epoch": 0.28, + "learning_rate": 0.000846523560133212, + "loss": 0.0768, + "step": 2223 + }, + { + "epoch": 0.28, + "learning_rate": 0.000846377192090369, + "loss": 0.5178, + "step": 2224 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008462307669533806, + "loss": 0.491, + "step": 2225 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008460842847463826, + "loss": 0.5839, + "step": 2226 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008459377454935197, + "loss": 0.4099, + "step": 2227 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008457911492189467, + "loss": 0.5555, + "step": 2228 + }, + { + "epoch": 0.28, + "learning_rate": 0.000845644495946827, + "loss": 0.5975, + "step": 2229 + }, + { + "epoch": 0.28, + "learning_rate": 0.000845497785701334, + "loss": 0.4996, + "step": 2230 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008453510185066505, + "loss": 0.4426, + "step": 2231 + }, + { + "epoch": 0.28, + "learning_rate": 0.000845204194386968, + "loss": 0.5911, + "step": 2232 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008450573133664883, + "loss": 0.541, + "step": 2233 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008449103754694218, + "loss": 0.4574, + "step": 2234 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008447633807199887, + "loss": 0.6087, + "step": 2235 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008446163291424186, + "loss": 0.7024, + "step": 2236 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008444692207609502, + "loss": 0.4406, + "step": 2237 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008443220555998319, + "loss": 0.4194, + "step": 2238 + }, + { + "epoch": 0.28, + "learning_rate": 0.000844174833683321, + "loss": 0.1084, + "step": 2239 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008440275550356847, + "loss": 0.4192, + "step": 2240 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008438802196811991, + "loss": 0.506, + "step": 2241 + }, + { + "epoch": 0.28, + "learning_rate": 0.00084373282764415, + "loss": 0.4662, + "step": 2242 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008435853789488322, + "loss": 0.5142, + "step": 2243 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008434378736195498, + "loss": 0.0616, + "step": 2244 + }, + { + "epoch": 0.28, + "learning_rate": 0.000843290311680617, + "loss": 0.4338, + "step": 2245 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008431426931563564, + "loss": 0.6853, + "step": 2246 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008429950180711002, + "loss": 0.4744, + "step": 2247 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008428472864491903, + "loss": 0.5886, + "step": 2248 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008426994983149775, + "loss": 0.4896, + "step": 2249 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008425516536928222, + "loss": 0.5413, + "step": 2250 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008424037526070936, + "loss": 0.5065, + "step": 2251 + }, + { + "epoch": 0.28, + "learning_rate": 0.000842255795082171, + "loss": 0.5709, + "step": 2252 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008421077811424424, + "loss": 0.6168, + "step": 2253 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008419597108123054, + "loss": 0.4926, + "step": 2254 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008418115841161664, + "loss": 0.5477, + "step": 2255 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008416634010784418, + "loss": 0.4263, + "step": 2256 + }, + { + "epoch": 0.28, + "learning_rate": 0.000841515161723557, + "loss": 0.4005, + "step": 2257 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008413668660759464, + "loss": 0.5908, + "step": 2258 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008412185141600539, + "loss": 0.5466, + "step": 2259 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008410701060003328, + "loss": 0.5955, + "step": 2260 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008409216416212457, + "loss": 0.5323, + "step": 2261 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008407731210472641, + "loss": 0.5027, + "step": 2262 + }, + { + "epoch": 0.28, + "learning_rate": 0.000840624544302869, + "loss": 0.6478, + "step": 2263 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008404759114125508, + "loss": 0.574, + "step": 2264 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008403272224008089, + "loss": 0.6724, + "step": 2265 + }, + { + "epoch": 0.28, + "learning_rate": 0.000840178477292152, + "loss": 0.5762, + "step": 2266 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008400296761110982, + "loss": 0.4835, + "step": 2267 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008398808188821747, + "loss": 0.4821, + "step": 2268 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008397319056299178, + "loss": 0.5348, + "step": 2269 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008395829363788734, + "loss": 0.5374, + "step": 2270 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008394339111535966, + "loss": 0.5122, + "step": 2271 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008392848299786513, + "loss": 0.5525, + "step": 2272 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008391356928786109, + "loss": 0.646, + "step": 2273 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008389864998780582, + "loss": 0.4961, + "step": 2274 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008388372510015849, + "loss": 0.4978, + "step": 2275 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008386879462737919, + "loss": 0.3746, + "step": 2276 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008385385857192898, + "loss": 0.5115, + "step": 2277 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008383891693626977, + "loss": 0.5902, + "step": 2278 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008382396972286446, + "loss": 0.6033, + "step": 2279 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008380901693417677, + "loss": 0.4869, + "step": 2280 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008379405857267148, + "loss": 0.5182, + "step": 2281 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008377909464081415, + "loss": 0.4623, + "step": 2282 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008376412514107135, + "loss": 0.4161, + "step": 2283 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008374915007591053, + "loss": 0.5007, + "step": 2284 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008373416944780006, + "loss": 0.5475, + "step": 2285 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008371918325920924, + "loss": 0.4531, + "step": 2286 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008370419151260827, + "loss": 0.5325, + "step": 2287 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008368919421046828, + "loss": 0.4255, + "step": 2288 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008367419135526131, + "loss": 0.3915, + "step": 2289 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008365918294946031, + "loss": 0.4453, + "step": 2290 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008364416899553915, + "loss": 0.4876, + "step": 2291 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008362914949597261, + "loss": 0.543, + "step": 2292 + }, + { + "epoch": 0.29, + "learning_rate": 0.000836141244532364, + "loss": 0.551, + "step": 2293 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008359909386980712, + "loss": 0.467, + "step": 2294 + }, + { + "epoch": 0.29, + "learning_rate": 0.000835840577481623, + "loss": 0.4562, + "step": 2295 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008356901609078039, + "loss": 0.4852, + "step": 2296 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008355396890014073, + "loss": 0.5186, + "step": 2297 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008353891617872358, + "loss": 0.5238, + "step": 2298 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008352385792901012, + "loss": 0.4932, + "step": 2299 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008350879415348243, + "loss": 0.4842, + "step": 2300 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008349372485462352, + "loss": 0.5408, + "step": 2301 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008347865003491728, + "loss": 0.5522, + "step": 2302 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008346356969684854, + "loss": 0.6768, + "step": 2303 + }, + { + "epoch": 0.29, + "learning_rate": 0.00083448483842903, + "loss": 0.5209, + "step": 2304 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008343339247556734, + "loss": 0.506, + "step": 2305 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008341829559732907, + "loss": 0.5347, + "step": 2306 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008340319321067667, + "loss": 0.4176, + "step": 2307 + }, + { + "epoch": 0.29, + "learning_rate": 0.000833880853180995, + "loss": 0.5299, + "step": 2308 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008337297192208778, + "loss": 0.4883, + "step": 2309 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008335785302513272, + "loss": 0.5262, + "step": 2310 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008334272862972643, + "loss": 0.3978, + "step": 2311 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008332759873836185, + "loss": 0.6, + "step": 2312 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008331246335353291, + "loss": 0.5632, + "step": 2313 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008329732247773437, + "loss": 0.5096, + "step": 2314 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008328217611346198, + "loss": 0.5016, + "step": 2315 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008326702426321234, + "loss": 0.5071, + "step": 2316 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008325186692948294, + "loss": 0.5543, + "step": 2317 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008323670411477223, + "loss": 0.6682, + "step": 2318 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008322153582157949, + "loss": 0.5204, + "step": 2319 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008320636205240498, + "loss": 0.4526, + "step": 2320 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008319118280974982, + "loss": 0.4193, + "step": 2321 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008317599809611602, + "loss": 0.3967, + "step": 2322 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008316080791400656, + "loss": 0.6394, + "step": 2323 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008314561226592521, + "loss": 0.4543, + "step": 2324 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008313041115437673, + "loss": 0.585, + "step": 2325 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008311520458186677, + "loss": 0.5296, + "step": 2326 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008309999255090184, + "loss": 0.5442, + "step": 2327 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008308477506398939, + "loss": 0.5063, + "step": 2328 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008306955212363776, + "loss": 0.4573, + "step": 2329 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008305432373235615, + "loss": 0.6395, + "step": 2330 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008303908989265472, + "loss": 0.6113, + "step": 2331 + }, + { + "epoch": 0.29, + "learning_rate": 0.000830238506070445, + "loss": 0.4553, + "step": 2332 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008300860587803738, + "loss": 0.5052, + "step": 2333 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008299335570814624, + "loss": 0.5521, + "step": 2334 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008297810009988473, + "loss": 0.0604, + "step": 2335 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008296283905576754, + "loss": 0.6903, + "step": 2336 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008294757257831012, + "loss": 0.5156, + "step": 2337 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008293230067002894, + "loss": 0.4689, + "step": 2338 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008291702333344124, + "loss": 0.4735, + "step": 2339 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008290174057106527, + "loss": 0.0596, + "step": 2340 + }, + { + "epoch": 0.29, + "learning_rate": 0.000828864523854201, + "loss": 0.5134, + "step": 2341 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008287115877902572, + "loss": 0.532, + "step": 2342 + }, + { + "epoch": 0.29, + "learning_rate": 0.00082855859754403, + "loss": 0.537, + "step": 2343 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008284055531407373, + "loss": 0.4883, + "step": 2344 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008282524546056056, + "loss": 0.5896, + "step": 2345 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008280993019638706, + "loss": 0.4493, + "step": 2346 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008279460952407767, + "loss": 0.4246, + "step": 2347 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008277928344615775, + "loss": 0.463, + "step": 2348 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008276395196515351, + "loss": 0.0575, + "step": 2349 + }, + { + "epoch": 0.29, + "learning_rate": 0.000827486150835921, + "loss": 0.4812, + "step": 2350 + }, + { + "epoch": 0.29, + "learning_rate": 0.000827332728040015, + "loss": 0.5953, + "step": 2351 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008271792512891063, + "loss": 0.5563, + "step": 2352 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008270257206084929, + "loss": 0.4224, + "step": 2353 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008268721360234814, + "loss": 0.4607, + "step": 2354 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008267184975593877, + "loss": 0.4933, + "step": 2355 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008265648052415364, + "loss": 0.5808, + "step": 2356 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008264110590952608, + "loss": 0.5676, + "step": 2357 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008262572591459034, + "loss": 0.496, + "step": 2358 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008261034054188151, + "loss": 0.5126, + "step": 2359 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008259494979393563, + "loss": 0.6073, + "step": 2360 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008257955367328958, + "loss": 0.6151, + "step": 2361 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008256415218248112, + "loss": 0.5076, + "step": 2362 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008254874532404895, + "loss": 0.4282, + "step": 2363 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008253333310053257, + "loss": 0.5134, + "step": 2364 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008251791551447245, + "loss": 0.3792, + "step": 2365 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008250249256840991, + "loss": 0.5765, + "step": 2366 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008248706426488712, + "loss": 0.4604, + "step": 2367 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008247163060644718, + "loss": 0.5521, + "step": 2368 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008245619159563404, + "loss": 0.5248, + "step": 2369 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008244074723499258, + "loss": 0.4478, + "step": 2370 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008242529752706849, + "loss": 0.4752, + "step": 2371 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008240984247440842, + "loss": 0.3773, + "step": 2372 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008239438207955982, + "loss": 0.4803, + "step": 2373 + }, + { + "epoch": 0.3, + "learning_rate": 0.000823789163450711, + "loss": 0.4091, + "step": 2374 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008236344527349147, + "loss": 0.6159, + "step": 2375 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008234796886737112, + "loss": 0.437, + "step": 2376 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008233248712926101, + "loss": 0.6199, + "step": 2377 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008231700006171305, + "loss": 0.4216, + "step": 2378 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008230150766728001, + "loss": 0.4998, + "step": 2379 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008228600994851554, + "loss": 0.7062, + "step": 2380 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008227050690797416, + "loss": 0.5966, + "step": 2381 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008225499854821127, + "loss": 0.462, + "step": 2382 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008223948487178314, + "loss": 0.5498, + "step": 2383 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008222396588124695, + "loss": 0.5386, + "step": 2384 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008220844157916073, + "loss": 0.4363, + "step": 2385 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008219291196808334, + "loss": 0.6222, + "step": 2386 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008217737705057459, + "loss": 0.4728, + "step": 2387 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008216183682919516, + "loss": 0.5255, + "step": 2388 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008214629130650655, + "loss": 0.4327, + "step": 2389 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008213074048507116, + "loss": 0.4626, + "step": 2390 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008211518436745231, + "loss": 0.4777, + "step": 2391 + }, + { + "epoch": 0.3, + "learning_rate": 0.000820996229562141, + "loss": 0.4189, + "step": 2392 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008208405625392159, + "loss": 0.5475, + "step": 2393 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008206848426314064, + "loss": 0.5457, + "step": 2394 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008205290698643806, + "loss": 0.5647, + "step": 2395 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008203732442638146, + "loss": 0.5518, + "step": 2396 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008202173658553933, + "loss": 0.4434, + "step": 2397 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008200614346648107, + "loss": 0.3731, + "step": 2398 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008199054507177696, + "loss": 0.678, + "step": 2399 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008197494140399808, + "loss": 0.4044, + "step": 2400 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008195933246571643, + "loss": 0.6072, + "step": 2401 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008194371825950488, + "loss": 0.527, + "step": 2402 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008192809878793712, + "loss": 0.5173, + "step": 2403 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008191247405358778, + "loss": 0.4495, + "step": 2404 + }, + { + "epoch": 0.3, + "learning_rate": 0.000818968440590323, + "loss": 0.0555, + "step": 2405 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008188120880684702, + "loss": 0.4755, + "step": 2406 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008186556829960913, + "loss": 0.574, + "step": 2407 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008184992253989667, + "loss": 0.4993, + "step": 2408 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008183427153028859, + "loss": 0.6514, + "step": 2409 + }, + { + "epoch": 0.3, + "learning_rate": 0.000818186152733647, + "loss": 0.4597, + "step": 2410 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008180295377170562, + "loss": 0.5525, + "step": 2411 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008178728702789289, + "loss": 0.4843, + "step": 2412 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008177161504450887, + "loss": 0.5188, + "step": 2413 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008175593782413684, + "loss": 0.7062, + "step": 2414 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008174025536936091, + "loss": 0.6038, + "step": 2415 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008172456768276604, + "loss": 0.3566, + "step": 2416 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008170887476693807, + "loss": 0.575, + "step": 2417 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008169317662446372, + "loss": 0.4243, + "step": 2418 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008167747325793051, + "loss": 0.4192, + "step": 2419 + }, + { + "epoch": 0.3, + "learning_rate": 0.000816617646699269, + "loss": 0.6047, + "step": 2420 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008164605086304214, + "loss": 0.5173, + "step": 2421 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008163033183986641, + "loss": 0.4399, + "step": 2422 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008161460760299069, + "loss": 0.519, + "step": 2423 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008159887815500684, + "loss": 0.4253, + "step": 2424 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008158314349850759, + "loss": 0.5585, + "step": 2425 + }, + { + "epoch": 0.3, + "learning_rate": 0.000815674036360865, + "loss": 0.5519, + "step": 2426 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008155165857033804, + "loss": 0.4684, + "step": 2427 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008153590830385749, + "loss": 0.5406, + "step": 2428 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008152015283924099, + "loss": 0.5732, + "step": 2429 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008150439217908557, + "loss": 0.5547, + "step": 2430 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008148862632598906, + "loss": 0.4866, + "step": 2431 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008147285528255022, + "loss": 0.5814, + "step": 2432 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008145707905136863, + "loss": 0.5713, + "step": 2433 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008144129763504468, + "loss": 0.5028, + "step": 2434 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008142551103617972, + "loss": 0.4448, + "step": 2435 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008140971925737583, + "loss": 0.4202, + "step": 2436 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008139392230123604, + "loss": 0.5012, + "step": 2437 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008137812017036418, + "loss": 0.564, + "step": 2438 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008136231286736495, + "loss": 0.4843, + "step": 2439 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008134650039484393, + "loss": 0.4758, + "step": 2440 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008133068275540751, + "loss": 0.5228, + "step": 2441 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008131485995166295, + "loss": 0.5485, + "step": 2442 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008129903198621833, + "loss": 0.5411, + "step": 2443 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008128319886168267, + "loss": 0.5593, + "step": 2444 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008126736058066573, + "loss": 0.5997, + "step": 2445 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008125151714577819, + "loss": 0.5801, + "step": 2446 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008123566855963156, + "loss": 0.4133, + "step": 2447 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008121981482483818, + "loss": 0.4172, + "step": 2448 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008120395594401129, + "loss": 0.4838, + "step": 2449 + }, + { + "epoch": 0.31, + "learning_rate": 0.000811880919197649, + "loss": 0.4869, + "step": 2450 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008117222275471396, + "loss": 0.5077, + "step": 2451 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008115634845147422, + "loss": 0.5422, + "step": 2452 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008114046901266224, + "loss": 0.457, + "step": 2453 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008112458444089547, + "loss": 0.0573, + "step": 2454 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008110869473879224, + "loss": 0.5612, + "step": 2455 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008109279990897164, + "loss": 0.6722, + "step": 2456 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008107689995405367, + "loss": 0.663, + "step": 2457 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008106099487665919, + "loss": 0.5728, + "step": 2458 + }, + { + "epoch": 0.31, + "learning_rate": 0.000810450846794098, + "loss": 0.5077, + "step": 2459 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008102916936492808, + "loss": 0.537, + "step": 2460 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008101324893583734, + "loss": 0.4862, + "step": 2461 + }, + { + "epoch": 0.31, + "learning_rate": 0.000809973233947618, + "loss": 0.4615, + "step": 2462 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008098139274432652, + "loss": 0.5238, + "step": 2463 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008096545698715736, + "loss": 0.5258, + "step": 2464 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008094951612588105, + "loss": 0.5466, + "step": 2465 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008093357016312517, + "loss": 0.5239, + "step": 2466 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008091761910151813, + "loss": 0.6801, + "step": 2467 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008090166294368917, + "loss": 0.4364, + "step": 2468 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008088570169226839, + "loss": 0.4801, + "step": 2469 + }, + { + "epoch": 0.31, + "learning_rate": 0.000808697353498867, + "loss": 0.4258, + "step": 2470 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008085376391917589, + "loss": 0.5564, + "step": 2471 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008083778740276857, + "loss": 0.4422, + "step": 2472 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008082180580329818, + "loss": 0.4377, + "step": 2473 + }, + { + "epoch": 0.31, + "learning_rate": 0.00080805819123399, + "loss": 0.5668, + "step": 2474 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008078982736570612, + "loss": 0.5214, + "step": 2475 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008077383053285556, + "loss": 0.4863, + "step": 2476 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008075782862748407, + "loss": 0.5736, + "step": 2477 + }, + { + "epoch": 0.31, + "learning_rate": 0.000807418216522293, + "loss": 0.4707, + "step": 2478 + }, + { + "epoch": 0.31, + "learning_rate": 0.000807258096097297, + "loss": 0.5496, + "step": 2479 + }, + { + "epoch": 0.31, + "learning_rate": 0.000807097925026246, + "loss": 0.5028, + "step": 2480 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008069377033355412, + "loss": 0.5699, + "step": 2481 + }, + { + "epoch": 0.31, + "learning_rate": 0.000806777431051592, + "loss": 0.4734, + "step": 2482 + }, + { + "epoch": 0.31, + "learning_rate": 0.000806617108200817, + "loss": 0.4661, + "step": 2483 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008064567348096423, + "loss": 0.4733, + "step": 2484 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008062963109045026, + "loss": 0.5001, + "step": 2485 + }, + { + "epoch": 0.31, + "learning_rate": 0.000806135836511841, + "loss": 0.5052, + "step": 2486 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008059753116581087, + "loss": 0.5905, + "step": 2487 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008058147363697657, + "loss": 0.7382, + "step": 2488 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008056541106732797, + "loss": 0.5077, + "step": 2489 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008054934345951273, + "loss": 0.0562, + "step": 2490 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008053327081617927, + "loss": 0.6393, + "step": 2491 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008051719313997692, + "loss": 0.5464, + "step": 2492 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008050111043355576, + "loss": 0.4681, + "step": 2493 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008048502269956677, + "loss": 0.498, + "step": 2494 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008046892994066173, + "loss": 0.5463, + "step": 2495 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008045283215949323, + "loss": 0.4554, + "step": 2496 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008043672935871471, + "loss": 0.0552, + "step": 2497 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008042062154098042, + "loss": 0.5687, + "step": 2498 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008040450870894548, + "loss": 0.53, + "step": 2499 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008038839086526577, + "loss": 0.4896, + "step": 2500 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008037226801259805, + "loss": 0.4269, + "step": 2501 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008035614015359988, + "loss": 0.4502, + "step": 2502 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008034000729092968, + "loss": 0.4318, + "step": 2503 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008032386942724661, + "loss": 0.4427, + "step": 2504 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008030772656521074, + "loss": 0.4438, + "step": 2505 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008029157870748298, + "loss": 0.4951, + "step": 2506 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008027542585672493, + "loss": 0.4806, + "step": 2507 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008025926801559916, + "loss": 0.6456, + "step": 2508 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008024310518676901, + "loss": 0.479, + "step": 2509 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008022693737289859, + "loss": 0.4229, + "step": 2510 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008021076457665293, + "loss": 0.5908, + "step": 2511 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008019458680069778, + "loss": 0.4666, + "step": 2512 + }, + { + "epoch": 0.31, + "learning_rate": 0.000801784040476998, + "loss": 0.5198, + "step": 2513 + }, + { + "epoch": 0.32, + "learning_rate": 0.000801622163203264, + "loss": 0.4735, + "step": 2514 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008014602362124587, + "loss": 0.502, + "step": 2515 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008012982595312725, + "loss": 0.5723, + "step": 2516 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008011362331864049, + "loss": 0.5814, + "step": 2517 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008009741572045627, + "loss": 0.6503, + "step": 2518 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008008120316124612, + "loss": 0.4187, + "step": 2519 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008006498564368243, + "loss": 0.4948, + "step": 2520 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008004876317043835, + "loss": 0.394, + "step": 2521 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008003253574418785, + "loss": 0.3556, + "step": 2522 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008001630336760575, + "loss": 0.6241, + "step": 2523 + }, + { + "epoch": 0.32, + "learning_rate": 0.000800000660433677, + "loss": 0.7538, + "step": 2524 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007998382377415006, + "loss": 0.5703, + "step": 2525 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007996757656263015, + "loss": 0.5706, + "step": 2526 + }, + { + "epoch": 0.32, + "learning_rate": 0.00079951324411486, + "loss": 0.5217, + "step": 2527 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007993506732339653, + "loss": 0.4305, + "step": 2528 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007991880530104136, + "loss": 0.5365, + "step": 2529 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007990253834710108, + "loss": 0.533, + "step": 2530 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007988626646425694, + "loss": 0.5645, + "step": 2531 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007986998965519111, + "loss": 0.5302, + "step": 2532 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007985370792258652, + "loss": 0.4375, + "step": 2533 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007983742126912694, + "loss": 0.4724, + "step": 2534 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007982112969749692, + "loss": 0.5704, + "step": 2535 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007980483321038183, + "loss": 0.421, + "step": 2536 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007978853181046788, + "loss": 0.5046, + "step": 2537 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007977222550044205, + "loss": 0.4749, + "step": 2538 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007975591428299218, + "loss": 0.4727, + "step": 2539 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007973959816080684, + "loss": 0.0539, + "step": 2540 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007972327713657547, + "loss": 0.0535, + "step": 2541 + }, + { + "epoch": 0.32, + "learning_rate": 0.000797069512129883, + "loss": 0.5779, + "step": 2542 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007969062039273639, + "loss": 0.5625, + "step": 2543 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007967428467851157, + "loss": 0.5757, + "step": 2544 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007965794407300647, + "loss": 0.4172, + "step": 2545 + }, + { + "epoch": 0.32, + "learning_rate": 0.000796415985789146, + "loss": 0.5045, + "step": 2546 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007962524819893018, + "loss": 0.6232, + "step": 2547 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007960889293574829, + "loss": 0.053, + "step": 2548 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007959253279206482, + "loss": 0.6752, + "step": 2549 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007957616777057645, + "loss": 0.7879, + "step": 2550 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007955979787398066, + "loss": 0.4264, + "step": 2551 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007954342310497575, + "loss": 0.485, + "step": 2552 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007952704346626077, + "loss": 0.3948, + "step": 2553 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007951065896053566, + "loss": 0.5206, + "step": 2554 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007949426959050108, + "loss": 0.5156, + "step": 2555 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007947787535885853, + "loss": 0.504, + "step": 2556 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007946147626831035, + "loss": 0.64, + "step": 2557 + }, + { + "epoch": 0.32, + "learning_rate": 0.000794450723215596, + "loss": 0.5381, + "step": 2558 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007942866352131017, + "loss": 0.4974, + "step": 2559 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007941224987026681, + "loss": 0.5892, + "step": 2560 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007939583137113497, + "loss": 0.5201, + "step": 2561 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007937940802662098, + "loss": 0.3823, + "step": 2562 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007936297983943192, + "loss": 0.4298, + "step": 2563 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007934654681227571, + "loss": 0.5048, + "step": 2564 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007933010894786101, + "loss": 0.5109, + "step": 2565 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007931366624889734, + "loss": 0.4905, + "step": 2566 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007929721871809494, + "loss": 0.5962, + "step": 2567 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007928076635816497, + "loss": 0.6519, + "step": 2568 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007926430917181924, + "loss": 0.3978, + "step": 2569 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007924784716177047, + "loss": 0.5818, + "step": 2570 + }, + { + "epoch": 0.32, + "learning_rate": 0.000792313803307321, + "loss": 0.4465, + "step": 2571 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007921490868141843, + "loss": 0.6274, + "step": 2572 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007919843221654448, + "loss": 0.4226, + "step": 2573 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007918195093882614, + "loss": 0.5309, + "step": 2574 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007916546485098004, + "loss": 0.4289, + "step": 2575 + }, + { + "epoch": 0.32, + "learning_rate": 0.000791489739557236, + "loss": 0.6088, + "step": 2576 + }, + { + "epoch": 0.32, + "learning_rate": 0.000791324782557751, + "loss": 0.4698, + "step": 2577 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007911597775385351, + "loss": 0.4379, + "step": 2578 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007909947245267869, + "loss": 0.5105, + "step": 2579 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007908296235497121, + "loss": 0.0516, + "step": 2580 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007906644746345251, + "loss": 0.3851, + "step": 2581 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007904992778084473, + "loss": 0.5133, + "step": 2582 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007903340330987087, + "loss": 0.3489, + "step": 2583 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007901687405325471, + "loss": 0.0522, + "step": 2584 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007900034001372078, + "loss": 0.4834, + "step": 2585 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007898380119399446, + "loss": 0.4464, + "step": 2586 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007896725759680183, + "loss": 0.5988, + "step": 2587 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007895070922486984, + "loss": 0.5232, + "step": 2588 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007893415608092619, + "loss": 0.4587, + "step": 2589 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007891759816769939, + "loss": 0.5047, + "step": 2590 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007890103548791867, + "loss": 0.502, + "step": 2591 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007888446804431415, + "loss": 0.4733, + "step": 2592 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007886789583961666, + "loss": 0.4573, + "step": 2593 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007885131887655782, + "loss": 0.5711, + "step": 2594 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007883473715787006, + "loss": 0.4524, + "step": 2595 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007881815068628659, + "loss": 0.4608, + "step": 2596 + }, + { + "epoch": 0.33, + "learning_rate": 0.000788015594645414, + "loss": 0.5959, + "step": 2597 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007878496349536921, + "loss": 0.4568, + "step": 2598 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007876836278150563, + "loss": 0.0517, + "step": 2599 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007875175732568698, + "loss": 0.4968, + "step": 2600 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007873514713065036, + "loss": 0.3925, + "step": 2601 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007871853219913367, + "loss": 0.4681, + "step": 2602 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007870191253387561, + "loss": 0.5753, + "step": 2603 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007868528813761561, + "loss": 0.4949, + "step": 2604 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007866865901309391, + "loss": 0.4137, + "step": 2605 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007865202516305154, + "loss": 0.5878, + "step": 2606 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007863538659023028, + "loss": 0.4668, + "step": 2607 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007861874329737273, + "loss": 0.4237, + "step": 2608 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007860209528722221, + "loss": 0.4385, + "step": 2609 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007858544256252286, + "loss": 0.4526, + "step": 2610 + }, + { + "epoch": 0.33, + "learning_rate": 0.000785687851260196, + "loss": 0.525, + "step": 2611 + }, + { + "epoch": 0.33, + "learning_rate": 0.000785521229804581, + "loss": 0.5216, + "step": 2612 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007853545612858484, + "loss": 0.6278, + "step": 2613 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007851878457314703, + "loss": 0.5673, + "step": 2614 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007850210831689268, + "loss": 0.5493, + "step": 2615 + }, + { + "epoch": 0.33, + "learning_rate": 0.000784854273625706, + "loss": 0.0515, + "step": 2616 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007846874171293033, + "loss": 0.5822, + "step": 2617 + }, + { + "epoch": 0.33, + "learning_rate": 0.000784520513707222, + "loss": 0.5771, + "step": 2618 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007843535633869734, + "loss": 0.4482, + "step": 2619 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007841865661960761, + "loss": 0.4249, + "step": 2620 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007840195221620567, + "loss": 0.4686, + "step": 2621 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007838524313124493, + "loss": 0.3947, + "step": 2622 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007836852936747964, + "loss": 0.5211, + "step": 2623 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007835181092766471, + "loss": 0.4879, + "step": 2624 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007833508781455588, + "loss": 0.4796, + "step": 2625 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007831836003090969, + "loss": 0.5449, + "step": 2626 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007830162757948341, + "loss": 0.4384, + "step": 2627 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007828489046303508, + "loss": 0.5198, + "step": 2628 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007826814868432352, + "loss": 0.4042, + "step": 2629 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007825140224610833, + "loss": 0.4429, + "step": 2630 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007823465115114985, + "loss": 0.5037, + "step": 2631 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007821789540220919, + "loss": 0.467, + "step": 2632 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007820113500204825, + "loss": 0.5023, + "step": 2633 + }, + { + "epoch": 0.33, + "learning_rate": 0.000781843699534297, + "loss": 0.5271, + "step": 2634 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007816760025911694, + "loss": 0.4877, + "step": 2635 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007815082592187417, + "loss": 0.5199, + "step": 2636 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007813404694446632, + "loss": 0.4419, + "step": 2637 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007811726332965914, + "loss": 0.5286, + "step": 2638 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007810047508021909, + "loss": 0.4969, + "step": 2639 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007808368219891342, + "loss": 0.4191, + "step": 2640 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007806688468851014, + "loss": 0.4823, + "step": 2641 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007805008255177803, + "loss": 0.4908, + "step": 2642 + }, + { + "epoch": 0.33, + "learning_rate": 0.000780332757914866, + "loss": 0.4061, + "step": 2643 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007801646441040617, + "loss": 0.495, + "step": 2644 + }, + { + "epoch": 0.33, + "learning_rate": 0.000779996484113078, + "loss": 0.4301, + "step": 2645 + }, + { + "epoch": 0.33, + "learning_rate": 0.000779828277969633, + "loss": 0.474, + "step": 2646 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007796600257014525, + "loss": 0.443, + "step": 2647 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007794917273362699, + "loss": 0.4761, + "step": 2648 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007793233829018263, + "loss": 0.5444, + "step": 2649 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007791549924258702, + "loss": 0.5118, + "step": 2650 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007789865559361577, + "loss": 0.4725, + "step": 2651 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007788180734604528, + "loss": 0.5913, + "step": 2652 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007786495450265267, + "loss": 0.3995, + "step": 2653 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007784809706621585, + "loss": 0.473, + "step": 2654 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007783123503951343, + "loss": 0.4873, + "step": 2655 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007781436842532488, + "loss": 0.5065, + "step": 2656 + }, + { + "epoch": 0.33, + "learning_rate": 0.000777974972264303, + "loss": 0.4725, + "step": 2657 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007778062144561064, + "loss": 0.4919, + "step": 2658 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007776374108564757, + "loss": 0.4467, + "step": 2659 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007774685614932353, + "loss": 0.4579, + "step": 2660 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007772996663942167, + "loss": 0.5046, + "step": 2661 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007771307255872596, + "loss": 0.5212, + "step": 2662 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007769617391002105, + "loss": 0.4539, + "step": 2663 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007767927069609242, + "loss": 0.0763, + "step": 2664 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007766236291972626, + "loss": 0.5975, + "step": 2665 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007764545058370951, + "loss": 0.4894, + "step": 2666 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007762853369082987, + "loss": 0.5121, + "step": 2667 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007761161224387579, + "loss": 0.4886, + "step": 2668 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007759468624563646, + "loss": 0.4639, + "step": 2669 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007757775569890186, + "loss": 0.4026, + "step": 2670 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007756082060646266, + "loss": 0.6777, + "step": 2671 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007754388097111033, + "loss": 0.6287, + "step": 2672 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007752693679563706, + "loss": 0.4606, + "step": 2673 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007750998808283581, + "loss": 0.459, + "step": 2674 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007749303483550026, + "loss": 0.5326, + "step": 2675 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007747607705642484, + "loss": 0.4954, + "step": 2676 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007745911474840476, + "loss": 0.5303, + "step": 2677 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007744214791423596, + "loss": 0.4493, + "step": 2678 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007742517655671511, + "loss": 0.5842, + "step": 2679 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007740820067863965, + "loss": 0.4434, + "step": 2680 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007739122028280775, + "loss": 0.5983, + "step": 2681 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007737423537201831, + "loss": 0.437, + "step": 2682 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007735724594907101, + "loss": 0.5559, + "step": 2683 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007734025201676625, + "loss": 0.6497, + "step": 2684 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007732325357790519, + "loss": 0.5898, + "step": 2685 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007730625063528969, + "loss": 0.4852, + "step": 2686 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007728924319172242, + "loss": 0.5779, + "step": 2687 + }, + { + "epoch": 0.34, + "learning_rate": 0.000772722312500067, + "loss": 0.5115, + "step": 2688 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007725521481294674, + "loss": 0.5387, + "step": 2689 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007723819388334731, + "loss": 0.5464, + "step": 2690 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007722116846401408, + "loss": 0.5114, + "step": 2691 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007720413855775332, + "loss": 0.5173, + "step": 2692 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007718710416737216, + "loss": 0.5759, + "step": 2693 + }, + { + "epoch": 0.34, + "learning_rate": 0.000771700652956784, + "loss": 0.4204, + "step": 2694 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007715302194548058, + "loss": 0.6014, + "step": 2695 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007713597411958802, + "loss": 0.4708, + "step": 2696 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007711892182081073, + "loss": 0.4802, + "step": 2697 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007710186505195951, + "loss": 0.5026, + "step": 2698 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007708480381584584, + "loss": 0.5521, + "step": 2699 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007706773811528196, + "loss": 0.5037, + "step": 2700 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007705066795308087, + "loss": 0.5488, + "step": 2701 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007703359333205626, + "loss": 0.4211, + "step": 2702 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007701651425502261, + "loss": 0.4714, + "step": 2703 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007699943072479507, + "loss": 0.5088, + "step": 2704 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007698234274418958, + "loss": 0.5231, + "step": 2705 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007696525031602278, + "loss": 0.5543, + "step": 2706 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007694815344311207, + "loss": 0.3997, + "step": 2707 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007693105212827554, + "loss": 0.3887, + "step": 2708 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007691394637433204, + "loss": 0.4105, + "step": 2709 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007689683618410118, + "loss": 0.5132, + "step": 2710 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007687972156040326, + "loss": 0.4303, + "step": 2711 + }, + { + "epoch": 0.34, + "learning_rate": 0.000768626025060593, + "loss": 0.4083, + "step": 2712 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007684547902389111, + "loss": 0.5214, + "step": 2713 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007682835111672118, + "loss": 0.2211, + "step": 2714 + }, + { + "epoch": 0.34, + "learning_rate": 0.000768112187873727, + "loss": 0.4454, + "step": 2715 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007679408203866969, + "loss": 0.4355, + "step": 2716 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007677694087343681, + "loss": 0.4945, + "step": 2717 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007675979529449949, + "loss": 0.4447, + "step": 2718 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007674264530468386, + "loss": 0.4825, + "step": 2719 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007672549090681682, + "loss": 0.5132, + "step": 2720 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007670833210372592, + "loss": 0.5149, + "step": 2721 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007669116889823954, + "loss": 0.5074, + "step": 2722 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007667400129318669, + "loss": 0.5372, + "step": 2723 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007665682929139716, + "loss": 0.7104, + "step": 2724 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007663965289570146, + "loss": 0.4399, + "step": 2725 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007662247210893081, + "loss": 0.4884, + "step": 2726 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007660528693391713, + "loss": 0.3879, + "step": 2727 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007658809737349313, + "loss": 0.5277, + "step": 2728 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007657090343049219, + "loss": 0.5225, + "step": 2729 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007655370510774842, + "loss": 0.5134, + "step": 2730 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007653650240809667, + "loss": 0.5712, + "step": 2731 + }, + { + "epoch": 0.34, + "learning_rate": 0.000765192953343725, + "loss": 0.5524, + "step": 2732 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007650208388941217, + "loss": 0.5518, + "step": 2733 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007648486807605273, + "loss": 0.4575, + "step": 2734 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007646764789713184, + "loss": 0.1033, + "step": 2735 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007645042335548799, + "loss": 0.5341, + "step": 2736 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007643319445396031, + "loss": 0.52, + "step": 2737 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007641596119538871, + "loss": 0.3882, + "step": 2738 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007639872358261377, + "loss": 0.5607, + "step": 2739 + }, + { + "epoch": 0.34, + "learning_rate": 0.000763814816184768, + "loss": 0.4977, + "step": 2740 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007636423530581984, + "loss": 0.4729, + "step": 2741 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007634698464748562, + "loss": 0.488, + "step": 2742 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007632972964631764, + "loss": 0.4553, + "step": 2743 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007631247030516007, + "loss": 0.4561, + "step": 2744 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007629520662685779, + "loss": 0.6111, + "step": 2745 + }, + { + "epoch": 0.34, + "learning_rate": 0.000762779386142564, + "loss": 0.519, + "step": 2746 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007626066627020227, + "loss": 0.4108, + "step": 2747 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007624338959754242, + "loss": 0.3987, + "step": 2748 + }, + { + "epoch": 0.34, + "learning_rate": 0.000762261085991246, + "loss": 0.535, + "step": 2749 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007620882327779726, + "loss": 0.4378, + "step": 2750 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007619153363640962, + "loss": 0.4913, + "step": 2751 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007617423967781154, + "loss": 0.4666, + "step": 2752 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007615694140485363, + "loss": 0.0816, + "step": 2753 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007613963882038722, + "loss": 0.578, + "step": 2754 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007612233192726429, + "loss": 0.5151, + "step": 2755 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007610502072833762, + "loss": 0.4843, + "step": 2756 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007608770522646065, + "loss": 0.5995, + "step": 2757 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007607038542448751, + "loss": 0.5098, + "step": 2758 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007605306132527308, + "loss": 0.4897, + "step": 2759 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007603573293167292, + "loss": 0.4845, + "step": 2760 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007601840024654331, + "loss": 0.453, + "step": 2761 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007600106327274125, + "loss": 0.4082, + "step": 2762 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007598372201312444, + "loss": 0.5648, + "step": 2763 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007596637647055124, + "loss": 0.6927, + "step": 2764 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007594902664788078, + "loss": 0.4373, + "step": 2765 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007593167254797288, + "loss": 0.5385, + "step": 2766 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007591431417368804, + "loss": 0.5651, + "step": 2767 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007589695152788749, + "loss": 0.5865, + "step": 2768 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007587958461343316, + "loss": 0.4749, + "step": 2769 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007586221343318766, + "loss": 0.4495, + "step": 2770 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007584483799001431, + "loss": 0.4664, + "step": 2771 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007582745828677719, + "loss": 0.4938, + "step": 2772 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007581007432634102, + "loss": 0.5123, + "step": 2773 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007579268611157122, + "loss": 0.3876, + "step": 2774 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007577529364533392, + "loss": 0.673, + "step": 2775 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007575789693049599, + "loss": 0.5016, + "step": 2776 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007574049596992497, + "loss": 0.4426, + "step": 2777 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007572309076648907, + "loss": 0.5127, + "step": 2778 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007570568132305728, + "loss": 0.4331, + "step": 2779 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007568826764249918, + "loss": 0.4799, + "step": 2780 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007567084972768515, + "loss": 0.4598, + "step": 2781 + }, + { + "epoch": 0.35, + "learning_rate": 0.000756534275814862, + "loss": 0.4108, + "step": 2782 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007563600120677407, + "loss": 0.4569, + "step": 2783 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007561857060642119, + "loss": 0.4644, + "step": 2784 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007560113578330068, + "loss": 0.5192, + "step": 2785 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007558369674028638, + "loss": 0.553, + "step": 2786 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007556625348025278, + "loss": 0.4017, + "step": 2787 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007554880600607512, + "loss": 0.4115, + "step": 2788 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007553135432062929, + "loss": 0.4254, + "step": 2789 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007551389842679188, + "loss": 0.4005, + "step": 2790 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007549643832744023, + "loss": 0.3864, + "step": 2791 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007547897402545228, + "loss": 0.3952, + "step": 2792 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007546150552370673, + "loss": 0.0809, + "step": 2793 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007544403282508296, + "loss": 0.5128, + "step": 2794 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007542655593246102, + "loss": 0.5897, + "step": 2795 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007540907484872168, + "loss": 0.4517, + "step": 2796 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007539158957674639, + "loss": 0.5511, + "step": 2797 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007537410011941725, + "loss": 0.4601, + "step": 2798 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007535660647961715, + "loss": 0.6289, + "step": 2799 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007533910866022955, + "loss": 0.5299, + "step": 2800 + }, + { + "epoch": 0.35, + "learning_rate": 0.000753216066641387, + "loss": 0.5027, + "step": 2801 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007530410049422945, + "loss": 0.4994, + "step": 2802 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007528659015338742, + "loss": 0.5105, + "step": 2803 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007526907564449884, + "loss": 0.4374, + "step": 2804 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007525155697045069, + "loss": 0.4302, + "step": 2805 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007523403413413065, + "loss": 0.5536, + "step": 2806 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007521650713842699, + "loss": 0.4978, + "step": 2807 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007519897598622873, + "loss": 0.3864, + "step": 2808 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007518144068042561, + "loss": 0.5276, + "step": 2809 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007516390122390799, + "loss": 0.4382, + "step": 2810 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007514635761956693, + "loss": 0.5941, + "step": 2811 + }, + { + "epoch": 0.35, + "learning_rate": 0.000751288098702942, + "loss": 0.4053, + "step": 2812 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007511125797898223, + "loss": 0.5109, + "step": 2813 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007509370194852414, + "loss": 0.5822, + "step": 2814 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007507614178181373, + "loss": 0.5634, + "step": 2815 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007505857748174545, + "loss": 0.5206, + "step": 2816 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007504100905121454, + "loss": 0.4001, + "step": 2817 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007502343649311675, + "loss": 0.4679, + "step": 2818 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007500585981034868, + "loss": 0.5105, + "step": 2819 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007498827900580753, + "loss": 0.449, + "step": 2820 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007497069408239113, + "loss": 0.4176, + "step": 2821 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007495310504299808, + "loss": 0.5623, + "step": 2822 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007493551189052764, + "loss": 0.45, + "step": 2823 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007491791462787969, + "loss": 0.4873, + "step": 2824 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007490031325795485, + "loss": 0.554, + "step": 2825 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007488270778365438, + "loss": 0.4518, + "step": 2826 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007486509820788024, + "loss": 0.4592, + "step": 2827 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007484748453353507, + "loss": 0.5831, + "step": 2828 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007482986676352215, + "loss": 0.5353, + "step": 2829 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007481224490074545, + "loss": 0.4073, + "step": 2830 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007479461894810966, + "loss": 0.5204, + "step": 2831 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007477698890852008, + "loss": 0.5001, + "step": 2832 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007475935478488269, + "loss": 0.4669, + "step": 2833 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007474171658010421, + "loss": 0.5253, + "step": 2834 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007472407429709195, + "loss": 0.5751, + "step": 2835 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007470642793875393, + "loss": 0.5044, + "step": 2836 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007468877750799886, + "loss": 0.5022, + "step": 2837 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007467112300773609, + "loss": 0.4946, + "step": 2838 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007465346444087566, + "loss": 0.4497, + "step": 2839 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007463580181032826, + "loss": 0.4319, + "step": 2840 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007461813511900526, + "loss": 0.5382, + "step": 2841 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007460046436981871, + "loss": 0.5046, + "step": 2842 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007458278956568132, + "loss": 0.4276, + "step": 2843 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007456511070950646, + "loss": 0.4403, + "step": 2844 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007454742780420817, + "loss": 0.4799, + "step": 2845 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007452974085270121, + "loss": 0.4033, + "step": 2846 + }, + { + "epoch": 0.36, + "learning_rate": 0.000745120498579009, + "loss": 0.6257, + "step": 2847 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007449435482272331, + "loss": 0.463, + "step": 2848 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007447665575008517, + "loss": 0.4954, + "step": 2849 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007445895264290384, + "loss": 0.3979, + "step": 2850 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007444124550409736, + "loss": 0.4237, + "step": 2851 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007442353433658445, + "loss": 0.5103, + "step": 2852 + }, + { + "epoch": 0.36, + "learning_rate": 0.000744058191432845, + "loss": 0.5352, + "step": 2853 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007438809992711751, + "loss": 0.4786, + "step": 2854 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007437037669100417, + "loss": 0.4601, + "step": 2855 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007435264943786589, + "loss": 0.452, + "step": 2856 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007433491817062468, + "loss": 0.4911, + "step": 2857 + }, + { + "epoch": 0.36, + "learning_rate": 0.000743171828922032, + "loss": 0.6045, + "step": 2858 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007429944360552481, + "loss": 0.6162, + "step": 2859 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007428170031351352, + "loss": 0.5068, + "step": 2860 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007426395301909399, + "loss": 0.5214, + "step": 2861 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007424620172519155, + "loss": 0.5074, + "step": 2862 + }, + { + "epoch": 0.36, + "learning_rate": 0.000742284464347322, + "loss": 0.5603, + "step": 2863 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007421068715064258, + "loss": 0.4951, + "step": 2864 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007419292387584997, + "loss": 0.4337, + "step": 2865 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007417515661328238, + "loss": 0.4512, + "step": 2866 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007415738536586837, + "loss": 0.5216, + "step": 2867 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007413961013653725, + "loss": 0.5402, + "step": 2868 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007412183092821895, + "loss": 0.4608, + "step": 2869 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007410404774384403, + "loss": 0.4171, + "step": 2870 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007408626058634378, + "loss": 0.5446, + "step": 2871 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007406846945865006, + "loss": 0.6511, + "step": 2872 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007405067436369545, + "loss": 0.0849, + "step": 2873 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007403287530441312, + "loss": 0.4929, + "step": 2874 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007401507228373698, + "loss": 0.4625, + "step": 2875 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007399726530460149, + "loss": 0.4999, + "step": 2876 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007397945436994185, + "loss": 0.437, + "step": 2877 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007396163948269385, + "loss": 0.5173, + "step": 2878 + }, + { + "epoch": 0.36, + "learning_rate": 0.00073943820645794, + "loss": 0.3619, + "step": 2879 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007392599786217939, + "loss": 0.5621, + "step": 2880 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007390817113478778, + "loss": 0.6031, + "step": 2881 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007389034046655762, + "loss": 0.3981, + "step": 2882 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007387250586042795, + "loss": 0.571, + "step": 2883 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007385466731933852, + "loss": 0.5939, + "step": 2884 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007383682484622966, + "loss": 0.5627, + "step": 2885 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007381897844404243, + "loss": 0.4655, + "step": 2886 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007380112811571847, + "loss": 0.0732, + "step": 2887 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007378327386420008, + "loss": 0.4805, + "step": 2888 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007376541569243022, + "loss": 0.537, + "step": 2889 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007374755360335252, + "loss": 0.4448, + "step": 2890 + }, + { + "epoch": 0.36, + "learning_rate": 0.000737296875999112, + "loss": 0.4012, + "step": 2891 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007371181768505116, + "loss": 0.5732, + "step": 2892 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007369394386171795, + "loss": 0.489, + "step": 2893 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007367606613285776, + "loss": 0.5165, + "step": 2894 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007365818450141738, + "loss": 0.4194, + "step": 2895 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007364029897034432, + "loss": 0.4547, + "step": 2896 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007362240954258668, + "loss": 0.4836, + "step": 2897 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007360451622109319, + "loss": 0.4456, + "step": 2898 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007358661900881328, + "loss": 0.0672, + "step": 2899 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007356871790869698, + "loss": 0.4415, + "step": 2900 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007355081292369498, + "loss": 0.5256, + "step": 2901 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007353290405675857, + "loss": 0.5392, + "step": 2902 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007351499131083973, + "loss": 0.4067, + "step": 2903 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007349707468889106, + "loss": 0.5995, + "step": 2904 + }, + { + "epoch": 0.36, + "learning_rate": 0.000734791541938658, + "loss": 0.4517, + "step": 2905 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007346122982871779, + "loss": 0.4254, + "step": 2906 + }, + { + "epoch": 0.36, + "learning_rate": 0.000734433015964016, + "loss": 0.5259, + "step": 2907 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007342536949987235, + "loss": 0.4595, + "step": 2908 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007340743354208583, + "loss": 0.5422, + "step": 2909 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007338949372599848, + "loss": 0.5293, + "step": 2910 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007337155005456733, + "loss": 0.5601, + "step": 2911 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007335360253075012, + "loss": 0.4821, + "step": 2912 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007333565115750513, + "loss": 0.0628, + "step": 2913 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007331769593779136, + "loss": 0.0626, + "step": 2914 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007329973687456841, + "loss": 0.457, + "step": 2915 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007328177397079652, + "loss": 0.4697, + "step": 2916 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007326380722943652, + "loss": 0.5239, + "step": 2917 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007324583665344993, + "loss": 0.5619, + "step": 2918 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007322786224579891, + "loss": 0.4473, + "step": 2919 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007320988400944617, + "loss": 0.4619, + "step": 2920 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007319190194735514, + "loss": 0.4725, + "step": 2921 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007317391606248983, + "loss": 0.5031, + "step": 2922 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007315592635781492, + "loss": 0.4727, + "step": 2923 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007313793283629565, + "loss": 0.4062, + "step": 2924 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007311993550089797, + "loss": 0.4526, + "step": 2925 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007310193435458841, + "loss": 0.4973, + "step": 2926 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007308392940033413, + "loss": 0.4717, + "step": 2927 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007306592064110293, + "loss": 0.467, + "step": 2928 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007304790807986326, + "loss": 0.5643, + "step": 2929 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007302989171958413, + "loss": 0.4864, + "step": 2930 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007301187156323526, + "loss": 0.5438, + "step": 2931 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007299384761378693, + "loss": 0.6433, + "step": 2932 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007297581987421007, + "loss": 0.5122, + "step": 2933 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007295778834747624, + "loss": 0.566, + "step": 2934 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007293975303655762, + "loss": 0.5272, + "step": 2935 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007292171394442699, + "loss": 0.3783, + "step": 2936 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007290367107405782, + "loss": 0.5135, + "step": 2937 + }, + { + "epoch": 0.37, + "learning_rate": 0.000728856244284241, + "loss": 0.496, + "step": 2938 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007286757401050054, + "loss": 0.5284, + "step": 2939 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007284951982326242, + "loss": 0.4779, + "step": 2940 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007283146186968565, + "loss": 0.4834, + "step": 2941 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007281340015274678, + "loss": 0.4332, + "step": 2942 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007279533467542294, + "loss": 0.0589, + "step": 2943 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007277726544069192, + "loss": 0.3602, + "step": 2944 + }, + { + "epoch": 0.37, + "learning_rate": 0.000727591924515321, + "loss": 0.4589, + "step": 2945 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007274111571092252, + "loss": 0.5734, + "step": 2946 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007272303522184279, + "loss": 0.4366, + "step": 2947 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007270495098727314, + "loss": 0.4349, + "step": 2948 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007268686301019447, + "loss": 0.4092, + "step": 2949 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007266877129358825, + "loss": 0.0586, + "step": 2950 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007265067584043656, + "loss": 0.4062, + "step": 2951 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007263257665372215, + "loss": 0.3936, + "step": 2952 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007261447373642833, + "loss": 0.4655, + "step": 2953 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007259636709153905, + "loss": 0.4271, + "step": 2954 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007257825672203886, + "loss": 0.5604, + "step": 2955 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007256014263091295, + "loss": 0.4402, + "step": 2956 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007254202482114709, + "loss": 0.6085, + "step": 2957 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007252390329572769, + "loss": 0.4485, + "step": 2958 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007250577805764177, + "loss": 0.5083, + "step": 2959 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007248764910987693, + "loss": 0.4546, + "step": 2960 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007246951645542142, + "loss": 0.4487, + "step": 2961 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007245138009726411, + "loss": 0.5927, + "step": 2962 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007243324003839443, + "loss": 0.4102, + "step": 2963 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007241509628180244, + "loss": 0.5868, + "step": 2964 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007239694883047883, + "loss": 0.4366, + "step": 2965 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007237879768741489, + "loss": 0.509, + "step": 2966 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007236064285560252, + "loss": 0.4434, + "step": 2967 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007234248433803421, + "loss": 0.3858, + "step": 2968 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007232432213770307, + "loss": 0.5615, + "step": 2969 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007230615625760282, + "loss": 0.4257, + "step": 2970 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007228798670072778, + "loss": 0.5331, + "step": 2971 + }, + { + "epoch": 0.37, + "learning_rate": 0.000722698134700729, + "loss": 0.424, + "step": 2972 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007225163656863369, + "loss": 0.457, + "step": 2973 + }, + { + "epoch": 0.37, + "learning_rate": 0.000722334559994063, + "loss": 0.4912, + "step": 2974 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007221527176538746, + "loss": 0.5337, + "step": 2975 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007219708386957453, + "loss": 0.3722, + "step": 2976 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007217889231496548, + "loss": 0.5018, + "step": 2977 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007216069710455886, + "loss": 0.4869, + "step": 2978 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007214249824135379, + "loss": 0.4805, + "step": 2979 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007212429572835007, + "loss": 0.5593, + "step": 2980 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007210608956854807, + "loss": 0.5233, + "step": 2981 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007208787976494871, + "loss": 0.5228, + "step": 2982 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007206966632055358, + "loss": 0.6562, + "step": 2983 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007205144923836485, + "loss": 0.4844, + "step": 2984 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007203322852138528, + "loss": 0.6224, + "step": 2985 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007201500417261822, + "loss": 0.5081, + "step": 2986 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007199677619506763, + "loss": 0.4249, + "step": 2987 + }, + { + "epoch": 0.37, + "learning_rate": 0.000719785445917381, + "loss": 0.4216, + "step": 2988 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007196030936563474, + "loss": 0.4623, + "step": 2989 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007194207051976334, + "loss": 0.4866, + "step": 2990 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007192382805713024, + "loss": 0.4211, + "step": 2991 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007190558198074239, + "loss": 0.4901, + "step": 2992 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007188733229360731, + "loss": 0.4969, + "step": 2993 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007186907899873319, + "loss": 0.451, + "step": 2994 + }, + { + "epoch": 0.38, + "learning_rate": 0.000718508220991287, + "loss": 0.4938, + "step": 2995 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007183256159780321, + "loss": 0.57, + "step": 2996 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007181429749776662, + "loss": 0.5403, + "step": 2997 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007179602980202942, + "loss": 0.4166, + "step": 2998 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007177775851360278, + "loss": 0.4082, + "step": 2999 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007175948363549835, + "loss": 0.452, + "step": 3000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007174120517072845, + "loss": 0.4797, + "step": 3001 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007172292312230593, + "loss": 0.517, + "step": 3002 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007170463749324428, + "loss": 0.389, + "step": 3003 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007168634828655755, + "loss": 0.4304, + "step": 3004 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007166805550526041, + "loss": 0.3803, + "step": 3005 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007164975915236808, + "loss": 0.4308, + "step": 3006 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007163145923089642, + "loss": 0.4821, + "step": 3007 + }, + { + "epoch": 0.38, + "learning_rate": 0.000716131557438618, + "loss": 0.4645, + "step": 3008 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007159484869428128, + "loss": 0.5171, + "step": 3009 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007157653808517241, + "loss": 0.4208, + "step": 3010 + }, + { + "epoch": 0.38, + "learning_rate": 0.000715582239195534, + "loss": 0.4418, + "step": 3011 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007153990620044299, + "loss": 0.4708, + "step": 3012 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007152158493086056, + "loss": 0.4355, + "step": 3013 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007150326011382603, + "loss": 0.4524, + "step": 3014 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007148493175235991, + "loss": 0.3883, + "step": 3015 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007146659984948332, + "loss": 0.4634, + "step": 3016 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007144826440821795, + "loss": 0.3933, + "step": 3017 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007142992543158609, + "loss": 0.4238, + "step": 3018 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007141158292261055, + "loss": 0.3866, + "step": 3019 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007139323688431481, + "loss": 0.6895, + "step": 3020 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007137488731972286, + "loss": 0.4613, + "step": 3021 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007135653423185933, + "loss": 0.4418, + "step": 3022 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007133817762374937, + "loss": 0.0647, + "step": 3023 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007131981749841876, + "loss": 0.563, + "step": 3024 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007130145385889385, + "loss": 0.4338, + "step": 3025 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007128308670820154, + "loss": 0.5199, + "step": 3026 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007126471604936933, + "loss": 0.4158, + "step": 3027 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007124634188542531, + "loss": 0.491, + "step": 3028 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007122796421939815, + "loss": 0.4371, + "step": 3029 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007120958305431705, + "loss": 0.5585, + "step": 3030 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007119119839321185, + "loss": 0.3568, + "step": 3031 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007117281023911289, + "loss": 0.4248, + "step": 3032 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007115441859505119, + "loss": 0.4264, + "step": 3033 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007113602346405824, + "loss": 0.4564, + "step": 3034 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007111762484916619, + "loss": 0.4249, + "step": 3035 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007109922275340769, + "loss": 0.5458, + "step": 3036 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007108081717981603, + "loss": 0.0664, + "step": 3037 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007106240813142501, + "loss": 0.484, + "step": 3038 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007104399561126906, + "loss": 0.3655, + "step": 3039 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007102557962238317, + "loss": 0.5452, + "step": 3040 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007100716016780284, + "loss": 0.4309, + "step": 3041 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007098873725056424, + "loss": 0.4221, + "step": 3042 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007097031087370403, + "loss": 0.6619, + "step": 3043 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007095188104025951, + "loss": 0.4347, + "step": 3044 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007093344775326847, + "loss": 0.4963, + "step": 3045 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007091501101576934, + "loss": 0.6273, + "step": 3046 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007089657083080106, + "loss": 0.4067, + "step": 3047 + }, + { + "epoch": 0.38, + "learning_rate": 0.000708781272014032, + "loss": 0.4749, + "step": 3048 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007085968013061584, + "loss": 0.4548, + "step": 3049 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007084122962147968, + "loss": 0.4039, + "step": 3050 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007082277567703593, + "loss": 0.4038, + "step": 3051 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007080431830032641, + "loss": 0.4956, + "step": 3052 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007078585749439349, + "loss": 0.0614, + "step": 3053 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007076739326228012, + "loss": 0.4692, + "step": 3054 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007074892560702979, + "loss": 0.5027, + "step": 3055 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007073045453168655, + "loss": 0.4273, + "step": 3056 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007071198003929507, + "loss": 0.496, + "step": 3057 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007069350213290049, + "loss": 0.558, + "step": 3058 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007067502081554861, + "loss": 0.0607, + "step": 3059 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007065653609028573, + "loss": 0.6002, + "step": 3060 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007063804796015872, + "loss": 0.4146, + "step": 3061 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007061955642821505, + "loss": 0.4324, + "step": 3062 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007060106149750269, + "loss": 0.4561, + "step": 3063 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007058256317107023, + "loss": 0.4623, + "step": 3064 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007056406145196677, + "loss": 0.4579, + "step": 3065 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007054555634324199, + "loss": 0.4478, + "step": 3066 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007052704784794614, + "loss": 0.3896, + "step": 3067 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007050853596913, + "loss": 0.5198, + "step": 3068 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007049002070984497, + "loss": 0.4922, + "step": 3069 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007047150207314292, + "loss": 0.5226, + "step": 3070 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007045298006207632, + "loss": 0.5743, + "step": 3071 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007043445467969822, + "loss": 0.0588, + "step": 3072 + }, + { + "epoch": 0.39, + "learning_rate": 0.000704159259290622, + "loss": 0.5768, + "step": 3073 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007039739381322237, + "loss": 0.5278, + "step": 3074 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007037885833523343, + "loss": 0.457, + "step": 3075 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007036031949815063, + "loss": 0.475, + "step": 3076 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007034177730502978, + "loss": 0.4316, + "step": 3077 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007032323175892723, + "loss": 0.4653, + "step": 3078 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007030468286289986, + "loss": 0.5875, + "step": 3079 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007028613062000516, + "loss": 0.4089, + "step": 3080 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007026757503330112, + "loss": 0.3584, + "step": 3081 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007024901610584628, + "loss": 0.4741, + "step": 3082 + }, + { + "epoch": 0.39, + "learning_rate": 0.000702304538406998, + "loss": 0.512, + "step": 3083 + }, + { + "epoch": 0.39, + "learning_rate": 0.000702118882409213, + "loss": 0.4185, + "step": 3084 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007019331930957101, + "loss": 0.5021, + "step": 3085 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007017474704970968, + "loss": 0.4263, + "step": 3086 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007015617146439862, + "loss": 0.4667, + "step": 3087 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007013759255669969, + "loss": 0.4016, + "step": 3088 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007011901032967529, + "loss": 0.4949, + "step": 3089 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007010042478638836, + "loss": 0.4998, + "step": 3090 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007008183592990243, + "loss": 0.3802, + "step": 3091 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007006324376328152, + "loss": 0.4523, + "step": 3092 + }, + { + "epoch": 0.39, + "learning_rate": 0.000700446482895902, + "loss": 0.4484, + "step": 3093 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007002604951189366, + "loss": 0.5211, + "step": 3094 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007000744743325755, + "loss": 0.5142, + "step": 3095 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006998884205674805, + "loss": 0.5273, + "step": 3096 + }, + { + "epoch": 0.39, + "learning_rate": 0.00069970233385432, + "loss": 0.462, + "step": 3097 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006995162142237667, + "loss": 0.4072, + "step": 3098 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006993300617064992, + "loss": 0.4434, + "step": 3099 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006991438763332016, + "loss": 0.4266, + "step": 3100 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006989576581345628, + "loss": 0.4938, + "step": 3101 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006987714071412781, + "loss": 0.5288, + "step": 3102 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006985851233840473, + "loss": 0.5229, + "step": 3103 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006983988068935761, + "loss": 0.5262, + "step": 3104 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006982124577005755, + "loss": 0.0571, + "step": 3105 + }, + { + "epoch": 0.39, + "learning_rate": 0.000698026075835762, + "loss": 0.4374, + "step": 3106 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006978396613298571, + "loss": 0.4222, + "step": 3107 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006976532142135881, + "loss": 0.4713, + "step": 3108 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006974667345176874, + "loss": 0.5461, + "step": 3109 + }, + { + "epoch": 0.39, + "learning_rate": 0.000697280222272893, + "loss": 0.4624, + "step": 3110 + }, + { + "epoch": 0.39, + "learning_rate": 0.000697093677509948, + "loss": 0.552, + "step": 3111 + }, + { + "epoch": 0.39, + "learning_rate": 0.000696907100259601, + "loss": 0.5293, + "step": 3112 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006967204905526062, + "loss": 0.4618, + "step": 3113 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006965338484197225, + "loss": 0.5011, + "step": 3114 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006963471738917149, + "loss": 0.4402, + "step": 3115 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006961604669993533, + "loss": 0.5005, + "step": 3116 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006959737277734129, + "loss": 0.5334, + "step": 3117 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006957869562446746, + "loss": 0.4117, + "step": 3118 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006956001524439239, + "loss": 0.3373, + "step": 3119 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006954133164019528, + "loss": 0.4508, + "step": 3120 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006952264481495574, + "loss": 0.4144, + "step": 3121 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006950395477175398, + "loss": 0.5114, + "step": 3122 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006948526151367069, + "loss": 0.5612, + "step": 3123 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006946656504378718, + "loss": 0.3828, + "step": 3124 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006944786536518519, + "loss": 0.3892, + "step": 3125 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006942916248094704, + "loss": 0.4706, + "step": 3126 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006941045639415557, + "loss": 0.5703, + "step": 3127 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006939174710789416, + "loss": 0.6202, + "step": 3128 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006937303462524668, + "loss": 0.0562, + "step": 3129 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006935431894929758, + "loss": 0.5375, + "step": 3130 + }, + { + "epoch": 0.39, + "learning_rate": 0.000693356000831318, + "loss": 0.3851, + "step": 3131 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006931687802983478, + "loss": 0.4719, + "step": 3132 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006929815279249256, + "loss": 0.4789, + "step": 3133 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006927942437419165, + "loss": 0.5212, + "step": 3134 + }, + { + "epoch": 0.39, + "learning_rate": 0.000692606927780191, + "loss": 0.4987, + "step": 3135 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006924195800706247, + "loss": 0.5281, + "step": 3136 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006922322006440986, + "loss": 0.4845, + "step": 3137 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006920447895314989, + "loss": 0.4381, + "step": 3138 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006918573467637173, + "loss": 0.0556, + "step": 3139 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006916698723716499, + "loss": 0.5198, + "step": 3140 + }, + { + "epoch": 0.39, + "learning_rate": 0.000691482366386199, + "loss": 0.4727, + "step": 3141 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006912948288382712, + "loss": 0.4723, + "step": 3142 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006911072597587791, + "loss": 0.4554, + "step": 3143 + }, + { + "epoch": 0.39, + "learning_rate": 0.00069091965917864, + "loss": 0.4575, + "step": 3144 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006907320271287763, + "loss": 0.4442, + "step": 3145 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006905443636401162, + "loss": 0.4043, + "step": 3146 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006903566687435927, + "loss": 0.5183, + "step": 3147 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006901689424701436, + "loss": 0.4562, + "step": 3148 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006899811848507126, + "loss": 0.4678, + "step": 3149 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006897933959162482, + "loss": 0.4432, + "step": 3150 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006896055756977038, + "loss": 0.4412, + "step": 3151 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006894177242260385, + "loss": 0.3993, + "step": 3152 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006892298415322164, + "loss": 0.4595, + "step": 3153 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006890419276472062, + "loss": 0.391, + "step": 3154 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006888539826019824, + "loss": 0.4368, + "step": 3155 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006886660064275245, + "loss": 0.4822, + "step": 3156 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006884779991548171, + "loss": 0.4257, + "step": 3157 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006882899608148496, + "loss": 0.462, + "step": 3158 + }, + { + "epoch": 0.4, + "learning_rate": 0.000688101891438617, + "loss": 0.3861, + "step": 3159 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006879137910571191, + "loss": 0.4694, + "step": 3160 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006877256597013611, + "loss": 0.4146, + "step": 3161 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006875374974023527, + "loss": 0.588, + "step": 3162 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006873493041911097, + "loss": 0.5205, + "step": 3163 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006871610800986518, + "loss": 0.5575, + "step": 3164 + }, + { + "epoch": 0.4, + "learning_rate": 0.000686972825156005, + "loss": 0.4141, + "step": 3165 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006867845393941994, + "loss": 0.5618, + "step": 3166 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006865962228442707, + "loss": 0.4912, + "step": 3167 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006864078755372595, + "loss": 0.5521, + "step": 3168 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006862194975042116, + "loss": 0.4097, + "step": 3169 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006860310887761778, + "loss": 0.4249, + "step": 3170 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006858426493842138, + "loss": 0.6145, + "step": 3171 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006856541793593807, + "loss": 0.4894, + "step": 3172 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006854656787327443, + "loss": 0.4871, + "step": 3173 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006852771475353754, + "loss": 0.4843, + "step": 3174 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006850885857983504, + "loss": 0.4005, + "step": 3175 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006848999935527503, + "loss": 0.574, + "step": 3176 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006847113708296609, + "loss": 0.5104, + "step": 3177 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006845227176601736, + "loss": 0.4404, + "step": 3178 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006843340340753844, + "loss": 0.4087, + "step": 3179 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006841453201063946, + "loss": 0.4865, + "step": 3180 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006839565757843101, + "loss": 0.465, + "step": 3181 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006837678011402424, + "loss": 0.4873, + "step": 3182 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006835789962053073, + "loss": 0.506, + "step": 3183 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006833901610106261, + "loss": 0.483, + "step": 3184 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006832012955873252, + "loss": 0.4149, + "step": 3185 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006830123999665351, + "loss": 0.6279, + "step": 3186 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006828234741793926, + "loss": 0.4915, + "step": 3187 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006826345182570383, + "loss": 0.6515, + "step": 3188 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006824455322306184, + "loss": 0.4299, + "step": 3189 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006822565161312838, + "loss": 0.5299, + "step": 3190 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006820674699901905, + "loss": 0.4338, + "step": 3191 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006818783938384996, + "loss": 0.3982, + "step": 3192 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006816892877073767, + "loss": 0.511, + "step": 3193 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006815001516279926, + "loss": 0.4132, + "step": 3194 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006813109856315233, + "loss": 0.0558, + "step": 3195 + }, + { + "epoch": 0.4, + "learning_rate": 0.000681121789749149, + "loss": 0.5714, + "step": 3196 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006809325640120559, + "loss": 0.4336, + "step": 3197 + }, + { + "epoch": 0.4, + "learning_rate": 0.000680743308451434, + "loss": 0.5911, + "step": 3198 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006805540230984791, + "loss": 0.3863, + "step": 3199 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006803647079843911, + "loss": 0.5356, + "step": 3200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006801753631403757, + "loss": 0.5092, + "step": 3201 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006799859885976427, + "loss": 0.5062, + "step": 3202 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006797965843874075, + "loss": 0.4625, + "step": 3203 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006796071505408896, + "loss": 0.0556, + "step": 3204 + }, + { + "epoch": 0.4, + "learning_rate": 0.000679417687089314, + "loss": 0.4316, + "step": 3205 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006792281940639107, + "loss": 0.5145, + "step": 3206 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006790386714959136, + "loss": 0.4667, + "step": 3207 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006788491194165629, + "loss": 0.428, + "step": 3208 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006786595378571022, + "loss": 0.5833, + "step": 3209 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006784699268487812, + "loss": 0.4137, + "step": 3210 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006782802864228536, + "loss": 0.4517, + "step": 3211 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006780906166105785, + "loss": 0.3928, + "step": 3212 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006779009174432193, + "loss": 0.5813, + "step": 3213 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006777111889520448, + "loss": 0.4353, + "step": 3214 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006775214311683284, + "loss": 0.6316, + "step": 3215 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006773316441233482, + "loss": 0.5619, + "step": 3216 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006771418278483871, + "loss": 0.5918, + "step": 3217 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006769519823747332, + "loss": 0.5134, + "step": 3218 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006767621077336791, + "loss": 0.5082, + "step": 3219 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006765722039565222, + "loss": 0.504, + "step": 3220 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006763822710745648, + "loss": 0.4537, + "step": 3221 + }, + { + "epoch": 0.4, + "learning_rate": 0.000676192309119114, + "loss": 0.4485, + "step": 3222 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006760023181214817, + "loss": 0.4778, + "step": 3223 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006758122981129845, + "loss": 0.4108, + "step": 3224 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006756222491249437, + "loss": 0.408, + "step": 3225 + }, + { + "epoch": 0.4, + "learning_rate": 0.000675432171188686, + "loss": 0.4897, + "step": 3226 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006752420643355417, + "loss": 0.4708, + "step": 3227 + }, + { + "epoch": 0.4, + "learning_rate": 0.000675051928596847, + "loss": 0.4313, + "step": 3228 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006748617640039422, + "loss": 0.5137, + "step": 3229 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006746715705881728, + "loss": 0.4984, + "step": 3230 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006744813483808884, + "loss": 0.4425, + "step": 3231 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006742910974134442, + "loss": 0.3873, + "step": 3232 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006741008177171995, + "loss": 0.5165, + "step": 3233 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006739105093235185, + "loss": 0.4704, + "step": 3234 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006737201722637702, + "loss": 0.4056, + "step": 3235 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006735298065693285, + "loss": 0.4908, + "step": 3236 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006733394122715716, + "loss": 0.3599, + "step": 3237 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006731489894018826, + "loss": 0.4841, + "step": 3238 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006729585379916494, + "loss": 0.4446, + "step": 3239 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006727680580722645, + "loss": 0.5751, + "step": 3240 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006725775496751255, + "loss": 0.4424, + "step": 3241 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006723870128316336, + "loss": 0.4658, + "step": 3242 + }, + { + "epoch": 0.41, + "learning_rate": 0.000672196447573196, + "loss": 0.3973, + "step": 3243 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006720058539312238, + "loss": 0.3688, + "step": 3244 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006718152319371332, + "loss": 0.4988, + "step": 3245 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006716245816223446, + "loss": 0.3846, + "step": 3246 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006714339030182836, + "loss": 0.5234, + "step": 3247 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006712431961563798, + "loss": 0.51, + "step": 3248 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006710524610680682, + "loss": 0.4524, + "step": 3249 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006708616977847879, + "loss": 0.4095, + "step": 3250 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006706709063379829, + "loss": 0.3937, + "step": 3251 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006704800867591018, + "loss": 0.3577, + "step": 3252 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006702892390795978, + "loss": 0.551, + "step": 3253 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006700983633309289, + "loss": 0.3654, + "step": 3254 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006699074595445574, + "loss": 0.4204, + "step": 3255 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006697165277519505, + "loss": 0.4668, + "step": 3256 + }, + { + "epoch": 0.41, + "learning_rate": 0.00066952556798458, + "loss": 0.4847, + "step": 3257 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006693345802739222, + "loss": 0.4257, + "step": 3258 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006691435646514578, + "loss": 0.5436, + "step": 3259 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006689525211486726, + "loss": 0.4122, + "step": 3260 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006687614497970566, + "loss": 0.5002, + "step": 3261 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006685703506281046, + "loss": 0.4246, + "step": 3262 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006683792236733162, + "loss": 0.4182, + "step": 3263 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006681880689641947, + "loss": 0.432, + "step": 3264 + }, + { + "epoch": 0.41, + "learning_rate": 0.000667996886532249, + "loss": 0.4385, + "step": 3265 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006678056764089919, + "loss": 0.5302, + "step": 3266 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006676144386259413, + "loss": 0.5336, + "step": 3267 + }, + { + "epoch": 0.41, + "learning_rate": 0.000667423173214619, + "loss": 0.3923, + "step": 3268 + }, + { + "epoch": 0.41, + "learning_rate": 0.000667231880206552, + "loss": 0.4578, + "step": 3269 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006670405596332715, + "loss": 0.4417, + "step": 3270 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006668492115263131, + "loss": 0.457, + "step": 3271 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006666578359172175, + "loss": 0.4561, + "step": 3272 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006664664328375292, + "loss": 0.4543, + "step": 3273 + }, + { + "epoch": 0.41, + "learning_rate": 0.000666275002318798, + "loss": 0.5151, + "step": 3274 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006660835443925775, + "loss": 0.5303, + "step": 3275 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006658920590904262, + "loss": 0.4299, + "step": 3276 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006657005464439072, + "loss": 0.4343, + "step": 3277 + }, + { + "epoch": 0.41, + "learning_rate": 0.000665509006484588, + "loss": 0.5367, + "step": 3278 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006653174392440404, + "loss": 0.055, + "step": 3279 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006651258447538409, + "loss": 0.3594, + "step": 3280 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006649342230455706, + "loss": 0.3958, + "step": 3281 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006647425741508146, + "loss": 0.5575, + "step": 3282 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006645508981011633, + "loss": 0.473, + "step": 3283 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006643591949282108, + "loss": 0.4332, + "step": 3284 + }, + { + "epoch": 0.41, + "learning_rate": 0.000664167464663556, + "loss": 0.3605, + "step": 3285 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006639757073388025, + "loss": 0.5111, + "step": 3286 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006637839229855576, + "loss": 0.5255, + "step": 3287 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006635921116354339, + "loss": 0.413, + "step": 3288 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006634002733200482, + "loss": 0.467, + "step": 3289 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006632084080710213, + "loss": 0.493, + "step": 3290 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006630165159199793, + "loss": 0.3702, + "step": 3291 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006628245968985515, + "loss": 0.5427, + "step": 3292 + }, + { + "epoch": 0.41, + "learning_rate": 0.000662632651038373, + "loss": 0.6132, + "step": 3293 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006624406783710823, + "loss": 0.5447, + "step": 3294 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006622486789283229, + "loss": 0.45, + "step": 3295 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006620566527417423, + "loss": 0.4304, + "step": 3296 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006618645998429928, + "loss": 0.5883, + "step": 3297 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006616725202637308, + "loss": 0.4528, + "step": 3298 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006614804140356175, + "loss": 0.4027, + "step": 3299 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006612882811903178, + "loss": 0.3896, + "step": 3300 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006610961217595016, + "loss": 0.4103, + "step": 3301 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006609039357748429, + "loss": 0.4773, + "step": 3302 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006607117232680205, + "loss": 0.5546, + "step": 3303 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006605194842707168, + "loss": 0.5343, + "step": 3304 + }, + { + "epoch": 0.41, + "learning_rate": 0.000660327218814619, + "loss": 0.4438, + "step": 3305 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006601349269314187, + "loss": 0.4945, + "step": 3306 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006599426086528122, + "loss": 0.438, + "step": 3307 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006597502640104993, + "loss": 0.4628, + "step": 3308 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006595578930361849, + "loss": 0.4451, + "step": 3309 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006593654957615778, + "loss": 0.516, + "step": 3310 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006591730722183913, + "loss": 0.4183, + "step": 3311 + }, + { + "epoch": 0.42, + "learning_rate": 0.000658980622438343, + "loss": 0.479, + "step": 3312 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006587881464531547, + "loss": 0.5314, + "step": 3313 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006585956442945531, + "loss": 0.5042, + "step": 3314 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006584031159942682, + "loss": 0.0543, + "step": 3315 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006582105615840355, + "loss": 0.4598, + "step": 3316 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006580179810955935, + "loss": 0.5111, + "step": 3317 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006578253745606863, + "loss": 0.4779, + "step": 3318 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006576327420110612, + "loss": 0.4559, + "step": 3319 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006574400834784706, + "loss": 0.4376, + "step": 3320 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006572473989946708, + "loss": 0.4218, + "step": 3321 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006570546885914222, + "loss": 0.4483, + "step": 3322 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006568619523004898, + "loss": 0.3916, + "step": 3323 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006566691901536427, + "loss": 0.4207, + "step": 3324 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006564764021826548, + "loss": 0.5319, + "step": 3325 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006562835884193032, + "loss": 0.4363, + "step": 3326 + }, + { + "epoch": 0.42, + "learning_rate": 0.00065609074889537, + "loss": 0.4553, + "step": 3327 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006558978836426415, + "loss": 0.4523, + "step": 3328 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006557049926929081, + "loss": 0.5221, + "step": 3329 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006555120760779644, + "loss": 0.3722, + "step": 3330 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006553191338296092, + "loss": 0.4218, + "step": 3331 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006551261659796457, + "loss": 0.3973, + "step": 3332 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006549331725598813, + "loss": 0.5036, + "step": 3333 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006547401536021276, + "loss": 0.4535, + "step": 3334 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006545471091382, + "loss": 0.5526, + "step": 3335 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006543540391999189, + "loss": 0.4808, + "step": 3336 + }, + { + "epoch": 0.42, + "learning_rate": 0.000654160943819108, + "loss": 0.4221, + "step": 3337 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006539678230275961, + "loss": 0.4384, + "step": 3338 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006537746768572155, + "loss": 0.5581, + "step": 3339 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006535815053398031, + "loss": 0.5282, + "step": 3340 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006533883085071997, + "loss": 0.0534, + "step": 3341 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006531950863912501, + "loss": 0.4745, + "step": 3342 + }, + { + "epoch": 0.42, + "learning_rate": 0.000653001839023804, + "loss": 0.5499, + "step": 3343 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006528085664367146, + "loss": 0.0529, + "step": 3344 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006526152686618394, + "loss": 0.4709, + "step": 3345 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006524219457310403, + "loss": 0.4543, + "step": 3346 + }, + { + "epoch": 0.42, + "learning_rate": 0.000652228597676183, + "loss": 0.4661, + "step": 3347 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006520352245291375, + "loss": 0.0523, + "step": 3348 + }, + { + "epoch": 0.42, + "learning_rate": 0.000651841826321778, + "loss": 0.4612, + "step": 3349 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006516484030859828, + "loss": 0.4093, + "step": 3350 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006514549548536343, + "loss": 0.4061, + "step": 3351 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006512614816566189, + "loss": 0.481, + "step": 3352 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006510679835268273, + "loss": 0.5049, + "step": 3353 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006508744604961543, + "loss": 0.4059, + "step": 3354 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006506809125964985, + "loss": 0.4357, + "step": 3355 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006504873398597633, + "loss": 0.4073, + "step": 3356 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006502937423178552, + "loss": 0.4473, + "step": 3357 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006501001200026858, + "loss": 0.4058, + "step": 3358 + }, + { + "epoch": 0.42, + "learning_rate": 0.00064990647294617, + "loss": 0.4495, + "step": 3359 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006497128011802272, + "loss": 0.4862, + "step": 3360 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006495191047367806, + "loss": 0.5468, + "step": 3361 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006493253836477581, + "loss": 0.4084, + "step": 3362 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006491316379450906, + "loss": 0.4973, + "step": 3363 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006489378676607139, + "loss": 0.446, + "step": 3364 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006487440728265677, + "loss": 0.4978, + "step": 3365 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006485502534745956, + "loss": 0.4411, + "step": 3366 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006483564096367451, + "loss": 0.5183, + "step": 3367 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006481625413449681, + "loss": 0.4491, + "step": 3368 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006479686486312205, + "loss": 0.4376, + "step": 3369 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006477747315274618, + "loss": 0.4346, + "step": 3370 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006475807900656558, + "loss": 0.4584, + "step": 3371 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006473868242777705, + "loss": 0.5624, + "step": 3372 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006471928341957777, + "loss": 0.4204, + "step": 3373 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006469988198516531, + "loss": 0.4426, + "step": 3374 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006468047812773767, + "loss": 0.4198, + "step": 3375 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006466107185049321, + "loss": 0.5123, + "step": 3376 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006464166315663074, + "loss": 0.4422, + "step": 3377 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006462225204934939, + "loss": 0.4185, + "step": 3378 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006460283853184879, + "loss": 0.53, + "step": 3379 + }, + { + "epoch": 0.42, + "learning_rate": 0.000645834226073289, + "loss": 0.5784, + "step": 3380 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006456400427899008, + "loss": 0.5787, + "step": 3381 + }, + { + "epoch": 0.42, + "learning_rate": 0.000645445835500331, + "loss": 0.4372, + "step": 3382 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006452516042365912, + "loss": 0.4686, + "step": 3383 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006450573490306972, + "loss": 0.4843, + "step": 3384 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006448630699146683, + "loss": 0.4857, + "step": 3385 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006446687669205281, + "loss": 0.4459, + "step": 3386 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006444744400803039, + "loss": 0.0521, + "step": 3387 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006442800894260272, + "loss": 0.4358, + "step": 3388 + }, + { + "epoch": 0.42, + "learning_rate": 0.000644085714989733, + "loss": 0.4929, + "step": 3389 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006438913168034607, + "loss": 0.4601, + "step": 3390 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006436968948992535, + "loss": 0.3993, + "step": 3391 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006435024493091579, + "loss": 0.49, + "step": 3392 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006433079800652253, + "loss": 0.4543, + "step": 3393 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006431134871995105, + "loss": 0.0515, + "step": 3394 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006429189707440721, + "loss": 0.4369, + "step": 3395 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006427244307309725, + "loss": 0.3679, + "step": 3396 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006425298671922785, + "loss": 0.4846, + "step": 3397 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006423352801600601, + "loss": 0.5635, + "step": 3398 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006421406696663918, + "loss": 0.4667, + "step": 3399 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006419460357433518, + "loss": 0.5291, + "step": 3400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006417513784230215, + "loss": 0.4637, + "step": 3401 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006415566977374874, + "loss": 0.5769, + "step": 3402 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006413619937188387, + "loss": 0.5442, + "step": 3403 + }, + { + "epoch": 0.43, + "learning_rate": 0.000641167266399169, + "loss": 0.4349, + "step": 3404 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006409725158105757, + "loss": 0.4397, + "step": 3405 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006407777419851602, + "loss": 0.3615, + "step": 3406 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006405829449550271, + "loss": 0.5513, + "step": 3407 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006403881247522855, + "loss": 0.4985, + "step": 3408 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006401932814090481, + "loss": 0.4934, + "step": 3409 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006399984149574311, + "loss": 0.4221, + "step": 3410 + }, + { + "epoch": 0.43, + "learning_rate": 0.000639803525429555, + "loss": 0.6077, + "step": 3411 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006396086128575437, + "loss": 0.515, + "step": 3412 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006394136772735256, + "loss": 0.4962, + "step": 3413 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006392187187096317, + "loss": 0.4609, + "step": 3414 + }, + { + "epoch": 0.43, + "learning_rate": 0.000639023737197998, + "loss": 0.4026, + "step": 3415 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006388287327707635, + "loss": 0.4151, + "step": 3416 + }, + { + "epoch": 0.43, + "learning_rate": 0.000638633705460071, + "loss": 0.5134, + "step": 3417 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006384386552980678, + "loss": 0.5349, + "step": 3418 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006382435823169041, + "loss": 0.4504, + "step": 3419 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006380484865487346, + "loss": 0.478, + "step": 3420 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006378533680257169, + "loss": 0.4412, + "step": 3421 + }, + { + "epoch": 0.43, + "learning_rate": 0.000637658226780013, + "loss": 0.4875, + "step": 3422 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006374630628437886, + "loss": 0.4348, + "step": 3423 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006372678762492129, + "loss": 0.4484, + "step": 3424 + }, + { + "epoch": 0.43, + "learning_rate": 0.000637072667028459, + "loss": 0.3596, + "step": 3425 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006368774352137037, + "loss": 0.4462, + "step": 3426 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006366821808371276, + "loss": 0.541, + "step": 3427 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006364869039309147, + "loss": 0.4268, + "step": 3428 + }, + { + "epoch": 0.43, + "learning_rate": 0.000636291604527253, + "loss": 0.4573, + "step": 3429 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006360962826583343, + "loss": 0.5615, + "step": 3430 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006359009383563539, + "loss": 0.4586, + "step": 3431 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006357055716535107, + "loss": 0.0517, + "step": 3432 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006355101825820075, + "loss": 0.5156, + "step": 3433 + }, + { + "epoch": 0.43, + "learning_rate": 0.000635314771174051, + "loss": 0.4406, + "step": 3434 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006351193374618511, + "loss": 0.4038, + "step": 3435 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006349238814776213, + "loss": 0.476, + "step": 3436 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006347284032535794, + "loss": 0.4114, + "step": 3437 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006345329028219467, + "loss": 0.3976, + "step": 3438 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006343373802149474, + "loss": 0.4379, + "step": 3439 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006341418354648106, + "loss": 0.4293, + "step": 3440 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006339462686037677, + "loss": 0.5557, + "step": 3441 + }, + { + "epoch": 0.43, + "learning_rate": 0.000633750679664055, + "loss": 0.4261, + "step": 3442 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006335550686779117, + "loss": 0.4159, + "step": 3443 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006333594356775807, + "loss": 0.3489, + "step": 3444 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006331637806953086, + "loss": 0.4963, + "step": 3445 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006329681037633459, + "loss": 0.4792, + "step": 3446 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006327724049139464, + "loss": 0.4897, + "step": 3447 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006325766841793674, + "loss": 0.4657, + "step": 3448 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006323809415918702, + "loss": 0.494, + "step": 3449 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006321851771837194, + "loss": 0.4066, + "step": 3450 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006319893909871833, + "loss": 0.5048, + "step": 3451 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006317935830345338, + "loss": 0.4167, + "step": 3452 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006315977533580464, + "loss": 0.4957, + "step": 3453 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006314019019900002, + "loss": 0.5038, + "step": 3454 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006312060289626778, + "loss": 0.3842, + "step": 3455 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006310101343083652, + "loss": 0.5164, + "step": 3456 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006308142180593524, + "loss": 0.4259, + "step": 3457 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006306182802479328, + "loss": 0.0515, + "step": 3458 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006304223209064029, + "loss": 0.5018, + "step": 3459 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006302263400670636, + "loss": 0.5249, + "step": 3460 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006300303377622184, + "loss": 0.3705, + "step": 3461 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006298343140241753, + "loss": 0.5259, + "step": 3462 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006296382688852448, + "loss": 0.5206, + "step": 3463 + }, + { + "epoch": 0.43, + "learning_rate": 0.000629442202377742, + "loss": 0.3622, + "step": 3464 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006292461145339845, + "loss": 0.4548, + "step": 3465 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006290500053862942, + "loss": 0.3765, + "step": 3466 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006288538749669961, + "loss": 0.052, + "step": 3467 + }, + { + "epoch": 0.43, + "learning_rate": 0.000628657723308419, + "loss": 0.3809, + "step": 3468 + }, + { + "epoch": 0.43, + "learning_rate": 0.000628461550442895, + "loss": 0.4133, + "step": 3469 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006282653564027595, + "loss": 0.4406, + "step": 3470 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006280691412203519, + "loss": 0.3955, + "step": 3471 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006278729049280145, + "loss": 0.4633, + "step": 3472 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006276766475580935, + "loss": 0.4294, + "step": 3473 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006274803691429386, + "loss": 0.4392, + "step": 3474 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006272840697149027, + "loss": 0.4158, + "step": 3475 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006270877493063423, + "loss": 0.0523, + "step": 3476 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006268914079496172, + "loss": 0.4792, + "step": 3477 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006266950456770909, + "loss": 0.4751, + "step": 3478 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006264986625211304, + "loss": 0.3517, + "step": 3479 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006263022585141059, + "loss": 0.4158, + "step": 3480 + }, + { + "epoch": 0.44, + "learning_rate": 0.000626105833688391, + "loss": 0.5149, + "step": 3481 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006259093880763628, + "loss": 0.45, + "step": 3482 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006257129217104023, + "loss": 0.5421, + "step": 3483 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006255164346228929, + "loss": 0.0522, + "step": 3484 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006253199268462224, + "loss": 0.4565, + "step": 3485 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006251233984127816, + "loss": 0.4192, + "step": 3486 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006249268493549647, + "loss": 0.605, + "step": 3487 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006247302797051693, + "loss": 0.0522, + "step": 3488 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006245336894957962, + "loss": 0.384, + "step": 3489 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006243370787592504, + "loss": 0.4036, + "step": 3490 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006241404475279392, + "loss": 0.5714, + "step": 3491 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006239437958342739, + "loss": 0.4623, + "step": 3492 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006237471237106691, + "loss": 0.4395, + "step": 3493 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006235504311895427, + "loss": 0.5295, + "step": 3494 + }, + { + "epoch": 0.44, + "learning_rate": 0.000623353718303316, + "loss": 0.3857, + "step": 3495 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006231569850844134, + "loss": 0.4679, + "step": 3496 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006229602315652634, + "loss": 0.4023, + "step": 3497 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006227634577782967, + "loss": 0.3988, + "step": 3498 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006225666637559485, + "loss": 0.3679, + "step": 3499 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006223698495306565, + "loss": 0.4902, + "step": 3500 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006221730151348624, + "loss": 0.4203, + "step": 3501 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006219761606010104, + "loss": 0.5994, + "step": 3502 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006217792859615487, + "loss": 0.5059, + "step": 3503 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006215823912489285, + "loss": 0.4446, + "step": 3504 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006213854764956047, + "loss": 0.3754, + "step": 3505 + }, + { + "epoch": 0.44, + "learning_rate": 0.000621188541734035, + "loss": 0.4005, + "step": 3506 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006209915869966804, + "loss": 0.4612, + "step": 3507 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006207946123160058, + "loss": 0.5798, + "step": 3508 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006205976177244786, + "loss": 0.4308, + "step": 3509 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006204006032545702, + "loss": 0.5651, + "step": 3510 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006202035689387549, + "loss": 0.4595, + "step": 3511 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006200065148095101, + "loss": 0.4971, + "step": 3512 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006198094408993168, + "loss": 0.4939, + "step": 3513 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006196123472406591, + "loss": 0.3944, + "step": 3514 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006194152338660247, + "loss": 0.3603, + "step": 3515 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006192181008079038, + "loss": 0.4784, + "step": 3516 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006190209480987906, + "loss": 0.6024, + "step": 3517 + }, + { + "epoch": 0.44, + "learning_rate": 0.000618823775771182, + "loss": 0.4645, + "step": 3518 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006186265838575787, + "loss": 0.3779, + "step": 3519 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006184293723904839, + "loss": 0.4036, + "step": 3520 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006182321414024047, + "loss": 0.3548, + "step": 3521 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006180348909258509, + "loss": 0.4364, + "step": 3522 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006178376209933362, + "loss": 0.3812, + "step": 3523 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006176403316373766, + "loss": 0.4648, + "step": 3524 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006174430228904919, + "loss": 0.4833, + "step": 3525 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006172456947852048, + "loss": 0.4031, + "step": 3526 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006170483473540418, + "loss": 0.4552, + "step": 3527 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006168509806295316, + "loss": 0.4247, + "step": 3528 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006166535946442069, + "loss": 0.4506, + "step": 3529 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006164561894306032, + "loss": 0.3901, + "step": 3530 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006162587650212593, + "loss": 0.5596, + "step": 3531 + }, + { + "epoch": 0.44, + "learning_rate": 0.000616061321448717, + "loss": 0.4987, + "step": 3532 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006158638587455215, + "loss": 0.4979, + "step": 3533 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006156663769442211, + "loss": 0.4398, + "step": 3534 + }, + { + "epoch": 0.44, + "learning_rate": 0.000615468876077367, + "loss": 0.4548, + "step": 3535 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006152713561775137, + "loss": 0.5337, + "step": 3536 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006150738172772193, + "loss": 0.5013, + "step": 3537 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006148762594090438, + "loss": 0.4681, + "step": 3538 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006146786826055517, + "loss": 0.4546, + "step": 3539 + }, + { + "epoch": 0.44, + "learning_rate": 0.00061448108689931, + "loss": 0.5042, + "step": 3540 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006142834723228886, + "loss": 0.5051, + "step": 3541 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006140858389088609, + "loss": 0.5253, + "step": 3542 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006138881866898034, + "loss": 0.3909, + "step": 3543 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006136905156982954, + "loss": 0.4989, + "step": 3544 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006134928259669194, + "loss": 0.3978, + "step": 3545 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006132951175282613, + "loss": 0.4673, + "step": 3546 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006130973904149098, + "loss": 0.4227, + "step": 3547 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006128996446594563, + "loss": 0.3995, + "step": 3548 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006127018802944962, + "loss": 0.5176, + "step": 3549 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006125040973526271, + "loss": 0.5421, + "step": 3550 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006123062958664501, + "loss": 0.4076, + "step": 3551 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006121084758685696, + "loss": 0.346, + "step": 3552 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006119106373915922, + "loss": 0.3738, + "step": 3553 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006117127804681284, + "loss": 0.3701, + "step": 3554 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006115149051307913, + "loss": 0.427, + "step": 3555 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006113170114121974, + "loss": 0.4146, + "step": 3556 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006111190993449653, + "loss": 0.3938, + "step": 3557 + }, + { + "epoch": 0.45, + "learning_rate": 0.000610921168961718, + "loss": 0.4789, + "step": 3558 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006107232202950807, + "loss": 0.3907, + "step": 3559 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006105252533776814, + "loss": 0.3909, + "step": 3560 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006103272682421516, + "loss": 0.5017, + "step": 3561 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006101292649211258, + "loss": 0.4868, + "step": 3562 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006099312434472412, + "loss": 0.4462, + "step": 3563 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006097332038531381, + "loss": 0.4572, + "step": 3564 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006095351461714598, + "loss": 0.5438, + "step": 3565 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006093370704348529, + "loss": 0.4484, + "step": 3566 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006091389766759663, + "loss": 0.5358, + "step": 3567 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006089408649274524, + "loss": 0.5078, + "step": 3568 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006087427352219665, + "loss": 0.4672, + "step": 3569 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006085445875921667, + "loss": 0.5255, + "step": 3570 + }, + { + "epoch": 0.45, + "learning_rate": 0.000608346422070714, + "loss": 0.4213, + "step": 3571 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006081482386902726, + "loss": 0.492, + "step": 3572 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006079500374835094, + "loss": 0.0514, + "step": 3573 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006077518184830947, + "loss": 0.5443, + "step": 3574 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006075535817217009, + "loss": 0.4426, + "step": 3575 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006073553272320043, + "loss": 0.5104, + "step": 3576 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006071570550466833, + "loss": 0.42, + "step": 3577 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006069587651984197, + "loss": 0.4501, + "step": 3578 + }, + { + "epoch": 0.45, + "learning_rate": 0.000606760457719898, + "loss": 0.0514, + "step": 3579 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006065621326438058, + "loss": 0.4767, + "step": 3580 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006063637900028334, + "loss": 0.4106, + "step": 3581 + }, + { + "epoch": 0.45, + "learning_rate": 0.000606165429829674, + "loss": 0.4612, + "step": 3582 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006059670521570238, + "loss": 0.4183, + "step": 3583 + }, + { + "epoch": 0.45, + "learning_rate": 0.000605768657017582, + "loss": 0.3916, + "step": 3584 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006055702444440503, + "loss": 0.4816, + "step": 3585 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006053718144691336, + "loss": 0.4684, + "step": 3586 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006051733671255396, + "loss": 0.4429, + "step": 3587 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006049749024459788, + "loss": 0.45, + "step": 3588 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006047764204631646, + "loss": 0.4641, + "step": 3589 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006045779212098132, + "loss": 0.0508, + "step": 3590 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006043794047186437, + "loss": 0.4674, + "step": 3591 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006041808710223782, + "loss": 0.5078, + "step": 3592 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006039823201537411, + "loss": 0.3979, + "step": 3593 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006037837521454602, + "loss": 0.4793, + "step": 3594 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006035851670302659, + "loss": 0.4398, + "step": 3595 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006033865648408915, + "loss": 0.4634, + "step": 3596 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006031879456100729, + "loss": 0.4617, + "step": 3597 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006029893093705492, + "loss": 0.4891, + "step": 3598 + }, + { + "epoch": 0.45, + "learning_rate": 0.000602790656155062, + "loss": 0.5173, + "step": 3599 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006025919859963556, + "loss": 0.4294, + "step": 3600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006023932989271772, + "loss": 0.4783, + "step": 3601 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006021945949802772, + "loss": 0.4617, + "step": 3602 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006019958741884082, + "loss": 0.3911, + "step": 3603 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006017971365843258, + "loss": 0.4591, + "step": 3604 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006015983822007883, + "loss": 0.0503, + "step": 3605 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006013996110705571, + "loss": 0.4471, + "step": 3606 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006012008232263959, + "loss": 0.4021, + "step": 3607 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006010020187010713, + "loss": 0.4592, + "step": 3608 + }, + { + "epoch": 0.45, + "learning_rate": 0.000600803197527353, + "loss": 0.5123, + "step": 3609 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006006043597380127, + "loss": 0.4523, + "step": 3610 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006004055053658258, + "loss": 0.408, + "step": 3611 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006002066344435695, + "loss": 0.4464, + "step": 3612 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006000077470040243, + "loss": 0.465, + "step": 3613 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005998088430799734, + "loss": 0.4557, + "step": 3614 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005996099227042024, + "loss": 0.4939, + "step": 3615 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005994109859094999, + "loss": 0.4608, + "step": 3616 + }, + { + "epoch": 0.45, + "learning_rate": 0.000599212032728657, + "loss": 0.4707, + "step": 3617 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005990130631944677, + "loss": 0.4723, + "step": 3618 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005988140773397286, + "loss": 0.5471, + "step": 3619 + }, + { + "epoch": 0.45, + "learning_rate": 0.000598615075197239, + "loss": 0.0511, + "step": 3620 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005984160567998009, + "loss": 0.4795, + "step": 3621 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005982170221802189, + "loss": 0.4675, + "step": 3622 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005980179713713003, + "loss": 0.5118, + "step": 3623 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005978189044058552, + "loss": 0.4874, + "step": 3624 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005976198213166963, + "loss": 0.5117, + "step": 3625 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005974207221366387, + "loss": 0.5604, + "step": 3626 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005972216068985004, + "loss": 0.4486, + "step": 3627 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005970224756351023, + "loss": 0.0515, + "step": 3628 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005968233283792676, + "loss": 0.528, + "step": 3629 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005966241651638219, + "loss": 0.3485, + "step": 3630 + }, + { + "epoch": 0.46, + "learning_rate": 0.000596424986021594, + "loss": 0.4216, + "step": 3631 + }, + { + "epoch": 0.46, + "learning_rate": 0.000596225790985415, + "loss": 0.5024, + "step": 3632 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005960265800881188, + "loss": 0.4877, + "step": 3633 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005958273533625413, + "loss": 0.4324, + "step": 3634 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005956281108415219, + "loss": 0.4727, + "step": 3635 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005954288525579024, + "loss": 0.4089, + "step": 3636 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005952295785445264, + "loss": 0.4807, + "step": 3637 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005950302888342411, + "loss": 0.5919, + "step": 3638 + }, + { + "epoch": 0.46, + "learning_rate": 0.000594830983459896, + "loss": 0.3964, + "step": 3639 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005946316624543428, + "loss": 0.4207, + "step": 3640 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005944323258504359, + "loss": 0.4889, + "step": 3641 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005942329736810328, + "loss": 0.4288, + "step": 3642 + }, + { + "epoch": 0.46, + "learning_rate": 0.000594033605978993, + "loss": 0.4674, + "step": 3643 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005938342227771785, + "loss": 0.4484, + "step": 3644 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005936348241084544, + "loss": 0.4313, + "step": 3645 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005934354100056882, + "loss": 0.4083, + "step": 3646 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005932359805017494, + "loss": 0.4481, + "step": 3647 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005930365356295103, + "loss": 0.3972, + "step": 3648 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005928370754218463, + "loss": 0.4562, + "step": 3649 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005926375999116348, + "loss": 0.4537, + "step": 3650 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005924381091317555, + "loss": 0.3794, + "step": 3651 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005922386031150912, + "loss": 0.4315, + "step": 3652 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005920390818945269, + "loss": 0.4564, + "step": 3653 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005918395455029501, + "loss": 0.4467, + "step": 3654 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005916399939732507, + "loss": 0.4464, + "step": 3655 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005914404273383215, + "loss": 0.5427, + "step": 3656 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005912408456310573, + "loss": 0.3644, + "step": 3657 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005910412488843558, + "loss": 0.4294, + "step": 3658 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005908416371311167, + "loss": 0.4689, + "step": 3659 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005906420104042427, + "loss": 0.4868, + "step": 3660 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005904423687366387, + "loss": 0.5216, + "step": 3661 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005902427121612122, + "loss": 0.452, + "step": 3662 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005900430407108728, + "loss": 0.4596, + "step": 3663 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005898433544185329, + "loss": 0.4369, + "step": 3664 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005896436533171075, + "loss": 0.4203, + "step": 3665 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005894439374395133, + "loss": 0.4876, + "step": 3666 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005892442068186702, + "loss": 0.464, + "step": 3667 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005890444614875004, + "loss": 0.0515, + "step": 3668 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005888447014789282, + "loss": 0.4662, + "step": 3669 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005886449268258803, + "loss": 0.4216, + "step": 3670 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005884451375612865, + "loss": 0.5159, + "step": 3671 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005882453337180782, + "loss": 0.051, + "step": 3672 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005880455153291895, + "loss": 0.4333, + "step": 3673 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005878456824275571, + "loss": 0.5631, + "step": 3674 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005876458350461198, + "loss": 0.4799, + "step": 3675 + }, + { + "epoch": 0.46, + "learning_rate": 0.000587445973217819, + "loss": 0.4008, + "step": 3676 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005872460969755982, + "loss": 0.4434, + "step": 3677 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005870462063524035, + "loss": 0.3988, + "step": 3678 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005868463013811834, + "loss": 0.4633, + "step": 3679 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005866463820948887, + "loss": 0.3757, + "step": 3680 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005864464485264727, + "loss": 0.3871, + "step": 3681 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005862465007088905, + "loss": 0.0502, + "step": 3682 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005860465386751004, + "loss": 0.4672, + "step": 3683 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005858465624580623, + "loss": 0.4103, + "step": 3684 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005856465720907388, + "loss": 0.4334, + "step": 3685 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005854465676060948, + "loss": 0.424, + "step": 3686 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005852465490370978, + "loss": 0.4795, + "step": 3687 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005850465164167167, + "loss": 0.0499, + "step": 3688 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005848464697779238, + "loss": 0.4666, + "step": 3689 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005846464091536929, + "loss": 0.4233, + "step": 3690 + }, + { + "epoch": 0.46, + "learning_rate": 0.000584446334577001, + "loss": 0.463, + "step": 3691 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005842462460808263, + "loss": 0.4562, + "step": 3692 + }, + { + "epoch": 0.46, + "learning_rate": 0.00058404614369815, + "loss": 0.4438, + "step": 3693 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005838460274619557, + "loss": 0.4418, + "step": 3694 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005836458974052285, + "loss": 0.3728, + "step": 3695 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005834457535609569, + "loss": 0.4492, + "step": 3696 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005832455959621307, + "loss": 0.4243, + "step": 3697 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005830454246417424, + "loss": 0.3816, + "step": 3698 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005828452396327866, + "loss": 0.3615, + "step": 3699 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005826450409682603, + "loss": 0.4357, + "step": 3700 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005824448286811627, + "loss": 0.4347, + "step": 3701 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005822446028044957, + "loss": 0.3944, + "step": 3702 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005820443633712622, + "loss": 0.4084, + "step": 3703 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005818441104144686, + "loss": 0.396, + "step": 3704 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005816438439671231, + "loss": 0.4061, + "step": 3705 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005814435640622359, + "loss": 0.4638, + "step": 3706 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005812432707328195, + "loss": 0.4405, + "step": 3707 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005810429640118889, + "loss": 0.4379, + "step": 3708 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005808426439324612, + "loss": 0.3826, + "step": 3709 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005806423105275553, + "loss": 0.3771, + "step": 3710 + }, + { + "epoch": 0.47, + "learning_rate": 0.000580441963830193, + "loss": 0.4566, + "step": 3711 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005802416038733976, + "loss": 0.5356, + "step": 3712 + }, + { + "epoch": 0.47, + "learning_rate": 0.000580041230690195, + "loss": 0.4625, + "step": 3713 + }, + { + "epoch": 0.47, + "learning_rate": 0.000579840844313613, + "loss": 0.5446, + "step": 3714 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005796404447766823, + "loss": 0.4772, + "step": 3715 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005794400321124344, + "loss": 0.3842, + "step": 3716 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005792396063539045, + "loss": 0.4675, + "step": 3717 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005790391675341288, + "loss": 0.4105, + "step": 3718 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005788387156861462, + "loss": 0.3683, + "step": 3719 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005786382508429978, + "loss": 0.0499, + "step": 3720 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005784377730377264, + "loss": 0.5044, + "step": 3721 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005782372823033774, + "loss": 0.4636, + "step": 3722 + }, + { + "epoch": 0.47, + "learning_rate": 0.000578036778672998, + "loss": 0.5092, + "step": 3723 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005778362621796379, + "loss": 0.4972, + "step": 3724 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005776357328563485, + "loss": 0.4639, + "step": 3725 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005774351907361836, + "loss": 0.4365, + "step": 3726 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005772346358521992, + "loss": 0.3549, + "step": 3727 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005770340682374526, + "loss": 0.416, + "step": 3728 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005768334879250046, + "loss": 0.5254, + "step": 3729 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005766328949479167, + "loss": 0.4351, + "step": 3730 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005764322893392535, + "loss": 0.415, + "step": 3731 + }, + { + "epoch": 0.47, + "learning_rate": 0.000576231671132081, + "loss": 0.4569, + "step": 3732 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005760310403594678, + "loss": 0.5178, + "step": 3733 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005758303970544841, + "loss": 0.4244, + "step": 3734 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005756297412502025, + "loss": 0.5043, + "step": 3735 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005754290729796976, + "loss": 0.4819, + "step": 3736 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005752283922760461, + "loss": 0.491, + "step": 3737 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005750276991723264, + "loss": 0.4943, + "step": 3738 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005748269937016194, + "loss": 0.0471, + "step": 3739 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005746262758970077, + "loss": 0.4097, + "step": 3740 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005744255457915761, + "loss": 0.4397, + "step": 3741 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005742248034184117, + "loss": 0.4563, + "step": 3742 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005740240488106029, + "loss": 0.4814, + "step": 3743 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005738232820012407, + "loss": 0.5345, + "step": 3744 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005736225030234179, + "loss": 0.0511, + "step": 3745 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005734217119102296, + "loss": 0.4817, + "step": 3746 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005732209086947725, + "loss": 0.0517, + "step": 3747 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005730200934101452, + "loss": 0.5179, + "step": 3748 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005728192660894491, + "loss": 0.4929, + "step": 3749 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005726184267657866, + "loss": 0.484, + "step": 3750 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005724175754722626, + "loss": 0.3831, + "step": 3751 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005722167122419839, + "loss": 0.5156, + "step": 3752 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005720158371080592, + "loss": 0.5174, + "step": 3753 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005718149501035994, + "loss": 0.5947, + "step": 3754 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005716140512617168, + "loss": 0.459, + "step": 3755 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005714131406155264, + "loss": 0.3953, + "step": 3756 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005712122181981446, + "loss": 0.432, + "step": 3757 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005710112840426897, + "loss": 0.3881, + "step": 3758 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005708103381822827, + "loss": 0.4147, + "step": 3759 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005706093806500453, + "loss": 0.4082, + "step": 3760 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005704084114791023, + "loss": 0.4329, + "step": 3761 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005702074307025794, + "loss": 0.5056, + "step": 3762 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005700064383536052, + "loss": 0.4132, + "step": 3763 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005698054344653096, + "loss": 0.4363, + "step": 3764 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005696044190708244, + "loss": 0.4618, + "step": 3765 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005694033922032835, + "loss": 0.5989, + "step": 3766 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005692023538958226, + "loss": 0.4408, + "step": 3767 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005690013041815793, + "loss": 0.5242, + "step": 3768 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005688002430936932, + "loss": 0.6178, + "step": 3769 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005685991706653055, + "loss": 0.5143, + "step": 3770 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005683980869295597, + "loss": 0.4586, + "step": 3771 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005681969919196005, + "loss": 0.4258, + "step": 3772 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005679958856685751, + "loss": 0.3669, + "step": 3773 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005677947682096322, + "loss": 0.4421, + "step": 3774 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005675936395759227, + "loss": 0.0517, + "step": 3775 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005673924998005986, + "loss": 0.0518, + "step": 3776 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005671913489168147, + "loss": 0.4532, + "step": 3777 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005669901869577269, + "loss": 0.4504, + "step": 3778 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005667890139564935, + "loss": 0.4708, + "step": 3779 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005665878299462738, + "loss": 0.4432, + "step": 3780 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005663866349602298, + "loss": 0.4609, + "step": 3781 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005661854290315248, + "loss": 0.4225, + "step": 3782 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005659842121933241, + "loss": 0.3964, + "step": 3783 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005657829844787946, + "loss": 0.4099, + "step": 3784 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005655817459211053, + "loss": 0.4019, + "step": 3785 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005653804965534267, + "loss": 0.478, + "step": 3786 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005651792364089312, + "loss": 0.3732, + "step": 3787 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005649779655207929, + "loss": 0.4238, + "step": 3788 + }, + { + "epoch": 0.47, + "learning_rate": 0.000564776683922188, + "loss": 0.4162, + "step": 3789 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005645753916462942, + "loss": 0.4778, + "step": 3790 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005643740887262905, + "loss": 0.4871, + "step": 3791 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005641727751953584, + "loss": 0.4658, + "step": 3792 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005639714510866812, + "loss": 0.358, + "step": 3793 + }, + { + "epoch": 0.48, + "learning_rate": 0.000563770116433443, + "loss": 0.4321, + "step": 3794 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005635687712688307, + "loss": 0.4073, + "step": 3795 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005633674156260322, + "loss": 0.3809, + "step": 3796 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005631660495382378, + "loss": 0.4349, + "step": 3797 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005629646730386388, + "loss": 0.5223, + "step": 3798 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005627632861604286, + "loss": 0.4431, + "step": 3799 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005625618889368024, + "loss": 0.4281, + "step": 3800 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005623604814009568, + "loss": 0.5143, + "step": 3801 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005621590635860904, + "loss": 0.4979, + "step": 3802 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005619576355254031, + "loss": 0.4072, + "step": 3803 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005617561972520971, + "loss": 0.483, + "step": 3804 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005615547487993757, + "loss": 0.4292, + "step": 3805 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005613532902004442, + "loss": 0.6125, + "step": 3806 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005611518214885096, + "loss": 0.4177, + "step": 3807 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005609503426967802, + "loss": 0.4568, + "step": 3808 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005607488538584662, + "loss": 0.5057, + "step": 3809 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005605473550067797, + "loss": 0.4717, + "step": 3810 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005603458461749341, + "loss": 0.4915, + "step": 3811 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005601443273961445, + "loss": 0.4663, + "step": 3812 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005599427987036279, + "loss": 0.3921, + "step": 3813 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005597412601306025, + "loss": 0.4044, + "step": 3814 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005595397117102885, + "loss": 0.4961, + "step": 3815 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005593381534759076, + "loss": 0.4904, + "step": 3816 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005591365854606829, + "loss": 0.4965, + "step": 3817 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005589350076978395, + "loss": 0.5131, + "step": 3818 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005587334202206041, + "loss": 0.0502, + "step": 3819 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005585318230622045, + "loss": 0.5228, + "step": 3820 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005583302162558706, + "loss": 0.4066, + "step": 3821 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005581285998348337, + "loss": 0.5219, + "step": 3822 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005579269738323266, + "loss": 0.3889, + "step": 3823 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005577253382815838, + "loss": 0.4127, + "step": 3824 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005575236932158413, + "loss": 0.4491, + "step": 3825 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005573220386683369, + "loss": 0.4841, + "step": 3826 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005571203746723097, + "loss": 0.3951, + "step": 3827 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005569187012610003, + "loss": 0.4459, + "step": 3828 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005567170184676512, + "loss": 0.3948, + "step": 3829 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005565153263255062, + "loss": 0.4193, + "step": 3830 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005563136248678105, + "loss": 0.4281, + "step": 3831 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005561119141278112, + "loss": 0.3798, + "step": 3832 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005559101941387567, + "loss": 0.4446, + "step": 3833 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005557084649338969, + "loss": 0.4894, + "step": 3834 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005555067265464832, + "loss": 0.4219, + "step": 3835 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005553049790097688, + "loss": 0.5103, + "step": 3836 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005551032223570083, + "loss": 0.4285, + "step": 3837 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005549014566214574, + "loss": 0.5586, + "step": 3838 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005546996818363738, + "loss": 0.3989, + "step": 3839 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005544978980350164, + "loss": 0.4395, + "step": 3840 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005542961052506459, + "loss": 0.3981, + "step": 3841 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005540943035165239, + "loss": 0.3448, + "step": 3842 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005538924928659143, + "loss": 0.6603, + "step": 3843 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005536906733320815, + "loss": 0.4077, + "step": 3844 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005534888449482924, + "loss": 0.3854, + "step": 3845 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005532870077478144, + "loss": 0.3625, + "step": 3846 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005530851617639169, + "loss": 0.4436, + "step": 3847 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005528833070298709, + "loss": 0.3604, + "step": 3848 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005526814435789483, + "loss": 0.4685, + "step": 3849 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005524795714444226, + "loss": 0.521, + "step": 3850 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005522776906595691, + "loss": 0.3623, + "step": 3851 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005520758012576643, + "loss": 0.3524, + "step": 3852 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005518739032719861, + "loss": 0.3729, + "step": 3853 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005516719967358135, + "loss": 0.3706, + "step": 3854 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005514700816824275, + "loss": 0.5282, + "step": 3855 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005512681581451101, + "loss": 0.3986, + "step": 3856 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005510662261571449, + "loss": 0.4268, + "step": 3857 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005508642857518167, + "loss": 0.5026, + "step": 3858 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005506623369624121, + "loss": 0.4003, + "step": 3859 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005504603798222183, + "loss": 0.4253, + "step": 3860 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005502584143645247, + "loss": 0.433, + "step": 3861 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005500564406226216, + "loss": 0.4174, + "step": 3862 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005498544586298009, + "loss": 0.4664, + "step": 3863 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005496524684193556, + "loss": 0.4557, + "step": 3864 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005494504700245805, + "loss": 0.3713, + "step": 3865 + }, + { + "epoch": 0.48, + "learning_rate": 0.000549248463478771, + "loss": 0.4734, + "step": 3866 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005490464488152247, + "loss": 0.4019, + "step": 3867 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005488444260672399, + "loss": 0.4216, + "step": 3868 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005486423952681166, + "loss": 0.4136, + "step": 3869 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005484403564511561, + "loss": 0.0515, + "step": 3870 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005482383096496606, + "loss": 0.3903, + "step": 3871 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005480362548969342, + "loss": 0.4994, + "step": 3872 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005478341922262818, + "loss": 0.4194, + "step": 3873 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005476321216710103, + "loss": 0.4191, + "step": 3874 + }, + { + "epoch": 0.49, + "learning_rate": 0.000547430043264427, + "loss": 0.4153, + "step": 3875 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005472279570398412, + "loss": 0.4624, + "step": 3876 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005470258630305632, + "loss": 0.5234, + "step": 3877 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005468237612699044, + "loss": 0.4362, + "step": 3878 + }, + { + "epoch": 0.49, + "learning_rate": 0.000546621651791178, + "loss": 0.4294, + "step": 3879 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005464195346276979, + "loss": 0.4369, + "step": 3880 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005462174098127798, + "loss": 0.4412, + "step": 3881 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005460152773797402, + "loss": 0.0526, + "step": 3882 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005458131373618971, + "loss": 0.5406, + "step": 3883 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005456109897925698, + "loss": 0.3691, + "step": 3884 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005454088347050786, + "loss": 0.4148, + "step": 3885 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005452066721327454, + "loss": 0.4899, + "step": 3886 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005450045021088928, + "loss": 0.4926, + "step": 3887 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005448023246668453, + "loss": 0.559, + "step": 3888 + }, + { + "epoch": 0.49, + "learning_rate": 0.000544600139839928, + "loss": 0.5234, + "step": 3889 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005443979476614675, + "loss": 0.4067, + "step": 3890 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005441957481647917, + "loss": 0.3936, + "step": 3891 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005439935413832297, + "loss": 0.3749, + "step": 3892 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005437913273501114, + "loss": 0.4586, + "step": 3893 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005435891060987685, + "loss": 0.3865, + "step": 3894 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005433868776625333, + "loss": 0.4224, + "step": 3895 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005431846420747401, + "loss": 0.4282, + "step": 3896 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005429823993687233, + "loss": 0.5294, + "step": 3897 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005427801495778192, + "loss": 0.5658, + "step": 3898 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005425778927353653, + "loss": 0.5162, + "step": 3899 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005423756288746998, + "loss": 0.4381, + "step": 3900 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005421733580291624, + "loss": 0.5271, + "step": 3901 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005419710802320939, + "loss": 0.3964, + "step": 3902 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005417687955168362, + "loss": 0.4943, + "step": 3903 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005415665039167324, + "loss": 0.3782, + "step": 3904 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005413642054651265, + "loss": 0.4861, + "step": 3905 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005411619001953641, + "loss": 0.4384, + "step": 3906 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005409595881407913, + "loss": 0.4149, + "step": 3907 + }, + { + "epoch": 0.49, + "learning_rate": 0.000540757269334756, + "loss": 0.4147, + "step": 3908 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005405549438106067, + "loss": 0.4489, + "step": 3909 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005403526116016931, + "loss": 0.4313, + "step": 3910 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005401502727413663, + "loss": 0.4236, + "step": 3911 + }, + { + "epoch": 0.49, + "learning_rate": 0.000539947927262978, + "loss": 0.462, + "step": 3912 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005397455751998814, + "loss": 0.7457, + "step": 3913 + }, + { + "epoch": 0.49, + "learning_rate": 0.000539543216585431, + "loss": 0.4836, + "step": 3914 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005393408514529814, + "loss": 0.4102, + "step": 3915 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005391384798358892, + "loss": 0.5558, + "step": 3916 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005389361017675119, + "loss": 0.4579, + "step": 3917 + }, + { + "epoch": 0.49, + "learning_rate": 0.000538733717281208, + "loss": 0.399, + "step": 3918 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005385313264103367, + "loss": 0.4697, + "step": 3919 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005383289291882586, + "loss": 0.502, + "step": 3920 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005381265256483355, + "loss": 0.5326, + "step": 3921 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005379241158239298, + "loss": 0.3917, + "step": 3922 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005377216997484054, + "loss": 0.4308, + "step": 3923 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005375192774551269, + "loss": 0.4582, + "step": 3924 + }, + { + "epoch": 0.49, + "learning_rate": 0.00053731684897746, + "loss": 0.4652, + "step": 3925 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005371144143487714, + "loss": 0.4736, + "step": 3926 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005369119736024291, + "loss": 0.4958, + "step": 3927 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005367095267718016, + "loss": 0.3579, + "step": 3928 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005365070738902588, + "loss": 0.0529, + "step": 3929 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005363046149911715, + "loss": 0.4393, + "step": 3930 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005361021501079115, + "loss": 0.4658, + "step": 3931 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005358996792738513, + "loss": 0.4185, + "step": 3932 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005356972025223649, + "loss": 0.4994, + "step": 3933 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005354947198868269, + "loss": 0.4889, + "step": 3934 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005352922314006129, + "loss": 0.4897, + "step": 3935 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005350897370970997, + "loss": 0.4958, + "step": 3936 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005348872370096649, + "loss": 0.3193, + "step": 3937 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005346847311716868, + "loss": 0.4015, + "step": 3938 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005344822196165453, + "loss": 0.524, + "step": 3939 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005342797023776205, + "loss": 0.5438, + "step": 3940 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005340771794882938, + "loss": 0.4863, + "step": 3941 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005338746509819478, + "loss": 0.4536, + "step": 3942 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005336721168919655, + "loss": 0.4779, + "step": 3943 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005334695772517311, + "loss": 0.5254, + "step": 3944 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005332670320946297, + "loss": 0.4935, + "step": 3945 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005330644814540472, + "loss": 0.4825, + "step": 3946 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005328619253633706, + "loss": 0.4505, + "step": 3947 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005326593638559875, + "loss": 0.0528, + "step": 3948 + }, + { + "epoch": 0.49, + "learning_rate": 0.000532456796965287, + "loss": 0.5862, + "step": 3949 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005322542247246583, + "loss": 0.4617, + "step": 3950 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005320516471674918, + "loss": 0.394, + "step": 3951 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005318490643271789, + "loss": 0.4371, + "step": 3952 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005316464762371119, + "loss": 0.4353, + "step": 3953 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005314438829306839, + "loss": 0.4012, + "step": 3954 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005312412844412886, + "loss": 0.0518, + "step": 3955 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005310386808023208, + "loss": 0.4137, + "step": 3956 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005308360720471763, + "loss": 0.3972, + "step": 3957 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005306334582092515, + "loss": 0.472, + "step": 3958 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005304308393219437, + "loss": 0.4453, + "step": 3959 + }, + { + "epoch": 0.5, + "learning_rate": 0.000530228215418651, + "loss": 0.431, + "step": 3960 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005300255865327724, + "loss": 0.4646, + "step": 3961 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005298229526977075, + "loss": 0.051, + "step": 3962 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005296203139468572, + "loss": 0.3944, + "step": 3963 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005294176703136227, + "loss": 0.4352, + "step": 3964 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005292150218314063, + "loss": 0.4083, + "step": 3965 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005290123685336109, + "loss": 0.3812, + "step": 3966 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005288097104536405, + "loss": 0.4475, + "step": 3967 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005286070476248995, + "loss": 0.4337, + "step": 3968 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005284043800807934, + "loss": 0.5179, + "step": 3969 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005282017078547285, + "loss": 0.4163, + "step": 3970 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005279990309801115, + "loss": 0.4421, + "step": 3971 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005277963494903499, + "loss": 0.4906, + "step": 3972 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005275936634188528, + "loss": 0.5369, + "step": 3973 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005273909727990286, + "loss": 0.42, + "step": 3974 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005271882776642878, + "loss": 0.4744, + "step": 3975 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005269855780480411, + "loss": 0.4116, + "step": 3976 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005267828739836996, + "loss": 0.4115, + "step": 3977 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005265801655046758, + "loss": 0.4845, + "step": 3978 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005263774526443824, + "loss": 0.5592, + "step": 3979 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005261747354362333, + "loss": 0.4384, + "step": 3980 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005259720139136426, + "loss": 0.4518, + "step": 3981 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005257692881100252, + "loss": 0.5272, + "step": 3982 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005255665580587974, + "loss": 0.0465, + "step": 3983 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005253638237933751, + "loss": 0.4292, + "step": 3984 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005251610853471756, + "loss": 0.4833, + "step": 3985 + }, + { + "epoch": 0.5, + "learning_rate": 0.000524958342753617, + "loss": 0.4806, + "step": 3986 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005247555960461175, + "loss": 0.4098, + "step": 3987 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005245528452580965, + "loss": 0.4711, + "step": 3988 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005243500904229738, + "loss": 0.4893, + "step": 3989 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005241473315741701, + "loss": 0.4076, + "step": 3990 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005239445687451064, + "loss": 0.4297, + "step": 3991 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005237418019692046, + "loss": 0.5012, + "step": 3992 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005235390312798875, + "loss": 0.4906, + "step": 3993 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005233362567105776, + "loss": 0.4346, + "step": 3994 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005231334782946994, + "loss": 0.4948, + "step": 3995 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005229306960656769, + "loss": 0.3791, + "step": 3996 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005227279100569353, + "loss": 0.4117, + "step": 3997 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005225251203019002, + "loss": 0.4833, + "step": 3998 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005223223268339981, + "loss": 0.5604, + "step": 3999 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005221195296866556, + "loss": 0.4828, + "step": 4000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005219167288933004, + "loss": 0.4879, + "step": 4001 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005217139244873608, + "loss": 0.3912, + "step": 4002 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005215111165022652, + "loss": 0.3881, + "step": 4003 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005213083049714431, + "loss": 0.488, + "step": 4004 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005211054899283244, + "loss": 0.4621, + "step": 4005 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005209026714063394, + "loss": 0.4598, + "step": 4006 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005206998494389193, + "loss": 0.4955, + "step": 4007 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005204970240594955, + "loss": 0.4545, + "step": 4008 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005202941953015005, + "loss": 0.4033, + "step": 4009 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005200913631983665, + "loss": 0.5524, + "step": 4010 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005198885277835274, + "loss": 0.4233, + "step": 4011 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005196856890904168, + "loss": 0.4451, + "step": 4012 + }, + { + "epoch": 0.5, + "learning_rate": 0.000519482847152469, + "loss": 0.4039, + "step": 4013 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005192800020031189, + "loss": 0.475, + "step": 4014 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005190771536758017, + "loss": 0.417, + "step": 4015 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005188743022039539, + "loss": 0.4343, + "step": 4016 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005186714476210116, + "loss": 0.4614, + "step": 4017 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005184685899604118, + "loss": 0.3961, + "step": 4018 + }, + { + "epoch": 0.5, + "learning_rate": 0.000518265729255592, + "loss": 0.4524, + "step": 4019 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005180628655399904, + "loss": 0.4197, + "step": 4020 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005178599988470452, + "loss": 0.5393, + "step": 4021 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005176571292101955, + "loss": 0.3965, + "step": 4022 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005174542566628808, + "loss": 0.4346, + "step": 4023 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005172513812385411, + "loss": 0.4825, + "step": 4024 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005170485029706166, + "loss": 0.5662, + "step": 4025 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005168456218925486, + "loss": 0.3767, + "step": 4026 + }, + { + "epoch": 0.5, + "learning_rate": 0.000516642738037778, + "loss": 0.4025, + "step": 4027 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005164398514397469, + "loss": 0.4438, + "step": 4028 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005162369621318973, + "loss": 0.3912, + "step": 4029 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005160340701476722, + "loss": 0.3855, + "step": 4030 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005158311755205146, + "loss": 0.4254, + "step": 4031 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005156282782838681, + "loss": 0.3784, + "step": 4032 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005154253784711767, + "loss": 0.0562, + "step": 4033 + }, + { + "epoch": 0.51, + "learning_rate": 0.000515222476115885, + "loss": 0.5126, + "step": 4034 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005150195712514376, + "loss": 0.4609, + "step": 4035 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005148166639112799, + "loss": 0.3926, + "step": 4036 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005146137541288576, + "loss": 0.4421, + "step": 4037 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005144108419376169, + "loss": 0.4537, + "step": 4038 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005142079273710038, + "loss": 0.4729, + "step": 4039 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005140050104624657, + "loss": 0.4287, + "step": 4040 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005138020912454497, + "loss": 0.4674, + "step": 4041 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005135991697534035, + "loss": 0.5226, + "step": 4042 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005133962460197748, + "loss": 0.474, + "step": 4043 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005131933200780124, + "loss": 0.4995, + "step": 4044 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005129903919615647, + "loss": 0.3882, + "step": 4045 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005127874617038811, + "loss": 0.4308, + "step": 4046 + }, + { + "epoch": 0.51, + "learning_rate": 0.000512584529338411, + "loss": 0.4415, + "step": 4047 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005123815948986038, + "loss": 0.1376, + "step": 4048 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005121786584179103, + "loss": 0.5435, + "step": 4049 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005119757199297805, + "loss": 0.4608, + "step": 4050 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005117727794676654, + "loss": 0.426, + "step": 4051 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005115698370650161, + "loss": 0.4452, + "step": 4052 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005113668927552842, + "loss": 0.4082, + "step": 4053 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005111639465719212, + "loss": 0.3682, + "step": 4054 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005109609985483794, + "loss": 0.4659, + "step": 4055 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005107580487181112, + "loss": 0.438, + "step": 4056 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005105550971145692, + "loss": 0.3344, + "step": 4057 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005103521437712063, + "loss": 0.4208, + "step": 4058 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005101491887214757, + "loss": 0.4462, + "step": 4059 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005099462319988314, + "loss": 0.4689, + "step": 4060 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005097432736367267, + "loss": 0.3884, + "step": 4061 + }, + { + "epoch": 0.51, + "learning_rate": 0.000509540313668616, + "loss": 0.3844, + "step": 4062 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005093373521279535, + "loss": 0.4196, + "step": 4063 + }, + { + "epoch": 0.51, + "learning_rate": 0.000509134389048194, + "loss": 0.3566, + "step": 4064 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005089314244627922, + "loss": 0.4264, + "step": 4065 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005087284584052031, + "loss": 0.5184, + "step": 4066 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005085254909088825, + "loss": 0.5076, + "step": 4067 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005083225220072856, + "loss": 0.3798, + "step": 4068 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005081195517338682, + "loss": 0.4346, + "step": 4069 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005079165801220866, + "loss": 0.4177, + "step": 4070 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005077136072053972, + "loss": 0.399, + "step": 4071 + }, + { + "epoch": 0.51, + "learning_rate": 0.000507510633017256, + "loss": 0.1244, + "step": 4072 + }, + { + "epoch": 0.51, + "learning_rate": 0.00050730765759112, + "loss": 0.4642, + "step": 4073 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005071046809604461, + "loss": 0.3757, + "step": 4074 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005069017031586916, + "loss": 0.4828, + "step": 4075 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005066987242193133, + "loss": 0.4362, + "step": 4076 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005064957441757691, + "loss": 0.5209, + "step": 4077 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005062927630615164, + "loss": 0.4093, + "step": 4078 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005060897809100133, + "loss": 0.5393, + "step": 4079 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005058867977547175, + "loss": 0.3849, + "step": 4080 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005056838136290875, + "loss": 0.4402, + "step": 4081 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005054808285665815, + "loss": 0.4464, + "step": 4082 + }, + { + "epoch": 0.51, + "learning_rate": 0.000505277842600658, + "loss": 0.3782, + "step": 4083 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005050748557647756, + "loss": 0.3745, + "step": 4084 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005048718680923929, + "loss": 0.4191, + "step": 4085 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005046688796169693, + "loss": 0.4647, + "step": 4086 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005044658903719634, + "loss": 0.3677, + "step": 4087 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005042629003908347, + "loss": 0.0636, + "step": 4088 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005040599097070424, + "loss": 0.4594, + "step": 4089 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005038569183540458, + "loss": 0.5007, + "step": 4090 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005036539263653045, + "loss": 0.4731, + "step": 4091 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005034509337742781, + "loss": 0.4425, + "step": 4092 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005032479406144266, + "loss": 0.5176, + "step": 4093 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005030449469192094, + "loss": 0.4938, + "step": 4094 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005028419527220867, + "loss": 0.4753, + "step": 4095 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005026389580565185, + "loss": 0.3767, + "step": 4096 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005024359629559648, + "loss": 0.442, + "step": 4097 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005022329674538857, + "loss": 0.4686, + "step": 4098 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005020299715837415, + "loss": 0.5953, + "step": 4099 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005018269753789924, + "loss": 0.4113, + "step": 4100 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005016239788730989, + "loss": 0.4001, + "step": 4101 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005014209820995211, + "loss": 0.4559, + "step": 4102 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005012179850917195, + "loss": 0.4512, + "step": 4103 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005010149878831549, + "loss": 0.575, + "step": 4104 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005008119905072873, + "loss": 0.0629, + "step": 4105 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005006089929975774, + "loss": 0.4357, + "step": 4106 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005004059953874859, + "loss": 0.4291, + "step": 4107 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005002029977104733, + "loss": 0.4253, + "step": 4108 + }, + { + "epoch": 0.52, + "learning_rate": 0.0005, + "loss": 0.43, + "step": 4109 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004997970022895268, + "loss": 0.4836, + "step": 4110 + }, + { + "epoch": 0.52, + "learning_rate": 0.000499594004612514, + "loss": 0.4484, + "step": 4111 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004993910070024225, + "loss": 0.5653, + "step": 4112 + }, + { + "epoch": 0.52, + "learning_rate": 0.000499188009492713, + "loss": 0.4813, + "step": 4113 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004989850121168454, + "loss": 0.4565, + "step": 4114 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004987820149082805, + "loss": 0.3934, + "step": 4115 + }, + { + "epoch": 0.52, + "learning_rate": 0.000498579017900479, + "loss": 0.4779, + "step": 4116 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004983760211269011, + "loss": 0.7081, + "step": 4117 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004981730246210077, + "loss": 0.421, + "step": 4118 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004979700284162585, + "loss": 0.5106, + "step": 4119 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004977670325461144, + "loss": 0.5315, + "step": 4120 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004975640370440352, + "loss": 0.426, + "step": 4121 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004973610419434815, + "loss": 0.5387, + "step": 4122 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004971580472779133, + "loss": 0.3843, + "step": 4123 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004969550530807906, + "loss": 0.43, + "step": 4124 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004967520593855735, + "loss": 0.4098, + "step": 4125 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004965490662257218, + "loss": 0.4395, + "step": 4126 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004963460736346955, + "loss": 0.4166, + "step": 4127 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004961430816459544, + "loss": 0.5063, + "step": 4128 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004959400902929578, + "loss": 0.3494, + "step": 4129 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004957370996091654, + "loss": 0.4507, + "step": 4130 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004955341096280366, + "loss": 0.4931, + "step": 4131 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004953311203830308, + "loss": 0.426, + "step": 4132 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004951281319076071, + "loss": 0.5085, + "step": 4133 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004949251442352245, + "loss": 0.4355, + "step": 4134 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004947221573993422, + "loss": 0.473, + "step": 4135 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004945191714334186, + "loss": 0.4451, + "step": 4136 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004943161863709125, + "loss": 0.4255, + "step": 4137 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004941132022452825, + "loss": 0.4323, + "step": 4138 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004939102190899868, + "loss": 0.5042, + "step": 4139 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004937072369384836, + "loss": 0.4824, + "step": 4140 + }, + { + "epoch": 0.52, + "learning_rate": 0.000493504255824231, + "loss": 0.386, + "step": 4141 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004933012757806868, + "loss": 0.0614, + "step": 4142 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004930982968413086, + "loss": 0.3856, + "step": 4143 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004928953190395539, + "loss": 0.5101, + "step": 4144 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004926923424088801, + "loss": 0.4641, + "step": 4145 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004924893669827441, + "loss": 0.405, + "step": 4146 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004922863927946029, + "loss": 0.427, + "step": 4147 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004920834198779133, + "loss": 0.3818, + "step": 4148 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004918804482661318, + "loss": 0.447, + "step": 4149 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004916774779927147, + "loss": 0.4653, + "step": 4150 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004914745090911178, + "loss": 0.4424, + "step": 4151 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004912715415947969, + "loss": 0.4268, + "step": 4152 + }, + { + "epoch": 0.52, + "learning_rate": 0.000491068575537208, + "loss": 0.5728, + "step": 4153 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004908656109518061, + "loss": 0.3988, + "step": 4154 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004906626478720465, + "loss": 0.4531, + "step": 4155 + }, + { + "epoch": 0.52, + "learning_rate": 0.000490459686331384, + "loss": 0.4191, + "step": 4156 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004902567263632735, + "loss": 0.393, + "step": 4157 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004900537680011688, + "loss": 0.4824, + "step": 4158 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004898508112785243, + "loss": 0.5117, + "step": 4159 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004896478562287939, + "loss": 0.3829, + "step": 4160 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004894449028854309, + "loss": 0.4129, + "step": 4161 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004892419512818889, + "loss": 0.4032, + "step": 4162 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004890390014516206, + "loss": 0.3975, + "step": 4163 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004888360534280789, + "loss": 0.5112, + "step": 4164 + }, + { + "epoch": 0.52, + "learning_rate": 0.000488633107244716, + "loss": 0.4127, + "step": 4165 + }, + { + "epoch": 0.52, + "learning_rate": 0.000488430162934984, + "loss": 0.486, + "step": 4166 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048822722053233473, + "loss": 0.0602, + "step": 4167 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004880242800702196, + "loss": 0.46, + "step": 4168 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004878213415820898, + "loss": 0.4218, + "step": 4169 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004876184051013961, + "loss": 0.4229, + "step": 4170 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048741547066158907, + "loss": 0.4431, + "step": 4171 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048721253829611905, + "loss": 0.4408, + "step": 4172 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048700960803843536, + "loss": 0.4437, + "step": 4173 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048680667992198774, + "loss": 0.4731, + "step": 4174 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004866037539802252, + "loss": 0.451, + "step": 4175 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004864008302465966, + "loss": 0.4579, + "step": 4176 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004861979087545503, + "loss": 0.4463, + "step": 4177 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048599498953753417, + "loss": 0.4214, + "step": 4178 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048579207262899627, + "loss": 0.5203, + "step": 4179 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048558915806238336, + "loss": 0.4652, + "step": 4180 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004853862458711425, + "loss": 0.5236, + "step": 4181 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004851833360887201, + "loss": 0.5182, + "step": 4182 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004849804287485625, + "loss": 0.5, + "step": 4183 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004847775238841151, + "loss": 0.4269, + "step": 4184 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048457462152882324, + "loss": 0.4177, + "step": 4185 + }, + { + "epoch": 0.52, + "learning_rate": 0.000484371721716132, + "loss": 0.3507, + "step": 4186 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004841688244794855, + "loss": 0.4015, + "step": 4187 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004839659298523279, + "loss": 0.0592, + "step": 4188 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004837630378681027, + "loss": 0.4003, + "step": 4189 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004835601485602532, + "loss": 0.4019, + "step": 4190 + }, + { + "epoch": 0.53, + "learning_rate": 0.000483357261962222, + "loss": 0.4042, + "step": 4191 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048315437810745145, + "loss": 0.4517, + "step": 4192 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004829514970293832, + "loss": 0.402, + "step": 4193 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048274861876145903, + "loss": 0.3972, + "step": 4194 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048254574333711926, + "loss": 0.4048, + "step": 4195 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004823428707898046, + "loss": 0.4573, + "step": 4196 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048214000115295494, + "loss": 0.427, + "step": 4197 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004819371344600097, + "loss": 0.4093, + "step": 4198 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048173427074440795, + "loss": 0.4012, + "step": 4199 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004815314100395882, + "loss": 0.5428, + "step": 4200 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004813285523789886, + "loss": 0.3875, + "step": 4201 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004811256977960462, + "loss": 0.414, + "step": 4202 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004809228463241982, + "loss": 0.4452, + "step": 4203 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004807199979968812, + "loss": 0.4832, + "step": 4204 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048051715284753115, + "loss": 0.5115, + "step": 4205 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004803143109095832, + "loss": 0.4053, + "step": 4206 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004801114722164725, + "loss": 0.4991, + "step": 4207 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004799086368016334, + "loss": 0.4323, + "step": 4208 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047970580469849976, + "loss": 0.5128, + "step": 4209 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004795029759405046, + "loss": 0.4578, + "step": 4210 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004793001505610809, + "loss": 0.5317, + "step": 4211 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004790973285936607, + "loss": 0.4406, + "step": 4212 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047889451007167565, + "loss": 0.4745, + "step": 4213 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047869169502855684, + "loss": 0.5582, + "step": 4214 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047848888349773466, + "loss": 0.4041, + "step": 4215 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047828607551263935, + "loss": 0.4038, + "step": 4216 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004780832711066996, + "loss": 0.4327, + "step": 4217 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047788047031334445, + "loss": 0.4653, + "step": 4218 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004776776731660021, + "loss": 0.3336, + "step": 4219 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004774748796980999, + "loss": 0.4573, + "step": 4220 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047727208994306476, + "loss": 0.415, + "step": 4221 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047706930393432323, + "loss": 0.4564, + "step": 4222 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004768665217053008, + "loss": 0.4236, + "step": 4223 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047666374328942247, + "loss": 0.4587, + "step": 4224 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004764609687201127, + "loss": 0.381, + "step": 4225 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047625819803079545, + "loss": 0.4601, + "step": 4226 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047605543125489363, + "loss": 0.4432, + "step": 4227 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047585266842582985, + "loss": 0.4456, + "step": 4228 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047564990957702603, + "loss": 0.4482, + "step": 4229 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047544715474190347, + "loss": 0.4614, + "step": 4230 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004752444039538826, + "loss": 0.3567, + "step": 4231 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004750416572463831, + "loss": 0.4164, + "step": 4232 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004748389146528244, + "loss": 0.3737, + "step": 4233 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047463617620662504, + "loss": 0.4373, + "step": 4234 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004744334419412027, + "loss": 0.4075, + "step": 4235 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047423071188997467, + "loss": 0.4241, + "step": 4236 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047402798608635766, + "loss": 0.4604, + "step": 4237 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004738252645637668, + "loss": 0.4956, + "step": 4238 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047362254735561763, + "loss": 0.4311, + "step": 4239 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004734198344953243, + "loss": 0.4473, + "step": 4240 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047321712601630043, + "loss": 0.4081, + "step": 4241 + }, + { + "epoch": 0.53, + "learning_rate": 0.000473014421951959, + "loss": 0.541, + "step": 4242 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047281172233571214, + "loss": 0.4558, + "step": 4243 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047260902720097143, + "loss": 0.0582, + "step": 4244 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047240633658114753, + "loss": 0.4611, + "step": 4245 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004722036505096501, + "loss": 0.38, + "step": 4246 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047200096901988867, + "loss": 0.4749, + "step": 4247 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004717982921452716, + "loss": 0.481, + "step": 4248 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004715956199192065, + "loss": 0.494, + "step": 4249 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047139295237510044, + "loss": 0.5388, + "step": 4250 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047119028954635946, + "loss": 0.4329, + "step": 4251 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047098763146638924, + "loss": 0.3969, + "step": 4252 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004707849781685939, + "loss": 0.3839, + "step": 4253 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004705823296863774, + "loss": 0.4626, + "step": 4254 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004703796860531429, + "loss": 0.0574, + "step": 4255 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047017704730229257, + "loss": 0.4165, + "step": 4256 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004699744134672277, + "loss": 0.4138, + "step": 4257 + }, + { + "epoch": 0.53, + "learning_rate": 0.000469771784581349, + "loss": 0.4392, + "step": 4258 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046956916067805645, + "loss": 0.4117, + "step": 4259 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046936654179074864, + "loss": 0.0572, + "step": 4260 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004691639279528238, + "loss": 0.4797, + "step": 4261 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004689613191976792, + "loss": 0.5117, + "step": 4262 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004687587155587115, + "loss": 0.4766, + "step": 4263 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004685561170693162, + "loss": 0.4996, + "step": 4264 + }, + { + "epoch": 0.53, + "learning_rate": 0.000468353523762888, + "loss": 0.3397, + "step": 4265 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046815093567282115, + "loss": 0.4033, + "step": 4266 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004679483528325084, + "loss": 0.0562, + "step": 4267 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004677457752753419, + "loss": 0.4701, + "step": 4268 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046754320303471307, + "loss": 0.4384, + "step": 4269 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004673406361440125, + "loss": 0.3774, + "step": 4270 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004671380746366295, + "loss": 0.4403, + "step": 4271 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046693551854595284, + "loss": 0.4301, + "step": 4272 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046673296790537036, + "loss": 0.3804, + "step": 4273 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004665304227482691, + "loss": 0.4277, + "step": 4274 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046632788310803467, + "loss": 0.0555, + "step": 4275 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004661253490180523, + "loss": 0.384, + "step": 4276 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046592282051170623, + "loss": 0.4221, + "step": 4277 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046572029762237963, + "loss": 0.4631, + "step": 4278 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004655177803834548, + "loss": 0.5019, + "step": 4279 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004653152688283132, + "loss": 0.4194, + "step": 4280 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046511276299033536, + "loss": 0.4073, + "step": 4281 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004649102629029004, + "loss": 0.5156, + "step": 4282 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004647077685993872, + "loss": 0.4503, + "step": 4283 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004645052801131732, + "loss": 0.3671, + "step": 4284 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004643027974776352, + "loss": 0.3754, + "step": 4285 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004641003207261487, + "loss": 0.4092, + "step": 4286 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004638978498920887, + "loss": 0.4137, + "step": 4287 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046369538500882855, + "loss": 0.4451, + "step": 4288 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046349292610974133, + "loss": 0.5051, + "step": 4289 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004632904732281985, + "loss": 0.481, + "step": 4290 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004630880263975711, + "loss": 0.5807, + "step": 4291 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004628855856512287, + "loss": 0.453, + "step": 4292 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004626831510225401, + "loss": 0.4702, + "step": 4293 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004624807225448732, + "loss": 0.4778, + "step": 4294 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004622783002515946, + "loss": 0.4037, + "step": 4295 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004620758841760703, + "loss": 0.4899, + "step": 4296 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004618734743516647, + "loss": 0.392, + "step": 4297 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004616710708117415, + "loss": 0.3839, + "step": 4298 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004614686735896634, + "loss": 0.4309, + "step": 4299 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046126628271879206, + "loss": 0.4233, + "step": 4300 + }, + { + "epoch": 0.54, + "learning_rate": 0.000461063898232488, + "loss": 0.4899, + "step": 4301 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046086152016411064, + "loss": 0.5226, + "step": 4302 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004606591485470188, + "loss": 0.4868, + "step": 4303 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046045678341456926, + "loss": 0.3771, + "step": 4304 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004602544248001186, + "loss": 0.4288, + "step": 4305 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004600520727370221, + "loss": 0.5249, + "step": 4306 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004598497272586338, + "loss": 0.0542, + "step": 4307 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045964738839830694, + "loss": 0.5135, + "step": 4308 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004594450561893935, + "loss": 0.4932, + "step": 4309 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004592427306652441, + "loss": 0.4832, + "step": 4310 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004590404118592088, + "loss": 0.5796, + "step": 4311 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045883809980463606, + "loss": 0.3793, + "step": 4312 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045863579453487365, + "loss": 0.5104, + "step": 4313 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004584334960832678, + "loss": 0.5442, + "step": 4314 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004582312044831639, + "loss": 0.3784, + "step": 4315 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004580289197679061, + "loss": 0.5492, + "step": 4316 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004578266419708376, + "loss": 0.4294, + "step": 4317 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004576243711253004, + "loss": 0.4071, + "step": 4318 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004574221072646349, + "loss": 0.0537, + "step": 4319 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045721985042218084, + "loss": 0.4587, + "step": 4320 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045701760063127686, + "loss": 0.4468, + "step": 4321 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045681535792526, + "loss": 0.0534, + "step": 4322 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045661312233746663, + "loss": 0.4965, + "step": 4323 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045641089390123155, + "loss": 0.4634, + "step": 4324 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004562086726498888, + "loss": 0.5527, + "step": 4325 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004560064586167706, + "loss": 0.3678, + "step": 4326 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004558042518352085, + "loss": 0.429, + "step": 4327 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045560205233853264, + "loss": 0.053, + "step": 4328 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004553998601600721, + "loss": 0.4144, + "step": 4329 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045519767533315475, + "loss": 0.4614, + "step": 4330 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004549954978911072, + "loss": 0.4594, + "step": 4331 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045479332786725466, + "loss": 0.4117, + "step": 4332 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045459116529492137, + "loss": 0.55, + "step": 4333 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004543890102074302, + "loss": 0.4557, + "step": 4334 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045418686263810296, + "loss": 0.4139, + "step": 4335 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045398472262025985, + "loss": 0.4907, + "step": 4336 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045378259018722023, + "loss": 0.4565, + "step": 4337 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004535804653723021, + "loss": 0.438, + "step": 4338 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045337834820882207, + "loss": 0.4406, + "step": 4339 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045317623873009576, + "loss": 0.5468, + "step": 4340 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045297413696943707, + "loss": 0.4768, + "step": 4341 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045277204296015897, + "loss": 0.4175, + "step": 4342 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045256995673557303, + "loss": 0.4546, + "step": 4343 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004523678783289898, + "loss": 0.4752, + "step": 4344 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004521658077737181, + "loss": 0.3873, + "step": 4345 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045196374510306583, + "loss": 0.4304, + "step": 4346 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045176169035033957, + "loss": 0.5201, + "step": 4347 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004515596435488441, + "loss": 0.3717, + "step": 4348 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004513576047318835, + "loss": 0.4863, + "step": 4349 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004511555739327601, + "loss": 0.4996, + "step": 4350 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004509535511847753, + "loss": 0.5193, + "step": 4351 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004507515365212291, + "loss": 0.3875, + "step": 4352 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045054952997541963, + "loss": 0.3837, + "step": 4353 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045034753158064433, + "loss": 0.4241, + "step": 4354 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004501455413701992, + "loss": 0.4403, + "step": 4355 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004499435593773785, + "loss": 0.4302, + "step": 4356 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004497415856354755, + "loss": 0.4977, + "step": 4357 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044953962017778184, + "loss": 0.4293, + "step": 4358 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044933766303758806, + "loss": 0.402, + "step": 4359 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044913571424818325, + "loss": 0.4497, + "step": 4360 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044893377384285513, + "loss": 0.5533, + "step": 4361 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044873184185489006, + "loss": 0.4933, + "step": 4362 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004485299183175727, + "loss": 0.3779, + "step": 4363 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044832800326418667, + "loss": 0.4433, + "step": 4364 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004481260967280141, + "loss": 0.4257, + "step": 4365 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004479241987423357, + "loss": 0.4696, + "step": 4366 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044772230934043083, + "loss": 0.4526, + "step": 4367 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004475204285555774, + "loss": 0.4429, + "step": 4368 + }, + { + "epoch": 0.55, + "learning_rate": 0.000447318556421052, + "loss": 0.4509, + "step": 4369 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004471166929701293, + "loss": 0.6022, + "step": 4370 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044691483823608316, + "loss": 0.4706, + "step": 4371 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004467129922521857, + "loss": 0.5089, + "step": 4372 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004465111550517077, + "loss": 0.0519, + "step": 4373 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004463093266679185, + "loss": 0.5062, + "step": 4374 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004461075071340858, + "loss": 0.3878, + "step": 4375 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004459056964834761, + "loss": 0.4503, + "step": 4376 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004457038947493542, + "loss": 0.3858, + "step": 4377 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004455021019649837, + "loss": 0.3992, + "step": 4378 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044530031816362634, + "loss": 0.4542, + "step": 4379 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004450985433785427, + "loss": 0.524, + "step": 4380 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044489677764299174, + "loss": 0.4187, + "step": 4381 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044469502099023114, + "loss": 0.4086, + "step": 4382 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044449327345351674, + "loss": 0.3945, + "step": 4383 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004442915350661034, + "loss": 0.4542, + "step": 4384 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004440898058612435, + "loss": 0.4381, + "step": 4385 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044388808587218895, + "loss": 0.5529, + "step": 4386 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004436863751321896, + "loss": 0.3971, + "step": 4387 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004434846736744939, + "loss": 0.4556, + "step": 4388 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004432829815323488, + "loss": 0.3682, + "step": 4389 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004430812987389996, + "loss": 0.4138, + "step": 4390 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004428796253276905, + "loss": 0.3812, + "step": 4391 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004426779613316632, + "loss": 0.399, + "step": 4392 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004424763067841588, + "loss": 0.4752, + "step": 4393 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004422746617184163, + "loss": 0.4573, + "step": 4394 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004420730261676735, + "loss": 0.3939, + "step": 4395 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044187140016516645, + "loss": 0.4179, + "step": 4396 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004416697837441295, + "loss": 0.401, + "step": 4397 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044146817693779556, + "loss": 0.4302, + "step": 4398 + }, + { + "epoch": 0.55, + "learning_rate": 0.000441266579779396, + "loss": 0.4343, + "step": 4399 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044106499230216054, + "loss": 0.4797, + "step": 4400 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044086341453931715, + "loss": 0.4434, + "step": 4401 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004406618465240926, + "loss": 0.4747, + "step": 4402 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004404602882897116, + "loss": 0.4946, + "step": 4403 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004402587398693976, + "loss": 0.4966, + "step": 4404 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044005720129637214, + "loss": 0.4186, + "step": 4405 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043985567260385565, + "loss": 0.3811, + "step": 4406 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043965415382506606, + "loss": 0.3533, + "step": 4407 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004394526449932204, + "loss": 0.4619, + "step": 4408 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043925114614153386, + "loss": 0.4331, + "step": 4409 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043904965730321994, + "loss": 0.4508, + "step": 4410 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004388481785114905, + "loss": 0.454, + "step": 4411 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004386467097995557, + "loss": 0.4493, + "step": 4412 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004384452512006244, + "loss": 0.4403, + "step": 4413 + }, + { + "epoch": 0.55, + "learning_rate": 0.000438243802747903, + "loss": 0.3679, + "step": 4414 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043804236447459697, + "loss": 0.5186, + "step": 4415 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043784093641390973, + "loss": 0.4619, + "step": 4416 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043763951859904335, + "loss": 0.5448, + "step": 4417 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004374381110631977, + "loss": 0.0515, + "step": 4418 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043723671383957136, + "loss": 0.3906, + "step": 4419 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043703532696136126, + "loss": 0.0515, + "step": 4420 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004368339504617623, + "loss": 0.4154, + "step": 4421 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004366325843739678, + "loss": 0.455, + "step": 4422 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004364312287311694, + "loss": 0.5076, + "step": 4423 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043622988356655705, + "loss": 0.4275, + "step": 4424 + }, + { + "epoch": 0.55, + "learning_rate": 0.000436028548913319, + "loss": 0.3964, + "step": 4425 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043582722480464155, + "loss": 0.5168, + "step": 4426 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004356259112737095, + "loss": 0.469, + "step": 4427 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004354246083537061, + "loss": 0.5031, + "step": 4428 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004352233160778121, + "loss": 0.3906, + "step": 4429 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043502203447920706, + "loss": 0.473, + "step": 4430 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004348207635910689, + "loss": 0.436, + "step": 4431 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043461950344657335, + "loss": 0.5774, + "step": 4432 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043441825407889475, + "loss": 0.4513, + "step": 4433 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043421701552120534, + "loss": 0.4893, + "step": 4434 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004340157878066761, + "loss": 0.4094, + "step": 4435 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043381457096847533, + "loss": 0.4376, + "step": 4436 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043361336503977034, + "loss": 0.0508, + "step": 4437 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043341217005372624, + "loss": 0.4713, + "step": 4438 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043321098604350673, + "loss": 0.0508, + "step": 4439 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004330098130422731, + "loss": 0.4371, + "step": 4440 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043280865108318527, + "loss": 0.3979, + "step": 4441 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043260750019940143, + "loss": 0.5277, + "step": 4442 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043240636042407755, + "loss": 0.4011, + "step": 4443 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043220523179036784, + "loss": 0.4451, + "step": 4444 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043200411433142496, + "loss": 0.0506, + "step": 4445 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004318030080803995, + "loss": 0.5045, + "step": 4446 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043160191307044037, + "loss": 0.4766, + "step": 4447 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043140082933469437, + "loss": 0.4874, + "step": 4448 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004311997569063067, + "loss": 0.3914, + "step": 4449 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043099869581842077, + "loss": 0.4349, + "step": 4450 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004307976461041775, + "loss": 0.5286, + "step": 4451 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004305966077967166, + "loss": 0.537, + "step": 4452 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043039558092917567, + "loss": 0.4307, + "step": 4453 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004301945655346905, + "loss": 0.4172, + "step": 4454 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042999356164639474, + "loss": 0.4607, + "step": 4455 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004297925692974206, + "loss": 0.4165, + "step": 4456 + }, + { + "epoch": 0.56, + "learning_rate": 0.000429591588520898, + "loss": 0.4822, + "step": 4457 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004293906193499548, + "loss": 0.438, + "step": 4458 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042918966181771745, + "loss": 0.4934, + "step": 4459 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042898871595731025, + "loss": 0.4163, + "step": 4460 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042878778180185554, + "loss": 0.4091, + "step": 4461 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042858685938447364, + "loss": 0.5542, + "step": 4462 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004283859487382831, + "loss": 0.432, + "step": 4463 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042818504989640085, + "loss": 0.4242, + "step": 4464 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042798416289194094, + "loss": 0.5017, + "step": 4465 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042778328775801625, + "loss": 0.4971, + "step": 4466 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004275824245277375, + "loss": 0.4648, + "step": 4467 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042738157323421347, + "loss": 0.4172, + "step": 4468 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004271807339105509, + "loss": 0.4717, + "step": 4469 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042697990658985464, + "loss": 0.4363, + "step": 4470 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004267790913052275, + "loss": 0.446, + "step": 4471 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004265782880897705, + "loss": 0.4061, + "step": 4472 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042637749697658215, + "loss": 0.5404, + "step": 4473 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042617671799875947, + "loss": 0.4333, + "step": 4474 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042597595118939725, + "loss": 0.4659, + "step": 4475 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042577519658158845, + "loss": 0.5228, + "step": 4476 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004255744542084238, + "loss": 0.5746, + "step": 4477 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042537372410299235, + "loss": 0.3813, + "step": 4478 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004251730062983808, + "loss": 0.4213, + "step": 4479 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042497230082767367, + "loss": 0.4281, + "step": 4480 + }, + { + "epoch": 0.56, + "learning_rate": 0.000424771607723954, + "loss": 0.4656, + "step": 4481 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042457092702030246, + "loss": 0.4852, + "step": 4482 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042437025874979755, + "loss": 0.3732, + "step": 4483 + }, + { + "epoch": 0.56, + "learning_rate": 0.000424169602945516, + "loss": 0.4475, + "step": 4484 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004239689596405323, + "loss": 0.4691, + "step": 4485 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004237683288679192, + "loss": 0.4237, + "step": 4486 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042356771066074674, + "loss": 0.4783, + "step": 4487 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004233671050520834, + "loss": 0.3708, + "step": 4488 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004231665120749956, + "loss": 0.4546, + "step": 4489 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004229659317625474, + "loss": 0.5497, + "step": 4490 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042276536414780097, + "loss": 0.4028, + "step": 4491 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042256480926381633, + "loss": 0.502, + "step": 4492 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004223642671436516, + "loss": 0.4907, + "step": 4493 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042216373782036224, + "loss": 0.4446, + "step": 4494 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042196322132700205, + "loss": 0.4135, + "step": 4495 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042176271769662273, + "loss": 0.4574, + "step": 4496 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004215622269622737, + "loss": 0.4012, + "step": 4497 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004213617491570023, + "loss": 0.4591, + "step": 4498 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042116128431385374, + "loss": 0.468, + "step": 4499 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042096083246587126, + "loss": 0.363, + "step": 4500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042076039364609565, + "loss": 0.4558, + "step": 4501 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004205599678875656, + "loss": 0.4617, + "step": 4502 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004203595552233179, + "loss": 0.381, + "step": 4503 + }, + { + "epoch": 0.56, + "learning_rate": 0.000420159155686387, + "loss": 0.4877, + "step": 4504 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004199587693098051, + "loss": 0.4138, + "step": 4505 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004197583961266025, + "loss": 0.4304, + "step": 4506 + }, + { + "epoch": 0.56, + "learning_rate": 0.00041955803616980706, + "loss": 0.391, + "step": 4507 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041935768947244477, + "loss": 0.4784, + "step": 4508 + }, + { + "epoch": 0.57, + "learning_rate": 0.000419157356067539, + "loss": 0.4089, + "step": 4509 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041895703598811117, + "loss": 0.384, + "step": 4510 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041875672926718055, + "loss": 0.4402, + "step": 4511 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004185564359377643, + "loss": 0.4841, + "step": 4512 + }, + { + "epoch": 0.57, + "learning_rate": 0.000418356156032877, + "loss": 0.5243, + "step": 4513 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041815588958553135, + "loss": 0.5312, + "step": 4514 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041795563662873797, + "loss": 0.3755, + "step": 4515 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004177553971955046, + "loss": 0.4365, + "step": 4516 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004175551713188373, + "loss": 0.38, + "step": 4517 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004173549590317398, + "loss": 0.4017, + "step": 4518 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041715476036721357, + "loss": 0.4818, + "step": 4519 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041695457535825776, + "loss": 0.4224, + "step": 4520 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004167544040378695, + "loss": 0.4624, + "step": 4521 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004165542464390432, + "loss": 0.5366, + "step": 4522 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041635410259477146, + "loss": 0.4092, + "step": 4523 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041615397253804445, + "loss": 0.4025, + "step": 4524 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041595385630185006, + "loss": 0.4072, + "step": 4525 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004157537539191738, + "loss": 0.4291, + "step": 4526 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041555366542299913, + "loss": 0.4719, + "step": 4527 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041535359084630697, + "loss": 0.5007, + "step": 4528 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041515353022207623, + "loss": 0.4423, + "step": 4529 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004149534835832835, + "loss": 0.4282, + "step": 4530 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041475345096290246, + "loss": 0.4871, + "step": 4531 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004145534323939052, + "loss": 0.4481, + "step": 4532 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004143534279092612, + "loss": 0.3893, + "step": 4533 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004141534375419378, + "loss": 0.3822, + "step": 4534 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004139534613248997, + "loss": 0.4474, + "step": 4535 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004137534992911095, + "loss": 0.4934, + "step": 4536 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004135535514735276, + "loss": 0.4034, + "step": 4537 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004133536179051114, + "loss": 0.5022, + "step": 4538 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041315369861881675, + "loss": 0.4399, + "step": 4539 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041295379364759667, + "loss": 0.4097, + "step": 4540 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041275390302440197, + "loss": 0.05, + "step": 4541 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004125540267821811, + "loss": 0.4753, + "step": 4542 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004123541649538803, + "loss": 0.4042, + "step": 4543 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004121543175724429, + "loss": 0.3921, + "step": 4544 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004119544846708106, + "loss": 0.3745, + "step": 4545 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041175466628192185, + "loss": 0.5004, + "step": 4546 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041155486243871366, + "loss": 0.5331, + "step": 4547 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041135507317411967, + "loss": 0.4211, + "step": 4548 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004111552985210719, + "loss": 0.5339, + "step": 4549 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004109555385124997, + "loss": 0.0497, + "step": 4550 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004107557931813297, + "loss": 0.4003, + "step": 4551 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041055606256048687, + "loss": 0.6439, + "step": 4552 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004103563466828928, + "loss": 0.4929, + "step": 4553 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041015664558146714, + "loss": 0.4461, + "step": 4554 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004099569592891273, + "loss": 0.4839, + "step": 4555 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040975728783878787, + "loss": 0.4513, + "step": 4556 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040955763126336125, + "loss": 0.3914, + "step": 4557 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004093579895957573, + "loss": 0.4923, + "step": 4558 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040915836286888345, + "loss": 0.4811, + "step": 4559 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040895875111564446, + "loss": 0.433, + "step": 4560 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040875915436894283, + "loss": 0.4895, + "step": 4561 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004085595726616787, + "loss": 0.4991, + "step": 4562 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040836000602674933, + "loss": 0.4254, + "step": 4563 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040816045449705, + "loss": 0.4254, + "step": 4564 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004079609181054731, + "loss": 0.4158, + "step": 4565 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040776139688490876, + "loss": 0.4585, + "step": 4566 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040756189086824454, + "loss": 0.0493, + "step": 4567 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004073624000883653, + "loss": 0.5289, + "step": 4568 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040716292457815376, + "loss": 0.3812, + "step": 4569 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004069634643704897, + "loss": 0.476, + "step": 4570 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004067640194982508, + "loss": 0.4246, + "step": 4571 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004065645899943119, + "loss": 0.4091, + "step": 4572 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004063651758915454, + "loss": 0.4057, + "step": 4573 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004061657772228216, + "loss": 0.5015, + "step": 4574 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040596639402100724, + "loss": 0.416, + "step": 4575 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004057670263189673, + "loss": 0.416, + "step": 4576 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040556767414956415, + "loss": 0.4318, + "step": 4577 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004053683375456573, + "loss": 0.4788, + "step": 4578 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040516901654010407, + "loss": 0.4138, + "step": 4579 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040496971116575875, + "loss": 0.457, + "step": 4580 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004047704214554737, + "loss": 0.489, + "step": 4581 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040457114744209786, + "loss": 0.4113, + "step": 4582 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004043718891584781, + "loss": 0.4092, + "step": 4583 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004041726466374588, + "loss": 0.4644, + "step": 4584 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040397341991188143, + "loss": 0.4399, + "step": 4585 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040377420901458507, + "loss": 0.406, + "step": 4586 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040357501397840604, + "loss": 0.4149, + "step": 4587 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004033758348361781, + "loss": 0.4551, + "step": 4588 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004031766716207325, + "loss": 0.3795, + "step": 4589 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040297752436489775, + "loss": 0.0506, + "step": 4590 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040277839310149966, + "loss": 0.541, + "step": 4591 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040257927786336147, + "loss": 0.5076, + "step": 4592 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040238017868330386, + "loss": 0.4829, + "step": 4593 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040218109559414484, + "loss": 0.5112, + "step": 4594 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004019820286286997, + "loss": 0.3831, + "step": 4595 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040178297781978136, + "loss": 0.4409, + "step": 4596 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040158394320019927, + "loss": 0.394, + "step": 4597 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040138492480276114, + "loss": 0.5149, + "step": 4598 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004011859226602715, + "loss": 0.444, + "step": 4599 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040098693680553233, + "loss": 0.4125, + "step": 4600 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040078796727134304, + "loss": 0.4531, + "step": 4601 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004005890140905001, + "loss": 0.4795, + "step": 4602 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004003900772957978, + "loss": 0.5053, + "step": 4603 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040019115692002677, + "loss": 0.4536, + "step": 4604 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039999225299597573, + "loss": 0.474, + "step": 4605 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003997933655564305, + "loss": 0.4037, + "step": 4606 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039959449463417425, + "loss": 0.4128, + "step": 4607 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003993956402619873, + "loss": 0.4575, + "step": 4608 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039919680247264713, + "loss": 0.0502, + "step": 4609 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003989979812989287, + "loss": 0.4254, + "step": 4610 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039879917677360425, + "loss": 0.4793, + "step": 4611 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039860038892944305, + "loss": 0.5176, + "step": 4612 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039840161779921174, + "loss": 0.499, + "step": 4613 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039820286341567436, + "loss": 0.4298, + "step": 4614 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003980041258115919, + "loss": 0.4686, + "step": 4615 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039780540501972283, + "loss": 0.0501, + "step": 4616 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003976067010728227, + "loss": 0.4554, + "step": 4617 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003974080140036447, + "loss": 0.4001, + "step": 4618 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039720934384493824, + "loss": 0.4991, + "step": 4619 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039701069062945087, + "loss": 0.453, + "step": 4620 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003968120543899271, + "loss": 0.415, + "step": 4621 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003966134351591086, + "loss": 0.0499, + "step": 4622 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039641483296973414, + "loss": 0.5049, + "step": 4623 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003962162478545398, + "loss": 0.4492, + "step": 4624 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003960176798462591, + "loss": 0.4403, + "step": 4625 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003958191289776221, + "loss": 0.4402, + "step": 4626 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039562059528135634, + "loss": 0.4247, + "step": 4627 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003954220787901868, + "loss": 0.4404, + "step": 4628 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039522357953683545, + "loss": 0.4658, + "step": 4629 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003950250975540212, + "loss": 0.4639, + "step": 4630 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039482663287446033, + "loss": 0.4258, + "step": 4631 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039462818553086644, + "loss": 0.4203, + "step": 4632 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039442975555594974, + "loss": 0.4257, + "step": 4633 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003942313429824181, + "loss": 0.3916, + "step": 4634 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039403294784297625, + "loss": 0.4569, + "step": 4635 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039383457017032607, + "loss": 0.5515, + "step": 4636 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003936362099971666, + "loss": 0.4176, + "step": 4637 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003934378673561942, + "loss": 0.4034, + "step": 4638 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039323954228010193, + "loss": 0.397, + "step": 4639 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003930412348015804, + "loss": 0.4148, + "step": 4640 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039284294495331684, + "loss": 0.4119, + "step": 4641 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003926446727679958, + "loss": 0.0493, + "step": 4642 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039244641827829906, + "loss": 0.4553, + "step": 4643 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003922481815169054, + "loss": 0.4734, + "step": 4644 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039204996251649056, + "loss": 0.4125, + "step": 4645 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003918517613097274, + "loss": 0.392, + "step": 4646 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039165357792928624, + "loss": 0.4017, + "step": 4647 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003914554124078335, + "loss": 0.4425, + "step": 4648 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003912572647780336, + "loss": 0.4429, + "step": 4649 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039105913507254757, + "loss": 0.4971, + "step": 4650 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003908610233240338, + "loss": 0.4893, + "step": 4651 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039066292956514714, + "loss": 0.4556, + "step": 4652 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039046485382854013, + "loss": 0.3999, + "step": 4653 + }, + { + "epoch": 0.58, + "learning_rate": 0.000390266796146862, + "loss": 0.405, + "step": 4654 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039006875655275895, + "loss": 0.3629, + "step": 4655 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038987073507887435, + "loss": 0.4788, + "step": 4656 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038967273175784847, + "loss": 0.4314, + "step": 4657 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003894747466223187, + "loss": 0.4595, + "step": 4658 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003892767797049194, + "loss": 0.3857, + "step": 4659 + }, + { + "epoch": 0.58, + "learning_rate": 0.000389078831038282, + "loss": 0.4673, + "step": 4660 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038888090065503463, + "loss": 0.4091, + "step": 4661 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038868298858780293, + "loss": 0.4005, + "step": 4662 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003884850948692088, + "loss": 0.4575, + "step": 4663 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003882872195318717, + "loss": 0.3966, + "step": 4664 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038808936260840786, + "loss": 0.3876, + "step": 4665 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003878915241314305, + "loss": 0.4913, + "step": 4666 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003876937041335498, + "loss": 0.451, + "step": 4667 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003874959026473729, + "loss": 0.0488, + "step": 4668 + }, + { + "epoch": 0.59, + "learning_rate": 0.000387298119705504, + "loss": 0.4553, + "step": 4669 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038710035534054375, + "loss": 0.4955, + "step": 4670 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038690260958509036, + "loss": 0.4804, + "step": 4671 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038670488247173867, + "loss": 0.3632, + "step": 4672 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003865071740330806, + "loss": 0.4414, + "step": 4673 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038630948430170463, + "loss": 0.3784, + "step": 4674 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003861118133101966, + "loss": 0.5094, + "step": 4675 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003859141610911391, + "loss": 0.4554, + "step": 4676 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038571652767711156, + "loss": 0.4081, + "step": 4677 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003855189131006902, + "loss": 0.4052, + "step": 4678 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003853213173944484, + "loss": 0.4242, + "step": 4679 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003851237405909563, + "loss": 0.3244, + "step": 4680 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003849261827227809, + "loss": 0.5229, + "step": 4681 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038472864382248624, + "loss": 0.4326, + "step": 4682 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038453112392263303, + "loss": 0.413, + "step": 4683 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038433362305577906, + "loss": 0.4337, + "step": 4684 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038413614125447853, + "loss": 0.5012, + "step": 4685 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038393867855128304, + "loss": 0.4814, + "step": 4686 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003837412349787408, + "loss": 0.3668, + "step": 4687 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003835438105693968, + "loss": 0.4418, + "step": 4688 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003833464053557931, + "loss": 0.5103, + "step": 4689 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003831490193704685, + "loss": 0.3942, + "step": 4690 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003829516526459584, + "loss": 0.4574, + "step": 4691 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003827543052147952, + "loss": 0.5529, + "step": 4692 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003825569771095082, + "loss": 0.4319, + "step": 4693 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003823596683626235, + "loss": 0.5342, + "step": 4694 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038216237900666385, + "loss": 0.475, + "step": 4695 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003819651090741489, + "loss": 0.4669, + "step": 4696 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003817678585975953, + "loss": 0.3856, + "step": 4697 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038157062760951624, + "loss": 0.4452, + "step": 4698 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003813734161424215, + "loss": 0.4628, + "step": 4699 + }, + { + "epoch": 0.59, + "learning_rate": 0.000381176224228818, + "loss": 0.4185, + "step": 4700 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003809790519012095, + "loss": 0.4611, + "step": 4701 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003807818991920963, + "loss": 0.51, + "step": 4702 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003805847661339754, + "loss": 0.3998, + "step": 4703 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038038765275934076, + "loss": 0.38, + "step": 4704 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038019055910068315, + "loss": 0.0487, + "step": 4705 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037999348519049007, + "loss": 0.4938, + "step": 4706 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003797964310612453, + "loss": 0.428, + "step": 4707 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037959939674542985, + "loss": 0.4398, + "step": 4708 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003794023822755214, + "loss": 0.4808, + "step": 4709 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037920538768399425, + "loss": 0.3888, + "step": 4710 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037900841300331955, + "loss": 0.4543, + "step": 4711 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037881145826596516, + "loss": 0.3523, + "step": 4712 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003786145235043954, + "loss": 0.3785, + "step": 4713 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037841760875107145, + "loss": 0.4406, + "step": 4714 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003782207140384514, + "loss": 0.4691, + "step": 4715 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003780238393989897, + "loss": 0.4229, + "step": 4716 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037782698486513774, + "loss": 0.4456, + "step": 4717 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003776301504693434, + "loss": 0.3795, + "step": 4718 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003774333362440514, + "loss": 0.3776, + "step": 4719 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037723654222170333, + "loss": 0.4683, + "step": 4720 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003770397684347368, + "loss": 0.4814, + "step": 4721 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003768430149155866, + "loss": 0.4231, + "step": 4722 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003766462816966841, + "loss": 0.4053, + "step": 4723 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037644956881045734, + "loss": 0.4348, + "step": 4724 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003762528762893309, + "loss": 0.4205, + "step": 4725 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037605620416572604, + "loss": 0.4061, + "step": 4726 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037585955247206076, + "loss": 0.4311, + "step": 4727 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003756629212407497, + "loss": 0.4456, + "step": 4728 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037546631050420376, + "loss": 0.5055, + "step": 4729 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037526972029483085, + "loss": 0.5042, + "step": 4730 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037507315064503536, + "loss": 0.424, + "step": 4731 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003748766015872184, + "loss": 0.383, + "step": 4732 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003746800731537776, + "loss": 0.4183, + "step": 4733 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037448356537710716, + "loss": 0.4331, + "step": 4734 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037428707828959795, + "loss": 0.5254, + "step": 4735 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037409061192363724, + "loss": 0.4712, + "step": 4736 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003738941663116091, + "loss": 0.4406, + "step": 4737 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037369774148589426, + "loss": 0.4711, + "step": 4738 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003735013374788696, + "loss": 0.4512, + "step": 4739 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003733049543229091, + "loss": 0.4165, + "step": 4740 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003731085920503828, + "loss": 0.5345, + "step": 4741 + }, + { + "epoch": 0.59, + "learning_rate": 0.000372912250693658, + "loss": 0.3865, + "step": 4742 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037271593028509746, + "loss": 0.5317, + "step": 4743 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037251963085706155, + "loss": 0.401, + "step": 4744 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037232335244190653, + "loss": 0.4355, + "step": 4745 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003721270950719856, + "loss": 0.5284, + "step": 4746 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003719308587796482, + "loss": 0.4317, + "step": 4747 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003717346435972405, + "loss": 0.4202, + "step": 4748 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003715384495571052, + "loss": 0.4417, + "step": 4749 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003713422766915811, + "loss": 0.4272, + "step": 4750 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037114612503300396, + "loss": 0.3667, + "step": 4751 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037094999461370593, + "loss": 0.4862, + "step": 4752 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037075388546601565, + "loss": 0.4659, + "step": 4753 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037055779762225816, + "loss": 0.4377, + "step": 4754 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003703617311147553, + "loss": 0.4131, + "step": 4755 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003701656859758249, + "loss": 0.4655, + "step": 4756 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036996966223778164, + "loss": 0.3933, + "step": 4757 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036977365993293653, + "loss": 0.4165, + "step": 4758 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036957767909359717, + "loss": 0.4546, + "step": 4759 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036938171975206736, + "loss": 0.5145, + "step": 4760 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036918578194064755, + "loss": 0.4755, + "step": 4761 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036898986569163476, + "loss": 0.4036, + "step": 4762 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036879397103732224, + "loss": 0.5042, + "step": 4763 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036859809800999995, + "loss": 0.575, + "step": 4764 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036840224664195365, + "loss": 0.4332, + "step": 4765 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003682064169654663, + "loss": 0.4053, + "step": 4766 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003680106090128168, + "loss": 0.5342, + "step": 4767 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036781482281628066, + "loss": 0.502, + "step": 4768 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003676190584081298, + "loss": 0.4288, + "step": 4769 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003674233158206326, + "loss": 0.3969, + "step": 4770 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036722759508605387, + "loss": 0.5208, + "step": 4771 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003670318962366542, + "loss": 0.5278, + "step": 4772 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003668362193046915, + "loss": 0.4077, + "step": 4773 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003666405643224194, + "loss": 0.4751, + "step": 4774 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036644493132208846, + "loss": 0.5997, + "step": 4775 + }, + { + "epoch": 0.6, + "learning_rate": 0.000366249320335945, + "loss": 0.3765, + "step": 4776 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036605373139623234, + "loss": 0.3718, + "step": 4777 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003658581645351896, + "loss": 0.4807, + "step": 4778 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036566261978505265, + "loss": 0.4064, + "step": 4779 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003654670971780535, + "loss": 0.4825, + "step": 4780 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036527159674642064, + "loss": 0.4795, + "step": 4781 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036507611852237875, + "loss": 0.4419, + "step": 4782 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003648806625381491, + "loss": 0.4235, + "step": 4783 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036468522882594903, + "loss": 0.3806, + "step": 4784 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036448981741799236, + "loss": 0.3513, + "step": 4785 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003642944283464894, + "loss": 0.5254, + "step": 4786 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036409906164364624, + "loss": 0.4818, + "step": 4787 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036390371734166583, + "loss": 0.5085, + "step": 4788 + }, + { + "epoch": 0.6, + "learning_rate": 0.000363708395472747, + "loss": 0.4758, + "step": 4789 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036351309606908545, + "loss": 0.4047, + "step": 4790 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036331781916287256, + "loss": 0.4414, + "step": 4791 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003631225647862963, + "loss": 0.4629, + "step": 4792 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003629273329715412, + "loss": 0.4564, + "step": 4793 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036273212375078734, + "loss": 0.4595, + "step": 4794 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036253693715621163, + "loss": 0.4694, + "step": 4795 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003623417732199872, + "loss": 0.4474, + "step": 4796 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003621466319742833, + "loss": 0.3984, + "step": 4797 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036195151345126556, + "loss": 0.5023, + "step": 4798 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036175641768309587, + "loss": 0.3813, + "step": 4799 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003615613447019322, + "loss": 0.3942, + "step": 4800 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036136629453992897, + "loss": 0.0494, + "step": 4801 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036117126722923665, + "loss": 0.3972, + "step": 4802 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003609762628020021, + "loss": 0.4237, + "step": 4803 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036078128129036824, + "loss": 0.4875, + "step": 4804 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003605863227264745, + "loss": 0.4484, + "step": 4805 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003603913871424561, + "loss": 0.3777, + "step": 4806 + }, + { + "epoch": 0.6, + "learning_rate": 0.000360196474570445, + "loss": 0.416, + "step": 4807 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036000158504256906, + "loss": 0.4287, + "step": 4808 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035980671859095215, + "loss": 0.4099, + "step": 4809 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003596118752477146, + "loss": 0.4266, + "step": 4810 + }, + { + "epoch": 0.6, + "learning_rate": 0.000359417055044973, + "loss": 0.4519, + "step": 4811 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003592222580148399, + "loss": 0.3714, + "step": 4812 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003590274841894242, + "loss": 0.4305, + "step": 4813 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035883273360083097, + "loss": 0.3733, + "step": 4814 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003586380062811615, + "loss": 0.468, + "step": 4815 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035844330226251277, + "loss": 0.4275, + "step": 4816 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003582486215769785, + "loss": 0.4028, + "step": 4817 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003580539642566484, + "loss": 0.5076, + "step": 4818 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035785933033360815, + "loss": 0.465, + "step": 4819 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035766471983994, + "loss": 0.4576, + "step": 4820 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003574701328077216, + "loss": 0.4001, + "step": 4821 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003572755692690275, + "loss": 0.0493, + "step": 4822 + }, + { + "epoch": 0.6, + "learning_rate": 0.000357081029255928, + "loss": 0.4, + "step": 4823 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035688651280048957, + "loss": 0.4102, + "step": 4824 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003566920199347747, + "loss": 0.5032, + "step": 4825 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003564975506908421, + "loss": 0.4301, + "step": 4826 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003563031051007467, + "loss": 0.4327, + "step": 4827 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035610868319653927, + "loss": 0.5806, + "step": 4828 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035591428501026694, + "loss": 0.5173, + "step": 4829 + }, + { + "epoch": 0.61, + "learning_rate": 0.000355719910573973, + "loss": 0.414, + "step": 4830 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003555255599196962, + "loss": 0.4297, + "step": 4831 + }, + { + "epoch": 0.61, + "learning_rate": 0.000355331233079472, + "loss": 0.4745, + "step": 4832 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003551369300853317, + "loss": 0.3922, + "step": 4833 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003549426509693028, + "loss": 0.4576, + "step": 4834 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003547483957634087, + "loss": 0.4891, + "step": 4835 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003545541644996689, + "loss": 0.4656, + "step": 4836 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003543599572100994, + "loss": 0.459, + "step": 4837 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003541657739267111, + "loss": 0.4613, + "step": 4838 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003539716146815122, + "loss": 0.4337, + "step": 4839 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035377747950650606, + "loss": 0.4479, + "step": 4840 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035358336843369277, + "loss": 0.5139, + "step": 4841 + }, + { + "epoch": 0.61, + "learning_rate": 0.000353389281495068, + "loss": 0.3531, + "step": 4842 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003531952187226234, + "loss": 0.4159, + "step": 4843 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035300118014834685, + "loss": 0.5668, + "step": 4844 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035280716580422236, + "loss": 0.4205, + "step": 4845 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003526131757222296, + "loss": 0.4307, + "step": 4846 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003524192099343443, + "loss": 0.5321, + "step": 4847 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003522252684725383, + "loss": 0.4026, + "step": 4848 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035203135136877955, + "loss": 0.439, + "step": 4849 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003518374586550318, + "loss": 0.4711, + "step": 4850 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003516435903632548, + "loss": 0.4752, + "step": 4851 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035144974652540463, + "loss": 0.437, + "step": 4852 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003512559271734324, + "loss": 0.5759, + "step": 4853 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003510621323392862, + "loss": 0.6102, + "step": 4854 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003508683620549095, + "loss": 0.5643, + "step": 4855 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035067461635224207, + "loss": 0.4391, + "step": 4856 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035048089526321937, + "loss": 0.4964, + "step": 4857 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035028719881977284, + "loss": 0.4449, + "step": 4858 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003500935270538302, + "loss": 0.4838, + "step": 4859 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003498998799973143, + "loss": 0.4604, + "step": 4860 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003497062576821448, + "loss": 0.4998, + "step": 4861 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003495126601402368, + "loss": 0.5089, + "step": 4862 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003493190874035015, + "loss": 0.4258, + "step": 4863 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034912553950384583, + "loss": 0.4149, + "step": 4864 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034893201647317275, + "loss": 0.55, + "step": 4865 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003487385183433812, + "loss": 0.4734, + "step": 4866 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003485450451463659, + "loss": 0.4071, + "step": 4867 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003483515969140173, + "loss": 0.4692, + "step": 4868 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003481581736782221, + "loss": 0.4178, + "step": 4869 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003479647754708626, + "loss": 0.3692, + "step": 4870 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003477714023238171, + "loss": 0.4663, + "step": 4871 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034757805426895974, + "loss": 0.4778, + "step": 4872 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003473847313381605, + "loss": 0.4114, + "step": 4873 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034719143356328554, + "loss": 0.0491, + "step": 4874 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003469981609761961, + "loss": 0.3748, + "step": 4875 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003468049136087499, + "loss": 0.049, + "step": 4876 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003466116914928005, + "loss": 0.4454, + "step": 4877 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034641849466019695, + "loss": 0.4908, + "step": 4878 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003462253231427844, + "loss": 0.4155, + "step": 4879 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003460321769724038, + "loss": 0.3748, + "step": 4880 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034583905618089204, + "loss": 0.4555, + "step": 4881 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034564596080008126, + "loss": 0.4261, + "step": 4882 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034545289086180007, + "loss": 0.4496, + "step": 4883 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003452598463978726, + "loss": 0.4591, + "step": 4884 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034506682744011875, + "loss": 0.4258, + "step": 4885 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034487383402035434, + "loss": 0.0485, + "step": 4886 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003446808661703908, + "loss": 0.4886, + "step": 4887 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034448792392203576, + "loss": 0.4749, + "step": 4888 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034429500730709205, + "loss": 0.4865, + "step": 4889 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034410211635735853, + "loss": 0.4387, + "step": 4890 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034390925110463, + "loss": 0.3945, + "step": 4891 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034371641158069687, + "loss": 0.3948, + "step": 4892 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034352359781734525, + "loss": 0.3683, + "step": 4893 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003433308098463571, + "loss": 0.3467, + "step": 4894 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003431380476995102, + "loss": 0.4847, + "step": 4895 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034294531140857806, + "loss": 0.5275, + "step": 4896 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003427526010053295, + "loss": 0.433, + "step": 4897 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003425599165215295, + "loss": 0.3917, + "step": 4898 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034236725798893886, + "loss": 0.4646, + "step": 4899 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003421746254393138, + "loss": 0.418, + "step": 4900 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003419820189044064, + "loss": 0.616, + "step": 4901 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034178943841596456, + "loss": 0.4122, + "step": 4902 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034159688400573184, + "loss": 0.3863, + "step": 4903 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034140435570544704, + "loss": 0.3461, + "step": 4904 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034121185354684525, + "loss": 0.4534, + "step": 4905 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003410193775616571, + "loss": 0.4225, + "step": 4906 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034082692778160883, + "loss": 0.4318, + "step": 4907 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034063450423842226, + "loss": 0.3375, + "step": 4908 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003404421069638151, + "loss": 0.4082, + "step": 4909 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003402497359895007, + "loss": 0.4232, + "step": 4910 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034005739134718795, + "loss": 0.4781, + "step": 4911 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033986507306858125, + "loss": 0.4343, + "step": 4912 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033967278118538115, + "loss": 0.4056, + "step": 4913 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003394805157292834, + "loss": 0.453, + "step": 4914 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003392882767319796, + "loss": 0.4373, + "step": 4915 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033909606422515703, + "loss": 0.4819, + "step": 4916 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003389038782404984, + "loss": 0.4457, + "step": 4917 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003387117188096823, + "loss": 0.4399, + "step": 4918 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003385195859643827, + "loss": 0.4285, + "step": 4919 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003383274797362692, + "loss": 0.4716, + "step": 4920 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003381354001570073, + "loss": 0.416, + "step": 4921 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003379433472582577, + "loss": 0.5067, + "step": 4922 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003377513210716772, + "loss": 0.4165, + "step": 4923 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003375593216289178, + "loss": 0.4396, + "step": 4924 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003373673489616272, + "loss": 0.3931, + "step": 4925 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033717540310144854, + "loss": 0.418, + "step": 4926 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033698348408002087, + "loss": 0.4435, + "step": 4927 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033679159192897873, + "loss": 0.4363, + "step": 4928 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033659972667995187, + "loss": 0.4301, + "step": 4929 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033640788836456604, + "loss": 0.4346, + "step": 4930 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033621607701444237, + "loss": 0.4031, + "step": 4931 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003360242926611978, + "loss": 0.397, + "step": 4932 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003358325353364441, + "loss": 0.4529, + "step": 4933 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003356408050717893, + "loss": 0.3838, + "step": 4934 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033544910189883684, + "loss": 0.4431, + "step": 4935 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003352574258491854, + "loss": 0.4972, + "step": 4936 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003350657769544295, + "loss": 0.5159, + "step": 4937 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033487415524615917, + "loss": 0.4529, + "step": 4938 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033468256075595967, + "loss": 0.442, + "step": 4939 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033449099351541225, + "loss": 0.5005, + "step": 4940 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033429945355609294, + "loss": 0.4729, + "step": 4941 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003341079409095739, + "loss": 0.4203, + "step": 4942 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033391645560742263, + "loss": 0.3762, + "step": 4943 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003337249976812021, + "loss": 0.4445, + "step": 4944 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033353356716247075, + "loss": 0.3721, + "step": 4945 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003333421640827826, + "loss": 0.4678, + "step": 4946 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033315078847368694, + "loss": 0.4379, + "step": 4947 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033295944036672867, + "loss": 0.4246, + "step": 4948 + }, + { + "epoch": 0.62, + "learning_rate": 0.000332768119793448, + "loss": 0.4006, + "step": 4949 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033257682678538105, + "loss": 0.4663, + "step": 4950 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033238556137405877, + "loss": 0.4417, + "step": 4951 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033219432359100806, + "loss": 0.424, + "step": 4952 + }, + { + "epoch": 0.62, + "learning_rate": 0.000332003113467751, + "loss": 0.3928, + "step": 4953 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003318119310358054, + "loss": 0.5938, + "step": 4954 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033162077632668395, + "loss": 0.4241, + "step": 4955 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033142964937189534, + "loss": 0.4392, + "step": 4956 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003312385502029434, + "loss": 0.3895, + "step": 4957 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033104747885132746, + "loss": 0.4686, + "step": 4958 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033085643534854224, + "loss": 0.4597, + "step": 4959 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003306654197260779, + "loss": 0.4713, + "step": 4960 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033047443201541995, + "loss": 0.4041, + "step": 4961 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033028347224804954, + "loss": 0.4158, + "step": 4962 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003300925404554427, + "loss": 0.4628, + "step": 4963 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032990163666907115, + "loss": 0.3635, + "step": 4964 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003297107609204022, + "loss": 0.4728, + "step": 4965 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032951991324089823, + "loss": 0.424, + "step": 4966 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003293290936620171, + "loss": 0.3872, + "step": 4967 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032913830221521225, + "loss": 0.4453, + "step": 4968 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032894753893193197, + "loss": 0.448, + "step": 4969 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003287568038436203, + "loss": 0.4366, + "step": 4970 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003285660969817165, + "loss": 0.4747, + "step": 4971 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032837541837765544, + "loss": 0.4734, + "step": 4972 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003281847680628668, + "loss": 0.3649, + "step": 4973 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032799414606877607, + "loss": 0.5847, + "step": 4974 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032780355242680395, + "loss": 0.4454, + "step": 4975 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032761298716836655, + "loss": 0.4473, + "step": 4976 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032742245032487484, + "loss": 0.5132, + "step": 4977 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032723194192773556, + "loss": 0.5432, + "step": 4978 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003270414620083507, + "loss": 0.4832, + "step": 4979 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003268510105981175, + "loss": 0.4255, + "step": 4980 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003266605877284284, + "loss": 0.0525, + "step": 4981 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032647019343067153, + "loss": 0.4224, + "step": 4982 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003262798277362299, + "loss": 0.5188, + "step": 4983 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032608949067648165, + "loss": 0.4019, + "step": 4984 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032589918228280066, + "loss": 0.4495, + "step": 4985 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032570890258655597, + "loss": 0.4567, + "step": 4986 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032551865161911165, + "loss": 0.442, + "step": 4987 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003253284294118274, + "loss": 0.4127, + "step": 4988 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032513823599605797, + "loss": 0.422, + "step": 4989 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003249480714031531, + "loss": 0.4594, + "step": 4990 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032475793566445846, + "loss": 0.0508, + "step": 4991 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003245678288113142, + "loss": 0.4375, + "step": 4992 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003243777508750563, + "loss": 0.4882, + "step": 4993 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003241877018870156, + "loss": 0.4202, + "step": 4994 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003239976818785183, + "loss": 0.5071, + "step": 4995 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032380769088088604, + "loss": 0.3965, + "step": 4996 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032361772892543515, + "loss": 0.3671, + "step": 4997 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003234277960434779, + "loss": 0.4399, + "step": 4998 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032323789226632103, + "loss": 0.4247, + "step": 4999 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003230480176252669, + "loss": 0.4189, + "step": 5000 + }, + { + "epoch": 0.63, + "learning_rate": 0.000322858172151613, + "loss": 0.4189, + "step": 5001 + }, + { + "epoch": 0.63, + "learning_rate": 0.000322668355876652, + "loss": 0.4545, + "step": 5002 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003224785688316717, + "loss": 0.3977, + "step": 5003 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032228881104795514, + "loss": 0.478, + "step": 5004 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032209908255678086, + "loss": 0.3691, + "step": 5005 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032190938338942177, + "loss": 0.4774, + "step": 5006 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003217197135771465, + "loss": 0.4645, + "step": 5007 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032153007315121896, + "loss": 0.4442, + "step": 5008 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003213404621428978, + "loss": 0.3787, + "step": 5009 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003211508805834372, + "loss": 0.4075, + "step": 5010 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032096132850408643, + "loss": 0.4379, + "step": 5011 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003207718059360895, + "loss": 0.4532, + "step": 5012 + }, + { + "epoch": 0.63, + "learning_rate": 0.000320582312910686, + "loss": 0.4882, + "step": 5013 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003203928494591105, + "loss": 0.4089, + "step": 5014 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003202034156125927, + "loss": 0.4004, + "step": 5015 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032001401140235733, + "loss": 0.4285, + "step": 5016 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031982463685962436, + "loss": 0.4757, + "step": 5017 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003196352920156088, + "loss": 0.3783, + "step": 5018 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031944597690152096, + "loss": 0.4548, + "step": 5019 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031925669154856605, + "loss": 0.5152, + "step": 5020 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003190674359879442, + "loss": 0.4319, + "step": 5021 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031887821025085094, + "loss": 0.3838, + "step": 5022 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003186890143684769, + "loss": 0.5863, + "step": 5023 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003184998483720074, + "loss": 0.4634, + "step": 5024 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031831071229262336, + "loss": 0.4139, + "step": 5025 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031812160616150043, + "loss": 0.4132, + "step": 5026 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003179325300098096, + "loss": 0.4578, + "step": 5027 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031774348386871636, + "loss": 0.4401, + "step": 5028 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031755446776938177, + "loss": 0.4578, + "step": 5029 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031736548174296183, + "loss": 0.5036, + "step": 5030 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031717652582060745, + "loss": 0.4501, + "step": 5031 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003169876000334649, + "loss": 0.5502, + "step": 5032 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003167987044126749, + "loss": 0.5245, + "step": 5033 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031660983898937383, + "loss": 0.4768, + "step": 5034 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031642100379469275, + "loss": 0.376, + "step": 5035 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003162321988597577, + "loss": 0.425, + "step": 5036 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031604342421569, + "loss": 0.4645, + "step": 5037 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003158546798936055, + "loss": 0.4612, + "step": 5038 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003156659659246156, + "loss": 0.5502, + "step": 5039 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003154772823398264, + "loss": 0.5035, + "step": 5040 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031528862917033914, + "loss": 0.3796, + "step": 5041 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031510000644725003, + "loss": 0.449, + "step": 5042 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003149114142016497, + "loss": 0.5188, + "step": 5043 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031472285246462474, + "loss": 0.4545, + "step": 5044 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003145343212672559, + "loss": 0.406, + "step": 5045 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031434582064061937, + "loss": 0.4575, + "step": 5046 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003141573506157862, + "loss": 0.3873, + "step": 5047 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031396891122382223, + "loss": 0.3674, + "step": 5048 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031378050249578846, + "loss": 0.5109, + "step": 5049 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031359212446274053, + "loss": 0.4295, + "step": 5050 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031340377715572944, + "loss": 0.5692, + "step": 5051 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003132154606058006, + "loss": 0.4392, + "step": 5052 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003130271748439951, + "loss": 0.4753, + "step": 5053 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003128389199013482, + "loss": 0.6003, + "step": 5054 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031265069580889047, + "loss": 0.4382, + "step": 5055 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003124625025976473, + "loss": 0.3848, + "step": 5056 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003122743402986391, + "loss": 0.4984, + "step": 5057 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031208620894288106, + "loss": 0.4199, + "step": 5058 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003118981085613831, + "loss": 0.0514, + "step": 5059 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003117100391851505, + "loss": 0.406, + "step": 5060 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031152200084518303, + "loss": 0.4369, + "step": 5061 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031133399357247544, + "loss": 0.491, + "step": 5062 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031114601739801753, + "loss": 0.4196, + "step": 5063 + }, + { + "epoch": 0.63, + "learning_rate": 0.000310958072352794, + "loss": 0.4085, + "step": 5064 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031077015846778385, + "loss": 0.5151, + "step": 5065 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031058227577396156, + "loss": 0.5483, + "step": 5066 + }, + { + "epoch": 0.64, + "learning_rate": 0.00031039442430229623, + "loss": 0.4094, + "step": 5067 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003102066040837519, + "loss": 0.4788, + "step": 5068 + }, + { + "epoch": 0.64, + "learning_rate": 0.00031001881514928733, + "loss": 0.4843, + "step": 5069 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003098310575298563, + "loss": 0.4143, + "step": 5070 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003096433312564075, + "loss": 0.4268, + "step": 5071 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003094556363598838, + "loss": 0.4675, + "step": 5072 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030926797287122374, + "loss": 0.5081, + "step": 5073 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030908034082136016, + "loss": 0.5084, + "step": 5074 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003088927402412209, + "loss": 0.5098, + "step": 5075 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003087051711617289, + "loss": 0.4921, + "step": 5076 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003085176336138011, + "loss": 0.3867, + "step": 5077 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030833012762835, + "loss": 0.4544, + "step": 5078 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003081426532362828, + "loss": 0.475, + "step": 5079 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003079552104685011, + "loss": 0.4241, + "step": 5080 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003077677993559014, + "loss": 0.4071, + "step": 5081 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030758041992937535, + "loss": 0.5802, + "step": 5082 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030739307221980906, + "loss": 0.5504, + "step": 5083 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003072057562580835, + "loss": 0.45, + "step": 5084 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003070184720750744, + "loss": 0.4125, + "step": 5085 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003068312197016523, + "loss": 0.4086, + "step": 5086 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030664399916868225, + "loss": 0.4197, + "step": 5087 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003064568105070242, + "loss": 0.4463, + "step": 5088 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003062696537475332, + "loss": 0.4204, + "step": 5089 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003060825289210585, + "loss": 0.4829, + "step": 5090 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003058954360584443, + "loss": 0.3747, + "step": 5091 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030570837519052965, + "loss": 0.4339, + "step": 5092 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003055213463481483, + "loss": 0.4105, + "step": 5093 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030533434956212836, + "loss": 0.5342, + "step": 5094 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003051473848632931, + "loss": 0.3636, + "step": 5095 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003049604522824604, + "loss": 0.3985, + "step": 5096 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003047735518504428, + "loss": 0.401, + "step": 5097 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003045866835980473, + "loss": 0.4315, + "step": 5098 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030439984755607595, + "loss": 0.4633, + "step": 5099 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030421304375532544, + "loss": 0.4137, + "step": 5100 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003040262722265872, + "loss": 0.4431, + "step": 5101 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003038395330006468, + "loss": 0.4283, + "step": 5102 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030365282610828514, + "loss": 0.371, + "step": 5103 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030346615158027753, + "loss": 0.4406, + "step": 5104 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030327950944739393, + "loss": 0.4465, + "step": 5105 + }, + { + "epoch": 0.64, + "learning_rate": 0.000303092899740399, + "loss": 0.4934, + "step": 5106 + }, + { + "epoch": 0.64, + "learning_rate": 0.000302906322490052, + "loss": 0.5804, + "step": 5107 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003027197777271072, + "loss": 0.4677, + "step": 5108 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030253326548231274, + "loss": 0.4225, + "step": 5109 + }, + { + "epoch": 0.64, + "learning_rate": 0.000302346785786412, + "loss": 0.4202, + "step": 5110 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003021603386701429, + "loss": 0.4189, + "step": 5111 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030197392416423806, + "loss": 0.6204, + "step": 5112 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030178754229942447, + "loss": 0.4503, + "step": 5113 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003016011931064239, + "loss": 0.5479, + "step": 5114 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003014148766159529, + "loss": 0.4373, + "step": 5115 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030122859285872214, + "loss": 0.4486, + "step": 5116 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003010423418654373, + "loss": 0.5212, + "step": 5117 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003008561236667986, + "loss": 0.4554, + "step": 5118 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003006699382935009, + "loss": 0.3654, + "step": 5119 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003004837857762333, + "loss": 0.507, + "step": 5120 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030029766614568, + "loss": 0.4048, + "step": 5121 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003001115794325194, + "loss": 0.5486, + "step": 5122 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029992552566742483, + "loss": 0.3965, + "step": 5123 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002997395048810635, + "loss": 0.5438, + "step": 5124 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029955351710409797, + "loss": 0.0507, + "step": 5125 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029936756236718487, + "loss": 0.4221, + "step": 5126 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029918164070097576, + "loss": 0.4232, + "step": 5127 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029899575213611633, + "loss": 0.504, + "step": 5128 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029880989670324717, + "loss": 0.4888, + "step": 5129 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002986240744330033, + "loss": 0.4205, + "step": 5130 + }, + { + "epoch": 0.64, + "learning_rate": 0.000298438285356014, + "loss": 0.495, + "step": 5131 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002982525295029034, + "loss": 0.4634, + "step": 5132 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029806680690429, + "loss": 0.4127, + "step": 5133 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029788111759078705, + "loss": 0.4257, + "step": 5134 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029769546159300205, + "loss": 0.5072, + "step": 5135 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002975098389415371, + "loss": 0.4348, + "step": 5136 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029732424966698903, + "loss": 0.4005, + "step": 5137 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002971386937999485, + "loss": 0.5293, + "step": 5138 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002969531713710014, + "loss": 0.4438, + "step": 5139 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002967676824107278, + "loss": 0.4258, + "step": 5140 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029658222694970224, + "loss": 0.4142, + "step": 5141 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029639680501849364, + "loss": 0.0501, + "step": 5142 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002962114166476657, + "loss": 0.4476, + "step": 5143 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002960260618677765, + "loss": 0.4352, + "step": 5144 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029584074070937823, + "loss": 0.4683, + "step": 5145 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002956554532030179, + "loss": 0.4523, + "step": 5146 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002954701993792368, + "loss": 0.41, + "step": 5147 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002952849792685709, + "loss": 0.6597, + "step": 5148 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002950997929015503, + "loss": 0.4647, + "step": 5149 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029491464030869985, + "loss": 0.4818, + "step": 5150 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029472952152053854, + "loss": 0.4785, + "step": 5151 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002945444365675803, + "loss": 0.5103, + "step": 5152 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002943593854803325, + "loss": 0.4061, + "step": 5153 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029417436828929784, + "loss": 0.587, + "step": 5154 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002939893850249731, + "loss": 0.4106, + "step": 5155 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029380443571784954, + "loss": 0.474, + "step": 5156 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029361952039841274, + "loss": 0.4403, + "step": 5157 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029343463909714274, + "loss": 0.4913, + "step": 5158 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029324979184451404, + "loss": 0.4235, + "step": 5159 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029306497867099515, + "loss": 0.4484, + "step": 5160 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002928801996070495, + "loss": 0.4294, + "step": 5161 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029269545468313456, + "loss": 0.5554, + "step": 5162 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029251074392970224, + "loss": 0.4808, + "step": 5163 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002923260673771988, + "loss": 0.5281, + "step": 5164 + }, + { + "epoch": 0.65, + "learning_rate": 0.000292141425056065, + "loss": 0.4815, + "step": 5165 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029195681699673606, + "loss": 0.3345, + "step": 5166 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002917722432296409, + "loss": 0.3972, + "step": 5167 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002915877037852034, + "loss": 0.4742, + "step": 5168 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002914031986938417, + "loss": 0.4924, + "step": 5169 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029121872798596805, + "loss": 0.4343, + "step": 5170 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029103429169198943, + "loss": 0.0494, + "step": 5171 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029084988984230664, + "loss": 0.4249, + "step": 5172 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029066552246731524, + "loss": 0.5195, + "step": 5173 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029048118959740504, + "loss": 0.5576, + "step": 5174 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029029689126295965, + "loss": 0.4352, + "step": 5175 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029011262749435767, + "loss": 0.447, + "step": 5176 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002899283983219716, + "loss": 0.3639, + "step": 5177 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002897442037761684, + "loss": 0.5, + "step": 5178 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002895600438873093, + "loss": 0.4878, + "step": 5179 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028937591868574984, + "loss": 0.4332, + "step": 5180 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028919182820183985, + "loss": 0.4244, + "step": 5181 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028900777246592313, + "loss": 0.4421, + "step": 5182 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028882375150833815, + "loss": 0.4684, + "step": 5183 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002886397653594175, + "loss": 0.5608, + "step": 5184 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028845581404948804, + "loss": 0.4487, + "step": 5185 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028827189760887096, + "loss": 0.4233, + "step": 5186 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002880880160678815, + "loss": 0.4572, + "step": 5187 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028790416945682955, + "loss": 0.5565, + "step": 5188 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002877203578060186, + "loss": 0.4305, + "step": 5189 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028753658114574683, + "loss": 0.4636, + "step": 5190 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028735283950630664, + "loss": 0.451, + "step": 5191 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028716913291798466, + "loss": 0.4722, + "step": 5192 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002869854614110617, + "loss": 0.4038, + "step": 5193 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028680182501581255, + "loss": 0.4637, + "step": 5194 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002866182237625064, + "loss": 0.4213, + "step": 5195 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028643465768140686, + "loss": 0.4814, + "step": 5196 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002862511268027716, + "loss": 0.5063, + "step": 5197 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028606763115685213, + "loss": 0.0491, + "step": 5198 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002858841707738946, + "loss": 0.3915, + "step": 5199 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002857007456841393, + "loss": 0.5226, + "step": 5200 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028551735591782044, + "loss": 0.5215, + "step": 5201 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028533400150516675, + "loss": 0.4277, + "step": 5202 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002851506824764011, + "loss": 0.5157, + "step": 5203 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002849673988617399, + "loss": 0.3809, + "step": 5204 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028478415069139453, + "loss": 0.4321, + "step": 5205 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002846009379955701, + "loss": 0.4899, + "step": 5206 + }, + { + "epoch": 0.65, + "learning_rate": 0.000284417760804466, + "loss": 0.4293, + "step": 5207 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028423461914827585, + "loss": 0.4025, + "step": 5208 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028405151305718724, + "loss": 0.4473, + "step": 5209 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002838684425613821, + "loss": 0.3992, + "step": 5210 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028368540769103603, + "loss": 0.453, + "step": 5211 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002835024084763193, + "loss": 0.509, + "step": 5212 + }, + { + "epoch": 0.65, + "learning_rate": 0.000283319444947396, + "loss": 0.4594, + "step": 5213 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028313651713442455, + "loss": 0.5262, + "step": 5214 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002829536250675573, + "loss": 0.4918, + "step": 5215 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028277076877694077, + "loss": 0.3911, + "step": 5216 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002825879482927155, + "loss": 0.473, + "step": 5217 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028240516364501654, + "loss": 0.4447, + "step": 5218 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002822224148639723, + "loss": 0.4365, + "step": 5219 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028203970197970574, + "loss": 0.4361, + "step": 5220 + }, + { + "epoch": 0.65, + "learning_rate": 0.000281857025022334, + "loss": 0.4484, + "step": 5221 + }, + { + "epoch": 0.65, + "learning_rate": 0.000281674384021968, + "loss": 0.4864, + "step": 5222 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028149177900871304, + "loss": 0.4775, + "step": 5223 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028130921001266816, + "loss": 0.457, + "step": 5224 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028112667706392693, + "loss": 0.504, + "step": 5225 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002809441801925762, + "loss": 0.5169, + "step": 5226 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002807617194286976, + "loss": 0.3882, + "step": 5227 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028057929480236653, + "loss": 0.0487, + "step": 5228 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002803969063436525, + "loss": 0.608, + "step": 5229 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028021455408261896, + "loss": 0.0485, + "step": 5230 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002800322380493235, + "loss": 0.0484, + "step": 5231 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027984995827381786, + "loss": 0.5126, + "step": 5232 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002796677147861472, + "loss": 0.4645, + "step": 5233 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002794855076163514, + "loss": 0.4106, + "step": 5234 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027930333679446407, + "loss": 0.4406, + "step": 5235 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027912120235051304, + "loss": 0.4255, + "step": 5236 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027893910431451945, + "loss": 0.4491, + "step": 5237 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027875704271649926, + "loss": 0.47, + "step": 5238 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002785750175864621, + "loss": 0.4571, + "step": 5239 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002783930289544118, + "loss": 0.4165, + "step": 5240 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002782110768503454, + "loss": 0.5366, + "step": 5241 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027802916130425484, + "loss": 0.4025, + "step": 5242 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027784728234612555, + "loss": 0.473, + "step": 5243 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027766544000593725, + "loss": 0.4568, + "step": 5244 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027748363431366325, + "loss": 0.4074, + "step": 5245 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027730186529927107, + "loss": 0.403, + "step": 5246 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002771201329927223, + "loss": 0.4822, + "step": 5247 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027693843742397195, + "loss": 0.4122, + "step": 5248 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027675677862296945, + "loss": 0.4102, + "step": 5249 + }, + { + "epoch": 0.66, + "learning_rate": 0.000276575156619658, + "loss": 0.4081, + "step": 5250 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002763935714439748, + "loss": 0.4019, + "step": 5251 + }, + { + "epoch": 0.66, + "learning_rate": 0.000276212023125851, + "loss": 0.4769, + "step": 5252 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027603051169521167, + "loss": 0.5181, + "step": 5253 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002758490371819758, + "loss": 0.4811, + "step": 5254 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002756675996160559, + "loss": 0.428, + "step": 5255 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027548619902735907, + "loss": 0.4696, + "step": 5256 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027530483544578577, + "loss": 0.4968, + "step": 5257 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002751235089012308, + "loss": 0.4039, + "step": 5258 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027494221942358246, + "loss": 0.0506, + "step": 5259 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002747609670427231, + "loss": 0.3893, + "step": 5260 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002745797517885293, + "loss": 0.5507, + "step": 5261 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002743985736908707, + "loss": 0.5376, + "step": 5262 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002742174327796115, + "loss": 0.4363, + "step": 5263 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002740363290846096, + "loss": 0.5332, + "step": 5264 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027385526263571676, + "loss": 0.0489, + "step": 5265 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027367423346277847, + "loss": 0.4725, + "step": 5266 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027349324159563426, + "loss": 0.4456, + "step": 5267 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002733122870641175, + "loss": 0.4399, + "step": 5268 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002731313698980554, + "loss": 0.4814, + "step": 5269 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002729504901272686, + "loss": 0.5984, + "step": 5270 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002727696477815722, + "loss": 0.0494, + "step": 5271 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002725888428907748, + "loss": 0.4015, + "step": 5272 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002724080754846788, + "loss": 0.0494, + "step": 5273 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027222734559308073, + "loss": 0.4098, + "step": 5274 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002720466532457707, + "loss": 0.4376, + "step": 5275 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002718659984725323, + "loss": 0.491, + "step": 5276 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002716853813031435, + "loss": 0.5282, + "step": 5277 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002715048017673758, + "loss": 0.4647, + "step": 5278 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002713242598949948, + "loss": 0.4268, + "step": 5279 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002711437557157591, + "loss": 0.4368, + "step": 5280 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027096328925942207, + "loss": 0.4308, + "step": 5281 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002707828605557301, + "loss": 0.4803, + "step": 5282 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002706024696344241, + "loss": 0.4289, + "step": 5283 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027042211652523784, + "loss": 0.4399, + "step": 5284 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002702418012578995, + "loss": 0.4675, + "step": 5285 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002700615238621309, + "loss": 0.4178, + "step": 5286 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026988128436764757, + "loss": 0.4627, + "step": 5287 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002697010828041587, + "loss": 0.4456, + "step": 5288 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026952091920136756, + "loss": 0.374, + "step": 5289 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002693407935889707, + "loss": 0.5281, + "step": 5290 + }, + { + "epoch": 0.66, + "learning_rate": 0.000269160705996659, + "loss": 0.4695, + "step": 5291 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026898065645411615, + "loss": 0.5032, + "step": 5292 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002688006449910204, + "loss": 0.4407, + "step": 5293 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002686206716370436, + "loss": 0.4316, + "step": 5294 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002684407364218509, + "loss": 0.3912, + "step": 5295 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002682608393751016, + "loss": 0.4077, + "step": 5296 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002680809805264486, + "loss": 0.4052, + "step": 5297 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026790115990553843, + "loss": 0.5065, + "step": 5298 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002677213775420111, + "loss": 0.4301, + "step": 5299 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026754163346550075, + "loss": 0.4701, + "step": 5300 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002673619277056349, + "loss": 0.4375, + "step": 5301 + }, + { + "epoch": 0.66, + "learning_rate": 0.000267182260292035, + "loss": 0.4495, + "step": 5302 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026700263125431587, + "loss": 0.457, + "step": 5303 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026682304062208633, + "loss": 0.5016, + "step": 5304 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002666434884249489, + "loss": 0.3864, + "step": 5305 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026646397469249906, + "loss": 0.4489, + "step": 5306 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026628449945432673, + "loss": 0.444, + "step": 5307 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002661050627400153, + "loss": 0.462, + "step": 5308 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002659256645791417, + "loss": 0.4998, + "step": 5309 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002657463050012765, + "loss": 0.5094, + "step": 5310 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026556698403598395, + "loss": 0.3954, + "step": 5311 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026538770171282187, + "loss": 0.4124, + "step": 5312 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002652084580613422, + "loss": 0.399, + "step": 5313 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002650292531110895, + "loss": 0.4821, + "step": 5314 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002648500868916027, + "loss": 0.4913, + "step": 5315 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026467095943241427, + "loss": 0.3694, + "step": 5316 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002644918707630502, + "loss": 0.4536, + "step": 5317 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026431282091303026, + "loss": 0.4584, + "step": 5318 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026413380991186723, + "loss": 0.3556, + "step": 5319 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026395483778906816, + "loss": 0.4308, + "step": 5320 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026377590457413337, + "loss": 0.4409, + "step": 5321 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002635970102965568, + "loss": 0.4464, + "step": 5322 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026341815498582633, + "loss": 0.5338, + "step": 5323 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002632393386714226, + "loss": 0.4679, + "step": 5324 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002630605613828205, + "loss": 0.3751, + "step": 5325 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026288182314948837, + "loss": 0.5159, + "step": 5326 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026270312400088826, + "loss": 0.5123, + "step": 5327 + }, + { + "epoch": 0.67, + "learning_rate": 0.000262524463966475, + "loss": 0.4275, + "step": 5328 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026234584307569794, + "loss": 0.4337, + "step": 5329 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002621672613579994, + "loss": 0.5262, + "step": 5330 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002619887188428155, + "loss": 0.4293, + "step": 5331 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026181021555957574, + "loss": 0.4674, + "step": 5332 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026163175153770334, + "loss": 0.51, + "step": 5333 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026145332680661484, + "loss": 0.4281, + "step": 5334 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026127494139572063, + "loss": 0.4684, + "step": 5335 + }, + { + "epoch": 0.67, + "learning_rate": 0.000261096595334424, + "loss": 0.4183, + "step": 5336 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002609182886521223, + "loss": 0.4694, + "step": 5337 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002607400213782062, + "loss": 0.4391, + "step": 5338 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026056179354206, + "loss": 0.4054, + "step": 5339 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002603836051730614, + "loss": 0.5249, + "step": 5340 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002602054563005815, + "loss": 0.418, + "step": 5341 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002600273469539852, + "loss": 0.5398, + "step": 5342 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002598492771626304, + "loss": 0.3993, + "step": 5343 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025967124695586873, + "loss": 0.4891, + "step": 5344 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025949325636304556, + "loss": 0.4036, + "step": 5345 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002593153054134993, + "loss": 0.3463, + "step": 5346 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002591373941365622, + "loss": 0.4812, + "step": 5347 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002589595225615596, + "loss": 0.4232, + "step": 5348 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002587816907178107, + "loss": 0.4218, + "step": 5349 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025860389863462764, + "loss": 0.3816, + "step": 5350 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025842614634131637, + "loss": 0.4619, + "step": 5351 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025824843386717633, + "loss": 0.5952, + "step": 5352 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002580707612415002, + "loss": 0.4689, + "step": 5353 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025789312849357416, + "loss": 0.5068, + "step": 5354 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025771553565267786, + "loss": 0.421, + "step": 5355 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002575379827480843, + "loss": 0.0492, + "step": 5356 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025736046980906013, + "loss": 0.3789, + "step": 5357 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002571829968648649, + "loss": 0.4769, + "step": 5358 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025700556394475194, + "loss": 0.5017, + "step": 5359 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025682817107796805, + "loss": 0.4596, + "step": 5360 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025665081829375326, + "loss": 0.4351, + "step": 5361 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025647350562134114, + "loss": 0.5165, + "step": 5362 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002562962330899583, + "loss": 0.049, + "step": 5363 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002561190007288251, + "loss": 0.4835, + "step": 5364 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002559418085671551, + "loss": 0.4574, + "step": 5365 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002557646566341556, + "loss": 0.4957, + "step": 5366 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002555875449590265, + "loss": 0.5198, + "step": 5367 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002554104735709618, + "loss": 0.4164, + "step": 5368 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025523344249914843, + "loss": 0.5084, + "step": 5369 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002550564517727669, + "loss": 0.4469, + "step": 5370 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025487950142099127, + "loss": 0.0487, + "step": 5371 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002547025914729882, + "loss": 0.4229, + "step": 5372 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025452572195791833, + "loss": 0.4679, + "step": 5373 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002543488929049356, + "loss": 0.394, + "step": 5374 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002541721043431869, + "loss": 0.4733, + "step": 5375 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002539953563018129, + "loss": 0.3791, + "step": 5376 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025381864880994745, + "loss": 0.4194, + "step": 5377 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002536419818967176, + "loss": 0.4365, + "step": 5378 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025346535559124354, + "loss": 0.4374, + "step": 5379 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025328876992263916, + "loss": 0.4001, + "step": 5380 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002531122249200114, + "loss": 0.3894, + "step": 5381 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025293572061246074, + "loss": 0.484, + "step": 5382 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025275925702908066, + "loss": 0.3804, + "step": 5383 + }, + { + "epoch": 0.67, + "learning_rate": 0.000252582834198958, + "loss": 0.3823, + "step": 5384 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002524064521511731, + "loss": 0.4644, + "step": 5385 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002522301109147995, + "loss": 0.4872, + "step": 5386 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025205381051890364, + "loss": 0.436, + "step": 5387 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002518775509925456, + "loss": 0.5341, + "step": 5388 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025170133236477867, + "loss": 0.4211, + "step": 5389 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002515251546646494, + "loss": 0.4153, + "step": 5390 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025134901792119756, + "loss": 0.413, + "step": 5391 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025117292216345614, + "loss": 0.4546, + "step": 5392 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002509968674204517, + "loss": 0.4918, + "step": 5393 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025082085372120323, + "loss": 0.4127, + "step": 5394 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002506448810947237, + "loss": 0.4249, + "step": 5395 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002504689495700191, + "loss": 0.6299, + "step": 5396 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025029305917608863, + "loss": 0.3578, + "step": 5397 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025011720994192474, + "loss": 0.4358, + "step": 5398 + }, + { + "epoch": 0.68, + "learning_rate": 0.000249941401896513, + "loss": 0.5215, + "step": 5399 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002497656350688324, + "loss": 0.4279, + "step": 5400 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024958990948785475, + "loss": 0.4641, + "step": 5401 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002494142251825454, + "loss": 0.4378, + "step": 5402 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002492385821818628, + "loss": 0.4828, + "step": 5403 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002490629805147586, + "loss": 0.4463, + "step": 5404 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024888742021017784, + "loss": 0.4846, + "step": 5405 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024871190129705813, + "loss": 0.5713, + "step": 5406 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024853642380433076, + "loss": 0.379, + "step": 5407 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002483609877609202, + "loss": 0.4158, + "step": 5408 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024818559319574415, + "loss": 0.4563, + "step": 5409 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024801024013771283, + "loss": 0.3774, + "step": 5410 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024783492861573037, + "loss": 0.4757, + "step": 5411 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024765965865869373, + "loss": 0.6133, + "step": 5412 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024748443029549305, + "loss": 0.4544, + "step": 5413 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002473092435550117, + "loss": 0.4791, + "step": 5414 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002471340984661262, + "loss": 0.3851, + "step": 5415 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024695899505770575, + "loss": 0.6381, + "step": 5416 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024678393335861326, + "loss": 0.4788, + "step": 5417 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002466089133977046, + "loss": 0.4738, + "step": 5418 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002464339352038286, + "loss": 0.3823, + "step": 5419 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002462589988058274, + "loss": 0.3577, + "step": 5420 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002460841042325362, + "loss": 0.4808, + "step": 5421 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002459092515127833, + "loss": 0.4052, + "step": 5422 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024573444067538984, + "loss": 0.4027, + "step": 5423 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002455596717491705, + "loss": 0.4473, + "step": 5424 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002453849447629328, + "loss": 0.401, + "step": 5425 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002452102597454773, + "loss": 0.379, + "step": 5426 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002450356167255978, + "loss": 0.5, + "step": 5427 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024486101573208114, + "loss": 0.4658, + "step": 5428 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024468645679370717, + "loss": 0.4784, + "step": 5429 + }, + { + "epoch": 0.68, + "learning_rate": 0.000244511939939249, + "loss": 0.0486, + "step": 5430 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002443374651974723, + "loss": 0.4307, + "step": 5431 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024416303259713635, + "loss": 0.4379, + "step": 5432 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002439886421669933, + "loss": 0.4772, + "step": 5433 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002438142939357882, + "loss": 0.39, + "step": 5434 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024363998793225939, + "loss": 0.4498, + "step": 5435 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024346572418513802, + "loss": 0.4949, + "step": 5436 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002432915027231487, + "loss": 0.5101, + "step": 5437 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024311732357500826, + "loss": 0.0483, + "step": 5438 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024294318676942728, + "loss": 0.5074, + "step": 5439 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002427690923351092, + "loss": 0.417, + "step": 5440 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024259504030075025, + "loss": 0.4391, + "step": 5441 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024242103069503995, + "loss": 0.4161, + "step": 5442 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024224706354666065, + "loss": 0.4628, + "step": 5443 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024207313888428795, + "loss": 0.5774, + "step": 5444 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024189925673658992, + "loss": 0.4449, + "step": 5445 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024172541713222807, + "loss": 0.4714, + "step": 5446 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024155162009985675, + "loss": 0.4475, + "step": 5447 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024137786566812364, + "loss": 0.5101, + "step": 5448 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024120415386566862, + "loss": 0.4451, + "step": 5449 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002410304847211252, + "loss": 0.5042, + "step": 5450 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024085685826311966, + "loss": 0.4081, + "step": 5451 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024068327452027145, + "loss": 0.3951, + "step": 5452 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024050973352119238, + "loss": 0.4265, + "step": 5453 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002403362352944878, + "loss": 0.4353, + "step": 5454 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024016277986875585, + "loss": 0.422, + "step": 5455 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023998936727258753, + "loss": 0.4368, + "step": 5456 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002398159975345669, + "loss": 0.4659, + "step": 5457 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023964267068327084, + "loss": 0.4861, + "step": 5458 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023946938674726943, + "loss": 0.4548, + "step": 5459 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002392961457551251, + "loss": 0.4716, + "step": 5460 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023912294773539367, + "loss": 0.5063, + "step": 5461 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002389497927166238, + "loss": 0.3826, + "step": 5462 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023877668072735708, + "loss": 0.4802, + "step": 5463 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023860361179612794, + "loss": 0.495, + "step": 5464 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023843058595146365, + "loss": 0.5162, + "step": 5465 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023825760322188473, + "loss": 0.4581, + "step": 5466 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002380846636359039, + "loss": 0.4789, + "step": 5467 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002379117672220274, + "loss": 0.5619, + "step": 5468 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023773891400875413, + "loss": 0.5455, + "step": 5469 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002375661040245759, + "loss": 0.4369, + "step": 5470 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002373933372979773, + "loss": 0.4285, + "step": 5471 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023722061385743592, + "loss": 0.3597, + "step": 5472 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023704793373142225, + "loss": 0.0484, + "step": 5473 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023687529694839954, + "loss": 0.4232, + "step": 5474 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023670270353682373, + "loss": 0.479, + "step": 5475 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023653015352514384, + "loss": 0.4914, + "step": 5476 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023635764694180173, + "loss": 0.4467, + "step": 5477 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023618518381523203, + "loss": 0.3972, + "step": 5478 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002360127641738623, + "loss": 0.4215, + "step": 5479 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023584038804611279, + "loss": 0.4755, + "step": 5480 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023566805546039688, + "loss": 0.5176, + "step": 5481 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023549576644512016, + "loss": 0.5219, + "step": 5482 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002353235210286816, + "loss": 0.4348, + "step": 5483 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023515131923947281, + "loss": 0.4662, + "step": 5484 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002349791611058782, + "loss": 0.5063, + "step": 5485 + }, + { + "epoch": 0.69, + "learning_rate": 0.000234807046656275, + "loss": 0.5549, + "step": 5486 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002346349759190332, + "loss": 0.4231, + "step": 5487 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002344629489225159, + "loss": 0.3795, + "step": 5488 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002342909656950782, + "loss": 0.4071, + "step": 5489 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023411902626506876, + "loss": 0.4472, + "step": 5490 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023394713066082868, + "loss": 0.4059, + "step": 5491 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023377527891069218, + "loss": 0.4341, + "step": 5492 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023360347104298552, + "loss": 0.5105, + "step": 5493 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023343170708602846, + "loss": 0.5061, + "step": 5494 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023325998706813316, + "loss": 0.3633, + "step": 5495 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023308831101760485, + "loss": 0.048, + "step": 5496 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023291667896274088, + "loss": 0.4203, + "step": 5497 + }, + { + "epoch": 0.69, + "learning_rate": 0.000232745090931832, + "loss": 0.4241, + "step": 5498 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023257354695316147, + "loss": 0.5211, + "step": 5499 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023240204705500517, + "loss": 0.4686, + "step": 5500 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002322305912656319, + "loss": 0.475, + "step": 5501 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023205917961330308, + "loss": 0.4343, + "step": 5502 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023188781212627314, + "loss": 0.4924, + "step": 5503 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023171648883278846, + "loss": 0.5017, + "step": 5504 + }, + { + "epoch": 0.69, + "learning_rate": 0.000231545209761089, + "loss": 0.5071, + "step": 5505 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023137397493940698, + "loss": 0.4957, + "step": 5506 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002312027843959675, + "loss": 0.45, + "step": 5507 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023103163815898814, + "loss": 0.4555, + "step": 5508 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023086053625667952, + "loss": 0.4703, + "step": 5509 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023068947871724484, + "loss": 0.3884, + "step": 5510 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023051846556887951, + "loss": 0.4689, + "step": 5511 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023034749683977225, + "loss": 0.4802, + "step": 5512 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023017657255810427, + "loss": 0.5076, + "step": 5513 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002300056927520493, + "loss": 0.4333, + "step": 5514 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022983485744977388, + "loss": 0.4347, + "step": 5515 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022966406667943728, + "loss": 0.4867, + "step": 5516 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022949332046919146, + "loss": 0.5678, + "step": 5517 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002293226188471805, + "loss": 0.4515, + "step": 5518 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022915196184154175, + "loss": 0.4162, + "step": 5519 + }, + { + "epoch": 0.69, + "learning_rate": 0.000228981349480405, + "loss": 0.5341, + "step": 5520 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002288107817918927, + "loss": 0.4193, + "step": 5521 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022864025880411982, + "loss": 0.3997, + "step": 5522 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022846978054519418, + "loss": 0.5083, + "step": 5523 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022829934704321603, + "loss": 0.3992, + "step": 5524 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002281289583262785, + "loss": 0.5033, + "step": 5525 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002279586144224668, + "loss": 0.4436, + "step": 5526 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022778831535985927, + "loss": 0.4159, + "step": 5527 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022761806116652677, + "loss": 0.3828, + "step": 5528 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022744785187053256, + "loss": 0.4563, + "step": 5529 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022727768749993273, + "loss": 0.4301, + "step": 5530 + }, + { + "epoch": 0.69, + "learning_rate": 0.000227107568082776, + "loss": 0.3652, + "step": 5531 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002269374936471032, + "loss": 0.4172, + "step": 5532 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022676746422094823, + "loss": 0.4841, + "step": 5533 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022659747983233748, + "loss": 0.4702, + "step": 5534 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022642754050929004, + "loss": 0.4167, + "step": 5535 + }, + { + "epoch": 0.69, + "learning_rate": 0.000226257646279817, + "loss": 0.4257, + "step": 5536 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022608779717192264, + "loss": 0.4414, + "step": 5537 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022591799321360352, + "loss": 0.485, + "step": 5538 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022574823443284904, + "loss": 0.4473, + "step": 5539 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022557852085764053, + "loss": 0.3688, + "step": 5540 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002254088525159525, + "loss": 0.3677, + "step": 5541 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022523922943575175, + "loss": 0.4125, + "step": 5542 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022506965164499764, + "loss": 0.4616, + "step": 5543 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022490011917164206, + "loss": 0.3848, + "step": 5544 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022473063204362943, + "loss": 0.3877, + "step": 5545 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022456119028889672, + "loss": 0.3917, + "step": 5546 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022439179393537358, + "loss": 0.4471, + "step": 5547 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002242224430109816, + "loss": 0.444, + "step": 5548 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002240531375436355, + "loss": 0.4111, + "step": 5549 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022388387756124223, + "loss": 0.5042, + "step": 5550 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002237146630917014, + "loss": 0.4612, + "step": 5551 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022354549416290497, + "loss": 0.5092, + "step": 5552 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022337637080273743, + "loss": 0.4156, + "step": 5553 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022320729303907595, + "loss": 0.4277, + "step": 5554 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022303826089978962, + "loss": 0.387, + "step": 5555 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022286927441274063, + "loss": 0.3933, + "step": 5556 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002227003336057834, + "loss": 0.3862, + "step": 5557 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022253143850676482, + "loss": 0.6559, + "step": 5558 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022236258914352426, + "loss": 0.4648, + "step": 5559 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022219378554389353, + "loss": 0.408, + "step": 5560 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022202502773569706, + "loss": 0.4606, + "step": 5561 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002218563157467513, + "loss": 0.3856, + "step": 5562 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002216876496048656, + "loss": 0.4648, + "step": 5563 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022151902933784153, + "loss": 0.4028, + "step": 5564 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022135045497347322, + "loss": 0.4156, + "step": 5565 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002211819265395471, + "loss": 0.3636, + "step": 5566 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022101344406384222, + "loss": 0.5044, + "step": 5567 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022084500757412978, + "loss": 0.5818, + "step": 5568 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022067661709817383, + "loss": 0.4137, + "step": 5569 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022050827266373015, + "loss": 0.4777, + "step": 5570 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022033997429854753, + "loss": 0.3931, + "step": 5571 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022017172203036702, + "loss": 0.5327, + "step": 5572 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022000351588692197, + "loss": 0.4786, + "step": 5573 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021983535589593838, + "loss": 0.0482, + "step": 5574 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002196672420851341, + "loss": 0.4354, + "step": 5575 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021949917448221983, + "loss": 0.4304, + "step": 5576 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021933115311489865, + "loss": 0.4768, + "step": 5577 + }, + { + "epoch": 0.7, + "learning_rate": 0.000219163178010866, + "loss": 0.048, + "step": 5578 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021899524919780927, + "loss": 0.4537, + "step": 5579 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021882736670340874, + "loss": 0.3903, + "step": 5580 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002186595305553369, + "loss": 0.454, + "step": 5581 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021849174078125843, + "loss": 0.4683, + "step": 5582 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021832399740883085, + "loss": 0.3583, + "step": 5583 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021815630046570318, + "loss": 0.4879, + "step": 5584 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021798864997951757, + "loss": 0.4193, + "step": 5585 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021782104597790818, + "loss": 0.3855, + "step": 5586 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021765348848850163, + "loss": 0.4177, + "step": 5587 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021748597753891674, + "loss": 0.4137, + "step": 5588 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002173185131567647, + "loss": 0.4378, + "step": 5589 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021715109536964912, + "loss": 0.4197, + "step": 5590 + }, + { + "epoch": 0.7, + "learning_rate": 0.000216983724205166, + "loss": 0.413, + "step": 5591 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021681639969090317, + "loss": 0.4624, + "step": 5592 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021664912185444124, + "loss": 0.4325, + "step": 5593 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021648189072335312, + "loss": 0.4371, + "step": 5594 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021631470632520373, + "loss": 0.4252, + "step": 5595 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021614756868755059, + "loss": 0.4644, + "step": 5596 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021598047783794334, + "loss": 0.5066, + "step": 5597 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002158134338039241, + "loss": 0.3864, + "step": 5598 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021564643661302674, + "loss": 0.4108, + "step": 5599 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021547948629277807, + "loss": 0.4084, + "step": 5600 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021531258287069683, + "loss": 0.0483, + "step": 5601 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021514572637429407, + "loss": 0.4998, + "step": 5602 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021497891683107317, + "loss": 0.5785, + "step": 5603 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021481215426852974, + "loss": 0.4406, + "step": 5604 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021464543871415177, + "loss": 0.4999, + "step": 5605 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021447877019541901, + "loss": 0.0482, + "step": 5606 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021431214873980403, + "loss": 0.4414, + "step": 5607 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021414557437477138, + "loss": 0.0482, + "step": 5608 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021397904712777794, + "loss": 0.4768, + "step": 5609 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021381256702627272, + "loss": 0.483, + "step": 5610 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021364613409769712, + "loss": 0.5026, + "step": 5611 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002134797483694847, + "loss": 0.0477, + "step": 5612 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021331340986906094, + "loss": 0.4656, + "step": 5613 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021314711862384394, + "loss": 0.5052, + "step": 5614 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021298087466124394, + "loss": 0.4388, + "step": 5615 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021281467800866316, + "loss": 0.4215, + "step": 5616 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002126485286934965, + "loss": 0.4664, + "step": 5617 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021248242674313028, + "loss": 0.454, + "step": 5618 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021231637218494372, + "loss": 0.4907, + "step": 5619 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021215036504630786, + "loss": 0.4308, + "step": 5620 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021198440535458634, + "loss": 0.4685, + "step": 5621 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021181849313713425, + "loss": 0.4783, + "step": 5622 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002116526284212995, + "loss": 0.4802, + "step": 5623 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021148681123442194, + "loss": 0.4823, + "step": 5624 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021132104160383354, + "loss": 0.4756, + "step": 5625 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021115531955685852, + "loss": 0.4165, + "step": 5626 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021098964512081343, + "loss": 0.5341, + "step": 5627 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021082401832300641, + "loss": 0.405, + "step": 5628 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021065843919073824, + "loss": 0.3975, + "step": 5629 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021049290775130176, + "loss": 0.4287, + "step": 5630 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002103274240319818, + "loss": 0.4774, + "step": 5631 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021016198806005554, + "loss": 0.048, + "step": 5632 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020999659986279212, + "loss": 0.5011, + "step": 5633 + }, + { + "epoch": 0.71, + "learning_rate": 0.000209831259467453, + "loss": 0.3823, + "step": 5634 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020966596690129137, + "loss": 0.4232, + "step": 5635 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002095007221915528, + "loss": 0.4819, + "step": 5636 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020933552536547506, + "loss": 0.4146, + "step": 5637 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020917037645028786, + "loss": 0.4825, + "step": 5638 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020900527547321318, + "loss": 0.5001, + "step": 5639 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002088402224614649, + "loss": 0.4628, + "step": 5640 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020867521744224904, + "loss": 0.5427, + "step": 5641 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020851026044276406, + "loss": 0.4506, + "step": 5642 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002083453514901998, + "loss": 0.3726, + "step": 5643 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020818049061173866, + "loss": 0.3846, + "step": 5644 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020801567783455523, + "loss": 0.514, + "step": 5645 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002078509131858158, + "loss": 0.5043, + "step": 5646 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020768619669267897, + "loss": 0.3719, + "step": 5647 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002075215283822953, + "loss": 0.4762, + "step": 5648 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002073569082818077, + "loss": 0.3835, + "step": 5649 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020719233641835046, + "loss": 0.4519, + "step": 5650 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020702781281905052, + "loss": 0.4222, + "step": 5651 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020686333751102675, + "loss": 0.5889, + "step": 5652 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002066989105213899, + "loss": 0.4374, + "step": 5653 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002065345318772429, + "loss": 0.5184, + "step": 5654 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020637020160568065, + "loss": 0.0477, + "step": 5655 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020620591973379026, + "loss": 0.4071, + "step": 5656 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020604168628865027, + "loss": 0.4868, + "step": 5657 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020587750129733197, + "loss": 0.3372, + "step": 5658 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020571336478689817, + "loss": 0.4559, + "step": 5659 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020554927678440404, + "loss": 0.5123, + "step": 5660 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020538523731689667, + "loss": 0.4756, + "step": 5661 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020522124641141472, + "loss": 0.4217, + "step": 5662 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002050573040949893, + "loss": 0.5109, + "step": 5663 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020489341039464355, + "loss": 0.406, + "step": 5664 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020472956533739244, + "loss": 0.3989, + "step": 5665 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002045657689502427, + "loss": 0.4541, + "step": 5666 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002044020212601934, + "loss": 0.5095, + "step": 5667 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002042383222942355, + "loss": 0.4302, + "step": 5668 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020407467207935178, + "loss": 0.4677, + "step": 5669 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020391107064251707, + "loss": 0.4103, + "step": 5670 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020374751801069846, + "loss": 0.5397, + "step": 5671 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020358401421085426, + "loss": 0.5632, + "step": 5672 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002034205592699354, + "loss": 0.4054, + "step": 5673 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020325715321488453, + "loss": 0.4138, + "step": 5674 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020309379607263623, + "loss": 0.4382, + "step": 5675 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020293048787011698, + "loss": 0.4867, + "step": 5676 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020276722863424534, + "loss": 0.5413, + "step": 5677 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020260401839193183, + "loss": 0.4498, + "step": 5678 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020244085717007837, + "loss": 0.4657, + "step": 5679 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002022777449955795, + "loss": 0.5236, + "step": 5680 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020211468189532122, + "loss": 0.3933, + "step": 5681 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020195166789618168, + "loss": 0.4744, + "step": 5682 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020178870302503088, + "loss": 0.4735, + "step": 5683 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020162578730873067, + "loss": 0.3866, + "step": 5684 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020146292077413474, + "loss": 0.4501, + "step": 5685 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020130010344808903, + "loss": 0.4982, + "step": 5686 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020113733535743073, + "loss": 0.4259, + "step": 5687 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020097461652898935, + "loss": 0.4304, + "step": 5688 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020081194698958632, + "loss": 0.437, + "step": 5689 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002006493267660348, + "loss": 0.4432, + "step": 5690 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020048675588513987, + "loss": 0.5094, + "step": 5691 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020032423437369844, + "loss": 0.4769, + "step": 5692 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020016176225849948, + "loss": 0.5544, + "step": 5693 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019999933956632327, + "loss": 0.4376, + "step": 5694 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001998369663239425, + "loss": 0.4713, + "step": 5695 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001996746425581215, + "loss": 0.4747, + "step": 5696 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019951236829561658, + "loss": 0.4926, + "step": 5697 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019935014356317566, + "loss": 0.5319, + "step": 5698 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019918796838753862, + "loss": 0.4684, + "step": 5699 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001990258427954374, + "loss": 0.4033, + "step": 5700 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019886376681359515, + "loss": 0.4364, + "step": 5701 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001987017404687274, + "loss": 0.4633, + "step": 5702 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019853976378754134, + "loss": 0.469, + "step": 5703 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001983778367967361, + "loss": 0.5227, + "step": 5704 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019821595952300214, + "loss": 0.4734, + "step": 5705 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019805413199302225, + "loss": 0.4581, + "step": 5706 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019789235423347084, + "loss": 0.5161, + "step": 5707 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019773062627101423, + "loss": 0.4337, + "step": 5708 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019756894813231012, + "loss": 0.4316, + "step": 5709 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019740731984400846, + "loss": 0.3986, + "step": 5710 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019724574143275075, + "loss": 0.4163, + "step": 5711 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019708421292517043, + "loss": 0.048, + "step": 5712 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019692273434789248, + "loss": 0.4637, + "step": 5713 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019676130572753391, + "loss": 0.4818, + "step": 5714 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019659992709070346, + "loss": 0.6699, + "step": 5715 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019643859846400126, + "loss": 0.4895, + "step": 5716 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019627731987401954, + "loss": 0.3648, + "step": 5717 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019611609134734237, + "loss": 0.3795, + "step": 5718 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019595491291054528, + "loss": 0.0484, + "step": 5719 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019579378459019575, + "loss": 0.432, + "step": 5720 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019563270641285296, + "loss": 0.516, + "step": 5721 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019547167840506792, + "loss": 0.048, + "step": 5722 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019531070059338284, + "loss": 0.5605, + "step": 5723 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019514977300433235, + "loss": 0.449, + "step": 5724 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019498889566444246, + "loss": 0.5209, + "step": 5725 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019482806860023094, + "loss": 0.4954, + "step": 5726 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001946672918382073, + "loss": 0.4946, + "step": 5727 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019450656540487277, + "loss": 0.4119, + "step": 5728 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019434588932672021, + "loss": 0.4655, + "step": 5729 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019418526363023435, + "loss": 0.4823, + "step": 5730 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001940246883418913, + "loss": 0.4517, + "step": 5731 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019386416348815905, + "loss": 0.5043, + "step": 5732 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001937036890954974, + "loss": 0.4622, + "step": 5733 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001935432651903577, + "loss": 0.4231, + "step": 5734 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019338289179918295, + "loss": 0.3976, + "step": 5735 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019322256894840784, + "loss": 0.4688, + "step": 5736 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019306229666445895, + "loss": 0.4357, + "step": 5737 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019290207497375407, + "loss": 0.3723, + "step": 5738 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019274190390270292, + "loss": 0.4471, + "step": 5739 + }, + { + "epoch": 0.72, + "learning_rate": 0.000192581783477707, + "loss": 0.4614, + "step": 5740 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019242171372515926, + "loss": 0.5036, + "step": 5741 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019226169467144439, + "loss": 0.5552, + "step": 5742 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001921017263429388, + "loss": 0.3616, + "step": 5743 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001919418087660102, + "loss": 0.5226, + "step": 5744 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019178194196701825, + "loss": 0.0483, + "step": 5745 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019162212597231427, + "loss": 0.47, + "step": 5746 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019146236080824113, + "loss": 0.4505, + "step": 5747 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019130264650113306, + "loss": 0.3816, + "step": 5748 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019114298307731621, + "loss": 0.4823, + "step": 5749 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001909833705631084, + "loss": 0.3966, + "step": 5750 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019082380898481893, + "loss": 0.0481, + "step": 5751 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019066429836874844, + "loss": 0.3989, + "step": 5752 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019050483874118962, + "loss": 0.4883, + "step": 5753 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019034543012842658, + "loss": 0.542, + "step": 5754 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001901860725567349, + "loss": 0.4128, + "step": 5755 + }, + { + "epoch": 0.72, + "learning_rate": 0.000190026766052382, + "loss": 0.3704, + "step": 5756 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018986751064162667, + "loss": 0.3605, + "step": 5757 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018970830635071928, + "loss": 0.384, + "step": 5758 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001895491532059021, + "loss": 0.5042, + "step": 5759 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018939005123340836, + "loss": 0.4313, + "step": 5760 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001892310004594633, + "loss": 0.4896, + "step": 5761 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018907200091028366, + "loss": 0.4707, + "step": 5762 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018891305261207775, + "loss": 0.48, + "step": 5763 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001887541555910453, + "loss": 0.5337, + "step": 5764 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001885953098733777, + "loss": 0.5244, + "step": 5765 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001884365154852581, + "loss": 0.4653, + "step": 5766 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018827777245286042, + "loss": 0.4047, + "step": 5767 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018811908080235102, + "loss": 0.3934, + "step": 5768 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018796044055988725, + "loss": 0.5461, + "step": 5769 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018780185175161824, + "loss": 0.4727, + "step": 5770 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001876433144036845, + "loss": 0.5029, + "step": 5771 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001874848285422181, + "loss": 0.4296, + "step": 5772 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018732639419334285, + "loss": 0.4041, + "step": 5773 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001871680113831734, + "loss": 0.451, + "step": 5774 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018700968013781667, + "loss": 0.389, + "step": 5775 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018685140048337062, + "loss": 0.0479, + "step": 5776 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018669317244592493, + "loss": 0.4794, + "step": 5777 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018653499605156065, + "loss": 0.436, + "step": 5778 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001863768713263504, + "loss": 0.4601, + "step": 5779 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018621879829635817, + "loss": 0.4939, + "step": 5780 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018606077698763975, + "loss": 0.411, + "step": 5781 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018590280742624178, + "loss": 0.3641, + "step": 5782 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018574488963820284, + "loss": 0.4095, + "step": 5783 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018558702364955304, + "loss": 0.4121, + "step": 5784 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018542920948631363, + "loss": 0.4686, + "step": 5785 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001852714471744978, + "loss": 0.4844, + "step": 5786 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018511373674010945, + "loss": 0.3773, + "step": 5787 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018495607820914452, + "loss": 0.4406, + "step": 5788 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018479847160759018, + "loss": 0.4697, + "step": 5789 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018464091696142538, + "loss": 0.4841, + "step": 5790 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018448341429661973, + "loss": 0.5247, + "step": 5791 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018432596363913506, + "loss": 0.4778, + "step": 5792 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018416856501492424, + "loss": 0.4285, + "step": 5793 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018401121844993168, + "loss": 0.4854, + "step": 5794 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018385392397009332, + "loss": 0.5232, + "step": 5795 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018369668160133607, + "loss": 0.4878, + "step": 5796 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018353949136957866, + "loss": 0.4695, + "step": 5797 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018338235330073117, + "loss": 0.4408, + "step": 5798 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018322526742069496, + "loss": 0.4921, + "step": 5799 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018306823375536296, + "loss": 0.4797, + "step": 5800 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018291125233061927, + "loss": 0.5045, + "step": 5801 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001827543231723396, + "loss": 0.5097, + "step": 5802 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018259744630639101, + "loss": 0.4424, + "step": 5803 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018244062175863164, + "loss": 0.4507, + "step": 5804 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018228384955491135, + "loss": 0.4999, + "step": 5805 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018212712972107126, + "loss": 0.4332, + "step": 5806 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001819704622829439, + "loss": 0.4077, + "step": 5807 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018181384726635304, + "loss": 0.432, + "step": 5808 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018165728469711395, + "loss": 0.4675, + "step": 5809 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018150077460103337, + "loss": 0.4439, + "step": 5810 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001813443170039089, + "loss": 0.3838, + "step": 5811 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018118791193152988, + "loss": 0.0483, + "step": 5812 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018103155940967704, + "loss": 0.4399, + "step": 5813 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001808752594641222, + "loss": 0.4857, + "step": 5814 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018071901212062875, + "loss": 0.4525, + "step": 5815 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018056281740495123, + "loss": 0.4307, + "step": 5816 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018040667534283572, + "loss": 0.4554, + "step": 5817 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018025058596001927, + "loss": 0.4897, + "step": 5818 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001800945492822304, + "loss": 0.4984, + "step": 5819 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001799385653351892, + "loss": 0.4883, + "step": 5820 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017978263414460672, + "loss": 0.4176, + "step": 5821 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017962675573618552, + "loss": 0.4427, + "step": 5822 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017947093013561945, + "loss": 0.4808, + "step": 5823 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017931515736859343, + "loss": 0.5109, + "step": 5824 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017915943746078423, + "loss": 0.3978, + "step": 5825 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017900377043785897, + "loss": 0.4878, + "step": 5826 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001788481563254769, + "loss": 0.5126, + "step": 5827 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017869259514928827, + "loss": 0.5488, + "step": 5828 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017853708693493447, + "loss": 0.4636, + "step": 5829 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017838163170804855, + "loss": 0.4077, + "step": 5830 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001782262294942541, + "loss": 0.4127, + "step": 5831 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017807088031916675, + "loss": 0.562, + "step": 5832 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017791558420839293, + "loss": 0.4617, + "step": 5833 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017776034118753065, + "loss": 0.4105, + "step": 5834 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017760515128216865, + "loss": 0.4653, + "step": 5835 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017745001451788738, + "loss": 0.5023, + "step": 5836 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001772949309202585, + "loss": 0.505, + "step": 5837 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017713990051484464, + "loss": 0.4584, + "step": 5838 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017698492332720006, + "loss": 0.4376, + "step": 5839 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017682999938286966, + "loss": 0.5005, + "step": 5840 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017667512870739004, + "loss": 0.4832, + "step": 5841 + }, + { + "epoch": 0.73, + "learning_rate": 0.000176520311326289, + "loss": 0.5265, + "step": 5842 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001763655472650853, + "loss": 0.441, + "step": 5843 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017621083654928915, + "loss": 0.4393, + "step": 5844 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017605617920440187, + "loss": 0.4696, + "step": 5845 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017590157525591594, + "loss": 0.4025, + "step": 5846 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001757470247293152, + "loss": 0.3956, + "step": 5847 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017559252765007437, + "loss": 0.4956, + "step": 5848 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017543808404365963, + "loss": 0.5514, + "step": 5849 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017528369393552835, + "loss": 0.4716, + "step": 5850 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017512935735112888, + "loss": 0.4811, + "step": 5851 + }, + { + "epoch": 0.73, + "learning_rate": 0.000174975074315901, + "loss": 0.4209, + "step": 5852 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017482084485527538, + "loss": 0.4392, + "step": 5853 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017466666899467438, + "loss": 0.4971, + "step": 5854 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017451254675951067, + "loss": 0.5345, + "step": 5855 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017435847817518884, + "loss": 0.4564, + "step": 5856 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017420446326710433, + "loss": 0.506, + "step": 5857 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001740505020606437, + "loss": 0.5134, + "step": 5858 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017389659458118484, + "loss": 0.5547, + "step": 5859 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017374274085409663, + "loss": 0.4734, + "step": 5860 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017358894090473926, + "loss": 0.0482, + "step": 5861 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017343519475846364, + "loss": 0.4479, + "step": 5862 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017328150244061224, + "loss": 0.4883, + "step": 5863 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017312786397651853, + "loss": 0.4984, + "step": 5864 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001729742793915071, + "loss": 0.5125, + "step": 5865 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017282074871089365, + "loss": 0.4137, + "step": 5866 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017266727195998493, + "loss": 0.4985, + "step": 5867 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017251384916407915, + "loss": 0.4269, + "step": 5868 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001723604803484649, + "loss": 0.4133, + "step": 5869 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017220716553842253, + "loss": 0.4127, + "step": 5870 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017205390475922322, + "loss": 0.4985, + "step": 5871 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017190069803612934, + "loss": 0.4735, + "step": 5872 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017174754539439448, + "loss": 0.4211, + "step": 5873 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001715944468592628, + "loss": 0.4, + "step": 5874 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017144140245597007, + "loss": 0.4648, + "step": 5875 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017128841220974284, + "loss": 0.5424, + "step": 5876 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017113547614579916, + "loss": 0.4273, + "step": 5877 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017098259428934736, + "loss": 0.4648, + "step": 5878 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017082976666558757, + "loss": 0.5149, + "step": 5879 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017067699329971072, + "loss": 0.5011, + "step": 5880 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017052427421689875, + "loss": 0.3927, + "step": 5881 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017037160944232465, + "loss": 0.4921, + "step": 5882 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017021899900115277, + "loss": 0.4183, + "step": 5883 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017006644291853791, + "loss": 0.387, + "step": 5884 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016991394121962627, + "loss": 0.5145, + "step": 5885 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001697614939295552, + "loss": 0.4523, + "step": 5886 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016960910107345284, + "loss": 0.449, + "step": 5887 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001694567626764385, + "loss": 0.483, + "step": 5888 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016930447876362247, + "loss": 0.5175, + "step": 5889 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001691522493601062, + "loss": 0.4467, + "step": 5890 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001690000744909817, + "loss": 0.5671, + "step": 5891 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001688479541813324, + "loss": 0.4658, + "step": 5892 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001686958884562328, + "loss": 0.4327, + "step": 5893 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016854387734074804, + "loss": 0.4384, + "step": 5894 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001683919208599346, + "loss": 0.343, + "step": 5895 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001682400190388398, + "loss": 0.0476, + "step": 5896 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016808817190250186, + "loss": 0.5066, + "step": 5897 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016793637947595035, + "loss": 0.4177, + "step": 5898 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001677846417842052, + "loss": 0.4127, + "step": 5899 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016763295885227793, + "loss": 0.4391, + "step": 5900 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001674813307051707, + "loss": 0.4896, + "step": 5901 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001673297573678767, + "loss": 0.4484, + "step": 5902 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016717823886538014, + "loss": 0.4739, + "step": 5903 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001670267752226562, + "loss": 0.3944, + "step": 5904 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001668753664646711, + "loss": 0.4002, + "step": 5905 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016672401261638158, + "loss": 0.4415, + "step": 5906 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001665727137027358, + "loss": 0.4492, + "step": 5907 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016642146974867272, + "loss": 0.4999, + "step": 5908 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001662702807791222, + "loss": 0.4451, + "step": 5909 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016611914681900514, + "loss": 0.5101, + "step": 5910 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016596806789323314, + "loss": 0.4677, + "step": 5911 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016581704402670921, + "loss": 0.4875, + "step": 5912 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016566607524432658, + "loss": 0.4918, + "step": 5913 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001655151615709699, + "loss": 0.4758, + "step": 5914 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016536430303151466, + "loss": 0.457, + "step": 5915 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016521349965082738, + "loss": 0.4375, + "step": 5916 + }, + { + "epoch": 0.74, + "learning_rate": 0.000165062751453765, + "loss": 0.5029, + "step": 5917 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016491205846517576, + "loss": 0.4329, + "step": 5918 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016476142070989886, + "loss": 0.455, + "step": 5919 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001646108382127644, + "loss": 0.3767, + "step": 5920 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016446031099859288, + "loss": 0.4615, + "step": 5921 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016430983909219627, + "loss": 0.4342, + "step": 5922 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016415942251837707, + "loss": 0.5177, + "step": 5923 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016400906130192887, + "loss": 0.3753, + "step": 5924 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001638587554676361, + "loss": 0.5629, + "step": 5925 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016370850504027396, + "loss": 0.5065, + "step": 5926 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016355831004460876, + "loss": 0.3896, + "step": 5927 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016340817050539708, + "loss": 0.4259, + "step": 5928 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016325808644738704, + "loss": 0.4642, + "step": 5929 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016310805789531725, + "loss": 0.525, + "step": 5930 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016295808487391732, + "loss": 0.4684, + "step": 5931 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001628081674079076, + "loss": 0.4435, + "step": 5932 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001626583055219994, + "loss": 0.4286, + "step": 5933 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016250849924089484, + "loss": 0.5364, + "step": 5934 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016235874858928662, + "loss": 0.4834, + "step": 5935 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016220905359185862, + "loss": 0.5752, + "step": 5936 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016205941427328534, + "loss": 0.5034, + "step": 5937 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016190983065823229, + "loss": 0.3976, + "step": 5938 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016176030277135556, + "loss": 0.403, + "step": 5939 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016161083063730226, + "loss": 0.4367, + "step": 5940 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016146141428071014, + "loss": 0.4961, + "step": 5941 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001613120537262081, + "loss": 0.451, + "step": 5942 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001611627489984152, + "loss": 0.5236, + "step": 5943 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016101350012194182, + "loss": 0.4344, + "step": 5944 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016086430712138904, + "loss": 0.4819, + "step": 5945 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016071517002134872, + "loss": 0.4485, + "step": 5946 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016056608884640339, + "loss": 0.4789, + "step": 5947 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016041706362112645, + "loss": 0.4021, + "step": 5948 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016026809437008228, + "loss": 0.4285, + "step": 5949 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016011918111782548, + "loss": 0.4962, + "step": 5950 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015997032388890187, + "loss": 0.4141, + "step": 5951 + }, + { + "epoch": 0.75, + "learning_rate": 0.000159821522707848, + "loss": 0.4065, + "step": 5952 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015967277759919106, + "loss": 0.4691, + "step": 5953 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015952408858744914, + "loss": 0.4993, + "step": 5954 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015937545569713103, + "loss": 0.5726, + "step": 5955 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015922687895273598, + "loss": 0.5384, + "step": 5956 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015907835837875435, + "loss": 0.4226, + "step": 5957 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001589298939996671, + "loss": 0.4669, + "step": 5958 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015878148583994622, + "loss": 0.3817, + "step": 5959 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015863313392405382, + "loss": 0.4616, + "step": 5960 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015848483827644316, + "loss": 0.5021, + "step": 5961 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001583365989215582, + "loss": 0.5081, + "step": 5962 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015818841588383358, + "loss": 0.5109, + "step": 5963 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015804028918769487, + "loss": 0.483, + "step": 5964 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015789221885755773, + "loss": 0.439, + "step": 5965 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015774420491782904, + "loss": 0.5188, + "step": 5966 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001575962473929064, + "loss": 0.4548, + "step": 5967 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015744834630717798, + "loss": 0.402, + "step": 5968 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015730050168502252, + "loss": 0.0482, + "step": 5969 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015715271355080972, + "loss": 0.4851, + "step": 5970 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015700498192889996, + "loss": 0.4824, + "step": 5971 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015685730684364386, + "loss": 0.4855, + "step": 5972 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015670968831938315, + "loss": 0.5352, + "step": 5973 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015656212638045025, + "loss": 0.4766, + "step": 5974 + }, + { + "epoch": 0.75, + "learning_rate": 0.000156414621051168, + "loss": 0.4967, + "step": 5975 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015626717235585008, + "loss": 0.4065, + "step": 5976 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015611978031880087, + "loss": 0.4742, + "step": 5977 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015597244496431546, + "loss": 0.4755, + "step": 5978 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015582516631667908, + "loss": 0.5256, + "step": 5979 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015567794440016824, + "loss": 0.5529, + "step": 5980 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015553077923904986, + "loss": 0.451, + "step": 5981 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001553836708575815, + "loss": 0.5376, + "step": 5982 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001552366192800113, + "loss": 0.4294, + "step": 5983 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015508962453057823, + "loss": 0.4733, + "step": 5984 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001549426866335119, + "loss": 0.4216, + "step": 5985 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001547958056130321, + "loss": 0.447, + "step": 5986 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001546489814933496, + "loss": 0.5674, + "step": 5987 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015450221429866596, + "loss": 0.4722, + "step": 5988 + }, + { + "epoch": 0.75, + "learning_rate": 0.000154355504053173, + "loss": 0.5216, + "step": 5989 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015420885078105335, + "loss": 0.5403, + "step": 5990 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001540622545064802, + "loss": 0.3829, + "step": 5991 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015391571525361742, + "loss": 0.3865, + "step": 5992 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001537692330466195, + "loss": 0.4384, + "step": 5993 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015362280790963106, + "loss": 0.4749, + "step": 5994 + }, + { + "epoch": 0.75, + "learning_rate": 0.000153476439866788, + "loss": 0.4778, + "step": 5995 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015333012894221638, + "loss": 0.048, + "step": 5996 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015318387516003303, + "loss": 0.5096, + "step": 5997 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001530376785443452, + "loss": 0.4403, + "step": 5998 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015289153911925108, + "loss": 0.5416, + "step": 5999 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015274545690883884, + "loss": 0.048, + "step": 6000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001525994319371876, + "loss": 0.5786, + "step": 6001 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015245346422836713, + "loss": 0.4705, + "step": 6002 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015230755380643768, + "loss": 0.4772, + "step": 6003 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015216170069544976, + "loss": 0.3636, + "step": 6004 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015201590491944472, + "loss": 0.4667, + "step": 6005 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001518701665024545, + "loss": 0.4629, + "step": 6006 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015172448546850166, + "loss": 0.4026, + "step": 6007 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015157886184159874, + "loss": 0.4946, + "step": 6008 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015143329564574947, + "loss": 0.4949, + "step": 6009 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001512877869049478, + "loss": 0.5085, + "step": 6010 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015114233564317831, + "loss": 0.3995, + "step": 6011 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015099694188441598, + "loss": 0.3549, + "step": 6012 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015085160565262652, + "loss": 0.0479, + "step": 6013 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015070632697176595, + "loss": 0.5289, + "step": 6014 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015056110586578102, + "loss": 0.4524, + "step": 6015 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015041594235860867, + "loss": 0.5939, + "step": 6016 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015027083647417656, + "loss": 0.4904, + "step": 6017 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015012578823640283, + "loss": 0.5669, + "step": 6018 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001499807976691962, + "loss": 0.5354, + "step": 6019 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001498358647964557, + "loss": 0.4201, + "step": 6020 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014969098964207113, + "loss": 0.42, + "step": 6021 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001495461722299225, + "loss": 0.3976, + "step": 6022 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014940141258388024, + "loss": 0.5493, + "step": 6023 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001492567107278055, + "loss": 0.4451, + "step": 6024 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001491120666855499, + "loss": 0.5278, + "step": 6025 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014896748048095538, + "loss": 0.5891, + "step": 6026 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001488229521378544, + "loss": 0.4856, + "step": 6027 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014867848168006998, + "loss": 0.4446, + "step": 6028 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001485340691314156, + "loss": 0.4252, + "step": 6029 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014838971451569484, + "loss": 0.3686, + "step": 6030 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001482454178567021, + "loss": 0.5212, + "step": 6031 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014810117917822218, + "loss": 0.6208, + "step": 6032 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014795699850403028, + "loss": 0.5977, + "step": 6033 + }, + { + "epoch": 0.76, + "learning_rate": 0.000147812875857892, + "loss": 0.6367, + "step": 6034 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001476688112635634, + "loss": 0.5063, + "step": 6035 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014752480474479102, + "loss": 0.453, + "step": 6036 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001473808563253119, + "loss": 0.4542, + "step": 6037 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014723696602885305, + "loss": 0.3873, + "step": 6038 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014709313387913247, + "loss": 0.423, + "step": 6039 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001469493598998583, + "loss": 0.4998, + "step": 6040 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014680564411472918, + "loss": 0.4383, + "step": 6041 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014666198654743418, + "loss": 0.4706, + "step": 6042 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014651838722165244, + "loss": 0.4514, + "step": 6043 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001463748461610539, + "loss": 0.4598, + "step": 6044 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014623136338929883, + "loss": 0.4833, + "step": 6045 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014608793893003784, + "loss": 0.465, + "step": 6046 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001459445728069118, + "loss": 0.4318, + "step": 6047 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014580126504355206, + "loss": 0.4038, + "step": 6048 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014565801566358044, + "loss": 0.5056, + "step": 6049 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014551482469060906, + "loss": 0.4597, + "step": 6050 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001453716921482406, + "loss": 0.4393, + "step": 6051 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014522861806006755, + "loss": 0.5631, + "step": 6052 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014508560244967328, + "loss": 0.426, + "step": 6053 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014494264534063145, + "loss": 0.4601, + "step": 6054 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014479974675650603, + "loss": 0.4637, + "step": 6055 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014465690672085118, + "loss": 0.5394, + "step": 6056 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014451412525721174, + "loss": 0.5566, + "step": 6057 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014437140238912256, + "loss": 0.3825, + "step": 6058 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014422873814010916, + "loss": 0.4907, + "step": 6059 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014408613253368696, + "loss": 0.4873, + "step": 6060 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014394358559336208, + "loss": 0.4973, + "step": 6061 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001438010973426308, + "loss": 0.0479, + "step": 6062 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014365866780497995, + "loss": 0.4561, + "step": 6063 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014351629700388636, + "loss": 0.4507, + "step": 6064 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001433739849628174, + "loss": 0.3875, + "step": 6065 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001432317317052308, + "loss": 0.4829, + "step": 6066 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014308953725457424, + "loss": 0.4893, + "step": 6067 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001429474016342861, + "loss": 0.4639, + "step": 6068 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001428053248677949, + "loss": 0.4388, + "step": 6069 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014266330697851955, + "loss": 0.4659, + "step": 6070 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014252134798986904, + "loss": 0.551, + "step": 6071 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001423794479252429, + "loss": 0.4409, + "step": 6072 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014223760680803093, + "loss": 0.3676, + "step": 6073 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001420958246616129, + "loss": 0.4575, + "step": 6074 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014195410150935923, + "loss": 0.4376, + "step": 6075 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014181243737463039, + "loss": 0.4814, + "step": 6076 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014167083228077727, + "loss": 0.4761, + "step": 6077 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014152928625114092, + "loss": 0.3812, + "step": 6078 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014138779930905276, + "loss": 0.4156, + "step": 6079 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014124637147783432, + "loss": 0.5461, + "step": 6080 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014110500278079774, + "loss": 0.5126, + "step": 6081 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001409636932412447, + "loss": 0.4396, + "step": 6082 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014082244288246787, + "loss": 0.4639, + "step": 6083 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014068125172774976, + "loss": 0.452, + "step": 6084 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001405401198003634, + "loss": 0.5544, + "step": 6085 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014039904712357166, + "loss": 0.5017, + "step": 6086 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014025803372062795, + "loss": 0.4834, + "step": 6087 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014011707961477589, + "loss": 0.3864, + "step": 6088 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013997618482924933, + "loss": 0.4982, + "step": 6089 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013983534938727212, + "loss": 0.4427, + "step": 6090 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013969457331205844, + "loss": 0.4547, + "step": 6091 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001395538566268129, + "loss": 0.4772, + "step": 6092 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013941319935473006, + "loss": 0.048, + "step": 6093 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001392726015189949, + "loss": 0.4653, + "step": 6094 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001391320631427825, + "loss": 0.5063, + "step": 6095 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013899158424925784, + "loss": 0.4695, + "step": 6096 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013885116486157662, + "loss": 0.4525, + "step": 6097 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013871080500288434, + "loss": 0.5581, + "step": 6098 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013857050469631704, + "loss": 0.5757, + "step": 6099 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001384302639650006, + "loss": 0.6628, + "step": 6100 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001382900828320512, + "loss": 0.5118, + "step": 6101 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001381499613205753, + "loss": 0.4799, + "step": 6102 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013800989945366948, + "loss": 0.4207, + "step": 6103 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013786989725442033, + "loss": 0.4459, + "step": 6104 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013772995474590477, + "loss": 0.048, + "step": 6105 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001375900719511899, + "loss": 0.5011, + "step": 6106 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013745024889333284, + "loss": 0.505, + "step": 6107 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013731048559538102, + "loss": 0.5203, + "step": 6108 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013717078208037192, + "loss": 0.4249, + "step": 6109 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001370311383713333, + "loss": 0.5425, + "step": 6110 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001368915544912827, + "loss": 0.4894, + "step": 6111 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013675203046322826, + "loss": 0.4857, + "step": 6112 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013661256631016793, + "loss": 0.5044, + "step": 6113 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001364731620550899, + "loss": 0.4056, + "step": 6114 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013633381772097265, + "loss": 0.4596, + "step": 6115 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013619453333078447, + "loss": 0.5623, + "step": 6116 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001360553089074841, + "loss": 0.4181, + "step": 6117 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013591614447401996, + "loss": 0.4344, + "step": 6118 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013577704005333097, + "loss": 0.4601, + "step": 6119 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013563799566834606, + "loss": 0.4014, + "step": 6120 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013549901134198418, + "loss": 0.4095, + "step": 6121 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013536008709715447, + "loss": 0.4564, + "step": 6122 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013522122295675615, + "loss": 0.4493, + "step": 6123 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013508241894367868, + "loss": 0.4214, + "step": 6124 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013494367508080102, + "loss": 0.5563, + "step": 6125 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001348049913909929, + "loss": 0.4273, + "step": 6126 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013466636789711384, + "loss": 0.4995, + "step": 6127 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013452780462201348, + "loss": 0.5071, + "step": 6128 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013438930158853169, + "loss": 0.5038, + "step": 6129 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001342508588194979, + "loss": 0.4926, + "step": 6130 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013411247633773204, + "loss": 0.5072, + "step": 6131 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001339741541660442, + "loss": 0.4796, + "step": 6132 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001338358923272343, + "loss": 0.4135, + "step": 6133 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013369769084409211, + "loss": 0.3931, + "step": 6134 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013355954973939794, + "loss": 0.4044, + "step": 6135 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013342146903592178, + "loss": 0.4948, + "step": 6136 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013328344875642383, + "loss": 0.4611, + "step": 6137 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001331454889236543, + "loss": 0.5782, + "step": 6138 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001330075895603536, + "loss": 0.4796, + "step": 6139 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013286975068925168, + "loss": 0.5103, + "step": 6140 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013273197233306893, + "loss": 0.5721, + "step": 6141 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013259425451451573, + "loss": 0.4578, + "step": 6142 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013245659725629244, + "loss": 0.4904, + "step": 6143 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013231900058108937, + "loss": 0.472, + "step": 6144 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013218146451158692, + "loss": 0.4459, + "step": 6145 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013204398907045562, + "loss": 0.4971, + "step": 6146 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013190657428035558, + "loss": 0.4795, + "step": 6147 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013176922016393727, + "loss": 0.6119, + "step": 6148 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013163192674384123, + "loss": 0.3936, + "step": 6149 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001314946940426977, + "loss": 0.4415, + "step": 6150 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013135752208312713, + "loss": 0.5436, + "step": 6151 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013122041088773988, + "loss": 0.4647, + "step": 6152 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013108336047913633, + "loss": 0.0477, + "step": 6153 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001309463708799069, + "loss": 0.4901, + "step": 6154 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013080944211263162, + "loss": 0.5206, + "step": 6155 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013067257419988095, + "loss": 0.4415, + "step": 6156 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001305357671642151, + "loss": 0.0478, + "step": 6157 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001303990210281844, + "loss": 0.5186, + "step": 6158 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001302623358143289, + "loss": 0.4523, + "step": 6159 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001301257115451787, + "loss": 0.5039, + "step": 6160 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012998914824325418, + "loss": 0.4252, + "step": 6161 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012985264593106505, + "loss": 0.4122, + "step": 6162 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001297162046311114, + "loss": 0.5266, + "step": 6163 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012957982436588317, + "loss": 0.0478, + "step": 6164 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012944350515786018, + "loss": 0.467, + "step": 6165 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001293072470295124, + "loss": 0.5182, + "step": 6166 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012917105000329943, + "loss": 0.5339, + "step": 6167 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012903491410167113, + "loss": 0.5432, + "step": 6168 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012889883934706674, + "loss": 0.5466, + "step": 6169 + }, + { + "epoch": 0.77, + "learning_rate": 0.000128762825761916, + "loss": 0.4288, + "step": 6170 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012862687336863833, + "loss": 0.4448, + "step": 6171 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012849098218964321, + "loss": 0.4502, + "step": 6172 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012835515224732957, + "loss": 0.5909, + "step": 6173 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012821938356408674, + "loss": 0.4229, + "step": 6174 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012808367616229377, + "loss": 0.4987, + "step": 6175 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001279480300643198, + "loss": 0.4564, + "step": 6176 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012781244529252338, + "loss": 0.5156, + "step": 6177 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001276769218692534, + "loss": 0.4426, + "step": 6178 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001275414598168485, + "loss": 0.4199, + "step": 6179 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012740605915763714, + "loss": 0.4189, + "step": 6180 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012727071991393784, + "loss": 0.4615, + "step": 6181 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012713544210805878, + "loss": 0.426, + "step": 6182 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001270002257622983, + "loss": 0.4213, + "step": 6183 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012686507089894412, + "loss": 0.474, + "step": 6184 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012672997754027427, + "loss": 0.4705, + "step": 6185 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001265949457085565, + "loss": 0.5093, + "step": 6186 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012645997542604838, + "loss": 0.4137, + "step": 6187 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012632506671499743, + "loss": 0.4822, + "step": 6188 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012619021959764094, + "loss": 0.4329, + "step": 6189 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001260554340962062, + "loss": 0.4182, + "step": 6190 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001259207102329099, + "loss": 0.5121, + "step": 6191 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012578604802995912, + "loss": 0.3719, + "step": 6192 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012565144750955042, + "loss": 0.4938, + "step": 6193 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001255169086938704, + "loss": 0.5286, + "step": 6194 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012538243160509539, + "loss": 0.447, + "step": 6195 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012524801626539152, + "loss": 0.4568, + "step": 6196 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012511366269691475, + "loss": 0.4181, + "step": 6197 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001249793709218111, + "loss": 0.4547, + "step": 6198 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001248451409622159, + "loss": 0.4601, + "step": 6199 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001247109728402547, + "loss": 0.4229, + "step": 6200 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012457686657804274, + "loss": 0.5792, + "step": 6201 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012444282219768504, + "loss": 0.0479, + "step": 6202 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012430883972127648, + "loss": 0.3944, + "step": 6203 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012417491917090167, + "loss": 0.4662, + "step": 6204 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012404106056863523, + "loss": 0.4642, + "step": 6205 + }, + { + "epoch": 0.78, + "learning_rate": 0.000123907263936541, + "loss": 0.4155, + "step": 6206 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012377352929667323, + "loss": 0.4481, + "step": 6207 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001236398566710757, + "loss": 0.6287, + "step": 6208 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001235062460817819, + "loss": 0.6359, + "step": 6209 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012337269755081522, + "loss": 0.4016, + "step": 6210 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012323921110018893, + "loss": 0.3926, + "step": 6211 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012310578675190555, + "loss": 0.4481, + "step": 6212 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012297242452795793, + "loss": 0.4404, + "step": 6213 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001228391244503284, + "loss": 0.4189, + "step": 6214 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001227058865409894, + "loss": 0.4684, + "step": 6215 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012257271082190242, + "loss": 0.4507, + "step": 6216 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012243959731501923, + "loss": 0.4249, + "step": 6217 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001223065460422814, + "loss": 0.549, + "step": 6218 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012217355702562, + "loss": 0.4897, + "step": 6219 + }, + { + "epoch": 0.78, + "learning_rate": 0.000122040630286956, + "loss": 0.4406, + "step": 6220 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012190776584819974, + "loss": 0.4365, + "step": 6221 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001217749637312518, + "loss": 0.475, + "step": 6222 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012164222395800218, + "loss": 0.407, + "step": 6223 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001215095465503307, + "loss": 0.4581, + "step": 6224 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012137693153010687, + "loss": 0.432, + "step": 6225 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012124437891918994, + "loss": 0.5032, + "step": 6226 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012111188873942903, + "loss": 0.0476, + "step": 6227 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001209794610126625, + "loss": 0.5567, + "step": 6228 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012084709576071884, + "loss": 0.4298, + "step": 6229 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001207147930054161, + "loss": 0.4835, + "step": 6230 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012058255276856206, + "loss": 0.5392, + "step": 6231 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012045037507195422, + "loss": 0.4369, + "step": 6232 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012031825993737965, + "loss": 0.4825, + "step": 6233 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012018620738661545, + "loss": 0.4875, + "step": 6234 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012005421744142775, + "loss": 0.452, + "step": 6235 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011992229012357297, + "loss": 0.4586, + "step": 6236 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011979042545479695, + "loss": 0.4614, + "step": 6237 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011965862345683526, + "loss": 0.4681, + "step": 6238 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011952688415141317, + "loss": 0.4368, + "step": 6239 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011939520756024547, + "loss": 0.416, + "step": 6240 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011926359370503692, + "loss": 0.42, + "step": 6241 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011913204260748145, + "loss": 0.4218, + "step": 6242 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011900055428926304, + "loss": 0.4614, + "step": 6243 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011886912877205525, + "loss": 0.5436, + "step": 6244 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011873776607752119, + "loss": 0.4349, + "step": 6245 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011860646622731375, + "loss": 0.4377, + "step": 6246 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011847522924307536, + "loss": 0.3813, + "step": 6247 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011834405514643804, + "loss": 0.4092, + "step": 6248 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011821294395902377, + "loss": 0.4492, + "step": 6249 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011808189570244354, + "loss": 0.4756, + "step": 6250 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011795091039829853, + "loss": 0.5723, + "step": 6251 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011781998806817934, + "loss": 0.5404, + "step": 6252 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011768912873366616, + "loss": 0.4204, + "step": 6253 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011755833241632902, + "loss": 0.4966, + "step": 6254 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011742759913772705, + "loss": 0.3951, + "step": 6255 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011729692891940946, + "loss": 0.467, + "step": 6256 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011716632178291498, + "loss": 0.4359, + "step": 6257 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011703577774977193, + "loss": 0.488, + "step": 6258 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011690529684149798, + "loss": 0.4795, + "step": 6259 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011677487907960072, + "loss": 0.386, + "step": 6260 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011664452448557722, + "loss": 0.4961, + "step": 6261 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011651423308091408, + "loss": 0.4817, + "step": 6262 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011638400488708767, + "loss": 0.4563, + "step": 6263 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011625383992556355, + "loss": 0.467, + "step": 6264 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011612373821779726, + "loss": 0.5078, + "step": 6265 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011599369978523377, + "loss": 0.5601, + "step": 6266 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011586372464930756, + "loss": 0.4613, + "step": 6267 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001157338128314428, + "loss": 0.4673, + "step": 6268 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011560396435305303, + "loss": 0.4279, + "step": 6269 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011547417923554165, + "loss": 0.5858, + "step": 6270 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011534445750030142, + "loss": 0.4467, + "step": 6271 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011521479916871447, + "loss": 0.4574, + "step": 6272 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011508520426215279, + "loss": 0.4534, + "step": 6273 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011495567280197783, + "loss": 0.4928, + "step": 6274 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011482620480954053, + "loss": 0.5004, + "step": 6275 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001146968003061814, + "loss": 0.4124, + "step": 6276 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011456745931323053, + "loss": 0.4198, + "step": 6277 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011443818185200755, + "loss": 0.4657, + "step": 6278 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011430896794382134, + "loss": 0.5341, + "step": 6279 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011417981760997065, + "loss": 0.4261, + "step": 6280 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011405073087174361, + "loss": 0.483, + "step": 6281 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011392170775041788, + "loss": 0.0473, + "step": 6282 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011379274826726066, + "loss": 0.0474, + "step": 6283 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011366385244352862, + "loss": 0.5275, + "step": 6284 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011353502030046808, + "loss": 0.4276, + "step": 6285 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011340625185931452, + "loss": 0.4434, + "step": 6286 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011327754714129323, + "loss": 0.4425, + "step": 6287 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011314890616761886, + "loss": 0.4264, + "step": 6288 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011302032895949571, + "loss": 0.3524, + "step": 6289 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011289181553811733, + "loss": 0.4557, + "step": 6290 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011276336592466696, + "loss": 0.4025, + "step": 6291 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011263498014031726, + "loss": 0.4319, + "step": 6292 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001125066582062304, + "loss": 0.5209, + "step": 6293 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011237840014355777, + "loss": 0.4918, + "step": 6294 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011225020597344055, + "loss": 0.4509, + "step": 6295 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011212207571700933, + "loss": 0.5432, + "step": 6296 + }, + { + "epoch": 0.79, + "learning_rate": 0.000111994009395384, + "loss": 0.4948, + "step": 6297 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011186600702967432, + "loss": 0.5189, + "step": 6298 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011173806864097885, + "loss": 0.5737, + "step": 6299 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011161019425038604, + "loss": 0.5044, + "step": 6300 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011148238387897386, + "loss": 0.4088, + "step": 6301 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011135463754780961, + "loss": 0.5089, + "step": 6302 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011122695527794985, + "loss": 0.4201, + "step": 6303 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011109933709044073, + "loss": 0.5099, + "step": 6304 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011097178300631794, + "loss": 0.4677, + "step": 6305 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011084429304660648, + "loss": 0.4744, + "step": 6306 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011071686723232099, + "loss": 0.4866, + "step": 6307 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011058950558446506, + "loss": 0.4843, + "step": 6308 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011046220812403213, + "loss": 0.4559, + "step": 6309 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011033497487200494, + "loss": 0.0475, + "step": 6310 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011020780584935564, + "loss": 0.4241, + "step": 6311 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011008070107704583, + "loss": 0.4903, + "step": 6312 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010995366057602641, + "loss": 0.4027, + "step": 6313 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010982668436723775, + "loss": 0.4536, + "step": 6314 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010969977247160984, + "loss": 0.4952, + "step": 6315 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010957292491006154, + "loss": 0.4923, + "step": 6316 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010944614170350153, + "loss": 0.5951, + "step": 6317 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010931942287282786, + "loss": 0.4315, + "step": 6318 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010919276843892784, + "loss": 0.4753, + "step": 6319 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010906617842267819, + "loss": 0.3671, + "step": 6320 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010893965284494501, + "loss": 0.4313, + "step": 6321 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010881319172658405, + "loss": 0.5116, + "step": 6322 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010868679508843976, + "loss": 0.5631, + "step": 6323 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001085604629513467, + "loss": 0.4201, + "step": 6324 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010843419533612831, + "loss": 0.0473, + "step": 6325 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010830799226359767, + "loss": 0.5243, + "step": 6326 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010818185375455714, + "loss": 0.4446, + "step": 6327 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001080557798297983, + "loss": 0.4418, + "step": 6328 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001079297705101025, + "loss": 0.4907, + "step": 6329 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010780382581623977, + "loss": 0.5027, + "step": 6330 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010767794576897, + "loss": 0.4536, + "step": 6331 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010755213038904233, + "loss": 0.5514, + "step": 6332 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010742637969719515, + "loss": 0.4254, + "step": 6333 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010730069371415623, + "loss": 0.3875, + "step": 6334 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010717507246064273, + "loss": 0.4407, + "step": 6335 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010704951595736112, + "loss": 0.4913, + "step": 6336 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001069240242250072, + "loss": 0.5753, + "step": 6337 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010679859728426583, + "loss": 0.4735, + "step": 6338 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010667323515581157, + "loss": 0.5573, + "step": 6339 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010654793786030809, + "loss": 0.4454, + "step": 6340 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010642270541840866, + "loss": 0.5371, + "step": 6341 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001062975378507553, + "loss": 0.4255, + "step": 6342 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001061724351779798, + "loss": 0.5492, + "step": 6343 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010604739742070313, + "loss": 0.4515, + "step": 6344 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010592242459953571, + "loss": 0.5322, + "step": 6345 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010579751673507681, + "loss": 0.4044, + "step": 6346 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010567267384791535, + "loss": 0.4189, + "step": 6347 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010554789595862956, + "loss": 0.501, + "step": 6348 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010542318308778687, + "loss": 0.4562, + "step": 6349 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001052985352559439, + "loss": 0.4962, + "step": 6350 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010517395248364686, + "loss": 0.4788, + "step": 6351 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010504943479143065, + "loss": 0.4032, + "step": 6352 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010492498219982005, + "loss": 0.3792, + "step": 6353 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010480059472932879, + "loss": 0.4769, + "step": 6354 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010467627240045996, + "loss": 0.4674, + "step": 6355 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010455201523370588, + "loss": 0.4276, + "step": 6356 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001044278232495482, + "loss": 0.3774, + "step": 6357 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010430369646845784, + "loss": 0.3881, + "step": 6358 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001041796349108946, + "loss": 0.4902, + "step": 6359 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001040556385973081, + "loss": 0.4838, + "step": 6360 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010393170754813685, + "loss": 0.4321, + "step": 6361 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010380784178380864, + "loss": 0.4653, + "step": 6362 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010368404132474069, + "loss": 0.3802, + "step": 6363 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010356030619133916, + "loss": 0.4238, + "step": 6364 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010343663640399969, + "loss": 0.5708, + "step": 6365 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010331303198310715, + "loss": 0.4553, + "step": 6366 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010318949294903523, + "loss": 0.0476, + "step": 6367 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010306601932214743, + "loss": 0.4536, + "step": 6368 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010294261112279601, + "loss": 0.4523, + "step": 6369 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010281926837132277, + "loss": 0.4984, + "step": 6370 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001026959910880585, + "loss": 0.4719, + "step": 6371 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010257277929332332, + "loss": 0.4258, + "step": 6372 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010244963300742661, + "loss": 0.4921, + "step": 6373 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010232655225066662, + "loss": 0.5037, + "step": 6374 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010220353704333107, + "loss": 0.5206, + "step": 6375 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010208058740569698, + "loss": 0.4337, + "step": 6376 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010195770335803035, + "loss": 0.4768, + "step": 6377 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010183488492058646, + "loss": 0.0473, + "step": 6378 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010171213211360969, + "loss": 0.5381, + "step": 6379 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010158944495733386, + "loss": 0.4652, + "step": 6380 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010146682347198156, + "loss": 0.5006, + "step": 6381 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010134426767776478, + "loss": 0.5107, + "step": 6382 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010122177759488471, + "loss": 0.4607, + "step": 6383 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010109935324353187, + "loss": 0.4934, + "step": 6384 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010097699464388538, + "loss": 0.4106, + "step": 6385 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010085470181611411, + "loss": 0.5023, + "step": 6386 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010073247478037583, + "loss": 0.5221, + "step": 6387 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010061031355681765, + "loss": 0.5256, + "step": 6388 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010048821816557541, + "loss": 0.5143, + "step": 6389 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010036618862677449, + "loss": 0.0474, + "step": 6390 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010024422496052931, + "loss": 0.4414, + "step": 6391 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010012232718694342, + "loss": 0.5002, + "step": 6392 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010000049532610949, + "loss": 0.4355, + "step": 6393 + }, + { + "epoch": 0.8, + "learning_rate": 9.987872939810938e-05, + "loss": 0.5376, + "step": 6394 + }, + { + "epoch": 0.8, + "learning_rate": 9.975702942301418e-05, + "loss": 0.4742, + "step": 6395 + }, + { + "epoch": 0.8, + "learning_rate": 9.963539542088368e-05, + "loss": 0.406, + "step": 6396 + }, + { + "epoch": 0.8, + "learning_rate": 9.951382741176723e-05, + "loss": 0.5249, + "step": 6397 + }, + { + "epoch": 0.8, + "learning_rate": 9.939232541570315e-05, + "loss": 0.5304, + "step": 6398 + }, + { + "epoch": 0.8, + "learning_rate": 9.927088945271895e-05, + "loss": 0.498, + "step": 6399 + }, + { + "epoch": 0.8, + "learning_rate": 9.914951954283113e-05, + "loss": 0.4991, + "step": 6400 + }, + { + "epoch": 0.8, + "learning_rate": 9.902821570604542e-05, + "loss": 0.4619, + "step": 6401 + }, + { + "epoch": 0.8, + "learning_rate": 9.89069779623567e-05, + "loss": 0.522, + "step": 6402 + }, + { + "epoch": 0.8, + "learning_rate": 9.878580633174867e-05, + "loss": 0.4385, + "step": 6403 + }, + { + "epoch": 0.8, + "learning_rate": 9.866470083419432e-05, + "loss": 0.4072, + "step": 6404 + }, + { + "epoch": 0.8, + "learning_rate": 9.854366148965588e-05, + "loss": 0.509, + "step": 6405 + }, + { + "epoch": 0.8, + "learning_rate": 9.842268831808454e-05, + "loss": 0.514, + "step": 6406 + }, + { + "epoch": 0.8, + "learning_rate": 9.830178133942047e-05, + "loss": 0.5469, + "step": 6407 + }, + { + "epoch": 0.8, + "learning_rate": 9.818094057359317e-05, + "loss": 0.4142, + "step": 6408 + }, + { + "epoch": 0.8, + "learning_rate": 9.806016604052103e-05, + "loss": 0.4171, + "step": 6409 + }, + { + "epoch": 0.8, + "learning_rate": 9.793945776011176e-05, + "loss": 0.4933, + "step": 6410 + }, + { + "epoch": 0.8, + "learning_rate": 9.78188157522617e-05, + "loss": 0.4639, + "step": 6411 + }, + { + "epoch": 0.8, + "learning_rate": 9.769824003685663e-05, + "loss": 0.5161, + "step": 6412 + }, + { + "epoch": 0.8, + "learning_rate": 9.75777306337714e-05, + "loss": 0.5296, + "step": 6413 + }, + { + "epoch": 0.8, + "learning_rate": 9.745728756286986e-05, + "loss": 0.4107, + "step": 6414 + }, + { + "epoch": 0.8, + "learning_rate": 9.733691084400481e-05, + "loss": 0.4817, + "step": 6415 + }, + { + "epoch": 0.8, + "learning_rate": 9.721660049701836e-05, + "loss": 0.4875, + "step": 6416 + }, + { + "epoch": 0.8, + "learning_rate": 9.709635654174153e-05, + "loss": 0.4777, + "step": 6417 + }, + { + "epoch": 0.8, + "learning_rate": 9.697617899799427e-05, + "loss": 0.5812, + "step": 6418 + }, + { + "epoch": 0.8, + "learning_rate": 9.685606788558577e-05, + "loss": 0.475, + "step": 6419 + }, + { + "epoch": 0.8, + "learning_rate": 9.673602322431424e-05, + "loss": 0.5128, + "step": 6420 + }, + { + "epoch": 0.8, + "learning_rate": 9.661604503396692e-05, + "loss": 0.5674, + "step": 6421 + }, + { + "epoch": 0.8, + "learning_rate": 9.649613333432011e-05, + "loss": 0.3763, + "step": 6422 + }, + { + "epoch": 0.81, + "learning_rate": 9.637628814513927e-05, + "loss": 0.4388, + "step": 6423 + }, + { + "epoch": 0.81, + "learning_rate": 9.625650948617842e-05, + "loss": 0.6062, + "step": 6424 + }, + { + "epoch": 0.81, + "learning_rate": 9.613679737718112e-05, + "loss": 0.5, + "step": 6425 + }, + { + "epoch": 0.81, + "learning_rate": 9.601715183787979e-05, + "loss": 0.4203, + "step": 6426 + }, + { + "epoch": 0.81, + "learning_rate": 9.589757288799606e-05, + "loss": 0.4651, + "step": 6427 + }, + { + "epoch": 0.81, + "learning_rate": 9.577806054724009e-05, + "loss": 0.4259, + "step": 6428 + }, + { + "epoch": 0.81, + "learning_rate": 9.565861483531151e-05, + "loss": 0.4574, + "step": 6429 + }, + { + "epoch": 0.81, + "learning_rate": 9.553923577189883e-05, + "loss": 0.4066, + "step": 6430 + }, + { + "epoch": 0.81, + "learning_rate": 9.541992337667954e-05, + "loss": 0.5443, + "step": 6431 + }, + { + "epoch": 0.81, + "learning_rate": 9.530067766932038e-05, + "loss": 0.5262, + "step": 6432 + }, + { + "epoch": 0.81, + "learning_rate": 9.518149866947656e-05, + "loss": 0.43, + "step": 6433 + }, + { + "epoch": 0.81, + "learning_rate": 9.506238639679282e-05, + "loss": 0.5333, + "step": 6434 + }, + { + "epoch": 0.81, + "learning_rate": 9.494334087090267e-05, + "loss": 0.481, + "step": 6435 + }, + { + "epoch": 0.81, + "learning_rate": 9.482436211142869e-05, + "loss": 0.0475, + "step": 6436 + }, + { + "epoch": 0.81, + "learning_rate": 9.470545013798237e-05, + "loss": 0.4474, + "step": 6437 + }, + { + "epoch": 0.81, + "learning_rate": 9.458660497016425e-05, + "loss": 0.4407, + "step": 6438 + }, + { + "epoch": 0.81, + "learning_rate": 9.446782662756404e-05, + "loss": 0.5095, + "step": 6439 + }, + { + "epoch": 0.81, + "learning_rate": 9.43491151297599e-05, + "loss": 0.3815, + "step": 6440 + }, + { + "epoch": 0.81, + "learning_rate": 9.423047049631955e-05, + "loss": 0.3865, + "step": 6441 + }, + { + "epoch": 0.81, + "learning_rate": 9.411189274679937e-05, + "loss": 0.4425, + "step": 6442 + }, + { + "epoch": 0.81, + "learning_rate": 9.399338190074485e-05, + "loss": 0.5431, + "step": 6443 + }, + { + "epoch": 0.81, + "learning_rate": 9.387493797769037e-05, + "loss": 0.4441, + "step": 6444 + }, + { + "epoch": 0.81, + "learning_rate": 9.375656099715935e-05, + "loss": 0.4738, + "step": 6445 + }, + { + "epoch": 0.81, + "learning_rate": 9.363825097866424e-05, + "loss": 0.3964, + "step": 6446 + }, + { + "epoch": 0.81, + "learning_rate": 9.352000794170606e-05, + "loss": 0.4841, + "step": 6447 + }, + { + "epoch": 0.81, + "learning_rate": 9.340183190577534e-05, + "loss": 0.4221, + "step": 6448 + }, + { + "epoch": 0.81, + "learning_rate": 9.328372289035114e-05, + "loss": 0.5164, + "step": 6449 + }, + { + "epoch": 0.81, + "learning_rate": 9.316568091490174e-05, + "loss": 0.4534, + "step": 6450 + }, + { + "epoch": 0.81, + "learning_rate": 9.304770599888424e-05, + "loss": 0.4182, + "step": 6451 + }, + { + "epoch": 0.81, + "learning_rate": 9.292979816174474e-05, + "loss": 0.5514, + "step": 6452 + }, + { + "epoch": 0.81, + "learning_rate": 9.28119574229182e-05, + "loss": 0.5867, + "step": 6453 + }, + { + "epoch": 0.81, + "learning_rate": 9.269418380182875e-05, + "loss": 0.4596, + "step": 6454 + }, + { + "epoch": 0.81, + "learning_rate": 9.257647731788905e-05, + "loss": 0.4593, + "step": 6455 + }, + { + "epoch": 0.81, + "learning_rate": 9.245883799050103e-05, + "loss": 0.3982, + "step": 6456 + }, + { + "epoch": 0.81, + "learning_rate": 9.234126583905544e-05, + "loss": 0.5574, + "step": 6457 + }, + { + "epoch": 0.81, + "learning_rate": 9.222376088293199e-05, + "loss": 0.5139, + "step": 6458 + }, + { + "epoch": 0.81, + "learning_rate": 9.210632314149925e-05, + "loss": 0.4434, + "step": 6459 + }, + { + "epoch": 0.81, + "learning_rate": 9.198895263411484e-05, + "loss": 0.5121, + "step": 6460 + }, + { + "epoch": 0.81, + "learning_rate": 9.187164938012521e-05, + "loss": 0.6655, + "step": 6461 + }, + { + "epoch": 0.81, + "learning_rate": 9.175441339886558e-05, + "loss": 0.4713, + "step": 6462 + }, + { + "epoch": 0.81, + "learning_rate": 9.163724470966028e-05, + "loss": 0.4587, + "step": 6463 + }, + { + "epoch": 0.81, + "learning_rate": 9.152014333182257e-05, + "loss": 0.4517, + "step": 6464 + }, + { + "epoch": 0.81, + "learning_rate": 9.140310928465445e-05, + "loss": 0.5549, + "step": 6465 + }, + { + "epoch": 0.81, + "learning_rate": 9.1286142587447e-05, + "loss": 0.4673, + "step": 6466 + }, + { + "epoch": 0.81, + "learning_rate": 9.116924325948011e-05, + "loss": 0.4691, + "step": 6467 + }, + { + "epoch": 0.81, + "learning_rate": 9.105241132002246e-05, + "loss": 0.0477, + "step": 6468 + }, + { + "epoch": 0.81, + "learning_rate": 9.093564678833171e-05, + "loss": 0.4463, + "step": 6469 + }, + { + "epoch": 0.81, + "learning_rate": 9.081894968365451e-05, + "loss": 0.3699, + "step": 6470 + }, + { + "epoch": 0.81, + "learning_rate": 9.070232002522644e-05, + "loss": 0.0476, + "step": 6471 + }, + { + "epoch": 0.81, + "learning_rate": 9.058575783227157e-05, + "loss": 0.4463, + "step": 6472 + }, + { + "epoch": 0.81, + "learning_rate": 9.046926312400322e-05, + "loss": 0.6089, + "step": 6473 + }, + { + "epoch": 0.81, + "learning_rate": 9.035283591962351e-05, + "loss": 0.494, + "step": 6474 + }, + { + "epoch": 0.81, + "learning_rate": 9.023647623832332e-05, + "loss": 0.5869, + "step": 6475 + }, + { + "epoch": 0.81, + "learning_rate": 9.012018409928275e-05, + "loss": 0.4999, + "step": 6476 + }, + { + "epoch": 0.81, + "learning_rate": 9.000395952167013e-05, + "loss": 0.5081, + "step": 6477 + }, + { + "epoch": 0.81, + "learning_rate": 8.988780252464318e-05, + "loss": 0.4242, + "step": 6478 + }, + { + "epoch": 0.81, + "learning_rate": 8.977171312734833e-05, + "loss": 0.4506, + "step": 6479 + }, + { + "epoch": 0.81, + "learning_rate": 8.965569134892088e-05, + "loss": 0.4506, + "step": 6480 + }, + { + "epoch": 0.81, + "learning_rate": 8.953973720848496e-05, + "loss": 0.4799, + "step": 6481 + }, + { + "epoch": 0.81, + "learning_rate": 8.942385072515352e-05, + "loss": 0.4279, + "step": 6482 + }, + { + "epoch": 0.81, + "learning_rate": 8.930803191802856e-05, + "loss": 0.4346, + "step": 6483 + }, + { + "epoch": 0.81, + "learning_rate": 8.919228080620046e-05, + "loss": 0.5393, + "step": 6484 + }, + { + "epoch": 0.81, + "learning_rate": 8.907659740874891e-05, + "loss": 0.4358, + "step": 6485 + }, + { + "epoch": 0.81, + "learning_rate": 8.896098174474227e-05, + "loss": 0.4517, + "step": 6486 + }, + { + "epoch": 0.81, + "learning_rate": 8.884543383323773e-05, + "loss": 0.538, + "step": 6487 + }, + { + "epoch": 0.81, + "learning_rate": 8.87299536932813e-05, + "loss": 0.4675, + "step": 6488 + }, + { + "epoch": 0.81, + "learning_rate": 8.861454134390784e-05, + "loss": 0.4882, + "step": 6489 + }, + { + "epoch": 0.81, + "learning_rate": 8.849919680414115e-05, + "loss": 0.5195, + "step": 6490 + }, + { + "epoch": 0.81, + "learning_rate": 8.83839200929935e-05, + "loss": 0.5275, + "step": 6491 + }, + { + "epoch": 0.81, + "learning_rate": 8.82687112294664e-05, + "loss": 0.4648, + "step": 6492 + }, + { + "epoch": 0.81, + "learning_rate": 8.815357023254983e-05, + "loss": 0.0474, + "step": 6493 + }, + { + "epoch": 0.81, + "learning_rate": 8.803849712122292e-05, + "loss": 0.5281, + "step": 6494 + }, + { + "epoch": 0.81, + "learning_rate": 8.792349191445331e-05, + "loss": 0.4932, + "step": 6495 + }, + { + "epoch": 0.81, + "learning_rate": 8.780855463119757e-05, + "loss": 0.3956, + "step": 6496 + }, + { + "epoch": 0.81, + "learning_rate": 8.769368529040133e-05, + "loss": 0.5164, + "step": 6497 + }, + { + "epoch": 0.81, + "learning_rate": 8.757888391099838e-05, + "loss": 0.5853, + "step": 6498 + }, + { + "epoch": 0.81, + "learning_rate": 8.746415051191187e-05, + "loss": 0.4048, + "step": 6499 + }, + { + "epoch": 0.81, + "learning_rate": 8.734948511205359e-05, + "loss": 0.4959, + "step": 6500 + }, + { + "epoch": 0.81, + "learning_rate": 8.723488773032406e-05, + "loss": 0.5908, + "step": 6501 + }, + { + "epoch": 0.81, + "learning_rate": 8.712035838561272e-05, + "loss": 0.422, + "step": 6502 + }, + { + "epoch": 0.82, + "learning_rate": 8.700589709679758e-05, + "loss": 0.4129, + "step": 6503 + }, + { + "epoch": 0.82, + "learning_rate": 8.689150388274558e-05, + "loss": 0.3931, + "step": 6504 + }, + { + "epoch": 0.82, + "learning_rate": 8.677717876231261e-05, + "loss": 0.3967, + "step": 6505 + }, + { + "epoch": 0.82, + "learning_rate": 8.666292175434282e-05, + "loss": 0.4089, + "step": 6506 + }, + { + "epoch": 0.82, + "learning_rate": 8.654873287766963e-05, + "loss": 0.4482, + "step": 6507 + }, + { + "epoch": 0.82, + "learning_rate": 8.643461215111503e-05, + "loss": 0.4901, + "step": 6508 + }, + { + "epoch": 0.82, + "learning_rate": 8.632055959348978e-05, + "loss": 0.4066, + "step": 6509 + }, + { + "epoch": 0.82, + "learning_rate": 8.620657522359359e-05, + "loss": 0.4066, + "step": 6510 + }, + { + "epoch": 0.82, + "learning_rate": 8.609265906021451e-05, + "loss": 0.444, + "step": 6511 + }, + { + "epoch": 0.82, + "learning_rate": 8.597881112212974e-05, + "loss": 0.6019, + "step": 6512 + }, + { + "epoch": 0.82, + "learning_rate": 8.586503142810504e-05, + "loss": 0.4929, + "step": 6513 + }, + { + "epoch": 0.82, + "learning_rate": 8.575131999689511e-05, + "loss": 0.4845, + "step": 6514 + }, + { + "epoch": 0.82, + "learning_rate": 8.563767684724305e-05, + "loss": 0.5387, + "step": 6515 + }, + { + "epoch": 0.82, + "learning_rate": 8.552410199788108e-05, + "loss": 0.536, + "step": 6516 + }, + { + "epoch": 0.82, + "learning_rate": 8.541059546752988e-05, + "loss": 0.4652, + "step": 6517 + }, + { + "epoch": 0.82, + "learning_rate": 8.529715727489911e-05, + "loss": 0.5675, + "step": 6518 + }, + { + "epoch": 0.82, + "learning_rate": 8.518378743868716e-05, + "loss": 0.4638, + "step": 6519 + }, + { + "epoch": 0.82, + "learning_rate": 8.507048597758071e-05, + "loss": 0.5841, + "step": 6520 + }, + { + "epoch": 0.82, + "learning_rate": 8.495725291025569e-05, + "loss": 0.538, + "step": 6521 + }, + { + "epoch": 0.82, + "learning_rate": 8.484408825537653e-05, + "loss": 0.4258, + "step": 6522 + }, + { + "epoch": 0.82, + "learning_rate": 8.473099203159646e-05, + "loss": 0.4778, + "step": 6523 + }, + { + "epoch": 0.82, + "learning_rate": 8.461796425755735e-05, + "loss": 0.479, + "step": 6524 + }, + { + "epoch": 0.82, + "learning_rate": 8.45050049518899e-05, + "loss": 0.5869, + "step": 6525 + }, + { + "epoch": 0.82, + "learning_rate": 8.439211413321335e-05, + "loss": 0.4456, + "step": 6526 + }, + { + "epoch": 0.82, + "learning_rate": 8.427929182013588e-05, + "loss": 0.4839, + "step": 6527 + }, + { + "epoch": 0.82, + "learning_rate": 8.416653803125412e-05, + "loss": 0.4125, + "step": 6528 + }, + { + "epoch": 0.82, + "learning_rate": 8.405385278515355e-05, + "loss": 0.344, + "step": 6529 + }, + { + "epoch": 0.82, + "learning_rate": 8.394123610040838e-05, + "loss": 0.5127, + "step": 6530 + }, + { + "epoch": 0.82, + "learning_rate": 8.382868799558142e-05, + "loss": 0.5319, + "step": 6531 + }, + { + "epoch": 0.82, + "learning_rate": 8.371620848922435e-05, + "loss": 0.4354, + "step": 6532 + }, + { + "epoch": 0.82, + "learning_rate": 8.360379759987729e-05, + "loss": 0.5233, + "step": 6533 + }, + { + "epoch": 0.82, + "learning_rate": 8.34914553460694e-05, + "loss": 0.4525, + "step": 6534 + }, + { + "epoch": 0.82, + "learning_rate": 8.337918174631798e-05, + "loss": 0.541, + "step": 6535 + }, + { + "epoch": 0.82, + "learning_rate": 8.32669768191296e-05, + "loss": 0.4281, + "step": 6536 + }, + { + "epoch": 0.82, + "learning_rate": 8.31548405829991e-05, + "loss": 0.4183, + "step": 6537 + }, + { + "epoch": 0.82, + "learning_rate": 8.304277305641028e-05, + "loss": 0.6257, + "step": 6538 + }, + { + "epoch": 0.82, + "learning_rate": 8.293077425783541e-05, + "loss": 0.4742, + "step": 6539 + }, + { + "epoch": 0.82, + "learning_rate": 8.281884420573554e-05, + "loss": 0.6049, + "step": 6540 + }, + { + "epoch": 0.82, + "learning_rate": 8.270698291856049e-05, + "loss": 0.4313, + "step": 6541 + }, + { + "epoch": 0.82, + "learning_rate": 8.259519041474833e-05, + "loss": 0.395, + "step": 6542 + }, + { + "epoch": 0.82, + "learning_rate": 8.248346671272622e-05, + "loss": 0.4119, + "step": 6543 + }, + { + "epoch": 0.82, + "learning_rate": 8.23718118309098e-05, + "loss": 0.5095, + "step": 6544 + }, + { + "epoch": 0.82, + "learning_rate": 8.226022578770348e-05, + "loss": 0.4216, + "step": 6545 + }, + { + "epoch": 0.82, + "learning_rate": 8.214870860150015e-05, + "loss": 0.4654, + "step": 6546 + }, + { + "epoch": 0.82, + "learning_rate": 8.203726029068148e-05, + "loss": 0.5469, + "step": 6547 + }, + { + "epoch": 0.82, + "learning_rate": 8.19258808736178e-05, + "loss": 0.3989, + "step": 6548 + }, + { + "epoch": 0.82, + "learning_rate": 8.181457036866807e-05, + "loss": 0.4954, + "step": 6549 + }, + { + "epoch": 0.82, + "learning_rate": 8.170332879417968e-05, + "loss": 0.4338, + "step": 6550 + }, + { + "epoch": 0.82, + "learning_rate": 8.159215616848892e-05, + "loss": 0.4893, + "step": 6551 + }, + { + "epoch": 0.82, + "learning_rate": 8.148105250992066e-05, + "loss": 0.5394, + "step": 6552 + }, + { + "epoch": 0.82, + "learning_rate": 8.137001783678843e-05, + "loss": 0.4018, + "step": 6553 + }, + { + "epoch": 0.82, + "learning_rate": 8.12590521673941e-05, + "loss": 0.5995, + "step": 6554 + }, + { + "epoch": 0.82, + "learning_rate": 8.114815552002858e-05, + "loss": 0.5122, + "step": 6555 + }, + { + "epoch": 0.82, + "learning_rate": 8.10373279129712e-05, + "loss": 0.5471, + "step": 6556 + }, + { + "epoch": 0.82, + "learning_rate": 8.092656936449e-05, + "loss": 0.4763, + "step": 6557 + }, + { + "epoch": 0.82, + "learning_rate": 8.081587989284133e-05, + "loss": 0.521, + "step": 6558 + }, + { + "epoch": 0.82, + "learning_rate": 8.070525951627061e-05, + "loss": 0.4639, + "step": 6559 + }, + { + "epoch": 0.82, + "learning_rate": 8.059470825301152e-05, + "loss": 0.4503, + "step": 6560 + }, + { + "epoch": 0.82, + "learning_rate": 8.048422612128653e-05, + "loss": 0.4631, + "step": 6561 + }, + { + "epoch": 0.82, + "learning_rate": 8.037381313930665e-05, + "loss": 0.3835, + "step": 6562 + }, + { + "epoch": 0.82, + "learning_rate": 8.026346932527163e-05, + "loss": 0.4553, + "step": 6563 + }, + { + "epoch": 0.82, + "learning_rate": 8.015319469736948e-05, + "loss": 0.0474, + "step": 6564 + }, + { + "epoch": 0.82, + "learning_rate": 8.004298927377712e-05, + "loss": 0.5139, + "step": 6565 + }, + { + "epoch": 0.82, + "learning_rate": 7.993285307265996e-05, + "loss": 0.5203, + "step": 6566 + }, + { + "epoch": 0.82, + "learning_rate": 7.982278611217198e-05, + "loss": 0.4379, + "step": 6567 + }, + { + "epoch": 0.82, + "learning_rate": 7.971278841045582e-05, + "loss": 0.3878, + "step": 6568 + }, + { + "epoch": 0.82, + "learning_rate": 7.96028599856426e-05, + "loss": 0.6246, + "step": 6569 + }, + { + "epoch": 0.82, + "learning_rate": 7.94930008558521e-05, + "loss": 0.5004, + "step": 6570 + }, + { + "epoch": 0.82, + "learning_rate": 7.938321103919278e-05, + "loss": 0.5084, + "step": 6571 + }, + { + "epoch": 0.82, + "learning_rate": 7.927349055376132e-05, + "loss": 0.5394, + "step": 6572 + }, + { + "epoch": 0.82, + "learning_rate": 7.916383941764326e-05, + "loss": 0.5094, + "step": 6573 + }, + { + "epoch": 0.82, + "learning_rate": 7.905425764891272e-05, + "loss": 0.4837, + "step": 6574 + }, + { + "epoch": 0.82, + "learning_rate": 7.894474526563223e-05, + "loss": 0.5698, + "step": 6575 + }, + { + "epoch": 0.82, + "learning_rate": 7.883530228585306e-05, + "loss": 0.5405, + "step": 6576 + }, + { + "epoch": 0.82, + "learning_rate": 7.872592872761486e-05, + "loss": 0.4641, + "step": 6577 + }, + { + "epoch": 0.82, + "learning_rate": 7.861662460894614e-05, + "loss": 0.5298, + "step": 6578 + }, + { + "epoch": 0.82, + "learning_rate": 7.850738994786339e-05, + "loss": 0.5459, + "step": 6579 + }, + { + "epoch": 0.82, + "learning_rate": 7.839822476237224e-05, + "loss": 0.3602, + "step": 6580 + }, + { + "epoch": 0.82, + "learning_rate": 7.828912907046659e-05, + "loss": 0.4845, + "step": 6581 + }, + { + "epoch": 0.83, + "learning_rate": 7.818010289012889e-05, + "loss": 0.5267, + "step": 6582 + }, + { + "epoch": 0.83, + "learning_rate": 7.807114623933026e-05, + "loss": 0.3936, + "step": 6583 + }, + { + "epoch": 0.83, + "learning_rate": 7.796225913603016e-05, + "loss": 0.4406, + "step": 6584 + }, + { + "epoch": 0.83, + "learning_rate": 7.785344159817697e-05, + "loss": 0.4391, + "step": 6585 + }, + { + "epoch": 0.83, + "learning_rate": 7.774469364370701e-05, + "loss": 0.5326, + "step": 6586 + }, + { + "epoch": 0.83, + "learning_rate": 7.763601529054554e-05, + "loss": 0.4835, + "step": 6587 + }, + { + "epoch": 0.83, + "learning_rate": 7.752740655660634e-05, + "loss": 0.4829, + "step": 6588 + }, + { + "epoch": 0.83, + "learning_rate": 7.741886745979155e-05, + "loss": 0.4584, + "step": 6589 + }, + { + "epoch": 0.83, + "learning_rate": 7.7310398017992e-05, + "loss": 0.5265, + "step": 6590 + }, + { + "epoch": 0.83, + "learning_rate": 7.720199824908692e-05, + "loss": 0.5175, + "step": 6591 + }, + { + "epoch": 0.83, + "learning_rate": 7.709366817094421e-05, + "loss": 0.5188, + "step": 6592 + }, + { + "epoch": 0.83, + "learning_rate": 7.698540780141988e-05, + "loss": 0.563, + "step": 6593 + }, + { + "epoch": 0.83, + "learning_rate": 7.687721715835899e-05, + "loss": 0.0476, + "step": 6594 + }, + { + "epoch": 0.83, + "learning_rate": 7.676909625959472e-05, + "loss": 0.5299, + "step": 6595 + }, + { + "epoch": 0.83, + "learning_rate": 7.666104512294903e-05, + "loss": 0.4344, + "step": 6596 + }, + { + "epoch": 0.83, + "learning_rate": 7.655306376623205e-05, + "loss": 0.5079, + "step": 6597 + }, + { + "epoch": 0.83, + "learning_rate": 7.644515220724264e-05, + "loss": 0.4983, + "step": 6598 + }, + { + "epoch": 0.83, + "learning_rate": 7.633731046376819e-05, + "loss": 0.502, + "step": 6599 + }, + { + "epoch": 0.83, + "learning_rate": 7.622953855358456e-05, + "loss": 0.5145, + "step": 6600 + }, + { + "epoch": 0.83, + "learning_rate": 7.612183649445581e-05, + "loss": 0.4448, + "step": 6601 + }, + { + "epoch": 0.83, + "learning_rate": 7.601420430413486e-05, + "loss": 0.5852, + "step": 6602 + }, + { + "epoch": 0.83, + "learning_rate": 7.590664200036296e-05, + "loss": 0.0476, + "step": 6603 + }, + { + "epoch": 0.83, + "learning_rate": 7.579914960086987e-05, + "loss": 0.542, + "step": 6604 + }, + { + "epoch": 0.83, + "learning_rate": 7.569172712337375e-05, + "loss": 0.4885, + "step": 6605 + }, + { + "epoch": 0.83, + "learning_rate": 7.558437458558132e-05, + "loss": 0.4812, + "step": 6606 + }, + { + "epoch": 0.83, + "learning_rate": 7.547709200518793e-05, + "loss": 0.4167, + "step": 6607 + }, + { + "epoch": 0.83, + "learning_rate": 7.536987939987688e-05, + "loss": 0.5101, + "step": 6608 + }, + { + "epoch": 0.83, + "learning_rate": 7.526273678732043e-05, + "loss": 0.4133, + "step": 6609 + }, + { + "epoch": 0.83, + "learning_rate": 7.515566418517911e-05, + "loss": 0.4341, + "step": 6610 + }, + { + "epoch": 0.83, + "learning_rate": 7.504866161110202e-05, + "loss": 0.5001, + "step": 6611 + }, + { + "epoch": 0.83, + "learning_rate": 7.494172908272656e-05, + "loss": 0.4539, + "step": 6612 + }, + { + "epoch": 0.83, + "learning_rate": 7.483486661767868e-05, + "loss": 0.431, + "step": 6613 + }, + { + "epoch": 0.83, + "learning_rate": 7.472807423357298e-05, + "loss": 0.0476, + "step": 6614 + }, + { + "epoch": 0.83, + "learning_rate": 7.46213519480119e-05, + "loss": 0.4078, + "step": 6615 + }, + { + "epoch": 0.83, + "learning_rate": 7.451469977858688e-05, + "loss": 0.5195, + "step": 6616 + }, + { + "epoch": 0.83, + "learning_rate": 7.440811774287775e-05, + "loss": 0.4926, + "step": 6617 + }, + { + "epoch": 0.83, + "learning_rate": 7.430160585845252e-05, + "loss": 0.5029, + "step": 6618 + }, + { + "epoch": 0.83, + "learning_rate": 7.419516414286792e-05, + "loss": 0.6014, + "step": 6619 + }, + { + "epoch": 0.83, + "learning_rate": 7.408879261366886e-05, + "loss": 0.4848, + "step": 6620 + }, + { + "epoch": 0.83, + "learning_rate": 7.398249128838892e-05, + "loss": 0.4655, + "step": 6621 + }, + { + "epoch": 0.83, + "learning_rate": 7.387626018455002e-05, + "loss": 0.5977, + "step": 6622 + }, + { + "epoch": 0.83, + "learning_rate": 7.377009931966227e-05, + "loss": 0.5119, + "step": 6623 + }, + { + "epoch": 0.83, + "learning_rate": 7.366400871122452e-05, + "loss": 0.3945, + "step": 6624 + }, + { + "epoch": 0.83, + "learning_rate": 7.355798837672394e-05, + "loss": 0.5017, + "step": 6625 + }, + { + "epoch": 0.83, + "learning_rate": 7.345203833363618e-05, + "loss": 0.6035, + "step": 6626 + }, + { + "epoch": 0.83, + "learning_rate": 7.334615859942506e-05, + "loss": 0.5325, + "step": 6627 + }, + { + "epoch": 0.83, + "learning_rate": 7.32403491915431e-05, + "loss": 0.5553, + "step": 6628 + }, + { + "epoch": 0.83, + "learning_rate": 7.313461012743117e-05, + "loss": 0.5815, + "step": 6629 + }, + { + "epoch": 0.83, + "learning_rate": 7.302894142451826e-05, + "loss": 0.4546, + "step": 6630 + }, + { + "epoch": 0.83, + "learning_rate": 7.292334310022214e-05, + "loss": 0.572, + "step": 6631 + }, + { + "epoch": 0.83, + "learning_rate": 7.28178151719488e-05, + "loss": 0.4552, + "step": 6632 + }, + { + "epoch": 0.83, + "learning_rate": 7.271235765709261e-05, + "loss": 0.5688, + "step": 6633 + }, + { + "epoch": 0.83, + "learning_rate": 7.260697057303644e-05, + "loss": 0.4784, + "step": 6634 + }, + { + "epoch": 0.83, + "learning_rate": 7.250165393715141e-05, + "loss": 0.048, + "step": 6635 + }, + { + "epoch": 0.83, + "learning_rate": 7.239640776679729e-05, + "loss": 0.4767, + "step": 6636 + }, + { + "epoch": 0.83, + "learning_rate": 7.229123207932176e-05, + "loss": 0.4789, + "step": 6637 + }, + { + "epoch": 0.83, + "learning_rate": 7.218612689206133e-05, + "loss": 0.4852, + "step": 6638 + }, + { + "epoch": 0.83, + "learning_rate": 7.208109222234072e-05, + "loss": 0.4487, + "step": 6639 + }, + { + "epoch": 0.83, + "learning_rate": 7.197612808747311e-05, + "loss": 0.5569, + "step": 6640 + }, + { + "epoch": 0.83, + "learning_rate": 7.187123450475985e-05, + "loss": 0.5072, + "step": 6641 + }, + { + "epoch": 0.83, + "learning_rate": 7.176641149149077e-05, + "loss": 0.5087, + "step": 6642 + }, + { + "epoch": 0.83, + "learning_rate": 7.166165906494416e-05, + "loss": 0.4688, + "step": 6643 + }, + { + "epoch": 0.83, + "learning_rate": 7.15569772423867e-05, + "loss": 0.4451, + "step": 6644 + }, + { + "epoch": 0.83, + "learning_rate": 7.145236604107308e-05, + "loss": 0.5155, + "step": 6645 + }, + { + "epoch": 0.83, + "learning_rate": 7.134782547824681e-05, + "loss": 0.5489, + "step": 6646 + }, + { + "epoch": 0.83, + "learning_rate": 7.124335557113942e-05, + "loss": 0.4698, + "step": 6647 + }, + { + "epoch": 0.83, + "learning_rate": 7.113895633697104e-05, + "loss": 0.5271, + "step": 6648 + }, + { + "epoch": 0.83, + "learning_rate": 7.103462779295e-05, + "loss": 0.4624, + "step": 6649 + }, + { + "epoch": 0.83, + "learning_rate": 7.0930369956273e-05, + "loss": 0.4457, + "step": 6650 + }, + { + "epoch": 0.83, + "learning_rate": 7.082618284412518e-05, + "loss": 0.4183, + "step": 6651 + }, + { + "epoch": 0.83, + "learning_rate": 7.072206647367974e-05, + "loss": 0.7712, + "step": 6652 + }, + { + "epoch": 0.83, + "learning_rate": 7.061802086209857e-05, + "loss": 0.5283, + "step": 6653 + }, + { + "epoch": 0.83, + "learning_rate": 7.051404602653166e-05, + "loss": 0.4586, + "step": 6654 + }, + { + "epoch": 0.83, + "learning_rate": 7.041014198411755e-05, + "loss": 0.4611, + "step": 6655 + }, + { + "epoch": 0.83, + "learning_rate": 7.030630875198286e-05, + "loss": 0.474, + "step": 6656 + }, + { + "epoch": 0.83, + "learning_rate": 7.020254634724272e-05, + "loss": 0.4213, + "step": 6657 + }, + { + "epoch": 0.83, + "learning_rate": 7.009885478700063e-05, + "loss": 0.4647, + "step": 6658 + }, + { + "epoch": 0.83, + "learning_rate": 6.999523408834807e-05, + "loss": 0.4797, + "step": 6659 + }, + { + "epoch": 0.83, + "learning_rate": 6.989168426836518e-05, + "loss": 0.4948, + "step": 6660 + }, + { + "epoch": 0.83, + "learning_rate": 6.978820534412033e-05, + "loss": 0.4249, + "step": 6661 + }, + { + "epoch": 0.84, + "learning_rate": 6.968479733267019e-05, + "loss": 0.4884, + "step": 6662 + }, + { + "epoch": 0.84, + "learning_rate": 6.958146025105971e-05, + "loss": 0.4568, + "step": 6663 + }, + { + "epoch": 0.84, + "learning_rate": 6.947819411632222e-05, + "loss": 0.4338, + "step": 6664 + }, + { + "epoch": 0.84, + "learning_rate": 6.937499894547933e-05, + "loss": 0.4705, + "step": 6665 + }, + { + "epoch": 0.84, + "learning_rate": 6.927187475554097e-05, + "loss": 0.0478, + "step": 6666 + }, + { + "epoch": 0.84, + "learning_rate": 6.916882156350517e-05, + "loss": 0.4641, + "step": 6667 + }, + { + "epoch": 0.84, + "learning_rate": 6.906583938635852e-05, + "loss": 0.4235, + "step": 6668 + }, + { + "epoch": 0.84, + "learning_rate": 6.896292824107581e-05, + "loss": 0.4974, + "step": 6669 + }, + { + "epoch": 0.84, + "learning_rate": 6.886008814462008e-05, + "loss": 0.6305, + "step": 6670 + }, + { + "epoch": 0.84, + "learning_rate": 6.875731911394278e-05, + "loss": 0.4825, + "step": 6671 + }, + { + "epoch": 0.84, + "learning_rate": 6.865462116598353e-05, + "loss": 0.5303, + "step": 6672 + }, + { + "epoch": 0.84, + "learning_rate": 6.855199431767034e-05, + "loss": 0.4642, + "step": 6673 + }, + { + "epoch": 0.84, + "learning_rate": 6.844943858591929e-05, + "loss": 0.0482, + "step": 6674 + }, + { + "epoch": 0.84, + "learning_rate": 6.834695398763491e-05, + "loss": 0.4952, + "step": 6675 + }, + { + "epoch": 0.84, + "learning_rate": 6.824454053970996e-05, + "loss": 0.4385, + "step": 6676 + }, + { + "epoch": 0.84, + "learning_rate": 6.814219825902557e-05, + "loss": 0.5563, + "step": 6677 + }, + { + "epoch": 0.84, + "learning_rate": 6.803992716245094e-05, + "loss": 0.4695, + "step": 6678 + }, + { + "epoch": 0.84, + "learning_rate": 6.793772726684389e-05, + "loss": 0.5298, + "step": 6679 + }, + { + "epoch": 0.84, + "learning_rate": 6.783559858904991e-05, + "loss": 0.4757, + "step": 6680 + }, + { + "epoch": 0.84, + "learning_rate": 6.77335411459033e-05, + "loss": 0.4356, + "step": 6681 + }, + { + "epoch": 0.84, + "learning_rate": 6.76315549542264e-05, + "loss": 0.5125, + "step": 6682 + }, + { + "epoch": 0.84, + "learning_rate": 6.75296400308299e-05, + "loss": 0.4956, + "step": 6683 + }, + { + "epoch": 0.84, + "learning_rate": 6.742779639251257e-05, + "loss": 0.4541, + "step": 6684 + }, + { + "epoch": 0.84, + "learning_rate": 6.732602405606148e-05, + "loss": 0.5863, + "step": 6685 + }, + { + "epoch": 0.84, + "learning_rate": 6.72243230382521e-05, + "loss": 0.392, + "step": 6686 + }, + { + "epoch": 0.84, + "learning_rate": 6.712269335584797e-05, + "loss": 0.4395, + "step": 6687 + }, + { + "epoch": 0.84, + "learning_rate": 6.702113502560114e-05, + "loss": 0.5188, + "step": 6688 + }, + { + "epoch": 0.84, + "learning_rate": 6.691964806425143e-05, + "loss": 0.533, + "step": 6689 + }, + { + "epoch": 0.84, + "learning_rate": 6.681823248852731e-05, + "loss": 0.5118, + "step": 6690 + }, + { + "epoch": 0.84, + "learning_rate": 6.671688831514527e-05, + "loss": 0.4779, + "step": 6691 + }, + { + "epoch": 0.84, + "learning_rate": 6.66156155608102e-05, + "loss": 0.434, + "step": 6692 + }, + { + "epoch": 0.84, + "learning_rate": 6.651441424221505e-05, + "loss": 0.6368, + "step": 6693 + }, + { + "epoch": 0.84, + "learning_rate": 6.641328437604106e-05, + "loss": 0.4546, + "step": 6694 + }, + { + "epoch": 0.84, + "learning_rate": 6.631222597895786e-05, + "loss": 0.4086, + "step": 6695 + }, + { + "epoch": 0.84, + "learning_rate": 6.621123906762288e-05, + "loss": 0.5503, + "step": 6696 + }, + { + "epoch": 0.84, + "learning_rate": 6.611032365868213e-05, + "loss": 0.4786, + "step": 6697 + }, + { + "epoch": 0.84, + "learning_rate": 6.600947976876981e-05, + "loss": 0.4663, + "step": 6698 + }, + { + "epoch": 0.84, + "learning_rate": 6.590870741450811e-05, + "loss": 0.3881, + "step": 6699 + }, + { + "epoch": 0.84, + "learning_rate": 6.580800661250764e-05, + "loss": 0.5135, + "step": 6700 + }, + { + "epoch": 0.84, + "learning_rate": 6.570737737936716e-05, + "loss": 0.4614, + "step": 6701 + }, + { + "epoch": 0.84, + "learning_rate": 6.56068197316737e-05, + "loss": 0.5616, + "step": 6702 + }, + { + "epoch": 0.84, + "learning_rate": 6.550633368600223e-05, + "loss": 0.4753, + "step": 6703 + }, + { + "epoch": 0.84, + "learning_rate": 6.540591925891609e-05, + "loss": 0.4191, + "step": 6704 + }, + { + "epoch": 0.84, + "learning_rate": 6.5305576466967e-05, + "loss": 0.4589, + "step": 6705 + }, + { + "epoch": 0.84, + "learning_rate": 6.520530532669449e-05, + "loss": 0.3854, + "step": 6706 + }, + { + "epoch": 0.84, + "learning_rate": 6.510510585462664e-05, + "loss": 0.4052, + "step": 6707 + }, + { + "epoch": 0.84, + "learning_rate": 6.500497806727951e-05, + "loss": 0.4187, + "step": 6708 + }, + { + "epoch": 0.84, + "learning_rate": 6.490492198115738e-05, + "loss": 0.4951, + "step": 6709 + }, + { + "epoch": 0.84, + "learning_rate": 6.480493761275286e-05, + "loss": 0.0477, + "step": 6710 + }, + { + "epoch": 0.84, + "learning_rate": 6.47050249785463e-05, + "loss": 0.5116, + "step": 6711 + }, + { + "epoch": 0.84, + "learning_rate": 6.46051840950067e-05, + "loss": 0.4817, + "step": 6712 + }, + { + "epoch": 0.84, + "learning_rate": 6.450541497859114e-05, + "loss": 0.4815, + "step": 6713 + }, + { + "epoch": 0.84, + "learning_rate": 6.440571764574466e-05, + "loss": 0.4856, + "step": 6714 + }, + { + "epoch": 0.84, + "learning_rate": 6.430609211290062e-05, + "loss": 0.5412, + "step": 6715 + }, + { + "epoch": 0.84, + "learning_rate": 6.420653839648061e-05, + "loss": 0.542, + "step": 6716 + }, + { + "epoch": 0.84, + "learning_rate": 6.410705651289434e-05, + "loss": 0.5496, + "step": 6717 + }, + { + "epoch": 0.84, + "learning_rate": 6.40076464785394e-05, + "loss": 0.5458, + "step": 6718 + }, + { + "epoch": 0.84, + "learning_rate": 6.390830830980194e-05, + "loss": 0.4481, + "step": 6719 + }, + { + "epoch": 0.84, + "learning_rate": 6.380904202305604e-05, + "loss": 0.6284, + "step": 6720 + }, + { + "epoch": 0.84, + "learning_rate": 6.370984763466403e-05, + "loss": 0.5204, + "step": 6721 + }, + { + "epoch": 0.84, + "learning_rate": 6.36107251609765e-05, + "loss": 0.5001, + "step": 6722 + }, + { + "epoch": 0.84, + "learning_rate": 6.351167461833168e-05, + "loss": 0.496, + "step": 6723 + }, + { + "epoch": 0.84, + "learning_rate": 6.341269602305655e-05, + "loss": 0.4186, + "step": 6724 + }, + { + "epoch": 0.84, + "learning_rate": 6.331378939146593e-05, + "loss": 0.479, + "step": 6725 + }, + { + "epoch": 0.84, + "learning_rate": 6.321495473986289e-05, + "loss": 0.699, + "step": 6726 + }, + { + "epoch": 0.84, + "learning_rate": 6.311619208453834e-05, + "loss": 0.6002, + "step": 6727 + }, + { + "epoch": 0.84, + "learning_rate": 6.301750144177177e-05, + "loss": 0.4877, + "step": 6728 + }, + { + "epoch": 0.84, + "learning_rate": 6.291888282783054e-05, + "loss": 0.5768, + "step": 6729 + }, + { + "epoch": 0.84, + "learning_rate": 6.28203362589701e-05, + "loss": 0.465, + "step": 6730 + }, + { + "epoch": 0.84, + "learning_rate": 6.272186175143436e-05, + "loss": 0.5717, + "step": 6731 + }, + { + "epoch": 0.84, + "learning_rate": 6.262345932145475e-05, + "loss": 0.4612, + "step": 6732 + }, + { + "epoch": 0.84, + "learning_rate": 6.252512898525137e-05, + "loss": 0.4767, + "step": 6733 + }, + { + "epoch": 0.84, + "learning_rate": 6.242687075903214e-05, + "loss": 0.4146, + "step": 6734 + }, + { + "epoch": 0.84, + "learning_rate": 6.232868465899327e-05, + "loss": 0.423, + "step": 6735 + }, + { + "epoch": 0.84, + "learning_rate": 6.223057070131899e-05, + "loss": 0.485, + "step": 6736 + }, + { + "epoch": 0.84, + "learning_rate": 6.213252890218163e-05, + "loss": 0.5751, + "step": 6737 + }, + { + "epoch": 0.84, + "learning_rate": 6.20345592777416e-05, + "loss": 0.4779, + "step": 6738 + }, + { + "epoch": 0.84, + "learning_rate": 6.193666184414764e-05, + "loss": 0.6147, + "step": 6739 + }, + { + "epoch": 0.84, + "learning_rate": 6.183883661753614e-05, + "loss": 0.4416, + "step": 6740 + }, + { + "epoch": 0.84, + "learning_rate": 6.174108361403203e-05, + "loss": 0.5695, + "step": 6741 + }, + { + "epoch": 0.85, + "learning_rate": 6.164340284974806e-05, + "loss": 0.494, + "step": 6742 + }, + { + "epoch": 0.85, + "learning_rate": 6.154579434078528e-05, + "loss": 0.484, + "step": 6743 + }, + { + "epoch": 0.85, + "learning_rate": 6.144825810323267e-05, + "loss": 0.4951, + "step": 6744 + }, + { + "epoch": 0.85, + "learning_rate": 6.135079415316735e-05, + "loss": 0.5251, + "step": 6745 + }, + { + "epoch": 0.85, + "learning_rate": 6.125340250665463e-05, + "loss": 0.5139, + "step": 6746 + }, + { + "epoch": 0.85, + "learning_rate": 6.115608317974758e-05, + "loss": 0.4591, + "step": 6747 + }, + { + "epoch": 0.85, + "learning_rate": 6.105883618848774e-05, + "loss": 0.5291, + "step": 6748 + }, + { + "epoch": 0.85, + "learning_rate": 6.0961661548904424e-05, + "loss": 0.4827, + "step": 6749 + }, + { + "epoch": 0.85, + "learning_rate": 6.086455927701528e-05, + "loss": 0.5278, + "step": 6750 + }, + { + "epoch": 0.85, + "learning_rate": 6.076752938882585e-05, + "loss": 0.4576, + "step": 6751 + }, + { + "epoch": 0.85, + "learning_rate": 6.067057190032976e-05, + "loss": 0.5133, + "step": 6752 + }, + { + "epoch": 0.85, + "learning_rate": 6.057368682750886e-05, + "loss": 0.4625, + "step": 6753 + }, + { + "epoch": 0.85, + "learning_rate": 6.047687418633274e-05, + "loss": 0.5682, + "step": 6754 + }, + { + "epoch": 0.85, + "learning_rate": 6.038013399275938e-05, + "loss": 0.4772, + "step": 6755 + }, + { + "epoch": 0.85, + "learning_rate": 6.028346626273468e-05, + "loss": 0.5531, + "step": 6756 + }, + { + "epoch": 0.85, + "learning_rate": 6.01868710121925e-05, + "loss": 0.4916, + "step": 6757 + }, + { + "epoch": 0.85, + "learning_rate": 6.0090348257055005e-05, + "loss": 0.5323, + "step": 6758 + }, + { + "epoch": 0.85, + "learning_rate": 5.999389801323218e-05, + "loss": 0.4139, + "step": 6759 + }, + { + "epoch": 0.85, + "learning_rate": 5.98975202966221e-05, + "loss": 0.5167, + "step": 6760 + }, + { + "epoch": 0.85, + "learning_rate": 5.980121512311115e-05, + "loss": 0.4622, + "step": 6761 + }, + { + "epoch": 0.85, + "learning_rate": 5.97049825085732e-05, + "loss": 0.5442, + "step": 6762 + }, + { + "epoch": 0.85, + "learning_rate": 5.960882246887073e-05, + "loss": 0.4637, + "step": 6763 + }, + { + "epoch": 0.85, + "learning_rate": 5.951273501985388e-05, + "loss": 0.457, + "step": 6764 + }, + { + "epoch": 0.85, + "learning_rate": 5.941672017736111e-05, + "loss": 0.4381, + "step": 6765 + }, + { + "epoch": 0.85, + "learning_rate": 5.932077795721863e-05, + "loss": 0.55, + "step": 6766 + }, + { + "epoch": 0.85, + "learning_rate": 5.922490837524086e-05, + "loss": 0.4351, + "step": 6767 + }, + { + "epoch": 0.85, + "learning_rate": 5.912911144723021e-05, + "loss": 0.6355, + "step": 6768 + }, + { + "epoch": 0.85, + "learning_rate": 5.903338718897705e-05, + "loss": 0.4656, + "step": 6769 + }, + { + "epoch": 0.85, + "learning_rate": 5.893773561626003e-05, + "loss": 0.5048, + "step": 6770 + }, + { + "epoch": 0.85, + "learning_rate": 5.8842156744845385e-05, + "loss": 0.5323, + "step": 6771 + }, + { + "epoch": 0.85, + "learning_rate": 5.874665059048767e-05, + "loss": 0.5362, + "step": 6772 + }, + { + "epoch": 0.85, + "learning_rate": 5.86512171689294e-05, + "loss": 0.4724, + "step": 6773 + }, + { + "epoch": 0.85, + "learning_rate": 5.855585649590112e-05, + "loss": 0.5354, + "step": 6774 + }, + { + "epoch": 0.85, + "learning_rate": 5.84605685871214e-05, + "loss": 0.4979, + "step": 6775 + }, + { + "epoch": 0.85, + "learning_rate": 5.836535345829652e-05, + "loss": 0.5117, + "step": 6776 + }, + { + "epoch": 0.85, + "learning_rate": 5.8270211125121207e-05, + "loss": 0.4089, + "step": 6777 + }, + { + "epoch": 0.85, + "learning_rate": 5.817514160327797e-05, + "loss": 0.5242, + "step": 6778 + }, + { + "epoch": 0.85, + "learning_rate": 5.808014490843727e-05, + "loss": 0.5093, + "step": 6779 + }, + { + "epoch": 0.85, + "learning_rate": 5.79852210562577e-05, + "loss": 0.5434, + "step": 6780 + }, + { + "epoch": 0.85, + "learning_rate": 5.7890370062385733e-05, + "loss": 0.4573, + "step": 6781 + }, + { + "epoch": 0.85, + "learning_rate": 5.779559194245587e-05, + "loss": 0.589, + "step": 6782 + }, + { + "epoch": 0.85, + "learning_rate": 5.7700886712090783e-05, + "loss": 0.499, + "step": 6783 + }, + { + "epoch": 0.85, + "learning_rate": 5.760625438690065e-05, + "loss": 0.4762, + "step": 6784 + }, + { + "epoch": 0.85, + "learning_rate": 5.75116949824841e-05, + "loss": 0.5474, + "step": 6785 + }, + { + "epoch": 0.85, + "learning_rate": 5.7417208514427556e-05, + "loss": 0.3874, + "step": 6786 + }, + { + "epoch": 0.85, + "learning_rate": 5.732279499830539e-05, + "loss": 0.498, + "step": 6787 + }, + { + "epoch": 0.85, + "learning_rate": 5.722845444968011e-05, + "loss": 0.5656, + "step": 6788 + }, + { + "epoch": 0.85, + "learning_rate": 5.713418688410199e-05, + "loss": 0.4406, + "step": 6789 + }, + { + "epoch": 0.85, + "learning_rate": 5.703999231710949e-05, + "loss": 0.4674, + "step": 6790 + }, + { + "epoch": 0.85, + "learning_rate": 5.6945870764228745e-05, + "loss": 0.5455, + "step": 6791 + }, + { + "epoch": 0.85, + "learning_rate": 5.6851822240974074e-05, + "loss": 0.5029, + "step": 6792 + }, + { + "epoch": 0.85, + "learning_rate": 5.6757846762847786e-05, + "loss": 0.0477, + "step": 6793 + }, + { + "epoch": 0.85, + "learning_rate": 5.666394434534e-05, + "loss": 0.5338, + "step": 6794 + }, + { + "epoch": 0.85, + "learning_rate": 5.657011500392889e-05, + "loss": 0.5651, + "step": 6795 + }, + { + "epoch": 0.85, + "learning_rate": 5.647635875408058e-05, + "loss": 0.5298, + "step": 6796 + }, + { + "epoch": 0.85, + "learning_rate": 5.6382675611249214e-05, + "loss": 0.4813, + "step": 6797 + }, + { + "epoch": 0.85, + "learning_rate": 5.62890655908766e-05, + "loss": 0.5043, + "step": 6798 + }, + { + "epoch": 0.85, + "learning_rate": 5.619552870839279e-05, + "loss": 0.0478, + "step": 6799 + }, + { + "epoch": 0.85, + "learning_rate": 5.610206497921566e-05, + "loss": 0.5356, + "step": 6800 + }, + { + "epoch": 0.85, + "learning_rate": 5.600867441875113e-05, + "loss": 0.4729, + "step": 6801 + }, + { + "epoch": 0.85, + "learning_rate": 5.5915357042392866e-05, + "loss": 0.4647, + "step": 6802 + }, + { + "epoch": 0.85, + "learning_rate": 5.582211286552269e-05, + "loss": 0.4182, + "step": 6803 + }, + { + "epoch": 0.85, + "learning_rate": 5.572894190351019e-05, + "loss": 0.4368, + "step": 6804 + }, + { + "epoch": 0.85, + "learning_rate": 5.563584417171303e-05, + "loss": 0.4576, + "step": 6805 + }, + { + "epoch": 0.85, + "learning_rate": 5.554281968547659e-05, + "loss": 0.5249, + "step": 6806 + }, + { + "epoch": 0.85, + "learning_rate": 5.544986846013439e-05, + "loss": 0.4583, + "step": 6807 + }, + { + "epoch": 0.85, + "learning_rate": 5.535699051100773e-05, + "loss": 0.5625, + "step": 6808 + }, + { + "epoch": 0.85, + "learning_rate": 5.526418585340609e-05, + "loss": 0.416, + "step": 6809 + }, + { + "epoch": 0.85, + "learning_rate": 5.5171454502626396e-05, + "loss": 0.395, + "step": 6810 + }, + { + "epoch": 0.85, + "learning_rate": 5.507879647395392e-05, + "loss": 0.4958, + "step": 6811 + }, + { + "epoch": 0.85, + "learning_rate": 5.4986211782661664e-05, + "loss": 0.5284, + "step": 6812 + }, + { + "epoch": 0.85, + "learning_rate": 5.4893700444010706e-05, + "loss": 0.5363, + "step": 6813 + }, + { + "epoch": 0.85, + "learning_rate": 5.480126247324968e-05, + "loss": 0.3976, + "step": 6814 + }, + { + "epoch": 0.85, + "learning_rate": 5.470889788561545e-05, + "loss": 0.0476, + "step": 6815 + }, + { + "epoch": 0.85, + "learning_rate": 5.461660669633267e-05, + "loss": 0.4644, + "step": 6816 + }, + { + "epoch": 0.85, + "learning_rate": 5.452438892061396e-05, + "loss": 0.5009, + "step": 6817 + }, + { + "epoch": 0.85, + "learning_rate": 5.4432244573659695e-05, + "loss": 0.5143, + "step": 6818 + }, + { + "epoch": 0.85, + "learning_rate": 5.434017367065841e-05, + "loss": 0.4641, + "step": 6819 + }, + { + "epoch": 0.85, + "learning_rate": 5.424817622678618e-05, + "loss": 0.5364, + "step": 6820 + }, + { + "epoch": 0.85, + "learning_rate": 5.4156252257207217e-05, + "loss": 0.4695, + "step": 6821 + }, + { + "epoch": 0.86, + "learning_rate": 5.40644017770735e-05, + "loss": 0.5796, + "step": 6822 + }, + { + "epoch": 0.86, + "learning_rate": 5.39726248015251e-05, + "loss": 0.4293, + "step": 6823 + }, + { + "epoch": 0.86, + "learning_rate": 5.388092134568967e-05, + "loss": 0.4258, + "step": 6824 + }, + { + "epoch": 0.86, + "learning_rate": 5.3789291424683027e-05, + "loss": 0.4827, + "step": 6825 + }, + { + "epoch": 0.86, + "learning_rate": 5.369773505360864e-05, + "loss": 0.6006, + "step": 6826 + }, + { + "epoch": 0.86, + "learning_rate": 5.360625224755811e-05, + "loss": 0.4274, + "step": 6827 + }, + { + "epoch": 0.86, + "learning_rate": 5.3514843021610584e-05, + "loss": 0.5195, + "step": 6828 + }, + { + "epoch": 0.86, + "learning_rate": 5.3423507390833294e-05, + "loss": 0.6292, + "step": 6829 + }, + { + "epoch": 0.86, + "learning_rate": 5.333224537028131e-05, + "loss": 0.5, + "step": 6830 + }, + { + "epoch": 0.86, + "learning_rate": 5.324105697499759e-05, + "loss": 0.5312, + "step": 6831 + }, + { + "epoch": 0.86, + "learning_rate": 5.314994222001296e-05, + "loss": 0.4928, + "step": 6832 + }, + { + "epoch": 0.86, + "learning_rate": 5.305890112034595e-05, + "loss": 0.4446, + "step": 6833 + }, + { + "epoch": 0.86, + "learning_rate": 5.296793369100328e-05, + "loss": 0.4211, + "step": 6834 + }, + { + "epoch": 0.86, + "learning_rate": 5.287703994697907e-05, + "loss": 0.5095, + "step": 6835 + }, + { + "epoch": 0.86, + "learning_rate": 5.278621990325572e-05, + "loss": 0.4916, + "step": 6836 + }, + { + "epoch": 0.86, + "learning_rate": 5.26954735748032e-05, + "loss": 0.5148, + "step": 6837 + }, + { + "epoch": 0.86, + "learning_rate": 5.2604800976579435e-05, + "loss": 0.4833, + "step": 6838 + }, + { + "epoch": 0.86, + "learning_rate": 5.251420212353031e-05, + "loss": 0.5533, + "step": 6839 + }, + { + "epoch": 0.86, + "learning_rate": 5.242367703058937e-05, + "loss": 0.5175, + "step": 6840 + }, + { + "epoch": 0.86, + "learning_rate": 5.233322571267818e-05, + "loss": 0.463, + "step": 6841 + }, + { + "epoch": 0.86, + "learning_rate": 5.224284818470581e-05, + "loss": 0.4716, + "step": 6842 + }, + { + "epoch": 0.86, + "learning_rate": 5.215254446156953e-05, + "loss": 0.604, + "step": 6843 + }, + { + "epoch": 0.86, + "learning_rate": 5.20623145581543e-05, + "loss": 0.3817, + "step": 6844 + }, + { + "epoch": 0.86, + "learning_rate": 5.19721584893329e-05, + "loss": 0.4311, + "step": 6845 + }, + { + "epoch": 0.86, + "learning_rate": 5.188207626996599e-05, + "loss": 0.5752, + "step": 6846 + }, + { + "epoch": 0.86, + "learning_rate": 5.179206791490204e-05, + "loss": 0.4651, + "step": 6847 + }, + { + "epoch": 0.86, + "learning_rate": 5.170213343897745e-05, + "loss": 0.5536, + "step": 6848 + }, + { + "epoch": 0.86, + "learning_rate": 5.161227285701603e-05, + "loss": 0.47, + "step": 6849 + }, + { + "epoch": 0.86, + "learning_rate": 5.1522486183829875e-05, + "loss": 0.6018, + "step": 6850 + }, + { + "epoch": 0.86, + "learning_rate": 5.143277343421876e-05, + "loss": 0.5018, + "step": 6851 + }, + { + "epoch": 0.86, + "learning_rate": 5.134313462297024e-05, + "loss": 0.5085, + "step": 6852 + }, + { + "epoch": 0.86, + "learning_rate": 5.1253569764859566e-05, + "loss": 0.5745, + "step": 6853 + }, + { + "epoch": 0.86, + "learning_rate": 5.116407887465002e-05, + "loss": 0.5001, + "step": 6854 + }, + { + "epoch": 0.86, + "learning_rate": 5.1074661967092536e-05, + "loss": 0.5576, + "step": 6855 + }, + { + "epoch": 0.86, + "learning_rate": 5.098531905692605e-05, + "loss": 0.467, + "step": 6856 + }, + { + "epoch": 0.86, + "learning_rate": 5.089605015887699e-05, + "loss": 0.4609, + "step": 6857 + }, + { + "epoch": 0.86, + "learning_rate": 5.080685528765977e-05, + "loss": 0.4224, + "step": 6858 + }, + { + "epoch": 0.86, + "learning_rate": 5.071773445797662e-05, + "loss": 0.4452, + "step": 6859 + }, + { + "epoch": 0.86, + "learning_rate": 5.0628687684517563e-05, + "loss": 0.4938, + "step": 6860 + }, + { + "epoch": 0.86, + "learning_rate": 5.053971498196036e-05, + "loss": 0.4364, + "step": 6861 + }, + { + "epoch": 0.86, + "learning_rate": 5.0450816364970565e-05, + "loss": 0.4268, + "step": 6862 + }, + { + "epoch": 0.86, + "learning_rate": 5.036199184820167e-05, + "loss": 0.5348, + "step": 6863 + }, + { + "epoch": 0.86, + "learning_rate": 5.027324144629458e-05, + "loss": 0.4952, + "step": 6864 + }, + { + "epoch": 0.86, + "learning_rate": 5.018456517387837e-05, + "loss": 0.517, + "step": 6865 + }, + { + "epoch": 0.86, + "learning_rate": 5.009596304556974e-05, + "loss": 0.0474, + "step": 6866 + }, + { + "epoch": 0.86, + "learning_rate": 5.000743507597322e-05, + "loss": 0.6229, + "step": 6867 + }, + { + "epoch": 0.86, + "learning_rate": 4.991898127968098e-05, + "loss": 0.4966, + "step": 6868 + }, + { + "epoch": 0.86, + "learning_rate": 4.983060167127312e-05, + "loss": 0.5417, + "step": 6869 + }, + { + "epoch": 0.86, + "learning_rate": 4.974229626531762e-05, + "loss": 0.5493, + "step": 6870 + }, + { + "epoch": 0.86, + "learning_rate": 4.965406507636972e-05, + "loss": 0.4849, + "step": 6871 + }, + { + "epoch": 0.86, + "learning_rate": 4.956590811897299e-05, + "loss": 0.5142, + "step": 6872 + }, + { + "epoch": 0.86, + "learning_rate": 4.9477825407658496e-05, + "loss": 0.4966, + "step": 6873 + }, + { + "epoch": 0.86, + "learning_rate": 4.938981695694511e-05, + "loss": 0.5365, + "step": 6874 + }, + { + "epoch": 0.86, + "learning_rate": 4.930188278133946e-05, + "loss": 0.4354, + "step": 6875 + }, + { + "epoch": 0.86, + "learning_rate": 4.9214022895336e-05, + "loss": 0.5256, + "step": 6876 + }, + { + "epoch": 0.86, + "learning_rate": 4.912623731341676e-05, + "loss": 0.5866, + "step": 6877 + }, + { + "epoch": 0.86, + "learning_rate": 4.903852605005183e-05, + "loss": 0.53, + "step": 6878 + }, + { + "epoch": 0.86, + "learning_rate": 4.895088911969864e-05, + "loss": 0.472, + "step": 6879 + }, + { + "epoch": 0.86, + "learning_rate": 4.8863326536802646e-05, + "loss": 0.5421, + "step": 6880 + }, + { + "epoch": 0.86, + "learning_rate": 4.877583831579707e-05, + "loss": 0.4653, + "step": 6881 + }, + { + "epoch": 0.86, + "learning_rate": 4.868842447110267e-05, + "loss": 0.5057, + "step": 6882 + }, + { + "epoch": 0.86, + "learning_rate": 4.860108501712823e-05, + "loss": 0.5233, + "step": 6883 + }, + { + "epoch": 0.86, + "learning_rate": 4.851381996826998e-05, + "loss": 0.4739, + "step": 6884 + }, + { + "epoch": 0.86, + "learning_rate": 4.8426629338912174e-05, + "loss": 0.4343, + "step": 6885 + }, + { + "epoch": 0.86, + "learning_rate": 4.833951314342644e-05, + "loss": 0.4562, + "step": 6886 + }, + { + "epoch": 0.86, + "learning_rate": 4.825247139617245e-05, + "loss": 0.4453, + "step": 6887 + }, + { + "epoch": 0.86, + "learning_rate": 4.816550411149745e-05, + "loss": 0.0477, + "step": 6888 + }, + { + "epoch": 0.86, + "learning_rate": 4.8078611303736475e-05, + "loss": 0.493, + "step": 6889 + }, + { + "epoch": 0.86, + "learning_rate": 4.7991792987212294e-05, + "loss": 0.6029, + "step": 6890 + }, + { + "epoch": 0.86, + "learning_rate": 4.790504917623545e-05, + "loss": 0.4437, + "step": 6891 + }, + { + "epoch": 0.86, + "learning_rate": 4.781837988510396e-05, + "loss": 0.5126, + "step": 6892 + }, + { + "epoch": 0.86, + "learning_rate": 4.773178512810372e-05, + "loss": 0.5424, + "step": 6893 + }, + { + "epoch": 0.86, + "learning_rate": 4.764526491950844e-05, + "loss": 0.4525, + "step": 6894 + }, + { + "epoch": 0.86, + "learning_rate": 4.755881927357952e-05, + "loss": 0.0473, + "step": 6895 + }, + { + "epoch": 0.86, + "learning_rate": 4.747244820456581e-05, + "loss": 0.457, + "step": 6896 + }, + { + "epoch": 0.86, + "learning_rate": 4.738615172670407e-05, + "loss": 0.5786, + "step": 6897 + }, + { + "epoch": 0.86, + "learning_rate": 4.7299929854218826e-05, + "loss": 0.5002, + "step": 6898 + }, + { + "epoch": 0.86, + "learning_rate": 4.721378260132225e-05, + "loss": 0.452, + "step": 6899 + }, + { + "epoch": 0.86, + "learning_rate": 4.7127709982214165e-05, + "loss": 0.5438, + "step": 6900 + }, + { + "epoch": 0.87, + "learning_rate": 4.704171201108204e-05, + "loss": 0.5457, + "step": 6901 + }, + { + "epoch": 0.87, + "learning_rate": 4.6955788702101154e-05, + "loss": 0.4661, + "step": 6902 + }, + { + "epoch": 0.87, + "learning_rate": 4.6869940069434516e-05, + "loss": 0.6616, + "step": 6903 + }, + { + "epoch": 0.87, + "learning_rate": 4.678416612723263e-05, + "loss": 0.5955, + "step": 6904 + }, + { + "epoch": 0.87, + "learning_rate": 4.6698466889633914e-05, + "loss": 0.4792, + "step": 6905 + }, + { + "epoch": 0.87, + "learning_rate": 4.661284237076435e-05, + "loss": 0.5438, + "step": 6906 + }, + { + "epoch": 0.87, + "learning_rate": 4.65272925847377e-05, + "loss": 0.4895, + "step": 6907 + }, + { + "epoch": 0.87, + "learning_rate": 4.644181754565513e-05, + "loss": 0.5223, + "step": 6908 + }, + { + "epoch": 0.87, + "learning_rate": 4.635641726760581e-05, + "loss": 0.3837, + "step": 6909 + }, + { + "epoch": 0.87, + "learning_rate": 4.627109176466643e-05, + "loss": 0.4818, + "step": 6910 + }, + { + "epoch": 0.87, + "learning_rate": 4.6185841050901455e-05, + "loss": 0.4732, + "step": 6911 + }, + { + "epoch": 0.87, + "learning_rate": 4.6100665140362875e-05, + "loss": 0.5521, + "step": 6912 + }, + { + "epoch": 0.87, + "learning_rate": 4.601556404709045e-05, + "loss": 0.4071, + "step": 6913 + }, + { + "epoch": 0.87, + "learning_rate": 4.593053778511169e-05, + "loss": 0.0477, + "step": 6914 + }, + { + "epoch": 0.87, + "learning_rate": 4.5845586368441546e-05, + "loss": 0.4365, + "step": 6915 + }, + { + "epoch": 0.87, + "learning_rate": 4.576070981108271e-05, + "loss": 0.4614, + "step": 6916 + }, + { + "epoch": 0.87, + "learning_rate": 4.567590812702571e-05, + "loss": 0.5422, + "step": 6917 + }, + { + "epoch": 0.87, + "learning_rate": 4.559118133024853e-05, + "loss": 0.4451, + "step": 6918 + }, + { + "epoch": 0.87, + "learning_rate": 4.550652943471695e-05, + "loss": 0.4994, + "step": 6919 + }, + { + "epoch": 0.87, + "learning_rate": 4.542195245438424e-05, + "loss": 0.4647, + "step": 6920 + }, + { + "epoch": 0.87, + "learning_rate": 4.5337450403191424e-05, + "loss": 0.4873, + "step": 6921 + }, + { + "epoch": 0.87, + "learning_rate": 4.525302329506736e-05, + "loss": 0.4852, + "step": 6922 + }, + { + "epoch": 0.87, + "learning_rate": 4.5168671143928144e-05, + "loss": 0.5964, + "step": 6923 + }, + { + "epoch": 0.87, + "learning_rate": 4.508439396367775e-05, + "loss": 0.4712, + "step": 6924 + }, + { + "epoch": 0.87, + "learning_rate": 4.5000191768207856e-05, + "loss": 0.5172, + "step": 6925 + }, + { + "epoch": 0.87, + "learning_rate": 4.491606457139763e-05, + "loss": 0.5994, + "step": 6926 + }, + { + "epoch": 0.87, + "learning_rate": 4.483201238711404e-05, + "loss": 0.5087, + "step": 6927 + }, + { + "epoch": 0.87, + "learning_rate": 4.47480352292115e-05, + "loss": 0.6488, + "step": 6928 + }, + { + "epoch": 0.87, + "learning_rate": 4.466413311153239e-05, + "loss": 0.5438, + "step": 6929 + }, + { + "epoch": 0.87, + "learning_rate": 4.458030604790614e-05, + "loss": 0.4526, + "step": 6930 + }, + { + "epoch": 0.87, + "learning_rate": 4.449655405215036e-05, + "loss": 0.5357, + "step": 6931 + }, + { + "epoch": 0.87, + "learning_rate": 4.441287713807007e-05, + "loss": 0.5778, + "step": 6932 + }, + { + "epoch": 0.87, + "learning_rate": 4.432927531945791e-05, + "loss": 0.5129, + "step": 6933 + }, + { + "epoch": 0.87, + "learning_rate": 4.4245748610094115e-05, + "loss": 0.4247, + "step": 6934 + }, + { + "epoch": 0.87, + "learning_rate": 4.4162297023746746e-05, + "loss": 0.5601, + "step": 6935 + }, + { + "epoch": 0.87, + "learning_rate": 4.407892057417112e-05, + "loss": 0.4518, + "step": 6936 + }, + { + "epoch": 0.87, + "learning_rate": 4.399561927511042e-05, + "loss": 0.4875, + "step": 6937 + }, + { + "epoch": 0.87, + "learning_rate": 4.391239314029544e-05, + "loss": 0.5927, + "step": 6938 + }, + { + "epoch": 0.87, + "learning_rate": 4.382924218344459e-05, + "loss": 0.49, + "step": 6939 + }, + { + "epoch": 0.87, + "learning_rate": 4.374616641826362e-05, + "loss": 0.451, + "step": 6940 + }, + { + "epoch": 0.87, + "learning_rate": 4.366316585844632e-05, + "loss": 0.5381, + "step": 6941 + }, + { + "epoch": 0.87, + "learning_rate": 4.3580240517673675e-05, + "loss": 0.4984, + "step": 6942 + }, + { + "epoch": 0.87, + "learning_rate": 4.3497390409614644e-05, + "loss": 0.4886, + "step": 6943 + }, + { + "epoch": 0.87, + "learning_rate": 4.3414615547925526e-05, + "loss": 0.0473, + "step": 6944 + }, + { + "epoch": 0.87, + "learning_rate": 4.3331915946250245e-05, + "loss": 0.4974, + "step": 6945 + }, + { + "epoch": 0.87, + "learning_rate": 4.324929161822039e-05, + "loss": 0.5568, + "step": 6946 + }, + { + "epoch": 0.87, + "learning_rate": 4.316674257745512e-05, + "loss": 0.4919, + "step": 6947 + }, + { + "epoch": 0.87, + "learning_rate": 4.3084268837561224e-05, + "loss": 0.4823, + "step": 6948 + }, + { + "epoch": 0.87, + "learning_rate": 4.300187041213294e-05, + "loss": 0.5117, + "step": 6949 + }, + { + "epoch": 0.87, + "learning_rate": 4.291954731475228e-05, + "loss": 0.4724, + "step": 6950 + }, + { + "epoch": 0.87, + "learning_rate": 4.28372995589888e-05, + "loss": 0.5161, + "step": 6951 + }, + { + "epoch": 0.87, + "learning_rate": 4.2755127158399465e-05, + "loss": 0.5638, + "step": 6952 + }, + { + "epoch": 0.87, + "learning_rate": 4.267303012652895e-05, + "loss": 0.6024, + "step": 6953 + }, + { + "epoch": 0.87, + "learning_rate": 4.25910084769095e-05, + "loss": 0.5714, + "step": 6954 + }, + { + "epoch": 0.87, + "learning_rate": 4.2509062223061e-05, + "loss": 0.5915, + "step": 6955 + }, + { + "epoch": 0.87, + "learning_rate": 4.242719137849077e-05, + "loss": 0.502, + "step": 6956 + }, + { + "epoch": 0.87, + "learning_rate": 4.2345395956693786e-05, + "loss": 0.6011, + "step": 6957 + }, + { + "epoch": 0.87, + "learning_rate": 4.2263675971152715e-05, + "loss": 0.6058, + "step": 6958 + }, + { + "epoch": 0.87, + "learning_rate": 4.218203143533739e-05, + "loss": 0.5181, + "step": 6959 + }, + { + "epoch": 0.87, + "learning_rate": 4.210046236270565e-05, + "loss": 0.5204, + "step": 6960 + }, + { + "epoch": 0.87, + "learning_rate": 4.201896876670258e-05, + "loss": 0.4789, + "step": 6961 + }, + { + "epoch": 0.87, + "learning_rate": 4.1937550660761105e-05, + "loss": 0.601, + "step": 6962 + }, + { + "epoch": 0.87, + "learning_rate": 4.185620805830142e-05, + "loss": 0.4847, + "step": 6963 + }, + { + "epoch": 0.87, + "learning_rate": 4.177494097273155e-05, + "loss": 0.4943, + "step": 6964 + }, + { + "epoch": 0.87, + "learning_rate": 4.16937494174468e-05, + "loss": 0.4625, + "step": 6965 + }, + { + "epoch": 0.87, + "learning_rate": 4.161263340583038e-05, + "loss": 0.4781, + "step": 6966 + }, + { + "epoch": 0.87, + "learning_rate": 4.153159295125253e-05, + "loss": 0.468, + "step": 6967 + }, + { + "epoch": 0.87, + "learning_rate": 4.145062806707151e-05, + "loss": 0.49, + "step": 6968 + }, + { + "epoch": 0.87, + "learning_rate": 4.136973876663286e-05, + "loss": 0.5275, + "step": 6969 + }, + { + "epoch": 0.87, + "learning_rate": 4.128892506326981e-05, + "loss": 0.5646, + "step": 6970 + }, + { + "epoch": 0.87, + "learning_rate": 4.120818697030309e-05, + "loss": 0.5315, + "step": 6971 + }, + { + "epoch": 0.87, + "learning_rate": 4.11275245010409e-05, + "loss": 0.4961, + "step": 6972 + }, + { + "epoch": 0.87, + "learning_rate": 4.104693766877915e-05, + "loss": 0.3928, + "step": 6973 + }, + { + "epoch": 0.87, + "learning_rate": 4.0966426486800933e-05, + "loss": 0.4431, + "step": 6974 + }, + { + "epoch": 0.87, + "learning_rate": 4.088599096837714e-05, + "loss": 0.5021, + "step": 6975 + }, + { + "epoch": 0.87, + "learning_rate": 4.080563112676622e-05, + "loss": 0.4911, + "step": 6976 + }, + { + "epoch": 0.87, + "learning_rate": 4.072534697521407e-05, + "loss": 0.5576, + "step": 6977 + }, + { + "epoch": 0.87, + "learning_rate": 4.064513852695417e-05, + "loss": 0.5304, + "step": 6978 + }, + { + "epoch": 0.87, + "learning_rate": 4.056500579520728e-05, + "loss": 0.4475, + "step": 6979 + }, + { + "epoch": 0.87, + "learning_rate": 4.0484948793181934e-05, + "loss": 0.5952, + "step": 6980 + }, + { + "epoch": 0.88, + "learning_rate": 4.0404967534074134e-05, + "loss": 0.5776, + "step": 6981 + }, + { + "epoch": 0.88, + "learning_rate": 4.03250620310675e-05, + "loss": 0.4089, + "step": 6982 + }, + { + "epoch": 0.88, + "learning_rate": 4.024523229733279e-05, + "loss": 0.4214, + "step": 6983 + }, + { + "epoch": 0.88, + "learning_rate": 4.016547834602868e-05, + "loss": 0.6075, + "step": 6984 + }, + { + "epoch": 0.88, + "learning_rate": 4.0085800190301166e-05, + "loss": 0.4286, + "step": 6985 + }, + { + "epoch": 0.88, + "learning_rate": 4.0006197843283744e-05, + "loss": 0.4844, + "step": 6986 + }, + { + "epoch": 0.88, + "learning_rate": 3.992667131809757e-05, + "loss": 0.4608, + "step": 6987 + }, + { + "epoch": 0.88, + "learning_rate": 3.984722062785101e-05, + "loss": 0.4698, + "step": 6988 + }, + { + "epoch": 0.88, + "learning_rate": 3.976784578564024e-05, + "loss": 0.5078, + "step": 6989 + }, + { + "epoch": 0.88, + "learning_rate": 3.96885468045487e-05, + "loss": 0.5215, + "step": 6990 + }, + { + "epoch": 0.88, + "learning_rate": 3.960932369764747e-05, + "loss": 0.5045, + "step": 6991 + }, + { + "epoch": 0.88, + "learning_rate": 3.953017647799506e-05, + "loss": 0.4633, + "step": 6992 + }, + { + "epoch": 0.88, + "learning_rate": 3.9451105158637534e-05, + "loss": 0.5135, + "step": 6993 + }, + { + "epoch": 0.88, + "learning_rate": 3.937210975260836e-05, + "loss": 0.4247, + "step": 6994 + }, + { + "epoch": 0.88, + "learning_rate": 3.92931902729286e-05, + "loss": 0.5542, + "step": 6995 + }, + { + "epoch": 0.88, + "learning_rate": 3.921434673260665e-05, + "loss": 0.5999, + "step": 6996 + }, + { + "epoch": 0.88, + "learning_rate": 3.9135579144638446e-05, + "loss": 0.4773, + "step": 6997 + }, + { + "epoch": 0.88, + "learning_rate": 3.905688752200748e-05, + "loss": 0.5255, + "step": 6998 + }, + { + "epoch": 0.88, + "learning_rate": 3.897827187768466e-05, + "loss": 0.5836, + "step": 6999 + }, + { + "epoch": 0.88, + "learning_rate": 3.889973222462839e-05, + "loss": 0.5146, + "step": 7000 + }, + { + "epoch": 0.88, + "learning_rate": 3.882126857578455e-05, + "loss": 0.6034, + "step": 7001 + }, + { + "epoch": 0.88, + "learning_rate": 3.874288094408657e-05, + "loss": 0.4791, + "step": 7002 + }, + { + "epoch": 0.88, + "learning_rate": 3.8664569342455045e-05, + "loss": 0.5352, + "step": 7003 + }, + { + "epoch": 0.88, + "learning_rate": 3.8586333783798445e-05, + "loss": 0.54, + "step": 7004 + }, + { + "epoch": 0.88, + "learning_rate": 3.850817428101239e-05, + "loss": 0.5624, + "step": 7005 + }, + { + "epoch": 0.88, + "learning_rate": 3.843009084698018e-05, + "loss": 0.4969, + "step": 7006 + }, + { + "epoch": 0.88, + "learning_rate": 3.835208349457242e-05, + "loss": 0.5409, + "step": 7007 + }, + { + "epoch": 0.88, + "learning_rate": 3.827415223664732e-05, + "loss": 0.5065, + "step": 7008 + }, + { + "epoch": 0.88, + "learning_rate": 3.8196297086050434e-05, + "loss": 0.6348, + "step": 7009 + }, + { + "epoch": 0.88, + "learning_rate": 3.811851805561478e-05, + "loss": 0.5365, + "step": 7010 + }, + { + "epoch": 0.88, + "learning_rate": 3.804081515816082e-05, + "loss": 0.5594, + "step": 7011 + }, + { + "epoch": 0.88, + "learning_rate": 3.79631884064966e-05, + "loss": 0.5748, + "step": 7012 + }, + { + "epoch": 0.88, + "learning_rate": 3.788563781341742e-05, + "loss": 0.4263, + "step": 7013 + }, + { + "epoch": 0.88, + "learning_rate": 3.780816339170617e-05, + "loss": 0.5748, + "step": 7014 + }, + { + "epoch": 0.88, + "learning_rate": 3.773076515413315e-05, + "loss": 0.4825, + "step": 7015 + }, + { + "epoch": 0.88, + "learning_rate": 3.7653443113456084e-05, + "loss": 0.0471, + "step": 7016 + }, + { + "epoch": 0.88, + "learning_rate": 3.757619728242018e-05, + "loss": 0.578, + "step": 7017 + }, + { + "epoch": 0.88, + "learning_rate": 3.74990276737579e-05, + "loss": 0.554, + "step": 7018 + }, + { + "epoch": 0.88, + "learning_rate": 3.742193430018942e-05, + "loss": 0.4614, + "step": 7019 + }, + { + "epoch": 0.88, + "learning_rate": 3.7344917174422124e-05, + "loss": 0.4791, + "step": 7020 + }, + { + "epoch": 0.88, + "learning_rate": 3.726797630915107e-05, + "loss": 0.5977, + "step": 7021 + }, + { + "epoch": 0.88, + "learning_rate": 3.719111171705841e-05, + "loss": 0.5197, + "step": 7022 + }, + { + "epoch": 0.88, + "learning_rate": 3.711432341081406e-05, + "loss": 0.5071, + "step": 7023 + }, + { + "epoch": 0.88, + "learning_rate": 3.70376114030751e-05, + "loss": 0.5928, + "step": 7024 + }, + { + "epoch": 0.88, + "learning_rate": 3.696097570648627e-05, + "loss": 0.4669, + "step": 7025 + }, + { + "epoch": 0.88, + "learning_rate": 3.688441633367951e-05, + "loss": 0.542, + "step": 7026 + }, + { + "epoch": 0.88, + "learning_rate": 3.680793329727422e-05, + "loss": 0.6603, + "step": 7027 + }, + { + "epoch": 0.88, + "learning_rate": 3.6731526609877386e-05, + "loss": 0.4948, + "step": 7028 + }, + { + "epoch": 0.88, + "learning_rate": 3.6655196284083314e-05, + "loss": 0.4817, + "step": 7029 + }, + { + "epoch": 0.88, + "learning_rate": 3.657894233247361e-05, + "loss": 0.5443, + "step": 7030 + }, + { + "epoch": 0.88, + "learning_rate": 3.6502764767617544e-05, + "loss": 0.519, + "step": 7031 + }, + { + "epoch": 0.88, + "learning_rate": 3.642666360207142e-05, + "loss": 0.5389, + "step": 7032 + }, + { + "epoch": 0.88, + "learning_rate": 3.6350638848379304e-05, + "loss": 0.469, + "step": 7033 + }, + { + "epoch": 0.88, + "learning_rate": 3.627469051907251e-05, + "loss": 0.5497, + "step": 7034 + }, + { + "epoch": 0.88, + "learning_rate": 3.6198818626669797e-05, + "loss": 0.5674, + "step": 7035 + }, + { + "epoch": 0.88, + "learning_rate": 3.612302318367722e-05, + "loss": 0.4238, + "step": 7036 + }, + { + "epoch": 0.88, + "learning_rate": 3.6047304202588394e-05, + "loss": 0.4681, + "step": 7037 + }, + { + "epoch": 0.88, + "learning_rate": 3.597166169588423e-05, + "loss": 0.5311, + "step": 7038 + }, + { + "epoch": 0.88, + "learning_rate": 3.589609567603313e-05, + "loss": 0.5681, + "step": 7039 + }, + { + "epoch": 0.88, + "learning_rate": 3.5820606155490655e-05, + "loss": 0.4783, + "step": 7040 + }, + { + "epoch": 0.88, + "learning_rate": 3.574519314669999e-05, + "loss": 0.5117, + "step": 7041 + }, + { + "epoch": 0.88, + "learning_rate": 3.566985666209166e-05, + "loss": 0.5009, + "step": 7042 + }, + { + "epoch": 0.88, + "learning_rate": 3.559459671408349e-05, + "loss": 0.5166, + "step": 7043 + }, + { + "epoch": 0.88, + "learning_rate": 3.5519413315080854e-05, + "loss": 0.5277, + "step": 7044 + }, + { + "epoch": 0.88, + "learning_rate": 3.544430647747632e-05, + "loss": 0.5634, + "step": 7045 + }, + { + "epoch": 0.88, + "learning_rate": 3.5369276213649995e-05, + "loss": 0.0473, + "step": 7046 + }, + { + "epoch": 0.88, + "learning_rate": 3.52943225359692e-05, + "loss": 0.5579, + "step": 7047 + }, + { + "epoch": 0.88, + "learning_rate": 3.5219445456788734e-05, + "loss": 0.4886, + "step": 7048 + }, + { + "epoch": 0.88, + "learning_rate": 3.514464498845077e-05, + "loss": 0.4241, + "step": 7049 + }, + { + "epoch": 0.88, + "learning_rate": 3.506992114328483e-05, + "loss": 0.4299, + "step": 7050 + }, + { + "epoch": 0.88, + "learning_rate": 3.499527393360791e-05, + "loss": 0.5963, + "step": 7051 + }, + { + "epoch": 0.88, + "learning_rate": 3.492070337172415e-05, + "loss": 0.5048, + "step": 7052 + }, + { + "epoch": 0.88, + "learning_rate": 3.484620946992534e-05, + "loss": 0.4784, + "step": 7053 + }, + { + "epoch": 0.88, + "learning_rate": 3.4771792240490316e-05, + "loss": 0.4829, + "step": 7054 + }, + { + "epoch": 0.88, + "learning_rate": 3.46974516956855e-05, + "loss": 0.5463, + "step": 7055 + }, + { + "epoch": 0.88, + "learning_rate": 3.462318784776458e-05, + "loss": 0.4729, + "step": 7056 + }, + { + "epoch": 0.88, + "learning_rate": 3.4549000708968716e-05, + "loss": 0.5232, + "step": 7057 + }, + { + "epoch": 0.88, + "learning_rate": 3.447489029152634e-05, + "loss": 0.5399, + "step": 7058 + }, + { + "epoch": 0.88, + "learning_rate": 3.440085660765319e-05, + "loss": 0.5851, + "step": 7059 + }, + { + "epoch": 0.88, + "learning_rate": 3.432689966955249e-05, + "loss": 0.5396, + "step": 7060 + }, + { + "epoch": 0.89, + "learning_rate": 3.4253019489414564e-05, + "loss": 0.5437, + "step": 7061 + }, + { + "epoch": 0.89, + "learning_rate": 3.417921607941737e-05, + "loss": 0.5199, + "step": 7062 + }, + { + "epoch": 0.89, + "learning_rate": 3.410548945172609e-05, + "loss": 0.45, + "step": 7063 + }, + { + "epoch": 0.89, + "learning_rate": 3.403183961849332e-05, + "loss": 0.5455, + "step": 7064 + }, + { + "epoch": 0.89, + "learning_rate": 3.3958266591858746e-05, + "loss": 0.6049, + "step": 7065 + }, + { + "epoch": 0.89, + "learning_rate": 3.388477038394972e-05, + "loss": 0.4841, + "step": 7066 + }, + { + "epoch": 0.89, + "learning_rate": 3.3811351006880766e-05, + "loss": 0.4969, + "step": 7067 + }, + { + "epoch": 0.89, + "learning_rate": 3.373800847275377e-05, + "loss": 0.4417, + "step": 7068 + }, + { + "epoch": 0.89, + "learning_rate": 3.366474279365789e-05, + "loss": 0.4718, + "step": 7069 + }, + { + "epoch": 0.89, + "learning_rate": 3.3591553981669746e-05, + "loss": 0.0477, + "step": 7070 + }, + { + "epoch": 0.89, + "learning_rate": 3.351844204885318e-05, + "loss": 0.0474, + "step": 7071 + }, + { + "epoch": 0.89, + "learning_rate": 3.34454070072594e-05, + "loss": 0.4321, + "step": 7072 + }, + { + "epoch": 0.89, + "learning_rate": 3.337244886892693e-05, + "loss": 0.7078, + "step": 7073 + }, + { + "epoch": 0.89, + "learning_rate": 3.3299567645881655e-05, + "loss": 0.5551, + "step": 7074 + }, + { + "epoch": 0.89, + "learning_rate": 3.3226763350136855e-05, + "loss": 0.566, + "step": 7075 + }, + { + "epoch": 0.89, + "learning_rate": 3.315403599369282e-05, + "loss": 0.4943, + "step": 7076 + }, + { + "epoch": 0.89, + "learning_rate": 3.308138558853746e-05, + "loss": 0.3923, + "step": 7077 + }, + { + "epoch": 0.89, + "learning_rate": 3.3008812146645914e-05, + "loss": 0.4531, + "step": 7078 + }, + { + "epoch": 0.89, + "learning_rate": 3.293631567998062e-05, + "loss": 0.4887, + "step": 7079 + }, + { + "epoch": 0.89, + "learning_rate": 3.28638962004914e-05, + "loss": 0.4315, + "step": 7080 + }, + { + "epoch": 0.89, + "learning_rate": 3.279155372011522e-05, + "loss": 0.5648, + "step": 7081 + }, + { + "epoch": 0.89, + "learning_rate": 3.271928825077652e-05, + "loss": 0.4324, + "step": 7082 + }, + { + "epoch": 0.89, + "learning_rate": 3.264709980438701e-05, + "loss": 0.4085, + "step": 7083 + }, + { + "epoch": 0.89, + "learning_rate": 3.257498839284556e-05, + "loss": 0.5468, + "step": 7084 + }, + { + "epoch": 0.89, + "learning_rate": 3.25029540280386e-05, + "loss": 0.4817, + "step": 7085 + }, + { + "epoch": 0.89, + "learning_rate": 3.243099672183958e-05, + "loss": 0.5594, + "step": 7086 + }, + { + "epoch": 0.89, + "learning_rate": 3.235911648610951e-05, + "loss": 0.5249, + "step": 7087 + }, + { + "epoch": 0.89, + "learning_rate": 3.228731333269646e-05, + "loss": 0.5889, + "step": 7088 + }, + { + "epoch": 0.89, + "learning_rate": 3.221558727343604e-05, + "loss": 0.4525, + "step": 7089 + }, + { + "epoch": 0.89, + "learning_rate": 3.2143938320151e-05, + "loss": 0.527, + "step": 7090 + }, + { + "epoch": 0.89, + "learning_rate": 3.207236648465123e-05, + "loss": 0.4465, + "step": 7091 + }, + { + "epoch": 0.89, + "learning_rate": 3.200087177873423e-05, + "loss": 0.4889, + "step": 7092 + }, + { + "epoch": 0.89, + "learning_rate": 3.192945421418464e-05, + "loss": 0.4692, + "step": 7093 + }, + { + "epoch": 0.89, + "learning_rate": 3.185811380277431e-05, + "loss": 0.423, + "step": 7094 + }, + { + "epoch": 0.89, + "learning_rate": 3.178685055626252e-05, + "loss": 0.4413, + "step": 7095 + }, + { + "epoch": 0.89, + "learning_rate": 3.171566448639568e-05, + "loss": 0.5251, + "step": 7096 + }, + { + "epoch": 0.89, + "learning_rate": 3.16445556049077e-05, + "loss": 0.4458, + "step": 7097 + }, + { + "epoch": 0.89, + "learning_rate": 3.157352392351942e-05, + "loss": 0.4506, + "step": 7098 + }, + { + "epoch": 0.89, + "learning_rate": 3.1502569453939255e-05, + "loss": 0.4915, + "step": 7099 + }, + { + "epoch": 0.89, + "learning_rate": 3.1431692207862736e-05, + "loss": 0.4843, + "step": 7100 + }, + { + "epoch": 0.89, + "learning_rate": 3.13608921969728e-05, + "loss": 0.5009, + "step": 7101 + }, + { + "epoch": 0.89, + "learning_rate": 3.1290169432939555e-05, + "loss": 0.5388, + "step": 7102 + }, + { + "epoch": 0.89, + "learning_rate": 3.1219523927420336e-05, + "loss": 0.473, + "step": 7103 + }, + { + "epoch": 0.89, + "learning_rate": 3.114895569205994e-05, + "loss": 0.4861, + "step": 7104 + }, + { + "epoch": 0.89, + "learning_rate": 3.107846473849013e-05, + "loss": 0.5449, + "step": 7105 + }, + { + "epoch": 0.89, + "learning_rate": 3.1008051078330156e-05, + "loss": 0.4471, + "step": 7106 + }, + { + "epoch": 0.89, + "learning_rate": 3.093771472318652e-05, + "loss": 0.5972, + "step": 7107 + }, + { + "epoch": 0.89, + "learning_rate": 3.086745568465288e-05, + "loss": 0.5986, + "step": 7108 + }, + { + "epoch": 0.89, + "learning_rate": 3.079727397431015e-05, + "loss": 0.5055, + "step": 7109 + }, + { + "epoch": 0.89, + "learning_rate": 3.072716960372657e-05, + "loss": 0.6194, + "step": 7110 + }, + { + "epoch": 0.89, + "learning_rate": 3.065714258445756e-05, + "loss": 0.5391, + "step": 7111 + }, + { + "epoch": 0.89, + "learning_rate": 3.058719292804601e-05, + "loss": 0.5007, + "step": 7112 + }, + { + "epoch": 0.89, + "learning_rate": 3.0517320646021696e-05, + "loss": 0.4648, + "step": 7113 + }, + { + "epoch": 0.89, + "learning_rate": 3.0447525749901895e-05, + "loss": 0.4258, + "step": 7114 + }, + { + "epoch": 0.89, + "learning_rate": 3.037780825119102e-05, + "loss": 0.5281, + "step": 7115 + }, + { + "epoch": 0.89, + "learning_rate": 3.030816816138082e-05, + "loss": 0.569, + "step": 7116 + }, + { + "epoch": 0.89, + "learning_rate": 3.023860549195018e-05, + "loss": 0.4841, + "step": 7117 + }, + { + "epoch": 0.89, + "learning_rate": 3.0169120254365302e-05, + "loss": 0.491, + "step": 7118 + }, + { + "epoch": 0.89, + "learning_rate": 3.0099712460079696e-05, + "loss": 0.5461, + "step": 7119 + }, + { + "epoch": 0.89, + "learning_rate": 3.003038212053383e-05, + "loss": 0.5118, + "step": 7120 + }, + { + "epoch": 0.89, + "learning_rate": 2.9961129247155662e-05, + "loss": 0.5049, + "step": 7121 + }, + { + "epoch": 0.89, + "learning_rate": 2.989195385136029e-05, + "loss": 0.505, + "step": 7122 + }, + { + "epoch": 0.89, + "learning_rate": 2.9822855944550088e-05, + "loss": 0.735, + "step": 7123 + }, + { + "epoch": 0.89, + "learning_rate": 2.9753835538114616e-05, + "loss": 0.5364, + "step": 7124 + }, + { + "epoch": 0.89, + "learning_rate": 2.9684892643430607e-05, + "loss": 0.4746, + "step": 7125 + }, + { + "epoch": 0.89, + "learning_rate": 2.961602727186219e-05, + "loss": 0.5854, + "step": 7126 + }, + { + "epoch": 0.89, + "learning_rate": 2.9547239434760454e-05, + "loss": 0.5121, + "step": 7127 + }, + { + "epoch": 0.89, + "learning_rate": 2.947852914346394e-05, + "loss": 0.4805, + "step": 7128 + }, + { + "epoch": 0.89, + "learning_rate": 2.9409896409298307e-05, + "loss": 0.4629, + "step": 7129 + }, + { + "epoch": 0.89, + "learning_rate": 2.934134124357646e-05, + "loss": 0.5198, + "step": 7130 + }, + { + "epoch": 0.89, + "learning_rate": 2.9272863657598516e-05, + "loss": 0.5398, + "step": 7131 + }, + { + "epoch": 0.89, + "learning_rate": 2.920446366265178e-05, + "loss": 0.4598, + "step": 7132 + }, + { + "epoch": 0.89, + "learning_rate": 2.913614127001074e-05, + "loss": 0.4723, + "step": 7133 + }, + { + "epoch": 0.89, + "learning_rate": 2.906789649093722e-05, + "loss": 0.5398, + "step": 7134 + }, + { + "epoch": 0.89, + "learning_rate": 2.8999729336680047e-05, + "loss": 0.5722, + "step": 7135 + }, + { + "epoch": 0.89, + "learning_rate": 2.8931639818475465e-05, + "loss": 0.5106, + "step": 7136 + }, + { + "epoch": 0.89, + "learning_rate": 2.886362794754671e-05, + "loss": 0.5603, + "step": 7137 + }, + { + "epoch": 0.89, + "learning_rate": 2.879569373510449e-05, + "loss": 0.4268, + "step": 7138 + }, + { + "epoch": 0.89, + "learning_rate": 2.872783719234645e-05, + "loss": 0.4662, + "step": 7139 + }, + { + "epoch": 0.89, + "learning_rate": 2.8660058330457594e-05, + "loss": 0.6024, + "step": 7140 + }, + { + "epoch": 0.9, + "learning_rate": 2.8592357160610095e-05, + "loss": 0.5979, + "step": 7141 + }, + { + "epoch": 0.9, + "learning_rate": 2.8524733693963135e-05, + "loss": 0.0476, + "step": 7142 + }, + { + "epoch": 0.9, + "learning_rate": 2.845718794166341e-05, + "loss": 0.5249, + "step": 7143 + }, + { + "epoch": 0.9, + "learning_rate": 2.8389719914844513e-05, + "loss": 0.652, + "step": 7144 + }, + { + "epoch": 0.9, + "learning_rate": 2.8322329624627497e-05, + "loss": 0.4974, + "step": 7145 + }, + { + "epoch": 0.9, + "learning_rate": 2.8255017082120305e-05, + "loss": 0.7131, + "step": 7146 + }, + { + "epoch": 0.9, + "learning_rate": 2.8187782298418397e-05, + "loss": 0.4882, + "step": 7147 + }, + { + "epoch": 0.9, + "learning_rate": 2.8120625284604075e-05, + "loss": 0.0476, + "step": 7148 + }, + { + "epoch": 0.9, + "learning_rate": 2.8053546051747036e-05, + "loss": 0.4712, + "step": 7149 + }, + { + "epoch": 0.9, + "learning_rate": 2.7986544610904106e-05, + "loss": 0.5563, + "step": 7150 + }, + { + "epoch": 0.9, + "learning_rate": 2.7919620973119342e-05, + "loss": 0.4359, + "step": 7151 + }, + { + "epoch": 0.9, + "learning_rate": 2.7852775149423802e-05, + "loss": 0.6149, + "step": 7152 + }, + { + "epoch": 0.9, + "learning_rate": 2.778600715083596e-05, + "loss": 0.5984, + "step": 7153 + }, + { + "epoch": 0.9, + "learning_rate": 2.771931698836122e-05, + "loss": 0.4167, + "step": 7154 + }, + { + "epoch": 0.9, + "learning_rate": 2.7652704672992303e-05, + "loss": 0.5569, + "step": 7155 + }, + { + "epoch": 0.9, + "learning_rate": 2.758617021570925e-05, + "loss": 0.5353, + "step": 7156 + }, + { + "epoch": 0.9, + "learning_rate": 2.7519713627478794e-05, + "loss": 0.4122, + "step": 7157 + }, + { + "epoch": 0.9, + "learning_rate": 2.745333491925528e-05, + "loss": 0.3697, + "step": 7158 + }, + { + "epoch": 0.9, + "learning_rate": 2.7387034101980067e-05, + "loss": 0.4937, + "step": 7159 + }, + { + "epoch": 0.9, + "learning_rate": 2.7320811186581695e-05, + "loss": 0.4448, + "step": 7160 + }, + { + "epoch": 0.9, + "learning_rate": 2.725466618397576e-05, + "loss": 0.533, + "step": 7161 + }, + { + "epoch": 0.9, + "learning_rate": 2.7188599105065103e-05, + "loss": 0.4833, + "step": 7162 + }, + { + "epoch": 0.9, + "learning_rate": 2.712260996073984e-05, + "loss": 0.4998, + "step": 7163 + }, + { + "epoch": 0.9, + "learning_rate": 2.7056698761876997e-05, + "loss": 0.4614, + "step": 7164 + }, + { + "epoch": 0.9, + "learning_rate": 2.6990865519340823e-05, + "loss": 0.494, + "step": 7165 + }, + { + "epoch": 0.9, + "learning_rate": 2.6925110243982807e-05, + "loss": 0.6734, + "step": 7166 + }, + { + "epoch": 0.9, + "learning_rate": 2.6859432946641616e-05, + "loss": 0.4335, + "step": 7167 + }, + { + "epoch": 0.9, + "learning_rate": 2.6793833638142918e-05, + "loss": 0.5505, + "step": 7168 + }, + { + "epoch": 0.9, + "learning_rate": 2.6728312329299565e-05, + "loss": 0.5427, + "step": 7169 + }, + { + "epoch": 0.9, + "learning_rate": 2.6662869030911753e-05, + "loss": 0.5465, + "step": 7170 + }, + { + "epoch": 0.9, + "learning_rate": 2.659750375376646e-05, + "loss": 0.4781, + "step": 7171 + }, + { + "epoch": 0.9, + "learning_rate": 2.653221650863802e-05, + "loss": 0.5312, + "step": 7172 + }, + { + "epoch": 0.9, + "learning_rate": 2.6467007306287983e-05, + "loss": 0.4429, + "step": 7173 + }, + { + "epoch": 0.9, + "learning_rate": 2.640187615746481e-05, + "loss": 0.561, + "step": 7174 + }, + { + "epoch": 0.9, + "learning_rate": 2.6336823072904303e-05, + "loss": 0.5325, + "step": 7175 + }, + { + "epoch": 0.9, + "learning_rate": 2.6271848063329275e-05, + "loss": 0.4852, + "step": 7176 + }, + { + "epoch": 0.9, + "learning_rate": 2.6206951139449708e-05, + "loss": 0.541, + "step": 7177 + }, + { + "epoch": 0.9, + "learning_rate": 2.614213231196283e-05, + "loss": 0.5155, + "step": 7178 + }, + { + "epoch": 0.9, + "learning_rate": 2.6077391591552646e-05, + "loss": 0.0475, + "step": 7179 + }, + { + "epoch": 0.9, + "learning_rate": 2.6012728988890677e-05, + "loss": 0.5677, + "step": 7180 + }, + { + "epoch": 0.9, + "learning_rate": 2.5948144514635342e-05, + "loss": 0.4897, + "step": 7181 + }, + { + "epoch": 0.9, + "learning_rate": 2.588363817943229e-05, + "loss": 0.5289, + "step": 7182 + }, + { + "epoch": 0.9, + "learning_rate": 2.5819209993914183e-05, + "loss": 0.4424, + "step": 7183 + }, + { + "epoch": 0.9, + "learning_rate": 2.575485996870097e-05, + "loss": 0.6376, + "step": 7184 + }, + { + "epoch": 0.9, + "learning_rate": 2.5690588114399614e-05, + "loss": 0.5007, + "step": 7185 + }, + { + "epoch": 0.9, + "learning_rate": 2.5626394441604028e-05, + "loss": 0.4235, + "step": 7186 + }, + { + "epoch": 0.9, + "learning_rate": 2.5562278960895525e-05, + "loss": 0.533, + "step": 7187 + }, + { + "epoch": 0.9, + "learning_rate": 2.5498241682842425e-05, + "loss": 0.4917, + "step": 7188 + }, + { + "epoch": 0.9, + "learning_rate": 2.5434282618000072e-05, + "loss": 0.0474, + "step": 7189 + }, + { + "epoch": 0.9, + "learning_rate": 2.5370401776911077e-05, + "loss": 0.4635, + "step": 7190 + }, + { + "epoch": 0.9, + "learning_rate": 2.530659917010497e-05, + "loss": 0.5217, + "step": 7191 + }, + { + "epoch": 0.9, + "learning_rate": 2.524287480809845e-05, + "loss": 0.4687, + "step": 7192 + }, + { + "epoch": 0.9, + "learning_rate": 2.5179228701395496e-05, + "loss": 0.519, + "step": 7193 + }, + { + "epoch": 0.9, + "learning_rate": 2.5115660860487e-05, + "loss": 0.4647, + "step": 7194 + }, + { + "epoch": 0.9, + "learning_rate": 2.5052171295850856e-05, + "loss": 0.4982, + "step": 7195 + }, + { + "epoch": 0.9, + "learning_rate": 2.4988760017952305e-05, + "loss": 0.6697, + "step": 7196 + }, + { + "epoch": 0.9, + "learning_rate": 2.49254270372436e-05, + "loss": 0.5251, + "step": 7197 + }, + { + "epoch": 0.9, + "learning_rate": 2.486217236416399e-05, + "loss": 0.556, + "step": 7198 + }, + { + "epoch": 0.9, + "learning_rate": 2.479899600913993e-05, + "loss": 0.4938, + "step": 7199 + }, + { + "epoch": 0.9, + "learning_rate": 2.4735897982584965e-05, + "loss": 0.6381, + "step": 7200 + }, + { + "epoch": 0.9, + "learning_rate": 2.4672878294899557e-05, + "loss": 0.4574, + "step": 7201 + }, + { + "epoch": 0.9, + "learning_rate": 2.4609936956471513e-05, + "loss": 0.51, + "step": 7202 + }, + { + "epoch": 0.9, + "learning_rate": 2.4547073977675528e-05, + "loss": 0.473, + "step": 7203 + }, + { + "epoch": 0.9, + "learning_rate": 2.4484289368873425e-05, + "loss": 0.6301, + "step": 7204 + }, + { + "epoch": 0.9, + "learning_rate": 2.442158314041426e-05, + "loss": 0.4755, + "step": 7205 + }, + { + "epoch": 0.9, + "learning_rate": 2.435895530263388e-05, + "loss": 0.5601, + "step": 7206 + }, + { + "epoch": 0.9, + "learning_rate": 2.4296405865855575e-05, + "loss": 0.5276, + "step": 7207 + }, + { + "epoch": 0.9, + "learning_rate": 2.4233934840389327e-05, + "loss": 0.5347, + "step": 7208 + }, + { + "epoch": 0.9, + "learning_rate": 2.4171542236532452e-05, + "loss": 0.4784, + "step": 7209 + }, + { + "epoch": 0.9, + "learning_rate": 2.4109228064569277e-05, + "loss": 0.5704, + "step": 7210 + }, + { + "epoch": 0.9, + "learning_rate": 2.4046992334771145e-05, + "loss": 0.3981, + "step": 7211 + }, + { + "epoch": 0.9, + "learning_rate": 2.398483505739657e-05, + "loss": 0.5342, + "step": 7212 + }, + { + "epoch": 0.9, + "learning_rate": 2.3922756242691023e-05, + "loss": 0.5736, + "step": 7213 + }, + { + "epoch": 0.9, + "learning_rate": 2.386075590088721e-05, + "loss": 0.5436, + "step": 7214 + }, + { + "epoch": 0.9, + "learning_rate": 2.379883404220462e-05, + "loss": 0.4602, + "step": 7215 + }, + { + "epoch": 0.9, + "learning_rate": 2.373699067685009e-05, + "loss": 0.5464, + "step": 7216 + }, + { + "epoch": 0.9, + "learning_rate": 2.3675225815017353e-05, + "loss": 0.4824, + "step": 7217 + }, + { + "epoch": 0.9, + "learning_rate": 2.3613539466887268e-05, + "loss": 0.5249, + "step": 7218 + }, + { + "epoch": 0.9, + "learning_rate": 2.35519316426277e-05, + "loss": 0.4849, + "step": 7219 + }, + { + "epoch": 0.9, + "learning_rate": 2.3490402352393692e-05, + "loss": 0.5184, + "step": 7220 + }, + { + "epoch": 0.91, + "learning_rate": 2.3428951606327242e-05, + "loss": 0.4653, + "step": 7221 + }, + { + "epoch": 0.91, + "learning_rate": 2.3367579414557415e-05, + "loss": 0.4571, + "step": 7222 + }, + { + "epoch": 0.91, + "learning_rate": 2.3306285787200288e-05, + "loss": 0.5405, + "step": 7223 + }, + { + "epoch": 0.91, + "learning_rate": 2.3245070734359e-05, + "loss": 0.5479, + "step": 7224 + }, + { + "epoch": 0.91, + "learning_rate": 2.3183934266123806e-05, + "loss": 0.506, + "step": 7225 + }, + { + "epoch": 0.91, + "learning_rate": 2.312287639257199e-05, + "loss": 0.5688, + "step": 7226 + }, + { + "epoch": 0.91, + "learning_rate": 2.3061897123767884e-05, + "loss": 0.541, + "step": 7227 + }, + { + "epoch": 0.91, + "learning_rate": 2.300099646976278e-05, + "loss": 0.5088, + "step": 7228 + }, + { + "epoch": 0.91, + "learning_rate": 2.294017444059515e-05, + "loss": 0.4906, + "step": 7229 + }, + { + "epoch": 0.91, + "learning_rate": 2.2879431046290365e-05, + "loss": 0.5636, + "step": 7230 + }, + { + "epoch": 0.91, + "learning_rate": 2.281876629686086e-05, + "loss": 0.5009, + "step": 7231 + }, + { + "epoch": 0.91, + "learning_rate": 2.275818020230619e-05, + "loss": 0.0473, + "step": 7232 + }, + { + "epoch": 0.91, + "learning_rate": 2.2697672772612976e-05, + "loss": 0.5168, + "step": 7233 + }, + { + "epoch": 0.91, + "learning_rate": 2.263724401775469e-05, + "loss": 0.4785, + "step": 7234 + }, + { + "epoch": 0.91, + "learning_rate": 2.257689394769191e-05, + "loss": 0.4204, + "step": 7235 + }, + { + "epoch": 0.91, + "learning_rate": 2.2516622572372415e-05, + "loss": 0.5363, + "step": 7236 + }, + { + "epoch": 0.91, + "learning_rate": 2.24564299017308e-05, + "loss": 0.3792, + "step": 7237 + }, + { + "epoch": 0.91, + "learning_rate": 2.239631594568875e-05, + "loss": 0.5697, + "step": 7238 + }, + { + "epoch": 0.91, + "learning_rate": 2.2336280714155e-05, + "loss": 0.4844, + "step": 7239 + }, + { + "epoch": 0.91, + "learning_rate": 2.2276324217025245e-05, + "loss": 0.625, + "step": 7240 + }, + { + "epoch": 0.91, + "learning_rate": 2.2216446464182304e-05, + "loss": 0.5667, + "step": 7241 + }, + { + "epoch": 0.91, + "learning_rate": 2.2156647465496005e-05, + "loss": 0.4453, + "step": 7242 + }, + { + "epoch": 0.91, + "learning_rate": 2.2096927230823128e-05, + "loss": 0.463, + "step": 7243 + }, + { + "epoch": 0.91, + "learning_rate": 2.203728577000741e-05, + "loss": 0.5846, + "step": 7244 + }, + { + "epoch": 0.91, + "learning_rate": 2.1977723092879766e-05, + "loss": 0.4741, + "step": 7245 + }, + { + "epoch": 0.91, + "learning_rate": 2.1918239209258063e-05, + "loss": 0.5636, + "step": 7246 + }, + { + "epoch": 0.91, + "learning_rate": 2.185883412894707e-05, + "loss": 0.6113, + "step": 7247 + }, + { + "epoch": 0.91, + "learning_rate": 2.1799507861738787e-05, + "loss": 0.5315, + "step": 7248 + }, + { + "epoch": 0.91, + "learning_rate": 2.174026041741206e-05, + "loss": 0.4728, + "step": 7249 + }, + { + "epoch": 0.91, + "learning_rate": 2.1681091805732746e-05, + "loss": 0.533, + "step": 7250 + }, + { + "epoch": 0.91, + "learning_rate": 2.1622002036453814e-05, + "loss": 0.5353, + "step": 7251 + }, + { + "epoch": 0.91, + "learning_rate": 2.1562991119315035e-05, + "loss": 0.682, + "step": 7252 + }, + { + "epoch": 0.91, + "learning_rate": 2.1504059064043403e-05, + "loss": 0.4698, + "step": 7253 + }, + { + "epoch": 0.91, + "learning_rate": 2.1445205880352814e-05, + "loss": 0.5389, + "step": 7254 + }, + { + "epoch": 0.91, + "learning_rate": 2.1386431577944177e-05, + "loss": 0.5096, + "step": 7255 + }, + { + "epoch": 0.91, + "learning_rate": 2.1327736166505408e-05, + "loss": 0.4839, + "step": 7256 + }, + { + "epoch": 0.91, + "learning_rate": 2.1269119655711323e-05, + "loss": 0.4439, + "step": 7257 + }, + { + "epoch": 0.91, + "learning_rate": 2.121058205522397e-05, + "loss": 0.5776, + "step": 7258 + }, + { + "epoch": 0.91, + "learning_rate": 2.1152123374692132e-05, + "loss": 0.5652, + "step": 7259 + }, + { + "epoch": 0.91, + "learning_rate": 2.109374362375166e-05, + "loss": 0.5737, + "step": 7260 + }, + { + "epoch": 0.91, + "learning_rate": 2.1035442812025462e-05, + "loss": 0.5359, + "step": 7261 + }, + { + "epoch": 0.91, + "learning_rate": 2.097722094912341e-05, + "loss": 0.5223, + "step": 7262 + }, + { + "epoch": 0.91, + "learning_rate": 2.091907804464227e-05, + "loss": 0.4915, + "step": 7263 + }, + { + "epoch": 0.91, + "learning_rate": 2.0861014108165986e-05, + "loss": 0.4897, + "step": 7264 + }, + { + "epoch": 0.91, + "learning_rate": 2.0803029149265406e-05, + "loss": 0.54, + "step": 7265 + }, + { + "epoch": 0.91, + "learning_rate": 2.0745123177498104e-05, + "loss": 0.5365, + "step": 7266 + }, + { + "epoch": 0.91, + "learning_rate": 2.0687296202409002e-05, + "loss": 0.493, + "step": 7267 + }, + { + "epoch": 0.91, + "learning_rate": 2.062954823352986e-05, + "loss": 0.4775, + "step": 7268 + }, + { + "epoch": 0.91, + "learning_rate": 2.0571879280379345e-05, + "loss": 0.542, + "step": 7269 + }, + { + "epoch": 0.91, + "learning_rate": 2.0514289352463245e-05, + "loss": 0.4603, + "step": 7270 + }, + { + "epoch": 0.91, + "learning_rate": 2.045677845927413e-05, + "loss": 0.4107, + "step": 7271 + }, + { + "epoch": 0.91, + "learning_rate": 2.0399346610291747e-05, + "loss": 0.4984, + "step": 7272 + }, + { + "epoch": 0.91, + "learning_rate": 2.0341993814982752e-05, + "loss": 0.5873, + "step": 7273 + }, + { + "epoch": 0.91, + "learning_rate": 2.0284720082800636e-05, + "loss": 0.452, + "step": 7274 + }, + { + "epoch": 0.91, + "learning_rate": 2.0227525423185955e-05, + "loss": 0.5464, + "step": 7275 + }, + { + "epoch": 0.91, + "learning_rate": 2.0170409845566283e-05, + "loss": 0.5157, + "step": 7276 + }, + { + "epoch": 0.91, + "learning_rate": 2.0113373359356146e-05, + "loss": 0.5286, + "step": 7277 + }, + { + "epoch": 0.91, + "learning_rate": 2.005641597395691e-05, + "loss": 0.557, + "step": 7278 + }, + { + "epoch": 0.91, + "learning_rate": 1.9999537698757064e-05, + "loss": 0.4436, + "step": 7279 + }, + { + "epoch": 0.91, + "learning_rate": 1.9942738543131944e-05, + "loss": 0.453, + "step": 7280 + }, + { + "epoch": 0.91, + "learning_rate": 1.9886018516443948e-05, + "loss": 0.5887, + "step": 7281 + }, + { + "epoch": 0.91, + "learning_rate": 1.9829377628042266e-05, + "loss": 0.5016, + "step": 7282 + }, + { + "epoch": 0.91, + "learning_rate": 1.9772815887263207e-05, + "loss": 0.5629, + "step": 7283 + }, + { + "epoch": 0.91, + "learning_rate": 1.971633330342998e-05, + "loss": 0.5836, + "step": 7284 + }, + { + "epoch": 0.91, + "learning_rate": 1.965992988585269e-05, + "loss": 0.5768, + "step": 7285 + }, + { + "epoch": 0.91, + "learning_rate": 1.9603605643828513e-05, + "loss": 0.5077, + "step": 7286 + }, + { + "epoch": 0.91, + "learning_rate": 1.9547360586641473e-05, + "loss": 0.5859, + "step": 7287 + }, + { + "epoch": 0.91, + "learning_rate": 1.9491194723562534e-05, + "loss": 0.5559, + "step": 7288 + }, + { + "epoch": 0.91, + "learning_rate": 1.9435108063849684e-05, + "loss": 0.5798, + "step": 7289 + }, + { + "epoch": 0.91, + "learning_rate": 1.9379100616747747e-05, + "loss": 0.5114, + "step": 7290 + }, + { + "epoch": 0.91, + "learning_rate": 1.9323172391488676e-05, + "loss": 0.5479, + "step": 7291 + }, + { + "epoch": 0.91, + "learning_rate": 1.926732339729115e-05, + "loss": 0.4644, + "step": 7292 + }, + { + "epoch": 0.91, + "learning_rate": 1.9211553643360913e-05, + "loss": 0.5995, + "step": 7293 + }, + { + "epoch": 0.91, + "learning_rate": 1.9155863138890672e-05, + "loss": 0.4789, + "step": 7294 + }, + { + "epoch": 0.91, + "learning_rate": 1.9100251893060026e-05, + "loss": 0.5176, + "step": 7295 + }, + { + "epoch": 0.91, + "learning_rate": 1.9044719915035367e-05, + "loss": 0.4863, + "step": 7296 + }, + { + "epoch": 0.91, + "learning_rate": 1.898926721397032e-05, + "loss": 0.5013, + "step": 7297 + }, + { + "epoch": 0.91, + "learning_rate": 1.893389379900512e-05, + "loss": 0.511, + "step": 7298 + }, + { + "epoch": 0.91, + "learning_rate": 1.887859967926725e-05, + "loss": 0.427, + "step": 7299 + }, + { + "epoch": 0.92, + "learning_rate": 1.8823384863870864e-05, + "loss": 0.5275, + "step": 7300 + }, + { + "epoch": 0.92, + "learning_rate": 1.8768249361917234e-05, + "loss": 0.5678, + "step": 7301 + }, + { + "epoch": 0.92, + "learning_rate": 1.8713193182494424e-05, + "loss": 0.4761, + "step": 7302 + }, + { + "epoch": 0.92, + "learning_rate": 1.8658216334677402e-05, + "loss": 0.54, + "step": 7303 + }, + { + "epoch": 0.92, + "learning_rate": 1.860331882752825e-05, + "loss": 0.4581, + "step": 7304 + }, + { + "epoch": 0.92, + "learning_rate": 1.8548500670095724e-05, + "loss": 0.4301, + "step": 7305 + }, + { + "epoch": 0.92, + "learning_rate": 1.8493761871415772e-05, + "loss": 0.6289, + "step": 7306 + }, + { + "epoch": 0.92, + "learning_rate": 1.8439102440510946e-05, + "loss": 0.4353, + "step": 7307 + }, + { + "epoch": 0.92, + "learning_rate": 1.8384522386391043e-05, + "loss": 0.5504, + "step": 7308 + }, + { + "epoch": 0.92, + "learning_rate": 1.833002171805259e-05, + "loss": 0.739, + "step": 7309 + }, + { + "epoch": 0.92, + "learning_rate": 1.827560044447896e-05, + "loss": 0.5165, + "step": 7310 + }, + { + "epoch": 0.92, + "learning_rate": 1.8221258574640575e-05, + "loss": 0.5153, + "step": 7311 + }, + { + "epoch": 0.92, + "learning_rate": 1.816699611749473e-05, + "loss": 0.4342, + "step": 7312 + }, + { + "epoch": 0.92, + "learning_rate": 1.81128130819857e-05, + "loss": 0.5338, + "step": 7313 + }, + { + "epoch": 0.92, + "learning_rate": 1.8058709477044523e-05, + "loss": 0.5463, + "step": 7314 + }, + { + "epoch": 0.92, + "learning_rate": 1.800468531158922e-05, + "loss": 0.4935, + "step": 7315 + }, + { + "epoch": 0.92, + "learning_rate": 1.795074059452484e-05, + "loss": 0.5178, + "step": 7316 + }, + { + "epoch": 0.92, + "learning_rate": 1.7896875334743046e-05, + "loss": 0.4084, + "step": 7317 + }, + { + "epoch": 0.92, + "learning_rate": 1.784308954112268e-05, + "loss": 0.5411, + "step": 7318 + }, + { + "epoch": 0.92, + "learning_rate": 1.7789383222529267e-05, + "loss": 0.5753, + "step": 7319 + }, + { + "epoch": 0.92, + "learning_rate": 1.7735756387815495e-05, + "loss": 0.5277, + "step": 7320 + }, + { + "epoch": 0.92, + "learning_rate": 1.7682209045820684e-05, + "loss": 0.4346, + "step": 7321 + }, + { + "epoch": 0.92, + "learning_rate": 1.7628741205371224e-05, + "loss": 0.443, + "step": 7322 + }, + { + "epoch": 0.92, + "learning_rate": 1.7575352875280283e-05, + "loss": 0.4963, + "step": 7323 + }, + { + "epoch": 0.92, + "learning_rate": 1.7522044064348042e-05, + "loss": 0.5916, + "step": 7324 + }, + { + "epoch": 0.92, + "learning_rate": 1.7468814781361476e-05, + "loss": 0.4458, + "step": 7325 + }, + { + "epoch": 0.92, + "learning_rate": 1.7415665035094453e-05, + "loss": 0.0473, + "step": 7326 + }, + { + "epoch": 0.92, + "learning_rate": 1.7362594834307855e-05, + "loss": 0.449, + "step": 7327 + }, + { + "epoch": 0.92, + "learning_rate": 1.7309604187749293e-05, + "loss": 0.5256, + "step": 7328 + }, + { + "epoch": 0.92, + "learning_rate": 1.72566931041534e-05, + "loss": 0.4305, + "step": 7329 + }, + { + "epoch": 0.92, + "learning_rate": 1.720386159224163e-05, + "loss": 0.5203, + "step": 7330 + }, + { + "epoch": 0.92, + "learning_rate": 1.7151109660722308e-05, + "loss": 0.5972, + "step": 7331 + }, + { + "epoch": 0.92, + "learning_rate": 1.7098437318290584e-05, + "loss": 0.4836, + "step": 7332 + }, + { + "epoch": 0.92, + "learning_rate": 1.704584457362862e-05, + "loss": 0.4902, + "step": 7333 + }, + { + "epoch": 0.92, + "learning_rate": 1.699333143540538e-05, + "loss": 0.5385, + "step": 7334 + }, + { + "epoch": 0.92, + "learning_rate": 1.6940897912276765e-05, + "loss": 0.5931, + "step": 7335 + }, + { + "epoch": 0.92, + "learning_rate": 1.688854401288542e-05, + "loss": 0.4973, + "step": 7336 + }, + { + "epoch": 0.92, + "learning_rate": 1.683626974586111e-05, + "loss": 0.4196, + "step": 7337 + }, + { + "epoch": 0.92, + "learning_rate": 1.678407511982022e-05, + "loss": 0.5892, + "step": 7338 + }, + { + "epoch": 0.92, + "learning_rate": 1.6731960143366143e-05, + "loss": 0.5093, + "step": 7339 + }, + { + "epoch": 0.92, + "learning_rate": 1.6679924825089066e-05, + "loss": 0.5005, + "step": 7340 + }, + { + "epoch": 0.92, + "learning_rate": 1.6627969173566127e-05, + "loss": 0.6311, + "step": 7341 + }, + { + "epoch": 0.92, + "learning_rate": 1.657609319736125e-05, + "loss": 0.431, + "step": 7342 + }, + { + "epoch": 0.92, + "learning_rate": 1.6524296905025325e-05, + "loss": 0.5018, + "step": 7343 + }, + { + "epoch": 0.92, + "learning_rate": 1.647258030509602e-05, + "loss": 0.4846, + "step": 7344 + }, + { + "epoch": 0.92, + "learning_rate": 1.6420943406097954e-05, + "loss": 0.4664, + "step": 7345 + }, + { + "epoch": 0.92, + "learning_rate": 1.6369386216542548e-05, + "loss": 0.4846, + "step": 7346 + }, + { + "epoch": 0.92, + "learning_rate": 1.6317908744928e-05, + "loss": 0.5861, + "step": 7347 + }, + { + "epoch": 0.92, + "learning_rate": 1.6266510999739525e-05, + "loss": 0.5388, + "step": 7348 + }, + { + "epoch": 0.92, + "learning_rate": 1.6215192989449125e-05, + "loss": 0.4785, + "step": 7349 + }, + { + "epoch": 0.92, + "learning_rate": 1.616395472251564e-05, + "loss": 0.5008, + "step": 7350 + }, + { + "epoch": 0.92, + "learning_rate": 1.6112796207384818e-05, + "loss": 0.4497, + "step": 7351 + }, + { + "epoch": 0.92, + "learning_rate": 1.6061717452489245e-05, + "loss": 0.5232, + "step": 7352 + }, + { + "epoch": 0.92, + "learning_rate": 1.601071846624841e-05, + "loss": 0.4655, + "step": 7353 + }, + { + "epoch": 0.92, + "learning_rate": 1.5959799257068475e-05, + "loss": 0.4462, + "step": 7354 + }, + { + "epoch": 0.92, + "learning_rate": 1.590895983334256e-05, + "loss": 0.6841, + "step": 7355 + }, + { + "epoch": 0.92, + "learning_rate": 1.5858200203450744e-05, + "loss": 0.5138, + "step": 7356 + }, + { + "epoch": 0.92, + "learning_rate": 1.5807520375759776e-05, + "loss": 0.5303, + "step": 7357 + }, + { + "epoch": 0.92, + "learning_rate": 1.575692035862336e-05, + "loss": 0.4596, + "step": 7358 + }, + { + "epoch": 0.92, + "learning_rate": 1.5706400160382107e-05, + "loss": 0.5341, + "step": 7359 + }, + { + "epoch": 0.92, + "learning_rate": 1.5655959789363182e-05, + "loss": 0.6113, + "step": 7360 + }, + { + "epoch": 0.92, + "learning_rate": 1.5605599253880886e-05, + "loss": 0.6218, + "step": 7361 + }, + { + "epoch": 0.92, + "learning_rate": 1.55553185622363e-05, + "loss": 0.4509, + "step": 7362 + }, + { + "epoch": 0.92, + "learning_rate": 1.5505117722717288e-05, + "loss": 0.4261, + "step": 7363 + }, + { + "epoch": 0.92, + "learning_rate": 1.5454996743598514e-05, + "loss": 0.4929, + "step": 7364 + }, + { + "epoch": 0.92, + "learning_rate": 1.540495563314154e-05, + "loss": 0.5157, + "step": 7365 + }, + { + "epoch": 0.92, + "learning_rate": 1.535499439959481e-05, + "loss": 0.0477, + "step": 7366 + }, + { + "epoch": 0.92, + "learning_rate": 1.5305113051193474e-05, + "loss": 0.0476, + "step": 7367 + }, + { + "epoch": 0.92, + "learning_rate": 1.5255311596159726e-05, + "loss": 0.5303, + "step": 7368 + }, + { + "epoch": 0.92, + "learning_rate": 1.5205590042702332e-05, + "loss": 0.4971, + "step": 7369 + }, + { + "epoch": 0.92, + "learning_rate": 1.5155948399017016e-05, + "loss": 0.4944, + "step": 7370 + }, + { + "epoch": 0.92, + "learning_rate": 1.5106386673286344e-05, + "loss": 0.4999, + "step": 7371 + }, + { + "epoch": 0.92, + "learning_rate": 1.5056904873679722e-05, + "loss": 0.4817, + "step": 7372 + }, + { + "epoch": 0.92, + "learning_rate": 1.5007503008353296e-05, + "loss": 0.0479, + "step": 7373 + }, + { + "epoch": 0.92, + "learning_rate": 1.4958181085450107e-05, + "loss": 0.6525, + "step": 7374 + }, + { + "epoch": 0.92, + "learning_rate": 1.4908939113100095e-05, + "loss": 0.5256, + "step": 7375 + }, + { + "epoch": 0.92, + "learning_rate": 1.4859777099419769e-05, + "loss": 0.4471, + "step": 7376 + }, + { + "epoch": 0.92, + "learning_rate": 1.4810695052512646e-05, + "loss": 0.4855, + "step": 7377 + }, + { + "epoch": 0.92, + "learning_rate": 1.4761692980469144e-05, + "loss": 0.4789, + "step": 7378 + }, + { + "epoch": 0.92, + "learning_rate": 1.4712770891366246e-05, + "loss": 0.4797, + "step": 7379 + }, + { + "epoch": 0.93, + "learning_rate": 1.4663928793268e-05, + "loss": 0.3944, + "step": 7380 + }, + { + "epoch": 0.93, + "learning_rate": 1.4615166694225135e-05, + "loss": 0.458, + "step": 7381 + }, + { + "epoch": 0.93, + "learning_rate": 1.4566484602275221e-05, + "loss": 0.4886, + "step": 7382 + }, + { + "epoch": 0.93, + "learning_rate": 1.4517882525442616e-05, + "loss": 0.4791, + "step": 7383 + }, + { + "epoch": 0.93, + "learning_rate": 1.4469360471738469e-05, + "loss": 0.6072, + "step": 7384 + }, + { + "epoch": 0.93, + "learning_rate": 1.4420918449160881e-05, + "loss": 0.5259, + "step": 7385 + }, + { + "epoch": 0.93, + "learning_rate": 1.4372556465694576e-05, + "loss": 0.4976, + "step": 7386 + }, + { + "epoch": 0.93, + "learning_rate": 1.4324274529311233e-05, + "loss": 0.6414, + "step": 7387 + }, + { + "epoch": 0.93, + "learning_rate": 1.4276072647969207e-05, + "loss": 0.4382, + "step": 7388 + }, + { + "epoch": 0.93, + "learning_rate": 1.4227950829613811e-05, + "loss": 0.5936, + "step": 7389 + }, + { + "epoch": 0.93, + "learning_rate": 1.4179909082177033e-05, + "loss": 0.4551, + "step": 7390 + }, + { + "epoch": 0.93, + "learning_rate": 1.4131947413577705e-05, + "loss": 0.5255, + "step": 7391 + }, + { + "epoch": 0.93, + "learning_rate": 1.4084065831721394e-05, + "loss": 0.6429, + "step": 7392 + }, + { + "epoch": 0.93, + "learning_rate": 1.4036264344500615e-05, + "loss": 0.5874, + "step": 7393 + }, + { + "epoch": 0.93, + "learning_rate": 1.3988542959794625e-05, + "loss": 0.5242, + "step": 7394 + }, + { + "epoch": 0.93, + "learning_rate": 1.3940901685469298e-05, + "loss": 0.5834, + "step": 7395 + }, + { + "epoch": 0.93, + "learning_rate": 1.3893340529377629e-05, + "loss": 0.5349, + "step": 7396 + }, + { + "epoch": 0.93, + "learning_rate": 1.384585949935918e-05, + "loss": 0.5491, + "step": 7397 + }, + { + "epoch": 0.93, + "learning_rate": 1.379845860324025e-05, + "loss": 0.4983, + "step": 7398 + }, + { + "epoch": 0.93, + "learning_rate": 1.3751137848834138e-05, + "loss": 0.5331, + "step": 7399 + }, + { + "epoch": 0.93, + "learning_rate": 1.3703897243940833e-05, + "loss": 0.5687, + "step": 7400 + }, + { + "epoch": 0.93, + "learning_rate": 1.3656736796347102e-05, + "loss": 0.5497, + "step": 7401 + }, + { + "epoch": 0.93, + "learning_rate": 1.3609656513826563e-05, + "loss": 0.5791, + "step": 7402 + }, + { + "epoch": 0.93, + "learning_rate": 1.3562656404139395e-05, + "loss": 0.5211, + "step": 7403 + }, + { + "epoch": 0.93, + "learning_rate": 1.3515736475032847e-05, + "loss": 0.3898, + "step": 7404 + }, + { + "epoch": 0.93, + "learning_rate": 1.3468896734240898e-05, + "loss": 0.5083, + "step": 7405 + }, + { + "epoch": 0.93, + "learning_rate": 1.3422137189484207e-05, + "loss": 0.4465, + "step": 7406 + }, + { + "epoch": 0.93, + "learning_rate": 1.3375457848470162e-05, + "loss": 0.5054, + "step": 7407 + }, + { + "epoch": 0.93, + "learning_rate": 1.332885871889311e-05, + "loss": 0.5138, + "step": 7408 + }, + { + "epoch": 0.93, + "learning_rate": 1.3282339808434074e-05, + "loss": 0.4611, + "step": 7409 + }, + { + "epoch": 0.93, + "learning_rate": 1.3235901124760919e-05, + "loss": 0.5815, + "step": 7410 + }, + { + "epoch": 0.93, + "learning_rate": 1.3189542675528133e-05, + "loss": 0.5345, + "step": 7411 + }, + { + "epoch": 0.93, + "learning_rate": 1.314326446837727e-05, + "loss": 0.5906, + "step": 7412 + }, + { + "epoch": 0.93, + "learning_rate": 1.3097066510936283e-05, + "loss": 0.4426, + "step": 7413 + }, + { + "epoch": 0.93, + "learning_rate": 1.3050948810820141e-05, + "loss": 0.5446, + "step": 7414 + }, + { + "epoch": 0.93, + "learning_rate": 1.300491137563059e-05, + "loss": 0.0476, + "step": 7415 + }, + { + "epoch": 0.93, + "learning_rate": 1.2958954212956009e-05, + "loss": 0.5959, + "step": 7416 + }, + { + "epoch": 0.93, + "learning_rate": 1.291307733037167e-05, + "loss": 0.5386, + "step": 7417 + }, + { + "epoch": 0.93, + "learning_rate": 1.2867280735439524e-05, + "loss": 0.5729, + "step": 7418 + }, + { + "epoch": 0.93, + "learning_rate": 1.2821564435708421e-05, + "loss": 0.559, + "step": 7419 + }, + { + "epoch": 0.93, + "learning_rate": 1.277592843871378e-05, + "loss": 0.5118, + "step": 7420 + }, + { + "epoch": 0.93, + "learning_rate": 1.273037275197797e-05, + "loss": 0.5966, + "step": 7421 + }, + { + "epoch": 0.93, + "learning_rate": 1.268489738300993e-05, + "loss": 0.5466, + "step": 7422 + }, + { + "epoch": 0.93, + "learning_rate": 1.2639502339305553e-05, + "loss": 0.4965, + "step": 7423 + }, + { + "epoch": 0.93, + "learning_rate": 1.2594187628347409e-05, + "loss": 0.4425, + "step": 7424 + }, + { + "epoch": 0.93, + "learning_rate": 1.25489532576048e-05, + "loss": 0.5485, + "step": 7425 + }, + { + "epoch": 0.93, + "learning_rate": 1.2503799234533819e-05, + "loss": 0.6732, + "step": 7426 + }, + { + "epoch": 0.93, + "learning_rate": 1.2458725566577289e-05, + "loss": 0.4894, + "step": 7427 + }, + { + "epoch": 0.93, + "learning_rate": 1.2413732261164824e-05, + "loss": 0.4774, + "step": 7428 + }, + { + "epoch": 0.93, + "learning_rate": 1.2368819325712821e-05, + "loss": 0.5505, + "step": 7429 + }, + { + "epoch": 0.93, + "learning_rate": 1.2323986767624251e-05, + "loss": 0.6165, + "step": 7430 + }, + { + "epoch": 0.93, + "learning_rate": 1.227923459428909e-05, + "loss": 0.5862, + "step": 7431 + }, + { + "epoch": 0.93, + "learning_rate": 1.2234562813083883e-05, + "loss": 0.5679, + "step": 7432 + }, + { + "epoch": 0.93, + "learning_rate": 1.2189971431372016e-05, + "loss": 0.5097, + "step": 7433 + }, + { + "epoch": 0.93, + "learning_rate": 1.2145460456503609e-05, + "loss": 0.5322, + "step": 7434 + }, + { + "epoch": 0.93, + "learning_rate": 1.2101029895815407e-05, + "loss": 0.5145, + "step": 7435 + }, + { + "epoch": 0.93, + "learning_rate": 1.2056679756631106e-05, + "loss": 0.506, + "step": 7436 + }, + { + "epoch": 0.93, + "learning_rate": 1.2012410046260968e-05, + "loss": 0.4989, + "step": 7437 + }, + { + "epoch": 0.93, + "learning_rate": 1.1968220772002103e-05, + "loss": 0.546, + "step": 7438 + }, + { + "epoch": 0.93, + "learning_rate": 1.1924111941138294e-05, + "loss": 0.4954, + "step": 7439 + }, + { + "epoch": 0.93, + "learning_rate": 1.188008356094017e-05, + "loss": 0.4711, + "step": 7440 + }, + { + "epoch": 0.93, + "learning_rate": 1.1836135638665035e-05, + "loss": 0.5267, + "step": 7441 + }, + { + "epoch": 0.93, + "learning_rate": 1.1792268181556875e-05, + "loss": 0.4282, + "step": 7442 + }, + { + "epoch": 0.93, + "learning_rate": 1.1748481196846406e-05, + "loss": 0.5544, + "step": 7443 + }, + { + "epoch": 0.93, + "learning_rate": 1.1704774691751241e-05, + "loss": 0.5493, + "step": 7444 + }, + { + "epoch": 0.93, + "learning_rate": 1.1661148673475619e-05, + "loss": 0.5421, + "step": 7445 + }, + { + "epoch": 0.93, + "learning_rate": 1.1617603149210454e-05, + "loss": 0.5204, + "step": 7446 + }, + { + "epoch": 0.93, + "learning_rate": 1.1574138126133505e-05, + "loss": 0.4557, + "step": 7447 + }, + { + "epoch": 0.93, + "learning_rate": 1.153075361140915e-05, + "loss": 0.6217, + "step": 7448 + }, + { + "epoch": 0.93, + "learning_rate": 1.1487449612188616e-05, + "loss": 0.6171, + "step": 7449 + }, + { + "epoch": 0.93, + "learning_rate": 1.1444226135609859e-05, + "loss": 0.5281, + "step": 7450 + }, + { + "epoch": 0.93, + "learning_rate": 1.1401083188797346e-05, + "loss": 0.6041, + "step": 7451 + }, + { + "epoch": 0.93, + "learning_rate": 1.1358020778862499e-05, + "loss": 0.5327, + "step": 7452 + }, + { + "epoch": 0.93, + "learning_rate": 1.131503891290342e-05, + "loss": 0.5627, + "step": 7453 + }, + { + "epoch": 0.93, + "learning_rate": 1.1272137598004884e-05, + "loss": 0.58, + "step": 7454 + }, + { + "epoch": 0.93, + "learning_rate": 1.1229316841238457e-05, + "loss": 0.3986, + "step": 7455 + }, + { + "epoch": 0.93, + "learning_rate": 1.1186576649662327e-05, + "loss": 0.4526, + "step": 7456 + }, + { + "epoch": 0.93, + "learning_rate": 1.114391703032147e-05, + "loss": 0.5604, + "step": 7457 + }, + { + "epoch": 0.93, + "learning_rate": 1.1101337990247595e-05, + "loss": 0.5175, + "step": 7458 + }, + { + "epoch": 0.93, + "learning_rate": 1.1058839536459086e-05, + "loss": 0.4596, + "step": 7459 + }, + { + "epoch": 0.94, + "learning_rate": 1.1016421675961009e-05, + "loss": 0.649, + "step": 7460 + }, + { + "epoch": 0.94, + "learning_rate": 1.0974084415745267e-05, + "loss": 0.5029, + "step": 7461 + }, + { + "epoch": 0.94, + "learning_rate": 1.0931827762790392e-05, + "loss": 0.5602, + "step": 7462 + }, + { + "epoch": 0.94, + "learning_rate": 1.08896517240617e-05, + "loss": 0.4446, + "step": 7463 + }, + { + "epoch": 0.94, + "learning_rate": 1.0847556306511019e-05, + "loss": 0.478, + "step": 7464 + }, + { + "epoch": 0.94, + "learning_rate": 1.0805541517077133e-05, + "loss": 0.509, + "step": 7465 + }, + { + "epoch": 0.94, + "learning_rate": 1.0763607362685446e-05, + "loss": 0.4393, + "step": 7466 + }, + { + "epoch": 0.94, + "learning_rate": 1.0721753850247984e-05, + "loss": 0.4551, + "step": 7467 + }, + { + "epoch": 0.94, + "learning_rate": 1.0679980986663674e-05, + "loss": 0.4396, + "step": 7468 + }, + { + "epoch": 0.94, + "learning_rate": 1.0638288778817894e-05, + "loss": 0.5406, + "step": 7469 + }, + { + "epoch": 0.94, + "learning_rate": 1.059667723358304e-05, + "loss": 0.4453, + "step": 7470 + }, + { + "epoch": 0.94, + "learning_rate": 1.0555146357817846e-05, + "loss": 0.4329, + "step": 7471 + }, + { + "epoch": 0.94, + "learning_rate": 1.0513696158368057e-05, + "loss": 0.4703, + "step": 7472 + }, + { + "epoch": 0.94, + "learning_rate": 1.0472326642065988e-05, + "loss": 0.5875, + "step": 7473 + }, + { + "epoch": 0.94, + "learning_rate": 1.0431037815730682e-05, + "loss": 0.4683, + "step": 7474 + }, + { + "epoch": 0.94, + "learning_rate": 1.0389829686167807e-05, + "loss": 0.5243, + "step": 7475 + }, + { + "epoch": 0.94, + "learning_rate": 1.0348702260169873e-05, + "loss": 0.5587, + "step": 7476 + }, + { + "epoch": 0.94, + "learning_rate": 1.030765554451596e-05, + "loss": 0.0474, + "step": 7477 + }, + { + "epoch": 0.94, + "learning_rate": 1.026668954597193e-05, + "loss": 0.4999, + "step": 7478 + }, + { + "epoch": 0.94, + "learning_rate": 1.0225804271290218e-05, + "loss": 0.4501, + "step": 7479 + }, + { + "epoch": 0.94, + "learning_rate": 1.0184999727210154e-05, + "loss": 0.0475, + "step": 7480 + }, + { + "epoch": 0.94, + "learning_rate": 1.0144275920457524e-05, + "loss": 0.5304, + "step": 7481 + }, + { + "epoch": 0.94, + "learning_rate": 1.0103632857745016e-05, + "loss": 0.4963, + "step": 7482 + }, + { + "epoch": 0.94, + "learning_rate": 1.006307054577188e-05, + "loss": 0.4907, + "step": 7483 + }, + { + "epoch": 0.94, + "learning_rate": 1.0022588991224157e-05, + "loss": 0.5502, + "step": 7484 + }, + { + "epoch": 0.94, + "learning_rate": 9.982188200774455e-06, + "loss": 0.4458, + "step": 7485 + }, + { + "epoch": 0.94, + "learning_rate": 9.941868181082115e-06, + "loss": 0.4948, + "step": 7486 + }, + { + "epoch": 0.94, + "learning_rate": 9.901628938793206e-06, + "loss": 0.499, + "step": 7487 + }, + { + "epoch": 0.94, + "learning_rate": 9.861470480540424e-06, + "loss": 0.6323, + "step": 7488 + }, + { + "epoch": 0.94, + "learning_rate": 9.821392812943253e-06, + "loss": 0.5701, + "step": 7489 + }, + { + "epoch": 0.94, + "learning_rate": 9.781395942607685e-06, + "loss": 0.4441, + "step": 7490 + }, + { + "epoch": 0.94, + "learning_rate": 9.741479876126614e-06, + "loss": 0.3784, + "step": 7491 + }, + { + "epoch": 0.94, + "learning_rate": 9.701644620079386e-06, + "loss": 0.6035, + "step": 7492 + }, + { + "epoch": 0.94, + "learning_rate": 9.661890181032195e-06, + "loss": 0.5161, + "step": 7493 + }, + { + "epoch": 0.94, + "learning_rate": 9.622216565537856e-06, + "loss": 0.5367, + "step": 7494 + }, + { + "epoch": 0.94, + "learning_rate": 9.582623780135801e-06, + "loss": 0.6031, + "step": 7495 + }, + { + "epoch": 0.94, + "learning_rate": 9.543111831352259e-06, + "loss": 0.4555, + "step": 7496 + }, + { + "epoch": 0.94, + "learning_rate": 9.50368072570007e-06, + "loss": 0.4154, + "step": 7497 + }, + { + "epoch": 0.94, + "learning_rate": 9.464330469678762e-06, + "loss": 0.5438, + "step": 7498 + }, + { + "epoch": 0.94, + "learning_rate": 9.425061069774533e-06, + "loss": 0.0478, + "step": 7499 + }, + { + "epoch": 0.94, + "learning_rate": 9.385872532460204e-06, + "loss": 0.5813, + "step": 7500 + }, + { + "epoch": 0.94, + "learning_rate": 9.346764864195334e-06, + "loss": 0.4587, + "step": 7501 + }, + { + "epoch": 0.94, + "learning_rate": 9.307738071426153e-06, + "loss": 0.5677, + "step": 7502 + }, + { + "epoch": 0.94, + "learning_rate": 9.268792160585515e-06, + "loss": 0.4026, + "step": 7503 + }, + { + "epoch": 0.94, + "learning_rate": 9.229927138092898e-06, + "loss": 0.5375, + "step": 7504 + }, + { + "epoch": 0.94, + "learning_rate": 9.191143010354619e-06, + "loss": 0.5624, + "step": 7505 + }, + { + "epoch": 0.94, + "learning_rate": 9.15243978376351e-06, + "loss": 0.5353, + "step": 7506 + }, + { + "epoch": 0.94, + "learning_rate": 9.113817464699192e-06, + "loss": 0.5536, + "step": 7507 + }, + { + "epoch": 0.94, + "learning_rate": 9.075276059527738e-06, + "loss": 0.4439, + "step": 7508 + }, + { + "epoch": 0.94, + "learning_rate": 9.036815574602009e-06, + "loss": 0.4402, + "step": 7509 + }, + { + "epoch": 0.94, + "learning_rate": 8.998436016261657e-06, + "loss": 0.5674, + "step": 7510 + }, + { + "epoch": 0.94, + "learning_rate": 8.96013739083279e-06, + "loss": 0.4603, + "step": 7511 + }, + { + "epoch": 0.94, + "learning_rate": 8.921919704628301e-06, + "loss": 0.6084, + "step": 7512 + }, + { + "epoch": 0.94, + "learning_rate": 8.883782963947706e-06, + "loss": 0.4868, + "step": 7513 + }, + { + "epoch": 0.94, + "learning_rate": 8.845727175077146e-06, + "loss": 0.6306, + "step": 7514 + }, + { + "epoch": 0.94, + "learning_rate": 8.807752344289377e-06, + "loss": 0.4448, + "step": 7515 + }, + { + "epoch": 0.94, + "learning_rate": 8.769858477844005e-06, + "loss": 0.5464, + "step": 7516 + }, + { + "epoch": 0.94, + "learning_rate": 8.732045581987036e-06, + "loss": 0.5104, + "step": 7517 + }, + { + "epoch": 0.94, + "learning_rate": 8.69431366295137e-06, + "loss": 0.5391, + "step": 7518 + }, + { + "epoch": 0.94, + "learning_rate": 8.656662726956366e-06, + "loss": 0.4713, + "step": 7519 + }, + { + "epoch": 0.94, + "learning_rate": 8.61909278020817e-06, + "loss": 0.475, + "step": 7520 + }, + { + "epoch": 0.94, + "learning_rate": 8.581603828899497e-06, + "loss": 0.5817, + "step": 7521 + }, + { + "epoch": 0.94, + "learning_rate": 8.544195879209737e-06, + "loss": 0.4489, + "step": 7522 + }, + { + "epoch": 0.94, + "learning_rate": 8.506868937304901e-06, + "loss": 0.5476, + "step": 7523 + }, + { + "epoch": 0.94, + "learning_rate": 8.469623009337734e-06, + "loss": 0.5503, + "step": 7524 + }, + { + "epoch": 0.94, + "learning_rate": 8.432458101447494e-06, + "loss": 0.4429, + "step": 7525 + }, + { + "epoch": 0.94, + "learning_rate": 8.395374219760221e-06, + "loss": 0.6356, + "step": 7526 + }, + { + "epoch": 0.94, + "learning_rate": 8.358371370388473e-06, + "loss": 0.5798, + "step": 7527 + }, + { + "epoch": 0.94, + "learning_rate": 8.321449559431648e-06, + "loss": 0.58, + "step": 7528 + }, + { + "epoch": 0.94, + "learning_rate": 8.28460879297549e-06, + "loss": 0.5662, + "step": 7529 + }, + { + "epoch": 0.94, + "learning_rate": 8.24784907709264e-06, + "loss": 0.5651, + "step": 7530 + }, + { + "epoch": 0.94, + "learning_rate": 8.21117041784225e-06, + "loss": 0.549, + "step": 7531 + }, + { + "epoch": 0.94, + "learning_rate": 8.174572821270154e-06, + "loss": 0.4811, + "step": 7532 + }, + { + "epoch": 0.94, + "learning_rate": 8.138056293408858e-06, + "loss": 0.671, + "step": 7533 + }, + { + "epoch": 0.94, + "learning_rate": 8.10162084027738e-06, + "loss": 0.6262, + "step": 7534 + }, + { + "epoch": 0.94, + "learning_rate": 8.065266467881528e-06, + "loss": 0.5673, + "step": 7535 + }, + { + "epoch": 0.94, + "learning_rate": 8.028993182213673e-06, + "loss": 0.5413, + "step": 7536 + }, + { + "epoch": 0.94, + "learning_rate": 7.992800989252758e-06, + "loss": 0.5179, + "step": 7537 + }, + { + "epoch": 0.94, + "learning_rate": 7.956689894964508e-06, + "loss": 0.0479, + "step": 7538 + }, + { + "epoch": 0.94, + "learning_rate": 7.920659905301163e-06, + "loss": 0.6379, + "step": 7539 + }, + { + "epoch": 0.95, + "learning_rate": 7.884711026201585e-06, + "loss": 0.4768, + "step": 7540 + }, + { + "epoch": 0.95, + "learning_rate": 7.848843263591421e-06, + "loss": 0.5747, + "step": 7541 + }, + { + "epoch": 0.95, + "learning_rate": 7.81305662338272e-06, + "loss": 0.5114, + "step": 7542 + }, + { + "epoch": 0.95, + "learning_rate": 7.777351111474373e-06, + "loss": 0.4329, + "step": 7543 + }, + { + "epoch": 0.95, + "learning_rate": 7.74172673375173e-06, + "loss": 0.4506, + "step": 7544 + }, + { + "epoch": 0.95, + "learning_rate": 7.706183496086871e-06, + "loss": 0.439, + "step": 7545 + }, + { + "epoch": 0.95, + "learning_rate": 7.670721404338443e-06, + "loss": 0.4668, + "step": 7546 + }, + { + "epoch": 0.95, + "learning_rate": 7.63534046435177e-06, + "loss": 0.5427, + "step": 7547 + }, + { + "epoch": 0.95, + "learning_rate": 7.600040681958742e-06, + "loss": 0.5852, + "step": 7548 + }, + { + "epoch": 0.95, + "learning_rate": 7.564822062977983e-06, + "loss": 0.5026, + "step": 7549 + }, + { + "epoch": 0.95, + "learning_rate": 7.529684613214515e-06, + "loss": 0.5242, + "step": 7550 + }, + { + "epoch": 0.95, + "learning_rate": 7.494628338460319e-06, + "loss": 0.588, + "step": 7551 + }, + { + "epoch": 0.95, + "learning_rate": 7.4596532444936586e-06, + "loss": 0.5677, + "step": 7552 + }, + { + "epoch": 0.95, + "learning_rate": 7.4247593370795345e-06, + "loss": 0.6387, + "step": 7553 + }, + { + "epoch": 0.95, + "learning_rate": 7.389946621969679e-06, + "loss": 0.7043, + "step": 7554 + }, + { + "epoch": 0.95, + "learning_rate": 7.355215104902335e-06, + "loss": 0.5358, + "step": 7555 + }, + { + "epoch": 0.95, + "learning_rate": 7.320564791602313e-06, + "loss": 0.5316, + "step": 7556 + }, + { + "epoch": 0.95, + "learning_rate": 7.28599568778121e-06, + "loss": 0.394, + "step": 7557 + }, + { + "epoch": 0.95, + "learning_rate": 7.25150779913708e-06, + "loss": 0.5415, + "step": 7558 + }, + { + "epoch": 0.95, + "learning_rate": 7.2171011313545975e-06, + "loss": 0.501, + "step": 7559 + }, + { + "epoch": 0.95, + "learning_rate": 7.18277569010517e-06, + "loss": 0.5402, + "step": 7560 + }, + { + "epoch": 0.95, + "learning_rate": 7.148531481046661e-06, + "loss": 0.4275, + "step": 7561 + }, + { + "epoch": 0.95, + "learning_rate": 7.114368509823666e-06, + "loss": 0.4677, + "step": 7562 + }, + { + "epoch": 0.95, + "learning_rate": 7.080286782067347e-06, + "loss": 0.4784, + "step": 7563 + }, + { + "epoch": 0.95, + "learning_rate": 7.046286303395433e-06, + "loss": 0.0475, + "step": 7564 + }, + { + "epoch": 0.95, + "learning_rate": 7.012367079412385e-06, + "loss": 0.4868, + "step": 7565 + }, + { + "epoch": 0.95, + "learning_rate": 6.97852911570912e-06, + "loss": 0.6702, + "step": 7566 + }, + { + "epoch": 0.95, + "learning_rate": 6.944772417863232e-06, + "loss": 0.5582, + "step": 7567 + }, + { + "epoch": 0.95, + "learning_rate": 6.911096991438881e-06, + "loss": 0.53, + "step": 7568 + }, + { + "epoch": 0.95, + "learning_rate": 6.87750284198696e-06, + "loss": 0.5176, + "step": 7569 + }, + { + "epoch": 0.95, + "learning_rate": 6.843989975044762e-06, + "loss": 0.6164, + "step": 7570 + }, + { + "epoch": 0.95, + "learning_rate": 6.810558396136424e-06, + "loss": 0.4761, + "step": 7571 + }, + { + "epoch": 0.95, + "learning_rate": 6.7772081107724255e-06, + "loss": 0.5406, + "step": 7572 + }, + { + "epoch": 0.95, + "learning_rate": 6.743939124450038e-06, + "loss": 0.4941, + "step": 7573 + }, + { + "epoch": 0.95, + "learning_rate": 6.7107514426529845e-06, + "loss": 0.5801, + "step": 7574 + }, + { + "epoch": 0.95, + "learning_rate": 6.6776450708517785e-06, + "loss": 0.5544, + "step": 7575 + }, + { + "epoch": 0.95, + "learning_rate": 6.644620014503389e-06, + "loss": 0.551, + "step": 7576 + }, + { + "epoch": 0.95, + "learning_rate": 6.611676279051293e-06, + "loss": 0.4351, + "step": 7577 + }, + { + "epoch": 0.95, + "learning_rate": 6.5788138699258145e-06, + "loss": 0.6226, + "step": 7578 + }, + { + "epoch": 0.95, + "learning_rate": 6.5460327925437325e-06, + "loss": 0.499, + "step": 7579 + }, + { + "epoch": 0.95, + "learning_rate": 6.51333305230839e-06, + "loss": 0.5115, + "step": 7580 + }, + { + "epoch": 0.95, + "learning_rate": 6.480714654609754e-06, + "loss": 0.4911, + "step": 7581 + }, + { + "epoch": 0.95, + "learning_rate": 6.448177604824412e-06, + "loss": 0.4514, + "step": 7582 + }, + { + "epoch": 0.95, + "learning_rate": 6.4157219083154635e-06, + "loss": 0.4855, + "step": 7583 + }, + { + "epoch": 0.95, + "learning_rate": 6.383347570432741e-06, + "loss": 0.5433, + "step": 7584 + }, + { + "epoch": 0.95, + "learning_rate": 6.351054596512529e-06, + "loss": 0.4507, + "step": 7585 + }, + { + "epoch": 0.95, + "learning_rate": 6.318842991877738e-06, + "loss": 0.5854, + "step": 7586 + }, + { + "epoch": 0.95, + "learning_rate": 6.286712761837954e-06, + "loss": 0.5999, + "step": 7587 + }, + { + "epoch": 0.95, + "learning_rate": 6.254663911689218e-06, + "loss": 0.5828, + "step": 7588 + }, + { + "epoch": 0.95, + "learning_rate": 6.222696446714193e-06, + "loss": 0.5327, + "step": 7589 + }, + { + "epoch": 0.95, + "learning_rate": 6.1908103721822205e-06, + "loss": 0.4979, + "step": 7590 + }, + { + "epoch": 0.95, + "learning_rate": 6.159005693349095e-06, + "loss": 0.4409, + "step": 7591 + }, + { + "epoch": 0.95, + "learning_rate": 6.127282415457236e-06, + "loss": 0.4761, + "step": 7592 + }, + { + "epoch": 0.95, + "learning_rate": 6.095640543735736e-06, + "loss": 0.59, + "step": 7593 + }, + { + "epoch": 0.95, + "learning_rate": 6.064080083400203e-06, + "loss": 0.4749, + "step": 7594 + }, + { + "epoch": 0.95, + "learning_rate": 6.032601039652807e-06, + "loss": 0.6135, + "step": 7595 + }, + { + "epoch": 0.95, + "learning_rate": 6.001203417682177e-06, + "loss": 0.0478, + "step": 7596 + }, + { + "epoch": 0.95, + "learning_rate": 5.969887222663839e-06, + "loss": 0.5995, + "step": 7597 + }, + { + "epoch": 0.95, + "learning_rate": 5.938652459759608e-06, + "loss": 0.631, + "step": 7598 + }, + { + "epoch": 0.95, + "learning_rate": 5.9074991341180326e-06, + "loss": 0.548, + "step": 7599 + }, + { + "epoch": 0.95, + "learning_rate": 5.8764272508741724e-06, + "loss": 0.5192, + "step": 7600 + }, + { + "epoch": 0.95, + "learning_rate": 5.8454368151496515e-06, + "loss": 0.5427, + "step": 7601 + }, + { + "epoch": 0.95, + "learning_rate": 5.814527832052719e-06, + "loss": 0.436, + "step": 7602 + }, + { + "epoch": 0.95, + "learning_rate": 5.783700306678186e-06, + "loss": 0.5526, + "step": 7603 + }, + { + "epoch": 0.95, + "learning_rate": 5.752954244107378e-06, + "loss": 0.6538, + "step": 7604 + }, + { + "epoch": 0.95, + "learning_rate": 5.7222896494082964e-06, + "loss": 0.5887, + "step": 7605 + }, + { + "epoch": 0.95, + "learning_rate": 5.691706527635399e-06, + "loss": 0.3965, + "step": 7606 + }, + { + "epoch": 0.95, + "learning_rate": 5.661204883829762e-06, + "loss": 0.5897, + "step": 7607 + }, + { + "epoch": 0.95, + "learning_rate": 5.630784723019145e-06, + "loss": 0.5374, + "step": 7608 + }, + { + "epoch": 0.95, + "learning_rate": 5.6004460502177e-06, + "loss": 0.6295, + "step": 7609 + }, + { + "epoch": 0.95, + "learning_rate": 5.570188870426207e-06, + "loss": 0.4886, + "step": 7610 + }, + { + "epoch": 0.95, + "learning_rate": 5.540013188632065e-06, + "loss": 0.6086, + "step": 7611 + }, + { + "epoch": 0.95, + "learning_rate": 5.509919009809128e-06, + "loss": 0.5153, + "step": 7612 + }, + { + "epoch": 0.95, + "learning_rate": 5.4799063389179836e-06, + "loss": 0.4633, + "step": 7613 + }, + { + "epoch": 0.95, + "learning_rate": 5.449975180905675e-06, + "loss": 0.4447, + "step": 7614 + }, + { + "epoch": 0.95, + "learning_rate": 5.420125540705756e-06, + "loss": 0.5769, + "step": 7615 + }, + { + "epoch": 0.95, + "learning_rate": 5.390357423238457e-06, + "loss": 0.5375, + "step": 7616 + }, + { + "epoch": 0.95, + "learning_rate": 5.360670833410519e-06, + "loss": 0.6647, + "step": 7617 + }, + { + "epoch": 0.95, + "learning_rate": 5.3310657761152516e-06, + "loss": 0.5808, + "step": 7618 + }, + { + "epoch": 0.96, + "learning_rate": 5.3015422562325274e-06, + "loss": 0.4778, + "step": 7619 + }, + { + "epoch": 0.96, + "learning_rate": 5.272100278628788e-06, + "loss": 0.5687, + "step": 7620 + }, + { + "epoch": 0.96, + "learning_rate": 5.242739848156985e-06, + "loss": 0.5985, + "step": 7621 + }, + { + "epoch": 0.96, + "learning_rate": 5.213460969656691e-06, + "loss": 0.4753, + "step": 7622 + }, + { + "epoch": 0.96, + "learning_rate": 5.184263647953991e-06, + "loss": 0.4958, + "step": 7623 + }, + { + "epoch": 0.96, + "learning_rate": 5.155147887861589e-06, + "loss": 0.4933, + "step": 7624 + }, + { + "epoch": 0.96, + "learning_rate": 5.126113694178647e-06, + "loss": 0.4901, + "step": 7625 + }, + { + "epoch": 0.96, + "learning_rate": 5.097161071690948e-06, + "loss": 0.532, + "step": 7626 + }, + { + "epoch": 0.96, + "learning_rate": 5.0682900251708406e-06, + "loss": 0.6028, + "step": 7627 + }, + { + "epoch": 0.96, + "learning_rate": 5.039500559377186e-06, + "loss": 0.5652, + "step": 7628 + }, + { + "epoch": 0.96, + "learning_rate": 5.010792679055465e-06, + "loss": 0.4747, + "step": 7629 + }, + { + "epoch": 0.96, + "learning_rate": 4.982166388937615e-06, + "loss": 0.4772, + "step": 7630 + }, + { + "epoch": 0.96, + "learning_rate": 4.953621693742249e-06, + "loss": 0.0475, + "step": 7631 + }, + { + "epoch": 0.96, + "learning_rate": 4.9251585981743285e-06, + "loss": 0.4998, + "step": 7632 + }, + { + "epoch": 0.96, + "learning_rate": 4.896777106925543e-06, + "loss": 0.5056, + "step": 7633 + }, + { + "epoch": 0.96, + "learning_rate": 4.868477224674095e-06, + "loss": 0.5669, + "step": 7634 + }, + { + "epoch": 0.96, + "learning_rate": 4.840258956084753e-06, + "loss": 0.5116, + "step": 7635 + }, + { + "epoch": 0.96, + "learning_rate": 4.812122305808742e-06, + "loss": 0.5306, + "step": 7636 + }, + { + "epoch": 0.96, + "learning_rate": 4.7840672784839054e-06, + "loss": 0.6213, + "step": 7637 + }, + { + "epoch": 0.96, + "learning_rate": 4.756093878734657e-06, + "loss": 0.6122, + "step": 7638 + }, + { + "epoch": 0.96, + "learning_rate": 4.728202111171808e-06, + "loss": 0.5802, + "step": 7639 + }, + { + "epoch": 0.96, + "learning_rate": 4.70039198039296e-06, + "loss": 0.5065, + "step": 7640 + }, + { + "epoch": 0.96, + "learning_rate": 4.672663490981999e-06, + "loss": 0.3914, + "step": 7641 + }, + { + "epoch": 0.96, + "learning_rate": 4.645016647509603e-06, + "loss": 0.4086, + "step": 7642 + }, + { + "epoch": 0.96, + "learning_rate": 4.617451454532739e-06, + "loss": 0.6154, + "step": 7643 + }, + { + "epoch": 0.96, + "learning_rate": 4.589967916595106e-06, + "loss": 0.0477, + "step": 7644 + }, + { + "epoch": 0.96, + "learning_rate": 4.562566038226857e-06, + "loss": 0.5553, + "step": 7645 + }, + { + "epoch": 0.96, + "learning_rate": 4.535245823944767e-06, + "loss": 0.4767, + "step": 7646 + }, + { + "epoch": 0.96, + "learning_rate": 4.508007278252013e-06, + "loss": 0.4869, + "step": 7647 + }, + { + "epoch": 0.96, + "learning_rate": 4.480850405638393e-06, + "loss": 0.4717, + "step": 7648 + }, + { + "epoch": 0.96, + "learning_rate": 4.4537752105802685e-06, + "loss": 0.5063, + "step": 7649 + }, + { + "epoch": 0.96, + "learning_rate": 4.426781697540461e-06, + "loss": 0.556, + "step": 7650 + }, + { + "epoch": 0.96, + "learning_rate": 4.399869870968465e-06, + "loss": 0.485, + "step": 7651 + }, + { + "epoch": 0.96, + "learning_rate": 4.373039735300122e-06, + "loss": 0.5737, + "step": 7652 + }, + { + "epoch": 0.96, + "learning_rate": 4.3462912949579496e-06, + "loss": 0.5021, + "step": 7653 + }, + { + "epoch": 0.96, + "learning_rate": 4.319624554350976e-06, + "loss": 0.5381, + "step": 7654 + }, + { + "epoch": 0.96, + "learning_rate": 4.293039517874686e-06, + "loss": 0.5812, + "step": 7655 + }, + { + "epoch": 0.96, + "learning_rate": 4.26653618991113e-06, + "loss": 0.436, + "step": 7656 + }, + { + "epoch": 0.96, + "learning_rate": 4.240114574829035e-06, + "loss": 0.5581, + "step": 7657 + }, + { + "epoch": 0.96, + "learning_rate": 4.213774676983472e-06, + "loss": 0.5175, + "step": 7658 + }, + { + "epoch": 0.96, + "learning_rate": 4.187516500716027e-06, + "loss": 0.6621, + "step": 7659 + }, + { + "epoch": 0.96, + "learning_rate": 4.1613400503550115e-06, + "loss": 0.5179, + "step": 7660 + }, + { + "epoch": 0.96, + "learning_rate": 4.135245330215087e-06, + "loss": 0.4761, + "step": 7661 + }, + { + "epoch": 0.96, + "learning_rate": 4.109232344597535e-06, + "loss": 0.501, + "step": 7662 + }, + { + "epoch": 0.96, + "learning_rate": 4.083301097790093e-06, + "loss": 0.5122, + "step": 7663 + }, + { + "epoch": 0.96, + "learning_rate": 4.057451594067119e-06, + "loss": 0.562, + "step": 7664 + }, + { + "epoch": 0.96, + "learning_rate": 4.031683837689426e-06, + "loss": 0.5001, + "step": 7665 + }, + { + "epoch": 0.96, + "learning_rate": 4.00599783290434e-06, + "loss": 0.4365, + "step": 7666 + }, + { + "epoch": 0.96, + "learning_rate": 3.980393583945807e-06, + "loss": 0.5281, + "step": 7667 + }, + { + "epoch": 0.96, + "learning_rate": 3.954871095034229e-06, + "loss": 0.4429, + "step": 7668 + }, + { + "epoch": 0.96, + "learning_rate": 3.929430370376464e-06, + "loss": 0.5496, + "step": 7669 + }, + { + "epoch": 0.96, + "learning_rate": 3.904071414165988e-06, + "loss": 0.5204, + "step": 7670 + }, + { + "epoch": 0.96, + "learning_rate": 3.87879423058285e-06, + "loss": 0.5145, + "step": 7671 + }, + { + "epoch": 0.96, + "learning_rate": 3.853598823793436e-06, + "loss": 0.4926, + "step": 7672 + }, + { + "epoch": 0.96, + "learning_rate": 3.828485197950871e-06, + "loss": 0.4912, + "step": 7673 + }, + { + "epoch": 0.96, + "learning_rate": 3.8034533571946196e-06, + "loss": 0.6182, + "step": 7674 + }, + { + "epoch": 0.96, + "learning_rate": 3.778503305650771e-06, + "loss": 0.5221, + "step": 7675 + }, + { + "epoch": 0.96, + "learning_rate": 3.753635047431869e-06, + "loss": 0.6266, + "step": 7676 + }, + { + "epoch": 0.96, + "learning_rate": 3.7288485866370235e-06, + "loss": 0.4503, + "step": 7677 + }, + { + "epoch": 0.96, + "learning_rate": 3.7041439273517995e-06, + "loss": 0.5037, + "step": 7678 + }, + { + "epoch": 0.96, + "learning_rate": 3.6795210736484395e-06, + "loss": 0.5393, + "step": 7679 + }, + { + "epoch": 0.96, + "learning_rate": 3.654980029585475e-06, + "loss": 0.558, + "step": 7680 + }, + { + "epoch": 0.96, + "learning_rate": 3.6305207992081147e-06, + "loss": 0.564, + "step": 7681 + }, + { + "epoch": 0.96, + "learning_rate": 3.606143386548022e-06, + "loss": 0.4969, + "step": 7682 + }, + { + "epoch": 0.96, + "learning_rate": 3.5818477956233163e-06, + "loss": 0.0478, + "step": 7683 + }, + { + "epoch": 0.96, + "learning_rate": 3.5576340304387944e-06, + "loss": 0.6107, + "step": 7684 + }, + { + "epoch": 0.96, + "learning_rate": 3.5335020949856523e-06, + "loss": 0.5154, + "step": 7685 + }, + { + "epoch": 0.96, + "learning_rate": 3.5094519932415414e-06, + "loss": 0.4509, + "step": 7686 + }, + { + "epoch": 0.96, + "learning_rate": 3.4854837291707352e-06, + "loss": 0.4355, + "step": 7687 + }, + { + "epoch": 0.96, + "learning_rate": 3.4615973067239627e-06, + "loss": 0.4923, + "step": 7688 + }, + { + "epoch": 0.96, + "learning_rate": 3.4377927298385183e-06, + "loss": 0.6141, + "step": 7689 + }, + { + "epoch": 0.96, + "learning_rate": 3.4140700024382077e-06, + "loss": 0.5288, + "step": 7690 + }, + { + "epoch": 0.96, + "learning_rate": 3.3904291284331256e-06, + "loss": 0.5229, + "step": 7691 + }, + { + "epoch": 0.96, + "learning_rate": 3.3668701117202105e-06, + "loss": 0.4642, + "step": 7692 + }, + { + "epoch": 0.96, + "learning_rate": 3.3433929561827447e-06, + "loss": 0.5232, + "step": 7693 + }, + { + "epoch": 0.96, + "learning_rate": 3.319997665690411e-06, + "loss": 0.4523, + "step": 7694 + }, + { + "epoch": 0.96, + "learning_rate": 3.296684244099568e-06, + "loss": 0.6161, + "step": 7695 + }, + { + "epoch": 0.96, + "learning_rate": 3.2734526952530876e-06, + "loss": 0.5129, + "step": 7696 + }, + { + "epoch": 0.96, + "learning_rate": 3.2503030229801833e-06, + "loss": 0.5826, + "step": 7697 + }, + { + "epoch": 0.96, + "learning_rate": 3.2272352310966924e-06, + "loss": 0.0477, + "step": 7698 + }, + { + "epoch": 0.97, + "learning_rate": 3.204249323404962e-06, + "loss": 0.4906, + "step": 7699 + }, + { + "epoch": 0.97, + "learning_rate": 3.1813453036937946e-06, + "loss": 0.5529, + "step": 7700 + }, + { + "epoch": 0.97, + "learning_rate": 3.158523175738559e-06, + "loss": 0.3964, + "step": 7701 + }, + { + "epoch": 0.97, + "learning_rate": 3.1357829433009688e-06, + "loss": 0.5775, + "step": 7702 + }, + { + "epoch": 0.97, + "learning_rate": 3.1131246101294144e-06, + "loss": 0.4332, + "step": 7703 + }, + { + "epoch": 0.97, + "learning_rate": 3.0905481799587408e-06, + "loss": 0.4451, + "step": 7704 + }, + { + "epoch": 0.97, + "learning_rate": 3.06805365651025e-06, + "loss": 0.5503, + "step": 7705 + }, + { + "epoch": 0.97, + "learning_rate": 3.045641043491698e-06, + "loss": 0.506, + "step": 7706 + }, + { + "epoch": 0.97, + "learning_rate": 3.0233103445975184e-06, + "loss": 0.5131, + "step": 7707 + }, + { + "epoch": 0.97, + "learning_rate": 3.001061563508489e-06, + "loss": 0.6552, + "step": 7708 + }, + { + "epoch": 0.97, + "learning_rate": 2.9788947038919544e-06, + "loss": 0.4823, + "step": 7709 + }, + { + "epoch": 0.97, + "learning_rate": 2.9568097694016583e-06, + "loss": 0.5544, + "step": 7710 + }, + { + "epoch": 0.97, + "learning_rate": 2.9348067636779664e-06, + "loss": 0.5624, + "step": 7711 + }, + { + "epoch": 0.97, + "learning_rate": 2.912885690347644e-06, + "loss": 0.5237, + "step": 7712 + }, + { + "epoch": 0.97, + "learning_rate": 2.891046553024079e-06, + "loss": 0.4182, + "step": 7713 + }, + { + "epoch": 0.97, + "learning_rate": 2.8692893553069476e-06, + "loss": 0.5978, + "step": 7714 + }, + { + "epoch": 0.97, + "learning_rate": 2.847614100782603e-06, + "loss": 0.4791, + "step": 7715 + }, + { + "epoch": 0.97, + "learning_rate": 2.8260207930238534e-06, + "loss": 0.4585, + "step": 7716 + }, + { + "epoch": 0.97, + "learning_rate": 2.804509435589908e-06, + "loss": 0.6189, + "step": 7717 + }, + { + "epoch": 0.97, + "learning_rate": 2.7830800320265414e-06, + "loss": 0.5748, + "step": 7718 + }, + { + "epoch": 0.97, + "learning_rate": 2.7617325858660947e-06, + "loss": 0.5308, + "step": 7719 + }, + { + "epoch": 0.97, + "learning_rate": 2.740467100627253e-06, + "loss": 0.5765, + "step": 7720 + }, + { + "epoch": 0.97, + "learning_rate": 2.7192835798152127e-06, + "loss": 0.5663, + "step": 7721 + }, + { + "epoch": 0.97, + "learning_rate": 2.6981820269217917e-06, + "loss": 0.5931, + "step": 7722 + }, + { + "epoch": 0.97, + "learning_rate": 2.6771624454250966e-06, + "loss": 0.4829, + "step": 7723 + }, + { + "epoch": 0.97, + "learning_rate": 2.656224838789967e-06, + "loss": 0.5958, + "step": 7724 + }, + { + "epoch": 0.97, + "learning_rate": 2.6353692104674755e-06, + "loss": 0.0475, + "step": 7725 + }, + { + "epoch": 0.97, + "learning_rate": 2.6145955638954278e-06, + "loss": 0.5753, + "step": 7726 + }, + { + "epoch": 0.97, + "learning_rate": 2.593903902497863e-06, + "loss": 0.579, + "step": 7727 + }, + { + "epoch": 0.97, + "learning_rate": 2.5732942296854967e-06, + "loss": 0.0477, + "step": 7728 + }, + { + "epoch": 0.97, + "learning_rate": 2.552766548855501e-06, + "loss": 0.661, + "step": 7729 + }, + { + "epoch": 0.97, + "learning_rate": 2.5323208633914464e-06, + "loss": 0.584, + "step": 7730 + }, + { + "epoch": 0.97, + "learning_rate": 2.511957176663415e-06, + "loss": 0.4963, + "step": 7731 + }, + { + "epoch": 0.97, + "learning_rate": 2.4916754920281116e-06, + "loss": 0.5585, + "step": 7732 + }, + { + "epoch": 0.97, + "learning_rate": 2.4714758128285277e-06, + "loss": 0.491, + "step": 7733 + }, + { + "epoch": 0.97, + "learning_rate": 2.451358142394222e-06, + "loss": 0.5922, + "step": 7734 + }, + { + "epoch": 0.97, + "learning_rate": 2.431322484041265e-06, + "loss": 0.502, + "step": 7735 + }, + { + "epoch": 0.97, + "learning_rate": 2.4113688410721255e-06, + "loss": 0.5589, + "step": 7736 + }, + { + "epoch": 0.97, + "learning_rate": 2.3914972167758953e-06, + "loss": 0.5259, + "step": 7737 + }, + { + "epoch": 0.97, + "learning_rate": 2.371707614428009e-06, + "loss": 0.4514, + "step": 7738 + }, + { + "epoch": 0.97, + "learning_rate": 2.3520000372904693e-06, + "loss": 0.4934, + "step": 7739 + }, + { + "epoch": 0.97, + "learning_rate": 2.3323744886116773e-06, + "loss": 0.5502, + "step": 7740 + }, + { + "epoch": 0.97, + "learning_rate": 2.312830971626545e-06, + "loss": 0.746, + "step": 7741 + }, + { + "epoch": 0.97, + "learning_rate": 2.293369489556496e-06, + "loss": 0.6011, + "step": 7742 + }, + { + "epoch": 0.97, + "learning_rate": 2.2739900456094643e-06, + "loss": 0.5055, + "step": 7743 + }, + { + "epoch": 0.97, + "learning_rate": 2.254692642979728e-06, + "loss": 0.5859, + "step": 7744 + }, + { + "epoch": 0.97, + "learning_rate": 2.2354772848481864e-06, + "loss": 0.571, + "step": 7745 + }, + { + "epoch": 0.97, + "learning_rate": 2.2163439743820846e-06, + "loss": 0.4457, + "step": 7746 + }, + { + "epoch": 0.97, + "learning_rate": 2.197292714735233e-06, + "loss": 0.5922, + "step": 7747 + }, + { + "epoch": 0.97, + "learning_rate": 2.178323509047897e-06, + "loss": 0.4532, + "step": 7748 + }, + { + "epoch": 0.97, + "learning_rate": 2.159436360446909e-06, + "loss": 0.4952, + "step": 7749 + }, + { + "epoch": 0.97, + "learning_rate": 2.1406312720453347e-06, + "loss": 0.5062, + "step": 7750 + }, + { + "epoch": 0.97, + "learning_rate": 2.1219082469428608e-06, + "loss": 0.5042, + "step": 7751 + }, + { + "epoch": 0.97, + "learning_rate": 2.1032672882257963e-06, + "loss": 0.4928, + "step": 7752 + }, + { + "epoch": 0.97, + "learning_rate": 2.0847083989666284e-06, + "loss": 0.5787, + "step": 7753 + }, + { + "epoch": 0.97, + "learning_rate": 2.0662315822245203e-06, + "loss": 0.6583, + "step": 7754 + }, + { + "epoch": 0.97, + "learning_rate": 2.047836841045092e-06, + "loss": 0.5721, + "step": 7755 + }, + { + "epoch": 0.97, + "learning_rate": 2.029524178460307e-06, + "loss": 0.4817, + "step": 7756 + }, + { + "epoch": 0.97, + "learning_rate": 2.011293597488695e-06, + "loss": 0.6191, + "step": 7757 + }, + { + "epoch": 0.97, + "learning_rate": 1.993145101135296e-06, + "loss": 0.4613, + "step": 7758 + }, + { + "epoch": 0.97, + "learning_rate": 1.975078692391552e-06, + "loss": 0.4095, + "step": 7759 + }, + { + "epoch": 0.97, + "learning_rate": 1.9570943742353574e-06, + "loss": 0.4344, + "step": 7760 + }, + { + "epoch": 0.97, + "learning_rate": 1.9391921496311194e-06, + "loss": 0.5253, + "step": 7761 + }, + { + "epoch": 0.97, + "learning_rate": 1.9213720215296992e-06, + "loss": 0.5691, + "step": 7762 + }, + { + "epoch": 0.97, + "learning_rate": 1.9036339928685253e-06, + "loss": 0.521, + "step": 7763 + }, + { + "epoch": 0.97, + "learning_rate": 1.885978066571259e-06, + "loss": 0.4917, + "step": 7764 + }, + { + "epoch": 0.97, + "learning_rate": 1.868404245548183e-06, + "loss": 0.514, + "step": 7765 + }, + { + "epoch": 0.97, + "learning_rate": 1.8509125326960918e-06, + "loss": 0.5812, + "step": 7766 + }, + { + "epoch": 0.97, + "learning_rate": 1.8335029308981788e-06, + "loss": 0.5229, + "step": 7767 + }, + { + "epoch": 0.97, + "learning_rate": 1.8161754430240928e-06, + "loss": 0.5727, + "step": 7768 + }, + { + "epoch": 0.97, + "learning_rate": 1.7989300719299939e-06, + "loss": 0.4387, + "step": 7769 + }, + { + "epoch": 0.97, + "learning_rate": 1.7817668204583858e-06, + "loss": 0.4661, + "step": 7770 + }, + { + "epoch": 0.97, + "learning_rate": 1.7646856914384502e-06, + "loss": 0.5323, + "step": 7771 + }, + { + "epoch": 0.97, + "learning_rate": 1.7476866876856013e-06, + "loss": 0.6218, + "step": 7772 + }, + { + "epoch": 0.97, + "learning_rate": 1.7307698120018755e-06, + "loss": 0.5381, + "step": 7773 + }, + { + "epoch": 0.97, + "learning_rate": 1.7139350671757092e-06, + "loss": 0.4756, + "step": 7774 + }, + { + "epoch": 0.97, + "learning_rate": 1.6971824559819937e-06, + "loss": 0.5327, + "step": 7775 + }, + { + "epoch": 0.97, + "learning_rate": 1.6805119811821311e-06, + "loss": 0.5291, + "step": 7776 + }, + { + "epoch": 0.97, + "learning_rate": 1.6639236455239792e-06, + "loss": 0.5518, + "step": 7777 + }, + { + "epoch": 0.97, + "learning_rate": 1.6474174517417395e-06, + "loss": 0.4697, + "step": 7778 + }, + { + "epoch": 0.98, + "learning_rate": 1.6309934025562911e-06, + "loss": 0.0476, + "step": 7779 + }, + { + "epoch": 0.98, + "learning_rate": 1.6146515006747465e-06, + "loss": 0.5811, + "step": 7780 + }, + { + "epoch": 0.98, + "learning_rate": 1.598391748790784e-06, + "loss": 0.5057, + "step": 7781 + }, + { + "epoch": 0.98, + "learning_rate": 1.5822141495845932e-06, + "loss": 0.6151, + "step": 7782 + }, + { + "epoch": 0.98, + "learning_rate": 1.5661187057227077e-06, + "loss": 0.584, + "step": 7783 + }, + { + "epoch": 0.98, + "learning_rate": 1.5501054198582276e-06, + "loss": 0.3854, + "step": 7784 + }, + { + "epoch": 0.98, + "learning_rate": 1.534174294630597e-06, + "loss": 0.4679, + "step": 7785 + }, + { + "epoch": 0.98, + "learning_rate": 1.518325332665882e-06, + "loss": 0.6237, + "step": 7786 + }, + { + "epoch": 0.98, + "learning_rate": 1.5025585365763817e-06, + "loss": 0.4276, + "step": 7787 + }, + { + "epoch": 0.98, + "learning_rate": 1.4868739089610727e-06, + "loss": 0.3993, + "step": 7788 + }, + { + "epoch": 0.98, + "learning_rate": 1.4712714524052206e-06, + "loss": 0.6298, + "step": 7789 + }, + { + "epoch": 0.98, + "learning_rate": 1.4557511694806568e-06, + "loss": 0.5386, + "step": 7790 + }, + { + "epoch": 0.98, + "learning_rate": 1.4403130627455575e-06, + "loss": 0.5382, + "step": 7791 + }, + { + "epoch": 0.98, + "learning_rate": 1.4249571347447199e-06, + "loss": 0.5945, + "step": 7792 + }, + { + "epoch": 0.98, + "learning_rate": 1.4096833880092309e-06, + "loss": 0.5078, + "step": 7793 + }, + { + "epoch": 0.98, + "learning_rate": 1.3944918250567428e-06, + "loss": 0.4732, + "step": 7794 + }, + { + "epoch": 0.98, + "learning_rate": 1.3793824483912531e-06, + "loss": 0.6105, + "step": 7795 + }, + { + "epoch": 0.98, + "learning_rate": 1.3643552605032694e-06, + "loss": 0.571, + "step": 7796 + }, + { + "epoch": 0.98, + "learning_rate": 1.3494102638698658e-06, + "loss": 0.5862, + "step": 7797 + }, + { + "epoch": 0.98, + "learning_rate": 1.3345474609543496e-06, + "loss": 0.6293, + "step": 7798 + }, + { + "epoch": 0.98, + "learning_rate": 1.3197668542066499e-06, + "loss": 0.4729, + "step": 7799 + }, + { + "epoch": 0.98, + "learning_rate": 1.3050684460630958e-06, + "loss": 0.5336, + "step": 7800 + }, + { + "epoch": 0.98, + "learning_rate": 1.2904522389464157e-06, + "loss": 0.5599, + "step": 7801 + }, + { + "epoch": 0.98, + "learning_rate": 1.275918235265794e-06, + "loss": 0.4423, + "step": 7802 + }, + { + "epoch": 0.98, + "learning_rate": 1.2614664374170358e-06, + "loss": 0.5481, + "step": 7803 + }, + { + "epoch": 0.98, + "learning_rate": 1.247096847782181e-06, + "loss": 0.0478, + "step": 7804 + }, + { + "epoch": 0.98, + "learning_rate": 1.2328094687297787e-06, + "loss": 0.5897, + "step": 7805 + }, + { + "epoch": 0.98, + "learning_rate": 1.2186043026148896e-06, + "loss": 0.4556, + "step": 7806 + }, + { + "epoch": 0.98, + "learning_rate": 1.2044813517789744e-06, + "loss": 0.6562, + "step": 7807 + }, + { + "epoch": 0.98, + "learning_rate": 1.1904406185499482e-06, + "loss": 0.6118, + "step": 7808 + }, + { + "epoch": 0.98, + "learning_rate": 1.1764821052421827e-06, + "loss": 0.559, + "step": 7809 + }, + { + "epoch": 0.98, + "learning_rate": 1.1626058141565033e-06, + "loss": 0.5691, + "step": 7810 + }, + { + "epoch": 0.98, + "learning_rate": 1.1488117475801363e-06, + "loss": 0.5483, + "step": 7811 + }, + { + "epoch": 0.98, + "learning_rate": 1.1350999077868186e-06, + "loss": 0.6143, + "step": 7812 + }, + { + "epoch": 0.98, + "learning_rate": 1.1214702970367419e-06, + "loss": 0.4749, + "step": 7813 + }, + { + "epoch": 0.98, + "learning_rate": 1.1079229175764428e-06, + "loss": 0.0477, + "step": 7814 + }, + { + "epoch": 0.98, + "learning_rate": 1.0944577716389126e-06, + "loss": 0.4448, + "step": 7815 + }, + { + "epoch": 0.98, + "learning_rate": 1.0810748614437649e-06, + "loss": 0.4868, + "step": 7816 + }, + { + "epoch": 0.98, + "learning_rate": 1.0677741891969017e-06, + "loss": 0.531, + "step": 7817 + }, + { + "epoch": 0.98, + "learning_rate": 1.0545557570906805e-06, + "loss": 0.6083, + "step": 7818 + }, + { + "epoch": 0.98, + "learning_rate": 1.041419567303914e-06, + "loss": 0.4865, + "step": 7819 + }, + { + "epoch": 0.98, + "learning_rate": 1.02836562200187e-06, + "loss": 0.6169, + "step": 7820 + }, + { + "epoch": 0.98, + "learning_rate": 1.0153939233363275e-06, + "loss": 0.5186, + "step": 7821 + }, + { + "epoch": 0.98, + "learning_rate": 1.0025044734453536e-06, + "loss": 0.5806, + "step": 7822 + }, + { + "epoch": 0.98, + "learning_rate": 9.896972744535826e-07, + "loss": 0.5525, + "step": 7823 + }, + { + "epoch": 0.98, + "learning_rate": 9.769723284721033e-07, + "loss": 0.6251, + "step": 7824 + }, + { + "epoch": 0.98, + "learning_rate": 9.643296375982935e-07, + "loss": 0.5927, + "step": 7825 + }, + { + "epoch": 0.98, + "learning_rate": 9.51769203916153e-07, + "loss": 0.6241, + "step": 7826 + }, + { + "epoch": 0.98, + "learning_rate": 9.392910294960811e-07, + "loss": 0.4334, + "step": 7827 + }, + { + "epoch": 0.98, + "learning_rate": 9.268951163947659e-07, + "loss": 0.4764, + "step": 7828 + }, + { + "epoch": 0.98, + "learning_rate": 9.145814666555174e-07, + "loss": 0.4572, + "step": 7829 + }, + { + "epoch": 0.98, + "learning_rate": 9.023500823080455e-07, + "loss": 0.7252, + "step": 7830 + }, + { + "epoch": 0.98, + "learning_rate": 8.902009653684595e-07, + "loss": 0.5682, + "step": 7831 + }, + { + "epoch": 0.98, + "learning_rate": 8.781341178393243e-07, + "loss": 0.5514, + "step": 7832 + }, + { + "epoch": 0.98, + "learning_rate": 8.661495417096044e-07, + "loss": 0.4839, + "step": 7833 + }, + { + "epoch": 0.98, + "learning_rate": 8.542472389548305e-07, + "loss": 0.4756, + "step": 7834 + }, + { + "epoch": 0.98, + "learning_rate": 8.424272115368226e-07, + "loss": 0.4519, + "step": 7835 + }, + { + "epoch": 0.98, + "learning_rate": 8.306894614039107e-07, + "loss": 0.6108, + "step": 7836 + }, + { + "epoch": 0.98, + "learning_rate": 8.190339904908805e-07, + "loss": 0.597, + "step": 7837 + }, + { + "epoch": 0.98, + "learning_rate": 8.074608007188621e-07, + "loss": 0.5741, + "step": 7838 + }, + { + "epoch": 0.98, + "learning_rate": 7.959698939956072e-07, + "loss": 0.5217, + "step": 7839 + }, + { + "epoch": 0.98, + "learning_rate": 7.845612722151007e-07, + "loss": 0.4463, + "step": 7840 + }, + { + "epoch": 0.98, + "learning_rate": 7.73234937257894e-07, + "loss": 0.6842, + "step": 7841 + }, + { + "epoch": 0.98, + "learning_rate": 7.619908909908824e-07, + "loss": 0.53, + "step": 7842 + }, + { + "epoch": 0.98, + "learning_rate": 7.508291352674724e-07, + "loss": 0.5125, + "step": 7843 + }, + { + "epoch": 0.98, + "learning_rate": 7.397496719275254e-07, + "loss": 0.5366, + "step": 7844 + }, + { + "epoch": 0.98, + "learning_rate": 7.287525027972475e-07, + "loss": 0.0475, + "step": 7845 + }, + { + "epoch": 0.98, + "learning_rate": 7.178376296893552e-07, + "loss": 0.4686, + "step": 7846 + }, + { + "epoch": 0.98, + "learning_rate": 7.070050544029094e-07, + "loss": 0.4774, + "step": 7847 + }, + { + "epoch": 0.98, + "learning_rate": 6.962547787235374e-07, + "loss": 0.4916, + "step": 7848 + }, + { + "epoch": 0.98, + "learning_rate": 6.855868044232105e-07, + "loss": 0.427, + "step": 7849 + }, + { + "epoch": 0.98, + "learning_rate": 6.750011332603556e-07, + "loss": 0.7803, + "step": 7850 + }, + { + "epoch": 0.98, + "learning_rate": 6.644977669797991e-07, + "loss": 0.5032, + "step": 7851 + }, + { + "epoch": 0.98, + "learning_rate": 6.540767073129339e-07, + "loss": 0.4697, + "step": 7852 + }, + { + "epoch": 0.98, + "learning_rate": 6.437379559773304e-07, + "loss": 0.5334, + "step": 7853 + }, + { + "epoch": 0.98, + "learning_rate": 6.334815146772921e-07, + "loss": 0.5, + "step": 7854 + }, + { + "epoch": 0.98, + "learning_rate": 6.233073851033555e-07, + "loss": 0.6113, + "step": 7855 + }, + { + "epoch": 0.98, + "learning_rate": 6.132155689325125e-07, + "loss": 0.4866, + "step": 7856 + }, + { + "epoch": 0.98, + "learning_rate": 6.032060678283213e-07, + "loss": 0.491, + "step": 7857 + }, + { + "epoch": 0.98, + "learning_rate": 5.932788834405733e-07, + "loss": 0.4611, + "step": 7858 + }, + { + "epoch": 0.99, + "learning_rate": 5.834340174055707e-07, + "loss": 0.4009, + "step": 7859 + }, + { + "epoch": 0.99, + "learning_rate": 5.736714713461821e-07, + "loss": 0.4484, + "step": 7860 + }, + { + "epoch": 0.99, + "learning_rate": 5.639912468714537e-07, + "loss": 0.4761, + "step": 7861 + }, + { + "epoch": 0.99, + "learning_rate": 5.543933455771089e-07, + "loss": 0.458, + "step": 7862 + }, + { + "epoch": 0.99, + "learning_rate": 5.448777690451601e-07, + "loss": 0.4619, + "step": 7863 + }, + { + "epoch": 0.99, + "learning_rate": 5.354445188440749e-07, + "loss": 0.5344, + "step": 7864 + }, + { + "epoch": 0.99, + "learning_rate": 5.260935965287761e-07, + "loss": 0.5562, + "step": 7865 + }, + { + "epoch": 0.99, + "learning_rate": 5.168250036405309e-07, + "loss": 0.0478, + "step": 7866 + }, + { + "epoch": 0.99, + "learning_rate": 5.076387417071726e-07, + "loss": 0.4437, + "step": 7867 + }, + { + "epoch": 0.99, + "learning_rate": 4.985348122428235e-07, + "loss": 0.5638, + "step": 7868 + }, + { + "epoch": 0.99, + "learning_rate": 4.895132167482274e-07, + "loss": 0.4995, + "step": 7869 + }, + { + "epoch": 0.99, + "learning_rate": 4.805739567103062e-07, + "loss": 0.4465, + "step": 7870 + }, + { + "epoch": 0.99, + "learning_rate": 4.717170336026033e-07, + "loss": 0.4673, + "step": 7871 + }, + { + "epoch": 0.99, + "learning_rate": 4.6294244888500647e-07, + "loss": 0.6117, + "step": 7872 + }, + { + "epoch": 0.99, + "learning_rate": 4.5425020400385874e-07, + "loss": 0.5306, + "step": 7873 + }, + { + "epoch": 0.99, + "learning_rate": 4.4564030039195845e-07, + "loss": 0.5198, + "step": 7874 + }, + { + "epoch": 0.99, + "learning_rate": 4.371127394683927e-07, + "loss": 0.5348, + "step": 7875 + }, + { + "epoch": 0.99, + "learning_rate": 4.286675226388703e-07, + "loss": 0.0475, + "step": 7876 + }, + { + "epoch": 0.99, + "learning_rate": 4.2030465129544447e-07, + "loss": 0.6111, + "step": 7877 + }, + { + "epoch": 0.99, + "learning_rate": 4.120241268165126e-07, + "loss": 0.4967, + "step": 7878 + }, + { + "epoch": 0.99, + "learning_rate": 4.0382595056703833e-07, + "loss": 0.4869, + "step": 7879 + }, + { + "epoch": 0.99, + "learning_rate": 3.957101238983296e-07, + "loss": 0.5543, + "step": 7880 + }, + { + "epoch": 0.99, + "learning_rate": 3.8767664814809424e-07, + "loss": 0.5054, + "step": 7881 + }, + { + "epoch": 0.99, + "learning_rate": 3.7972552464055066e-07, + "loss": 0.0476, + "step": 7882 + }, + { + "epoch": 0.99, + "learning_rate": 3.718567546862617e-07, + "loss": 0.6113, + "step": 7883 + }, + { + "epoch": 0.99, + "learning_rate": 3.6407033958230086e-07, + "loss": 0.5037, + "step": 7884 + }, + { + "epoch": 0.99, + "learning_rate": 3.563662806121415e-07, + "loss": 0.5294, + "step": 7885 + }, + { + "epoch": 0.99, + "learning_rate": 3.487445790456012e-07, + "loss": 0.4808, + "step": 7886 + }, + { + "epoch": 0.99, + "learning_rate": 3.412052361390083e-07, + "loss": 0.4591, + "step": 7887 + }, + { + "epoch": 0.99, + "learning_rate": 3.3374825313509106e-07, + "loss": 0.483, + "step": 7888 + }, + { + "epoch": 0.99, + "learning_rate": 3.263736312629773e-07, + "loss": 0.5454, + "step": 7889 + }, + { + "epoch": 0.99, + "learning_rate": 3.190813717382501e-07, + "loss": 0.4285, + "step": 7890 + }, + { + "epoch": 0.99, + "learning_rate": 3.1187147576294815e-07, + "loss": 0.5436, + "step": 7891 + }, + { + "epoch": 0.99, + "learning_rate": 3.0474394452545406e-07, + "loss": 0.5551, + "step": 7892 + }, + { + "epoch": 0.99, + "learning_rate": 2.976987792006058e-07, + "loss": 0.5409, + "step": 7893 + }, + { + "epoch": 0.99, + "learning_rate": 2.907359809497523e-07, + "loss": 0.533, + "step": 7894 + }, + { + "epoch": 0.99, + "learning_rate": 2.8385555092053096e-07, + "loss": 0.646, + "step": 7895 + }, + { + "epoch": 0.99, + "learning_rate": 2.7705749024703464e-07, + "loss": 0.5385, + "step": 7896 + }, + { + "epoch": 0.99, + "learning_rate": 2.7034180004981146e-07, + "loss": 0.4281, + "step": 7897 + }, + { + "epoch": 0.99, + "learning_rate": 2.637084814358648e-07, + "loss": 0.0477, + "step": 7898 + }, + { + "epoch": 0.99, + "learning_rate": 2.5715753549854227e-07, + "loss": 0.4658, + "step": 7899 + }, + { + "epoch": 0.99, + "learning_rate": 2.506889633176468e-07, + "loss": 0.5637, + "step": 7900 + }, + { + "epoch": 0.99, + "learning_rate": 2.443027659594921e-07, + "loss": 0.5946, + "step": 7901 + }, + { + "epoch": 0.99, + "learning_rate": 2.3799894447662506e-07, + "loss": 0.4062, + "step": 7902 + }, + { + "epoch": 0.99, + "learning_rate": 2.3177749990815898e-07, + "loss": 0.5251, + "step": 7903 + }, + { + "epoch": 0.99, + "learning_rate": 2.2563843327960688e-07, + "loss": 0.5648, + "step": 7904 + }, + { + "epoch": 0.99, + "learning_rate": 2.1958174560282595e-07, + "loss": 0.4984, + "step": 7905 + }, + { + "epoch": 0.99, + "learning_rate": 2.1360743787623982e-07, + "loss": 0.444, + "step": 7906 + }, + { + "epoch": 0.99, + "learning_rate": 2.0771551108450525e-07, + "loss": 0.4882, + "step": 7907 + }, + { + "epoch": 0.99, + "learning_rate": 2.019059661989009e-07, + "loss": 0.4939, + "step": 7908 + }, + { + "epoch": 0.99, + "learning_rate": 1.9617880417699408e-07, + "loss": 0.488, + "step": 7909 + }, + { + "epoch": 0.99, + "learning_rate": 1.9053402596275193e-07, + "loss": 0.504, + "step": 7910 + }, + { + "epoch": 0.99, + "learning_rate": 1.849716324867079e-07, + "loss": 0.5244, + "step": 7911 + }, + { + "epoch": 0.99, + "learning_rate": 1.7949162466568414e-07, + "loss": 0.4855, + "step": 7912 + }, + { + "epoch": 0.99, + "learning_rate": 1.740940034029026e-07, + "loss": 0.5443, + "step": 7913 + }, + { + "epoch": 0.99, + "learning_rate": 1.6877876958815153e-07, + "loss": 0.6101, + "step": 7914 + }, + { + "epoch": 0.99, + "learning_rate": 1.635459240975079e-07, + "loss": 0.4833, + "step": 7915 + }, + { + "epoch": 0.99, + "learning_rate": 1.583954677935595e-07, + "loss": 0.4698, + "step": 7916 + }, + { + "epoch": 0.99, + "learning_rate": 1.5332740152518288e-07, + "loss": 0.5421, + "step": 7917 + }, + { + "epoch": 0.99, + "learning_rate": 1.4834172612787633e-07, + "loss": 0.5222, + "step": 7918 + }, + { + "epoch": 0.99, + "learning_rate": 1.4343844242331594e-07, + "loss": 0.5052, + "step": 7919 + }, + { + "epoch": 0.99, + "learning_rate": 1.386175512197996e-07, + "loss": 0.6548, + "step": 7920 + }, + { + "epoch": 0.99, + "learning_rate": 1.3387905331191386e-07, + "loss": 0.5387, + "step": 7921 + }, + { + "epoch": 0.99, + "learning_rate": 1.292229494808117e-07, + "loss": 0.5176, + "step": 7922 + }, + { + "epoch": 0.99, + "learning_rate": 1.2464924049387928e-07, + "loss": 0.6306, + "step": 7923 + }, + { + "epoch": 0.99, + "learning_rate": 1.2015792710501349e-07, + "loss": 0.5841, + "step": 7924 + }, + { + "epoch": 0.99, + "learning_rate": 1.1574901005456661e-07, + "loss": 0.5014, + "step": 7925 + }, + { + "epoch": 0.99, + "learning_rate": 1.1142249006929061e-07, + "loss": 0.4728, + "step": 7926 + }, + { + "epoch": 0.99, + "learning_rate": 1.0717836786228174e-07, + "loss": 0.5928, + "step": 7927 + }, + { + "epoch": 0.99, + "learning_rate": 1.0301664413309153e-07, + "loss": 0.501, + "step": 7928 + }, + { + "epoch": 0.99, + "learning_rate": 9.893731956778229e-08, + "loss": 0.5613, + "step": 7929 + }, + { + "epoch": 0.99, + "learning_rate": 9.494039483870509e-08, + "loss": 0.0475, + "step": 7930 + }, + { + "epoch": 0.99, + "learning_rate": 9.102587060472178e-08, + "loss": 0.0476, + "step": 7931 + }, + { + "epoch": 0.99, + "learning_rate": 8.719374751098296e-08, + "loss": 0.5878, + "step": 7932 + }, + { + "epoch": 0.99, + "learning_rate": 8.344402618926106e-08, + "loss": 0.7438, + "step": 7933 + }, + { + "epoch": 0.99, + "learning_rate": 7.97767072575617e-08, + "loss": 0.5001, + "step": 7934 + }, + { + "epoch": 0.99, + "learning_rate": 7.619179132040132e-08, + "loss": 0.6372, + "step": 7935 + }, + { + "epoch": 0.99, + "learning_rate": 7.268927896869615e-08, + "loss": 0.5803, + "step": 7936 + }, + { + "epoch": 0.99, + "learning_rate": 6.926917077976214e-08, + "loss": 0.6897, + "step": 7937 + }, + { + "epoch": 0.99, + "learning_rate": 6.593146731737054e-08, + "loss": 0.6082, + "step": 7938 + }, + { + "epoch": 1.0, + "learning_rate": 6.267616913158136e-08, + "loss": 0.5822, + "step": 7939 + }, + { + "epoch": 1.0, + "learning_rate": 5.9503276759076406e-08, + "loss": 0.4899, + "step": 7940 + }, + { + "epoch": 1.0, + "learning_rate": 5.6412790722826236e-08, + "loss": 0.613, + "step": 7941 + }, + { + "epoch": 1.0, + "learning_rate": 5.3404711532256675e-08, + "loss": 0.5228, + "step": 7942 + }, + { + "epoch": 1.0, + "learning_rate": 5.047903968319334e-08, + "loss": 0.5957, + "step": 7943 + }, + { + "epoch": 1.0, + "learning_rate": 4.763577565780608e-08, + "loss": 0.4762, + "step": 7944 + }, + { + "epoch": 1.0, + "learning_rate": 4.487491992483106e-08, + "loss": 0.5176, + "step": 7945 + }, + { + "epoch": 1.0, + "learning_rate": 4.2196472939348694e-08, + "loss": 0.479, + "step": 7946 + }, + { + "epoch": 1.0, + "learning_rate": 3.9600435142839176e-08, + "loss": 0.5626, + "step": 7947 + }, + { + "epoch": 1.0, + "learning_rate": 3.7086806963237964e-08, + "loss": 0.5175, + "step": 7948 + }, + { + "epoch": 1.0, + "learning_rate": 3.465558881476927e-08, + "loss": 0.5612, + "step": 7949 + }, + { + "epoch": 1.0, + "learning_rate": 3.2306781098279114e-08, + "loss": 0.5146, + "step": 7950 + }, + { + "epoch": 1.0, + "learning_rate": 3.004038420095778e-08, + "loss": 0.3992, + "step": 7951 + }, + { + "epoch": 1.0, + "learning_rate": 2.7856398496228785e-08, + "loss": 0.5352, + "step": 7952 + }, + { + "epoch": 1.0, + "learning_rate": 2.5754824344248472e-08, + "loss": 0.6089, + "step": 7953 + }, + { + "epoch": 1.0, + "learning_rate": 2.3735662091350918e-08, + "loss": 0.5491, + "step": 7954 + }, + { + "epoch": 1.0, + "learning_rate": 2.179891207032547e-08, + "loss": 0.5558, + "step": 7955 + }, + { + "epoch": 1.0, + "learning_rate": 1.9944574600472275e-08, + "loss": 0.4954, + "step": 7956 + }, + { + "epoch": 1.0, + "learning_rate": 1.817264998738022e-08, + "loss": 0.5411, + "step": 7957 + }, + { + "epoch": 1.0, + "learning_rate": 1.648313852320449e-08, + "loss": 0.5078, + "step": 7958 + }, + { + "epoch": 1.0, + "learning_rate": 1.4876040486333509e-08, + "loss": 0.6061, + "step": 7959 + }, + { + "epoch": 1.0, + "learning_rate": 1.3351356141777515e-08, + "loss": 0.5858, + "step": 7960 + }, + { + "epoch": 1.0, + "learning_rate": 1.190908574077998e-08, + "loss": 0.5497, + "step": 7961 + }, + { + "epoch": 1.0, + "learning_rate": 1.054922952109516e-08, + "loss": 0.4708, + "step": 7962 + }, + { + "epoch": 1.0, + "learning_rate": 9.271787706877089e-09, + "loss": 0.5343, + "step": 7963 + }, + { + "epoch": 1.0, + "learning_rate": 8.076760508624048e-09, + "loss": 0.5408, + "step": 7964 + }, + { + "epoch": 1.0, + "learning_rate": 6.9641481234006264e-09, + "loss": 0.4531, + "step": 7965 + }, + { + "epoch": 1.0, + "learning_rate": 5.933950734615668e-09, + "loss": 0.5758, + "step": 7966 + }, + { + "epoch": 1.0, + "learning_rate": 4.98616851196676e-09, + "loss": 0.5009, + "step": 7967 + }, + { + "epoch": 1.0, + "learning_rate": 4.120801611828817e-09, + "loss": 0.6803, + "step": 7968 + }, + { + "epoch": 1.0, + "learning_rate": 3.3378501766989645e-09, + "loss": 0.5134, + "step": 7969 + }, + { + "epoch": 1.0, + "learning_rate": 2.6373143356961393e-09, + "loss": 0.567, + "step": 7970 + }, + { + "epoch": 1.0, + "learning_rate": 2.0191942042835366e-09, + "loss": 0.5302, + "step": 7971 + }, + { + "epoch": 1.0, + "learning_rate": 1.4834898843796297e-09, + "loss": 0.741, + "step": 7972 + }, + { + "epoch": 1.0, + "learning_rate": 1.030201464247149e-09, + "loss": 0.4818, + "step": 7973 + }, + { + "epoch": 1.0, + "learning_rate": 6.593290186596157e-10, + "loss": 0.6036, + "step": 7974 + }, + { + "epoch": 1.0, + "learning_rate": 3.708726086237846e-10, + "loss": 0.4231, + "step": 7975 + }, + { + "epoch": 1.0, + "learning_rate": 1.6483228182373466e-10, + "loss": 0.5962, + "step": 7976 + }, + { + "epoch": 1.0, + "learning_rate": 4.120807217677935e-11, + "loss": 0.2431, + "step": 7977 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.1664, + "step": 7978 + }, + { + "epoch": 1.0, + "step": 7978, + "total_flos": 2.3543594708946125e+17, + "train_loss": 0.34777136033087575, + "train_runtime": 39013.2843, + "train_samples_per_second": 26.174, + "train_steps_per_second": 0.204 + } + ], + "logging_steps": 1.0, + "max_steps": 7978, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 2.3543594708946125e+17, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}