diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,105559 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9997441800972116, + "global_step": 17589, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 3.787878787878788e-08, + "loss": 0.9154, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 7.575757575757576e-08, + "loss": 0.8746, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.1363636363636364e-07, + "loss": 0.8803, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 1.5151515151515152e-07, + "loss": 0.9134, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 1.893939393939394e-07, + "loss": 0.8956, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 2.2727272727272729e-07, + "loss": 0.9051, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 2.651515151515152e-07, + "loss": 0.9207, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 3.0303030303030305e-07, + "loss": 0.9017, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 3.409090909090909e-07, + "loss": 0.918, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 3.787878787878788e-07, + "loss": 0.9073, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.1666666666666667e-07, + "loss": 0.8966, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 4.5454545454545457e-07, + "loss": 0.869, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 4.924242424242425e-07, + "loss": 0.9353, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 5.303030303030304e-07, + "loss": 0.8738, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 5.681818181818182e-07, + "loss": 0.8989, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 6.060606060606061e-07, + "loss": 0.8742, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 6.43939393939394e-07, + "loss": 0.8959, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 6.818181818181818e-07, + "loss": 0.8664, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 7.196969696969698e-07, + "loss": 0.8564, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 7.575757575757576e-07, + "loss": 0.8495, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 7.954545454545455e-07, + "loss": 0.8599, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 8.333333333333333e-07, + "loss": 0.8252, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 8.712121212121213e-07, + "loss": 0.7966, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 9.090909090909091e-07, + "loss": 0.8095, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 9.469696969696971e-07, + "loss": 0.8124, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 9.84848484848485e-07, + "loss": 0.805, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 1.0227272727272729e-06, + "loss": 0.7896, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 1.0606060606060608e-06, + "loss": 0.808, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 1.0984848484848485e-06, + "loss": 0.8087, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 1.1363636363636364e-06, + "loss": 0.7526, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 1.1742424242424245e-06, + "loss": 0.795, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 1.2121212121212122e-06, + "loss": 0.7064, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 1.25e-06, + "loss": 0.7588, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 1.287878787878788e-06, + "loss": 0.7093, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 1.3257575757575757e-06, + "loss": 0.7257, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 1.3636363636363636e-06, + "loss": 0.7528, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 1.4015151515151515e-06, + "loss": 0.725, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 1.4393939393939396e-06, + "loss": 0.762, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 1.4772727272727275e-06, + "loss": 0.6899, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 1.5151515151515152e-06, + "loss": 0.7211, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 1.5530303030303032e-06, + "loss": 0.6943, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 1.590909090909091e-06, + "loss": 0.7212, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 1.628787878787879e-06, + "loss": 0.6788, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 1.6666666666666667e-06, + "loss": 0.6887, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 1.7045454545454546e-06, + "loss": 0.6736, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 1.7424242424242427e-06, + "loss": 0.723, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 1.7803030303030306e-06, + "loss": 0.6948, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 1.8181818181818183e-06, + "loss": 0.7479, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 1.8560606060606062e-06, + "loss": 0.6968, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 1.8939393939393941e-06, + "loss": 0.6589, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 1.931818181818182e-06, + "loss": 0.6641, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 1.96969696969697e-06, + "loss": 0.7071, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 2.0075757575757576e-06, + "loss": 0.7267, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 2.0454545454545457e-06, + "loss": 0.7085, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 2.0833333333333334e-06, + "loss": 0.6772, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 2.1212121212121216e-06, + "loss": 0.6667, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 2.1590909090909092e-06, + "loss": 0.6579, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 2.196969696969697e-06, + "loss": 0.6723, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 2.234848484848485e-06, + "loss": 0.6919, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 2.2727272727272728e-06, + "loss": 0.6292, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 2.310606060606061e-06, + "loss": 0.6369, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 2.348484848484849e-06, + "loss": 0.6878, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 2.3863636363636367e-06, + "loss": 0.6514, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 2.4242424242424244e-06, + "loss": 0.687, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 2.4621212121212125e-06, + "loss": 0.6408, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-06, + "loss": 0.6012, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 2.537878787878788e-06, + "loss": 0.702, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 2.575757575757576e-06, + "loss": 0.6979, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 2.6136363636363637e-06, + "loss": 0.6188, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 2.6515151515151514e-06, + "loss": 0.6369, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 2.6893939393939395e-06, + "loss": 0.6311, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 2.7272727272727272e-06, + "loss": 0.6379, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 2.7651515151515153e-06, + "loss": 0.6421, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 2.803030303030303e-06, + "loss": 0.6591, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 2.8409090909090916e-06, + "loss": 0.678, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 2.8787878787878793e-06, + "loss": 0.6911, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 2.916666666666667e-06, + "loss": 0.662, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 2.954545454545455e-06, + "loss": 0.6495, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 2.992424242424243e-06, + "loss": 0.6641, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 3.0303030303030305e-06, + "loss": 0.6874, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 3.0681818181818186e-06, + "loss": 0.6774, + "step": 81 + }, + { + "epoch": 0.01, + "learning_rate": 3.1060606060606063e-06, + "loss": 0.6677, + "step": 82 + }, + { + "epoch": 0.01, + "learning_rate": 3.143939393939394e-06, + "loss": 0.6551, + "step": 83 + }, + { + "epoch": 0.01, + "learning_rate": 3.181818181818182e-06, + "loss": 0.6054, + "step": 84 + }, + { + "epoch": 0.01, + "learning_rate": 3.21969696969697e-06, + "loss": 0.6411, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 3.257575757575758e-06, + "loss": 0.6709, + "step": 86 + }, + { + "epoch": 0.01, + "learning_rate": 3.2954545454545456e-06, + "loss": 0.6131, + "step": 87 + }, + { + "epoch": 0.02, + "learning_rate": 3.3333333333333333e-06, + "loss": 0.6428, + "step": 88 + }, + { + "epoch": 0.02, + "learning_rate": 3.3712121212121214e-06, + "loss": 0.636, + "step": 89 + }, + { + "epoch": 0.02, + "learning_rate": 3.409090909090909e-06, + "loss": 0.654, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 3.4469696969696977e-06, + "loss": 0.6597, + "step": 91 + }, + { + "epoch": 0.02, + "learning_rate": 3.4848484848484854e-06, + "loss": 0.6141, + "step": 92 + }, + { + "epoch": 0.02, + "learning_rate": 3.522727272727273e-06, + "loss": 0.6818, + "step": 93 + }, + { + "epoch": 0.02, + "learning_rate": 3.560606060606061e-06, + "loss": 0.6565, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 3.598484848484849e-06, + "loss": 0.6208, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 3.6363636363636366e-06, + "loss": 0.6454, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 3.6742424242424247e-06, + "loss": 0.6867, + "step": 97 + }, + { + "epoch": 0.02, + "learning_rate": 3.7121212121212124e-06, + "loss": 0.6045, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.6514, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 3.7878787878787882e-06, + "loss": 0.6208, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 3.825757575757576e-06, + "loss": 0.6416, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 3.863636363636364e-06, + "loss": 0.6316, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 3.901515151515151e-06, + "loss": 0.6256, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 3.93939393939394e-06, + "loss": 0.6058, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 3.9772727272727275e-06, + "loss": 0.601, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 4.015151515151515e-06, + "loss": 0.6216, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 4.053030303030303e-06, + "loss": 0.6265, + "step": 107 + }, + { + "epoch": 0.02, + "learning_rate": 4.0909090909090915e-06, + "loss": 0.634, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 4.128787878787879e-06, + "loss": 0.6333, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 4.166666666666667e-06, + "loss": 0.6249, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 4.204545454545455e-06, + "loss": 0.6054, + "step": 111 + }, + { + "epoch": 0.02, + "learning_rate": 4.242424242424243e-06, + "loss": 0.61, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 4.280303030303031e-06, + "loss": 0.6185, + "step": 113 + }, + { + "epoch": 0.02, + "learning_rate": 4.3181818181818185e-06, + "loss": 0.6326, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 4.356060606060606e-06, + "loss": 0.6754, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 4.393939393939394e-06, + "loss": 0.6414, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 4.4318181818181824e-06, + "loss": 0.6255, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 4.46969696969697e-06, + "loss": 0.6109, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 4.507575757575758e-06, + "loss": 0.6669, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 4.5454545454545455e-06, + "loss": 0.6022, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 4.583333333333333e-06, + "loss": 0.6511, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 4.621212121212122e-06, + "loss": 0.5916, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 4.6590909090909095e-06, + "loss": 0.6651, + "step": 123 + }, + { + "epoch": 0.02, + "learning_rate": 4.696969696969698e-06, + "loss": 0.6574, + "step": 124 + }, + { + "epoch": 0.02, + "learning_rate": 4.734848484848486e-06, + "loss": 0.6283, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 4.772727272727273e-06, + "loss": 0.5947, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 4.810606060606061e-06, + "loss": 0.6489, + "step": 127 + }, + { + "epoch": 0.02, + "learning_rate": 4.848484848484849e-06, + "loss": 0.6161, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 4.8863636363636365e-06, + "loss": 0.601, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 4.924242424242425e-06, + "loss": 0.6248, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 4.962121212121213e-06, + "loss": 0.5867, + "step": 131 + }, + { + "epoch": 0.02, + "learning_rate": 5e-06, + "loss": 0.5966, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 5.037878787878788e-06, + "loss": 0.5791, + "step": 133 + }, + { + "epoch": 0.02, + "learning_rate": 5.075757575757576e-06, + "loss": 0.6553, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 5.113636363636364e-06, + "loss": 0.6847, + "step": 135 + }, + { + "epoch": 0.02, + "learning_rate": 5.151515151515152e-06, + "loss": 0.6196, + "step": 136 + }, + { + "epoch": 0.02, + "learning_rate": 5.18939393939394e-06, + "loss": 0.6361, + "step": 137 + }, + { + "epoch": 0.02, + "learning_rate": 5.2272727272727274e-06, + "loss": 0.6258, + "step": 138 + }, + { + "epoch": 0.02, + "learning_rate": 5.265151515151515e-06, + "loss": 0.575, + "step": 139 + }, + { + "epoch": 0.02, + "learning_rate": 5.303030303030303e-06, + "loss": 0.6264, + "step": 140 + }, + { + "epoch": 0.02, + "learning_rate": 5.340909090909091e-06, + "loss": 0.5943, + "step": 141 + }, + { + "epoch": 0.02, + "learning_rate": 5.378787878787879e-06, + "loss": 0.6548, + "step": 142 + }, + { + "epoch": 0.02, + "learning_rate": 5.416666666666667e-06, + "loss": 0.6275, + "step": 143 + }, + { + "epoch": 0.02, + "learning_rate": 5.4545454545454545e-06, + "loss": 0.6434, + "step": 144 + }, + { + "epoch": 0.02, + "learning_rate": 5.492424242424242e-06, + "loss": 0.6291, + "step": 145 + }, + { + "epoch": 0.02, + "learning_rate": 5.530303030303031e-06, + "loss": 0.5959, + "step": 146 + }, + { + "epoch": 0.03, + "learning_rate": 5.568181818181818e-06, + "loss": 0.5677, + "step": 147 + }, + { + "epoch": 0.03, + "learning_rate": 5.606060606060606e-06, + "loss": 0.6108, + "step": 148 + }, + { + "epoch": 0.03, + "learning_rate": 5.643939393939395e-06, + "loss": 0.6497, + "step": 149 + }, + { + "epoch": 0.03, + "learning_rate": 5.681818181818183e-06, + "loss": 0.6413, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 5.719696969696971e-06, + "loss": 0.6326, + "step": 151 + }, + { + "epoch": 0.03, + "learning_rate": 5.7575757575757586e-06, + "loss": 0.5527, + "step": 152 + }, + { + "epoch": 0.03, + "learning_rate": 5.795454545454546e-06, + "loss": 0.6507, + "step": 153 + }, + { + "epoch": 0.03, + "learning_rate": 5.833333333333334e-06, + "loss": 0.5954, + "step": 154 + }, + { + "epoch": 0.03, + "learning_rate": 5.871212121212122e-06, + "loss": 0.6138, + "step": 155 + }, + { + "epoch": 0.03, + "learning_rate": 5.90909090909091e-06, + "loss": 0.6018, + "step": 156 + }, + { + "epoch": 0.03, + "learning_rate": 5.946969696969698e-06, + "loss": 0.6039, + "step": 157 + }, + { + "epoch": 0.03, + "learning_rate": 5.984848484848486e-06, + "loss": 0.6232, + "step": 158 + }, + { + "epoch": 0.03, + "learning_rate": 6.022727272727273e-06, + "loss": 0.6054, + "step": 159 + }, + { + "epoch": 0.03, + "learning_rate": 6.060606060606061e-06, + "loss": 0.5816, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 6.0984848484848495e-06, + "loss": 0.627, + "step": 161 + }, + { + "epoch": 0.03, + "learning_rate": 6.136363636363637e-06, + "loss": 0.6247, + "step": 162 + }, + { + "epoch": 0.03, + "learning_rate": 6.174242424242425e-06, + "loss": 0.5734, + "step": 163 + }, + { + "epoch": 0.03, + "learning_rate": 6.212121212121213e-06, + "loss": 0.6254, + "step": 164 + }, + { + "epoch": 0.03, + "learning_rate": 6.25e-06, + "loss": 0.5783, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 6.287878787878788e-06, + "loss": 0.6269, + "step": 166 + }, + { + "epoch": 0.03, + "learning_rate": 6.3257575757575765e-06, + "loss": 0.5981, + "step": 167 + }, + { + "epoch": 0.03, + "learning_rate": 6.363636363636364e-06, + "loss": 0.6063, + "step": 168 + }, + { + "epoch": 0.03, + "learning_rate": 6.401515151515152e-06, + "loss": 0.5895, + "step": 169 + }, + { + "epoch": 0.03, + "learning_rate": 6.43939393939394e-06, + "loss": 0.6271, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 6.477272727272727e-06, + "loss": 0.6493, + "step": 171 + }, + { + "epoch": 0.03, + "learning_rate": 6.515151515151516e-06, + "loss": 0.6248, + "step": 172 + }, + { + "epoch": 0.03, + "learning_rate": 6.5530303030303036e-06, + "loss": 0.6146, + "step": 173 + }, + { + "epoch": 0.03, + "learning_rate": 6.590909090909091e-06, + "loss": 0.6691, + "step": 174 + }, + { + "epoch": 0.03, + "learning_rate": 6.628787878787879e-06, + "loss": 0.6246, + "step": 175 + }, + { + "epoch": 0.03, + "learning_rate": 6.666666666666667e-06, + "loss": 0.6074, + "step": 176 + }, + { + "epoch": 0.03, + "learning_rate": 6.704545454545454e-06, + "loss": 0.6203, + "step": 177 + }, + { + "epoch": 0.03, + "learning_rate": 6.742424242424243e-06, + "loss": 0.6181, + "step": 178 + }, + { + "epoch": 0.03, + "learning_rate": 6.780303030303031e-06, + "loss": 0.6435, + "step": 179 + }, + { + "epoch": 0.03, + "learning_rate": 6.818181818181818e-06, + "loss": 0.6201, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 6.856060606060606e-06, + "loss": 0.6015, + "step": 181 + }, + { + "epoch": 0.03, + "learning_rate": 6.893939393939395e-06, + "loss": 0.6271, + "step": 182 + }, + { + "epoch": 0.03, + "learning_rate": 6.931818181818183e-06, + "loss": 0.623, + "step": 183 + }, + { + "epoch": 0.03, + "learning_rate": 6.969696969696971e-06, + "loss": 0.5925, + "step": 184 + }, + { + "epoch": 0.03, + "learning_rate": 7.0075757575757585e-06, + "loss": 0.6148, + "step": 185 + }, + { + "epoch": 0.03, + "learning_rate": 7.045454545454546e-06, + "loss": 0.5997, + "step": 186 + }, + { + "epoch": 0.03, + "learning_rate": 7.083333333333335e-06, + "loss": 0.5767, + "step": 187 + }, + { + "epoch": 0.03, + "learning_rate": 7.121212121212122e-06, + "loss": 0.5987, + "step": 188 + }, + { + "epoch": 0.03, + "learning_rate": 7.15909090909091e-06, + "loss": 0.6163, + "step": 189 + }, + { + "epoch": 0.03, + "learning_rate": 7.196969696969698e-06, + "loss": 0.5867, + "step": 190 + }, + { + "epoch": 0.03, + "learning_rate": 7.2348484848484855e-06, + "loss": 0.6031, + "step": 191 + }, + { + "epoch": 0.03, + "learning_rate": 7.272727272727273e-06, + "loss": 0.6061, + "step": 192 + }, + { + "epoch": 0.03, + "learning_rate": 7.310606060606062e-06, + "loss": 0.6452, + "step": 193 + }, + { + "epoch": 0.03, + "learning_rate": 7.348484848484849e-06, + "loss": 0.5981, + "step": 194 + }, + { + "epoch": 0.03, + "learning_rate": 7.386363636363637e-06, + "loss": 0.5982, + "step": 195 + }, + { + "epoch": 0.03, + "learning_rate": 7.424242424242425e-06, + "loss": 0.6044, + "step": 196 + }, + { + "epoch": 0.03, + "learning_rate": 7.4621212121212125e-06, + "loss": 0.6062, + "step": 197 + }, + { + "epoch": 0.03, + "learning_rate": 7.500000000000001e-06, + "loss": 0.6195, + "step": 198 + }, + { + "epoch": 0.03, + "learning_rate": 7.537878787878789e-06, + "loss": 0.5454, + "step": 199 + }, + { + "epoch": 0.03, + "learning_rate": 7.5757575757575764e-06, + "loss": 0.6198, + "step": 200 + }, + { + "epoch": 0.03, + "learning_rate": 7.613636363636364e-06, + "loss": 0.5853, + "step": 201 + }, + { + "epoch": 0.03, + "learning_rate": 7.651515151515152e-06, + "loss": 0.5772, + "step": 202 + }, + { + "epoch": 0.03, + "learning_rate": 7.68939393939394e-06, + "loss": 0.5885, + "step": 203 + }, + { + "epoch": 0.03, + "learning_rate": 7.727272727272727e-06, + "loss": 0.6359, + "step": 204 + }, + { + "epoch": 0.03, + "learning_rate": 7.765151515151516e-06, + "loss": 0.5704, + "step": 205 + }, + { + "epoch": 0.04, + "learning_rate": 7.803030303030303e-06, + "loss": 0.5818, + "step": 206 + }, + { + "epoch": 0.04, + "learning_rate": 7.840909090909091e-06, + "loss": 0.6148, + "step": 207 + }, + { + "epoch": 0.04, + "learning_rate": 7.87878787878788e-06, + "loss": 0.6387, + "step": 208 + }, + { + "epoch": 0.04, + "learning_rate": 7.916666666666667e-06, + "loss": 0.6213, + "step": 209 + }, + { + "epoch": 0.04, + "learning_rate": 7.954545454545455e-06, + "loss": 0.6417, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 7.992424242424242e-06, + "loss": 0.6244, + "step": 211 + }, + { + "epoch": 0.04, + "learning_rate": 8.03030303030303e-06, + "loss": 0.6131, + "step": 212 + }, + { + "epoch": 0.04, + "learning_rate": 8.068181818181819e-06, + "loss": 0.6298, + "step": 213 + }, + { + "epoch": 0.04, + "learning_rate": 8.106060606060606e-06, + "loss": 0.6079, + "step": 214 + }, + { + "epoch": 0.04, + "learning_rate": 8.143939393939394e-06, + "loss": 0.6119, + "step": 215 + }, + { + "epoch": 0.04, + "learning_rate": 8.181818181818183e-06, + "loss": 0.604, + "step": 216 + }, + { + "epoch": 0.04, + "learning_rate": 8.219696969696971e-06, + "loss": 0.6254, + "step": 217 + }, + { + "epoch": 0.04, + "learning_rate": 8.257575757575758e-06, + "loss": 0.5895, + "step": 218 + }, + { + "epoch": 0.04, + "learning_rate": 8.295454545454547e-06, + "loss": 0.5849, + "step": 219 + }, + { + "epoch": 0.04, + "learning_rate": 8.333333333333334e-06, + "loss": 0.6211, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 8.371212121212122e-06, + "loss": 0.6407, + "step": 221 + }, + { + "epoch": 0.04, + "learning_rate": 8.40909090909091e-06, + "loss": 0.6015, + "step": 222 + }, + { + "epoch": 0.04, + "learning_rate": 8.446969696969698e-06, + "loss": 0.6046, + "step": 223 + }, + { + "epoch": 0.04, + "learning_rate": 8.484848484848486e-06, + "loss": 0.5946, + "step": 224 + }, + { + "epoch": 0.04, + "learning_rate": 8.522727272727273e-06, + "loss": 0.6535, + "step": 225 + }, + { + "epoch": 0.04, + "learning_rate": 8.560606060606062e-06, + "loss": 0.6668, + "step": 226 + }, + { + "epoch": 0.04, + "learning_rate": 8.59848484848485e-06, + "loss": 0.603, + "step": 227 + }, + { + "epoch": 0.04, + "learning_rate": 8.636363636363637e-06, + "loss": 0.6458, + "step": 228 + }, + { + "epoch": 0.04, + "learning_rate": 8.674242424242426e-06, + "loss": 0.6232, + "step": 229 + }, + { + "epoch": 0.04, + "learning_rate": 8.712121212121212e-06, + "loss": 0.6314, + "step": 230 + }, + { + "epoch": 0.04, + "learning_rate": 8.750000000000001e-06, + "loss": 0.5853, + "step": 231 + }, + { + "epoch": 0.04, + "learning_rate": 8.787878787878788e-06, + "loss": 0.562, + "step": 232 + }, + { + "epoch": 0.04, + "learning_rate": 8.825757575757576e-06, + "loss": 0.6212, + "step": 233 + }, + { + "epoch": 0.04, + "learning_rate": 8.863636363636365e-06, + "loss": 0.5966, + "step": 234 + }, + { + "epoch": 0.04, + "learning_rate": 8.901515151515152e-06, + "loss": 0.5904, + "step": 235 + }, + { + "epoch": 0.04, + "learning_rate": 8.93939393939394e-06, + "loss": 0.6004, + "step": 236 + }, + { + "epoch": 0.04, + "learning_rate": 8.977272727272727e-06, + "loss": 0.6159, + "step": 237 + }, + { + "epoch": 0.04, + "learning_rate": 9.015151515151516e-06, + "loss": 0.6263, + "step": 238 + }, + { + "epoch": 0.04, + "learning_rate": 9.053030303030304e-06, + "loss": 0.5734, + "step": 239 + }, + { + "epoch": 0.04, + "learning_rate": 9.090909090909091e-06, + "loss": 0.5822, + "step": 240 + }, + { + "epoch": 0.04, + "learning_rate": 9.12878787878788e-06, + "loss": 0.6085, + "step": 241 + }, + { + "epoch": 0.04, + "learning_rate": 9.166666666666666e-06, + "loss": 0.6283, + "step": 242 + }, + { + "epoch": 0.04, + "learning_rate": 9.204545454545455e-06, + "loss": 0.5782, + "step": 243 + }, + { + "epoch": 0.04, + "learning_rate": 9.242424242424244e-06, + "loss": 0.5689, + "step": 244 + }, + { + "epoch": 0.04, + "learning_rate": 9.28030303030303e-06, + "loss": 0.5978, + "step": 245 + }, + { + "epoch": 0.04, + "learning_rate": 9.318181818181819e-06, + "loss": 0.5881, + "step": 246 + }, + { + "epoch": 0.04, + "learning_rate": 9.356060606060606e-06, + "loss": 0.5546, + "step": 247 + }, + { + "epoch": 0.04, + "learning_rate": 9.393939393939396e-06, + "loss": 0.6173, + "step": 248 + }, + { + "epoch": 0.04, + "learning_rate": 9.431818181818183e-06, + "loss": 0.6234, + "step": 249 + }, + { + "epoch": 0.04, + "learning_rate": 9.469696969696971e-06, + "loss": 0.609, + "step": 250 + }, + { + "epoch": 0.04, + "learning_rate": 9.507575757575758e-06, + "loss": 0.5703, + "step": 251 + }, + { + "epoch": 0.04, + "learning_rate": 9.545454545454547e-06, + "loss": 0.6183, + "step": 252 + }, + { + "epoch": 0.04, + "learning_rate": 9.583333333333335e-06, + "loss": 0.6105, + "step": 253 + }, + { + "epoch": 0.04, + "learning_rate": 9.621212121212122e-06, + "loss": 0.5873, + "step": 254 + }, + { + "epoch": 0.04, + "learning_rate": 9.65909090909091e-06, + "loss": 0.6023, + "step": 255 + }, + { + "epoch": 0.04, + "learning_rate": 9.696969696969698e-06, + "loss": 0.591, + "step": 256 + }, + { + "epoch": 0.04, + "learning_rate": 9.734848484848486e-06, + "loss": 0.5687, + "step": 257 + }, + { + "epoch": 0.04, + "learning_rate": 9.772727272727273e-06, + "loss": 0.5511, + "step": 258 + }, + { + "epoch": 0.04, + "learning_rate": 9.810606060606061e-06, + "loss": 0.5804, + "step": 259 + }, + { + "epoch": 0.04, + "learning_rate": 9.84848484848485e-06, + "loss": 0.5992, + "step": 260 + }, + { + "epoch": 0.04, + "learning_rate": 9.886363636363637e-06, + "loss": 0.6332, + "step": 261 + }, + { + "epoch": 0.04, + "learning_rate": 9.924242424242425e-06, + "loss": 0.6116, + "step": 262 + }, + { + "epoch": 0.04, + "learning_rate": 9.962121212121212e-06, + "loss": 0.6207, + "step": 263 + }, + { + "epoch": 0.05, + "learning_rate": 1e-05, + "loss": 0.5899, + "step": 264 + }, + { + "epoch": 0.05, + "learning_rate": 1.003787878787879e-05, + "loss": 0.5903, + "step": 265 + }, + { + "epoch": 0.05, + "learning_rate": 1.0075757575757576e-05, + "loss": 0.6072, + "step": 266 + }, + { + "epoch": 0.05, + "learning_rate": 1.0113636363636365e-05, + "loss": 0.6019, + "step": 267 + }, + { + "epoch": 0.05, + "learning_rate": 1.0151515151515152e-05, + "loss": 0.6349, + "step": 268 + }, + { + "epoch": 0.05, + "learning_rate": 1.0189393939393942e-05, + "loss": 0.6085, + "step": 269 + }, + { + "epoch": 0.05, + "learning_rate": 1.0227272727272729e-05, + "loss": 0.5699, + "step": 270 + }, + { + "epoch": 0.05, + "learning_rate": 1.0265151515151517e-05, + "loss": 0.6052, + "step": 271 + }, + { + "epoch": 0.05, + "learning_rate": 1.0303030303030304e-05, + "loss": 0.6101, + "step": 272 + }, + { + "epoch": 0.05, + "learning_rate": 1.0340909090909093e-05, + "loss": 0.5978, + "step": 273 + }, + { + "epoch": 0.05, + "learning_rate": 1.037878787878788e-05, + "loss": 0.5923, + "step": 274 + }, + { + "epoch": 0.05, + "learning_rate": 1.0416666666666668e-05, + "loss": 0.5991, + "step": 275 + }, + { + "epoch": 0.05, + "learning_rate": 1.0454545454545455e-05, + "loss": 0.5717, + "step": 276 + }, + { + "epoch": 0.05, + "learning_rate": 1.0492424242424243e-05, + "loss": 0.5954, + "step": 277 + }, + { + "epoch": 0.05, + "learning_rate": 1.053030303030303e-05, + "loss": 0.5995, + "step": 278 + }, + { + "epoch": 0.05, + "learning_rate": 1.056818181818182e-05, + "loss": 0.5584, + "step": 279 + }, + { + "epoch": 0.05, + "learning_rate": 1.0606060606060606e-05, + "loss": 0.5698, + "step": 280 + }, + { + "epoch": 0.05, + "learning_rate": 1.0643939393939396e-05, + "loss": 0.567, + "step": 281 + }, + { + "epoch": 0.05, + "learning_rate": 1.0681818181818183e-05, + "loss": 0.6304, + "step": 282 + }, + { + "epoch": 0.05, + "learning_rate": 1.0719696969696971e-05, + "loss": 0.578, + "step": 283 + }, + { + "epoch": 0.05, + "learning_rate": 1.0757575757575758e-05, + "loss": 0.6327, + "step": 284 + }, + { + "epoch": 0.05, + "learning_rate": 1.0795454545454547e-05, + "loss": 0.6012, + "step": 285 + }, + { + "epoch": 0.05, + "learning_rate": 1.0833333333333334e-05, + "loss": 0.6461, + "step": 286 + }, + { + "epoch": 0.05, + "learning_rate": 1.0871212121212122e-05, + "loss": 0.655, + "step": 287 + }, + { + "epoch": 0.05, + "learning_rate": 1.0909090909090909e-05, + "loss": 0.6305, + "step": 288 + }, + { + "epoch": 0.05, + "learning_rate": 1.0946969696969697e-05, + "loss": 0.615, + "step": 289 + }, + { + "epoch": 0.05, + "learning_rate": 1.0984848484848484e-05, + "loss": 0.5968, + "step": 290 + }, + { + "epoch": 0.05, + "learning_rate": 1.1022727272727275e-05, + "loss": 0.5776, + "step": 291 + }, + { + "epoch": 0.05, + "learning_rate": 1.1060606060606061e-05, + "loss": 0.609, + "step": 292 + }, + { + "epoch": 0.05, + "learning_rate": 1.109848484848485e-05, + "loss": 0.5931, + "step": 293 + }, + { + "epoch": 0.05, + "learning_rate": 1.1136363636363637e-05, + "loss": 0.566, + "step": 294 + }, + { + "epoch": 0.05, + "learning_rate": 1.1174242424242425e-05, + "loss": 0.6001, + "step": 295 + }, + { + "epoch": 0.05, + "learning_rate": 1.1212121212121212e-05, + "loss": 0.6429, + "step": 296 + }, + { + "epoch": 0.05, + "learning_rate": 1.125e-05, + "loss": 0.5726, + "step": 297 + }, + { + "epoch": 0.05, + "learning_rate": 1.128787878787879e-05, + "loss": 0.6158, + "step": 298 + }, + { + "epoch": 0.05, + "learning_rate": 1.1325757575757576e-05, + "loss": 0.5792, + "step": 299 + }, + { + "epoch": 0.05, + "learning_rate": 1.1363636363636366e-05, + "loss": 0.6314, + "step": 300 + }, + { + "epoch": 0.05, + "learning_rate": 1.1401515151515151e-05, + "loss": 0.5608, + "step": 301 + }, + { + "epoch": 0.05, + "learning_rate": 1.1439393939393942e-05, + "loss": 0.5685, + "step": 302 + }, + { + "epoch": 0.05, + "learning_rate": 1.1477272727272729e-05, + "loss": 0.624, + "step": 303 + }, + { + "epoch": 0.05, + "learning_rate": 1.1515151515151517e-05, + "loss": 0.6147, + "step": 304 + }, + { + "epoch": 0.05, + "learning_rate": 1.1553030303030304e-05, + "loss": 0.6071, + "step": 305 + }, + { + "epoch": 0.05, + "learning_rate": 1.1590909090909093e-05, + "loss": 0.6129, + "step": 306 + }, + { + "epoch": 0.05, + "learning_rate": 1.162878787878788e-05, + "loss": 0.5789, + "step": 307 + }, + { + "epoch": 0.05, + "learning_rate": 1.1666666666666668e-05, + "loss": 0.5922, + "step": 308 + }, + { + "epoch": 0.05, + "learning_rate": 1.1704545454545455e-05, + "loss": 0.5434, + "step": 309 + }, + { + "epoch": 0.05, + "learning_rate": 1.1742424242424243e-05, + "loss": 0.5978, + "step": 310 + }, + { + "epoch": 0.05, + "learning_rate": 1.178030303030303e-05, + "loss": 0.5931, + "step": 311 + }, + { + "epoch": 0.05, + "learning_rate": 1.181818181818182e-05, + "loss": 0.5848, + "step": 312 + }, + { + "epoch": 0.05, + "learning_rate": 1.1856060606060607e-05, + "loss": 0.626, + "step": 313 + }, + { + "epoch": 0.05, + "learning_rate": 1.1893939393939396e-05, + "loss": 0.6286, + "step": 314 + }, + { + "epoch": 0.05, + "learning_rate": 1.1931818181818183e-05, + "loss": 0.6182, + "step": 315 + }, + { + "epoch": 0.05, + "learning_rate": 1.1969696969696971e-05, + "loss": 0.6404, + "step": 316 + }, + { + "epoch": 0.05, + "learning_rate": 1.2007575757575758e-05, + "loss": 0.5997, + "step": 317 + }, + { + "epoch": 0.05, + "learning_rate": 1.2045454545454547e-05, + "loss": 0.5654, + "step": 318 + }, + { + "epoch": 0.05, + "learning_rate": 1.2083333333333333e-05, + "loss": 0.5628, + "step": 319 + }, + { + "epoch": 0.05, + "learning_rate": 1.2121212121212122e-05, + "loss": 0.5671, + "step": 320 + }, + { + "epoch": 0.05, + "learning_rate": 1.2159090909090909e-05, + "loss": 0.5767, + "step": 321 + }, + { + "epoch": 0.05, + "learning_rate": 1.2196969696969699e-05, + "loss": 0.5911, + "step": 322 + }, + { + "epoch": 0.06, + "learning_rate": 1.2234848484848484e-05, + "loss": 0.5949, + "step": 323 + }, + { + "epoch": 0.06, + "learning_rate": 1.2272727272727274e-05, + "loss": 0.5694, + "step": 324 + }, + { + "epoch": 0.06, + "learning_rate": 1.2310606060606061e-05, + "loss": 0.57, + "step": 325 + }, + { + "epoch": 0.06, + "learning_rate": 1.234848484848485e-05, + "loss": 0.5934, + "step": 326 + }, + { + "epoch": 0.06, + "learning_rate": 1.2386363636363637e-05, + "loss": 0.5732, + "step": 327 + }, + { + "epoch": 0.06, + "learning_rate": 1.2424242424242425e-05, + "loss": 0.6393, + "step": 328 + }, + { + "epoch": 0.06, + "learning_rate": 1.2462121212121212e-05, + "loss": 0.5827, + "step": 329 + }, + { + "epoch": 0.06, + "learning_rate": 1.25e-05, + "loss": 0.5367, + "step": 330 + }, + { + "epoch": 0.06, + "learning_rate": 1.2537878787878789e-05, + "loss": 0.5645, + "step": 331 + }, + { + "epoch": 0.06, + "learning_rate": 1.2575757575757576e-05, + "loss": 0.5551, + "step": 332 + }, + { + "epoch": 0.06, + "learning_rate": 1.2613636363636366e-05, + "loss": 0.6464, + "step": 333 + }, + { + "epoch": 0.06, + "learning_rate": 1.2651515151515153e-05, + "loss": 0.566, + "step": 334 + }, + { + "epoch": 0.06, + "learning_rate": 1.2689393939393942e-05, + "loss": 0.5936, + "step": 335 + }, + { + "epoch": 0.06, + "learning_rate": 1.2727272727272728e-05, + "loss": 0.6147, + "step": 336 + }, + { + "epoch": 0.06, + "learning_rate": 1.2765151515151517e-05, + "loss": 0.5874, + "step": 337 + }, + { + "epoch": 0.06, + "learning_rate": 1.2803030303030304e-05, + "loss": 0.6363, + "step": 338 + }, + { + "epoch": 0.06, + "learning_rate": 1.2840909090909092e-05, + "loss": 0.562, + "step": 339 + }, + { + "epoch": 0.06, + "learning_rate": 1.287878787878788e-05, + "loss": 0.6521, + "step": 340 + }, + { + "epoch": 0.06, + "learning_rate": 1.2916666666666668e-05, + "loss": 0.5872, + "step": 341 + }, + { + "epoch": 0.06, + "learning_rate": 1.2954545454545455e-05, + "loss": 0.5642, + "step": 342 + }, + { + "epoch": 0.06, + "learning_rate": 1.2992424242424245e-05, + "loss": 0.5514, + "step": 343 + }, + { + "epoch": 0.06, + "learning_rate": 1.3030303030303032e-05, + "loss": 0.6387, + "step": 344 + }, + { + "epoch": 0.06, + "learning_rate": 1.306818181818182e-05, + "loss": 0.6008, + "step": 345 + }, + { + "epoch": 0.06, + "learning_rate": 1.3106060606060607e-05, + "loss": 0.6015, + "step": 346 + }, + { + "epoch": 0.06, + "learning_rate": 1.3143939393939396e-05, + "loss": 0.57, + "step": 347 + }, + { + "epoch": 0.06, + "learning_rate": 1.3181818181818183e-05, + "loss": 0.585, + "step": 348 + }, + { + "epoch": 0.06, + "learning_rate": 1.3219696969696971e-05, + "loss": 0.5971, + "step": 349 + }, + { + "epoch": 0.06, + "learning_rate": 1.3257575757575758e-05, + "loss": 0.5959, + "step": 350 + }, + { + "epoch": 0.06, + "learning_rate": 1.3295454545454546e-05, + "loss": 0.6488, + "step": 351 + }, + { + "epoch": 0.06, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.5857, + "step": 352 + }, + { + "epoch": 0.06, + "learning_rate": 1.3371212121212122e-05, + "loss": 0.6323, + "step": 353 + }, + { + "epoch": 0.06, + "learning_rate": 1.3409090909090909e-05, + "loss": 0.6135, + "step": 354 + }, + { + "epoch": 0.06, + "learning_rate": 1.3446969696969699e-05, + "loss": 0.6354, + "step": 355 + }, + { + "epoch": 0.06, + "learning_rate": 1.3484848484848486e-05, + "loss": 0.5833, + "step": 356 + }, + { + "epoch": 0.06, + "learning_rate": 1.3522727272727274e-05, + "loss": 0.5976, + "step": 357 + }, + { + "epoch": 0.06, + "learning_rate": 1.3560606060606061e-05, + "loss": 0.6021, + "step": 358 + }, + { + "epoch": 0.06, + "learning_rate": 1.359848484848485e-05, + "loss": 0.5359, + "step": 359 + }, + { + "epoch": 0.06, + "learning_rate": 1.3636363636363637e-05, + "loss": 0.6215, + "step": 360 + }, + { + "epoch": 0.06, + "learning_rate": 1.3674242424242425e-05, + "loss": 0.5914, + "step": 361 + }, + { + "epoch": 0.06, + "learning_rate": 1.3712121212121212e-05, + "loss": 0.5997, + "step": 362 + }, + { + "epoch": 0.06, + "learning_rate": 1.375e-05, + "loss": 0.5876, + "step": 363 + }, + { + "epoch": 0.06, + "learning_rate": 1.378787878787879e-05, + "loss": 0.563, + "step": 364 + }, + { + "epoch": 0.06, + "learning_rate": 1.3825757575757578e-05, + "loss": 0.5925, + "step": 365 + }, + { + "epoch": 0.06, + "learning_rate": 1.3863636363636366e-05, + "loss": 0.597, + "step": 366 + }, + { + "epoch": 0.06, + "learning_rate": 1.3901515151515153e-05, + "loss": 0.5753, + "step": 367 + }, + { + "epoch": 0.06, + "learning_rate": 1.3939393939393942e-05, + "loss": 0.6087, + "step": 368 + }, + { + "epoch": 0.06, + "learning_rate": 1.3977272727272728e-05, + "loss": 0.6422, + "step": 369 + }, + { + "epoch": 0.06, + "learning_rate": 1.4015151515151517e-05, + "loss": 0.6614, + "step": 370 + }, + { + "epoch": 0.06, + "learning_rate": 1.4053030303030304e-05, + "loss": 0.5547, + "step": 371 + }, + { + "epoch": 0.06, + "learning_rate": 1.4090909090909092e-05, + "loss": 0.5686, + "step": 372 + }, + { + "epoch": 0.06, + "learning_rate": 1.4128787878787879e-05, + "loss": 0.6044, + "step": 373 + }, + { + "epoch": 0.06, + "learning_rate": 1.416666666666667e-05, + "loss": 0.5859, + "step": 374 + }, + { + "epoch": 0.06, + "learning_rate": 1.4204545454545455e-05, + "loss": 0.538, + "step": 375 + }, + { + "epoch": 0.06, + "learning_rate": 1.4242424242424245e-05, + "loss": 0.5645, + "step": 376 + }, + { + "epoch": 0.06, + "learning_rate": 1.4280303030303032e-05, + "loss": 0.5737, + "step": 377 + }, + { + "epoch": 0.06, + "learning_rate": 1.431818181818182e-05, + "loss": 0.6007, + "step": 378 + }, + { + "epoch": 0.06, + "learning_rate": 1.4356060606060607e-05, + "loss": 0.5767, + "step": 379 + }, + { + "epoch": 0.06, + "learning_rate": 1.4393939393939396e-05, + "loss": 0.5786, + "step": 380 + }, + { + "epoch": 0.06, + "learning_rate": 1.4431818181818182e-05, + "loss": 0.6216, + "step": 381 + }, + { + "epoch": 0.07, + "learning_rate": 1.4469696969696971e-05, + "loss": 0.5518, + "step": 382 + }, + { + "epoch": 0.07, + "learning_rate": 1.4507575757575758e-05, + "loss": 0.6123, + "step": 383 + }, + { + "epoch": 0.07, + "learning_rate": 1.4545454545454546e-05, + "loss": 0.6008, + "step": 384 + }, + { + "epoch": 0.07, + "learning_rate": 1.4583333333333333e-05, + "loss": 0.6235, + "step": 385 + }, + { + "epoch": 0.07, + "learning_rate": 1.4621212121212123e-05, + "loss": 0.5731, + "step": 386 + }, + { + "epoch": 0.07, + "learning_rate": 1.465909090909091e-05, + "loss": 0.6411, + "step": 387 + }, + { + "epoch": 0.07, + "learning_rate": 1.4696969696969699e-05, + "loss": 0.63, + "step": 388 + }, + { + "epoch": 0.07, + "learning_rate": 1.4734848484848486e-05, + "loss": 0.5725, + "step": 389 + }, + { + "epoch": 0.07, + "learning_rate": 1.4772727272727274e-05, + "loss": 0.6142, + "step": 390 + }, + { + "epoch": 0.07, + "learning_rate": 1.4810606060606061e-05, + "loss": 0.5612, + "step": 391 + }, + { + "epoch": 0.07, + "learning_rate": 1.484848484848485e-05, + "loss": 0.5745, + "step": 392 + }, + { + "epoch": 0.07, + "learning_rate": 1.4886363636363636e-05, + "loss": 0.6193, + "step": 393 + }, + { + "epoch": 0.07, + "learning_rate": 1.4924242424242425e-05, + "loss": 0.5601, + "step": 394 + }, + { + "epoch": 0.07, + "learning_rate": 1.4962121212121212e-05, + "loss": 0.5798, + "step": 395 + }, + { + "epoch": 0.07, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.5933, + "step": 396 + }, + { + "epoch": 0.07, + "learning_rate": 1.503787878787879e-05, + "loss": 0.5442, + "step": 397 + }, + { + "epoch": 0.07, + "learning_rate": 1.5075757575757577e-05, + "loss": 0.5934, + "step": 398 + }, + { + "epoch": 0.07, + "learning_rate": 1.5113636363636366e-05, + "loss": 0.6177, + "step": 399 + }, + { + "epoch": 0.07, + "learning_rate": 1.5151515151515153e-05, + "loss": 0.6411, + "step": 400 + }, + { + "epoch": 0.07, + "learning_rate": 1.5189393939393941e-05, + "loss": 0.5769, + "step": 401 + }, + { + "epoch": 0.07, + "learning_rate": 1.5227272727272728e-05, + "loss": 0.5869, + "step": 402 + }, + { + "epoch": 0.07, + "learning_rate": 1.5265151515151517e-05, + "loss": 0.5835, + "step": 403 + }, + { + "epoch": 0.07, + "learning_rate": 1.5303030303030304e-05, + "loss": 0.596, + "step": 404 + }, + { + "epoch": 0.07, + "learning_rate": 1.5340909090909094e-05, + "loss": 0.5937, + "step": 405 + }, + { + "epoch": 0.07, + "learning_rate": 1.537878787878788e-05, + "loss": 0.6014, + "step": 406 + }, + { + "epoch": 0.07, + "learning_rate": 1.5416666666666668e-05, + "loss": 0.6204, + "step": 407 + }, + { + "epoch": 0.07, + "learning_rate": 1.5454545454545454e-05, + "loss": 0.6271, + "step": 408 + }, + { + "epoch": 0.07, + "learning_rate": 1.5492424242424245e-05, + "loss": 0.6, + "step": 409 + }, + { + "epoch": 0.07, + "learning_rate": 1.553030303030303e-05, + "loss": 0.6267, + "step": 410 + }, + { + "epoch": 0.07, + "learning_rate": 1.5568181818181822e-05, + "loss": 0.5638, + "step": 411 + }, + { + "epoch": 0.07, + "learning_rate": 1.5606060606060605e-05, + "loss": 0.5847, + "step": 412 + }, + { + "epoch": 0.07, + "learning_rate": 1.5643939393939395e-05, + "loss": 0.5692, + "step": 413 + }, + { + "epoch": 0.07, + "learning_rate": 1.5681818181818182e-05, + "loss": 0.5627, + "step": 414 + }, + { + "epoch": 0.07, + "learning_rate": 1.5719696969696973e-05, + "loss": 0.6231, + "step": 415 + }, + { + "epoch": 0.07, + "learning_rate": 1.575757575757576e-05, + "loss": 0.5516, + "step": 416 + }, + { + "epoch": 0.07, + "learning_rate": 1.5795454545454546e-05, + "loss": 0.5816, + "step": 417 + }, + { + "epoch": 0.07, + "learning_rate": 1.5833333333333333e-05, + "loss": 0.5808, + "step": 418 + }, + { + "epoch": 0.07, + "learning_rate": 1.5871212121212123e-05, + "loss": 0.5681, + "step": 419 + }, + { + "epoch": 0.07, + "learning_rate": 1.590909090909091e-05, + "loss": 0.6073, + "step": 420 + }, + { + "epoch": 0.07, + "learning_rate": 1.5946969696969697e-05, + "loss": 0.5749, + "step": 421 + }, + { + "epoch": 0.07, + "learning_rate": 1.5984848484848484e-05, + "loss": 0.5515, + "step": 422 + }, + { + "epoch": 0.07, + "learning_rate": 1.6022727272727274e-05, + "loss": 0.5653, + "step": 423 + }, + { + "epoch": 0.07, + "learning_rate": 1.606060606060606e-05, + "loss": 0.6179, + "step": 424 + }, + { + "epoch": 0.07, + "learning_rate": 1.609848484848485e-05, + "loss": 0.5722, + "step": 425 + }, + { + "epoch": 0.07, + "learning_rate": 1.6136363636363638e-05, + "loss": 0.5631, + "step": 426 + }, + { + "epoch": 0.07, + "learning_rate": 1.6174242424242425e-05, + "loss": 0.5685, + "step": 427 + }, + { + "epoch": 0.07, + "learning_rate": 1.6212121212121212e-05, + "loss": 0.5902, + "step": 428 + }, + { + "epoch": 0.07, + "learning_rate": 1.6250000000000002e-05, + "loss": 0.5854, + "step": 429 + }, + { + "epoch": 0.07, + "learning_rate": 1.628787878787879e-05, + "loss": 0.611, + "step": 430 + }, + { + "epoch": 0.07, + "learning_rate": 1.6325757575757576e-05, + "loss": 0.5814, + "step": 431 + }, + { + "epoch": 0.07, + "learning_rate": 1.6363636363636366e-05, + "loss": 0.6081, + "step": 432 + }, + { + "epoch": 0.07, + "learning_rate": 1.6401515151515153e-05, + "loss": 0.5733, + "step": 433 + }, + { + "epoch": 0.07, + "learning_rate": 1.6439393939393943e-05, + "loss": 0.6096, + "step": 434 + }, + { + "epoch": 0.07, + "learning_rate": 1.647727272727273e-05, + "loss": 0.5459, + "step": 435 + }, + { + "epoch": 0.07, + "learning_rate": 1.6515151515151517e-05, + "loss": 0.6089, + "step": 436 + }, + { + "epoch": 0.07, + "learning_rate": 1.6553030303030304e-05, + "loss": 0.5928, + "step": 437 + }, + { + "epoch": 0.07, + "learning_rate": 1.6590909090909094e-05, + "loss": 0.6241, + "step": 438 + }, + { + "epoch": 0.07, + "learning_rate": 1.662878787878788e-05, + "loss": 0.6188, + "step": 439 + }, + { + "epoch": 0.08, + "learning_rate": 1.6666666666666667e-05, + "loss": 0.5848, + "step": 440 + }, + { + "epoch": 0.08, + "learning_rate": 1.6704545454545454e-05, + "loss": 0.5824, + "step": 441 + }, + { + "epoch": 0.08, + "learning_rate": 1.6742424242424245e-05, + "loss": 0.6433, + "step": 442 + }, + { + "epoch": 0.08, + "learning_rate": 1.678030303030303e-05, + "loss": 0.5533, + "step": 443 + }, + { + "epoch": 0.08, + "learning_rate": 1.681818181818182e-05, + "loss": 0.617, + "step": 444 + }, + { + "epoch": 0.08, + "learning_rate": 1.685606060606061e-05, + "loss": 0.5943, + "step": 445 + }, + { + "epoch": 0.08, + "learning_rate": 1.6893939393939395e-05, + "loss": 0.6042, + "step": 446 + }, + { + "epoch": 0.08, + "learning_rate": 1.6931818181818182e-05, + "loss": 0.6008, + "step": 447 + }, + { + "epoch": 0.08, + "learning_rate": 1.6969696969696972e-05, + "loss": 0.5648, + "step": 448 + }, + { + "epoch": 0.08, + "learning_rate": 1.700757575757576e-05, + "loss": 0.6259, + "step": 449 + }, + { + "epoch": 0.08, + "learning_rate": 1.7045454545454546e-05, + "loss": 0.5815, + "step": 450 + }, + { + "epoch": 0.08, + "learning_rate": 1.7083333333333333e-05, + "loss": 0.5852, + "step": 451 + }, + { + "epoch": 0.08, + "learning_rate": 1.7121212121212123e-05, + "loss": 0.6094, + "step": 452 + }, + { + "epoch": 0.08, + "learning_rate": 1.715909090909091e-05, + "loss": 0.5676, + "step": 453 + }, + { + "epoch": 0.08, + "learning_rate": 1.71969696969697e-05, + "loss": 0.5997, + "step": 454 + }, + { + "epoch": 0.08, + "learning_rate": 1.7234848484848484e-05, + "loss": 0.6101, + "step": 455 + }, + { + "epoch": 0.08, + "learning_rate": 1.7272727272727274e-05, + "loss": 0.6235, + "step": 456 + }, + { + "epoch": 0.08, + "learning_rate": 1.731060606060606e-05, + "loss": 0.558, + "step": 457 + }, + { + "epoch": 0.08, + "learning_rate": 1.734848484848485e-05, + "loss": 0.566, + "step": 458 + }, + { + "epoch": 0.08, + "learning_rate": 1.7386363636363638e-05, + "loss": 0.5669, + "step": 459 + }, + { + "epoch": 0.08, + "learning_rate": 1.7424242424242425e-05, + "loss": 0.604, + "step": 460 + }, + { + "epoch": 0.08, + "learning_rate": 1.746212121212121e-05, + "loss": 0.5614, + "step": 461 + }, + { + "epoch": 0.08, + "learning_rate": 1.7500000000000002e-05, + "loss": 0.5938, + "step": 462 + }, + { + "epoch": 0.08, + "learning_rate": 1.753787878787879e-05, + "loss": 0.6003, + "step": 463 + }, + { + "epoch": 0.08, + "learning_rate": 1.7575757575757576e-05, + "loss": 0.5634, + "step": 464 + }, + { + "epoch": 0.08, + "learning_rate": 1.7613636363636366e-05, + "loss": 0.5727, + "step": 465 + }, + { + "epoch": 0.08, + "learning_rate": 1.7651515151515153e-05, + "loss": 0.5794, + "step": 466 + }, + { + "epoch": 0.08, + "learning_rate": 1.7689393939393943e-05, + "loss": 0.5872, + "step": 467 + }, + { + "epoch": 0.08, + "learning_rate": 1.772727272727273e-05, + "loss": 0.5658, + "step": 468 + }, + { + "epoch": 0.08, + "learning_rate": 1.7765151515151517e-05, + "loss": 0.5839, + "step": 469 + }, + { + "epoch": 0.08, + "learning_rate": 1.7803030303030303e-05, + "loss": 0.5627, + "step": 470 + }, + { + "epoch": 0.08, + "learning_rate": 1.7840909090909094e-05, + "loss": 0.5697, + "step": 471 + }, + { + "epoch": 0.08, + "learning_rate": 1.787878787878788e-05, + "loss": 0.5943, + "step": 472 + }, + { + "epoch": 0.08, + "learning_rate": 1.7916666666666667e-05, + "loss": 0.5938, + "step": 473 + }, + { + "epoch": 0.08, + "learning_rate": 1.7954545454545454e-05, + "loss": 0.6179, + "step": 474 + }, + { + "epoch": 0.08, + "learning_rate": 1.7992424242424244e-05, + "loss": 0.5672, + "step": 475 + }, + { + "epoch": 0.08, + "learning_rate": 1.803030303030303e-05, + "loss": 0.6273, + "step": 476 + }, + { + "epoch": 0.08, + "learning_rate": 1.806818181818182e-05, + "loss": 0.5977, + "step": 477 + }, + { + "epoch": 0.08, + "learning_rate": 1.810606060606061e-05, + "loss": 0.5529, + "step": 478 + }, + { + "epoch": 0.08, + "learning_rate": 1.8143939393939395e-05, + "loss": 0.5901, + "step": 479 + }, + { + "epoch": 0.08, + "learning_rate": 1.8181818181818182e-05, + "loss": 0.5839, + "step": 480 + }, + { + "epoch": 0.08, + "learning_rate": 1.8219696969696972e-05, + "loss": 0.5601, + "step": 481 + }, + { + "epoch": 0.08, + "learning_rate": 1.825757575757576e-05, + "loss": 0.5772, + "step": 482 + }, + { + "epoch": 0.08, + "learning_rate": 1.8295454545454546e-05, + "loss": 0.5899, + "step": 483 + }, + { + "epoch": 0.08, + "learning_rate": 1.8333333333333333e-05, + "loss": 0.5569, + "step": 484 + }, + { + "epoch": 0.08, + "learning_rate": 1.8371212121212123e-05, + "loss": 0.563, + "step": 485 + }, + { + "epoch": 0.08, + "learning_rate": 1.840909090909091e-05, + "loss": 0.5689, + "step": 486 + }, + { + "epoch": 0.08, + "learning_rate": 1.84469696969697e-05, + "loss": 0.6003, + "step": 487 + }, + { + "epoch": 0.08, + "learning_rate": 1.8484848484848487e-05, + "loss": 0.5543, + "step": 488 + }, + { + "epoch": 0.08, + "learning_rate": 1.8522727272727274e-05, + "loss": 0.5917, + "step": 489 + }, + { + "epoch": 0.08, + "learning_rate": 1.856060606060606e-05, + "loss": 0.5494, + "step": 490 + }, + { + "epoch": 0.08, + "learning_rate": 1.859848484848485e-05, + "loss": 0.5738, + "step": 491 + }, + { + "epoch": 0.08, + "learning_rate": 1.8636363636363638e-05, + "loss": 0.5497, + "step": 492 + }, + { + "epoch": 0.08, + "learning_rate": 1.8674242424242425e-05, + "loss": 0.5857, + "step": 493 + }, + { + "epoch": 0.08, + "learning_rate": 1.871212121212121e-05, + "loss": 0.5883, + "step": 494 + }, + { + "epoch": 0.08, + "learning_rate": 1.8750000000000002e-05, + "loss": 0.5751, + "step": 495 + }, + { + "epoch": 0.08, + "learning_rate": 1.8787878787878792e-05, + "loss": 0.5679, + "step": 496 + }, + { + "epoch": 0.08, + "learning_rate": 1.882575757575758e-05, + "loss": 0.5509, + "step": 497 + }, + { + "epoch": 0.08, + "learning_rate": 1.8863636363636366e-05, + "loss": 0.6058, + "step": 498 + }, + { + "epoch": 0.09, + "learning_rate": 1.8901515151515153e-05, + "loss": 0.5716, + "step": 499 + }, + { + "epoch": 0.09, + "learning_rate": 1.8939393939393943e-05, + "loss": 0.5693, + "step": 500 + }, + { + "epoch": 0.09, + "learning_rate": 1.897727272727273e-05, + "loss": 0.61, + "step": 501 + }, + { + "epoch": 0.09, + "learning_rate": 1.9015151515151516e-05, + "loss": 0.535, + "step": 502 + }, + { + "epoch": 0.09, + "learning_rate": 1.9053030303030303e-05, + "loss": 0.5773, + "step": 503 + }, + { + "epoch": 0.09, + "learning_rate": 1.9090909090909094e-05, + "loss": 0.6008, + "step": 504 + }, + { + "epoch": 0.09, + "learning_rate": 1.912878787878788e-05, + "loss": 0.6044, + "step": 505 + }, + { + "epoch": 0.09, + "learning_rate": 1.916666666666667e-05, + "loss": 0.6318, + "step": 506 + }, + { + "epoch": 0.09, + "learning_rate": 1.9204545454545454e-05, + "loss": 0.5782, + "step": 507 + }, + { + "epoch": 0.09, + "learning_rate": 1.9242424242424244e-05, + "loss": 0.6152, + "step": 508 + }, + { + "epoch": 0.09, + "learning_rate": 1.928030303030303e-05, + "loss": 0.5689, + "step": 509 + }, + { + "epoch": 0.09, + "learning_rate": 1.931818181818182e-05, + "loss": 0.5849, + "step": 510 + }, + { + "epoch": 0.09, + "learning_rate": 1.9356060606060608e-05, + "loss": 0.5494, + "step": 511 + }, + { + "epoch": 0.09, + "learning_rate": 1.9393939393939395e-05, + "loss": 0.5761, + "step": 512 + }, + { + "epoch": 0.09, + "learning_rate": 1.9431818181818182e-05, + "loss": 0.6433, + "step": 513 + }, + { + "epoch": 0.09, + "learning_rate": 1.9469696969696972e-05, + "loss": 0.5979, + "step": 514 + }, + { + "epoch": 0.09, + "learning_rate": 1.950757575757576e-05, + "loss": 0.5893, + "step": 515 + }, + { + "epoch": 0.09, + "learning_rate": 1.9545454545454546e-05, + "loss": 0.5878, + "step": 516 + }, + { + "epoch": 0.09, + "learning_rate": 1.9583333333333333e-05, + "loss": 0.5707, + "step": 517 + }, + { + "epoch": 0.09, + "learning_rate": 1.9621212121212123e-05, + "loss": 0.5703, + "step": 518 + }, + { + "epoch": 0.09, + "learning_rate": 1.965909090909091e-05, + "loss": 0.6052, + "step": 519 + }, + { + "epoch": 0.09, + "learning_rate": 1.96969696969697e-05, + "loss": 0.5792, + "step": 520 + }, + { + "epoch": 0.09, + "learning_rate": 1.9734848484848487e-05, + "loss": 0.5992, + "step": 521 + }, + { + "epoch": 0.09, + "learning_rate": 1.9772727272727274e-05, + "loss": 0.5819, + "step": 522 + }, + { + "epoch": 0.09, + "learning_rate": 1.981060606060606e-05, + "loss": 0.5821, + "step": 523 + }, + { + "epoch": 0.09, + "learning_rate": 1.984848484848485e-05, + "loss": 0.5775, + "step": 524 + }, + { + "epoch": 0.09, + "learning_rate": 1.9886363636363638e-05, + "loss": 0.5822, + "step": 525 + }, + { + "epoch": 0.09, + "learning_rate": 1.9924242424242425e-05, + "loss": 0.5683, + "step": 526 + }, + { + "epoch": 0.09, + "learning_rate": 1.996212121212121e-05, + "loss": 0.6096, + "step": 527 + }, + { + "epoch": 0.09, + "learning_rate": 2e-05, + "loss": 0.6331, + "step": 528 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999999830464453e-05, + "loss": 0.6005, + "step": 529 + }, + { + "epoch": 0.09, + "learning_rate": 1.99999993218578e-05, + "loss": 0.5715, + "step": 530 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999998474180075e-05, + "loss": 0.6038, + "step": 531 + }, + { + "epoch": 0.09, + "learning_rate": 1.99999972874313e-05, + "loss": 0.6143, + "step": 532 + }, + { + "epoch": 0.09, + "learning_rate": 1.999999576161151e-05, + "loss": 0.5818, + "step": 533 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999993896720763e-05, + "loss": 0.607, + "step": 534 + }, + { + "epoch": 0.09, + "learning_rate": 1.999999169275912e-05, + "loss": 0.6165, + "step": 535 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999989149726658e-05, + "loss": 0.5891, + "step": 536 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999986267623458e-05, + "loss": 0.573, + "step": 537 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999983046449625e-05, + "loss": 0.6131, + "step": 538 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999979486205264e-05, + "loss": 0.5848, + "step": 539 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999975586890498e-05, + "loss": 0.6129, + "step": 540 + }, + { + "epoch": 0.09, + "learning_rate": 1.999997134850545e-05, + "loss": 0.5463, + "step": 541 + }, + { + "epoch": 0.09, + "learning_rate": 1.999996677105028e-05, + "loss": 0.5638, + "step": 542 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999961854525128e-05, + "loss": 0.6171, + "step": 543 + }, + { + "epoch": 0.09, + "learning_rate": 1.999995659893017e-05, + "loss": 0.6288, + "step": 544 + }, + { + "epoch": 0.09, + "learning_rate": 1.999995100426558e-05, + "loss": 0.5824, + "step": 545 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999945070531554e-05, + "loss": 0.5774, + "step": 546 + }, + { + "epoch": 0.09, + "learning_rate": 1.999993879772828e-05, + "loss": 0.6311, + "step": 547 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999932185855985e-05, + "loss": 0.5694, + "step": 548 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999925234914884e-05, + "loss": 0.5577, + "step": 549 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999917944905217e-05, + "loss": 0.5813, + "step": 550 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999910315827228e-05, + "loss": 0.6237, + "step": 551 + }, + { + "epoch": 0.09, + "learning_rate": 1.999990234768118e-05, + "loss": 0.5953, + "step": 552 + }, + { + "epoch": 0.09, + "learning_rate": 1.999989404046734e-05, + "loss": 0.6116, + "step": 553 + }, + { + "epoch": 0.09, + "learning_rate": 1.999988539418599e-05, + "loss": 0.5782, + "step": 554 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999876408837422e-05, + "loss": 0.6422, + "step": 555 + }, + { + "epoch": 0.09, + "learning_rate": 1.9999867084421945e-05, + "loss": 0.5968, + "step": 556 + }, + { + "epoch": 0.09, + "learning_rate": 1.999985742093987e-05, + "loss": 0.5906, + "step": 557 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999847418391528e-05, + "loss": 0.5452, + "step": 558 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999837076777258e-05, + "loss": 0.5385, + "step": 559 + }, + { + "epoch": 0.1, + "learning_rate": 1.999982639609741e-05, + "loss": 0.5796, + "step": 560 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999815376352346e-05, + "loss": 0.6147, + "step": 561 + }, + { + "epoch": 0.1, + "learning_rate": 1.999980401754244e-05, + "loss": 0.6094, + "step": 562 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999792319668075e-05, + "loss": 0.5745, + "step": 563 + }, + { + "epoch": 0.1, + "learning_rate": 1.999978028272965e-05, + "loss": 0.575, + "step": 564 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999767906727575e-05, + "loss": 0.6327, + "step": 565 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999755191662263e-05, + "loss": 0.6026, + "step": 566 + }, + { + "epoch": 0.1, + "learning_rate": 1.999974213753415e-05, + "loss": 0.6702, + "step": 567 + }, + { + "epoch": 0.1, + "learning_rate": 1.999972874434368e-05, + "loss": 0.5672, + "step": 568 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999715012091306e-05, + "loss": 0.5844, + "step": 569 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999700940777493e-05, + "loss": 0.613, + "step": 570 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999686530402717e-05, + "loss": 0.5941, + "step": 571 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999671780967466e-05, + "loss": 0.5722, + "step": 572 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999656692472244e-05, + "loss": 0.5359, + "step": 573 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999641264917557e-05, + "loss": 0.5972, + "step": 574 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999625498303936e-05, + "loss": 0.5846, + "step": 575 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999609392631906e-05, + "loss": 0.598, + "step": 576 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999592947902024e-05, + "loss": 0.5881, + "step": 577 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999576164114834e-05, + "loss": 0.5898, + "step": 578 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999559041270917e-05, + "loss": 0.6207, + "step": 579 + }, + { + "epoch": 0.1, + "learning_rate": 1.999954157937085e-05, + "loss": 0.5878, + "step": 580 + }, + { + "epoch": 0.1, + "learning_rate": 1.999952377841522e-05, + "loss": 0.6605, + "step": 581 + }, + { + "epoch": 0.1, + "learning_rate": 1.999950563840464e-05, + "loss": 0.5458, + "step": 582 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999487159339716e-05, + "loss": 0.6031, + "step": 583 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999468341221083e-05, + "loss": 0.573, + "step": 584 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999449184049374e-05, + "loss": 0.5894, + "step": 585 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999429687825237e-05, + "loss": 0.6066, + "step": 586 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999409852549336e-05, + "loss": 0.5968, + "step": 587 + }, + { + "epoch": 0.1, + "learning_rate": 1.999938967822234e-05, + "loss": 0.5717, + "step": 588 + }, + { + "epoch": 0.1, + "learning_rate": 1.999936916484494e-05, + "loss": 0.5626, + "step": 589 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999348312417824e-05, + "loss": 0.5539, + "step": 590 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999327120941706e-05, + "loss": 0.5441, + "step": 591 + }, + { + "epoch": 0.1, + "learning_rate": 1.99993055904173e-05, + "loss": 0.6203, + "step": 592 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999283720845333e-05, + "loss": 0.5796, + "step": 593 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999261512226556e-05, + "loss": 0.5691, + "step": 594 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999238964561717e-05, + "loss": 0.5812, + "step": 595 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999216077851574e-05, + "loss": 0.6215, + "step": 596 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999192852096915e-05, + "loss": 0.5612, + "step": 597 + }, + { + "epoch": 0.1, + "learning_rate": 1.999916928729852e-05, + "loss": 0.6153, + "step": 598 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999145383457188e-05, + "loss": 0.5684, + "step": 599 + }, + { + "epoch": 0.1, + "learning_rate": 1.999912114057373e-05, + "loss": 0.6029, + "step": 600 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999096558648975e-05, + "loss": 0.5384, + "step": 601 + }, + { + "epoch": 0.1, + "learning_rate": 1.999907163768375e-05, + "loss": 0.6144, + "step": 602 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999046377678898e-05, + "loss": 0.5576, + "step": 603 + }, + { + "epoch": 0.1, + "learning_rate": 1.9999020778635275e-05, + "loss": 0.5747, + "step": 604 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998994840553755e-05, + "loss": 0.6243, + "step": 605 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998968563435216e-05, + "loss": 0.6619, + "step": 606 + }, + { + "epoch": 0.1, + "learning_rate": 1.999894194728055e-05, + "loss": 0.6321, + "step": 607 + }, + { + "epoch": 0.1, + "learning_rate": 1.999891499209065e-05, + "loss": 0.5791, + "step": 608 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998887697866444e-05, + "loss": 0.5849, + "step": 609 + }, + { + "epoch": 0.1, + "learning_rate": 1.999886006460885e-05, + "loss": 0.5597, + "step": 610 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998832092318802e-05, + "loss": 0.5597, + "step": 611 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998803780997253e-05, + "loss": 0.5845, + "step": 612 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998775130645163e-05, + "loss": 0.5678, + "step": 613 + }, + { + "epoch": 0.1, + "learning_rate": 1.99987461412635e-05, + "loss": 0.5566, + "step": 614 + }, + { + "epoch": 0.1, + "learning_rate": 1.9998716812853253e-05, + "loss": 0.5625, + "step": 615 + }, + { + "epoch": 0.11, + "learning_rate": 1.999868714541541e-05, + "loss": 0.5532, + "step": 616 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998657138950982e-05, + "loss": 0.6081, + "step": 617 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998626793460983e-05, + "loss": 0.6074, + "step": 618 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998596108946443e-05, + "loss": 0.5866, + "step": 619 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998565085408402e-05, + "loss": 0.5675, + "step": 620 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998533722847914e-05, + "loss": 0.5416, + "step": 621 + }, + { + "epoch": 0.11, + "learning_rate": 1.999850202126604e-05, + "loss": 0.5632, + "step": 622 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998469980663854e-05, + "loss": 0.5592, + "step": 623 + }, + { + "epoch": 0.11, + "learning_rate": 1.999843760104245e-05, + "loss": 0.5402, + "step": 624 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998404882402914e-05, + "loss": 0.5503, + "step": 625 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998371824746364e-05, + "loss": 0.5864, + "step": 626 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998338428073916e-05, + "loss": 0.5676, + "step": 627 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998304692386705e-05, + "loss": 0.5806, + "step": 628 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998270617685877e-05, + "loss": 0.5804, + "step": 629 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998236203972584e-05, + "loss": 0.5776, + "step": 630 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998201451247997e-05, + "loss": 0.529, + "step": 631 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998166359513287e-05, + "loss": 0.5808, + "step": 632 + }, + { + "epoch": 0.11, + "learning_rate": 1.999813092876965e-05, + "loss": 0.5521, + "step": 633 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998095159018283e-05, + "loss": 0.5279, + "step": 634 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998059050260403e-05, + "loss": 0.5558, + "step": 635 + }, + { + "epoch": 0.11, + "learning_rate": 1.9998022602497233e-05, + "loss": 0.5522, + "step": 636 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997985815730007e-05, + "loss": 0.5948, + "step": 637 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997948689959976e-05, + "loss": 0.5968, + "step": 638 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997911225188395e-05, + "loss": 0.5938, + "step": 639 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997873421416537e-05, + "loss": 0.5792, + "step": 640 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997835278645684e-05, + "loss": 0.572, + "step": 641 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997796796877124e-05, + "loss": 0.6089, + "step": 642 + }, + { + "epoch": 0.11, + "learning_rate": 1.999775797611217e-05, + "loss": 0.5833, + "step": 643 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997718816352132e-05, + "loss": 0.5879, + "step": 644 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997679317598343e-05, + "loss": 0.5719, + "step": 645 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997639479852133e-05, + "loss": 0.553, + "step": 646 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997599303114864e-05, + "loss": 0.6213, + "step": 647 + }, + { + "epoch": 0.11, + "learning_rate": 1.999755878738789e-05, + "loss": 0.5812, + "step": 648 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997517932672592e-05, + "loss": 0.5455, + "step": 649 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997476738970348e-05, + "loss": 0.6192, + "step": 650 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997435206282564e-05, + "loss": 0.5349, + "step": 651 + }, + { + "epoch": 0.11, + "learning_rate": 1.999739333461064e-05, + "loss": 0.6336, + "step": 652 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997351123955993e-05, + "loss": 0.6063, + "step": 653 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997308574320062e-05, + "loss": 0.6181, + "step": 654 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997265685704287e-05, + "loss": 0.6001, + "step": 655 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997222458110125e-05, + "loss": 0.5837, + "step": 656 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997178891539035e-05, + "loss": 0.5874, + "step": 657 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997134985992502e-05, + "loss": 0.5769, + "step": 658 + }, + { + "epoch": 0.11, + "learning_rate": 1.9997090741472006e-05, + "loss": 0.6065, + "step": 659 + }, + { + "epoch": 0.11, + "learning_rate": 1.999704615797906e-05, + "loss": 0.5724, + "step": 660 + }, + { + "epoch": 0.11, + "learning_rate": 1.999700123551516e-05, + "loss": 0.5653, + "step": 661 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996955974081844e-05, + "loss": 0.5889, + "step": 662 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996910373680635e-05, + "loss": 0.5763, + "step": 663 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996864434313084e-05, + "loss": 0.5831, + "step": 664 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996818155980752e-05, + "loss": 0.5597, + "step": 665 + }, + { + "epoch": 0.11, + "learning_rate": 1.99967715386852e-05, + "loss": 0.5697, + "step": 666 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996724582428017e-05, + "loss": 0.5226, + "step": 667 + }, + { + "epoch": 0.11, + "learning_rate": 1.999667728721079e-05, + "loss": 0.5887, + "step": 668 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996629653035128e-05, + "loss": 0.5474, + "step": 669 + }, + { + "epoch": 0.11, + "learning_rate": 1.999658167990264e-05, + "loss": 0.5692, + "step": 670 + }, + { + "epoch": 0.11, + "learning_rate": 1.999653336781495e-05, + "loss": 0.6124, + "step": 671 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996484716773707e-05, + "loss": 0.5729, + "step": 672 + }, + { + "epoch": 0.11, + "learning_rate": 1.9996435726780553e-05, + "loss": 0.5536, + "step": 673 + }, + { + "epoch": 0.11, + "learning_rate": 1.999638639783715e-05, + "loss": 0.5955, + "step": 674 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996336729945173e-05, + "loss": 0.5153, + "step": 675 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996286723106303e-05, + "loss": 0.6031, + "step": 676 + }, + { + "epoch": 0.12, + "learning_rate": 1.999623637732224e-05, + "loss": 0.5713, + "step": 677 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996185692594686e-05, + "loss": 0.5838, + "step": 678 + }, + { + "epoch": 0.12, + "learning_rate": 1.999613466892536e-05, + "loss": 0.5704, + "step": 679 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996083306315998e-05, + "loss": 0.5158, + "step": 680 + }, + { + "epoch": 0.12, + "learning_rate": 1.9996031604768332e-05, + "loss": 0.6058, + "step": 681 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995979564284126e-05, + "loss": 0.5478, + "step": 682 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995927184865137e-05, + "loss": 0.6006, + "step": 683 + }, + { + "epoch": 0.12, + "learning_rate": 1.999587446651314e-05, + "loss": 0.6228, + "step": 684 + }, + { + "epoch": 0.12, + "learning_rate": 1.999582140922993e-05, + "loss": 0.5473, + "step": 685 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995768013017302e-05, + "loss": 0.5498, + "step": 686 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995714277877063e-05, + "loss": 0.5919, + "step": 687 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995660203811038e-05, + "loss": 0.6184, + "step": 688 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995605790821062e-05, + "loss": 0.5564, + "step": 689 + }, + { + "epoch": 0.12, + "learning_rate": 1.999555103890898e-05, + "loss": 0.5822, + "step": 690 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995495948076648e-05, + "loss": 0.5769, + "step": 691 + }, + { + "epoch": 0.12, + "learning_rate": 1.999544051832593e-05, + "loss": 0.5852, + "step": 692 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995384749658705e-05, + "loss": 0.5532, + "step": 693 + }, + { + "epoch": 0.12, + "learning_rate": 1.999532864207687e-05, + "loss": 0.5478, + "step": 694 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995272195582328e-05, + "loss": 0.55, + "step": 695 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995215410176988e-05, + "loss": 0.5339, + "step": 696 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995158285862777e-05, + "loss": 0.5887, + "step": 697 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995100822641632e-05, + "loss": 0.5448, + "step": 698 + }, + { + "epoch": 0.12, + "learning_rate": 1.9995043020515502e-05, + "loss": 0.5416, + "step": 699 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994984879486348e-05, + "loss": 0.5719, + "step": 700 + }, + { + "epoch": 0.12, + "learning_rate": 1.999492639955614e-05, + "loss": 0.5678, + "step": 701 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994867580726856e-05, + "loss": 0.6247, + "step": 702 + }, + { + "epoch": 0.12, + "learning_rate": 1.99948084230005e-05, + "loss": 0.5581, + "step": 703 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994748926379072e-05, + "loss": 0.5687, + "step": 704 + }, + { + "epoch": 0.12, + "learning_rate": 1.999468909086459e-05, + "loss": 0.5802, + "step": 705 + }, + { + "epoch": 0.12, + "learning_rate": 1.999462891645908e-05, + "loss": 0.5718, + "step": 706 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994568403164592e-05, + "loss": 0.5783, + "step": 707 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994507550983164e-05, + "loss": 0.6163, + "step": 708 + }, + { + "epoch": 0.12, + "learning_rate": 1.999444635991687e-05, + "loss": 0.5694, + "step": 709 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994384829967784e-05, + "loss": 0.5959, + "step": 710 + }, + { + "epoch": 0.12, + "learning_rate": 1.999432296113799e-05, + "loss": 0.6115, + "step": 711 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994260753429582e-05, + "loss": 0.6007, + "step": 712 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994198206844674e-05, + "loss": 0.5494, + "step": 713 + }, + { + "epoch": 0.12, + "learning_rate": 1.9994135321385386e-05, + "loss": 0.5804, + "step": 714 + }, + { + "epoch": 0.12, + "learning_rate": 1.999407209705385e-05, + "loss": 0.5678, + "step": 715 + }, + { + "epoch": 0.12, + "learning_rate": 1.999400853385221e-05, + "loss": 0.5639, + "step": 716 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993944631782623e-05, + "loss": 0.568, + "step": 717 + }, + { + "epoch": 0.12, + "learning_rate": 1.999388039084725e-05, + "loss": 0.5341, + "step": 718 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993815811048273e-05, + "loss": 0.6008, + "step": 719 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993750892387884e-05, + "loss": 0.5658, + "step": 720 + }, + { + "epoch": 0.12, + "learning_rate": 1.999368563486828e-05, + "loss": 0.5392, + "step": 721 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993620038491677e-05, + "loss": 0.5747, + "step": 722 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993554103260298e-05, + "loss": 0.5337, + "step": 723 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993487829176373e-05, + "loss": 0.5887, + "step": 724 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993421216242163e-05, + "loss": 0.5431, + "step": 725 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993354264459913e-05, + "loss": 0.5469, + "step": 726 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993286973831898e-05, + "loss": 0.573, + "step": 727 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993219344360403e-05, + "loss": 0.6051, + "step": 728 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993151376047713e-05, + "loss": 0.5793, + "step": 729 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993083068896146e-05, + "loss": 0.5695, + "step": 730 + }, + { + "epoch": 0.12, + "learning_rate": 1.9993014422908e-05, + "loss": 0.5708, + "step": 731 + }, + { + "epoch": 0.12, + "learning_rate": 1.999294543808562e-05, + "loss": 0.6058, + "step": 732 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992876114431333e-05, + "loss": 0.5183, + "step": 733 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992806451947496e-05, + "loss": 0.5829, + "step": 734 + }, + { + "epoch": 0.13, + "learning_rate": 1.999273645063647e-05, + "loss": 0.582, + "step": 735 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992666110500628e-05, + "loss": 0.5783, + "step": 736 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992595431542353e-05, + "loss": 0.5748, + "step": 737 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992524413764042e-05, + "loss": 0.6113, + "step": 738 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992453057168104e-05, + "loss": 0.5472, + "step": 739 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992381361756964e-05, + "loss": 0.6027, + "step": 740 + }, + { + "epoch": 0.13, + "learning_rate": 1.999230932753304e-05, + "loss": 0.5795, + "step": 741 + }, + { + "epoch": 0.13, + "learning_rate": 1.999223695449879e-05, + "loss": 0.5928, + "step": 742 + }, + { + "epoch": 0.13, + "learning_rate": 1.999216424265665e-05, + "loss": 0.5963, + "step": 743 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992091192009105e-05, + "loss": 0.5741, + "step": 744 + }, + { + "epoch": 0.13, + "learning_rate": 1.9992017802558616e-05, + "loss": 0.5573, + "step": 745 + }, + { + "epoch": 0.13, + "learning_rate": 1.999194407430768e-05, + "loss": 0.6228, + "step": 746 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991870007258797e-05, + "loss": 0.5703, + "step": 747 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991795601414474e-05, + "loss": 0.5884, + "step": 748 + }, + { + "epoch": 0.13, + "learning_rate": 1.999172085677724e-05, + "loss": 0.5631, + "step": 749 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991645773349622e-05, + "loss": 0.5693, + "step": 750 + }, + { + "epoch": 0.13, + "learning_rate": 1.999157035113417e-05, + "loss": 0.545, + "step": 751 + }, + { + "epoch": 0.13, + "learning_rate": 1.999149459013344e-05, + "loss": 0.5963, + "step": 752 + }, + { + "epoch": 0.13, + "learning_rate": 1.999141849035e-05, + "loss": 0.5588, + "step": 753 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991342051786433e-05, + "loss": 0.5718, + "step": 754 + }, + { + "epoch": 0.13, + "learning_rate": 1.999126527444533e-05, + "loss": 0.5763, + "step": 755 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991188158329296e-05, + "loss": 0.5863, + "step": 756 + }, + { + "epoch": 0.13, + "learning_rate": 1.999111070344094e-05, + "loss": 0.5962, + "step": 757 + }, + { + "epoch": 0.13, + "learning_rate": 1.9991032909782896e-05, + "loss": 0.5809, + "step": 758 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990954777357795e-05, + "loss": 0.5655, + "step": 759 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990876306168288e-05, + "loss": 0.5777, + "step": 760 + }, + { + "epoch": 0.13, + "learning_rate": 1.999079749621704e-05, + "loss": 0.5789, + "step": 761 + }, + { + "epoch": 0.13, + "learning_rate": 1.999071834750672e-05, + "loss": 0.6133, + "step": 762 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990638860040007e-05, + "loss": 0.587, + "step": 763 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990559033819606e-05, + "loss": 0.5901, + "step": 764 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990478868848214e-05, + "loss": 0.6234, + "step": 765 + }, + { + "epoch": 0.13, + "learning_rate": 1.9990398365128557e-05, + "loss": 0.5392, + "step": 766 + }, + { + "epoch": 0.13, + "learning_rate": 1.999031752266336e-05, + "loss": 0.6046, + "step": 767 + }, + { + "epoch": 0.13, + "learning_rate": 1.999023634145537e-05, + "loss": 0.571, + "step": 768 + }, + { + "epoch": 0.13, + "learning_rate": 1.999015482150733e-05, + "loss": 0.5853, + "step": 769 + }, + { + "epoch": 0.13, + "learning_rate": 1.999007296282201e-05, + "loss": 0.5709, + "step": 770 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989990765402183e-05, + "loss": 0.5329, + "step": 771 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989908229250638e-05, + "loss": 0.5458, + "step": 772 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989825354370177e-05, + "loss": 0.5744, + "step": 773 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989742140763605e-05, + "loss": 0.5976, + "step": 774 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989658588433746e-05, + "loss": 0.5752, + "step": 775 + }, + { + "epoch": 0.13, + "learning_rate": 1.998957469738343e-05, + "loss": 0.563, + "step": 776 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989490467615504e-05, + "loss": 0.5511, + "step": 777 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989405899132825e-05, + "loss": 0.5643, + "step": 778 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989320991938258e-05, + "loss": 0.5281, + "step": 779 + }, + { + "epoch": 0.13, + "learning_rate": 1.998923574603468e-05, + "loss": 0.556, + "step": 780 + }, + { + "epoch": 0.13, + "learning_rate": 1.998915016142499e-05, + "loss": 0.5763, + "step": 781 + }, + { + "epoch": 0.13, + "learning_rate": 1.9989064238112078e-05, + "loss": 0.5766, + "step": 782 + }, + { + "epoch": 0.13, + "learning_rate": 1.9988977976098866e-05, + "loss": 0.5288, + "step": 783 + }, + { + "epoch": 0.13, + "learning_rate": 1.9988891375388278e-05, + "loss": 0.5521, + "step": 784 + }, + { + "epoch": 0.13, + "learning_rate": 1.9988804435983252e-05, + "loss": 0.586, + "step": 785 + }, + { + "epoch": 0.13, + "learning_rate": 1.998871715788673e-05, + "loss": 0.5889, + "step": 786 + }, + { + "epoch": 0.13, + "learning_rate": 1.9988629541101672e-05, + "loss": 0.5286, + "step": 787 + }, + { + "epoch": 0.13, + "learning_rate": 1.998854158563105e-05, + "loss": 0.5855, + "step": 788 + }, + { + "epoch": 0.13, + "learning_rate": 1.9988453291477856e-05, + "loss": 0.5198, + "step": 789 + }, + { + "epoch": 0.13, + "learning_rate": 1.9988364658645066e-05, + "loss": 0.5745, + "step": 790 + }, + { + "epoch": 0.13, + "learning_rate": 1.99882756871357e-05, + "loss": 0.5882, + "step": 791 + }, + { + "epoch": 0.14, + "learning_rate": 1.9988186376952766e-05, + "loss": 0.5636, + "step": 792 + }, + { + "epoch": 0.14, + "learning_rate": 1.9988096728099294e-05, + "loss": 0.5408, + "step": 793 + }, + { + "epoch": 0.14, + "learning_rate": 1.998800674057833e-05, + "loss": 0.5466, + "step": 794 + }, + { + "epoch": 0.14, + "learning_rate": 1.998791641439292e-05, + "loss": 0.5961, + "step": 795 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987825749546123e-05, + "loss": 0.5409, + "step": 796 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987734746041017e-05, + "loss": 0.5871, + "step": 797 + }, + { + "epoch": 0.14, + "learning_rate": 1.998764340388069e-05, + "loss": 0.5752, + "step": 798 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987551723068243e-05, + "loss": 0.544, + "step": 799 + }, + { + "epoch": 0.14, + "learning_rate": 1.998745970360677e-05, + "loss": 0.6197, + "step": 800 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987367345499404e-05, + "loss": 0.5926, + "step": 801 + }, + { + "epoch": 0.14, + "learning_rate": 1.998727464874927e-05, + "loss": 0.5874, + "step": 802 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987181613359517e-05, + "loss": 0.5442, + "step": 803 + }, + { + "epoch": 0.14, + "learning_rate": 1.9987088239333294e-05, + "loss": 0.5785, + "step": 804 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986994526673767e-05, + "loss": 0.5808, + "step": 805 + }, + { + "epoch": 0.14, + "learning_rate": 1.998690047538412e-05, + "loss": 0.5142, + "step": 806 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986806085467535e-05, + "loss": 0.5398, + "step": 807 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986711356927215e-05, + "loss": 0.5547, + "step": 808 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986616289766373e-05, + "loss": 0.5303, + "step": 809 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986520883988233e-05, + "loss": 0.5932, + "step": 810 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986425139596028e-05, + "loss": 0.558, + "step": 811 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986329056593003e-05, + "loss": 0.563, + "step": 812 + }, + { + "epoch": 0.14, + "learning_rate": 1.998623263498242e-05, + "loss": 0.5616, + "step": 813 + }, + { + "epoch": 0.14, + "learning_rate": 1.9986135874767544e-05, + "loss": 0.5924, + "step": 814 + }, + { + "epoch": 0.14, + "learning_rate": 1.998603877595166e-05, + "loss": 0.5824, + "step": 815 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985941338538056e-05, + "loss": 0.5849, + "step": 816 + }, + { + "epoch": 0.14, + "learning_rate": 1.998584356253004e-05, + "loss": 0.5853, + "step": 817 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985745447930925e-05, + "loss": 0.5439, + "step": 818 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985646994744038e-05, + "loss": 0.5826, + "step": 819 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985548202972718e-05, + "loss": 0.5662, + "step": 820 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985449072620313e-05, + "loss": 0.5581, + "step": 821 + }, + { + "epoch": 0.14, + "learning_rate": 1.9985349603690187e-05, + "loss": 0.5423, + "step": 822 + }, + { + "epoch": 0.14, + "learning_rate": 1.998524979618571e-05, + "loss": 0.5428, + "step": 823 + }, + { + "epoch": 0.14, + "learning_rate": 1.998514965011027e-05, + "loss": 0.5505, + "step": 824 + }, + { + "epoch": 0.14, + "learning_rate": 1.998504916546726e-05, + "loss": 0.5684, + "step": 825 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984948342260085e-05, + "loss": 0.5792, + "step": 826 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984847180492168e-05, + "loss": 0.5626, + "step": 827 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984745680166933e-05, + "loss": 0.5658, + "step": 828 + }, + { + "epoch": 0.14, + "learning_rate": 1.998464384128783e-05, + "loss": 0.5235, + "step": 829 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984541663858305e-05, + "loss": 0.582, + "step": 830 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984439147881827e-05, + "loss": 0.5811, + "step": 831 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984336293361868e-05, + "loss": 0.5326, + "step": 832 + }, + { + "epoch": 0.14, + "learning_rate": 1.998423310030192e-05, + "loss": 0.5446, + "step": 833 + }, + { + "epoch": 0.14, + "learning_rate": 1.998412956870548e-05, + "loss": 0.5434, + "step": 834 + }, + { + "epoch": 0.14, + "learning_rate": 1.9984025698576056e-05, + "loss": 0.5237, + "step": 835 + }, + { + "epoch": 0.14, + "learning_rate": 1.998392148991717e-05, + "loss": 0.5429, + "step": 836 + }, + { + "epoch": 0.14, + "learning_rate": 1.998381694273236e-05, + "loss": 0.5271, + "step": 837 + }, + { + "epoch": 0.14, + "learning_rate": 1.998371205702517e-05, + "loss": 0.5676, + "step": 838 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983606832799154e-05, + "loss": 0.5968, + "step": 839 + }, + { + "epoch": 0.14, + "learning_rate": 1.998350127005788e-05, + "loss": 0.5474, + "step": 840 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983395368804927e-05, + "loss": 0.5578, + "step": 841 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983289129043886e-05, + "loss": 0.5414, + "step": 842 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983182550778362e-05, + "loss": 0.5857, + "step": 843 + }, + { + "epoch": 0.14, + "learning_rate": 1.9983075634011967e-05, + "loss": 0.5413, + "step": 844 + }, + { + "epoch": 0.14, + "learning_rate": 1.9982968378748323e-05, + "loss": 0.605, + "step": 845 + }, + { + "epoch": 0.14, + "learning_rate": 1.9982860784991072e-05, + "loss": 0.5494, + "step": 846 + }, + { + "epoch": 0.14, + "learning_rate": 1.998275285274386e-05, + "loss": 0.5773, + "step": 847 + }, + { + "epoch": 0.14, + "learning_rate": 1.9982644582010346e-05, + "loss": 0.5517, + "step": 848 + }, + { + "epoch": 0.14, + "learning_rate": 1.9982535972794202e-05, + "loss": 0.5379, + "step": 849 + }, + { + "epoch": 0.14, + "learning_rate": 1.9982427025099112e-05, + "loss": 0.5776, + "step": 850 + }, + { + "epoch": 0.15, + "learning_rate": 1.9982317738928765e-05, + "loss": 0.6051, + "step": 851 + }, + { + "epoch": 0.15, + "learning_rate": 1.998220811428687e-05, + "loss": 0.5891, + "step": 852 + }, + { + "epoch": 0.15, + "learning_rate": 1.9982098151177147e-05, + "loss": 0.5693, + "step": 853 + }, + { + "epoch": 0.15, + "learning_rate": 1.998198784960332e-05, + "loss": 0.5831, + "step": 854 + }, + { + "epoch": 0.15, + "learning_rate": 1.998187720956913e-05, + "loss": 0.5661, + "step": 855 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981766231078333e-05, + "loss": 0.5393, + "step": 856 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981654914134684e-05, + "loss": 0.5728, + "step": 857 + }, + { + "epoch": 0.15, + "learning_rate": 1.998154325874196e-05, + "loss": 0.5546, + "step": 858 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981431264903953e-05, + "loss": 0.5934, + "step": 859 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981318932624455e-05, + "loss": 0.5909, + "step": 860 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981206261907274e-05, + "loss": 0.503, + "step": 861 + }, + { + "epoch": 0.15, + "learning_rate": 1.9981093252756232e-05, + "loss": 0.5499, + "step": 862 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980979905175162e-05, + "loss": 0.5924, + "step": 863 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980866219167906e-05, + "loss": 0.5502, + "step": 864 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980752194738318e-05, + "loss": 0.551, + "step": 865 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980637831890262e-05, + "loss": 0.5559, + "step": 866 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980523130627625e-05, + "loss": 0.5665, + "step": 867 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980408090954287e-05, + "loss": 0.5395, + "step": 868 + }, + { + "epoch": 0.15, + "learning_rate": 1.998029271287415e-05, + "loss": 0.5462, + "step": 869 + }, + { + "epoch": 0.15, + "learning_rate": 1.998017699639113e-05, + "loss": 0.5853, + "step": 870 + }, + { + "epoch": 0.15, + "learning_rate": 1.9980060941509148e-05, + "loss": 0.531, + "step": 871 + }, + { + "epoch": 0.15, + "learning_rate": 1.997994454823214e-05, + "loss": 0.5853, + "step": 872 + }, + { + "epoch": 0.15, + "learning_rate": 1.997982781656405e-05, + "loss": 0.5847, + "step": 873 + }, + { + "epoch": 0.15, + "learning_rate": 1.997971074650884e-05, + "loss": 0.5797, + "step": 874 + }, + { + "epoch": 0.15, + "learning_rate": 1.997959333807048e-05, + "loss": 0.5785, + "step": 875 + }, + { + "epoch": 0.15, + "learning_rate": 1.9979475591252943e-05, + "loss": 0.5881, + "step": 876 + }, + { + "epoch": 0.15, + "learning_rate": 1.997935750606023e-05, + "loss": 0.5572, + "step": 877 + }, + { + "epoch": 0.15, + "learning_rate": 1.9979239082496344e-05, + "loss": 0.5834, + "step": 878 + }, + { + "epoch": 0.15, + "learning_rate": 1.9979120320565295e-05, + "loss": 0.5872, + "step": 879 + }, + { + "epoch": 0.15, + "learning_rate": 1.9979001220271113e-05, + "loss": 0.5907, + "step": 880 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978881781617837e-05, + "loss": 0.5724, + "step": 881 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978762004609518e-05, + "loss": 0.579, + "step": 882 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978641889250215e-05, + "loss": 0.5289, + "step": 883 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978521435544002e-05, + "loss": 0.5768, + "step": 884 + }, + { + "epoch": 0.15, + "learning_rate": 1.997840064349496e-05, + "loss": 0.5442, + "step": 885 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978279513107193e-05, + "loss": 0.5444, + "step": 886 + }, + { + "epoch": 0.15, + "learning_rate": 1.9978158044384797e-05, + "loss": 0.5847, + "step": 887 + }, + { + "epoch": 0.15, + "learning_rate": 1.99780362373319e-05, + "loss": 0.5808, + "step": 888 + }, + { + "epoch": 0.15, + "learning_rate": 1.9977914091952625e-05, + "loss": 0.5655, + "step": 889 + }, + { + "epoch": 0.15, + "learning_rate": 1.997779160825112e-05, + "loss": 0.6005, + "step": 890 + }, + { + "epoch": 0.15, + "learning_rate": 1.9977668786231536e-05, + "loss": 0.5714, + "step": 891 + }, + { + "epoch": 0.15, + "learning_rate": 1.997754562589803e-05, + "loss": 0.6015, + "step": 892 + }, + { + "epoch": 0.15, + "learning_rate": 1.997742212725479e-05, + "loss": 0.5409, + "step": 893 + }, + { + "epoch": 0.15, + "learning_rate": 1.9977298290305998e-05, + "loss": 0.5983, + "step": 894 + }, + { + "epoch": 0.15, + "learning_rate": 1.997717411505585e-05, + "loss": 0.5778, + "step": 895 + }, + { + "epoch": 0.15, + "learning_rate": 1.997704960150856e-05, + "loss": 0.5316, + "step": 896 + }, + { + "epoch": 0.15, + "learning_rate": 1.997692474966835e-05, + "loss": 0.6083, + "step": 897 + }, + { + "epoch": 0.15, + "learning_rate": 1.9976799559539457e-05, + "loss": 0.4855, + "step": 898 + }, + { + "epoch": 0.15, + "learning_rate": 1.9976674031126116e-05, + "loss": 0.5941, + "step": 899 + }, + { + "epoch": 0.15, + "learning_rate": 1.997654816443259e-05, + "loss": 0.5645, + "step": 900 + }, + { + "epoch": 0.15, + "learning_rate": 1.997642195946315e-05, + "loss": 0.5599, + "step": 901 + }, + { + "epoch": 0.15, + "learning_rate": 1.9976295416222067e-05, + "loss": 0.5462, + "step": 902 + }, + { + "epoch": 0.15, + "learning_rate": 1.9976168534713634e-05, + "loss": 0.5242, + "step": 903 + }, + { + "epoch": 0.15, + "learning_rate": 1.9976041314942156e-05, + "loss": 0.5359, + "step": 904 + }, + { + "epoch": 0.15, + "learning_rate": 1.9975913756911947e-05, + "loss": 0.5462, + "step": 905 + }, + { + "epoch": 0.15, + "learning_rate": 1.997578586062733e-05, + "loss": 0.5742, + "step": 906 + }, + { + "epoch": 0.15, + "learning_rate": 1.997565762609264e-05, + "loss": 0.5797, + "step": 907 + }, + { + "epoch": 0.15, + "learning_rate": 1.997552905331223e-05, + "loss": 0.5302, + "step": 908 + }, + { + "epoch": 0.16, + "learning_rate": 1.9975400142290454e-05, + "loss": 0.5561, + "step": 909 + }, + { + "epoch": 0.16, + "learning_rate": 1.9975270893031688e-05, + "loss": 0.5642, + "step": 910 + }, + { + "epoch": 0.16, + "learning_rate": 1.9975141305540312e-05, + "loss": 0.5662, + "step": 911 + }, + { + "epoch": 0.16, + "learning_rate": 1.9975011379820718e-05, + "loss": 0.5522, + "step": 912 + }, + { + "epoch": 0.16, + "learning_rate": 1.9974881115877318e-05, + "loss": 0.5554, + "step": 913 + }, + { + "epoch": 0.16, + "learning_rate": 1.997475051371452e-05, + "loss": 0.6026, + "step": 914 + }, + { + "epoch": 0.16, + "learning_rate": 1.997461957333676e-05, + "loss": 0.5125, + "step": 915 + }, + { + "epoch": 0.16, + "learning_rate": 1.9974488294748472e-05, + "loss": 0.5714, + "step": 916 + }, + { + "epoch": 0.16, + "learning_rate": 1.997435667795411e-05, + "loss": 0.5633, + "step": 917 + }, + { + "epoch": 0.16, + "learning_rate": 1.9974224722958136e-05, + "loss": 0.5736, + "step": 918 + }, + { + "epoch": 0.16, + "learning_rate": 1.997409242976503e-05, + "loss": 0.5538, + "step": 919 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973959798379266e-05, + "loss": 0.5562, + "step": 920 + }, + { + "epoch": 0.16, + "learning_rate": 1.997382682880535e-05, + "loss": 0.5155, + "step": 921 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973693521047787e-05, + "loss": 0.5512, + "step": 922 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973559875111103e-05, + "loss": 0.5487, + "step": 923 + }, + { + "epoch": 0.16, + "learning_rate": 1.997342589099982e-05, + "loss": 0.5748, + "step": 924 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973291568718486e-05, + "loss": 0.5267, + "step": 925 + }, + { + "epoch": 0.16, + "learning_rate": 1.9973156908271657e-05, + "loss": 0.5421, + "step": 926 + }, + { + "epoch": 0.16, + "learning_rate": 1.99730219096639e-05, + "loss": 0.5608, + "step": 927 + }, + { + "epoch": 0.16, + "learning_rate": 1.9972886572899785e-05, + "loss": 0.5971, + "step": 928 + }, + { + "epoch": 0.16, + "learning_rate": 1.9972750897983906e-05, + "loss": 0.5347, + "step": 929 + }, + { + "epoch": 0.16, + "learning_rate": 1.9972614884920867e-05, + "loss": 0.5334, + "step": 930 + }, + { + "epoch": 0.16, + "learning_rate": 1.9972478533715272e-05, + "loss": 0.5461, + "step": 931 + }, + { + "epoch": 0.16, + "learning_rate": 1.9972341844371753e-05, + "loss": 0.5917, + "step": 932 + }, + { + "epoch": 0.16, + "learning_rate": 1.9972204816894935e-05, + "loss": 0.5575, + "step": 933 + }, + { + "epoch": 0.16, + "learning_rate": 1.997206745128947e-05, + "loss": 0.5521, + "step": 934 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971929747560014e-05, + "loss": 0.5444, + "step": 935 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971791705711236e-05, + "loss": 0.6219, + "step": 936 + }, + { + "epoch": 0.16, + "learning_rate": 1.997165332574782e-05, + "loss": 0.6124, + "step": 937 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971514607674455e-05, + "loss": 0.5878, + "step": 938 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971375551495842e-05, + "loss": 0.531, + "step": 939 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971236157216702e-05, + "loss": 0.5581, + "step": 940 + }, + { + "epoch": 0.16, + "learning_rate": 1.9971096424841757e-05, + "loss": 0.5908, + "step": 941 + }, + { + "epoch": 0.16, + "learning_rate": 1.9970956354375748e-05, + "loss": 0.561, + "step": 942 + }, + { + "epoch": 0.16, + "learning_rate": 1.9970815945823423e-05, + "loss": 0.5444, + "step": 943 + }, + { + "epoch": 0.16, + "learning_rate": 1.997067519918954e-05, + "loss": 0.5396, + "step": 944 + }, + { + "epoch": 0.16, + "learning_rate": 1.9970534114478876e-05, + "loss": 0.5776, + "step": 945 + }, + { + "epoch": 0.16, + "learning_rate": 1.997039269169621e-05, + "loss": 0.5474, + "step": 946 + }, + { + "epoch": 0.16, + "learning_rate": 1.9970250930846343e-05, + "loss": 0.5188, + "step": 947 + }, + { + "epoch": 0.16, + "learning_rate": 1.997010883193408e-05, + "loss": 0.56, + "step": 948 + }, + { + "epoch": 0.16, + "learning_rate": 1.9969966394964234e-05, + "loss": 0.5806, + "step": 949 + }, + { + "epoch": 0.16, + "learning_rate": 1.9969823619941638e-05, + "loss": 0.5666, + "step": 950 + }, + { + "epoch": 0.16, + "learning_rate": 1.9969680506871138e-05, + "loss": 0.5543, + "step": 951 + }, + { + "epoch": 0.16, + "learning_rate": 1.9969537055757574e-05, + "loss": 0.5744, + "step": 952 + }, + { + "epoch": 0.16, + "learning_rate": 1.9969393266605826e-05, + "loss": 0.52, + "step": 953 + }, + { + "epoch": 0.16, + "learning_rate": 1.996924913942076e-05, + "loss": 0.5265, + "step": 954 + }, + { + "epoch": 0.16, + "learning_rate": 1.996910467420726e-05, + "loss": 0.5554, + "step": 955 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968959870970233e-05, + "loss": 0.5654, + "step": 956 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968814729714584e-05, + "loss": 0.5297, + "step": 957 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968669250445233e-05, + "loss": 0.5947, + "step": 958 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968523433167116e-05, + "loss": 0.5691, + "step": 959 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968377277885178e-05, + "loss": 0.5679, + "step": 960 + }, + { + "epoch": 0.16, + "learning_rate": 1.996823078460437e-05, + "loss": 0.526, + "step": 961 + }, + { + "epoch": 0.16, + "learning_rate": 1.9968083953329666e-05, + "loss": 0.5536, + "step": 962 + }, + { + "epoch": 0.16, + "learning_rate": 1.9967936784066037e-05, + "loss": 0.5839, + "step": 963 + }, + { + "epoch": 0.16, + "learning_rate": 1.9967789276818477e-05, + "loss": 0.6249, + "step": 964 + }, + { + "epoch": 0.16, + "learning_rate": 1.9967641431591988e-05, + "loss": 0.5861, + "step": 965 + }, + { + "epoch": 0.16, + "learning_rate": 1.9967493248391584e-05, + "loss": 0.529, + "step": 966 + }, + { + "epoch": 0.16, + "learning_rate": 1.9967344727222283e-05, + "loss": 0.5664, + "step": 967 + }, + { + "epoch": 0.17, + "learning_rate": 1.996719586808913e-05, + "loss": 0.5684, + "step": 968 + }, + { + "epoch": 0.17, + "learning_rate": 1.9967046670997162e-05, + "loss": 0.5105, + "step": 969 + }, + { + "epoch": 0.17, + "learning_rate": 1.996689713595145e-05, + "loss": 0.5528, + "step": 970 + }, + { + "epoch": 0.17, + "learning_rate": 1.9966747262957057e-05, + "loss": 0.5194, + "step": 971 + }, + { + "epoch": 0.17, + "learning_rate": 1.9966597052019063e-05, + "loss": 0.6091, + "step": 972 + }, + { + "epoch": 0.17, + "learning_rate": 1.9966446503142566e-05, + "loss": 0.6026, + "step": 973 + }, + { + "epoch": 0.17, + "learning_rate": 1.996629561633267e-05, + "loss": 0.5303, + "step": 974 + }, + { + "epoch": 0.17, + "learning_rate": 1.9966144391594487e-05, + "loss": 0.556, + "step": 975 + }, + { + "epoch": 0.17, + "learning_rate": 1.996599282893315e-05, + "loss": 0.5532, + "step": 976 + }, + { + "epoch": 0.17, + "learning_rate": 1.9965840928353796e-05, + "loss": 0.4954, + "step": 977 + }, + { + "epoch": 0.17, + "learning_rate": 1.9965688689861574e-05, + "loss": 0.5444, + "step": 978 + }, + { + "epoch": 0.17, + "learning_rate": 1.9965536113461645e-05, + "loss": 0.5552, + "step": 979 + }, + { + "epoch": 0.17, + "learning_rate": 1.9965383199159185e-05, + "loss": 0.5569, + "step": 980 + }, + { + "epoch": 0.17, + "learning_rate": 1.9965229946959378e-05, + "loss": 0.5618, + "step": 981 + }, + { + "epoch": 0.17, + "learning_rate": 1.9965076356867423e-05, + "loss": 0.5768, + "step": 982 + }, + { + "epoch": 0.17, + "learning_rate": 1.9964922428888525e-05, + "loss": 0.629, + "step": 983 + }, + { + "epoch": 0.17, + "learning_rate": 1.9964768163027904e-05, + "loss": 0.5456, + "step": 984 + }, + { + "epoch": 0.17, + "learning_rate": 1.9964613559290787e-05, + "loss": 0.5243, + "step": 985 + }, + { + "epoch": 0.17, + "learning_rate": 1.9964458617682422e-05, + "loss": 0.5594, + "step": 986 + }, + { + "epoch": 0.17, + "learning_rate": 1.9964303338208065e-05, + "loss": 0.5113, + "step": 987 + }, + { + "epoch": 0.17, + "learning_rate": 1.996414772087297e-05, + "loss": 0.5734, + "step": 988 + }, + { + "epoch": 0.17, + "learning_rate": 1.9963991765682422e-05, + "loss": 0.5048, + "step": 989 + }, + { + "epoch": 0.17, + "learning_rate": 1.9963835472641704e-05, + "loss": 0.5678, + "step": 990 + }, + { + "epoch": 0.17, + "learning_rate": 1.996367884175612e-05, + "loss": 0.5891, + "step": 991 + }, + { + "epoch": 0.17, + "learning_rate": 1.996352187303098e-05, + "loss": 0.5838, + "step": 992 + }, + { + "epoch": 0.17, + "learning_rate": 1.9963364566471607e-05, + "loss": 0.5588, + "step": 993 + }, + { + "epoch": 0.17, + "learning_rate": 1.996320692208333e-05, + "loss": 0.5865, + "step": 994 + }, + { + "epoch": 0.17, + "learning_rate": 1.9963048939871502e-05, + "loss": 0.5578, + "step": 995 + }, + { + "epoch": 0.17, + "learning_rate": 1.996289061984147e-05, + "loss": 0.5614, + "step": 996 + }, + { + "epoch": 0.17, + "learning_rate": 1.996273196199861e-05, + "loss": 0.5829, + "step": 997 + }, + { + "epoch": 0.17, + "learning_rate": 1.99625729663483e-05, + "loss": 0.5519, + "step": 998 + }, + { + "epoch": 0.17, + "learning_rate": 1.9962413632895926e-05, + "loss": 0.537, + "step": 999 + }, + { + "epoch": 0.17, + "learning_rate": 1.99622539616469e-05, + "loss": 0.556, + "step": 1000 + }, + { + "epoch": 0.17, + "learning_rate": 1.996209395260663e-05, + "loss": 0.5291, + "step": 1001 + }, + { + "epoch": 0.17, + "learning_rate": 1.9961933605780542e-05, + "loss": 0.5607, + "step": 1002 + }, + { + "epoch": 0.17, + "learning_rate": 1.9961772921174072e-05, + "loss": 0.5348, + "step": 1003 + }, + { + "epoch": 0.17, + "learning_rate": 1.9961611898792666e-05, + "loss": 0.5345, + "step": 1004 + }, + { + "epoch": 0.17, + "learning_rate": 1.9961450538641792e-05, + "loss": 0.5736, + "step": 1005 + }, + { + "epoch": 0.17, + "learning_rate": 1.9961288840726914e-05, + "loss": 0.5589, + "step": 1006 + }, + { + "epoch": 0.17, + "learning_rate": 1.996112680505352e-05, + "loss": 0.582, + "step": 1007 + }, + { + "epoch": 0.17, + "learning_rate": 1.9960964431627098e-05, + "loss": 0.564, + "step": 1008 + }, + { + "epoch": 0.17, + "learning_rate": 1.996080172045316e-05, + "loss": 0.5617, + "step": 1009 + }, + { + "epoch": 0.17, + "learning_rate": 1.9960638671537218e-05, + "loss": 0.569, + "step": 1010 + }, + { + "epoch": 0.17, + "learning_rate": 1.9960475284884805e-05, + "loss": 0.5331, + "step": 1011 + }, + { + "epoch": 0.17, + "learning_rate": 1.9960311560501457e-05, + "loss": 0.5871, + "step": 1012 + }, + { + "epoch": 0.17, + "learning_rate": 1.9960147498392723e-05, + "loss": 0.5587, + "step": 1013 + }, + { + "epoch": 0.17, + "learning_rate": 1.9959983098564175e-05, + "loss": 0.5466, + "step": 1014 + }, + { + "epoch": 0.17, + "learning_rate": 1.995981836102138e-05, + "loss": 0.5488, + "step": 1015 + }, + { + "epoch": 0.17, + "learning_rate": 1.9959653285769925e-05, + "loss": 0.5839, + "step": 1016 + }, + { + "epoch": 0.17, + "learning_rate": 1.995948787281541e-05, + "loss": 0.572, + "step": 1017 + }, + { + "epoch": 0.17, + "learning_rate": 1.9959322122163443e-05, + "loss": 0.557, + "step": 1018 + }, + { + "epoch": 0.17, + "learning_rate": 1.9959156033819644e-05, + "loss": 0.5284, + "step": 1019 + }, + { + "epoch": 0.17, + "learning_rate": 1.9958989607789636e-05, + "loss": 0.5507, + "step": 1020 + }, + { + "epoch": 0.17, + "learning_rate": 1.9958822844079075e-05, + "loss": 0.5526, + "step": 1021 + }, + { + "epoch": 0.17, + "learning_rate": 1.995865574269361e-05, + "loss": 0.5784, + "step": 1022 + }, + { + "epoch": 0.17, + "learning_rate": 1.995848830363891e-05, + "loss": 0.5315, + "step": 1023 + }, + { + "epoch": 0.17, + "learning_rate": 1.9958320526920643e-05, + "loss": 0.5545, + "step": 1024 + }, + { + "epoch": 0.17, + "learning_rate": 1.9958152412544507e-05, + "loss": 0.5396, + "step": 1025 + }, + { + "epoch": 0.17, + "learning_rate": 1.99579839605162e-05, + "loss": 0.5477, + "step": 1026 + }, + { + "epoch": 0.18, + "learning_rate": 1.9957815170841428e-05, + "loss": 0.5989, + "step": 1027 + }, + { + "epoch": 0.18, + "learning_rate": 1.9957646043525926e-05, + "loss": 0.5533, + "step": 1028 + }, + { + "epoch": 0.18, + "learning_rate": 1.995747657857542e-05, + "loss": 0.5157, + "step": 1029 + }, + { + "epoch": 0.18, + "learning_rate": 1.9957306775995654e-05, + "loss": 0.5459, + "step": 1030 + }, + { + "epoch": 0.18, + "learning_rate": 1.9957136635792394e-05, + "loss": 0.5612, + "step": 1031 + }, + { + "epoch": 0.18, + "learning_rate": 1.9956966157971403e-05, + "loss": 0.5416, + "step": 1032 + }, + { + "epoch": 0.18, + "learning_rate": 1.9956795342538465e-05, + "loss": 0.5328, + "step": 1033 + }, + { + "epoch": 0.18, + "learning_rate": 1.9956624189499367e-05, + "loss": 0.6119, + "step": 1034 + }, + { + "epoch": 0.18, + "learning_rate": 1.9956452698859915e-05, + "loss": 0.5439, + "step": 1035 + }, + { + "epoch": 0.18, + "learning_rate": 1.9956280870625924e-05, + "loss": 0.5861, + "step": 1036 + }, + { + "epoch": 0.18, + "learning_rate": 1.9956108704803222e-05, + "loss": 0.5258, + "step": 1037 + }, + { + "epoch": 0.18, + "learning_rate": 1.9955936201397643e-05, + "loss": 0.5412, + "step": 1038 + }, + { + "epoch": 0.18, + "learning_rate": 1.9955763360415038e-05, + "loss": 0.5615, + "step": 1039 + }, + { + "epoch": 0.18, + "learning_rate": 1.9955590181861265e-05, + "loss": 0.5758, + "step": 1040 + }, + { + "epoch": 0.18, + "learning_rate": 1.9955416665742203e-05, + "loss": 0.5746, + "step": 1041 + }, + { + "epoch": 0.18, + "learning_rate": 1.9955242812063727e-05, + "loss": 0.5533, + "step": 1042 + }, + { + "epoch": 0.18, + "learning_rate": 1.9955068620831735e-05, + "loss": 0.5225, + "step": 1043 + }, + { + "epoch": 0.18, + "learning_rate": 1.995489409205214e-05, + "loss": 0.5391, + "step": 1044 + }, + { + "epoch": 0.18, + "learning_rate": 1.9954719225730847e-05, + "loss": 0.54, + "step": 1045 + }, + { + "epoch": 0.18, + "learning_rate": 1.9954544021873796e-05, + "loss": 0.5894, + "step": 1046 + }, + { + "epoch": 0.18, + "learning_rate": 1.995436848048692e-05, + "loss": 0.5873, + "step": 1047 + }, + { + "epoch": 0.18, + "learning_rate": 1.9954192601576176e-05, + "loss": 0.551, + "step": 1048 + }, + { + "epoch": 0.18, + "learning_rate": 1.9954016385147524e-05, + "loss": 0.5623, + "step": 1049 + }, + { + "epoch": 0.18, + "learning_rate": 1.995383983120695e-05, + "loss": 0.5388, + "step": 1050 + }, + { + "epoch": 0.18, + "learning_rate": 1.995366293976042e-05, + "loss": 0.5958, + "step": 1051 + }, + { + "epoch": 0.18, + "learning_rate": 1.9953485710813953e-05, + "loss": 0.559, + "step": 1052 + }, + { + "epoch": 0.18, + "learning_rate": 1.9953308144373543e-05, + "loss": 0.6139, + "step": 1053 + }, + { + "epoch": 0.18, + "learning_rate": 1.9953130240445216e-05, + "loss": 0.5769, + "step": 1054 + }, + { + "epoch": 0.18, + "learning_rate": 1.9952951999035005e-05, + "loss": 0.5899, + "step": 1055 + }, + { + "epoch": 0.18, + "learning_rate": 1.9952773420148958e-05, + "loss": 0.5552, + "step": 1056 + }, + { + "epoch": 0.18, + "learning_rate": 1.995259450379312e-05, + "loss": 0.5511, + "step": 1057 + }, + { + "epoch": 0.18, + "learning_rate": 1.9952415249973565e-05, + "loss": 0.5106, + "step": 1058 + }, + { + "epoch": 0.18, + "learning_rate": 1.9952235658696372e-05, + "loss": 0.5593, + "step": 1059 + }, + { + "epoch": 0.18, + "learning_rate": 1.9952055729967622e-05, + "loss": 0.5351, + "step": 1060 + }, + { + "epoch": 0.18, + "learning_rate": 1.9951875463793425e-05, + "loss": 0.5933, + "step": 1061 + }, + { + "epoch": 0.18, + "learning_rate": 1.995169486017989e-05, + "loss": 0.5826, + "step": 1062 + }, + { + "epoch": 0.18, + "learning_rate": 1.995151391913314e-05, + "loss": 0.5361, + "step": 1063 + }, + { + "epoch": 0.18, + "learning_rate": 1.9951332640659306e-05, + "loss": 0.535, + "step": 1064 + }, + { + "epoch": 0.18, + "learning_rate": 1.9951151024764542e-05, + "loss": 0.5518, + "step": 1065 + }, + { + "epoch": 0.18, + "learning_rate": 1.9950969071455006e-05, + "loss": 0.5732, + "step": 1066 + }, + { + "epoch": 0.18, + "learning_rate": 1.995078678073686e-05, + "loss": 0.5455, + "step": 1067 + }, + { + "epoch": 0.18, + "learning_rate": 1.9950604152616295e-05, + "loss": 0.5769, + "step": 1068 + }, + { + "epoch": 0.18, + "learning_rate": 1.9950421187099497e-05, + "loss": 0.5815, + "step": 1069 + }, + { + "epoch": 0.18, + "learning_rate": 1.9950237884192668e-05, + "loss": 0.6039, + "step": 1070 + }, + { + "epoch": 0.18, + "learning_rate": 1.9950054243902027e-05, + "loss": 0.5631, + "step": 1071 + }, + { + "epoch": 0.18, + "learning_rate": 1.99498702662338e-05, + "loss": 0.5833, + "step": 1072 + }, + { + "epoch": 0.18, + "learning_rate": 1.9949685951194227e-05, + "loss": 0.5219, + "step": 1073 + }, + { + "epoch": 0.18, + "learning_rate": 1.9949501298789556e-05, + "loss": 0.5303, + "step": 1074 + }, + { + "epoch": 0.18, + "learning_rate": 1.9949316309026045e-05, + "loss": 0.5801, + "step": 1075 + }, + { + "epoch": 0.18, + "learning_rate": 1.9949130981909968e-05, + "loss": 0.5944, + "step": 1076 + }, + { + "epoch": 0.18, + "learning_rate": 1.994894531744761e-05, + "loss": 0.5025, + "step": 1077 + }, + { + "epoch": 0.18, + "learning_rate": 1.994875931564527e-05, + "loss": 0.5377, + "step": 1078 + }, + { + "epoch": 0.18, + "learning_rate": 1.994857297650925e-05, + "loss": 0.541, + "step": 1079 + }, + { + "epoch": 0.18, + "learning_rate": 1.994838630004587e-05, + "loss": 0.5608, + "step": 1080 + }, + { + "epoch": 0.18, + "learning_rate": 1.994819928626146e-05, + "loss": 0.5712, + "step": 1081 + }, + { + "epoch": 0.18, + "learning_rate": 1.9948011935162354e-05, + "loss": 0.5656, + "step": 1082 + }, + { + "epoch": 0.18, + "learning_rate": 1.9947824246754915e-05, + "loss": 0.5007, + "step": 1083 + }, + { + "epoch": 0.18, + "learning_rate": 1.9947636221045506e-05, + "loss": 0.5332, + "step": 1084 + }, + { + "epoch": 0.19, + "learning_rate": 1.9947447858040493e-05, + "loss": 0.5309, + "step": 1085 + }, + { + "epoch": 0.19, + "learning_rate": 1.9947259157746272e-05, + "loss": 0.6128, + "step": 1086 + }, + { + "epoch": 0.19, + "learning_rate": 1.9947070120169237e-05, + "loss": 0.5751, + "step": 1087 + }, + { + "epoch": 0.19, + "learning_rate": 1.9946880745315802e-05, + "loss": 0.5382, + "step": 1088 + }, + { + "epoch": 0.19, + "learning_rate": 1.9946691033192384e-05, + "loss": 0.5635, + "step": 1089 + }, + { + "epoch": 0.19, + "learning_rate": 1.9946500983805413e-05, + "loss": 0.5755, + "step": 1090 + }, + { + "epoch": 0.19, + "learning_rate": 1.994631059716134e-05, + "loss": 0.5222, + "step": 1091 + }, + { + "epoch": 0.19, + "learning_rate": 1.9946119873266615e-05, + "loss": 0.5552, + "step": 1092 + }, + { + "epoch": 0.19, + "learning_rate": 1.9945928812127707e-05, + "loss": 0.5843, + "step": 1093 + }, + { + "epoch": 0.19, + "learning_rate": 1.9945737413751098e-05, + "loss": 0.5415, + "step": 1094 + }, + { + "epoch": 0.19, + "learning_rate": 1.9945545678143272e-05, + "loss": 0.5284, + "step": 1095 + }, + { + "epoch": 0.19, + "learning_rate": 1.9945353605310732e-05, + "loss": 0.5486, + "step": 1096 + }, + { + "epoch": 0.19, + "learning_rate": 1.994516119525999e-05, + "loss": 0.5353, + "step": 1097 + }, + { + "epoch": 0.19, + "learning_rate": 1.994496844799757e-05, + "loss": 0.5999, + "step": 1098 + }, + { + "epoch": 0.19, + "learning_rate": 1.994477536353001e-05, + "loss": 0.5767, + "step": 1099 + }, + { + "epoch": 0.19, + "learning_rate": 1.994458194186386e-05, + "loss": 0.5439, + "step": 1100 + }, + { + "epoch": 0.19, + "learning_rate": 1.994438818300567e-05, + "loss": 0.5575, + "step": 1101 + }, + { + "epoch": 0.19, + "learning_rate": 1.994419408696201e-05, + "loss": 0.5475, + "step": 1102 + }, + { + "epoch": 0.19, + "learning_rate": 1.994399965373947e-05, + "loss": 0.5636, + "step": 1103 + }, + { + "epoch": 0.19, + "learning_rate": 1.9943804883344638e-05, + "loss": 0.5463, + "step": 1104 + }, + { + "epoch": 0.19, + "learning_rate": 1.9943609775784115e-05, + "loss": 0.5841, + "step": 1105 + }, + { + "epoch": 0.19, + "learning_rate": 1.9943414331064516e-05, + "loss": 0.5333, + "step": 1106 + }, + { + "epoch": 0.19, + "learning_rate": 1.9943218549192478e-05, + "loss": 0.5622, + "step": 1107 + }, + { + "epoch": 0.19, + "learning_rate": 1.994302243017463e-05, + "loss": 0.5147, + "step": 1108 + }, + { + "epoch": 0.19, + "learning_rate": 1.994282597401762e-05, + "loss": 0.5799, + "step": 1109 + }, + { + "epoch": 0.19, + "learning_rate": 1.994262918072812e-05, + "loss": 0.5943, + "step": 1110 + }, + { + "epoch": 0.19, + "learning_rate": 1.9942432050312793e-05, + "loss": 0.5578, + "step": 1111 + }, + { + "epoch": 0.19, + "learning_rate": 1.994223458277833e-05, + "loss": 0.5481, + "step": 1112 + }, + { + "epoch": 0.19, + "learning_rate": 1.994203677813142e-05, + "loss": 0.5288, + "step": 1113 + }, + { + "epoch": 0.19, + "learning_rate": 1.994183863637877e-05, + "loss": 0.529, + "step": 1114 + }, + { + "epoch": 0.19, + "learning_rate": 1.9941640157527107e-05, + "loss": 0.5259, + "step": 1115 + }, + { + "epoch": 0.19, + "learning_rate": 1.9941441341583154e-05, + "loss": 0.5187, + "step": 1116 + }, + { + "epoch": 0.19, + "learning_rate": 1.994124218855365e-05, + "loss": 0.5295, + "step": 1117 + }, + { + "epoch": 0.19, + "learning_rate": 1.9941042698445353e-05, + "loss": 0.5181, + "step": 1118 + }, + { + "epoch": 0.19, + "learning_rate": 1.9940842871265027e-05, + "loss": 0.5621, + "step": 1119 + }, + { + "epoch": 0.19, + "learning_rate": 1.9940642707019446e-05, + "loss": 0.522, + "step": 1120 + }, + { + "epoch": 0.19, + "learning_rate": 1.9940442205715397e-05, + "loss": 0.5476, + "step": 1121 + }, + { + "epoch": 0.19, + "learning_rate": 1.9940241367359675e-05, + "loss": 0.5355, + "step": 1122 + }, + { + "epoch": 0.19, + "learning_rate": 1.9940040191959097e-05, + "loss": 0.5886, + "step": 1123 + }, + { + "epoch": 0.19, + "learning_rate": 1.993983867952048e-05, + "loss": 0.5855, + "step": 1124 + }, + { + "epoch": 0.19, + "learning_rate": 1.9939636830050655e-05, + "loss": 0.5562, + "step": 1125 + }, + { + "epoch": 0.19, + "learning_rate": 1.9939434643556467e-05, + "loss": 0.5983, + "step": 1126 + }, + { + "epoch": 0.19, + "learning_rate": 1.9939232120044777e-05, + "loss": 0.534, + "step": 1127 + }, + { + "epoch": 0.19, + "learning_rate": 1.9939029259522448e-05, + "loss": 0.5277, + "step": 1128 + }, + { + "epoch": 0.19, + "learning_rate": 1.9938826061996355e-05, + "loss": 0.5594, + "step": 1129 + }, + { + "epoch": 0.19, + "learning_rate": 1.9938622527473392e-05, + "loss": 0.5425, + "step": 1130 + }, + { + "epoch": 0.19, + "learning_rate": 1.993841865596046e-05, + "loss": 0.5611, + "step": 1131 + }, + { + "epoch": 0.19, + "learning_rate": 1.993821444746447e-05, + "loss": 0.5237, + "step": 1132 + }, + { + "epoch": 0.19, + "learning_rate": 1.993800990199235e-05, + "loss": 0.5451, + "step": 1133 + }, + { + "epoch": 0.19, + "learning_rate": 1.993780501955103e-05, + "loss": 0.532, + "step": 1134 + }, + { + "epoch": 0.19, + "learning_rate": 1.993759980014746e-05, + "loss": 0.5415, + "step": 1135 + }, + { + "epoch": 0.19, + "learning_rate": 1.9937394243788598e-05, + "loss": 0.611, + "step": 1136 + }, + { + "epoch": 0.19, + "learning_rate": 1.9937188350481417e-05, + "loss": 0.5952, + "step": 1137 + }, + { + "epoch": 0.19, + "learning_rate": 1.993698212023289e-05, + "loss": 0.5464, + "step": 1138 + }, + { + "epoch": 0.19, + "learning_rate": 1.9936775553050017e-05, + "loss": 0.5347, + "step": 1139 + }, + { + "epoch": 0.19, + "learning_rate": 1.9936568648939802e-05, + "loss": 0.5445, + "step": 1140 + }, + { + "epoch": 0.19, + "learning_rate": 1.9936361407909256e-05, + "loss": 0.5573, + "step": 1141 + }, + { + "epoch": 0.19, + "learning_rate": 1.993615382996541e-05, + "loss": 0.5159, + "step": 1142 + }, + { + "epoch": 0.19, + "learning_rate": 1.99359459151153e-05, + "loss": 0.5429, + "step": 1143 + }, + { + "epoch": 0.2, + "learning_rate": 1.9935737663365976e-05, + "loss": 0.5626, + "step": 1144 + }, + { + "epoch": 0.2, + "learning_rate": 1.99355290747245e-05, + "loss": 0.5876, + "step": 1145 + }, + { + "epoch": 0.2, + "learning_rate": 1.9935320149197945e-05, + "loss": 0.5857, + "step": 1146 + }, + { + "epoch": 0.2, + "learning_rate": 1.9935110886793394e-05, + "loss": 0.5158, + "step": 1147 + }, + { + "epoch": 0.2, + "learning_rate": 1.9934901287517945e-05, + "loss": 0.5216, + "step": 1148 + }, + { + "epoch": 0.2, + "learning_rate": 1.9934691351378702e-05, + "loss": 0.583, + "step": 1149 + }, + { + "epoch": 0.2, + "learning_rate": 1.993448107838278e-05, + "loss": 0.5358, + "step": 1150 + }, + { + "epoch": 0.2, + "learning_rate": 1.993427046853732e-05, + "loss": 0.5696, + "step": 1151 + }, + { + "epoch": 0.2, + "learning_rate": 1.993405952184945e-05, + "loss": 0.5475, + "step": 1152 + }, + { + "epoch": 0.2, + "learning_rate": 1.9933848238326333e-05, + "loss": 0.55, + "step": 1153 + }, + { + "epoch": 0.2, + "learning_rate": 1.9933636617975127e-05, + "loss": 0.5475, + "step": 1154 + }, + { + "epoch": 0.2, + "learning_rate": 1.9933424660803006e-05, + "loss": 0.5528, + "step": 1155 + }, + { + "epoch": 0.2, + "learning_rate": 1.9933212366817166e-05, + "loss": 0.5647, + "step": 1156 + }, + { + "epoch": 0.2, + "learning_rate": 1.9932999736024795e-05, + "loss": 0.5308, + "step": 1157 + }, + { + "epoch": 0.2, + "learning_rate": 1.9932786768433106e-05, + "loss": 0.5334, + "step": 1158 + }, + { + "epoch": 0.2, + "learning_rate": 1.9932573464049323e-05, + "loss": 0.5456, + "step": 1159 + }, + { + "epoch": 0.2, + "learning_rate": 1.9932359822880677e-05, + "loss": 0.5609, + "step": 1160 + }, + { + "epoch": 0.2, + "learning_rate": 1.9932145844934415e-05, + "loss": 0.5672, + "step": 1161 + }, + { + "epoch": 0.2, + "learning_rate": 1.9931931530217783e-05, + "loss": 0.5456, + "step": 1162 + }, + { + "epoch": 0.2, + "learning_rate": 1.9931716878738056e-05, + "loss": 0.5422, + "step": 1163 + }, + { + "epoch": 0.2, + "learning_rate": 1.9931501890502512e-05, + "loss": 0.5814, + "step": 1164 + }, + { + "epoch": 0.2, + "learning_rate": 1.9931286565518435e-05, + "loss": 0.5548, + "step": 1165 + }, + { + "epoch": 0.2, + "learning_rate": 1.993107090379313e-05, + "loss": 0.5583, + "step": 1166 + }, + { + "epoch": 0.2, + "learning_rate": 1.9930854905333913e-05, + "loss": 0.568, + "step": 1167 + }, + { + "epoch": 0.2, + "learning_rate": 1.99306385701481e-05, + "loss": 0.5881, + "step": 1168 + }, + { + "epoch": 0.2, + "learning_rate": 1.993042189824303e-05, + "loss": 0.5924, + "step": 1169 + }, + { + "epoch": 0.2, + "learning_rate": 1.9930204889626052e-05, + "loss": 0.5432, + "step": 1170 + }, + { + "epoch": 0.2, + "learning_rate": 1.9929987544304522e-05, + "loss": 0.5545, + "step": 1171 + }, + { + "epoch": 0.2, + "learning_rate": 1.992976986228581e-05, + "loss": 0.5345, + "step": 1172 + }, + { + "epoch": 0.2, + "learning_rate": 1.9929551843577296e-05, + "loss": 0.5432, + "step": 1173 + }, + { + "epoch": 0.2, + "learning_rate": 1.9929333488186375e-05, + "loss": 0.5718, + "step": 1174 + }, + { + "epoch": 0.2, + "learning_rate": 1.9929114796120447e-05, + "loss": 0.5235, + "step": 1175 + }, + { + "epoch": 0.2, + "learning_rate": 1.992889576738693e-05, + "loss": 0.5263, + "step": 1176 + }, + { + "epoch": 0.2, + "learning_rate": 1.9928676401993247e-05, + "loss": 0.5388, + "step": 1177 + }, + { + "epoch": 0.2, + "learning_rate": 1.9928456699946842e-05, + "loss": 0.5789, + "step": 1178 + }, + { + "epoch": 0.2, + "learning_rate": 1.9928236661255158e-05, + "loss": 0.5727, + "step": 1179 + }, + { + "epoch": 0.2, + "learning_rate": 1.992801628592566e-05, + "loss": 0.5425, + "step": 1180 + }, + { + "epoch": 0.2, + "learning_rate": 1.992779557396582e-05, + "loss": 0.5325, + "step": 1181 + }, + { + "epoch": 0.2, + "learning_rate": 1.9927574525383123e-05, + "loss": 0.5537, + "step": 1182 + }, + { + "epoch": 0.2, + "learning_rate": 1.992735314018506e-05, + "loss": 0.5822, + "step": 1183 + }, + { + "epoch": 0.2, + "learning_rate": 1.9927131418379143e-05, + "loss": 0.5788, + "step": 1184 + }, + { + "epoch": 0.2, + "learning_rate": 1.992690935997288e-05, + "loss": 0.574, + "step": 1185 + }, + { + "epoch": 0.2, + "learning_rate": 1.9926686964973813e-05, + "loss": 0.5333, + "step": 1186 + }, + { + "epoch": 0.2, + "learning_rate": 1.9926464233389474e-05, + "loss": 0.5677, + "step": 1187 + }, + { + "epoch": 0.2, + "learning_rate": 1.992624116522742e-05, + "loss": 0.5696, + "step": 1188 + }, + { + "epoch": 0.2, + "learning_rate": 1.9926017760495208e-05, + "loss": 0.5436, + "step": 1189 + }, + { + "epoch": 0.2, + "learning_rate": 1.9925794019200423e-05, + "loss": 0.5444, + "step": 1190 + }, + { + "epoch": 0.2, + "learning_rate": 1.992556994135064e-05, + "loss": 0.5368, + "step": 1191 + }, + { + "epoch": 0.2, + "learning_rate": 1.992534552695347e-05, + "loss": 0.5456, + "step": 1192 + }, + { + "epoch": 0.2, + "learning_rate": 1.992512077601651e-05, + "loss": 0.5057, + "step": 1193 + }, + { + "epoch": 0.2, + "learning_rate": 1.992489568854739e-05, + "loss": 0.5599, + "step": 1194 + }, + { + "epoch": 0.2, + "learning_rate": 1.9924670264553736e-05, + "loss": 0.5374, + "step": 1195 + }, + { + "epoch": 0.2, + "learning_rate": 1.9924444504043193e-05, + "loss": 0.559, + "step": 1196 + }, + { + "epoch": 0.2, + "learning_rate": 1.9924218407023415e-05, + "loss": 0.5389, + "step": 1197 + }, + { + "epoch": 0.2, + "learning_rate": 1.9923991973502068e-05, + "loss": 0.5704, + "step": 1198 + }, + { + "epoch": 0.2, + "learning_rate": 1.9923765203486836e-05, + "loss": 0.5887, + "step": 1199 + }, + { + "epoch": 0.2, + "learning_rate": 1.99235380969854e-05, + "loss": 0.5621, + "step": 1200 + }, + { + "epoch": 0.2, + "learning_rate": 1.9923310654005465e-05, + "loss": 0.5829, + "step": 1201 + }, + { + "epoch": 0.2, + "learning_rate": 1.992308287455474e-05, + "loss": 0.5678, + "step": 1202 + }, + { + "epoch": 0.21, + "learning_rate": 1.9922854758640952e-05, + "loss": 0.5089, + "step": 1203 + }, + { + "epoch": 0.21, + "learning_rate": 1.9922626306271832e-05, + "loss": 0.5547, + "step": 1204 + }, + { + "epoch": 0.21, + "learning_rate": 1.992239751745513e-05, + "loss": 0.5353, + "step": 1205 + }, + { + "epoch": 0.21, + "learning_rate": 1.9922168392198602e-05, + "loss": 0.5632, + "step": 1206 + }, + { + "epoch": 0.21, + "learning_rate": 1.9921938930510017e-05, + "loss": 0.5252, + "step": 1207 + }, + { + "epoch": 0.21, + "learning_rate": 1.9921709132397152e-05, + "loss": 0.5889, + "step": 1208 + }, + { + "epoch": 0.21, + "learning_rate": 1.9921478997867803e-05, + "loss": 0.5334, + "step": 1209 + }, + { + "epoch": 0.21, + "learning_rate": 1.992124852692977e-05, + "loss": 0.5544, + "step": 1210 + }, + { + "epoch": 0.21, + "learning_rate": 1.9921017719590875e-05, + "loss": 0.5723, + "step": 1211 + }, + { + "epoch": 0.21, + "learning_rate": 1.9920786575858933e-05, + "loss": 0.5666, + "step": 1212 + }, + { + "epoch": 0.21, + "learning_rate": 1.9920555095741788e-05, + "loss": 0.5464, + "step": 1213 + }, + { + "epoch": 0.21, + "learning_rate": 1.992032327924729e-05, + "loss": 0.5414, + "step": 1214 + }, + { + "epoch": 0.21, + "learning_rate": 1.9920091126383294e-05, + "loss": 0.5121, + "step": 1215 + }, + { + "epoch": 0.21, + "learning_rate": 1.9919858637157675e-05, + "loss": 0.5468, + "step": 1216 + }, + { + "epoch": 0.21, + "learning_rate": 1.991962581157832e-05, + "loss": 0.5554, + "step": 1217 + }, + { + "epoch": 0.21, + "learning_rate": 1.9919392649653114e-05, + "loss": 0.5391, + "step": 1218 + }, + { + "epoch": 0.21, + "learning_rate": 1.991915915138997e-05, + "loss": 0.5672, + "step": 1219 + }, + { + "epoch": 0.21, + "learning_rate": 1.9918925316796806e-05, + "loss": 0.5608, + "step": 1220 + }, + { + "epoch": 0.21, + "learning_rate": 1.9918691145881542e-05, + "loss": 0.5536, + "step": 1221 + }, + { + "epoch": 0.21, + "learning_rate": 1.9918456638652128e-05, + "loss": 0.5499, + "step": 1222 + }, + { + "epoch": 0.21, + "learning_rate": 1.991822179511651e-05, + "loss": 0.565, + "step": 1223 + }, + { + "epoch": 0.21, + "learning_rate": 1.9917986615282652e-05, + "loss": 0.5716, + "step": 1224 + }, + { + "epoch": 0.21, + "learning_rate": 1.9917751099158532e-05, + "loss": 0.5788, + "step": 1225 + }, + { + "epoch": 0.21, + "learning_rate": 1.9917515246752127e-05, + "loss": 0.5452, + "step": 1226 + }, + { + "epoch": 0.21, + "learning_rate": 1.9917279058071443e-05, + "loss": 0.5284, + "step": 1227 + }, + { + "epoch": 0.21, + "learning_rate": 1.9917042533124483e-05, + "loss": 0.5663, + "step": 1228 + }, + { + "epoch": 0.21, + "learning_rate": 1.991680567191927e-05, + "loss": 0.5147, + "step": 1229 + }, + { + "epoch": 0.21, + "learning_rate": 1.9916568474463833e-05, + "loss": 0.5774, + "step": 1230 + }, + { + "epoch": 0.21, + "learning_rate": 1.9916330940766216e-05, + "loss": 0.5474, + "step": 1231 + }, + { + "epoch": 0.21, + "learning_rate": 1.991609307083447e-05, + "loss": 0.5352, + "step": 1232 + }, + { + "epoch": 0.21, + "learning_rate": 1.9915854864676665e-05, + "loss": 0.5617, + "step": 1233 + }, + { + "epoch": 0.21, + "learning_rate": 1.9915616322300875e-05, + "loss": 0.5646, + "step": 1234 + }, + { + "epoch": 0.21, + "learning_rate": 1.991537744371519e-05, + "loss": 0.5269, + "step": 1235 + }, + { + "epoch": 0.21, + "learning_rate": 1.9915138228927712e-05, + "loss": 0.5812, + "step": 1236 + }, + { + "epoch": 0.21, + "learning_rate": 1.9914898677946546e-05, + "loss": 0.5781, + "step": 1237 + }, + { + "epoch": 0.21, + "learning_rate": 1.9914658790779818e-05, + "loss": 0.4995, + "step": 1238 + }, + { + "epoch": 0.21, + "learning_rate": 1.991441856743566e-05, + "loss": 0.5957, + "step": 1239 + }, + { + "epoch": 0.21, + "learning_rate": 1.991417800792222e-05, + "loss": 0.561, + "step": 1240 + }, + { + "epoch": 0.21, + "learning_rate": 1.9913937112247655e-05, + "loss": 0.5644, + "step": 1241 + }, + { + "epoch": 0.21, + "learning_rate": 1.991369588042013e-05, + "loss": 0.5565, + "step": 1242 + }, + { + "epoch": 0.21, + "learning_rate": 1.9913454312447827e-05, + "loss": 0.5285, + "step": 1243 + }, + { + "epoch": 0.21, + "learning_rate": 1.9913212408338935e-05, + "loss": 0.522, + "step": 1244 + }, + { + "epoch": 0.21, + "learning_rate": 1.9912970168101656e-05, + "loss": 0.5545, + "step": 1245 + }, + { + "epoch": 0.21, + "learning_rate": 1.9912727591744204e-05, + "loss": 0.5344, + "step": 1246 + }, + { + "epoch": 0.21, + "learning_rate": 1.991248467927481e-05, + "loss": 0.5211, + "step": 1247 + }, + { + "epoch": 0.21, + "learning_rate": 1.99122414307017e-05, + "loss": 0.5179, + "step": 1248 + }, + { + "epoch": 0.21, + "learning_rate": 1.991199784603313e-05, + "loss": 0.5731, + "step": 1249 + }, + { + "epoch": 0.21, + "learning_rate": 1.9911753925277354e-05, + "loss": 0.5379, + "step": 1250 + }, + { + "epoch": 0.21, + "learning_rate": 1.9911509668442646e-05, + "loss": 0.5657, + "step": 1251 + }, + { + "epoch": 0.21, + "learning_rate": 1.9911265075537288e-05, + "loss": 0.5803, + "step": 1252 + }, + { + "epoch": 0.21, + "learning_rate": 1.9911020146569573e-05, + "loss": 0.5519, + "step": 1253 + }, + { + "epoch": 0.21, + "learning_rate": 1.9910774881547803e-05, + "loss": 0.5142, + "step": 1254 + }, + { + "epoch": 0.21, + "learning_rate": 1.9910529280480298e-05, + "loss": 0.5149, + "step": 1255 + }, + { + "epoch": 0.21, + "learning_rate": 1.991028334337538e-05, + "loss": 0.5089, + "step": 1256 + }, + { + "epoch": 0.21, + "learning_rate": 1.99100370702414e-05, + "loss": 0.5254, + "step": 1257 + }, + { + "epoch": 0.21, + "learning_rate": 1.9909790461086698e-05, + "loss": 0.5506, + "step": 1258 + }, + { + "epoch": 0.21, + "learning_rate": 1.9909543515919637e-05, + "loss": 0.5227, + "step": 1259 + }, + { + "epoch": 0.21, + "learning_rate": 1.990929623474859e-05, + "loss": 0.524, + "step": 1260 + }, + { + "epoch": 0.22, + "learning_rate": 1.9909048617581944e-05, + "loss": 0.5392, + "step": 1261 + }, + { + "epoch": 0.22, + "learning_rate": 1.9908800664428095e-05, + "loss": 0.542, + "step": 1262 + }, + { + "epoch": 0.22, + "learning_rate": 1.990855237529545e-05, + "loss": 0.594, + "step": 1263 + }, + { + "epoch": 0.22, + "learning_rate": 1.9908303750192428e-05, + "loss": 0.5717, + "step": 1264 + }, + { + "epoch": 0.22, + "learning_rate": 1.990805478912746e-05, + "loss": 0.5237, + "step": 1265 + }, + { + "epoch": 0.22, + "learning_rate": 1.990780549210898e-05, + "loss": 0.5325, + "step": 1266 + }, + { + "epoch": 0.22, + "learning_rate": 1.990755585914545e-05, + "loss": 0.5269, + "step": 1267 + }, + { + "epoch": 0.22, + "learning_rate": 1.9907305890245335e-05, + "loss": 0.5626, + "step": 1268 + }, + { + "epoch": 0.22, + "learning_rate": 1.9907055585417102e-05, + "loss": 0.5549, + "step": 1269 + }, + { + "epoch": 0.22, + "learning_rate": 1.9906804944669247e-05, + "loss": 0.5164, + "step": 1270 + }, + { + "epoch": 0.22, + "learning_rate": 1.9906553968010264e-05, + "loss": 0.5631, + "step": 1271 + }, + { + "epoch": 0.22, + "learning_rate": 1.990630265544866e-05, + "loss": 0.5523, + "step": 1272 + }, + { + "epoch": 0.22, + "learning_rate": 1.990605100699296e-05, + "loss": 0.5528, + "step": 1273 + }, + { + "epoch": 0.22, + "learning_rate": 1.9905799022651703e-05, + "loss": 0.5493, + "step": 1274 + }, + { + "epoch": 0.22, + "learning_rate": 1.990554670243342e-05, + "loss": 0.5132, + "step": 1275 + }, + { + "epoch": 0.22, + "learning_rate": 1.9905294046346676e-05, + "loss": 0.508, + "step": 1276 + }, + { + "epoch": 0.22, + "learning_rate": 1.9905041054400033e-05, + "loss": 0.5509, + "step": 1277 + }, + { + "epoch": 0.22, + "learning_rate": 1.990478772660207e-05, + "loss": 0.5579, + "step": 1278 + }, + { + "epoch": 0.22, + "learning_rate": 1.9904534062961384e-05, + "loss": 0.5259, + "step": 1279 + }, + { + "epoch": 0.22, + "learning_rate": 1.9904280063486563e-05, + "loss": 0.5403, + "step": 1280 + }, + { + "epoch": 0.22, + "learning_rate": 1.990402572818623e-05, + "loss": 0.5581, + "step": 1281 + }, + { + "epoch": 0.22, + "learning_rate": 1.9903771057069002e-05, + "loss": 0.555, + "step": 1282 + }, + { + "epoch": 0.22, + "learning_rate": 1.9903516050143518e-05, + "loss": 0.5441, + "step": 1283 + }, + { + "epoch": 0.22, + "learning_rate": 1.9903260707418424e-05, + "loss": 0.5518, + "step": 1284 + }, + { + "epoch": 0.22, + "learning_rate": 1.9903005028902376e-05, + "loss": 0.5206, + "step": 1285 + }, + { + "epoch": 0.22, + "learning_rate": 1.990274901460405e-05, + "loss": 0.5032, + "step": 1286 + }, + { + "epoch": 0.22, + "learning_rate": 1.9902492664532116e-05, + "loss": 0.5548, + "step": 1287 + }, + { + "epoch": 0.22, + "learning_rate": 1.9902235978695274e-05, + "loss": 0.5251, + "step": 1288 + }, + { + "epoch": 0.22, + "learning_rate": 1.9901978957102222e-05, + "loss": 0.5612, + "step": 1289 + }, + { + "epoch": 0.22, + "learning_rate": 1.9901721599761682e-05, + "loss": 0.5274, + "step": 1290 + }, + { + "epoch": 0.22, + "learning_rate": 1.9901463906682377e-05, + "loss": 0.5143, + "step": 1291 + }, + { + "epoch": 0.22, + "learning_rate": 1.990120587787304e-05, + "loss": 0.5204, + "step": 1292 + }, + { + "epoch": 0.22, + "learning_rate": 1.9900947513342427e-05, + "loss": 0.602, + "step": 1293 + }, + { + "epoch": 0.22, + "learning_rate": 1.990068881309929e-05, + "loss": 0.5854, + "step": 1294 + }, + { + "epoch": 0.22, + "learning_rate": 1.9900429777152413e-05, + "loss": 0.5501, + "step": 1295 + }, + { + "epoch": 0.22, + "learning_rate": 1.9900170405510567e-05, + "loss": 0.5692, + "step": 1296 + }, + { + "epoch": 0.22, + "learning_rate": 1.9899910698182553e-05, + "loss": 0.5386, + "step": 1297 + }, + { + "epoch": 0.22, + "learning_rate": 1.9899650655177178e-05, + "loss": 0.5323, + "step": 1298 + }, + { + "epoch": 0.22, + "learning_rate": 1.9899390276503254e-05, + "loss": 0.5298, + "step": 1299 + }, + { + "epoch": 0.22, + "learning_rate": 1.989912956216961e-05, + "loss": 0.555, + "step": 1300 + }, + { + "epoch": 0.22, + "learning_rate": 1.9898868512185095e-05, + "loss": 0.5514, + "step": 1301 + }, + { + "epoch": 0.22, + "learning_rate": 1.9898607126558552e-05, + "loss": 0.5478, + "step": 1302 + }, + { + "epoch": 0.22, + "learning_rate": 1.9898345405298847e-05, + "loss": 0.5426, + "step": 1303 + }, + { + "epoch": 0.22, + "learning_rate": 1.9898083348414848e-05, + "loss": 0.5473, + "step": 1304 + }, + { + "epoch": 0.22, + "learning_rate": 1.989782095591545e-05, + "loss": 0.5449, + "step": 1305 + }, + { + "epoch": 0.22, + "learning_rate": 1.9897558227809548e-05, + "loss": 0.6178, + "step": 1306 + }, + { + "epoch": 0.22, + "learning_rate": 1.9897295164106043e-05, + "loss": 0.5209, + "step": 1307 + }, + { + "epoch": 0.22, + "learning_rate": 1.9897031764813862e-05, + "loss": 0.5046, + "step": 1308 + }, + { + "epoch": 0.22, + "learning_rate": 1.9896768029941935e-05, + "loss": 0.5704, + "step": 1309 + }, + { + "epoch": 0.22, + "learning_rate": 1.9896503959499204e-05, + "loss": 0.5301, + "step": 1310 + }, + { + "epoch": 0.22, + "learning_rate": 1.989623955349462e-05, + "loss": 0.5605, + "step": 1311 + }, + { + "epoch": 0.22, + "learning_rate": 1.989597481193715e-05, + "loss": 0.5482, + "step": 1312 + }, + { + "epoch": 0.22, + "learning_rate": 1.9895709734835774e-05, + "loss": 0.5454, + "step": 1313 + }, + { + "epoch": 0.22, + "learning_rate": 1.9895444322199474e-05, + "loss": 0.6146, + "step": 1314 + }, + { + "epoch": 0.22, + "learning_rate": 1.9895178574037256e-05, + "loss": 0.5288, + "step": 1315 + }, + { + "epoch": 0.22, + "learning_rate": 1.9894912490358125e-05, + "loss": 0.5011, + "step": 1316 + }, + { + "epoch": 0.22, + "learning_rate": 1.9894646071171103e-05, + "loss": 0.4939, + "step": 1317 + }, + { + "epoch": 0.22, + "learning_rate": 1.989437931648523e-05, + "loss": 0.5504, + "step": 1318 + }, + { + "epoch": 0.22, + "learning_rate": 1.9894112226309547e-05, + "loss": 0.5825, + "step": 1319 + }, + { + "epoch": 0.23, + "learning_rate": 1.989384480065311e-05, + "loss": 0.5878, + "step": 1320 + }, + { + "epoch": 0.23, + "learning_rate": 1.9893577039524982e-05, + "loss": 0.5303, + "step": 1321 + }, + { + "epoch": 0.23, + "learning_rate": 1.989330894293425e-05, + "loss": 0.5487, + "step": 1322 + }, + { + "epoch": 0.23, + "learning_rate": 1.9893040510890003e-05, + "loss": 0.5651, + "step": 1323 + }, + { + "epoch": 0.23, + "learning_rate": 1.9892771743401337e-05, + "loss": 0.5674, + "step": 1324 + }, + { + "epoch": 0.23, + "learning_rate": 1.9892502640477375e-05, + "loss": 0.5862, + "step": 1325 + }, + { + "epoch": 0.23, + "learning_rate": 1.9892233202127228e-05, + "loss": 0.5538, + "step": 1326 + }, + { + "epoch": 0.23, + "learning_rate": 1.9891963428360043e-05, + "loss": 0.5183, + "step": 1327 + }, + { + "epoch": 0.23, + "learning_rate": 1.9891693319184965e-05, + "loss": 0.5567, + "step": 1328 + }, + { + "epoch": 0.23, + "learning_rate": 1.9891422874611148e-05, + "loss": 0.5769, + "step": 1329 + }, + { + "epoch": 0.23, + "learning_rate": 1.989115209464777e-05, + "loss": 0.533, + "step": 1330 + }, + { + "epoch": 0.23, + "learning_rate": 1.9890880979304e-05, + "loss": 0.5706, + "step": 1331 + }, + { + "epoch": 0.23, + "learning_rate": 1.989060952858904e-05, + "loss": 0.5558, + "step": 1332 + }, + { + "epoch": 0.23, + "learning_rate": 1.9890337742512096e-05, + "loss": 0.5576, + "step": 1333 + }, + { + "epoch": 0.23, + "learning_rate": 1.989006562108238e-05, + "loss": 0.544, + "step": 1334 + }, + { + "epoch": 0.23, + "learning_rate": 1.988979316430912e-05, + "loss": 0.5227, + "step": 1335 + }, + { + "epoch": 0.23, + "learning_rate": 1.988952037220155e-05, + "loss": 0.5201, + "step": 1336 + }, + { + "epoch": 0.23, + "learning_rate": 1.9889247244768925e-05, + "loss": 0.5537, + "step": 1337 + }, + { + "epoch": 0.23, + "learning_rate": 1.98889737820205e-05, + "loss": 0.5663, + "step": 1338 + }, + { + "epoch": 0.23, + "learning_rate": 1.9888699983965553e-05, + "loss": 0.5654, + "step": 1339 + }, + { + "epoch": 0.23, + "learning_rate": 1.9888425850613365e-05, + "loss": 0.5473, + "step": 1340 + }, + { + "epoch": 0.23, + "learning_rate": 1.9888151381973227e-05, + "loss": 0.5567, + "step": 1341 + }, + { + "epoch": 0.23, + "learning_rate": 1.9887876578054458e-05, + "loss": 0.5654, + "step": 1342 + }, + { + "epoch": 0.23, + "learning_rate": 1.9887601438866363e-05, + "loss": 0.5753, + "step": 1343 + }, + { + "epoch": 0.23, + "learning_rate": 1.9887325964418277e-05, + "loss": 0.5101, + "step": 1344 + }, + { + "epoch": 0.23, + "learning_rate": 1.988705015471954e-05, + "loss": 0.5203, + "step": 1345 + }, + { + "epoch": 0.23, + "learning_rate": 1.9886774009779507e-05, + "loss": 0.5317, + "step": 1346 + }, + { + "epoch": 0.23, + "learning_rate": 1.9886497529607532e-05, + "loss": 0.5332, + "step": 1347 + }, + { + "epoch": 0.23, + "learning_rate": 1.9886220714212998e-05, + "loss": 0.5665, + "step": 1348 + }, + { + "epoch": 0.23, + "learning_rate": 1.9885943563605287e-05, + "loss": 0.6018, + "step": 1349 + }, + { + "epoch": 0.23, + "learning_rate": 1.98856660777938e-05, + "loss": 0.5749, + "step": 1350 + }, + { + "epoch": 0.23, + "learning_rate": 1.9885388256787943e-05, + "loss": 0.5438, + "step": 1351 + }, + { + "epoch": 0.23, + "learning_rate": 1.9885110100597137e-05, + "loss": 0.5168, + "step": 1352 + }, + { + "epoch": 0.23, + "learning_rate": 1.9884831609230813e-05, + "loss": 0.5501, + "step": 1353 + }, + { + "epoch": 0.23, + "learning_rate": 1.988455278269841e-05, + "loss": 0.5427, + "step": 1354 + }, + { + "epoch": 0.23, + "learning_rate": 1.9884273621009393e-05, + "loss": 0.586, + "step": 1355 + }, + { + "epoch": 0.23, + "learning_rate": 1.9883994124173216e-05, + "loss": 0.5405, + "step": 1356 + }, + { + "epoch": 0.23, + "learning_rate": 1.9883714292199362e-05, + "loss": 0.5516, + "step": 1357 + }, + { + "epoch": 0.23, + "learning_rate": 1.988343412509732e-05, + "loss": 0.5618, + "step": 1358 + }, + { + "epoch": 0.23, + "learning_rate": 1.9883153622876582e-05, + "loss": 0.5731, + "step": 1359 + }, + { + "epoch": 0.23, + "learning_rate": 1.988287278554667e-05, + "loss": 0.5472, + "step": 1360 + }, + { + "epoch": 0.23, + "learning_rate": 1.9882591613117098e-05, + "loss": 0.5515, + "step": 1361 + }, + { + "epoch": 0.23, + "learning_rate": 1.9882310105597404e-05, + "loss": 0.5389, + "step": 1362 + }, + { + "epoch": 0.23, + "learning_rate": 1.988202826299713e-05, + "loss": 0.5705, + "step": 1363 + }, + { + "epoch": 0.23, + "learning_rate": 1.988174608532584e-05, + "loss": 0.5429, + "step": 1364 + }, + { + "epoch": 0.23, + "learning_rate": 1.9881463572593088e-05, + "loss": 0.5008, + "step": 1365 + }, + { + "epoch": 0.23, + "learning_rate": 1.988118072480847e-05, + "loss": 0.5109, + "step": 1366 + }, + { + "epoch": 0.23, + "learning_rate": 1.988089754198156e-05, + "loss": 0.5674, + "step": 1367 + }, + { + "epoch": 0.23, + "learning_rate": 1.9880614024121973e-05, + "loss": 0.5013, + "step": 1368 + }, + { + "epoch": 0.23, + "learning_rate": 1.9880330171239313e-05, + "loss": 0.5415, + "step": 1369 + }, + { + "epoch": 0.23, + "learning_rate": 1.9880045983343214e-05, + "loss": 0.5133, + "step": 1370 + }, + { + "epoch": 0.23, + "learning_rate": 1.9879761460443304e-05, + "loss": 0.5295, + "step": 1371 + }, + { + "epoch": 0.23, + "learning_rate": 1.987947660254923e-05, + "loss": 0.5385, + "step": 1372 + }, + { + "epoch": 0.23, + "learning_rate": 1.987919140967066e-05, + "loss": 0.5566, + "step": 1373 + }, + { + "epoch": 0.23, + "learning_rate": 1.9878905881817254e-05, + "loss": 0.5736, + "step": 1374 + }, + { + "epoch": 0.23, + "learning_rate": 1.9878620018998696e-05, + "loss": 0.5354, + "step": 1375 + }, + { + "epoch": 0.23, + "learning_rate": 1.9878333821224683e-05, + "loss": 0.5467, + "step": 1376 + }, + { + "epoch": 0.23, + "learning_rate": 1.9878047288504914e-05, + "loss": 0.5363, + "step": 1377 + }, + { + "epoch": 0.24, + "learning_rate": 1.9877760420849108e-05, + "loss": 0.4934, + "step": 1378 + }, + { + "epoch": 0.24, + "learning_rate": 1.987747321826699e-05, + "loss": 0.5013, + "step": 1379 + }, + { + "epoch": 0.24, + "learning_rate": 1.9877185680768297e-05, + "loss": 0.5585, + "step": 1380 + }, + { + "epoch": 0.24, + "learning_rate": 1.9876897808362784e-05, + "loss": 0.5284, + "step": 1381 + }, + { + "epoch": 0.24, + "learning_rate": 1.9876609601060207e-05, + "loss": 0.5436, + "step": 1382 + }, + { + "epoch": 0.24, + "learning_rate": 1.9876321058870337e-05, + "loss": 0.5504, + "step": 1383 + }, + { + "epoch": 0.24, + "learning_rate": 1.9876032181802962e-05, + "loss": 0.5351, + "step": 1384 + }, + { + "epoch": 0.24, + "learning_rate": 1.9875742969867878e-05, + "loss": 0.4956, + "step": 1385 + }, + { + "epoch": 0.24, + "learning_rate": 1.9875453423074883e-05, + "loss": 0.581, + "step": 1386 + }, + { + "epoch": 0.24, + "learning_rate": 1.9875163541433805e-05, + "loss": 0.5537, + "step": 1387 + }, + { + "epoch": 0.24, + "learning_rate": 1.9874873324954464e-05, + "loss": 0.5187, + "step": 1388 + }, + { + "epoch": 0.24, + "learning_rate": 1.9874582773646706e-05, + "loss": 0.5387, + "step": 1389 + }, + { + "epoch": 0.24, + "learning_rate": 1.9874291887520383e-05, + "loss": 0.5396, + "step": 1390 + }, + { + "epoch": 0.24, + "learning_rate": 1.9874000666585354e-05, + "loss": 0.5287, + "step": 1391 + }, + { + "epoch": 0.24, + "learning_rate": 1.9873709110851497e-05, + "loss": 0.5103, + "step": 1392 + }, + { + "epoch": 0.24, + "learning_rate": 1.9873417220328697e-05, + "loss": 0.5227, + "step": 1393 + }, + { + "epoch": 0.24, + "learning_rate": 1.9873124995026852e-05, + "loss": 0.5544, + "step": 1394 + }, + { + "epoch": 0.24, + "learning_rate": 1.987283243495587e-05, + "loss": 0.5791, + "step": 1395 + }, + { + "epoch": 0.24, + "learning_rate": 1.987253954012567e-05, + "loss": 0.5775, + "step": 1396 + }, + { + "epoch": 0.24, + "learning_rate": 1.987224631054618e-05, + "loss": 0.56, + "step": 1397 + }, + { + "epoch": 0.24, + "learning_rate": 1.9871952746227347e-05, + "loss": 0.5744, + "step": 1398 + }, + { + "epoch": 0.24, + "learning_rate": 1.9871658847179125e-05, + "loss": 0.5347, + "step": 1399 + }, + { + "epoch": 0.24, + "learning_rate": 1.9871364613411478e-05, + "loss": 0.5267, + "step": 1400 + }, + { + "epoch": 0.24, + "learning_rate": 1.9871070044934384e-05, + "loss": 0.5943, + "step": 1401 + }, + { + "epoch": 0.24, + "learning_rate": 1.987077514175783e-05, + "loss": 0.5396, + "step": 1402 + }, + { + "epoch": 0.24, + "learning_rate": 1.9870479903891814e-05, + "loss": 0.5296, + "step": 1403 + }, + { + "epoch": 0.24, + "learning_rate": 1.987018433134635e-05, + "loss": 0.5658, + "step": 1404 + }, + { + "epoch": 0.24, + "learning_rate": 1.9869888424131455e-05, + "loss": 0.5592, + "step": 1405 + }, + { + "epoch": 0.24, + "learning_rate": 1.9869592182257165e-05, + "loss": 0.5305, + "step": 1406 + }, + { + "epoch": 0.24, + "learning_rate": 1.9869295605733528e-05, + "loss": 0.4942, + "step": 1407 + }, + { + "epoch": 0.24, + "learning_rate": 1.986899869457059e-05, + "loss": 0.5098, + "step": 1408 + }, + { + "epoch": 0.24, + "learning_rate": 1.9868701448778433e-05, + "loss": 0.5838, + "step": 1409 + }, + { + "epoch": 0.24, + "learning_rate": 1.9868403868367125e-05, + "loss": 0.5324, + "step": 1410 + }, + { + "epoch": 0.24, + "learning_rate": 1.986810595334676e-05, + "loss": 0.5382, + "step": 1411 + }, + { + "epoch": 0.24, + "learning_rate": 1.986780770372744e-05, + "loss": 0.5447, + "step": 1412 + }, + { + "epoch": 0.24, + "learning_rate": 1.9867509119519273e-05, + "loss": 0.5438, + "step": 1413 + }, + { + "epoch": 0.24, + "learning_rate": 1.986721020073239e-05, + "loss": 0.553, + "step": 1414 + }, + { + "epoch": 0.24, + "learning_rate": 1.986691094737692e-05, + "loss": 0.5351, + "step": 1415 + }, + { + "epoch": 0.24, + "learning_rate": 1.9866611359463013e-05, + "loss": 0.5455, + "step": 1416 + }, + { + "epoch": 0.24, + "learning_rate": 1.9866311437000828e-05, + "loss": 0.5067, + "step": 1417 + }, + { + "epoch": 0.24, + "learning_rate": 1.986601118000053e-05, + "loss": 0.5296, + "step": 1418 + }, + { + "epoch": 0.24, + "learning_rate": 1.9865710588472307e-05, + "loss": 0.5563, + "step": 1419 + }, + { + "epoch": 0.24, + "learning_rate": 1.986540966242635e-05, + "loss": 0.5495, + "step": 1420 + }, + { + "epoch": 0.24, + "learning_rate": 1.9865108401872856e-05, + "loss": 0.5561, + "step": 1421 + }, + { + "epoch": 0.24, + "learning_rate": 1.9864806806822047e-05, + "loss": 0.5328, + "step": 1422 + }, + { + "epoch": 0.24, + "learning_rate": 1.9864504877284143e-05, + "loss": 0.5439, + "step": 1423 + }, + { + "epoch": 0.24, + "learning_rate": 1.9864202613269388e-05, + "loss": 0.5251, + "step": 1424 + }, + { + "epoch": 0.24, + "learning_rate": 1.9863900014788028e-05, + "loss": 0.5404, + "step": 1425 + }, + { + "epoch": 0.24, + "learning_rate": 1.9863597081850317e-05, + "loss": 0.5441, + "step": 1426 + }, + { + "epoch": 0.24, + "learning_rate": 1.9863293814466537e-05, + "loss": 0.5679, + "step": 1427 + }, + { + "epoch": 0.24, + "learning_rate": 1.9862990212646966e-05, + "loss": 0.5936, + "step": 1428 + }, + { + "epoch": 0.24, + "learning_rate": 1.9862686276401898e-05, + "loss": 0.5234, + "step": 1429 + }, + { + "epoch": 0.24, + "learning_rate": 1.986238200574164e-05, + "loss": 0.5143, + "step": 1430 + }, + { + "epoch": 0.24, + "learning_rate": 1.9862077400676506e-05, + "loss": 0.5285, + "step": 1431 + }, + { + "epoch": 0.24, + "learning_rate": 1.986177246121683e-05, + "loss": 0.5638, + "step": 1432 + }, + { + "epoch": 0.24, + "learning_rate": 1.9861467187372944e-05, + "loss": 0.5485, + "step": 1433 + }, + { + "epoch": 0.24, + "learning_rate": 1.9861161579155202e-05, + "loss": 0.5254, + "step": 1434 + }, + { + "epoch": 0.24, + "learning_rate": 1.986085563657397e-05, + "loss": 0.5398, + "step": 1435 + }, + { + "epoch": 0.24, + "learning_rate": 1.986054935963962e-05, + "loss": 0.6018, + "step": 1436 + }, + { + "epoch": 0.25, + "learning_rate": 1.9860242748362535e-05, + "loss": 0.572, + "step": 1437 + }, + { + "epoch": 0.25, + "learning_rate": 1.9859935802753113e-05, + "loss": 0.564, + "step": 1438 + }, + { + "epoch": 0.25, + "learning_rate": 1.9859628522821758e-05, + "loss": 0.5107, + "step": 1439 + }, + { + "epoch": 0.25, + "learning_rate": 1.9859320908578893e-05, + "loss": 0.5421, + "step": 1440 + }, + { + "epoch": 0.25, + "learning_rate": 1.985901296003495e-05, + "loss": 0.5387, + "step": 1441 + }, + { + "epoch": 0.25, + "learning_rate": 1.9858704677200366e-05, + "loss": 0.5283, + "step": 1442 + }, + { + "epoch": 0.25, + "learning_rate": 1.9858396060085595e-05, + "loss": 0.537, + "step": 1443 + }, + { + "epoch": 0.25, + "learning_rate": 1.98580871087011e-05, + "loss": 0.5307, + "step": 1444 + }, + { + "epoch": 0.25, + "learning_rate": 1.985777782305736e-05, + "loss": 0.5512, + "step": 1445 + }, + { + "epoch": 0.25, + "learning_rate": 1.9857468203164864e-05, + "loss": 0.5392, + "step": 1446 + }, + { + "epoch": 0.25, + "learning_rate": 1.9857158249034102e-05, + "loss": 0.5538, + "step": 1447 + }, + { + "epoch": 0.25, + "learning_rate": 1.985684796067559e-05, + "loss": 0.5222, + "step": 1448 + }, + { + "epoch": 0.25, + "learning_rate": 1.9856537338099852e-05, + "loss": 0.5133, + "step": 1449 + }, + { + "epoch": 0.25, + "learning_rate": 1.9856226381317413e-05, + "loss": 0.5175, + "step": 1450 + }, + { + "epoch": 0.25, + "learning_rate": 1.9855915090338817e-05, + "loss": 0.5323, + "step": 1451 + }, + { + "epoch": 0.25, + "learning_rate": 1.9855603465174623e-05, + "loss": 0.499, + "step": 1452 + }, + { + "epoch": 0.25, + "learning_rate": 1.98552915058354e-05, + "loss": 0.5468, + "step": 1453 + }, + { + "epoch": 0.25, + "learning_rate": 1.9854979212331717e-05, + "loss": 0.5415, + "step": 1454 + }, + { + "epoch": 0.25, + "learning_rate": 1.985466658467417e-05, + "loss": 0.535, + "step": 1455 + }, + { + "epoch": 0.25, + "learning_rate": 1.985435362287335e-05, + "loss": 0.5436, + "step": 1456 + }, + { + "epoch": 0.25, + "learning_rate": 1.9854040326939884e-05, + "loss": 0.5738, + "step": 1457 + }, + { + "epoch": 0.25, + "learning_rate": 1.9853726696884384e-05, + "loss": 0.549, + "step": 1458 + }, + { + "epoch": 0.25, + "learning_rate": 1.9853412732717484e-05, + "loss": 0.5367, + "step": 1459 + }, + { + "epoch": 0.25, + "learning_rate": 1.9853098434449835e-05, + "loss": 0.5525, + "step": 1460 + }, + { + "epoch": 0.25, + "learning_rate": 1.9852783802092094e-05, + "loss": 0.5549, + "step": 1461 + }, + { + "epoch": 0.25, + "learning_rate": 1.9852468835654922e-05, + "loss": 0.5763, + "step": 1462 + }, + { + "epoch": 0.25, + "learning_rate": 1.9852153535149003e-05, + "loss": 0.5434, + "step": 1463 + }, + { + "epoch": 0.25, + "learning_rate": 1.985183790058503e-05, + "loss": 0.5147, + "step": 1464 + }, + { + "epoch": 0.25, + "learning_rate": 1.98515219319737e-05, + "loss": 0.5407, + "step": 1465 + }, + { + "epoch": 0.25, + "learning_rate": 1.9851205629325735e-05, + "loss": 0.5332, + "step": 1466 + }, + { + "epoch": 0.25, + "learning_rate": 1.985088899265185e-05, + "loss": 0.5825, + "step": 1467 + }, + { + "epoch": 0.25, + "learning_rate": 1.9850572021962788e-05, + "loss": 0.4976, + "step": 1468 + }, + { + "epoch": 0.25, + "learning_rate": 1.9850254717269295e-05, + "loss": 0.52, + "step": 1469 + }, + { + "epoch": 0.25, + "learning_rate": 1.984993707858213e-05, + "loss": 0.5504, + "step": 1470 + }, + { + "epoch": 0.25, + "learning_rate": 1.984961910591206e-05, + "loss": 0.5667, + "step": 1471 + }, + { + "epoch": 0.25, + "learning_rate": 1.9849300799269867e-05, + "loss": 0.5553, + "step": 1472 + }, + { + "epoch": 0.25, + "learning_rate": 1.9848982158666354e-05, + "loss": 0.5644, + "step": 1473 + }, + { + "epoch": 0.25, + "learning_rate": 1.9848663184112312e-05, + "loss": 0.5211, + "step": 1474 + }, + { + "epoch": 0.25, + "learning_rate": 1.9848343875618563e-05, + "loss": 0.5532, + "step": 1475 + }, + { + "epoch": 0.25, + "learning_rate": 1.9848024233195932e-05, + "loss": 0.521, + "step": 1476 + }, + { + "epoch": 0.25, + "learning_rate": 1.9847704256855258e-05, + "loss": 0.573, + "step": 1477 + }, + { + "epoch": 0.25, + "learning_rate": 1.9847383946607392e-05, + "loss": 0.55, + "step": 1478 + }, + { + "epoch": 0.25, + "learning_rate": 1.984706330246319e-05, + "loss": 0.5465, + "step": 1479 + }, + { + "epoch": 0.25, + "learning_rate": 1.9846742324433527e-05, + "loss": 0.5658, + "step": 1480 + }, + { + "epoch": 0.25, + "learning_rate": 1.984642101252929e-05, + "loss": 0.6051, + "step": 1481 + }, + { + "epoch": 0.25, + "learning_rate": 1.9846099366761367e-05, + "loss": 0.5418, + "step": 1482 + }, + { + "epoch": 0.25, + "learning_rate": 1.9845777387140667e-05, + "loss": 0.5869, + "step": 1483 + }, + { + "epoch": 0.25, + "learning_rate": 1.984545507367811e-05, + "loss": 0.494, + "step": 1484 + }, + { + "epoch": 0.25, + "learning_rate": 1.984513242638462e-05, + "loss": 0.5223, + "step": 1485 + }, + { + "epoch": 0.25, + "learning_rate": 1.9844809445271144e-05, + "loss": 0.5789, + "step": 1486 + }, + { + "epoch": 0.25, + "learning_rate": 1.9844486130348624e-05, + "loss": 0.5282, + "step": 1487 + }, + { + "epoch": 0.25, + "learning_rate": 1.984416248162803e-05, + "loss": 0.5185, + "step": 1488 + }, + { + "epoch": 0.25, + "learning_rate": 1.9843838499120333e-05, + "loss": 0.4921, + "step": 1489 + }, + { + "epoch": 0.25, + "learning_rate": 1.984351418283652e-05, + "loss": 0.5262, + "step": 1490 + }, + { + "epoch": 0.25, + "learning_rate": 1.9843189532787586e-05, + "loss": 0.5161, + "step": 1491 + }, + { + "epoch": 0.25, + "learning_rate": 1.984286454898454e-05, + "loss": 0.5078, + "step": 1492 + }, + { + "epoch": 0.25, + "learning_rate": 1.98425392314384e-05, + "loss": 0.5337, + "step": 1493 + }, + { + "epoch": 0.25, + "learning_rate": 1.9842213580160194e-05, + "loss": 0.5037, + "step": 1494 + }, + { + "epoch": 0.25, + "learning_rate": 1.9841887595160973e-05, + "loss": 0.5133, + "step": 1495 + }, + { + "epoch": 0.26, + "learning_rate": 1.984156127645178e-05, + "loss": 0.5625, + "step": 1496 + }, + { + "epoch": 0.26, + "learning_rate": 1.9841234624043686e-05, + "loss": 0.5444, + "step": 1497 + }, + { + "epoch": 0.26, + "learning_rate": 1.9840907637947768e-05, + "loss": 0.5272, + "step": 1498 + }, + { + "epoch": 0.26, + "learning_rate": 1.9840580318175102e-05, + "loss": 0.5195, + "step": 1499 + }, + { + "epoch": 0.26, + "learning_rate": 1.98402526647368e-05, + "loss": 0.5343, + "step": 1500 + }, + { + "epoch": 0.26, + "learning_rate": 1.9839924677643964e-05, + "loss": 0.5562, + "step": 1501 + }, + { + "epoch": 0.26, + "learning_rate": 1.9839596356907718e-05, + "loss": 0.4946, + "step": 1502 + }, + { + "epoch": 0.26, + "learning_rate": 1.9839267702539194e-05, + "loss": 0.5496, + "step": 1503 + }, + { + "epoch": 0.26, + "learning_rate": 1.9838938714549535e-05, + "loss": 0.5388, + "step": 1504 + }, + { + "epoch": 0.26, + "learning_rate": 1.9838609392949895e-05, + "loss": 0.5471, + "step": 1505 + }, + { + "epoch": 0.26, + "learning_rate": 1.9838279737751445e-05, + "loss": 0.538, + "step": 1506 + }, + { + "epoch": 0.26, + "learning_rate": 1.9837949748965356e-05, + "loss": 0.5518, + "step": 1507 + }, + { + "epoch": 0.26, + "learning_rate": 1.9837619426602824e-05, + "loss": 0.5275, + "step": 1508 + }, + { + "epoch": 0.26, + "learning_rate": 1.9837288770675042e-05, + "loss": 0.5051, + "step": 1509 + }, + { + "epoch": 0.26, + "learning_rate": 1.9836957781193228e-05, + "loss": 0.551, + "step": 1510 + }, + { + "epoch": 0.26, + "learning_rate": 1.98366264581686e-05, + "loss": 0.4731, + "step": 1511 + }, + { + "epoch": 0.26, + "learning_rate": 1.9836294801612393e-05, + "loss": 0.5452, + "step": 1512 + }, + { + "epoch": 0.26, + "learning_rate": 1.9835962811535857e-05, + "loss": 0.5333, + "step": 1513 + }, + { + "epoch": 0.26, + "learning_rate": 1.9835630487950245e-05, + "loss": 0.5439, + "step": 1514 + }, + { + "epoch": 0.26, + "learning_rate": 1.9835297830866827e-05, + "loss": 0.5508, + "step": 1515 + }, + { + "epoch": 0.26, + "learning_rate": 1.9834964840296878e-05, + "loss": 0.5327, + "step": 1516 + }, + { + "epoch": 0.26, + "learning_rate": 1.9834631516251693e-05, + "loss": 0.551, + "step": 1517 + }, + { + "epoch": 0.26, + "learning_rate": 1.9834297858742574e-05, + "loss": 0.6113, + "step": 1518 + }, + { + "epoch": 0.26, + "learning_rate": 1.9833963867780835e-05, + "loss": 0.5455, + "step": 1519 + }, + { + "epoch": 0.26, + "learning_rate": 1.9833629543377797e-05, + "loss": 0.5153, + "step": 1520 + }, + { + "epoch": 0.26, + "learning_rate": 1.9833294885544797e-05, + "loss": 0.5272, + "step": 1521 + }, + { + "epoch": 0.26, + "learning_rate": 1.9832959894293187e-05, + "loss": 0.541, + "step": 1522 + }, + { + "epoch": 0.26, + "learning_rate": 1.983262456963432e-05, + "loss": 0.5733, + "step": 1523 + }, + { + "epoch": 0.26, + "learning_rate": 1.9832288911579565e-05, + "loss": 0.5546, + "step": 1524 + }, + { + "epoch": 0.26, + "learning_rate": 1.9831952920140308e-05, + "loss": 0.4968, + "step": 1525 + }, + { + "epoch": 0.26, + "learning_rate": 1.983161659532794e-05, + "loss": 0.538, + "step": 1526 + }, + { + "epoch": 0.26, + "learning_rate": 1.9831279937153865e-05, + "loss": 0.5559, + "step": 1527 + }, + { + "epoch": 0.26, + "learning_rate": 1.9830942945629495e-05, + "loss": 0.5592, + "step": 1528 + }, + { + "epoch": 0.26, + "learning_rate": 1.9830605620766262e-05, + "loss": 0.5492, + "step": 1529 + }, + { + "epoch": 0.26, + "learning_rate": 1.98302679625756e-05, + "loss": 0.5108, + "step": 1530 + }, + { + "epoch": 0.26, + "learning_rate": 1.9829929971068956e-05, + "loss": 0.54, + "step": 1531 + }, + { + "epoch": 0.26, + "learning_rate": 1.9829591646257796e-05, + "loss": 0.565, + "step": 1532 + }, + { + "epoch": 0.26, + "learning_rate": 1.9829252988153585e-05, + "loss": 0.5351, + "step": 1533 + }, + { + "epoch": 0.26, + "learning_rate": 1.982891399676781e-05, + "loss": 0.549, + "step": 1534 + }, + { + "epoch": 0.26, + "learning_rate": 1.9828574672111967e-05, + "loss": 0.5549, + "step": 1535 + }, + { + "epoch": 0.26, + "learning_rate": 1.982823501419756e-05, + "loss": 0.5244, + "step": 1536 + }, + { + "epoch": 0.26, + "learning_rate": 1.98278950230361e-05, + "loss": 0.5, + "step": 1537 + }, + { + "epoch": 0.26, + "learning_rate": 1.9827554698639125e-05, + "loss": 0.55, + "step": 1538 + }, + { + "epoch": 0.26, + "learning_rate": 1.9827214041018167e-05, + "loss": 0.5192, + "step": 1539 + }, + { + "epoch": 0.26, + "learning_rate": 1.9826873050184782e-05, + "loss": 0.5033, + "step": 1540 + }, + { + "epoch": 0.26, + "learning_rate": 1.9826531726150523e-05, + "loss": 0.5041, + "step": 1541 + }, + { + "epoch": 0.26, + "learning_rate": 1.9826190068926973e-05, + "loss": 0.5405, + "step": 1542 + }, + { + "epoch": 0.26, + "learning_rate": 1.9825848078525714e-05, + "loss": 0.5599, + "step": 1543 + }, + { + "epoch": 0.26, + "learning_rate": 1.982550575495834e-05, + "loss": 0.5747, + "step": 1544 + }, + { + "epoch": 0.26, + "learning_rate": 1.9825163098236462e-05, + "loss": 0.5205, + "step": 1545 + }, + { + "epoch": 0.26, + "learning_rate": 1.982482010837169e-05, + "loss": 0.5185, + "step": 1546 + }, + { + "epoch": 0.26, + "learning_rate": 1.9824476785375668e-05, + "loss": 0.564, + "step": 1547 + }, + { + "epoch": 0.26, + "learning_rate": 1.9824133129260022e-05, + "loss": 0.5385, + "step": 1548 + }, + { + "epoch": 0.26, + "learning_rate": 1.982378914003641e-05, + "loss": 0.5301, + "step": 1549 + }, + { + "epoch": 0.26, + "learning_rate": 1.98234448177165e-05, + "loss": 0.5519, + "step": 1550 + }, + { + "epoch": 0.26, + "learning_rate": 1.9823100162311967e-05, + "loss": 0.5588, + "step": 1551 + }, + { + "epoch": 0.26, + "learning_rate": 1.9822755173834488e-05, + "loss": 0.5739, + "step": 1552 + }, + { + "epoch": 0.26, + "learning_rate": 1.9822409852295766e-05, + "loss": 0.559, + "step": 1553 + }, + { + "epoch": 0.27, + "learning_rate": 1.9822064197707515e-05, + "loss": 0.4904, + "step": 1554 + }, + { + "epoch": 0.27, + "learning_rate": 1.982171821008145e-05, + "loss": 0.5252, + "step": 1555 + }, + { + "epoch": 0.27, + "learning_rate": 1.98213718894293e-05, + "loss": 0.4825, + "step": 1556 + }, + { + "epoch": 0.27, + "learning_rate": 1.9821025235762816e-05, + "loss": 0.5776, + "step": 1557 + }, + { + "epoch": 0.27, + "learning_rate": 1.982067824909374e-05, + "loss": 0.5331, + "step": 1558 + }, + { + "epoch": 0.27, + "learning_rate": 1.9820330929433847e-05, + "loss": 0.5299, + "step": 1559 + }, + { + "epoch": 0.27, + "learning_rate": 1.981998327679491e-05, + "loss": 0.5686, + "step": 1560 + }, + { + "epoch": 0.27, + "learning_rate": 1.9819635291188716e-05, + "loss": 0.5974, + "step": 1561 + }, + { + "epoch": 0.27, + "learning_rate": 1.9819286972627066e-05, + "loss": 0.528, + "step": 1562 + }, + { + "epoch": 0.27, + "learning_rate": 1.9818938321121776e-05, + "loss": 0.5527, + "step": 1563 + }, + { + "epoch": 0.27, + "learning_rate": 1.9818589336684656e-05, + "loss": 0.5503, + "step": 1564 + }, + { + "epoch": 0.27, + "learning_rate": 1.9818240019327546e-05, + "loss": 0.5346, + "step": 1565 + }, + { + "epoch": 0.27, + "learning_rate": 1.981789036906229e-05, + "loss": 0.5234, + "step": 1566 + }, + { + "epoch": 0.27, + "learning_rate": 1.9817540385900744e-05, + "loss": 0.5606, + "step": 1567 + }, + { + "epoch": 0.27, + "learning_rate": 1.9817190069854773e-05, + "loss": 0.5347, + "step": 1568 + }, + { + "epoch": 0.27, + "learning_rate": 1.9816839420936255e-05, + "loss": 0.5391, + "step": 1569 + }, + { + "epoch": 0.27, + "learning_rate": 1.9816488439157082e-05, + "loss": 0.5634, + "step": 1570 + }, + { + "epoch": 0.27, + "learning_rate": 1.9816137124529154e-05, + "loss": 0.5195, + "step": 1571 + }, + { + "epoch": 0.27, + "learning_rate": 1.981578547706438e-05, + "loss": 0.4939, + "step": 1572 + }, + { + "epoch": 0.27, + "learning_rate": 1.9815433496774687e-05, + "loss": 0.5494, + "step": 1573 + }, + { + "epoch": 0.27, + "learning_rate": 1.981508118367201e-05, + "loss": 0.5487, + "step": 1574 + }, + { + "epoch": 0.27, + "learning_rate": 1.981472853776829e-05, + "loss": 0.5692, + "step": 1575 + }, + { + "epoch": 0.27, + "learning_rate": 1.981437555907549e-05, + "loss": 0.5512, + "step": 1576 + }, + { + "epoch": 0.27, + "learning_rate": 1.9814022247605576e-05, + "loss": 0.5283, + "step": 1577 + }, + { + "epoch": 0.27, + "learning_rate": 1.981366860337053e-05, + "loss": 0.5305, + "step": 1578 + }, + { + "epoch": 0.27, + "learning_rate": 1.9813314626382335e-05, + "loss": 0.5445, + "step": 1579 + }, + { + "epoch": 0.27, + "learning_rate": 1.9812960316653003e-05, + "loss": 0.5771, + "step": 1580 + }, + { + "epoch": 0.27, + "learning_rate": 1.9812605674194544e-05, + "loss": 0.5491, + "step": 1581 + }, + { + "epoch": 0.27, + "learning_rate": 1.981225069901898e-05, + "loss": 0.5296, + "step": 1582 + }, + { + "epoch": 0.27, + "learning_rate": 1.981189539113835e-05, + "loss": 0.5652, + "step": 1583 + }, + { + "epoch": 0.27, + "learning_rate": 1.9811539750564702e-05, + "loss": 0.5626, + "step": 1584 + }, + { + "epoch": 0.27, + "learning_rate": 1.9811183777310095e-05, + "loss": 0.5139, + "step": 1585 + }, + { + "epoch": 0.27, + "learning_rate": 1.9810827471386602e-05, + "loss": 0.545, + "step": 1586 + }, + { + "epoch": 0.27, + "learning_rate": 1.9810470832806294e-05, + "loss": 0.5227, + "step": 1587 + }, + { + "epoch": 0.27, + "learning_rate": 1.9810113861581274e-05, + "loss": 0.5879, + "step": 1588 + }, + { + "epoch": 0.27, + "learning_rate": 1.980975655772364e-05, + "loss": 0.5181, + "step": 1589 + }, + { + "epoch": 0.27, + "learning_rate": 1.980939892124551e-05, + "loss": 0.507, + "step": 1590 + }, + { + "epoch": 0.27, + "learning_rate": 1.9809040952159008e-05, + "loss": 0.4878, + "step": 1591 + }, + { + "epoch": 0.27, + "learning_rate": 1.9808682650476275e-05, + "loss": 0.5452, + "step": 1592 + }, + { + "epoch": 0.27, + "learning_rate": 1.9808324016209456e-05, + "loss": 0.488, + "step": 1593 + }, + { + "epoch": 0.27, + "learning_rate": 1.9807965049370716e-05, + "loss": 0.547, + "step": 1594 + }, + { + "epoch": 0.27, + "learning_rate": 1.980760574997222e-05, + "loss": 0.5289, + "step": 1595 + }, + { + "epoch": 0.27, + "learning_rate": 1.980724611802616e-05, + "loss": 0.5062, + "step": 1596 + }, + { + "epoch": 0.27, + "learning_rate": 1.9806886153544722e-05, + "loss": 0.5428, + "step": 1597 + }, + { + "epoch": 0.27, + "learning_rate": 1.9806525856540112e-05, + "loss": 0.515, + "step": 1598 + }, + { + "epoch": 0.27, + "learning_rate": 1.9806165227024553e-05, + "loss": 0.5581, + "step": 1599 + }, + { + "epoch": 0.27, + "learning_rate": 1.9805804265010263e-05, + "loss": 0.5629, + "step": 1600 + }, + { + "epoch": 0.27, + "learning_rate": 1.980544297050949e-05, + "loss": 0.544, + "step": 1601 + }, + { + "epoch": 0.27, + "learning_rate": 1.9805081343534486e-05, + "loss": 0.5545, + "step": 1602 + }, + { + "epoch": 0.27, + "learning_rate": 1.98047193840975e-05, + "loss": 0.512, + "step": 1603 + }, + { + "epoch": 0.27, + "learning_rate": 1.9804357092210817e-05, + "loss": 0.5292, + "step": 1604 + }, + { + "epoch": 0.27, + "learning_rate": 1.9803994467886718e-05, + "loss": 0.5338, + "step": 1605 + }, + { + "epoch": 0.27, + "learning_rate": 1.9803631511137495e-05, + "loss": 0.5219, + "step": 1606 + }, + { + "epoch": 0.27, + "learning_rate": 1.980326822197546e-05, + "loss": 0.5662, + "step": 1607 + }, + { + "epoch": 0.27, + "learning_rate": 1.980290460041293e-05, + "loss": 0.5389, + "step": 1608 + }, + { + "epoch": 0.27, + "learning_rate": 1.980254064646223e-05, + "loss": 0.5379, + "step": 1609 + }, + { + "epoch": 0.27, + "learning_rate": 1.9802176360135705e-05, + "loss": 0.5262, + "step": 1610 + }, + { + "epoch": 0.27, + "learning_rate": 1.9801811741445704e-05, + "loss": 0.5169, + "step": 1611 + }, + { + "epoch": 0.27, + "learning_rate": 1.9801446790404592e-05, + "loss": 0.5164, + "step": 1612 + }, + { + "epoch": 0.28, + "learning_rate": 1.9801081507024748e-05, + "loss": 0.5294, + "step": 1613 + }, + { + "epoch": 0.28, + "learning_rate": 1.9800715891318546e-05, + "loss": 0.5385, + "step": 1614 + }, + { + "epoch": 0.28, + "learning_rate": 1.9800349943298392e-05, + "loss": 0.5309, + "step": 1615 + }, + { + "epoch": 0.28, + "learning_rate": 1.9799983662976693e-05, + "loss": 0.5815, + "step": 1616 + }, + { + "epoch": 0.28, + "learning_rate": 1.979961705036587e-05, + "loss": 0.5487, + "step": 1617 + }, + { + "epoch": 0.28, + "learning_rate": 1.979925010547835e-05, + "loss": 0.5492, + "step": 1618 + }, + { + "epoch": 0.28, + "learning_rate": 1.9798882828326572e-05, + "loss": 0.5731, + "step": 1619 + }, + { + "epoch": 0.28, + "learning_rate": 1.9798515218923e-05, + "loss": 0.5107, + "step": 1620 + }, + { + "epoch": 0.28, + "learning_rate": 1.979814727728009e-05, + "loss": 0.526, + "step": 1621 + }, + { + "epoch": 0.28, + "learning_rate": 1.979777900341032e-05, + "loss": 0.5537, + "step": 1622 + }, + { + "epoch": 0.28, + "learning_rate": 1.9797410397326176e-05, + "loss": 0.5596, + "step": 1623 + }, + { + "epoch": 0.28, + "learning_rate": 1.979704145904016e-05, + "loss": 0.4852, + "step": 1624 + }, + { + "epoch": 0.28, + "learning_rate": 1.9796672188564777e-05, + "loss": 0.5255, + "step": 1625 + }, + { + "epoch": 0.28, + "learning_rate": 1.9796302585912552e-05, + "loss": 0.5225, + "step": 1626 + }, + { + "epoch": 0.28, + "learning_rate": 1.9795932651096014e-05, + "loss": 0.4942, + "step": 1627 + }, + { + "epoch": 0.28, + "learning_rate": 1.979556238412771e-05, + "loss": 0.521, + "step": 1628 + }, + { + "epoch": 0.28, + "learning_rate": 1.979519178502019e-05, + "loss": 0.5357, + "step": 1629 + }, + { + "epoch": 0.28, + "learning_rate": 1.9794820853786023e-05, + "loss": 0.5418, + "step": 1630 + }, + { + "epoch": 0.28, + "learning_rate": 1.9794449590437786e-05, + "loss": 0.5462, + "step": 1631 + }, + { + "epoch": 0.28, + "learning_rate": 1.9794077994988066e-05, + "loss": 0.4973, + "step": 1632 + }, + { + "epoch": 0.28, + "learning_rate": 1.9793706067449464e-05, + "loss": 0.5266, + "step": 1633 + }, + { + "epoch": 0.28, + "learning_rate": 1.9793333807834593e-05, + "loss": 0.5232, + "step": 1634 + }, + { + "epoch": 0.28, + "learning_rate": 1.979296121615607e-05, + "loss": 0.5521, + "step": 1635 + }, + { + "epoch": 0.28, + "learning_rate": 1.9792588292426532e-05, + "loss": 0.4994, + "step": 1636 + }, + { + "epoch": 0.28, + "learning_rate": 1.9792215036658625e-05, + "loss": 0.5627, + "step": 1637 + }, + { + "epoch": 0.28, + "learning_rate": 1.9791841448865003e-05, + "loss": 0.5481, + "step": 1638 + }, + { + "epoch": 0.28, + "learning_rate": 1.979146752905833e-05, + "loss": 0.5563, + "step": 1639 + }, + { + "epoch": 0.28, + "learning_rate": 1.9791093277251294e-05, + "loss": 0.5032, + "step": 1640 + }, + { + "epoch": 0.28, + "learning_rate": 1.9790718693456573e-05, + "loss": 0.4945, + "step": 1641 + }, + { + "epoch": 0.28, + "learning_rate": 1.979034377768688e-05, + "loss": 0.5676, + "step": 1642 + }, + { + "epoch": 0.28, + "learning_rate": 1.9789968529954916e-05, + "loss": 0.5547, + "step": 1643 + }, + { + "epoch": 0.28, + "learning_rate": 1.978959295027341e-05, + "loss": 0.5376, + "step": 1644 + }, + { + "epoch": 0.28, + "learning_rate": 1.97892170386551e-05, + "loss": 0.492, + "step": 1645 + }, + { + "epoch": 0.28, + "learning_rate": 1.9788840795112727e-05, + "loss": 0.551, + "step": 1646 + }, + { + "epoch": 0.28, + "learning_rate": 1.9788464219659055e-05, + "loss": 0.5498, + "step": 1647 + }, + { + "epoch": 0.28, + "learning_rate": 1.978808731230684e-05, + "loss": 0.5554, + "step": 1648 + }, + { + "epoch": 0.28, + "learning_rate": 1.9787710073068875e-05, + "loss": 0.5484, + "step": 1649 + }, + { + "epoch": 0.28, + "learning_rate": 1.9787332501957942e-05, + "loss": 0.5169, + "step": 1650 + }, + { + "epoch": 0.28, + "learning_rate": 1.9786954598986846e-05, + "loss": 0.5478, + "step": 1651 + }, + { + "epoch": 0.28, + "learning_rate": 1.9786576364168404e-05, + "loss": 0.5248, + "step": 1652 + }, + { + "epoch": 0.28, + "learning_rate": 1.978619779751544e-05, + "loss": 0.5889, + "step": 1653 + }, + { + "epoch": 0.28, + "learning_rate": 1.9785818899040786e-05, + "loss": 0.5482, + "step": 1654 + }, + { + "epoch": 0.28, + "learning_rate": 1.978543966875729e-05, + "loss": 0.5256, + "step": 1655 + }, + { + "epoch": 0.28, + "learning_rate": 1.9785060106677818e-05, + "loss": 0.5852, + "step": 1656 + }, + { + "epoch": 0.28, + "learning_rate": 1.978468021281523e-05, + "loss": 0.5517, + "step": 1657 + }, + { + "epoch": 0.28, + "learning_rate": 1.9784299987182412e-05, + "loss": 0.5033, + "step": 1658 + }, + { + "epoch": 0.28, + "learning_rate": 1.9783919429792257e-05, + "loss": 0.5569, + "step": 1659 + }, + { + "epoch": 0.28, + "learning_rate": 1.9783538540657666e-05, + "loss": 0.4728, + "step": 1660 + }, + { + "epoch": 0.28, + "learning_rate": 1.9783157319791556e-05, + "loss": 0.5721, + "step": 1661 + }, + { + "epoch": 0.28, + "learning_rate": 1.9782775767206853e-05, + "loss": 0.5008, + "step": 1662 + }, + { + "epoch": 0.28, + "learning_rate": 1.978239388291649e-05, + "loss": 0.5981, + "step": 1663 + }, + { + "epoch": 0.28, + "learning_rate": 1.9782011666933425e-05, + "loss": 0.5155, + "step": 1664 + }, + { + "epoch": 0.28, + "learning_rate": 1.9781629119270607e-05, + "loss": 0.5605, + "step": 1665 + }, + { + "epoch": 0.28, + "learning_rate": 1.9781246239941017e-05, + "loss": 0.5242, + "step": 1666 + }, + { + "epoch": 0.28, + "learning_rate": 1.9780863028957628e-05, + "loss": 0.5152, + "step": 1667 + }, + { + "epoch": 0.28, + "learning_rate": 1.978047948633344e-05, + "loss": 0.5327, + "step": 1668 + }, + { + "epoch": 0.28, + "learning_rate": 1.9780095612081454e-05, + "loss": 0.552, + "step": 1669 + }, + { + "epoch": 0.28, + "learning_rate": 1.9779711406214694e-05, + "loss": 0.526, + "step": 1670 + }, + { + "epoch": 0.28, + "learning_rate": 1.9779326868746173e-05, + "loss": 0.5078, + "step": 1671 + }, + { + "epoch": 0.29, + "learning_rate": 1.9778941999688945e-05, + "loss": 0.566, + "step": 1672 + }, + { + "epoch": 0.29, + "learning_rate": 1.977855679905605e-05, + "loss": 0.5031, + "step": 1673 + }, + { + "epoch": 0.29, + "learning_rate": 1.977817126686055e-05, + "loss": 0.5469, + "step": 1674 + }, + { + "epoch": 0.29, + "learning_rate": 1.977778540311552e-05, + "loss": 0.5588, + "step": 1675 + }, + { + "epoch": 0.29, + "learning_rate": 1.977739920783404e-05, + "loss": 0.4809, + "step": 1676 + }, + { + "epoch": 0.29, + "learning_rate": 1.977701268102921e-05, + "loss": 0.5456, + "step": 1677 + }, + { + "epoch": 0.29, + "learning_rate": 1.9776625822714135e-05, + "loss": 0.5417, + "step": 1678 + }, + { + "epoch": 0.29, + "learning_rate": 1.9776238632901927e-05, + "loss": 0.5588, + "step": 1679 + }, + { + "epoch": 0.29, + "learning_rate": 1.977585111160572e-05, + "loss": 0.5093, + "step": 1680 + }, + { + "epoch": 0.29, + "learning_rate": 1.9775463258838652e-05, + "loss": 0.4968, + "step": 1681 + }, + { + "epoch": 0.29, + "learning_rate": 1.9775075074613873e-05, + "loss": 0.5798, + "step": 1682 + }, + { + "epoch": 0.29, + "learning_rate": 1.9774686558944544e-05, + "loss": 0.5097, + "step": 1683 + }, + { + "epoch": 0.29, + "learning_rate": 1.9774297711843843e-05, + "loss": 0.5471, + "step": 1684 + }, + { + "epoch": 0.29, + "learning_rate": 1.9773908533324953e-05, + "loss": 0.5036, + "step": 1685 + }, + { + "epoch": 0.29, + "learning_rate": 1.9773519023401066e-05, + "loss": 0.5159, + "step": 1686 + }, + { + "epoch": 0.29, + "learning_rate": 1.9773129182085393e-05, + "loss": 0.5051, + "step": 1687 + }, + { + "epoch": 0.29, + "learning_rate": 1.9772739009391154e-05, + "loss": 0.5437, + "step": 1688 + }, + { + "epoch": 0.29, + "learning_rate": 1.977234850533157e-05, + "loss": 0.5027, + "step": 1689 + }, + { + "epoch": 0.29, + "learning_rate": 1.977195766991989e-05, + "loss": 0.5332, + "step": 1690 + }, + { + "epoch": 0.29, + "learning_rate": 1.9771566503169367e-05, + "loss": 0.5895, + "step": 1691 + }, + { + "epoch": 0.29, + "learning_rate": 1.9771175005093263e-05, + "loss": 0.5411, + "step": 1692 + }, + { + "epoch": 0.29, + "learning_rate": 1.9770783175704846e-05, + "loss": 0.5071, + "step": 1693 + }, + { + "epoch": 0.29, + "learning_rate": 1.9770391015017408e-05, + "loss": 0.4862, + "step": 1694 + }, + { + "epoch": 0.29, + "learning_rate": 1.9769998523044248e-05, + "loss": 0.5357, + "step": 1695 + }, + { + "epoch": 0.29, + "learning_rate": 1.9769605699798667e-05, + "loss": 0.5492, + "step": 1696 + }, + { + "epoch": 0.29, + "learning_rate": 1.976921254529399e-05, + "loss": 0.5575, + "step": 1697 + }, + { + "epoch": 0.29, + "learning_rate": 1.9768819059543548e-05, + "loss": 0.5515, + "step": 1698 + }, + { + "epoch": 0.29, + "learning_rate": 1.9768425242560682e-05, + "loss": 0.5416, + "step": 1699 + }, + { + "epoch": 0.29, + "learning_rate": 1.976803109435874e-05, + "loss": 0.5469, + "step": 1700 + }, + { + "epoch": 0.29, + "learning_rate": 1.9767636614951092e-05, + "loss": 0.5544, + "step": 1701 + }, + { + "epoch": 0.29, + "learning_rate": 1.9767241804351114e-05, + "loss": 0.52, + "step": 1702 + }, + { + "epoch": 0.29, + "learning_rate": 1.976684666257219e-05, + "loss": 0.5523, + "step": 1703 + }, + { + "epoch": 0.29, + "learning_rate": 1.9766451189627723e-05, + "loss": 0.5026, + "step": 1704 + }, + { + "epoch": 0.29, + "learning_rate": 1.976605538553112e-05, + "loss": 0.4651, + "step": 1705 + }, + { + "epoch": 0.29, + "learning_rate": 1.9765659250295794e-05, + "loss": 0.5578, + "step": 1706 + }, + { + "epoch": 0.29, + "learning_rate": 1.9765262783935188e-05, + "loss": 0.5297, + "step": 1707 + }, + { + "epoch": 0.29, + "learning_rate": 1.9764865986462734e-05, + "loss": 0.559, + "step": 1708 + }, + { + "epoch": 0.29, + "learning_rate": 1.97644688578919e-05, + "loss": 0.5727, + "step": 1709 + }, + { + "epoch": 0.29, + "learning_rate": 1.9764071398236142e-05, + "loss": 0.5521, + "step": 1710 + }, + { + "epoch": 0.29, + "learning_rate": 1.9763673607508938e-05, + "loss": 0.5245, + "step": 1711 + }, + { + "epoch": 0.29, + "learning_rate": 1.9763275485723775e-05, + "loss": 0.5647, + "step": 1712 + }, + { + "epoch": 0.29, + "learning_rate": 1.9762877032894156e-05, + "loss": 0.5071, + "step": 1713 + }, + { + "epoch": 0.29, + "learning_rate": 1.976247824903359e-05, + "loss": 0.568, + "step": 1714 + }, + { + "epoch": 0.29, + "learning_rate": 1.9762079134155597e-05, + "loss": 0.5173, + "step": 1715 + }, + { + "epoch": 0.29, + "learning_rate": 1.9761679688273708e-05, + "loss": 0.5449, + "step": 1716 + }, + { + "epoch": 0.29, + "learning_rate": 1.9761279911401474e-05, + "loss": 0.5385, + "step": 1717 + }, + { + "epoch": 0.29, + "learning_rate": 1.9760879803552444e-05, + "loss": 0.5335, + "step": 1718 + }, + { + "epoch": 0.29, + "learning_rate": 1.9760479364740187e-05, + "loss": 0.5196, + "step": 1719 + }, + { + "epoch": 0.29, + "learning_rate": 1.9760078594978277e-05, + "loss": 0.5262, + "step": 1720 + }, + { + "epoch": 0.29, + "learning_rate": 1.975967749428031e-05, + "loss": 0.5301, + "step": 1721 + }, + { + "epoch": 0.29, + "learning_rate": 1.9759276062659882e-05, + "loss": 0.5648, + "step": 1722 + }, + { + "epoch": 0.29, + "learning_rate": 1.9758874300130604e-05, + "loss": 0.5333, + "step": 1723 + }, + { + "epoch": 0.29, + "learning_rate": 1.97584722067061e-05, + "loss": 0.5462, + "step": 1724 + }, + { + "epoch": 0.29, + "learning_rate": 1.97580697824e-05, + "loss": 0.5399, + "step": 1725 + }, + { + "epoch": 0.29, + "learning_rate": 1.9757667027225955e-05, + "loss": 0.5154, + "step": 1726 + }, + { + "epoch": 0.29, + "learning_rate": 1.9757263941197617e-05, + "loss": 0.548, + "step": 1727 + }, + { + "epoch": 0.29, + "learning_rate": 1.975686052432866e-05, + "loss": 0.5468, + "step": 1728 + }, + { + "epoch": 0.29, + "learning_rate": 1.9756456776632752e-05, + "loss": 0.5389, + "step": 1729 + }, + { + "epoch": 0.3, + "learning_rate": 1.975605269812359e-05, + "loss": 0.4901, + "step": 1730 + }, + { + "epoch": 0.3, + "learning_rate": 1.9755648288814876e-05, + "loss": 0.5456, + "step": 1731 + }, + { + "epoch": 0.3, + "learning_rate": 1.9755243548720314e-05, + "loss": 0.501, + "step": 1732 + }, + { + "epoch": 0.3, + "learning_rate": 1.975483847785364e-05, + "loss": 0.532, + "step": 1733 + }, + { + "epoch": 0.3, + "learning_rate": 1.975443307622858e-05, + "loss": 0.5741, + "step": 1734 + }, + { + "epoch": 0.3, + "learning_rate": 1.975402734385888e-05, + "loss": 0.5228, + "step": 1735 + }, + { + "epoch": 0.3, + "learning_rate": 1.9753621280758304e-05, + "loss": 0.5244, + "step": 1736 + }, + { + "epoch": 0.3, + "learning_rate": 1.9753214886940614e-05, + "loss": 0.5731, + "step": 1737 + }, + { + "epoch": 0.3, + "learning_rate": 1.975280816241959e-05, + "loss": 0.5747, + "step": 1738 + }, + { + "epoch": 0.3, + "learning_rate": 1.9752401107209028e-05, + "loss": 0.5153, + "step": 1739 + }, + { + "epoch": 0.3, + "learning_rate": 1.9751993721322725e-05, + "loss": 0.5533, + "step": 1740 + }, + { + "epoch": 0.3, + "learning_rate": 1.9751586004774495e-05, + "loss": 0.5604, + "step": 1741 + }, + { + "epoch": 0.3, + "learning_rate": 1.9751177957578165e-05, + "loss": 0.5368, + "step": 1742 + }, + { + "epoch": 0.3, + "learning_rate": 1.9750769579747566e-05, + "loss": 0.5037, + "step": 1743 + }, + { + "epoch": 0.3, + "learning_rate": 1.9750360871296552e-05, + "loss": 0.4782, + "step": 1744 + }, + { + "epoch": 0.3, + "learning_rate": 1.9749951832238973e-05, + "loss": 0.5263, + "step": 1745 + }, + { + "epoch": 0.3, + "learning_rate": 1.9749542462588707e-05, + "loss": 0.4749, + "step": 1746 + }, + { + "epoch": 0.3, + "learning_rate": 1.9749132762359625e-05, + "loss": 0.5502, + "step": 1747 + }, + { + "epoch": 0.3, + "learning_rate": 1.974872273156563e-05, + "loss": 0.5481, + "step": 1748 + }, + { + "epoch": 0.3, + "learning_rate": 1.9748312370220613e-05, + "loss": 0.5265, + "step": 1749 + }, + { + "epoch": 0.3, + "learning_rate": 1.9747901678338496e-05, + "loss": 0.5607, + "step": 1750 + }, + { + "epoch": 0.3, + "learning_rate": 1.9747490655933203e-05, + "loss": 0.5618, + "step": 1751 + }, + { + "epoch": 0.3, + "learning_rate": 1.974707930301867e-05, + "loss": 0.5466, + "step": 1752 + }, + { + "epoch": 0.3, + "learning_rate": 1.974666761960884e-05, + "loss": 0.5566, + "step": 1753 + }, + { + "epoch": 0.3, + "learning_rate": 1.974625560571768e-05, + "loss": 0.5131, + "step": 1754 + }, + { + "epoch": 0.3, + "learning_rate": 1.974584326135916e-05, + "loss": 0.5675, + "step": 1755 + }, + { + "epoch": 0.3, + "learning_rate": 1.9745430586547254e-05, + "loss": 0.5446, + "step": 1756 + }, + { + "epoch": 0.3, + "learning_rate": 1.974501758129596e-05, + "loss": 0.5551, + "step": 1757 + }, + { + "epoch": 0.3, + "learning_rate": 1.9744604245619282e-05, + "loss": 0.5356, + "step": 1758 + }, + { + "epoch": 0.3, + "learning_rate": 1.974419057953123e-05, + "loss": 0.5686, + "step": 1759 + }, + { + "epoch": 0.3, + "learning_rate": 1.9743776583045836e-05, + "loss": 0.5136, + "step": 1760 + }, + { + "epoch": 0.3, + "learning_rate": 1.9743362256177133e-05, + "loss": 0.5257, + "step": 1761 + }, + { + "epoch": 0.3, + "learning_rate": 1.9742947598939178e-05, + "loss": 0.5665, + "step": 1762 + }, + { + "epoch": 0.3, + "learning_rate": 1.974253261134602e-05, + "loss": 0.4882, + "step": 1763 + }, + { + "epoch": 0.3, + "learning_rate": 1.9742117293411734e-05, + "loss": 0.4958, + "step": 1764 + }, + { + "epoch": 0.3, + "learning_rate": 1.9741701645150404e-05, + "loss": 0.5349, + "step": 1765 + }, + { + "epoch": 0.3, + "learning_rate": 1.9741285666576124e-05, + "loss": 0.5596, + "step": 1766 + }, + { + "epoch": 0.3, + "learning_rate": 1.9740869357702997e-05, + "loss": 0.5187, + "step": 1767 + }, + { + "epoch": 0.3, + "learning_rate": 1.9740452718545138e-05, + "loss": 0.5517, + "step": 1768 + }, + { + "epoch": 0.3, + "learning_rate": 1.9740035749116674e-05, + "loss": 0.5247, + "step": 1769 + }, + { + "epoch": 0.3, + "learning_rate": 1.9739618449431744e-05, + "loss": 0.5596, + "step": 1770 + }, + { + "epoch": 0.3, + "learning_rate": 1.97392008195045e-05, + "loss": 0.5622, + "step": 1771 + }, + { + "epoch": 0.3, + "learning_rate": 1.97387828593491e-05, + "loss": 0.5451, + "step": 1772 + }, + { + "epoch": 0.3, + "learning_rate": 1.9738364568979714e-05, + "loss": 0.5412, + "step": 1773 + }, + { + "epoch": 0.3, + "learning_rate": 1.973794594841053e-05, + "loss": 0.535, + "step": 1774 + }, + { + "epoch": 0.3, + "learning_rate": 1.9737526997655734e-05, + "loss": 0.5909, + "step": 1775 + }, + { + "epoch": 0.3, + "learning_rate": 1.9737107716729543e-05, + "loss": 0.552, + "step": 1776 + }, + { + "epoch": 0.3, + "learning_rate": 1.973668810564616e-05, + "loss": 0.557, + "step": 1777 + }, + { + "epoch": 0.3, + "learning_rate": 1.9736268164419824e-05, + "loss": 0.5322, + "step": 1778 + }, + { + "epoch": 0.3, + "learning_rate": 1.9735847893064773e-05, + "loss": 0.5567, + "step": 1779 + }, + { + "epoch": 0.3, + "learning_rate": 1.973542729159525e-05, + "loss": 0.5373, + "step": 1780 + }, + { + "epoch": 0.3, + "learning_rate": 1.9735006360025523e-05, + "loss": 0.5383, + "step": 1781 + }, + { + "epoch": 0.3, + "learning_rate": 1.973458509836986e-05, + "loss": 0.5533, + "step": 1782 + }, + { + "epoch": 0.3, + "learning_rate": 1.973416350664255e-05, + "loss": 0.5398, + "step": 1783 + }, + { + "epoch": 0.3, + "learning_rate": 1.9733741584857882e-05, + "loss": 0.5234, + "step": 1784 + }, + { + "epoch": 0.3, + "learning_rate": 1.9733319333030168e-05, + "loss": 0.5556, + "step": 1785 + }, + { + "epoch": 0.3, + "learning_rate": 1.973289675117372e-05, + "loss": 0.5147, + "step": 1786 + }, + { + "epoch": 0.3, + "learning_rate": 1.973247383930287e-05, + "loss": 0.5479, + "step": 1787 + }, + { + "epoch": 0.3, + "learning_rate": 1.9732050597431955e-05, + "loss": 0.5445, + "step": 1788 + }, + { + "epoch": 0.31, + "learning_rate": 1.973162702557533e-05, + "loss": 0.5546, + "step": 1789 + }, + { + "epoch": 0.31, + "learning_rate": 1.9731203123747353e-05, + "loss": 0.4802, + "step": 1790 + }, + { + "epoch": 0.31, + "learning_rate": 1.9730778891962398e-05, + "loss": 0.5606, + "step": 1791 + }, + { + "epoch": 0.31, + "learning_rate": 1.9730354330234857e-05, + "loss": 0.5518, + "step": 1792 + }, + { + "epoch": 0.31, + "learning_rate": 1.9729929438579113e-05, + "loss": 0.5033, + "step": 1793 + }, + { + "epoch": 0.31, + "learning_rate": 1.972950421700958e-05, + "loss": 0.5397, + "step": 1794 + }, + { + "epoch": 0.31, + "learning_rate": 1.9729078665540672e-05, + "loss": 0.5371, + "step": 1795 + }, + { + "epoch": 0.31, + "learning_rate": 1.9728652784186824e-05, + "loss": 0.535, + "step": 1796 + }, + { + "epoch": 0.31, + "learning_rate": 1.9728226572962474e-05, + "loss": 0.5392, + "step": 1797 + }, + { + "epoch": 0.31, + "learning_rate": 1.9727800031882074e-05, + "loss": 0.5454, + "step": 1798 + }, + { + "epoch": 0.31, + "learning_rate": 1.9727373160960085e-05, + "loss": 0.5171, + "step": 1799 + }, + { + "epoch": 0.31, + "learning_rate": 1.972694596021098e-05, + "loss": 0.5392, + "step": 1800 + }, + { + "epoch": 0.31, + "learning_rate": 1.972651842964925e-05, + "loss": 0.5463, + "step": 1801 + }, + { + "epoch": 0.31, + "learning_rate": 1.9726090569289384e-05, + "loss": 0.5357, + "step": 1802 + }, + { + "epoch": 0.31, + "learning_rate": 1.9725662379145895e-05, + "loss": 0.5821, + "step": 1803 + }, + { + "epoch": 0.31, + "learning_rate": 1.9725233859233297e-05, + "loss": 0.5365, + "step": 1804 + }, + { + "epoch": 0.31, + "learning_rate": 1.9724805009566127e-05, + "loss": 0.5466, + "step": 1805 + }, + { + "epoch": 0.31, + "learning_rate": 1.9724375830158918e-05, + "loss": 0.513, + "step": 1806 + }, + { + "epoch": 0.31, + "learning_rate": 1.9723946321026227e-05, + "loss": 0.5501, + "step": 1807 + }, + { + "epoch": 0.31, + "learning_rate": 1.9723516482182615e-05, + "loss": 0.573, + "step": 1808 + }, + { + "epoch": 0.31, + "learning_rate": 1.972308631364266e-05, + "loss": 0.5385, + "step": 1809 + }, + { + "epoch": 0.31, + "learning_rate": 1.9722655815420943e-05, + "loss": 0.5435, + "step": 1810 + }, + { + "epoch": 0.31, + "learning_rate": 1.9722224987532065e-05, + "loss": 0.5091, + "step": 1811 + }, + { + "epoch": 0.31, + "learning_rate": 1.972179382999063e-05, + "loss": 0.523, + "step": 1812 + }, + { + "epoch": 0.31, + "learning_rate": 1.9721362342811262e-05, + "loss": 0.5409, + "step": 1813 + }, + { + "epoch": 0.31, + "learning_rate": 1.972093052600859e-05, + "loss": 0.542, + "step": 1814 + }, + { + "epoch": 0.31, + "learning_rate": 1.9720498379597256e-05, + "loss": 0.5333, + "step": 1815 + }, + { + "epoch": 0.31, + "learning_rate": 1.972006590359191e-05, + "loss": 0.5649, + "step": 1816 + }, + { + "epoch": 0.31, + "learning_rate": 1.9719633098007215e-05, + "loss": 0.5221, + "step": 1817 + }, + { + "epoch": 0.31, + "learning_rate": 1.9719199962857852e-05, + "loss": 0.5477, + "step": 1818 + }, + { + "epoch": 0.31, + "learning_rate": 1.9718766498158506e-05, + "loss": 0.4854, + "step": 1819 + }, + { + "epoch": 0.31, + "learning_rate": 1.9718332703923873e-05, + "loss": 0.4971, + "step": 1820 + }, + { + "epoch": 0.31, + "learning_rate": 1.971789858016866e-05, + "loss": 0.5315, + "step": 1821 + }, + { + "epoch": 0.31, + "learning_rate": 1.971746412690759e-05, + "loss": 0.5077, + "step": 1822 + }, + { + "epoch": 0.31, + "learning_rate": 1.971702934415539e-05, + "loss": 0.5255, + "step": 1823 + }, + { + "epoch": 0.31, + "learning_rate": 1.9716594231926807e-05, + "loss": 0.5347, + "step": 1824 + }, + { + "epoch": 0.31, + "learning_rate": 1.9716158790236594e-05, + "loss": 0.4934, + "step": 1825 + }, + { + "epoch": 0.31, + "learning_rate": 1.971572301909951e-05, + "loss": 0.5469, + "step": 1826 + }, + { + "epoch": 0.31, + "learning_rate": 1.9715286918530333e-05, + "loss": 0.5605, + "step": 1827 + }, + { + "epoch": 0.31, + "learning_rate": 1.9714850488543857e-05, + "loss": 0.5565, + "step": 1828 + }, + { + "epoch": 0.31, + "learning_rate": 1.971441372915487e-05, + "loss": 0.5175, + "step": 1829 + }, + { + "epoch": 0.31, + "learning_rate": 1.971397664037819e-05, + "loss": 0.5837, + "step": 1830 + }, + { + "epoch": 0.31, + "learning_rate": 1.971353922222863e-05, + "loss": 0.5496, + "step": 1831 + }, + { + "epoch": 0.31, + "learning_rate": 1.9713101474721027e-05, + "loss": 0.5128, + "step": 1832 + }, + { + "epoch": 0.31, + "learning_rate": 1.971266339787022e-05, + "loss": 0.5048, + "step": 1833 + }, + { + "epoch": 0.31, + "learning_rate": 1.9712224991691067e-05, + "loss": 0.5187, + "step": 1834 + }, + { + "epoch": 0.31, + "learning_rate": 1.971178625619843e-05, + "loss": 0.5447, + "step": 1835 + }, + { + "epoch": 0.31, + "learning_rate": 1.9711347191407183e-05, + "loss": 0.5349, + "step": 1836 + }, + { + "epoch": 0.31, + "learning_rate": 1.9710907797332215e-05, + "loss": 0.5355, + "step": 1837 + }, + { + "epoch": 0.31, + "learning_rate": 1.971046807398843e-05, + "loss": 0.5923, + "step": 1838 + }, + { + "epoch": 0.31, + "learning_rate": 1.9710028021390732e-05, + "loss": 0.5616, + "step": 1839 + }, + { + "epoch": 0.31, + "learning_rate": 1.9709587639554046e-05, + "loss": 0.5194, + "step": 1840 + }, + { + "epoch": 0.31, + "learning_rate": 1.97091469284933e-05, + "loss": 0.5469, + "step": 1841 + }, + { + "epoch": 0.31, + "learning_rate": 1.9708705888223435e-05, + "loss": 0.5099, + "step": 1842 + }, + { + "epoch": 0.31, + "learning_rate": 1.9708264518759413e-05, + "loss": 0.543, + "step": 1843 + }, + { + "epoch": 0.31, + "learning_rate": 1.9707822820116193e-05, + "loss": 0.5405, + "step": 1844 + }, + { + "epoch": 0.31, + "learning_rate": 1.9707380792308756e-05, + "loss": 0.5483, + "step": 1845 + }, + { + "epoch": 0.31, + "learning_rate": 1.9706938435352092e-05, + "loss": 0.5306, + "step": 1846 + }, + { + "epoch": 0.31, + "learning_rate": 1.9706495749261193e-05, + "loss": 0.5777, + "step": 1847 + }, + { + "epoch": 0.32, + "learning_rate": 1.970605273405107e-05, + "loss": 0.5096, + "step": 1848 + }, + { + "epoch": 0.32, + "learning_rate": 1.9705609389736753e-05, + "loss": 0.5187, + "step": 1849 + }, + { + "epoch": 0.32, + "learning_rate": 1.9705165716333263e-05, + "loss": 0.5054, + "step": 1850 + }, + { + "epoch": 0.32, + "learning_rate": 1.9704721713855653e-05, + "loss": 0.5158, + "step": 1851 + }, + { + "epoch": 0.32, + "learning_rate": 1.970427738231897e-05, + "loss": 0.5223, + "step": 1852 + }, + { + "epoch": 0.32, + "learning_rate": 1.970383272173829e-05, + "loss": 0.5404, + "step": 1853 + }, + { + "epoch": 0.32, + "learning_rate": 1.970338773212868e-05, + "loss": 0.5201, + "step": 1854 + }, + { + "epoch": 0.32, + "learning_rate": 1.9702942413505234e-05, + "loss": 0.504, + "step": 1855 + }, + { + "epoch": 0.32, + "learning_rate": 1.970249676588305e-05, + "loss": 0.5295, + "step": 1856 + }, + { + "epoch": 0.32, + "learning_rate": 1.9702050789277235e-05, + "loss": 0.5519, + "step": 1857 + }, + { + "epoch": 0.32, + "learning_rate": 1.9701604483702917e-05, + "loss": 0.5258, + "step": 1858 + }, + { + "epoch": 0.32, + "learning_rate": 1.970115784917523e-05, + "loss": 0.5851, + "step": 1859 + }, + { + "epoch": 0.32, + "learning_rate": 1.9700710885709305e-05, + "loss": 0.5536, + "step": 1860 + }, + { + "epoch": 0.32, + "learning_rate": 1.9700263593320315e-05, + "loss": 0.5412, + "step": 1861 + }, + { + "epoch": 0.32, + "learning_rate": 1.9699815972023418e-05, + "loss": 0.5006, + "step": 1862 + }, + { + "epoch": 0.32, + "learning_rate": 1.9699368021833788e-05, + "loss": 0.4843, + "step": 1863 + }, + { + "epoch": 0.32, + "learning_rate": 1.9698919742766618e-05, + "loss": 0.5409, + "step": 1864 + }, + { + "epoch": 0.32, + "learning_rate": 1.9698471134837107e-05, + "loss": 0.5531, + "step": 1865 + }, + { + "epoch": 0.32, + "learning_rate": 1.9698022198060468e-05, + "loss": 0.534, + "step": 1866 + }, + { + "epoch": 0.32, + "learning_rate": 1.969757293245192e-05, + "loss": 0.5285, + "step": 1867 + }, + { + "epoch": 0.32, + "learning_rate": 1.9697123338026696e-05, + "loss": 0.5165, + "step": 1868 + }, + { + "epoch": 0.32, + "learning_rate": 1.9696673414800044e-05, + "loss": 0.5459, + "step": 1869 + }, + { + "epoch": 0.32, + "learning_rate": 1.969622316278722e-05, + "loss": 0.4896, + "step": 1870 + }, + { + "epoch": 0.32, + "learning_rate": 1.969577258200348e-05, + "loss": 0.5043, + "step": 1871 + }, + { + "epoch": 0.32, + "learning_rate": 1.9695321672464117e-05, + "loss": 0.5653, + "step": 1872 + }, + { + "epoch": 0.32, + "learning_rate": 1.9694870434184415e-05, + "loss": 0.5235, + "step": 1873 + }, + { + "epoch": 0.32, + "learning_rate": 1.9694418867179668e-05, + "loss": 0.5783, + "step": 1874 + }, + { + "epoch": 0.32, + "learning_rate": 1.9693966971465193e-05, + "loss": 0.5442, + "step": 1875 + }, + { + "epoch": 0.32, + "learning_rate": 1.969351474705631e-05, + "loss": 0.532, + "step": 1876 + }, + { + "epoch": 0.32, + "learning_rate": 1.9693062193968355e-05, + "loss": 0.5525, + "step": 1877 + }, + { + "epoch": 0.32, + "learning_rate": 1.9692609312216673e-05, + "loss": 0.5473, + "step": 1878 + }, + { + "epoch": 0.32, + "learning_rate": 1.9692156101816617e-05, + "loss": 0.5176, + "step": 1879 + }, + { + "epoch": 0.32, + "learning_rate": 1.9691702562783556e-05, + "loss": 0.5214, + "step": 1880 + }, + { + "epoch": 0.32, + "learning_rate": 1.969124869513287e-05, + "loss": 0.4964, + "step": 1881 + }, + { + "epoch": 0.32, + "learning_rate": 1.9690794498879944e-05, + "loss": 0.5141, + "step": 1882 + }, + { + "epoch": 0.32, + "learning_rate": 1.969033997404018e-05, + "loss": 0.5641, + "step": 1883 + }, + { + "epoch": 0.32, + "learning_rate": 1.968988512062899e-05, + "loss": 0.5366, + "step": 1884 + }, + { + "epoch": 0.32, + "learning_rate": 1.9689429938661798e-05, + "loss": 0.5209, + "step": 1885 + }, + { + "epoch": 0.32, + "learning_rate": 1.9688974428154036e-05, + "loss": 0.53, + "step": 1886 + }, + { + "epoch": 0.32, + "learning_rate": 1.968851858912115e-05, + "loss": 0.5582, + "step": 1887 + }, + { + "epoch": 0.32, + "learning_rate": 1.96880624215786e-05, + "loss": 0.4911, + "step": 1888 + }, + { + "epoch": 0.32, + "learning_rate": 1.9687605925541847e-05, + "loss": 0.5314, + "step": 1889 + }, + { + "epoch": 0.32, + "learning_rate": 1.9687149101026372e-05, + "loss": 0.5318, + "step": 1890 + }, + { + "epoch": 0.32, + "learning_rate": 1.9686691948047665e-05, + "loss": 0.5003, + "step": 1891 + }, + { + "epoch": 0.32, + "learning_rate": 1.9686234466621226e-05, + "loss": 0.5967, + "step": 1892 + }, + { + "epoch": 0.32, + "learning_rate": 1.968577665676257e-05, + "loss": 0.5686, + "step": 1893 + }, + { + "epoch": 0.32, + "learning_rate": 1.9685318518487212e-05, + "loss": 0.5003, + "step": 1894 + }, + { + "epoch": 0.32, + "learning_rate": 1.968486005181069e-05, + "loss": 0.5446, + "step": 1895 + }, + { + "epoch": 0.32, + "learning_rate": 1.9684401256748555e-05, + "loss": 0.5555, + "step": 1896 + }, + { + "epoch": 0.32, + "learning_rate": 1.9683942133316357e-05, + "loss": 0.5484, + "step": 1897 + }, + { + "epoch": 0.32, + "learning_rate": 1.9683482681529668e-05, + "loss": 0.5238, + "step": 1898 + }, + { + "epoch": 0.32, + "learning_rate": 1.968302290140406e-05, + "loss": 0.5458, + "step": 1899 + }, + { + "epoch": 0.32, + "learning_rate": 1.9682562792955135e-05, + "loss": 0.5602, + "step": 1900 + }, + { + "epoch": 0.32, + "learning_rate": 1.9682102356198477e-05, + "loss": 0.5591, + "step": 1901 + }, + { + "epoch": 0.32, + "learning_rate": 1.9681641591149712e-05, + "loss": 0.5892, + "step": 1902 + }, + { + "epoch": 0.32, + "learning_rate": 1.9681180497824457e-05, + "loss": 0.5155, + "step": 1903 + }, + { + "epoch": 0.32, + "learning_rate": 1.9680719076238347e-05, + "loss": 0.4558, + "step": 1904 + }, + { + "epoch": 0.32, + "learning_rate": 1.9680257326407027e-05, + "loss": 0.5409, + "step": 1905 + }, + { + "epoch": 0.33, + "learning_rate": 1.967979524834616e-05, + "loss": 0.5376, + "step": 1906 + }, + { + "epoch": 0.33, + "learning_rate": 1.9679332842071405e-05, + "loss": 0.4722, + "step": 1907 + }, + { + "epoch": 0.33, + "learning_rate": 1.9678870107598443e-05, + "loss": 0.5618, + "step": 1908 + }, + { + "epoch": 0.33, + "learning_rate": 1.9678407044942965e-05, + "loss": 0.566, + "step": 1909 + }, + { + "epoch": 0.33, + "learning_rate": 1.9677943654120676e-05, + "loss": 0.5574, + "step": 1910 + }, + { + "epoch": 0.33, + "learning_rate": 1.9677479935147283e-05, + "loss": 0.5175, + "step": 1911 + }, + { + "epoch": 0.33, + "learning_rate": 1.967701588803851e-05, + "loss": 0.4933, + "step": 1912 + }, + { + "epoch": 0.33, + "learning_rate": 1.9676551512810094e-05, + "loss": 0.5659, + "step": 1913 + }, + { + "epoch": 0.33, + "learning_rate": 1.9676086809477778e-05, + "loss": 0.539, + "step": 1914 + }, + { + "epoch": 0.33, + "learning_rate": 1.9675621778057322e-05, + "loss": 0.5635, + "step": 1915 + }, + { + "epoch": 0.33, + "learning_rate": 1.967515641856449e-05, + "loss": 0.5073, + "step": 1916 + }, + { + "epoch": 0.33, + "learning_rate": 1.9674690731015064e-05, + "loss": 0.5679, + "step": 1917 + }, + { + "epoch": 0.33, + "learning_rate": 1.967422471542483e-05, + "loss": 0.5699, + "step": 1918 + }, + { + "epoch": 0.33, + "learning_rate": 1.9673758371809594e-05, + "loss": 0.5548, + "step": 1919 + }, + { + "epoch": 0.33, + "learning_rate": 1.9673291700185165e-05, + "loss": 0.5617, + "step": 1920 + }, + { + "epoch": 0.33, + "learning_rate": 1.9672824700567372e-05, + "loss": 0.5438, + "step": 1921 + }, + { + "epoch": 0.33, + "learning_rate": 1.9672357372972043e-05, + "loss": 0.4914, + "step": 1922 + }, + { + "epoch": 0.33, + "learning_rate": 1.9671889717415026e-05, + "loss": 0.5587, + "step": 1923 + }, + { + "epoch": 0.33, + "learning_rate": 1.967142173391218e-05, + "loss": 0.5112, + "step": 1924 + }, + { + "epoch": 0.33, + "learning_rate": 1.967095342247937e-05, + "loss": 0.5612, + "step": 1925 + }, + { + "epoch": 0.33, + "learning_rate": 1.9670484783132474e-05, + "loss": 0.5168, + "step": 1926 + }, + { + "epoch": 0.33, + "learning_rate": 1.9670015815887384e-05, + "loss": 0.5113, + "step": 1927 + }, + { + "epoch": 0.33, + "learning_rate": 1.9669546520760004e-05, + "loss": 0.5341, + "step": 1928 + }, + { + "epoch": 0.33, + "learning_rate": 1.9669076897766245e-05, + "loss": 0.5242, + "step": 1929 + }, + { + "epoch": 0.33, + "learning_rate": 1.966860694692203e-05, + "loss": 0.5396, + "step": 1930 + }, + { + "epoch": 0.33, + "learning_rate": 1.966813666824329e-05, + "loss": 0.5682, + "step": 1931 + }, + { + "epoch": 0.33, + "learning_rate": 1.966766606174598e-05, + "loss": 0.5549, + "step": 1932 + }, + { + "epoch": 0.33, + "learning_rate": 1.9667195127446044e-05, + "loss": 0.5736, + "step": 1933 + }, + { + "epoch": 0.33, + "learning_rate": 1.9666723865359463e-05, + "loss": 0.5396, + "step": 1934 + }, + { + "epoch": 0.33, + "learning_rate": 1.9666252275502207e-05, + "loss": 0.5366, + "step": 1935 + }, + { + "epoch": 0.33, + "learning_rate": 1.966578035789027e-05, + "loss": 0.539, + "step": 1936 + }, + { + "epoch": 0.33, + "learning_rate": 1.9665308112539655e-05, + "loss": 0.5552, + "step": 1937 + }, + { + "epoch": 0.33, + "learning_rate": 1.966483553946637e-05, + "loss": 0.5813, + "step": 1938 + }, + { + "epoch": 0.33, + "learning_rate": 1.9664362638686444e-05, + "loss": 0.5007, + "step": 1939 + }, + { + "epoch": 0.33, + "learning_rate": 1.9663889410215905e-05, + "loss": 0.5077, + "step": 1940 + }, + { + "epoch": 0.33, + "learning_rate": 1.9663415854070805e-05, + "loss": 0.5255, + "step": 1941 + }, + { + "epoch": 0.33, + "learning_rate": 1.9662941970267195e-05, + "loss": 0.5346, + "step": 1942 + }, + { + "epoch": 0.33, + "learning_rate": 1.966246775882115e-05, + "loss": 0.5015, + "step": 1943 + }, + { + "epoch": 0.33, + "learning_rate": 1.9661993219748746e-05, + "loss": 0.5412, + "step": 1944 + }, + { + "epoch": 0.33, + "learning_rate": 1.966151835306607e-05, + "loss": 0.4874, + "step": 1945 + }, + { + "epoch": 0.33, + "learning_rate": 1.966104315878923e-05, + "loss": 0.5273, + "step": 1946 + }, + { + "epoch": 0.33, + "learning_rate": 1.966056763693433e-05, + "loss": 0.5108, + "step": 1947 + }, + { + "epoch": 0.33, + "learning_rate": 1.96600917875175e-05, + "loss": 0.531, + "step": 1948 + }, + { + "epoch": 0.33, + "learning_rate": 1.9659615610554872e-05, + "loss": 0.5342, + "step": 1949 + }, + { + "epoch": 0.33, + "learning_rate": 1.9659139106062595e-05, + "loss": 0.5889, + "step": 1950 + }, + { + "epoch": 0.33, + "learning_rate": 1.965866227405682e-05, + "loss": 0.5309, + "step": 1951 + }, + { + "epoch": 0.33, + "learning_rate": 1.965818511455372e-05, + "loss": 0.5241, + "step": 1952 + }, + { + "epoch": 0.33, + "learning_rate": 1.9657707627569474e-05, + "loss": 0.5092, + "step": 1953 + }, + { + "epoch": 0.33, + "learning_rate": 1.965722981312027e-05, + "loss": 0.5516, + "step": 1954 + }, + { + "epoch": 0.33, + "learning_rate": 1.9656751671222308e-05, + "loss": 0.5182, + "step": 1955 + }, + { + "epoch": 0.33, + "learning_rate": 1.9656273201891807e-05, + "loss": 0.5664, + "step": 1956 + }, + { + "epoch": 0.33, + "learning_rate": 1.9655794405144982e-05, + "loss": 0.5333, + "step": 1957 + }, + { + "epoch": 0.33, + "learning_rate": 1.9655315280998077e-05, + "loss": 0.5424, + "step": 1958 + }, + { + "epoch": 0.33, + "learning_rate": 1.9654835829467326e-05, + "loss": 0.5345, + "step": 1959 + }, + { + "epoch": 0.33, + "learning_rate": 1.9654356050568998e-05, + "loss": 0.5443, + "step": 1960 + }, + { + "epoch": 0.33, + "learning_rate": 1.965387594431935e-05, + "loss": 0.5689, + "step": 1961 + }, + { + "epoch": 0.33, + "learning_rate": 1.965339551073467e-05, + "loss": 0.566, + "step": 1962 + }, + { + "epoch": 0.33, + "learning_rate": 1.9652914749831242e-05, + "loss": 0.5342, + "step": 1963 + }, + { + "epoch": 0.33, + "learning_rate": 1.9652433661625372e-05, + "loss": 0.5854, + "step": 1964 + }, + { + "epoch": 0.34, + "learning_rate": 1.9651952246133368e-05, + "loss": 0.5562, + "step": 1965 + }, + { + "epoch": 0.34, + "learning_rate": 1.9651470503371556e-05, + "loss": 0.514, + "step": 1966 + }, + { + "epoch": 0.34, + "learning_rate": 1.9650988433356265e-05, + "loss": 0.5439, + "step": 1967 + }, + { + "epoch": 0.34, + "learning_rate": 1.9650506036103852e-05, + "loss": 0.536, + "step": 1968 + }, + { + "epoch": 0.34, + "learning_rate": 1.9650023311630665e-05, + "loss": 0.5508, + "step": 1969 + }, + { + "epoch": 0.34, + "learning_rate": 1.964954025995307e-05, + "loss": 0.5072, + "step": 1970 + }, + { + "epoch": 0.34, + "learning_rate": 1.9649056881087457e-05, + "loss": 0.5283, + "step": 1971 + }, + { + "epoch": 0.34, + "learning_rate": 1.96485731750502e-05, + "loss": 0.5345, + "step": 1972 + }, + { + "epoch": 0.34, + "learning_rate": 1.9648089141857716e-05, + "loss": 0.5388, + "step": 1973 + }, + { + "epoch": 0.34, + "learning_rate": 1.9647604781526406e-05, + "loss": 0.5268, + "step": 1974 + }, + { + "epoch": 0.34, + "learning_rate": 1.9647120094072697e-05, + "loss": 0.507, + "step": 1975 + }, + { + "epoch": 0.34, + "learning_rate": 1.9646635079513027e-05, + "loss": 0.522, + "step": 1976 + }, + { + "epoch": 0.34, + "learning_rate": 1.9646149737863837e-05, + "loss": 0.5356, + "step": 1977 + }, + { + "epoch": 0.34, + "learning_rate": 1.964566406914158e-05, + "loss": 0.531, + "step": 1978 + }, + { + "epoch": 0.34, + "learning_rate": 1.9645178073362732e-05, + "loss": 0.5248, + "step": 1979 + }, + { + "epoch": 0.34, + "learning_rate": 1.964469175054377e-05, + "loss": 0.5637, + "step": 1980 + }, + { + "epoch": 0.34, + "learning_rate": 1.9644205100701173e-05, + "loss": 0.5315, + "step": 1981 + }, + { + "epoch": 0.34, + "learning_rate": 1.964371812385146e-05, + "loss": 0.5259, + "step": 1982 + }, + { + "epoch": 0.34, + "learning_rate": 1.964323082001113e-05, + "loss": 0.5579, + "step": 1983 + }, + { + "epoch": 0.34, + "learning_rate": 1.964274318919671e-05, + "loss": 0.5002, + "step": 1984 + }, + { + "epoch": 0.34, + "learning_rate": 1.964225523142473e-05, + "loss": 0.5596, + "step": 1985 + }, + { + "epoch": 0.34, + "learning_rate": 1.9641766946711743e-05, + "loss": 0.5181, + "step": 1986 + }, + { + "epoch": 0.34, + "learning_rate": 1.96412783350743e-05, + "loss": 0.5374, + "step": 1987 + }, + { + "epoch": 0.34, + "learning_rate": 1.964078939652897e-05, + "loss": 0.5416, + "step": 1988 + }, + { + "epoch": 0.34, + "learning_rate": 1.964030013109233e-05, + "loss": 0.5327, + "step": 1989 + }, + { + "epoch": 0.34, + "learning_rate": 1.9639810538780973e-05, + "loss": 0.565, + "step": 1990 + }, + { + "epoch": 0.34, + "learning_rate": 1.9639320619611494e-05, + "loss": 0.5091, + "step": 1991 + }, + { + "epoch": 0.34, + "learning_rate": 1.963883037360051e-05, + "loss": 0.5082, + "step": 1992 + }, + { + "epoch": 0.34, + "learning_rate": 1.9638339800764645e-05, + "loss": 0.4923, + "step": 1993 + }, + { + "epoch": 0.34, + "learning_rate": 1.9637848901120526e-05, + "loss": 0.5371, + "step": 1994 + }, + { + "epoch": 0.34, + "learning_rate": 1.9637357674684804e-05, + "loss": 0.52, + "step": 1995 + }, + { + "epoch": 0.34, + "learning_rate": 1.9636866121474132e-05, + "loss": 0.549, + "step": 1996 + }, + { + "epoch": 0.34, + "learning_rate": 1.963637424150518e-05, + "loss": 0.517, + "step": 1997 + }, + { + "epoch": 0.34, + "learning_rate": 1.963588203479462e-05, + "loss": 0.5303, + "step": 1998 + }, + { + "epoch": 0.34, + "learning_rate": 1.963538950135915e-05, + "loss": 0.5335, + "step": 1999 + }, + { + "epoch": 0.34, + "learning_rate": 1.963489664121546e-05, + "loss": 0.4979, + "step": 2000 + }, + { + "epoch": 0.34, + "learning_rate": 1.963440345438027e-05, + "loss": 0.51, + "step": 2001 + }, + { + "epoch": 0.34, + "learning_rate": 1.9633909940870306e-05, + "loss": 0.5246, + "step": 2002 + }, + { + "epoch": 0.34, + "learning_rate": 1.9633416100702288e-05, + "loss": 0.5289, + "step": 2003 + }, + { + "epoch": 0.34, + "learning_rate": 1.963292193389297e-05, + "loss": 0.5321, + "step": 2004 + }, + { + "epoch": 0.34, + "learning_rate": 1.963242744045911e-05, + "loss": 0.5134, + "step": 2005 + }, + { + "epoch": 0.34, + "learning_rate": 1.963193262041747e-05, + "loss": 0.5873, + "step": 2006 + }, + { + "epoch": 0.34, + "learning_rate": 1.9631437473784827e-05, + "loss": 0.5637, + "step": 2007 + }, + { + "epoch": 0.34, + "learning_rate": 1.963094200057797e-05, + "loss": 0.5195, + "step": 2008 + }, + { + "epoch": 0.34, + "learning_rate": 1.9630446200813704e-05, + "loss": 0.4901, + "step": 2009 + }, + { + "epoch": 0.34, + "learning_rate": 1.9629950074508836e-05, + "loss": 0.5279, + "step": 2010 + }, + { + "epoch": 0.34, + "learning_rate": 1.962945362168019e-05, + "loss": 0.5655, + "step": 2011 + }, + { + "epoch": 0.34, + "learning_rate": 1.96289568423446e-05, + "loss": 0.5963, + "step": 2012 + }, + { + "epoch": 0.34, + "learning_rate": 1.9628459736518907e-05, + "loss": 0.5454, + "step": 2013 + }, + { + "epoch": 0.34, + "learning_rate": 1.962796230421997e-05, + "loss": 0.5199, + "step": 2014 + }, + { + "epoch": 0.34, + "learning_rate": 1.9627464545464647e-05, + "loss": 0.5337, + "step": 2015 + }, + { + "epoch": 0.34, + "learning_rate": 1.962696646026983e-05, + "loss": 0.4837, + "step": 2016 + }, + { + "epoch": 0.34, + "learning_rate": 1.9626468048652395e-05, + "loss": 0.5689, + "step": 2017 + }, + { + "epoch": 0.34, + "learning_rate": 1.9625969310629252e-05, + "loss": 0.5149, + "step": 2018 + }, + { + "epoch": 0.34, + "learning_rate": 1.96254702462173e-05, + "loss": 0.5134, + "step": 2019 + }, + { + "epoch": 0.34, + "learning_rate": 1.962497085543347e-05, + "loss": 0.5592, + "step": 2020 + }, + { + "epoch": 0.34, + "learning_rate": 1.9624471138294695e-05, + "loss": 0.5209, + "step": 2021 + }, + { + "epoch": 0.34, + "learning_rate": 1.9623971094817916e-05, + "loss": 0.4897, + "step": 2022 + }, + { + "epoch": 0.35, + "learning_rate": 1.9623470725020085e-05, + "loss": 0.5389, + "step": 2023 + }, + { + "epoch": 0.35, + "learning_rate": 1.9622970028918172e-05, + "loss": 0.5186, + "step": 2024 + }, + { + "epoch": 0.35, + "learning_rate": 1.9622469006529156e-05, + "loss": 0.4884, + "step": 2025 + }, + { + "epoch": 0.35, + "learning_rate": 1.9621967657870018e-05, + "loss": 0.5141, + "step": 2026 + }, + { + "epoch": 0.35, + "learning_rate": 1.9621465982957764e-05, + "loss": 0.5148, + "step": 2027 + }, + { + "epoch": 0.35, + "learning_rate": 1.9620963981809404e-05, + "loss": 0.5344, + "step": 2028 + }, + { + "epoch": 0.35, + "learning_rate": 1.9620461654441954e-05, + "loss": 0.4907, + "step": 2029 + }, + { + "epoch": 0.35, + "learning_rate": 1.9619959000872454e-05, + "loss": 0.5566, + "step": 2030 + }, + { + "epoch": 0.35, + "learning_rate": 1.9619456021117945e-05, + "loss": 0.5008, + "step": 2031 + }, + { + "epoch": 0.35, + "learning_rate": 1.9618952715195476e-05, + "loss": 0.5442, + "step": 2032 + }, + { + "epoch": 0.35, + "learning_rate": 1.961844908312212e-05, + "loss": 0.5263, + "step": 2033 + }, + { + "epoch": 0.35, + "learning_rate": 1.9617945124914948e-05, + "loss": 0.5631, + "step": 2034 + }, + { + "epoch": 0.35, + "learning_rate": 1.961744084059105e-05, + "loss": 0.5386, + "step": 2035 + }, + { + "epoch": 0.35, + "learning_rate": 1.961693623016753e-05, + "loss": 0.549, + "step": 2036 + }, + { + "epoch": 0.35, + "learning_rate": 1.9616431293661487e-05, + "loss": 0.5004, + "step": 2037 + }, + { + "epoch": 0.35, + "learning_rate": 1.961592603109005e-05, + "loss": 0.4806, + "step": 2038 + }, + { + "epoch": 0.35, + "learning_rate": 1.9615420442470354e-05, + "loss": 0.5596, + "step": 2039 + }, + { + "epoch": 0.35, + "learning_rate": 1.961491452781953e-05, + "loss": 0.5513, + "step": 2040 + }, + { + "epoch": 0.35, + "learning_rate": 1.9614408287154742e-05, + "loss": 0.4908, + "step": 2041 + }, + { + "epoch": 0.35, + "learning_rate": 1.9613901720493155e-05, + "loss": 0.5188, + "step": 2042 + }, + { + "epoch": 0.35, + "learning_rate": 1.9613394827851938e-05, + "loss": 0.531, + "step": 2043 + }, + { + "epoch": 0.35, + "learning_rate": 1.961288760924829e-05, + "loss": 0.4958, + "step": 2044 + }, + { + "epoch": 0.35, + "learning_rate": 1.9612380064699393e-05, + "loss": 0.5421, + "step": 2045 + }, + { + "epoch": 0.35, + "learning_rate": 1.961187219422247e-05, + "loss": 0.5584, + "step": 2046 + }, + { + "epoch": 0.35, + "learning_rate": 1.9611363997834738e-05, + "loss": 0.5486, + "step": 2047 + }, + { + "epoch": 0.35, + "learning_rate": 1.9610855475553427e-05, + "loss": 0.5688, + "step": 2048 + }, + { + "epoch": 0.35, + "learning_rate": 1.9610346627395776e-05, + "loss": 0.5709, + "step": 2049 + }, + { + "epoch": 0.35, + "learning_rate": 1.9609837453379045e-05, + "loss": 0.5479, + "step": 2050 + }, + { + "epoch": 0.35, + "learning_rate": 1.96093279535205e-05, + "loss": 0.5635, + "step": 2051 + }, + { + "epoch": 0.35, + "learning_rate": 1.9608818127837408e-05, + "loss": 0.5493, + "step": 2052 + }, + { + "epoch": 0.35, + "learning_rate": 1.960830797634706e-05, + "loss": 0.5205, + "step": 2053 + }, + { + "epoch": 0.35, + "learning_rate": 1.9607797499066755e-05, + "loss": 0.4979, + "step": 2054 + }, + { + "epoch": 0.35, + "learning_rate": 1.9607286696013806e-05, + "loss": 0.5293, + "step": 2055 + }, + { + "epoch": 0.35, + "learning_rate": 1.960677556720552e-05, + "loss": 0.5783, + "step": 2056 + }, + { + "epoch": 0.35, + "learning_rate": 1.9606264112659245e-05, + "loss": 0.5246, + "step": 2057 + }, + { + "epoch": 0.35, + "learning_rate": 1.9605752332392307e-05, + "loss": 0.5448, + "step": 2058 + }, + { + "epoch": 0.35, + "learning_rate": 1.9605240226422067e-05, + "loss": 0.519, + "step": 2059 + }, + { + "epoch": 0.35, + "learning_rate": 1.960472779476589e-05, + "loss": 0.5319, + "step": 2060 + }, + { + "epoch": 0.35, + "learning_rate": 1.960421503744115e-05, + "loss": 0.5205, + "step": 2061 + }, + { + "epoch": 0.35, + "learning_rate": 1.9603701954465225e-05, + "loss": 0.5161, + "step": 2062 + }, + { + "epoch": 0.35, + "learning_rate": 1.9603188545855522e-05, + "loss": 0.5501, + "step": 2063 + }, + { + "epoch": 0.35, + "learning_rate": 1.960267481162945e-05, + "loss": 0.543, + "step": 2064 + }, + { + "epoch": 0.35, + "learning_rate": 1.9602160751804422e-05, + "loss": 0.5264, + "step": 2065 + }, + { + "epoch": 0.35, + "learning_rate": 1.9601646366397872e-05, + "loss": 0.5241, + "step": 2066 + }, + { + "epoch": 0.35, + "learning_rate": 1.960113165542724e-05, + "loss": 0.5223, + "step": 2067 + }, + { + "epoch": 0.35, + "learning_rate": 1.9600616618909977e-05, + "loss": 0.5259, + "step": 2068 + }, + { + "epoch": 0.35, + "learning_rate": 1.960010125686355e-05, + "loss": 0.5143, + "step": 2069 + }, + { + "epoch": 0.35, + "learning_rate": 1.959958556930543e-05, + "loss": 0.5043, + "step": 2070 + }, + { + "epoch": 0.35, + "learning_rate": 1.9599069556253105e-05, + "loss": 0.5461, + "step": 2071 + }, + { + "epoch": 0.35, + "learning_rate": 1.9598553217724067e-05, + "loss": 0.5287, + "step": 2072 + }, + { + "epoch": 0.35, + "learning_rate": 1.9598036553735832e-05, + "loss": 0.4997, + "step": 2073 + }, + { + "epoch": 0.35, + "learning_rate": 1.9597519564305908e-05, + "loss": 0.4916, + "step": 2074 + }, + { + "epoch": 0.35, + "learning_rate": 1.9597002249451834e-05, + "loss": 0.557, + "step": 2075 + }, + { + "epoch": 0.35, + "learning_rate": 1.9596484609191148e-05, + "loss": 0.5148, + "step": 2076 + }, + { + "epoch": 0.35, + "learning_rate": 1.9595966643541397e-05, + "loss": 0.5478, + "step": 2077 + }, + { + "epoch": 0.35, + "learning_rate": 1.959544835252015e-05, + "loss": 0.505, + "step": 2078 + }, + { + "epoch": 0.35, + "learning_rate": 1.9594929736144978e-05, + "loss": 0.5428, + "step": 2079 + }, + { + "epoch": 0.35, + "learning_rate": 1.9594410794433462e-05, + "loss": 0.5202, + "step": 2080 + }, + { + "epoch": 0.35, + "learning_rate": 1.9593891527403202e-05, + "loss": 0.5473, + "step": 2081 + }, + { + "epoch": 0.36, + "learning_rate": 1.959337193507181e-05, + "loss": 0.5448, + "step": 2082 + }, + { + "epoch": 0.36, + "learning_rate": 1.959285201745689e-05, + "loss": 0.556, + "step": 2083 + }, + { + "epoch": 0.36, + "learning_rate": 1.9592331774576086e-05, + "loss": 0.5353, + "step": 2084 + }, + { + "epoch": 0.36, + "learning_rate": 1.959181120644703e-05, + "loss": 0.4852, + "step": 2085 + }, + { + "epoch": 0.36, + "learning_rate": 1.959129031308737e-05, + "loss": 0.556, + "step": 2086 + }, + { + "epoch": 0.36, + "learning_rate": 1.959076909451478e-05, + "loss": 0.5195, + "step": 2087 + }, + { + "epoch": 0.36, + "learning_rate": 1.9590247550746917e-05, + "loss": 0.525, + "step": 2088 + }, + { + "epoch": 0.36, + "learning_rate": 1.9589725681801476e-05, + "loss": 0.5081, + "step": 2089 + }, + { + "epoch": 0.36, + "learning_rate": 1.958920348769615e-05, + "loss": 0.507, + "step": 2090 + }, + { + "epoch": 0.36, + "learning_rate": 1.9588680968448642e-05, + "loss": 0.5437, + "step": 2091 + }, + { + "epoch": 0.36, + "learning_rate": 1.9588158124076674e-05, + "loss": 0.6006, + "step": 2092 + }, + { + "epoch": 0.36, + "learning_rate": 1.958763495459797e-05, + "loss": 0.5455, + "step": 2093 + }, + { + "epoch": 0.36, + "learning_rate": 1.9587111460030273e-05, + "loss": 0.5288, + "step": 2094 + }, + { + "epoch": 0.36, + "learning_rate": 1.958658764039133e-05, + "loss": 0.4943, + "step": 2095 + }, + { + "epoch": 0.36, + "learning_rate": 1.95860634956989e-05, + "loss": 0.5201, + "step": 2096 + }, + { + "epoch": 0.36, + "learning_rate": 1.9585539025970763e-05, + "loss": 0.5011, + "step": 2097 + }, + { + "epoch": 0.36, + "learning_rate": 1.9585014231224692e-05, + "loss": 0.552, + "step": 2098 + }, + { + "epoch": 0.36, + "learning_rate": 1.958448911147849e-05, + "loss": 0.578, + "step": 2099 + }, + { + "epoch": 0.36, + "learning_rate": 1.958396366674996e-05, + "loss": 0.5609, + "step": 2100 + }, + { + "epoch": 0.36, + "learning_rate": 1.9583437897056915e-05, + "loss": 0.514, + "step": 2101 + }, + { + "epoch": 0.36, + "learning_rate": 1.9582911802417186e-05, + "loss": 0.5402, + "step": 2102 + }, + { + "epoch": 0.36, + "learning_rate": 1.9582385382848612e-05, + "loss": 0.5182, + "step": 2103 + }, + { + "epoch": 0.36, + "learning_rate": 1.958185863836904e-05, + "loss": 0.5176, + "step": 2104 + }, + { + "epoch": 0.36, + "learning_rate": 1.9581331568996327e-05, + "loss": 0.5447, + "step": 2105 + }, + { + "epoch": 0.36, + "learning_rate": 1.958080417474835e-05, + "loss": 0.5335, + "step": 2106 + }, + { + "epoch": 0.36, + "learning_rate": 1.9580276455642994e-05, + "loss": 0.5282, + "step": 2107 + }, + { + "epoch": 0.36, + "learning_rate": 1.9579748411698144e-05, + "loss": 0.5577, + "step": 2108 + }, + { + "epoch": 0.36, + "learning_rate": 1.9579220042931708e-05, + "loss": 0.4896, + "step": 2109 + }, + { + "epoch": 0.36, + "learning_rate": 1.9578691349361604e-05, + "loss": 0.557, + "step": 2110 + }, + { + "epoch": 0.36, + "learning_rate": 1.9578162331005755e-05, + "loss": 0.5077, + "step": 2111 + }, + { + "epoch": 0.36, + "learning_rate": 1.9577632987882103e-05, + "loss": 0.489, + "step": 2112 + }, + { + "epoch": 0.36, + "learning_rate": 1.957710332000859e-05, + "loss": 0.5192, + "step": 2113 + }, + { + "epoch": 0.36, + "learning_rate": 1.9576573327403182e-05, + "loss": 0.4849, + "step": 2114 + }, + { + "epoch": 0.36, + "learning_rate": 1.9576043010083844e-05, + "loss": 0.5295, + "step": 2115 + }, + { + "epoch": 0.36, + "learning_rate": 1.957551236806856e-05, + "loss": 0.5105, + "step": 2116 + }, + { + "epoch": 0.36, + "learning_rate": 1.9574981401375327e-05, + "loss": 0.5517, + "step": 2117 + }, + { + "epoch": 0.36, + "learning_rate": 1.957445011002214e-05, + "loss": 0.5166, + "step": 2118 + }, + { + "epoch": 0.36, + "learning_rate": 1.9573918494027015e-05, + "loss": 0.5188, + "step": 2119 + }, + { + "epoch": 0.36, + "learning_rate": 1.9573386553407986e-05, + "loss": 0.5423, + "step": 2120 + }, + { + "epoch": 0.36, + "learning_rate": 1.957285428818308e-05, + "loss": 0.5237, + "step": 2121 + }, + { + "epoch": 0.36, + "learning_rate": 1.957232169837035e-05, + "loss": 0.5235, + "step": 2122 + }, + { + "epoch": 0.36, + "learning_rate": 1.9571788783987853e-05, + "loss": 0.5091, + "step": 2123 + }, + { + "epoch": 0.36, + "learning_rate": 1.9571255545053657e-05, + "loss": 0.4951, + "step": 2124 + }, + { + "epoch": 0.36, + "learning_rate": 1.957072198158585e-05, + "loss": 0.5137, + "step": 2125 + }, + { + "epoch": 0.36, + "learning_rate": 1.9570188093602512e-05, + "loss": 0.5026, + "step": 2126 + }, + { + "epoch": 0.36, + "learning_rate": 1.956965388112175e-05, + "loss": 0.5467, + "step": 2127 + }, + { + "epoch": 0.36, + "learning_rate": 1.956911934416168e-05, + "loss": 0.5782, + "step": 2128 + }, + { + "epoch": 0.36, + "learning_rate": 1.956858448274043e-05, + "loss": 0.5435, + "step": 2129 + }, + { + "epoch": 0.36, + "learning_rate": 1.9568049296876127e-05, + "loss": 0.5387, + "step": 2130 + }, + { + "epoch": 0.36, + "learning_rate": 1.9567513786586922e-05, + "loss": 0.529, + "step": 2131 + }, + { + "epoch": 0.36, + "learning_rate": 1.9566977951890976e-05, + "loss": 0.5626, + "step": 2132 + }, + { + "epoch": 0.36, + "learning_rate": 1.956644179280645e-05, + "loss": 0.5379, + "step": 2133 + }, + { + "epoch": 0.36, + "learning_rate": 1.956590530935153e-05, + "loss": 0.5672, + "step": 2134 + }, + { + "epoch": 0.36, + "learning_rate": 1.9565368501544403e-05, + "loss": 0.5465, + "step": 2135 + }, + { + "epoch": 0.36, + "learning_rate": 1.9564831369403266e-05, + "loss": 0.5512, + "step": 2136 + }, + { + "epoch": 0.36, + "learning_rate": 1.9564293912946347e-05, + "loss": 0.5399, + "step": 2137 + }, + { + "epoch": 0.36, + "learning_rate": 1.9563756132191855e-05, + "loss": 0.5004, + "step": 2138 + }, + { + "epoch": 0.36, + "learning_rate": 1.956321802715803e-05, + "loss": 0.5301, + "step": 2139 + }, + { + "epoch": 0.36, + "learning_rate": 1.956267959786312e-05, + "loss": 0.5464, + "step": 2140 + }, + { + "epoch": 0.37, + "learning_rate": 1.9562140844325376e-05, + "loss": 0.5347, + "step": 2141 + }, + { + "epoch": 0.37, + "learning_rate": 1.9561601766563068e-05, + "loss": 0.5193, + "step": 2142 + }, + { + "epoch": 0.37, + "learning_rate": 1.9561062364594474e-05, + "loss": 0.5519, + "step": 2143 + }, + { + "epoch": 0.37, + "learning_rate": 1.9560522638437888e-05, + "loss": 0.522, + "step": 2144 + }, + { + "epoch": 0.37, + "learning_rate": 1.9559982588111604e-05, + "loss": 0.5212, + "step": 2145 + }, + { + "epoch": 0.37, + "learning_rate": 1.9559442213633936e-05, + "loss": 0.4883, + "step": 2146 + }, + { + "epoch": 0.37, + "learning_rate": 1.955890151502321e-05, + "loss": 0.529, + "step": 2147 + }, + { + "epoch": 0.37, + "learning_rate": 1.9558360492297754e-05, + "loss": 0.5201, + "step": 2148 + }, + { + "epoch": 0.37, + "learning_rate": 1.9557819145475916e-05, + "loss": 0.5309, + "step": 2149 + }, + { + "epoch": 0.37, + "learning_rate": 1.9557277474576054e-05, + "loss": 0.4984, + "step": 2150 + }, + { + "epoch": 0.37, + "learning_rate": 1.9556735479616527e-05, + "loss": 0.4756, + "step": 2151 + }, + { + "epoch": 0.37, + "learning_rate": 1.9556193160615717e-05, + "loss": 0.5451, + "step": 2152 + }, + { + "epoch": 0.37, + "learning_rate": 1.955565051759201e-05, + "loss": 0.5631, + "step": 2153 + }, + { + "epoch": 0.37, + "learning_rate": 1.9555107550563814e-05, + "loss": 0.5745, + "step": 2154 + }, + { + "epoch": 0.37, + "learning_rate": 1.9554564259549525e-05, + "loss": 0.5053, + "step": 2155 + }, + { + "epoch": 0.37, + "learning_rate": 1.9554020644567578e-05, + "loss": 0.5352, + "step": 2156 + }, + { + "epoch": 0.37, + "learning_rate": 1.9553476705636402e-05, + "loss": 0.5212, + "step": 2157 + }, + { + "epoch": 0.37, + "learning_rate": 1.955293244277443e-05, + "loss": 0.5345, + "step": 2158 + }, + { + "epoch": 0.37, + "learning_rate": 1.9552387856000133e-05, + "loss": 0.5225, + "step": 2159 + }, + { + "epoch": 0.37, + "learning_rate": 1.9551842945331964e-05, + "loss": 0.5604, + "step": 2160 + }, + { + "epoch": 0.37, + "learning_rate": 1.95512977107884e-05, + "loss": 0.5238, + "step": 2161 + }, + { + "epoch": 0.37, + "learning_rate": 1.9550752152387934e-05, + "loss": 0.5313, + "step": 2162 + }, + { + "epoch": 0.37, + "learning_rate": 1.9550206270149067e-05, + "loss": 0.5672, + "step": 2163 + }, + { + "epoch": 0.37, + "learning_rate": 1.9549660064090298e-05, + "loss": 0.4899, + "step": 2164 + }, + { + "epoch": 0.37, + "learning_rate": 1.9549113534230152e-05, + "loss": 0.5159, + "step": 2165 + }, + { + "epoch": 0.37, + "learning_rate": 1.9548566680587163e-05, + "loss": 0.5089, + "step": 2166 + }, + { + "epoch": 0.37, + "learning_rate": 1.954801950317987e-05, + "loss": 0.4983, + "step": 2167 + }, + { + "epoch": 0.37, + "learning_rate": 1.9547472002026827e-05, + "loss": 0.496, + "step": 2168 + }, + { + "epoch": 0.37, + "learning_rate": 1.95469241771466e-05, + "loss": 0.5264, + "step": 2169 + }, + { + "epoch": 0.37, + "learning_rate": 1.954637602855776e-05, + "loss": 0.5262, + "step": 2170 + }, + { + "epoch": 0.37, + "learning_rate": 1.95458275562789e-05, + "loss": 0.4998, + "step": 2171 + }, + { + "epoch": 0.37, + "learning_rate": 1.954527876032861e-05, + "loss": 0.5446, + "step": 2172 + }, + { + "epoch": 0.37, + "learning_rate": 1.95447296407255e-05, + "loss": 0.5197, + "step": 2173 + }, + { + "epoch": 0.37, + "learning_rate": 1.954418019748819e-05, + "loss": 0.553, + "step": 2174 + }, + { + "epoch": 0.37, + "learning_rate": 1.954363043063531e-05, + "loss": 0.5131, + "step": 2175 + }, + { + "epoch": 0.37, + "learning_rate": 1.9543080340185502e-05, + "loss": 0.5271, + "step": 2176 + }, + { + "epoch": 0.37, + "learning_rate": 1.9542529926157416e-05, + "loss": 0.5275, + "step": 2177 + }, + { + "epoch": 0.37, + "learning_rate": 1.9541979188569717e-05, + "loss": 0.5397, + "step": 2178 + }, + { + "epoch": 0.37, + "learning_rate": 1.9541428127441076e-05, + "loss": 0.527, + "step": 2179 + }, + { + "epoch": 0.37, + "learning_rate": 1.954087674279018e-05, + "loss": 0.5424, + "step": 2180 + }, + { + "epoch": 0.37, + "learning_rate": 1.9540325034635724e-05, + "loss": 0.5707, + "step": 2181 + }, + { + "epoch": 0.37, + "learning_rate": 1.9539773002996418e-05, + "loss": 0.534, + "step": 2182 + }, + { + "epoch": 0.37, + "learning_rate": 1.9539220647890974e-05, + "loss": 0.5113, + "step": 2183 + }, + { + "epoch": 0.37, + "learning_rate": 1.9538667969338125e-05, + "loss": 0.5291, + "step": 2184 + }, + { + "epoch": 0.37, + "learning_rate": 1.953811496735661e-05, + "loss": 0.5302, + "step": 2185 + }, + { + "epoch": 0.37, + "learning_rate": 1.953756164196518e-05, + "loss": 0.4855, + "step": 2186 + }, + { + "epoch": 0.37, + "learning_rate": 1.9537007993182596e-05, + "loss": 0.509, + "step": 2187 + }, + { + "epoch": 0.37, + "learning_rate": 1.953645402102763e-05, + "loss": 0.5607, + "step": 2188 + }, + { + "epoch": 0.37, + "learning_rate": 1.953589972551907e-05, + "loss": 0.5389, + "step": 2189 + }, + { + "epoch": 0.37, + "learning_rate": 1.95353451066757e-05, + "loss": 0.5281, + "step": 2190 + }, + { + "epoch": 0.37, + "learning_rate": 1.9534790164516337e-05, + "loss": 0.5319, + "step": 2191 + }, + { + "epoch": 0.37, + "learning_rate": 1.953423489905979e-05, + "loss": 0.4778, + "step": 2192 + }, + { + "epoch": 0.37, + "learning_rate": 1.953367931032489e-05, + "loss": 0.5221, + "step": 2193 + }, + { + "epoch": 0.37, + "learning_rate": 1.9533123398330478e-05, + "loss": 0.5039, + "step": 2194 + }, + { + "epoch": 0.37, + "learning_rate": 1.9532567163095397e-05, + "loss": 0.5386, + "step": 2195 + }, + { + "epoch": 0.37, + "learning_rate": 1.953201060463851e-05, + "loss": 0.534, + "step": 2196 + }, + { + "epoch": 0.37, + "learning_rate": 1.953145372297869e-05, + "loss": 0.516, + "step": 2197 + }, + { + "epoch": 0.37, + "learning_rate": 1.953089651813482e-05, + "loss": 0.5418, + "step": 2198 + }, + { + "epoch": 0.38, + "learning_rate": 1.9530338990125786e-05, + "loss": 0.4821, + "step": 2199 + }, + { + "epoch": 0.38, + "learning_rate": 1.9529781138970502e-05, + "loss": 0.5316, + "step": 2200 + }, + { + "epoch": 0.38, + "learning_rate": 1.9529222964687876e-05, + "loss": 0.5627, + "step": 2201 + }, + { + "epoch": 0.38, + "learning_rate": 1.9528664467296835e-05, + "loss": 0.5217, + "step": 2202 + }, + { + "epoch": 0.38, + "learning_rate": 1.9528105646816318e-05, + "loss": 0.5214, + "step": 2203 + }, + { + "epoch": 0.38, + "learning_rate": 1.9527546503265274e-05, + "loss": 0.5072, + "step": 2204 + }, + { + "epoch": 0.38, + "learning_rate": 1.952698703666266e-05, + "loss": 0.5412, + "step": 2205 + }, + { + "epoch": 0.38, + "learning_rate": 1.9526427247027446e-05, + "loss": 0.5175, + "step": 2206 + }, + { + "epoch": 0.38, + "learning_rate": 1.9525867134378615e-05, + "loss": 0.5317, + "step": 2207 + }, + { + "epoch": 0.38, + "learning_rate": 1.9525306698735155e-05, + "loss": 0.4991, + "step": 2208 + }, + { + "epoch": 0.38, + "learning_rate": 1.9524745940116074e-05, + "loss": 0.5512, + "step": 2209 + }, + { + "epoch": 0.38, + "learning_rate": 1.952418485854038e-05, + "loss": 0.4994, + "step": 2210 + }, + { + "epoch": 0.38, + "learning_rate": 1.9523623454027095e-05, + "loss": 0.5346, + "step": 2211 + }, + { + "epoch": 0.38, + "learning_rate": 1.9523061726595267e-05, + "loss": 0.5338, + "step": 2212 + }, + { + "epoch": 0.38, + "learning_rate": 1.9522499676263935e-05, + "loss": 0.535, + "step": 2213 + }, + { + "epoch": 0.38, + "learning_rate": 1.952193730305215e-05, + "loss": 0.5454, + "step": 2214 + }, + { + "epoch": 0.38, + "learning_rate": 1.9521374606978993e-05, + "loss": 0.5526, + "step": 2215 + }, + { + "epoch": 0.38, + "learning_rate": 1.9520811588063538e-05, + "loss": 0.5294, + "step": 2216 + }, + { + "epoch": 0.38, + "learning_rate": 1.952024824632487e-05, + "loss": 0.5322, + "step": 2217 + }, + { + "epoch": 0.38, + "learning_rate": 1.95196845817821e-05, + "loss": 0.4919, + "step": 2218 + }, + { + "epoch": 0.38, + "learning_rate": 1.9519120594454335e-05, + "loss": 0.5437, + "step": 2219 + }, + { + "epoch": 0.38, + "learning_rate": 1.9518556284360696e-05, + "loss": 0.5449, + "step": 2220 + }, + { + "epoch": 0.38, + "learning_rate": 1.9517991651520322e-05, + "loss": 0.5275, + "step": 2221 + }, + { + "epoch": 0.38, + "learning_rate": 1.9517426695952358e-05, + "loss": 0.5123, + "step": 2222 + }, + { + "epoch": 0.38, + "learning_rate": 1.951686141767595e-05, + "loss": 0.5031, + "step": 2223 + }, + { + "epoch": 0.38, + "learning_rate": 1.951629581671028e-05, + "loss": 0.5519, + "step": 2224 + }, + { + "epoch": 0.38, + "learning_rate": 1.9515729893074517e-05, + "loss": 0.4941, + "step": 2225 + }, + { + "epoch": 0.38, + "learning_rate": 1.9515163646787855e-05, + "loss": 0.532, + "step": 2226 + }, + { + "epoch": 0.38, + "learning_rate": 1.9514597077869483e-05, + "loss": 0.5478, + "step": 2227 + }, + { + "epoch": 0.38, + "learning_rate": 1.9514030186338624e-05, + "loss": 0.5201, + "step": 2228 + }, + { + "epoch": 0.38, + "learning_rate": 1.951346297221449e-05, + "loss": 0.5146, + "step": 2229 + }, + { + "epoch": 0.38, + "learning_rate": 1.9512895435516322e-05, + "loss": 0.5315, + "step": 2230 + }, + { + "epoch": 0.38, + "learning_rate": 1.951232757626336e-05, + "loss": 0.498, + "step": 2231 + }, + { + "epoch": 0.38, + "learning_rate": 1.9511759394474858e-05, + "loss": 0.4923, + "step": 2232 + }, + { + "epoch": 0.38, + "learning_rate": 1.9511190890170082e-05, + "loss": 0.5066, + "step": 2233 + }, + { + "epoch": 0.38, + "learning_rate": 1.9510622063368306e-05, + "loss": 0.535, + "step": 2234 + }, + { + "epoch": 0.38, + "learning_rate": 1.9510052914088822e-05, + "loss": 0.5351, + "step": 2235 + }, + { + "epoch": 0.38, + "learning_rate": 1.9509483442350926e-05, + "loss": 0.5123, + "step": 2236 + }, + { + "epoch": 0.38, + "learning_rate": 1.9508913648173922e-05, + "loss": 0.5391, + "step": 2237 + }, + { + "epoch": 0.38, + "learning_rate": 1.9508343531577137e-05, + "loss": 0.5599, + "step": 2238 + }, + { + "epoch": 0.38, + "learning_rate": 1.95077730925799e-05, + "loss": 0.5141, + "step": 2239 + }, + { + "epoch": 0.38, + "learning_rate": 1.9507202331201555e-05, + "loss": 0.5425, + "step": 2240 + }, + { + "epoch": 0.38, + "learning_rate": 1.9506631247461447e-05, + "loss": 0.5289, + "step": 2241 + }, + { + "epoch": 0.38, + "learning_rate": 1.950605984137895e-05, + "loss": 0.5111, + "step": 2242 + }, + { + "epoch": 0.38, + "learning_rate": 1.950548811297343e-05, + "loss": 0.5522, + "step": 2243 + }, + { + "epoch": 0.38, + "learning_rate": 1.9504916062264285e-05, + "loss": 0.552, + "step": 2244 + }, + { + "epoch": 0.38, + "learning_rate": 1.9504343689270896e-05, + "loss": 0.5265, + "step": 2245 + }, + { + "epoch": 0.38, + "learning_rate": 1.950377099401268e-05, + "loss": 0.5237, + "step": 2246 + }, + { + "epoch": 0.38, + "learning_rate": 1.950319797650905e-05, + "loss": 0.5267, + "step": 2247 + }, + { + "epoch": 0.38, + "learning_rate": 1.950262463677944e-05, + "loss": 0.5252, + "step": 2248 + }, + { + "epoch": 0.38, + "learning_rate": 1.950205097484329e-05, + "loss": 0.4927, + "step": 2249 + }, + { + "epoch": 0.38, + "learning_rate": 1.9501476990720053e-05, + "loss": 0.5087, + "step": 2250 + }, + { + "epoch": 0.38, + "learning_rate": 1.9500902684429185e-05, + "loss": 0.5439, + "step": 2251 + }, + { + "epoch": 0.38, + "learning_rate": 1.9500328055990162e-05, + "loss": 0.509, + "step": 2252 + }, + { + "epoch": 0.38, + "learning_rate": 1.949975310542247e-05, + "loss": 0.5869, + "step": 2253 + }, + { + "epoch": 0.38, + "learning_rate": 1.94991778327456e-05, + "loss": 0.5266, + "step": 2254 + }, + { + "epoch": 0.38, + "learning_rate": 1.9498602237979062e-05, + "loss": 0.5441, + "step": 2255 + }, + { + "epoch": 0.38, + "learning_rate": 1.949802632114237e-05, + "loss": 0.5352, + "step": 2256 + }, + { + "epoch": 0.38, + "learning_rate": 1.9497450082255055e-05, + "loss": 0.5121, + "step": 2257 + }, + { + "epoch": 0.39, + "learning_rate": 1.949687352133665e-05, + "loss": 0.5061, + "step": 2258 + }, + { + "epoch": 0.39, + "learning_rate": 1.949629663840671e-05, + "loss": 0.5329, + "step": 2259 + }, + { + "epoch": 0.39, + "learning_rate": 1.9495719433484793e-05, + "loss": 0.4995, + "step": 2260 + }, + { + "epoch": 0.39, + "learning_rate": 1.9495141906590468e-05, + "loss": 0.544, + "step": 2261 + }, + { + "epoch": 0.39, + "learning_rate": 1.9494564057743323e-05, + "loss": 0.5073, + "step": 2262 + }, + { + "epoch": 0.39, + "learning_rate": 1.9493985886962943e-05, + "loss": 0.5347, + "step": 2263 + }, + { + "epoch": 0.39, + "learning_rate": 1.949340739426894e-05, + "loss": 0.4892, + "step": 2264 + }, + { + "epoch": 0.39, + "learning_rate": 1.9492828579680923e-05, + "loss": 0.517, + "step": 2265 + }, + { + "epoch": 0.39, + "learning_rate": 1.9492249443218525e-05, + "loss": 0.5477, + "step": 2266 + }, + { + "epoch": 0.39, + "learning_rate": 1.9491669984901377e-05, + "loss": 0.5267, + "step": 2267 + }, + { + "epoch": 0.39, + "learning_rate": 1.949109020474913e-05, + "loss": 0.5125, + "step": 2268 + }, + { + "epoch": 0.39, + "learning_rate": 1.9490510102781437e-05, + "loss": 0.5433, + "step": 2269 + }, + { + "epoch": 0.39, + "learning_rate": 1.9489929679017976e-05, + "loss": 0.5281, + "step": 2270 + }, + { + "epoch": 0.39, + "learning_rate": 1.948934893347842e-05, + "loss": 0.4988, + "step": 2271 + }, + { + "epoch": 0.39, + "learning_rate": 1.9488767866182467e-05, + "loss": 0.5369, + "step": 2272 + }, + { + "epoch": 0.39, + "learning_rate": 1.9488186477149816e-05, + "loss": 0.5214, + "step": 2273 + }, + { + "epoch": 0.39, + "learning_rate": 1.9487604766400178e-05, + "loss": 0.5192, + "step": 2274 + }, + { + "epoch": 0.39, + "learning_rate": 1.9487022733953278e-05, + "loss": 0.5495, + "step": 2275 + }, + { + "epoch": 0.39, + "learning_rate": 1.9486440379828855e-05, + "loss": 0.5149, + "step": 2276 + }, + { + "epoch": 0.39, + "learning_rate": 1.9485857704046652e-05, + "loss": 0.5612, + "step": 2277 + }, + { + "epoch": 0.39, + "learning_rate": 1.9485274706626428e-05, + "loss": 0.5236, + "step": 2278 + }, + { + "epoch": 0.39, + "learning_rate": 1.9484691387587944e-05, + "loss": 0.5294, + "step": 2279 + }, + { + "epoch": 0.39, + "learning_rate": 1.9484107746950988e-05, + "loss": 0.5428, + "step": 2280 + }, + { + "epoch": 0.39, + "learning_rate": 1.948352378473534e-05, + "loss": 0.4568, + "step": 2281 + }, + { + "epoch": 0.39, + "learning_rate": 1.948293950096081e-05, + "loss": 0.5279, + "step": 2282 + }, + { + "epoch": 0.39, + "learning_rate": 1.9482354895647203e-05, + "loss": 0.521, + "step": 2283 + }, + { + "epoch": 0.39, + "learning_rate": 1.9481769968814346e-05, + "loss": 0.4952, + "step": 2284 + }, + { + "epoch": 0.39, + "learning_rate": 1.9481184720482066e-05, + "loss": 0.4969, + "step": 2285 + }, + { + "epoch": 0.39, + "learning_rate": 1.9480599150670212e-05, + "loss": 0.5206, + "step": 2286 + }, + { + "epoch": 0.39, + "learning_rate": 1.9480013259398638e-05, + "loss": 0.5101, + "step": 2287 + }, + { + "epoch": 0.39, + "learning_rate": 1.9479427046687208e-05, + "loss": 0.5392, + "step": 2288 + }, + { + "epoch": 0.39, + "learning_rate": 1.94788405125558e-05, + "loss": 0.5192, + "step": 2289 + }, + { + "epoch": 0.39, + "learning_rate": 1.9478253657024305e-05, + "loss": 0.5249, + "step": 2290 + }, + { + "epoch": 0.39, + "learning_rate": 1.9477666480112615e-05, + "loss": 0.5333, + "step": 2291 + }, + { + "epoch": 0.39, + "learning_rate": 1.9477078981840643e-05, + "loss": 0.5192, + "step": 2292 + }, + { + "epoch": 0.39, + "learning_rate": 1.947649116222831e-05, + "loss": 0.5295, + "step": 2293 + }, + { + "epoch": 0.39, + "learning_rate": 1.947590302129555e-05, + "loss": 0.5205, + "step": 2294 + }, + { + "epoch": 0.39, + "learning_rate": 1.94753145590623e-05, + "loss": 0.5201, + "step": 2295 + }, + { + "epoch": 0.39, + "learning_rate": 1.947472577554851e-05, + "loss": 0.5276, + "step": 2296 + }, + { + "epoch": 0.39, + "learning_rate": 1.947413667077415e-05, + "loss": 0.5372, + "step": 2297 + }, + { + "epoch": 0.39, + "learning_rate": 1.94735472447592e-05, + "loss": 0.5771, + "step": 2298 + }, + { + "epoch": 0.39, + "learning_rate": 1.9472957497523633e-05, + "loss": 0.5324, + "step": 2299 + }, + { + "epoch": 0.39, + "learning_rate": 1.9472367429087457e-05, + "loss": 0.5036, + "step": 2300 + }, + { + "epoch": 0.39, + "learning_rate": 1.947177703947067e-05, + "loss": 0.5342, + "step": 2301 + }, + { + "epoch": 0.39, + "learning_rate": 1.9471186328693296e-05, + "loss": 0.4797, + "step": 2302 + }, + { + "epoch": 0.39, + "learning_rate": 1.947059529677536e-05, + "loss": 0.5113, + "step": 2303 + }, + { + "epoch": 0.39, + "learning_rate": 1.947000394373691e-05, + "loss": 0.5147, + "step": 2304 + }, + { + "epoch": 0.39, + "learning_rate": 1.946941226959799e-05, + "loss": 0.5006, + "step": 2305 + }, + { + "epoch": 0.39, + "learning_rate": 1.9468820274378665e-05, + "loss": 0.5305, + "step": 2306 + }, + { + "epoch": 0.39, + "learning_rate": 1.946822795809901e-05, + "loss": 0.5208, + "step": 2307 + }, + { + "epoch": 0.39, + "learning_rate": 1.94676353207791e-05, + "loss": 0.5261, + "step": 2308 + }, + { + "epoch": 0.39, + "learning_rate": 1.946704236243904e-05, + "loss": 0.475, + "step": 2309 + }, + { + "epoch": 0.39, + "learning_rate": 1.9466449083098927e-05, + "loss": 0.5146, + "step": 2310 + }, + { + "epoch": 0.39, + "learning_rate": 1.9465855482778883e-05, + "loss": 0.5412, + "step": 2311 + }, + { + "epoch": 0.39, + "learning_rate": 1.9465261561499037e-05, + "loss": 0.586, + "step": 2312 + }, + { + "epoch": 0.39, + "learning_rate": 1.9464667319279518e-05, + "loss": 0.511, + "step": 2313 + }, + { + "epoch": 0.39, + "learning_rate": 1.9464072756140487e-05, + "loss": 0.5471, + "step": 2314 + }, + { + "epoch": 0.39, + "learning_rate": 1.9463477872102095e-05, + "loss": 0.5166, + "step": 2315 + }, + { + "epoch": 0.39, + "learning_rate": 1.9462882667184515e-05, + "loss": 0.4948, + "step": 2316 + }, + { + "epoch": 0.4, + "learning_rate": 1.946228714140793e-05, + "loss": 0.533, + "step": 2317 + }, + { + "epoch": 0.4, + "learning_rate": 1.946169129479253e-05, + "loss": 0.5081, + "step": 2318 + }, + { + "epoch": 0.4, + "learning_rate": 1.946109512735852e-05, + "loss": 0.5631, + "step": 2319 + }, + { + "epoch": 0.4, + "learning_rate": 1.9460498639126117e-05, + "loss": 0.4786, + "step": 2320 + }, + { + "epoch": 0.4, + "learning_rate": 1.9459901830115547e-05, + "loss": 0.5187, + "step": 2321 + }, + { + "epoch": 0.4, + "learning_rate": 1.9459304700347037e-05, + "loss": 0.5125, + "step": 2322 + }, + { + "epoch": 0.4, + "learning_rate": 1.945870724984084e-05, + "loss": 0.4995, + "step": 2323 + }, + { + "epoch": 0.4, + "learning_rate": 1.9458109478617218e-05, + "loss": 0.5274, + "step": 2324 + }, + { + "epoch": 0.4, + "learning_rate": 1.945751138669643e-05, + "loss": 0.5487, + "step": 2325 + }, + { + "epoch": 0.4, + "learning_rate": 1.9456912974098765e-05, + "loss": 0.5428, + "step": 2326 + }, + { + "epoch": 0.4, + "learning_rate": 1.945631424084451e-05, + "loss": 0.5171, + "step": 2327 + }, + { + "epoch": 0.4, + "learning_rate": 1.945571518695396e-05, + "loss": 0.5412, + "step": 2328 + }, + { + "epoch": 0.4, + "learning_rate": 1.9455115812447437e-05, + "loss": 0.5043, + "step": 2329 + }, + { + "epoch": 0.4, + "learning_rate": 1.945451611734526e-05, + "loss": 0.5291, + "step": 2330 + }, + { + "epoch": 0.4, + "learning_rate": 1.9453916101667763e-05, + "loss": 0.5041, + "step": 2331 + }, + { + "epoch": 0.4, + "learning_rate": 1.9453315765435285e-05, + "loss": 0.5203, + "step": 2332 + }, + { + "epoch": 0.4, + "learning_rate": 1.9452715108668193e-05, + "loss": 0.5519, + "step": 2333 + }, + { + "epoch": 0.4, + "learning_rate": 1.945211413138685e-05, + "loss": 0.5126, + "step": 2334 + }, + { + "epoch": 0.4, + "learning_rate": 1.9451512833611623e-05, + "loss": 0.5725, + "step": 2335 + }, + { + "epoch": 0.4, + "learning_rate": 1.9450911215362915e-05, + "loss": 0.5669, + "step": 2336 + }, + { + "epoch": 0.4, + "learning_rate": 1.9450309276661117e-05, + "loss": 0.5196, + "step": 2337 + }, + { + "epoch": 0.4, + "learning_rate": 1.9449707017526637e-05, + "loss": 0.5148, + "step": 2338 + }, + { + "epoch": 0.4, + "learning_rate": 1.9449104437979908e-05, + "loss": 0.5063, + "step": 2339 + }, + { + "epoch": 0.4, + "learning_rate": 1.9448501538041343e-05, + "loss": 0.5215, + "step": 2340 + }, + { + "epoch": 0.4, + "learning_rate": 1.94478983177314e-05, + "loss": 0.4934, + "step": 2341 + }, + { + "epoch": 0.4, + "learning_rate": 1.944729477707053e-05, + "loss": 0.5093, + "step": 2342 + }, + { + "epoch": 0.4, + "learning_rate": 1.944669091607919e-05, + "loss": 0.552, + "step": 2343 + }, + { + "epoch": 0.4, + "learning_rate": 1.944608673477786e-05, + "loss": 0.5187, + "step": 2344 + }, + { + "epoch": 0.4, + "learning_rate": 1.9445482233187028e-05, + "loss": 0.5386, + "step": 2345 + }, + { + "epoch": 0.4, + "learning_rate": 1.9444877411327186e-05, + "loss": 0.5313, + "step": 2346 + }, + { + "epoch": 0.4, + "learning_rate": 1.9444272269218847e-05, + "loss": 0.5556, + "step": 2347 + }, + { + "epoch": 0.4, + "learning_rate": 1.9443666806882527e-05, + "loss": 0.5413, + "step": 2348 + }, + { + "epoch": 0.4, + "learning_rate": 1.9443061024338753e-05, + "loss": 0.5333, + "step": 2349 + }, + { + "epoch": 0.4, + "learning_rate": 1.944245492160807e-05, + "loss": 0.5085, + "step": 2350 + }, + { + "epoch": 0.4, + "learning_rate": 1.944184849871103e-05, + "loss": 0.5428, + "step": 2351 + }, + { + "epoch": 0.4, + "learning_rate": 1.9441241755668184e-05, + "loss": 0.543, + "step": 2352 + }, + { + "epoch": 0.4, + "learning_rate": 1.944063469250012e-05, + "loss": 0.5235, + "step": 2353 + }, + { + "epoch": 0.4, + "learning_rate": 1.9440027309227413e-05, + "loss": 0.5075, + "step": 2354 + }, + { + "epoch": 0.4, + "learning_rate": 1.9439419605870657e-05, + "loss": 0.5695, + "step": 2355 + }, + { + "epoch": 0.4, + "learning_rate": 1.9438811582450464e-05, + "loss": 0.5363, + "step": 2356 + }, + { + "epoch": 0.4, + "learning_rate": 1.9438203238987444e-05, + "loss": 0.5216, + "step": 2357 + }, + { + "epoch": 0.4, + "learning_rate": 1.9437594575502227e-05, + "loss": 0.5176, + "step": 2358 + }, + { + "epoch": 0.4, + "learning_rate": 1.9436985592015446e-05, + "loss": 0.5034, + "step": 2359 + }, + { + "epoch": 0.4, + "learning_rate": 1.943637628854776e-05, + "loss": 0.5824, + "step": 2360 + }, + { + "epoch": 0.4, + "learning_rate": 1.9435766665119823e-05, + "loss": 0.5968, + "step": 2361 + }, + { + "epoch": 0.4, + "learning_rate": 1.94351567217523e-05, + "loss": 0.5369, + "step": 2362 + }, + { + "epoch": 0.4, + "learning_rate": 1.9434546458465884e-05, + "loss": 0.4989, + "step": 2363 + }, + { + "epoch": 0.4, + "learning_rate": 1.943393587528126e-05, + "loss": 0.4884, + "step": 2364 + }, + { + "epoch": 0.4, + "learning_rate": 1.943332497221913e-05, + "loss": 0.5025, + "step": 2365 + }, + { + "epoch": 0.4, + "learning_rate": 1.943271374930021e-05, + "loss": 0.5285, + "step": 2366 + }, + { + "epoch": 0.4, + "learning_rate": 1.9432102206545225e-05, + "loss": 0.5021, + "step": 2367 + }, + { + "epoch": 0.4, + "learning_rate": 1.9431490343974914e-05, + "loss": 0.5112, + "step": 2368 + }, + { + "epoch": 0.4, + "learning_rate": 1.9430878161610018e-05, + "loss": 0.5033, + "step": 2369 + }, + { + "epoch": 0.4, + "learning_rate": 1.9430265659471294e-05, + "loss": 0.5794, + "step": 2370 + }, + { + "epoch": 0.4, + "learning_rate": 1.9429652837579517e-05, + "loss": 0.5314, + "step": 2371 + }, + { + "epoch": 0.4, + "learning_rate": 1.942903969595546e-05, + "loss": 0.522, + "step": 2372 + }, + { + "epoch": 0.4, + "learning_rate": 1.9428426234619917e-05, + "loss": 0.5358, + "step": 2373 + }, + { + "epoch": 0.4, + "learning_rate": 1.9427812453593682e-05, + "loss": 0.524, + "step": 2374 + }, + { + "epoch": 0.41, + "learning_rate": 1.9427198352897575e-05, + "loss": 0.523, + "step": 2375 + }, + { + "epoch": 0.41, + "learning_rate": 1.942658393255241e-05, + "loss": 0.525, + "step": 2376 + }, + { + "epoch": 0.41, + "learning_rate": 1.9425969192579028e-05, + "loss": 0.5463, + "step": 2377 + }, + { + "epoch": 0.41, + "learning_rate": 1.942535413299827e-05, + "loss": 0.5168, + "step": 2378 + }, + { + "epoch": 0.41, + "learning_rate": 1.9424738753830986e-05, + "loss": 0.5752, + "step": 2379 + }, + { + "epoch": 0.41, + "learning_rate": 1.942412305509805e-05, + "loss": 0.5176, + "step": 2380 + }, + { + "epoch": 0.41, + "learning_rate": 1.9423507036820333e-05, + "loss": 0.4994, + "step": 2381 + }, + { + "epoch": 0.41, + "learning_rate": 1.9422890699018723e-05, + "loss": 0.5294, + "step": 2382 + }, + { + "epoch": 0.41, + "learning_rate": 1.942227404171412e-05, + "loss": 0.5045, + "step": 2383 + }, + { + "epoch": 0.41, + "learning_rate": 1.9421657064927433e-05, + "loss": 0.5713, + "step": 2384 + }, + { + "epoch": 0.41, + "learning_rate": 1.9421039768679583e-05, + "loss": 0.5709, + "step": 2385 + }, + { + "epoch": 0.41, + "learning_rate": 1.94204221529915e-05, + "loss": 0.5334, + "step": 2386 + }, + { + "epoch": 0.41, + "learning_rate": 1.941980421788412e-05, + "loss": 0.5051, + "step": 2387 + }, + { + "epoch": 0.41, + "learning_rate": 1.9419185963378402e-05, + "loss": 0.5099, + "step": 2388 + }, + { + "epoch": 0.41, + "learning_rate": 1.9418567389495308e-05, + "loss": 0.5232, + "step": 2389 + }, + { + "epoch": 0.41, + "learning_rate": 1.9417948496255808e-05, + "loss": 0.4783, + "step": 2390 + }, + { + "epoch": 0.41, + "learning_rate": 1.9417329283680896e-05, + "loss": 0.532, + "step": 2391 + }, + { + "epoch": 0.41, + "learning_rate": 1.9416709751791554e-05, + "loss": 0.5237, + "step": 2392 + }, + { + "epoch": 0.41, + "learning_rate": 1.9416089900608806e-05, + "loss": 0.5544, + "step": 2393 + }, + { + "epoch": 0.41, + "learning_rate": 1.9415469730153652e-05, + "loss": 0.55, + "step": 2394 + }, + { + "epoch": 0.41, + "learning_rate": 1.941484924044713e-05, + "loss": 0.5563, + "step": 2395 + }, + { + "epoch": 0.41, + "learning_rate": 1.941422843151028e-05, + "loss": 0.5181, + "step": 2396 + }, + { + "epoch": 0.41, + "learning_rate": 1.9413607303364144e-05, + "loss": 0.5276, + "step": 2397 + }, + { + "epoch": 0.41, + "learning_rate": 1.941298585602979e-05, + "loss": 0.5015, + "step": 2398 + }, + { + "epoch": 0.41, + "learning_rate": 1.9412364089528288e-05, + "loss": 0.4741, + "step": 2399 + }, + { + "epoch": 0.41, + "learning_rate": 1.941174200388072e-05, + "loss": 0.5318, + "step": 2400 + }, + { + "epoch": 0.41, + "learning_rate": 1.9411119599108175e-05, + "loss": 0.5344, + "step": 2401 + }, + { + "epoch": 0.41, + "learning_rate": 1.941049687523176e-05, + "loss": 0.5232, + "step": 2402 + }, + { + "epoch": 0.41, + "learning_rate": 1.9409873832272593e-05, + "loss": 0.5352, + "step": 2403 + }, + { + "epoch": 0.41, + "learning_rate": 1.9409250470251795e-05, + "loss": 0.5445, + "step": 2404 + }, + { + "epoch": 0.41, + "learning_rate": 1.9408626789190504e-05, + "loss": 0.5103, + "step": 2405 + }, + { + "epoch": 0.41, + "learning_rate": 1.9408002789109866e-05, + "loss": 0.4969, + "step": 2406 + }, + { + "epoch": 0.41, + "learning_rate": 1.9407378470031044e-05, + "loss": 0.5073, + "step": 2407 + }, + { + "epoch": 0.41, + "learning_rate": 1.9406753831975202e-05, + "loss": 0.5201, + "step": 2408 + }, + { + "epoch": 0.41, + "learning_rate": 1.940612887496352e-05, + "loss": 0.5174, + "step": 2409 + }, + { + "epoch": 0.41, + "learning_rate": 1.9405503599017188e-05, + "loss": 0.5326, + "step": 2410 + }, + { + "epoch": 0.41, + "learning_rate": 1.940487800415741e-05, + "loss": 0.5119, + "step": 2411 + }, + { + "epoch": 0.41, + "learning_rate": 1.9404252090405397e-05, + "loss": 0.5128, + "step": 2412 + }, + { + "epoch": 0.41, + "learning_rate": 1.940362585778237e-05, + "loss": 0.554, + "step": 2413 + }, + { + "epoch": 0.41, + "learning_rate": 1.9402999306309568e-05, + "loss": 0.5153, + "step": 2414 + }, + { + "epoch": 0.41, + "learning_rate": 1.940237243600823e-05, + "loss": 0.543, + "step": 2415 + }, + { + "epoch": 0.41, + "learning_rate": 1.9401745246899612e-05, + "loss": 0.5485, + "step": 2416 + }, + { + "epoch": 0.41, + "learning_rate": 1.9401117739004984e-05, + "loss": 0.5073, + "step": 2417 + }, + { + "epoch": 0.41, + "learning_rate": 1.940048991234562e-05, + "loss": 0.5344, + "step": 2418 + }, + { + "epoch": 0.41, + "learning_rate": 1.9399861766942808e-05, + "loss": 0.4902, + "step": 2419 + }, + { + "epoch": 0.41, + "learning_rate": 1.9399233302817845e-05, + "loss": 0.4989, + "step": 2420 + }, + { + "epoch": 0.41, + "learning_rate": 1.9398604519992044e-05, + "loss": 0.5248, + "step": 2421 + }, + { + "epoch": 0.41, + "learning_rate": 1.9397975418486722e-05, + "loss": 0.5298, + "step": 2422 + }, + { + "epoch": 0.41, + "learning_rate": 1.9397345998323214e-05, + "loss": 0.5368, + "step": 2423 + }, + { + "epoch": 0.41, + "learning_rate": 1.939671625952286e-05, + "loss": 0.5323, + "step": 2424 + }, + { + "epoch": 0.41, + "learning_rate": 1.939608620210701e-05, + "loss": 0.529, + "step": 2425 + }, + { + "epoch": 0.41, + "learning_rate": 1.9395455826097028e-05, + "loss": 0.5446, + "step": 2426 + }, + { + "epoch": 0.41, + "learning_rate": 1.939482513151429e-05, + "loss": 0.5409, + "step": 2427 + }, + { + "epoch": 0.41, + "learning_rate": 1.939419411838018e-05, + "loss": 0.5394, + "step": 2428 + }, + { + "epoch": 0.41, + "learning_rate": 1.9393562786716096e-05, + "loss": 0.4906, + "step": 2429 + }, + { + "epoch": 0.41, + "learning_rate": 1.9392931136543446e-05, + "loss": 0.5547, + "step": 2430 + }, + { + "epoch": 0.41, + "learning_rate": 1.939229916788364e-05, + "loss": 0.488, + "step": 2431 + }, + { + "epoch": 0.41, + "learning_rate": 1.9391666880758113e-05, + "loss": 0.4795, + "step": 2432 + }, + { + "epoch": 0.41, + "learning_rate": 1.93910342751883e-05, + "loss": 0.5134, + "step": 2433 + }, + { + "epoch": 0.42, + "learning_rate": 1.9390401351195655e-05, + "loss": 0.5144, + "step": 2434 + }, + { + "epoch": 0.42, + "learning_rate": 1.938976810880163e-05, + "loss": 0.5104, + "step": 2435 + }, + { + "epoch": 0.42, + "learning_rate": 1.938913454802771e-05, + "loss": 0.5137, + "step": 2436 + }, + { + "epoch": 0.42, + "learning_rate": 1.9388500668895368e-05, + "loss": 0.5204, + "step": 2437 + }, + { + "epoch": 0.42, + "learning_rate": 1.93878664714261e-05, + "loss": 0.4855, + "step": 2438 + }, + { + "epoch": 0.42, + "learning_rate": 1.9387231955641403e-05, + "loss": 0.4869, + "step": 2439 + }, + { + "epoch": 0.42, + "learning_rate": 1.93865971215628e-05, + "loss": 0.5242, + "step": 2440 + }, + { + "epoch": 0.42, + "learning_rate": 1.9385961969211816e-05, + "loss": 0.5297, + "step": 2441 + }, + { + "epoch": 0.42, + "learning_rate": 1.9385326498609983e-05, + "loss": 0.5453, + "step": 2442 + }, + { + "epoch": 0.42, + "learning_rate": 1.938469070977885e-05, + "loss": 0.4949, + "step": 2443 + }, + { + "epoch": 0.42, + "learning_rate": 1.9384054602739976e-05, + "loss": 0.5154, + "step": 2444 + }, + { + "epoch": 0.42, + "learning_rate": 1.9383418177514928e-05, + "loss": 0.5445, + "step": 2445 + }, + { + "epoch": 0.42, + "learning_rate": 1.9382781434125285e-05, + "loss": 0.5453, + "step": 2446 + }, + { + "epoch": 0.42, + "learning_rate": 1.9382144372592635e-05, + "loss": 0.5381, + "step": 2447 + }, + { + "epoch": 0.42, + "learning_rate": 1.9381506992938584e-05, + "loss": 0.4827, + "step": 2448 + }, + { + "epoch": 0.42, + "learning_rate": 1.9380869295184742e-05, + "loss": 0.5363, + "step": 2449 + }, + { + "epoch": 0.42, + "learning_rate": 1.938023127935273e-05, + "loss": 0.4838, + "step": 2450 + }, + { + "epoch": 0.42, + "learning_rate": 1.937959294546418e-05, + "loss": 0.5415, + "step": 2451 + }, + { + "epoch": 0.42, + "learning_rate": 1.9378954293540738e-05, + "loss": 0.5898, + "step": 2452 + }, + { + "epoch": 0.42, + "learning_rate": 1.9378315323604063e-05, + "loss": 0.5646, + "step": 2453 + }, + { + "epoch": 0.42, + "learning_rate": 1.9377676035675815e-05, + "loss": 0.5392, + "step": 2454 + }, + { + "epoch": 0.42, + "learning_rate": 1.9377036429777673e-05, + "loss": 0.525, + "step": 2455 + }, + { + "epoch": 0.42, + "learning_rate": 1.9376396505931318e-05, + "loss": 0.5198, + "step": 2456 + }, + { + "epoch": 0.42, + "learning_rate": 1.9375756264158458e-05, + "loss": 0.5209, + "step": 2457 + }, + { + "epoch": 0.42, + "learning_rate": 1.9375115704480793e-05, + "loss": 0.5341, + "step": 2458 + }, + { + "epoch": 0.42, + "learning_rate": 1.937447482692005e-05, + "loss": 0.5513, + "step": 2459 + }, + { + "epoch": 0.42, + "learning_rate": 1.937383363149795e-05, + "loss": 0.4861, + "step": 2460 + }, + { + "epoch": 0.42, + "learning_rate": 1.9373192118236246e-05, + "loss": 0.5303, + "step": 2461 + }, + { + "epoch": 0.42, + "learning_rate": 1.937255028715668e-05, + "loss": 0.5563, + "step": 2462 + }, + { + "epoch": 0.42, + "learning_rate": 1.937190813828102e-05, + "loss": 0.5234, + "step": 2463 + }, + { + "epoch": 0.42, + "learning_rate": 1.9371265671631038e-05, + "loss": 0.54, + "step": 2464 + }, + { + "epoch": 0.42, + "learning_rate": 1.9370622887228517e-05, + "loss": 0.5771, + "step": 2465 + }, + { + "epoch": 0.42, + "learning_rate": 1.936997978509525e-05, + "loss": 0.5362, + "step": 2466 + }, + { + "epoch": 0.42, + "learning_rate": 1.9369336365253048e-05, + "loss": 0.4702, + "step": 2467 + }, + { + "epoch": 0.42, + "learning_rate": 1.9368692627723728e-05, + "loss": 0.5242, + "step": 2468 + }, + { + "epoch": 0.42, + "learning_rate": 1.936804857252911e-05, + "loss": 0.5164, + "step": 2469 + }, + { + "epoch": 0.42, + "learning_rate": 1.9367404199691034e-05, + "loss": 0.4974, + "step": 2470 + }, + { + "epoch": 0.42, + "learning_rate": 1.9366759509231356e-05, + "loss": 0.4866, + "step": 2471 + }, + { + "epoch": 0.42, + "learning_rate": 1.936611450117193e-05, + "loss": 0.5055, + "step": 2472 + }, + { + "epoch": 0.42, + "learning_rate": 1.9365469175534627e-05, + "loss": 0.5177, + "step": 2473 + }, + { + "epoch": 0.42, + "learning_rate": 1.936482353234133e-05, + "loss": 0.5305, + "step": 2474 + }, + { + "epoch": 0.42, + "learning_rate": 1.9364177571613927e-05, + "loss": 0.5253, + "step": 2475 + }, + { + "epoch": 0.42, + "learning_rate": 1.9363531293374323e-05, + "loss": 0.5689, + "step": 2476 + }, + { + "epoch": 0.42, + "learning_rate": 1.936288469764443e-05, + "loss": 0.5147, + "step": 2477 + }, + { + "epoch": 0.42, + "learning_rate": 1.9362237784446174e-05, + "loss": 0.5289, + "step": 2478 + }, + { + "epoch": 0.42, + "learning_rate": 1.936159055380149e-05, + "loss": 0.5274, + "step": 2479 + }, + { + "epoch": 0.42, + "learning_rate": 1.9360943005732323e-05, + "loss": 0.5131, + "step": 2480 + }, + { + "epoch": 0.42, + "learning_rate": 1.936029514026063e-05, + "loss": 0.5097, + "step": 2481 + }, + { + "epoch": 0.42, + "learning_rate": 1.9359646957408377e-05, + "loss": 0.5492, + "step": 2482 + }, + { + "epoch": 0.42, + "learning_rate": 1.9358998457197543e-05, + "loss": 0.5439, + "step": 2483 + }, + { + "epoch": 0.42, + "learning_rate": 1.9358349639650118e-05, + "loss": 0.5187, + "step": 2484 + }, + { + "epoch": 0.42, + "learning_rate": 1.93577005047881e-05, + "loss": 0.5175, + "step": 2485 + }, + { + "epoch": 0.42, + "learning_rate": 1.9357051052633496e-05, + "loss": 0.4813, + "step": 2486 + }, + { + "epoch": 0.42, + "learning_rate": 1.9356401283208335e-05, + "loss": 0.568, + "step": 2487 + }, + { + "epoch": 0.42, + "learning_rate": 1.9355751196534644e-05, + "loss": 0.5287, + "step": 2488 + }, + { + "epoch": 0.42, + "learning_rate": 1.9355100792634466e-05, + "loss": 0.5493, + "step": 2489 + }, + { + "epoch": 0.42, + "learning_rate": 1.935445007152985e-05, + "loss": 0.4892, + "step": 2490 + }, + { + "epoch": 0.42, + "learning_rate": 1.9353799033242867e-05, + "loss": 0.5998, + "step": 2491 + }, + { + "epoch": 0.43, + "learning_rate": 1.9353147677795586e-05, + "loss": 0.4963, + "step": 2492 + }, + { + "epoch": 0.43, + "learning_rate": 1.93524960052101e-05, + "loss": 0.5333, + "step": 2493 + }, + { + "epoch": 0.43, + "learning_rate": 1.93518440155085e-05, + "loss": 0.5674, + "step": 2494 + }, + { + "epoch": 0.43, + "learning_rate": 1.9351191708712895e-05, + "loss": 0.4952, + "step": 2495 + }, + { + "epoch": 0.43, + "learning_rate": 1.9350539084845397e-05, + "loss": 0.5553, + "step": 2496 + }, + { + "epoch": 0.43, + "learning_rate": 1.9349886143928144e-05, + "loss": 0.5231, + "step": 2497 + }, + { + "epoch": 0.43, + "learning_rate": 1.9349232885983268e-05, + "loss": 0.5186, + "step": 2498 + }, + { + "epoch": 0.43, + "learning_rate": 1.9348579311032923e-05, + "loss": 0.4938, + "step": 2499 + }, + { + "epoch": 0.43, + "learning_rate": 1.934792541909927e-05, + "loss": 0.4985, + "step": 2500 + }, + { + "epoch": 0.43, + "learning_rate": 1.9347271210204478e-05, + "loss": 0.5243, + "step": 2501 + }, + { + "epoch": 0.43, + "learning_rate": 1.934661668437073e-05, + "loss": 0.5318, + "step": 2502 + }, + { + "epoch": 0.43, + "learning_rate": 1.934596184162022e-05, + "loss": 0.5269, + "step": 2503 + }, + { + "epoch": 0.43, + "learning_rate": 1.9345306681975156e-05, + "loss": 0.5071, + "step": 2504 + }, + { + "epoch": 0.43, + "learning_rate": 1.9344651205457745e-05, + "loss": 0.55, + "step": 2505 + }, + { + "epoch": 0.43, + "learning_rate": 1.9343995412090215e-05, + "loss": 0.5393, + "step": 2506 + }, + { + "epoch": 0.43, + "learning_rate": 1.9343339301894802e-05, + "loss": 0.4802, + "step": 2507 + }, + { + "epoch": 0.43, + "learning_rate": 1.9342682874893756e-05, + "loss": 0.5375, + "step": 2508 + }, + { + "epoch": 0.43, + "learning_rate": 1.934202613110933e-05, + "loss": 0.5429, + "step": 2509 + }, + { + "epoch": 0.43, + "learning_rate": 1.934136907056379e-05, + "loss": 0.5345, + "step": 2510 + }, + { + "epoch": 0.43, + "learning_rate": 1.9340711693279424e-05, + "loss": 0.554, + "step": 2511 + }, + { + "epoch": 0.43, + "learning_rate": 1.934005399927852e-05, + "loss": 0.5241, + "step": 2512 + }, + { + "epoch": 0.43, + "learning_rate": 1.933939598858337e-05, + "loss": 0.5335, + "step": 2513 + }, + { + "epoch": 0.43, + "learning_rate": 1.933873766121629e-05, + "loss": 0.5334, + "step": 2514 + }, + { + "epoch": 0.43, + "learning_rate": 1.9338079017199603e-05, + "loss": 0.4771, + "step": 2515 + }, + { + "epoch": 0.43, + "learning_rate": 1.9337420056555648e-05, + "loss": 0.5641, + "step": 2516 + }, + { + "epoch": 0.43, + "learning_rate": 1.9336760779306756e-05, + "loss": 0.4908, + "step": 2517 + }, + { + "epoch": 0.43, + "learning_rate": 1.9336101185475288e-05, + "loss": 0.551, + "step": 2518 + }, + { + "epoch": 0.43, + "learning_rate": 1.933544127508361e-05, + "loss": 0.4948, + "step": 2519 + }, + { + "epoch": 0.43, + "learning_rate": 1.933478104815409e-05, + "loss": 0.5068, + "step": 2520 + }, + { + "epoch": 0.43, + "learning_rate": 1.9334120504709125e-05, + "loss": 0.5068, + "step": 2521 + }, + { + "epoch": 0.43, + "learning_rate": 1.9333459644771103e-05, + "loss": 0.5466, + "step": 2522 + }, + { + "epoch": 0.43, + "learning_rate": 1.933279846836244e-05, + "loss": 0.5074, + "step": 2523 + }, + { + "epoch": 0.43, + "learning_rate": 1.9332136975505547e-05, + "loss": 0.5253, + "step": 2524 + }, + { + "epoch": 0.43, + "learning_rate": 1.9331475166222858e-05, + "loss": 0.5476, + "step": 2525 + }, + { + "epoch": 0.43, + "learning_rate": 1.9330813040536815e-05, + "loss": 0.574, + "step": 2526 + }, + { + "epoch": 0.43, + "learning_rate": 1.9330150598469865e-05, + "loss": 0.5204, + "step": 2527 + }, + { + "epoch": 0.43, + "learning_rate": 1.9329487840044468e-05, + "loss": 0.5048, + "step": 2528 + }, + { + "epoch": 0.43, + "learning_rate": 1.93288247652831e-05, + "loss": 0.4963, + "step": 2529 + }, + { + "epoch": 0.43, + "learning_rate": 1.9328161374208242e-05, + "loss": 0.5396, + "step": 2530 + }, + { + "epoch": 0.43, + "learning_rate": 1.9327497666842384e-05, + "loss": 0.5255, + "step": 2531 + }, + { + "epoch": 0.43, + "learning_rate": 1.9326833643208038e-05, + "loss": 0.5237, + "step": 2532 + }, + { + "epoch": 0.43, + "learning_rate": 1.9326169303327716e-05, + "loss": 0.5345, + "step": 2533 + }, + { + "epoch": 0.43, + "learning_rate": 1.932550464722394e-05, + "loss": 0.5247, + "step": 2534 + }, + { + "epoch": 0.43, + "learning_rate": 1.9324839674919252e-05, + "loss": 0.4995, + "step": 2535 + }, + { + "epoch": 0.43, + "learning_rate": 1.93241743864362e-05, + "loss": 0.5297, + "step": 2536 + }, + { + "epoch": 0.43, + "learning_rate": 1.9323508781797337e-05, + "loss": 0.5212, + "step": 2537 + }, + { + "epoch": 0.43, + "learning_rate": 1.9322842861025233e-05, + "loss": 0.5256, + "step": 2538 + }, + { + "epoch": 0.43, + "learning_rate": 1.932217662414247e-05, + "loss": 0.5823, + "step": 2539 + }, + { + "epoch": 0.43, + "learning_rate": 1.9321510071171637e-05, + "loss": 0.5379, + "step": 2540 + }, + { + "epoch": 0.43, + "learning_rate": 1.9320843202135333e-05, + "loss": 0.5613, + "step": 2541 + }, + { + "epoch": 0.43, + "learning_rate": 1.932017601705617e-05, + "loss": 0.5917, + "step": 2542 + }, + { + "epoch": 0.43, + "learning_rate": 1.9319508515956773e-05, + "loss": 0.5467, + "step": 2543 + }, + { + "epoch": 0.43, + "learning_rate": 1.9318840698859774e-05, + "loss": 0.5535, + "step": 2544 + }, + { + "epoch": 0.43, + "learning_rate": 1.9318172565787815e-05, + "loss": 0.5523, + "step": 2545 + }, + { + "epoch": 0.43, + "learning_rate": 1.931750411676355e-05, + "loss": 0.5047, + "step": 2546 + }, + { + "epoch": 0.43, + "learning_rate": 1.9316835351809646e-05, + "loss": 0.525, + "step": 2547 + }, + { + "epoch": 0.43, + "learning_rate": 1.931616627094878e-05, + "loss": 0.5246, + "step": 2548 + }, + { + "epoch": 0.43, + "learning_rate": 1.9315496874203637e-05, + "loss": 0.4945, + "step": 2549 + }, + { + "epoch": 0.43, + "learning_rate": 1.9314827161596916e-05, + "loss": 0.5333, + "step": 2550 + }, + { + "epoch": 0.44, + "learning_rate": 1.9314157133151324e-05, + "loss": 0.5079, + "step": 2551 + }, + { + "epoch": 0.44, + "learning_rate": 1.931348678888958e-05, + "loss": 0.5225, + "step": 2552 + }, + { + "epoch": 0.44, + "learning_rate": 1.931281612883441e-05, + "loss": 0.4926, + "step": 2553 + }, + { + "epoch": 0.44, + "learning_rate": 1.9312145153008558e-05, + "loss": 0.5246, + "step": 2554 + }, + { + "epoch": 0.44, + "learning_rate": 1.931147386143477e-05, + "loss": 0.5477, + "step": 2555 + }, + { + "epoch": 0.44, + "learning_rate": 1.9310802254135817e-05, + "loss": 0.4738, + "step": 2556 + }, + { + "epoch": 0.44, + "learning_rate": 1.9310130331134465e-05, + "loss": 0.5318, + "step": 2557 + }, + { + "epoch": 0.44, + "learning_rate": 1.9309458092453494e-05, + "loss": 0.5648, + "step": 2558 + }, + { + "epoch": 0.44, + "learning_rate": 1.9308785538115704e-05, + "loss": 0.539, + "step": 2559 + }, + { + "epoch": 0.44, + "learning_rate": 1.9308112668143894e-05, + "loss": 0.4914, + "step": 2560 + }, + { + "epoch": 0.44, + "learning_rate": 1.9307439482560885e-05, + "loss": 0.5517, + "step": 2561 + }, + { + "epoch": 0.44, + "learning_rate": 1.9306765981389497e-05, + "loss": 0.5713, + "step": 2562 + }, + { + "epoch": 0.44, + "learning_rate": 1.930609216465257e-05, + "loss": 0.5798, + "step": 2563 + }, + { + "epoch": 0.44, + "learning_rate": 1.9305418032372948e-05, + "loss": 0.5322, + "step": 2564 + }, + { + "epoch": 0.44, + "learning_rate": 1.9304743584573496e-05, + "loss": 0.5183, + "step": 2565 + }, + { + "epoch": 0.44, + "learning_rate": 1.9304068821277074e-05, + "loss": 0.5211, + "step": 2566 + }, + { + "epoch": 0.44, + "learning_rate": 1.9303393742506564e-05, + "loss": 0.5236, + "step": 2567 + }, + { + "epoch": 0.44, + "learning_rate": 1.930271834828486e-05, + "loss": 0.5106, + "step": 2568 + }, + { + "epoch": 0.44, + "learning_rate": 1.9302042638634857e-05, + "loss": 0.5148, + "step": 2569 + }, + { + "epoch": 0.44, + "learning_rate": 1.9301366613579467e-05, + "loss": 0.5995, + "step": 2570 + }, + { + "epoch": 0.44, + "learning_rate": 1.930069027314162e-05, + "loss": 0.5057, + "step": 2571 + }, + { + "epoch": 0.44, + "learning_rate": 1.9300013617344236e-05, + "loss": 0.5219, + "step": 2572 + }, + { + "epoch": 0.44, + "learning_rate": 1.929933664621027e-05, + "loss": 0.4881, + "step": 2573 + }, + { + "epoch": 0.44, + "learning_rate": 1.929865935976267e-05, + "loss": 0.5179, + "step": 2574 + }, + { + "epoch": 0.44, + "learning_rate": 1.92979817580244e-05, + "loss": 0.4952, + "step": 2575 + }, + { + "epoch": 0.44, + "learning_rate": 1.9297303841018438e-05, + "loss": 0.574, + "step": 2576 + }, + { + "epoch": 0.44, + "learning_rate": 1.929662560876777e-05, + "loss": 0.5577, + "step": 2577 + }, + { + "epoch": 0.44, + "learning_rate": 1.929594706129539e-05, + "loss": 0.5415, + "step": 2578 + }, + { + "epoch": 0.44, + "learning_rate": 1.9295268198624314e-05, + "loss": 0.5593, + "step": 2579 + }, + { + "epoch": 0.44, + "learning_rate": 1.9294589020777552e-05, + "loss": 0.5397, + "step": 2580 + }, + { + "epoch": 0.44, + "learning_rate": 1.9293909527778135e-05, + "loss": 0.5012, + "step": 2581 + }, + { + "epoch": 0.44, + "learning_rate": 1.9293229719649105e-05, + "loss": 0.5182, + "step": 2582 + }, + { + "epoch": 0.44, + "learning_rate": 1.929254959641351e-05, + "loss": 0.521, + "step": 2583 + }, + { + "epoch": 0.44, + "learning_rate": 1.929186915809441e-05, + "loss": 0.5384, + "step": 2584 + }, + { + "epoch": 0.44, + "learning_rate": 1.9291188404714876e-05, + "loss": 0.5145, + "step": 2585 + }, + { + "epoch": 0.44, + "learning_rate": 1.9290507336297996e-05, + "loss": 0.5123, + "step": 2586 + }, + { + "epoch": 0.44, + "learning_rate": 1.928982595286686e-05, + "loss": 0.5488, + "step": 2587 + }, + { + "epoch": 0.44, + "learning_rate": 1.928914425444457e-05, + "loss": 0.5404, + "step": 2588 + }, + { + "epoch": 0.44, + "learning_rate": 1.9288462241054243e-05, + "loss": 0.5444, + "step": 2589 + }, + { + "epoch": 0.44, + "learning_rate": 1.9287779912719e-05, + "loss": 0.5316, + "step": 2590 + }, + { + "epoch": 0.44, + "learning_rate": 1.928709726946198e-05, + "loss": 0.5457, + "step": 2591 + }, + { + "epoch": 0.44, + "learning_rate": 1.9286414311306333e-05, + "loss": 0.5409, + "step": 2592 + }, + { + "epoch": 0.44, + "learning_rate": 1.928573103827521e-05, + "loss": 0.4802, + "step": 2593 + }, + { + "epoch": 0.44, + "learning_rate": 1.928504745039178e-05, + "loss": 0.526, + "step": 2594 + }, + { + "epoch": 0.44, + "learning_rate": 1.9284363547679225e-05, + "loss": 0.5177, + "step": 2595 + }, + { + "epoch": 0.44, + "learning_rate": 1.9283679330160726e-05, + "loss": 0.5274, + "step": 2596 + }, + { + "epoch": 0.44, + "learning_rate": 1.9282994797859495e-05, + "loss": 0.4965, + "step": 2597 + }, + { + "epoch": 0.44, + "learning_rate": 1.9282309950798733e-05, + "loss": 0.4993, + "step": 2598 + }, + { + "epoch": 0.44, + "learning_rate": 1.9281624789001664e-05, + "loss": 0.5311, + "step": 2599 + }, + { + "epoch": 0.44, + "learning_rate": 1.9280939312491522e-05, + "loss": 0.5538, + "step": 2600 + }, + { + "epoch": 0.44, + "learning_rate": 1.9280253521291547e-05, + "loss": 0.5297, + "step": 2601 + }, + { + "epoch": 0.44, + "learning_rate": 1.9279567415424992e-05, + "loss": 0.5468, + "step": 2602 + }, + { + "epoch": 0.44, + "learning_rate": 1.9278880994915123e-05, + "loss": 0.5136, + "step": 2603 + }, + { + "epoch": 0.44, + "learning_rate": 1.9278194259785213e-05, + "loss": 0.5423, + "step": 2604 + }, + { + "epoch": 0.44, + "learning_rate": 1.9277507210058545e-05, + "loss": 0.5035, + "step": 2605 + }, + { + "epoch": 0.44, + "learning_rate": 1.927681984575842e-05, + "loss": 0.5595, + "step": 2606 + }, + { + "epoch": 0.44, + "learning_rate": 1.927613216690814e-05, + "loss": 0.5623, + "step": 2607 + }, + { + "epoch": 0.44, + "learning_rate": 1.9275444173531025e-05, + "loss": 0.5201, + "step": 2608 + }, + { + "epoch": 0.44, + "learning_rate": 1.9274755865650404e-05, + "loss": 0.5095, + "step": 2609 + }, + { + "epoch": 0.45, + "learning_rate": 1.9274067243289613e-05, + "loss": 0.4809, + "step": 2610 + }, + { + "epoch": 0.45, + "learning_rate": 1.9273378306471998e-05, + "loss": 0.4789, + "step": 2611 + }, + { + "epoch": 0.45, + "learning_rate": 1.9272689055220923e-05, + "loss": 0.5182, + "step": 2612 + }, + { + "epoch": 0.45, + "learning_rate": 1.927199948955976e-05, + "loss": 0.4893, + "step": 2613 + }, + { + "epoch": 0.45, + "learning_rate": 1.927130960951189e-05, + "loss": 0.5211, + "step": 2614 + }, + { + "epoch": 0.45, + "learning_rate": 1.92706194151007e-05, + "loss": 0.4955, + "step": 2615 + }, + { + "epoch": 0.45, + "learning_rate": 1.9269928906349594e-05, + "loss": 0.5181, + "step": 2616 + }, + { + "epoch": 0.45, + "learning_rate": 1.9269238083281993e-05, + "loss": 0.4914, + "step": 2617 + }, + { + "epoch": 0.45, + "learning_rate": 1.926854694592131e-05, + "loss": 0.4953, + "step": 2618 + }, + { + "epoch": 0.45, + "learning_rate": 1.9267855494290985e-05, + "loss": 0.5459, + "step": 2619 + }, + { + "epoch": 0.45, + "learning_rate": 1.9267163728414463e-05, + "loss": 0.513, + "step": 2620 + }, + { + "epoch": 0.45, + "learning_rate": 1.9266471648315196e-05, + "loss": 0.5373, + "step": 2621 + }, + { + "epoch": 0.45, + "learning_rate": 1.9265779254016653e-05, + "loss": 0.524, + "step": 2622 + }, + { + "epoch": 0.45, + "learning_rate": 1.9265086545542315e-05, + "loss": 0.5503, + "step": 2623 + }, + { + "epoch": 0.45, + "learning_rate": 1.9264393522915667e-05, + "loss": 0.5085, + "step": 2624 + }, + { + "epoch": 0.45, + "learning_rate": 1.92637001861602e-05, + "loss": 0.5448, + "step": 2625 + }, + { + "epoch": 0.45, + "learning_rate": 1.9263006535299437e-05, + "loss": 0.5673, + "step": 2626 + }, + { + "epoch": 0.45, + "learning_rate": 1.9262312570356885e-05, + "loss": 0.5044, + "step": 2627 + }, + { + "epoch": 0.45, + "learning_rate": 1.9261618291356082e-05, + "loss": 0.4998, + "step": 2628 + }, + { + "epoch": 0.45, + "learning_rate": 1.9260923698320565e-05, + "loss": 0.508, + "step": 2629 + }, + { + "epoch": 0.45, + "learning_rate": 1.926022879127389e-05, + "loss": 0.5531, + "step": 2630 + }, + { + "epoch": 0.45, + "learning_rate": 1.9259533570239613e-05, + "loss": 0.5299, + "step": 2631 + }, + { + "epoch": 0.45, + "learning_rate": 1.925883803524131e-05, + "loss": 0.4581, + "step": 2632 + }, + { + "epoch": 0.45, + "learning_rate": 1.9258142186302572e-05, + "loss": 0.5461, + "step": 2633 + }, + { + "epoch": 0.45, + "learning_rate": 1.9257446023446982e-05, + "loss": 0.5628, + "step": 2634 + }, + { + "epoch": 0.45, + "learning_rate": 1.9256749546698146e-05, + "loss": 0.5432, + "step": 2635 + }, + { + "epoch": 0.45, + "learning_rate": 1.9256052756079687e-05, + "loss": 0.5191, + "step": 2636 + }, + { + "epoch": 0.45, + "learning_rate": 1.9255355651615225e-05, + "loss": 0.5289, + "step": 2637 + }, + { + "epoch": 0.45, + "learning_rate": 1.92546582333284e-05, + "loss": 0.5734, + "step": 2638 + }, + { + "epoch": 0.45, + "learning_rate": 1.925396050124286e-05, + "loss": 0.5376, + "step": 2639 + }, + { + "epoch": 0.45, + "learning_rate": 1.9253262455382256e-05, + "loss": 0.4909, + "step": 2640 + }, + { + "epoch": 0.45, + "learning_rate": 1.9252564095770266e-05, + "loss": 0.5197, + "step": 2641 + }, + { + "epoch": 0.45, + "learning_rate": 1.9251865422430563e-05, + "loss": 0.5152, + "step": 2642 + }, + { + "epoch": 0.45, + "learning_rate": 1.9251166435386837e-05, + "loss": 0.484, + "step": 2643 + }, + { + "epoch": 0.45, + "learning_rate": 1.9250467134662797e-05, + "loss": 0.4972, + "step": 2644 + }, + { + "epoch": 0.45, + "learning_rate": 1.9249767520282144e-05, + "loss": 0.5042, + "step": 2645 + }, + { + "epoch": 0.45, + "learning_rate": 1.9249067592268606e-05, + "loss": 0.5158, + "step": 2646 + }, + { + "epoch": 0.45, + "learning_rate": 1.924836735064591e-05, + "loss": 0.4993, + "step": 2647 + }, + { + "epoch": 0.45, + "learning_rate": 1.9247666795437808e-05, + "loss": 0.5039, + "step": 2648 + }, + { + "epoch": 0.45, + "learning_rate": 1.9246965926668046e-05, + "loss": 0.5002, + "step": 2649 + }, + { + "epoch": 0.45, + "learning_rate": 1.924626474436039e-05, + "loss": 0.5353, + "step": 2650 + }, + { + "epoch": 0.45, + "learning_rate": 1.9245563248538616e-05, + "loss": 0.5295, + "step": 2651 + }, + { + "epoch": 0.45, + "learning_rate": 1.924486143922651e-05, + "loss": 0.5147, + "step": 2652 + }, + { + "epoch": 0.45, + "learning_rate": 1.924415931644787e-05, + "loss": 0.4848, + "step": 2653 + }, + { + "epoch": 0.45, + "learning_rate": 1.9243456880226497e-05, + "loss": 0.5253, + "step": 2654 + }, + { + "epoch": 0.45, + "learning_rate": 1.9242754130586218e-05, + "loss": 0.5297, + "step": 2655 + }, + { + "epoch": 0.45, + "learning_rate": 1.924205106755085e-05, + "loss": 0.5215, + "step": 2656 + }, + { + "epoch": 0.45, + "learning_rate": 1.9241347691144245e-05, + "loss": 0.527, + "step": 2657 + }, + { + "epoch": 0.45, + "learning_rate": 1.924064400139024e-05, + "loss": 0.498, + "step": 2658 + }, + { + "epoch": 0.45, + "learning_rate": 1.9239939998312698e-05, + "loss": 0.5188, + "step": 2659 + }, + { + "epoch": 0.45, + "learning_rate": 1.9239235681935495e-05, + "loss": 0.4712, + "step": 2660 + }, + { + "epoch": 0.45, + "learning_rate": 1.923853105228251e-05, + "loss": 0.5212, + "step": 2661 + }, + { + "epoch": 0.45, + "learning_rate": 1.9237826109377634e-05, + "loss": 0.5662, + "step": 2662 + }, + { + "epoch": 0.45, + "learning_rate": 1.923712085324477e-05, + "loss": 0.5068, + "step": 2663 + }, + { + "epoch": 0.45, + "learning_rate": 1.9236415283907828e-05, + "loss": 0.5305, + "step": 2664 + }, + { + "epoch": 0.45, + "learning_rate": 1.9235709401390737e-05, + "loss": 0.5268, + "step": 2665 + }, + { + "epoch": 0.45, + "learning_rate": 1.9235003205717428e-05, + "loss": 0.5223, + "step": 2666 + }, + { + "epoch": 0.45, + "learning_rate": 1.923429669691185e-05, + "loss": 0.5787, + "step": 2667 + }, + { + "epoch": 0.46, + "learning_rate": 1.923358987499795e-05, + "loss": 0.4922, + "step": 2668 + }, + { + "epoch": 0.46, + "learning_rate": 1.9232882739999707e-05, + "loss": 0.5139, + "step": 2669 + }, + { + "epoch": 0.46, + "learning_rate": 1.923217529194109e-05, + "loss": 0.4885, + "step": 2670 + }, + { + "epoch": 0.46, + "learning_rate": 1.9231467530846085e-05, + "loss": 0.4949, + "step": 2671 + }, + { + "epoch": 0.46, + "learning_rate": 1.9230759456738695e-05, + "loss": 0.5345, + "step": 2672 + }, + { + "epoch": 0.46, + "learning_rate": 1.9230051069642927e-05, + "loss": 0.5191, + "step": 2673 + }, + { + "epoch": 0.46, + "learning_rate": 1.9229342369582796e-05, + "loss": 0.5427, + "step": 2674 + }, + { + "epoch": 0.46, + "learning_rate": 1.922863335658234e-05, + "loss": 0.5595, + "step": 2675 + }, + { + "epoch": 0.46, + "learning_rate": 1.9227924030665593e-05, + "loss": 0.5423, + "step": 2676 + }, + { + "epoch": 0.46, + "learning_rate": 1.922721439185661e-05, + "loss": 0.5321, + "step": 2677 + }, + { + "epoch": 0.46, + "learning_rate": 1.9226504440179452e-05, + "loss": 0.5186, + "step": 2678 + }, + { + "epoch": 0.46, + "learning_rate": 1.922579417565819e-05, + "loss": 0.4901, + "step": 2679 + }, + { + "epoch": 0.46, + "learning_rate": 1.9225083598316905e-05, + "loss": 0.5636, + "step": 2680 + }, + { + "epoch": 0.46, + "learning_rate": 1.92243727081797e-05, + "loss": 0.5187, + "step": 2681 + }, + { + "epoch": 0.46, + "learning_rate": 1.9223661505270668e-05, + "loss": 0.5587, + "step": 2682 + }, + { + "epoch": 0.46, + "learning_rate": 1.9222949989613928e-05, + "loss": 0.5313, + "step": 2683 + }, + { + "epoch": 0.46, + "learning_rate": 1.9222238161233607e-05, + "loss": 0.5088, + "step": 2684 + }, + { + "epoch": 0.46, + "learning_rate": 1.922152602015384e-05, + "loss": 0.5766, + "step": 2685 + }, + { + "epoch": 0.46, + "learning_rate": 1.9220813566398775e-05, + "loss": 0.5245, + "step": 2686 + }, + { + "epoch": 0.46, + "learning_rate": 1.922010079999257e-05, + "loss": 0.4843, + "step": 2687 + }, + { + "epoch": 0.46, + "learning_rate": 1.9219387720959383e-05, + "loss": 0.523, + "step": 2688 + }, + { + "epoch": 0.46, + "learning_rate": 1.9218674329323406e-05, + "loss": 0.5781, + "step": 2689 + }, + { + "epoch": 0.46, + "learning_rate": 1.921796062510882e-05, + "loss": 0.515, + "step": 2690 + }, + { + "epoch": 0.46, + "learning_rate": 1.921724660833983e-05, + "loss": 0.4933, + "step": 2691 + }, + { + "epoch": 0.46, + "learning_rate": 1.921653227904064e-05, + "loss": 0.503, + "step": 2692 + }, + { + "epoch": 0.46, + "learning_rate": 1.921581763723548e-05, + "loss": 0.5721, + "step": 2693 + }, + { + "epoch": 0.46, + "learning_rate": 1.9215102682948568e-05, + "loss": 0.5285, + "step": 2694 + }, + { + "epoch": 0.46, + "learning_rate": 1.9214387416204158e-05, + "loss": 0.5361, + "step": 2695 + }, + { + "epoch": 0.46, + "learning_rate": 1.9213671837026496e-05, + "loss": 0.5514, + "step": 2696 + }, + { + "epoch": 0.46, + "learning_rate": 1.9212955945439847e-05, + "loss": 0.4754, + "step": 2697 + }, + { + "epoch": 0.46, + "learning_rate": 1.9212239741468487e-05, + "loss": 0.5342, + "step": 2698 + }, + { + "epoch": 0.46, + "learning_rate": 1.9211523225136696e-05, + "loss": 0.5405, + "step": 2699 + }, + { + "epoch": 0.46, + "learning_rate": 1.9210806396468772e-05, + "loss": 0.4838, + "step": 2700 + }, + { + "epoch": 0.46, + "learning_rate": 1.921008925548902e-05, + "loss": 0.5332, + "step": 2701 + }, + { + "epoch": 0.46, + "learning_rate": 1.920937180222176e-05, + "loss": 0.5197, + "step": 2702 + }, + { + "epoch": 0.46, + "learning_rate": 1.920865403669131e-05, + "loss": 0.5435, + "step": 2703 + }, + { + "epoch": 0.46, + "learning_rate": 1.920793595892202e-05, + "loss": 0.5144, + "step": 2704 + }, + { + "epoch": 0.46, + "learning_rate": 1.9207217568938222e-05, + "loss": 0.4892, + "step": 2705 + }, + { + "epoch": 0.46, + "learning_rate": 1.920649886676429e-05, + "loss": 0.5414, + "step": 2706 + }, + { + "epoch": 0.46, + "learning_rate": 1.920577985242458e-05, + "loss": 0.4841, + "step": 2707 + }, + { + "epoch": 0.46, + "learning_rate": 1.9205060525943483e-05, + "loss": 0.5199, + "step": 2708 + }, + { + "epoch": 0.46, + "learning_rate": 1.920434088734538e-05, + "loss": 0.5233, + "step": 2709 + }, + { + "epoch": 0.46, + "learning_rate": 1.920362093665468e-05, + "loss": 0.5227, + "step": 2710 + }, + { + "epoch": 0.46, + "learning_rate": 1.9202900673895786e-05, + "loss": 0.4893, + "step": 2711 + }, + { + "epoch": 0.46, + "learning_rate": 1.9202180099093123e-05, + "loss": 0.4722, + "step": 2712 + }, + { + "epoch": 0.46, + "learning_rate": 1.920145921227113e-05, + "loss": 0.4954, + "step": 2713 + }, + { + "epoch": 0.46, + "learning_rate": 1.9200738013454242e-05, + "loss": 0.4937, + "step": 2714 + }, + { + "epoch": 0.46, + "learning_rate": 1.9200016502666918e-05, + "loss": 0.5433, + "step": 2715 + }, + { + "epoch": 0.46, + "learning_rate": 1.919929467993362e-05, + "loss": 0.5473, + "step": 2716 + }, + { + "epoch": 0.46, + "learning_rate": 1.919857254527882e-05, + "loss": 0.4787, + "step": 2717 + }, + { + "epoch": 0.46, + "learning_rate": 1.919785009872701e-05, + "loss": 0.544, + "step": 2718 + }, + { + "epoch": 0.46, + "learning_rate": 1.9197127340302683e-05, + "loss": 0.5201, + "step": 2719 + }, + { + "epoch": 0.46, + "learning_rate": 1.9196404270030345e-05, + "loss": 0.4783, + "step": 2720 + }, + { + "epoch": 0.46, + "learning_rate": 1.9195680887934513e-05, + "loss": 0.5685, + "step": 2721 + }, + { + "epoch": 0.46, + "learning_rate": 1.9194957194039718e-05, + "loss": 0.5261, + "step": 2722 + }, + { + "epoch": 0.46, + "learning_rate": 1.9194233188370492e-05, + "loss": 0.5233, + "step": 2723 + }, + { + "epoch": 0.46, + "learning_rate": 1.9193508870951386e-05, + "loss": 0.539, + "step": 2724 + }, + { + "epoch": 0.46, + "learning_rate": 1.9192784241806964e-05, + "loss": 0.5534, + "step": 2725 + }, + { + "epoch": 0.46, + "learning_rate": 1.9192059300961797e-05, + "loss": 0.4847, + "step": 2726 + }, + { + "epoch": 0.47, + "learning_rate": 1.9191334048440456e-05, + "loss": 0.4895, + "step": 2727 + }, + { + "epoch": 0.47, + "learning_rate": 1.919060848426754e-05, + "loss": 0.5303, + "step": 2728 + }, + { + "epoch": 0.47, + "learning_rate": 1.918988260846765e-05, + "loss": 0.5649, + "step": 2729 + }, + { + "epoch": 0.47, + "learning_rate": 1.91891564210654e-05, + "loss": 0.5225, + "step": 2730 + }, + { + "epoch": 0.47, + "learning_rate": 1.9188429922085406e-05, + "loss": 0.5137, + "step": 2731 + }, + { + "epoch": 0.47, + "learning_rate": 1.9187703111552305e-05, + "loss": 0.5022, + "step": 2732 + }, + { + "epoch": 0.47, + "learning_rate": 1.9186975989490742e-05, + "loss": 0.4992, + "step": 2733 + }, + { + "epoch": 0.47, + "learning_rate": 1.9186248555925376e-05, + "loss": 0.514, + "step": 2734 + }, + { + "epoch": 0.47, + "learning_rate": 1.9185520810880865e-05, + "loss": 0.5159, + "step": 2735 + }, + { + "epoch": 0.47, + "learning_rate": 1.9184792754381886e-05, + "loss": 0.5355, + "step": 2736 + }, + { + "epoch": 0.47, + "learning_rate": 1.9184064386453127e-05, + "loss": 0.5451, + "step": 2737 + }, + { + "epoch": 0.47, + "learning_rate": 1.9183335707119285e-05, + "loss": 0.53, + "step": 2738 + }, + { + "epoch": 0.47, + "learning_rate": 1.918260671640507e-05, + "loss": 0.515, + "step": 2739 + }, + { + "epoch": 0.47, + "learning_rate": 1.9181877414335193e-05, + "loss": 0.5112, + "step": 2740 + }, + { + "epoch": 0.47, + "learning_rate": 1.9181147800934386e-05, + "loss": 0.5323, + "step": 2741 + }, + { + "epoch": 0.47, + "learning_rate": 1.9180417876227388e-05, + "loss": 0.5166, + "step": 2742 + }, + { + "epoch": 0.47, + "learning_rate": 1.917968764023895e-05, + "loss": 0.5131, + "step": 2743 + }, + { + "epoch": 0.47, + "learning_rate": 1.9178957092993836e-05, + "loss": 0.568, + "step": 2744 + }, + { + "epoch": 0.47, + "learning_rate": 1.9178226234516806e-05, + "loss": 0.5095, + "step": 2745 + }, + { + "epoch": 0.47, + "learning_rate": 1.917749506483265e-05, + "loss": 0.4695, + "step": 2746 + }, + { + "epoch": 0.47, + "learning_rate": 1.9176763583966155e-05, + "loss": 0.5052, + "step": 2747 + }, + { + "epoch": 0.47, + "learning_rate": 1.9176031791942126e-05, + "loss": 0.5167, + "step": 2748 + }, + { + "epoch": 0.47, + "learning_rate": 1.9175299688785376e-05, + "loss": 0.5518, + "step": 2749 + }, + { + "epoch": 0.47, + "learning_rate": 1.917456727452073e-05, + "loss": 0.5377, + "step": 2750 + }, + { + "epoch": 0.47, + "learning_rate": 1.9173834549173017e-05, + "loss": 0.4957, + "step": 2751 + }, + { + "epoch": 0.47, + "learning_rate": 1.9173101512767086e-05, + "loss": 0.484, + "step": 2752 + }, + { + "epoch": 0.47, + "learning_rate": 1.9172368165327792e-05, + "loss": 0.4927, + "step": 2753 + }, + { + "epoch": 0.47, + "learning_rate": 1.917163450688e-05, + "loss": 0.5322, + "step": 2754 + }, + { + "epoch": 0.47, + "learning_rate": 1.917090053744858e-05, + "loss": 0.5142, + "step": 2755 + }, + { + "epoch": 0.47, + "learning_rate": 1.917016625705843e-05, + "loss": 0.4625, + "step": 2756 + }, + { + "epoch": 0.47, + "learning_rate": 1.916943166573444e-05, + "loss": 0.5269, + "step": 2757 + }, + { + "epoch": 0.47, + "learning_rate": 1.9168696763501523e-05, + "loss": 0.4892, + "step": 2758 + }, + { + "epoch": 0.47, + "learning_rate": 1.9167961550384587e-05, + "loss": 0.5508, + "step": 2759 + }, + { + "epoch": 0.47, + "learning_rate": 1.9167226026408573e-05, + "loss": 0.5346, + "step": 2760 + }, + { + "epoch": 0.47, + "learning_rate": 1.916649019159842e-05, + "loss": 0.4972, + "step": 2761 + }, + { + "epoch": 0.47, + "learning_rate": 1.9165754045979062e-05, + "loss": 0.5238, + "step": 2762 + }, + { + "epoch": 0.47, + "learning_rate": 1.916501758957548e-05, + "loss": 0.5102, + "step": 2763 + }, + { + "epoch": 0.47, + "learning_rate": 1.9164280822412634e-05, + "loss": 0.5325, + "step": 2764 + }, + { + "epoch": 0.47, + "learning_rate": 1.916354374451551e-05, + "loss": 0.5688, + "step": 2765 + }, + { + "epoch": 0.47, + "learning_rate": 1.9162806355909094e-05, + "loss": 0.5304, + "step": 2766 + }, + { + "epoch": 0.47, + "learning_rate": 1.9162068656618396e-05, + "loss": 0.5169, + "step": 2767 + }, + { + "epoch": 0.47, + "learning_rate": 1.9161330646668423e-05, + "loss": 0.5151, + "step": 2768 + }, + { + "epoch": 0.47, + "learning_rate": 1.9160592326084204e-05, + "loss": 0.5509, + "step": 2769 + }, + { + "epoch": 0.47, + "learning_rate": 1.915985369489077e-05, + "loss": 0.5137, + "step": 2770 + }, + { + "epoch": 0.47, + "learning_rate": 1.915911475311317e-05, + "loss": 0.4744, + "step": 2771 + }, + { + "epoch": 0.47, + "learning_rate": 1.9158375500776454e-05, + "loss": 0.5561, + "step": 2772 + }, + { + "epoch": 0.47, + "learning_rate": 1.915763593790569e-05, + "loss": 0.5178, + "step": 2773 + }, + { + "epoch": 0.47, + "learning_rate": 1.9156896064525955e-05, + "loss": 0.5331, + "step": 2774 + }, + { + "epoch": 0.47, + "learning_rate": 1.9156155880662335e-05, + "loss": 0.5694, + "step": 2775 + }, + { + "epoch": 0.47, + "learning_rate": 1.915541538633993e-05, + "loss": 0.5259, + "step": 2776 + }, + { + "epoch": 0.47, + "learning_rate": 1.9154674581583847e-05, + "loss": 0.4954, + "step": 2777 + }, + { + "epoch": 0.47, + "learning_rate": 1.91539334664192e-05, + "loss": 0.5261, + "step": 2778 + }, + { + "epoch": 0.47, + "learning_rate": 1.9153192040871125e-05, + "loss": 0.5393, + "step": 2779 + }, + { + "epoch": 0.47, + "learning_rate": 1.915245030496476e-05, + "loss": 0.4948, + "step": 2780 + }, + { + "epoch": 0.47, + "learning_rate": 1.915170825872525e-05, + "loss": 0.5193, + "step": 2781 + }, + { + "epoch": 0.47, + "learning_rate": 1.9150965902177758e-05, + "loss": 0.5286, + "step": 2782 + }, + { + "epoch": 0.47, + "learning_rate": 1.9150223235347462e-05, + "loss": 0.5098, + "step": 2783 + }, + { + "epoch": 0.47, + "learning_rate": 1.9149480258259535e-05, + "loss": 0.507, + "step": 2784 + }, + { + "epoch": 0.47, + "learning_rate": 1.914873697093917e-05, + "loss": 0.5497, + "step": 2785 + }, + { + "epoch": 0.48, + "learning_rate": 1.9147993373411572e-05, + "loss": 0.5075, + "step": 2786 + }, + { + "epoch": 0.48, + "learning_rate": 1.914724946570196e-05, + "loss": 0.5089, + "step": 2787 + }, + { + "epoch": 0.48, + "learning_rate": 1.9146505247835546e-05, + "loss": 0.5133, + "step": 2788 + }, + { + "epoch": 0.48, + "learning_rate": 1.9145760719837573e-05, + "loss": 0.5275, + "step": 2789 + }, + { + "epoch": 0.48, + "learning_rate": 1.9145015881733283e-05, + "loss": 0.5219, + "step": 2790 + }, + { + "epoch": 0.48, + "learning_rate": 1.914427073354793e-05, + "loss": 0.5216, + "step": 2791 + }, + { + "epoch": 0.48, + "learning_rate": 1.9143525275306784e-05, + "loss": 0.5304, + "step": 2792 + }, + { + "epoch": 0.48, + "learning_rate": 1.914277950703512e-05, + "loss": 0.5305, + "step": 2793 + }, + { + "epoch": 0.48, + "learning_rate": 1.9142033428758216e-05, + "loss": 0.5177, + "step": 2794 + }, + { + "epoch": 0.48, + "learning_rate": 1.9141287040501382e-05, + "loss": 0.5259, + "step": 2795 + }, + { + "epoch": 0.48, + "learning_rate": 1.914054034228992e-05, + "loss": 0.5177, + "step": 2796 + }, + { + "epoch": 0.48, + "learning_rate": 1.9139793334149148e-05, + "loss": 0.4935, + "step": 2797 + }, + { + "epoch": 0.48, + "learning_rate": 1.91390460161044e-05, + "loss": 0.515, + "step": 2798 + }, + { + "epoch": 0.48, + "learning_rate": 1.9138298388181004e-05, + "loss": 0.4967, + "step": 2799 + }, + { + "epoch": 0.48, + "learning_rate": 1.913755045040432e-05, + "loss": 0.4813, + "step": 2800 + }, + { + "epoch": 0.48, + "learning_rate": 1.913680220279971e-05, + "loss": 0.4961, + "step": 2801 + }, + { + "epoch": 0.48, + "learning_rate": 1.9136053645392535e-05, + "loss": 0.5371, + "step": 2802 + }, + { + "epoch": 0.48, + "learning_rate": 1.9135304778208184e-05, + "loss": 0.5025, + "step": 2803 + }, + { + "epoch": 0.48, + "learning_rate": 1.9134555601272048e-05, + "loss": 0.5378, + "step": 2804 + }, + { + "epoch": 0.48, + "learning_rate": 1.9133806114609527e-05, + "loss": 0.5546, + "step": 2805 + }, + { + "epoch": 0.48, + "learning_rate": 1.9133056318246034e-05, + "loss": 0.5102, + "step": 2806 + }, + { + "epoch": 0.48, + "learning_rate": 1.9132306212206994e-05, + "loss": 0.4788, + "step": 2807 + }, + { + "epoch": 0.48, + "learning_rate": 1.9131555796517842e-05, + "loss": 0.4584, + "step": 2808 + }, + { + "epoch": 0.48, + "learning_rate": 1.9130805071204022e-05, + "loss": 0.5476, + "step": 2809 + }, + { + "epoch": 0.48, + "learning_rate": 1.9130054036290987e-05, + "loss": 0.5446, + "step": 2810 + }, + { + "epoch": 0.48, + "learning_rate": 1.91293026918042e-05, + "loss": 0.518, + "step": 2811 + }, + { + "epoch": 0.48, + "learning_rate": 1.9128551037769146e-05, + "loss": 0.487, + "step": 2812 + }, + { + "epoch": 0.48, + "learning_rate": 1.91277990742113e-05, + "loss": 0.5141, + "step": 2813 + }, + { + "epoch": 0.48, + "learning_rate": 1.912704680115617e-05, + "loss": 0.5518, + "step": 2814 + }, + { + "epoch": 0.48, + "learning_rate": 1.9126294218629257e-05, + "loss": 0.5348, + "step": 2815 + }, + { + "epoch": 0.48, + "learning_rate": 1.9125541326656075e-05, + "loss": 0.4828, + "step": 2816 + }, + { + "epoch": 0.48, + "learning_rate": 1.912478812526216e-05, + "loss": 0.5326, + "step": 2817 + }, + { + "epoch": 0.48, + "learning_rate": 1.912403461447305e-05, + "loss": 0.4927, + "step": 2818 + }, + { + "epoch": 0.48, + "learning_rate": 1.9123280794314293e-05, + "loss": 0.5273, + "step": 2819 + }, + { + "epoch": 0.48, + "learning_rate": 1.9122526664811444e-05, + "loss": 0.5588, + "step": 2820 + }, + { + "epoch": 0.48, + "learning_rate": 1.912177222599008e-05, + "loss": 0.5841, + "step": 2821 + }, + { + "epoch": 0.48, + "learning_rate": 1.912101747787578e-05, + "loss": 0.536, + "step": 2822 + }, + { + "epoch": 0.48, + "learning_rate": 1.9120262420494135e-05, + "loss": 0.5487, + "step": 2823 + }, + { + "epoch": 0.48, + "learning_rate": 1.9119507053870748e-05, + "loss": 0.4978, + "step": 2824 + }, + { + "epoch": 0.48, + "learning_rate": 1.9118751378031228e-05, + "loss": 0.5088, + "step": 2825 + }, + { + "epoch": 0.48, + "learning_rate": 1.91179953930012e-05, + "loss": 0.4998, + "step": 2826 + }, + { + "epoch": 0.48, + "learning_rate": 1.9117239098806296e-05, + "loss": 0.5385, + "step": 2827 + }, + { + "epoch": 0.48, + "learning_rate": 1.9116482495472163e-05, + "loss": 0.4907, + "step": 2828 + }, + { + "epoch": 0.48, + "learning_rate": 1.9115725583024452e-05, + "loss": 0.5021, + "step": 2829 + }, + { + "epoch": 0.48, + "learning_rate": 1.911496836148883e-05, + "loss": 0.4653, + "step": 2830 + }, + { + "epoch": 0.48, + "learning_rate": 1.911421083089097e-05, + "loss": 0.5033, + "step": 2831 + }, + { + "epoch": 0.48, + "learning_rate": 1.9113452991256557e-05, + "loss": 0.4841, + "step": 2832 + }, + { + "epoch": 0.48, + "learning_rate": 1.911269484261129e-05, + "loss": 0.5322, + "step": 2833 + }, + { + "epoch": 0.48, + "learning_rate": 1.9111936384980877e-05, + "loss": 0.474, + "step": 2834 + }, + { + "epoch": 0.48, + "learning_rate": 1.911117761839103e-05, + "loss": 0.535, + "step": 2835 + }, + { + "epoch": 0.48, + "learning_rate": 1.9110418542867477e-05, + "loss": 0.4805, + "step": 2836 + }, + { + "epoch": 0.48, + "learning_rate": 1.9109659158435963e-05, + "loss": 0.506, + "step": 2837 + }, + { + "epoch": 0.48, + "learning_rate": 1.9108899465122227e-05, + "loss": 0.4731, + "step": 2838 + }, + { + "epoch": 0.48, + "learning_rate": 1.9108139462952035e-05, + "loss": 0.5226, + "step": 2839 + }, + { + "epoch": 0.48, + "learning_rate": 1.9107379151951157e-05, + "loss": 0.5095, + "step": 2840 + }, + { + "epoch": 0.48, + "learning_rate": 1.9106618532145366e-05, + "loss": 0.5242, + "step": 2841 + }, + { + "epoch": 0.48, + "learning_rate": 1.9105857603560457e-05, + "loss": 0.5737, + "step": 2842 + }, + { + "epoch": 0.48, + "learning_rate": 1.9105096366222233e-05, + "loss": 0.5239, + "step": 2843 + }, + { + "epoch": 0.49, + "learning_rate": 1.91043348201565e-05, + "loss": 0.5369, + "step": 2844 + }, + { + "epoch": 0.49, + "learning_rate": 1.9103572965389086e-05, + "loss": 0.5171, + "step": 2845 + }, + { + "epoch": 0.49, + "learning_rate": 1.9102810801945817e-05, + "loss": 0.4903, + "step": 2846 + }, + { + "epoch": 0.49, + "learning_rate": 1.910204832985254e-05, + "loss": 0.5174, + "step": 2847 + }, + { + "epoch": 0.49, + "learning_rate": 1.9101285549135104e-05, + "loss": 0.5067, + "step": 2848 + }, + { + "epoch": 0.49, + "learning_rate": 1.910052245981938e-05, + "loss": 0.5286, + "step": 2849 + }, + { + "epoch": 0.49, + "learning_rate": 1.9099759061931235e-05, + "loss": 0.554, + "step": 2850 + }, + { + "epoch": 0.49, + "learning_rate": 1.9098995355496558e-05, + "loss": 0.5202, + "step": 2851 + }, + { + "epoch": 0.49, + "learning_rate": 1.909823134054124e-05, + "loss": 0.4967, + "step": 2852 + }, + { + "epoch": 0.49, + "learning_rate": 1.909746701709119e-05, + "loss": 0.5063, + "step": 2853 + }, + { + "epoch": 0.49, + "learning_rate": 1.9096702385172324e-05, + "loss": 0.4854, + "step": 2854 + }, + { + "epoch": 0.49, + "learning_rate": 1.9095937444810566e-05, + "loss": 0.5166, + "step": 2855 + }, + { + "epoch": 0.49, + "learning_rate": 1.9095172196031854e-05, + "loss": 0.5164, + "step": 2856 + }, + { + "epoch": 0.49, + "learning_rate": 1.9094406638862137e-05, + "loss": 0.5305, + "step": 2857 + }, + { + "epoch": 0.49, + "learning_rate": 1.9093640773327375e-05, + "loss": 0.4843, + "step": 2858 + }, + { + "epoch": 0.49, + "learning_rate": 1.9092874599453525e-05, + "loss": 0.4937, + "step": 2859 + }, + { + "epoch": 0.49, + "learning_rate": 1.909210811726658e-05, + "loss": 0.5002, + "step": 2860 + }, + { + "epoch": 0.49, + "learning_rate": 1.909134132679252e-05, + "loss": 0.4824, + "step": 2861 + }, + { + "epoch": 0.49, + "learning_rate": 1.9090574228057348e-05, + "loss": 0.5182, + "step": 2862 + }, + { + "epoch": 0.49, + "learning_rate": 1.9089806821087076e-05, + "loss": 0.5445, + "step": 2863 + }, + { + "epoch": 0.49, + "learning_rate": 1.9089039105907717e-05, + "loss": 0.5136, + "step": 2864 + }, + { + "epoch": 0.49, + "learning_rate": 1.9088271082545314e-05, + "loss": 0.5138, + "step": 2865 + }, + { + "epoch": 0.49, + "learning_rate": 1.9087502751025898e-05, + "loss": 0.5009, + "step": 2866 + }, + { + "epoch": 0.49, + "learning_rate": 1.908673411137552e-05, + "loss": 0.5045, + "step": 2867 + }, + { + "epoch": 0.49, + "learning_rate": 1.908596516362025e-05, + "loss": 0.5665, + "step": 2868 + }, + { + "epoch": 0.49, + "learning_rate": 1.908519590778616e-05, + "loss": 0.5056, + "step": 2869 + }, + { + "epoch": 0.49, + "learning_rate": 1.9084426343899328e-05, + "loss": 0.543, + "step": 2870 + }, + { + "epoch": 0.49, + "learning_rate": 1.9083656471985855e-05, + "loss": 0.4703, + "step": 2871 + }, + { + "epoch": 0.49, + "learning_rate": 1.9082886292071836e-05, + "loss": 0.4855, + "step": 2872 + }, + { + "epoch": 0.49, + "learning_rate": 1.908211580418339e-05, + "loss": 0.4882, + "step": 2873 + }, + { + "epoch": 0.49, + "learning_rate": 1.9081345008346645e-05, + "loss": 0.5242, + "step": 2874 + }, + { + "epoch": 0.49, + "learning_rate": 1.9080573904587733e-05, + "loss": 0.5396, + "step": 2875 + }, + { + "epoch": 0.49, + "learning_rate": 1.90798024929328e-05, + "loss": 0.518, + "step": 2876 + }, + { + "epoch": 0.49, + "learning_rate": 1.9079030773408003e-05, + "loss": 0.515, + "step": 2877 + }, + { + "epoch": 0.49, + "learning_rate": 1.907825874603951e-05, + "loss": 0.5397, + "step": 2878 + }, + { + "epoch": 0.49, + "learning_rate": 1.9077486410853494e-05, + "loss": 0.5173, + "step": 2879 + }, + { + "epoch": 0.49, + "learning_rate": 1.907671376787615e-05, + "loss": 0.5476, + "step": 2880 + }, + { + "epoch": 0.49, + "learning_rate": 1.9075940817133668e-05, + "loss": 0.5194, + "step": 2881 + }, + { + "epoch": 0.49, + "learning_rate": 1.907516755865226e-05, + "loss": 0.5149, + "step": 2882 + }, + { + "epoch": 0.49, + "learning_rate": 1.9074393992458146e-05, + "loss": 0.5265, + "step": 2883 + }, + { + "epoch": 0.49, + "learning_rate": 1.9073620118577553e-05, + "loss": 0.4929, + "step": 2884 + }, + { + "epoch": 0.49, + "learning_rate": 1.9072845937036728e-05, + "loss": 0.4875, + "step": 2885 + }, + { + "epoch": 0.49, + "learning_rate": 1.9072071447861912e-05, + "loss": 0.5586, + "step": 2886 + }, + { + "epoch": 0.49, + "learning_rate": 1.9071296651079367e-05, + "loss": 0.5249, + "step": 2887 + }, + { + "epoch": 0.49, + "learning_rate": 1.907052154671537e-05, + "loss": 0.4938, + "step": 2888 + }, + { + "epoch": 0.49, + "learning_rate": 1.90697461347962e-05, + "loss": 0.524, + "step": 2889 + }, + { + "epoch": 0.49, + "learning_rate": 1.9068970415348143e-05, + "loss": 0.5158, + "step": 2890 + }, + { + "epoch": 0.49, + "learning_rate": 1.906819438839751e-05, + "loss": 0.4882, + "step": 2891 + }, + { + "epoch": 0.49, + "learning_rate": 1.906741805397061e-05, + "loss": 0.5437, + "step": 2892 + }, + { + "epoch": 0.49, + "learning_rate": 1.9066641412093764e-05, + "loss": 0.4756, + "step": 2893 + }, + { + "epoch": 0.49, + "learning_rate": 1.906586446279331e-05, + "loss": 0.5799, + "step": 2894 + }, + { + "epoch": 0.49, + "learning_rate": 1.9065087206095594e-05, + "loss": 0.559, + "step": 2895 + }, + { + "epoch": 0.49, + "learning_rate": 1.9064309642026963e-05, + "loss": 0.5207, + "step": 2896 + }, + { + "epoch": 0.49, + "learning_rate": 1.9063531770613787e-05, + "loss": 0.5093, + "step": 2897 + }, + { + "epoch": 0.49, + "learning_rate": 1.9062753591882437e-05, + "loss": 0.5078, + "step": 2898 + }, + { + "epoch": 0.49, + "learning_rate": 1.9061975105859304e-05, + "loss": 0.4774, + "step": 2899 + }, + { + "epoch": 0.49, + "learning_rate": 1.9061196312570782e-05, + "loss": 0.5233, + "step": 2900 + }, + { + "epoch": 0.49, + "learning_rate": 1.9060417212043278e-05, + "loss": 0.5091, + "step": 2901 + }, + { + "epoch": 0.49, + "learning_rate": 1.905963780430321e-05, + "loss": 0.5189, + "step": 2902 + }, + { + "epoch": 0.5, + "learning_rate": 1.9058858089377002e-05, + "loss": 0.513, + "step": 2903 + }, + { + "epoch": 0.5, + "learning_rate": 1.9058078067291095e-05, + "loss": 0.5378, + "step": 2904 + }, + { + "epoch": 0.5, + "learning_rate": 1.9057297738071937e-05, + "loss": 0.5263, + "step": 2905 + }, + { + "epoch": 0.5, + "learning_rate": 1.9056517101745987e-05, + "loss": 0.5301, + "step": 2906 + }, + { + "epoch": 0.5, + "learning_rate": 1.905573615833971e-05, + "loss": 0.5188, + "step": 2907 + }, + { + "epoch": 0.5, + "learning_rate": 1.9054954907879593e-05, + "loss": 0.539, + "step": 2908 + }, + { + "epoch": 0.5, + "learning_rate": 1.905417335039212e-05, + "loss": 0.5469, + "step": 2909 + }, + { + "epoch": 0.5, + "learning_rate": 1.9053391485903793e-05, + "loss": 0.5205, + "step": 2910 + }, + { + "epoch": 0.5, + "learning_rate": 1.9052609314441123e-05, + "loss": 0.5238, + "step": 2911 + }, + { + "epoch": 0.5, + "learning_rate": 1.905182683603063e-05, + "loss": 0.5212, + "step": 2912 + }, + { + "epoch": 0.5, + "learning_rate": 1.9051044050698847e-05, + "loss": 0.5484, + "step": 2913 + }, + { + "epoch": 0.5, + "learning_rate": 1.9050260958472315e-05, + "loss": 0.4985, + "step": 2914 + }, + { + "epoch": 0.5, + "learning_rate": 1.904947755937759e-05, + "loss": 0.5199, + "step": 2915 + }, + { + "epoch": 0.5, + "learning_rate": 1.9048693853441226e-05, + "loss": 0.4904, + "step": 2916 + }, + { + "epoch": 0.5, + "learning_rate": 1.9047909840689808e-05, + "loss": 0.4904, + "step": 2917 + }, + { + "epoch": 0.5, + "learning_rate": 1.904712552114991e-05, + "loss": 0.5043, + "step": 2918 + }, + { + "epoch": 0.5, + "learning_rate": 1.9046340894848134e-05, + "loss": 0.5232, + "step": 2919 + }, + { + "epoch": 0.5, + "learning_rate": 1.9045555961811075e-05, + "loss": 0.5402, + "step": 2920 + }, + { + "epoch": 0.5, + "learning_rate": 1.9044770722065354e-05, + "loss": 0.5366, + "step": 2921 + }, + { + "epoch": 0.5, + "learning_rate": 1.9043985175637596e-05, + "loss": 0.5337, + "step": 2922 + }, + { + "epoch": 0.5, + "learning_rate": 1.9043199322554437e-05, + "loss": 0.4938, + "step": 2923 + }, + { + "epoch": 0.5, + "learning_rate": 1.9042413162842523e-05, + "loss": 0.5105, + "step": 2924 + }, + { + "epoch": 0.5, + "learning_rate": 1.9041626696528503e-05, + "loss": 0.518, + "step": 2925 + }, + { + "epoch": 0.5, + "learning_rate": 1.9040839923639053e-05, + "loss": 0.499, + "step": 2926 + }, + { + "epoch": 0.5, + "learning_rate": 1.9040052844200848e-05, + "loss": 0.5173, + "step": 2927 + }, + { + "epoch": 0.5, + "learning_rate": 1.9039265458240576e-05, + "loss": 0.5038, + "step": 2928 + }, + { + "epoch": 0.5, + "learning_rate": 1.903847776578493e-05, + "loss": 0.5142, + "step": 2929 + }, + { + "epoch": 0.5, + "learning_rate": 1.9037689766860624e-05, + "loss": 0.5506, + "step": 2930 + }, + { + "epoch": 0.5, + "learning_rate": 1.9036901461494375e-05, + "loss": 0.5335, + "step": 2931 + }, + { + "epoch": 0.5, + "learning_rate": 1.903611284971291e-05, + "loss": 0.5637, + "step": 2932 + }, + { + "epoch": 0.5, + "learning_rate": 1.9035323931542976e-05, + "loss": 0.4912, + "step": 2933 + }, + { + "epoch": 0.5, + "learning_rate": 1.9034534707011308e-05, + "loss": 0.5201, + "step": 2934 + }, + { + "epoch": 0.5, + "learning_rate": 1.9033745176144686e-05, + "loss": 0.557, + "step": 2935 + }, + { + "epoch": 0.5, + "learning_rate": 1.9032955338969862e-05, + "loss": 0.5222, + "step": 2936 + }, + { + "epoch": 0.5, + "learning_rate": 1.9032165195513634e-05, + "loss": 0.5562, + "step": 2937 + }, + { + "epoch": 0.5, + "learning_rate": 1.9031374745802777e-05, + "loss": 0.515, + "step": 2938 + }, + { + "epoch": 0.5, + "learning_rate": 1.9030583989864106e-05, + "loss": 0.4739, + "step": 2939 + }, + { + "epoch": 0.5, + "learning_rate": 1.9029792927724426e-05, + "loss": 0.5546, + "step": 2940 + }, + { + "epoch": 0.5, + "learning_rate": 1.902900155941056e-05, + "loss": 0.5093, + "step": 2941 + }, + { + "epoch": 0.5, + "learning_rate": 1.9028209884949347e-05, + "loss": 0.5283, + "step": 2942 + }, + { + "epoch": 0.5, + "learning_rate": 1.9027417904367625e-05, + "loss": 0.5528, + "step": 2943 + }, + { + "epoch": 0.5, + "learning_rate": 1.9026625617692248e-05, + "loss": 0.5103, + "step": 2944 + }, + { + "epoch": 0.5, + "learning_rate": 1.902583302495008e-05, + "loss": 0.4717, + "step": 2945 + }, + { + "epoch": 0.5, + "learning_rate": 1.9025040126167998e-05, + "loss": 0.529, + "step": 2946 + }, + { + "epoch": 0.5, + "learning_rate": 1.9024246921372884e-05, + "loss": 0.5092, + "step": 2947 + }, + { + "epoch": 0.5, + "learning_rate": 1.902345341059164e-05, + "loss": 0.4789, + "step": 2948 + }, + { + "epoch": 0.5, + "learning_rate": 1.902265959385116e-05, + "loss": 0.5096, + "step": 2949 + }, + { + "epoch": 0.5, + "learning_rate": 1.902186547117837e-05, + "loss": 0.546, + "step": 2950 + }, + { + "epoch": 0.5, + "learning_rate": 1.902107104260019e-05, + "loss": 0.5239, + "step": 2951 + }, + { + "epoch": 0.5, + "learning_rate": 1.9020276308143565e-05, + "loss": 0.5261, + "step": 2952 + }, + { + "epoch": 0.5, + "learning_rate": 1.9019481267835434e-05, + "loss": 0.5198, + "step": 2953 + }, + { + "epoch": 0.5, + "learning_rate": 1.9018685921702754e-05, + "loss": 0.5697, + "step": 2954 + }, + { + "epoch": 0.5, + "learning_rate": 1.9017890269772505e-05, + "loss": 0.5434, + "step": 2955 + }, + { + "epoch": 0.5, + "learning_rate": 1.9017094312071646e-05, + "loss": 0.5267, + "step": 2956 + }, + { + "epoch": 0.5, + "learning_rate": 1.9016298048627183e-05, + "loss": 0.5287, + "step": 2957 + }, + { + "epoch": 0.5, + "learning_rate": 1.9015501479466104e-05, + "loss": 0.5031, + "step": 2958 + }, + { + "epoch": 0.5, + "learning_rate": 1.9014704604615425e-05, + "loss": 0.5164, + "step": 2959 + }, + { + "epoch": 0.5, + "learning_rate": 1.9013907424102167e-05, + "loss": 0.5173, + "step": 2960 + }, + { + "epoch": 0.5, + "learning_rate": 1.9013109937953353e-05, + "loss": 0.494, + "step": 2961 + }, + { + "epoch": 0.51, + "learning_rate": 1.9012312146196024e-05, + "loss": 0.4926, + "step": 2962 + }, + { + "epoch": 0.51, + "learning_rate": 1.9011514048857237e-05, + "loss": 0.5101, + "step": 2963 + }, + { + "epoch": 0.51, + "learning_rate": 1.9010715645964048e-05, + "loss": 0.531, + "step": 2964 + }, + { + "epoch": 0.51, + "learning_rate": 1.900991693754353e-05, + "loss": 0.5283, + "step": 2965 + }, + { + "epoch": 0.51, + "learning_rate": 1.9009117923622768e-05, + "loss": 0.5385, + "step": 2966 + }, + { + "epoch": 0.51, + "learning_rate": 1.9008318604228847e-05, + "loss": 0.4985, + "step": 2967 + }, + { + "epoch": 0.51, + "learning_rate": 1.9007518979388875e-05, + "loss": 0.501, + "step": 2968 + }, + { + "epoch": 0.51, + "learning_rate": 1.900671904912997e-05, + "loss": 0.4833, + "step": 2969 + }, + { + "epoch": 0.51, + "learning_rate": 1.900591881347924e-05, + "loss": 0.4913, + "step": 2970 + }, + { + "epoch": 0.51, + "learning_rate": 1.9005118272463832e-05, + "loss": 0.51, + "step": 2971 + }, + { + "epoch": 0.51, + "learning_rate": 1.9004317426110888e-05, + "loss": 0.5704, + "step": 2972 + }, + { + "epoch": 0.51, + "learning_rate": 1.900351627444756e-05, + "loss": 0.5252, + "step": 2973 + }, + { + "epoch": 0.51, + "learning_rate": 1.9002714817501013e-05, + "loss": 0.5079, + "step": 2974 + }, + { + "epoch": 0.51, + "learning_rate": 1.900191305529842e-05, + "loss": 0.5225, + "step": 2975 + }, + { + "epoch": 0.51, + "learning_rate": 1.900111098786697e-05, + "loss": 0.5019, + "step": 2976 + }, + { + "epoch": 0.51, + "learning_rate": 1.900030861523386e-05, + "loss": 0.5078, + "step": 2977 + }, + { + "epoch": 0.51, + "learning_rate": 1.899950593742629e-05, + "loss": 0.5215, + "step": 2978 + }, + { + "epoch": 0.51, + "learning_rate": 1.899870295447148e-05, + "loss": 0.4957, + "step": 2979 + }, + { + "epoch": 0.51, + "learning_rate": 1.8997899666396663e-05, + "loss": 0.5406, + "step": 2980 + }, + { + "epoch": 0.51, + "learning_rate": 1.8997096073229065e-05, + "loss": 0.5423, + "step": 2981 + }, + { + "epoch": 0.51, + "learning_rate": 1.8996292174995937e-05, + "loss": 0.5548, + "step": 2982 + }, + { + "epoch": 0.51, + "learning_rate": 1.899548797172454e-05, + "loss": 0.5532, + "step": 2983 + }, + { + "epoch": 0.51, + "learning_rate": 1.8994683463442144e-05, + "loss": 0.4912, + "step": 2984 + }, + { + "epoch": 0.51, + "learning_rate": 1.899387865017602e-05, + "loss": 0.4968, + "step": 2985 + }, + { + "epoch": 0.51, + "learning_rate": 1.8993073531953464e-05, + "loss": 0.5196, + "step": 2986 + }, + { + "epoch": 0.51, + "learning_rate": 1.8992268108801772e-05, + "loss": 0.4952, + "step": 2987 + }, + { + "epoch": 0.51, + "learning_rate": 1.8991462380748254e-05, + "loss": 0.5614, + "step": 2988 + }, + { + "epoch": 0.51, + "learning_rate": 1.899065634782023e-05, + "loss": 0.5108, + "step": 2989 + }, + { + "epoch": 0.51, + "learning_rate": 1.8989850010045027e-05, + "loss": 0.5082, + "step": 2990 + }, + { + "epoch": 0.51, + "learning_rate": 1.8989043367449992e-05, + "loss": 0.5243, + "step": 2991 + }, + { + "epoch": 0.51, + "learning_rate": 1.898823642006247e-05, + "loss": 0.5169, + "step": 2992 + }, + { + "epoch": 0.51, + "learning_rate": 1.8987429167909828e-05, + "loss": 0.495, + "step": 2993 + }, + { + "epoch": 0.51, + "learning_rate": 1.8986621611019434e-05, + "loss": 0.5248, + "step": 2994 + }, + { + "epoch": 0.51, + "learning_rate": 1.8985813749418667e-05, + "loss": 0.5376, + "step": 2995 + }, + { + "epoch": 0.51, + "learning_rate": 1.8985005583134926e-05, + "loss": 0.5201, + "step": 2996 + }, + { + "epoch": 0.51, + "learning_rate": 1.8984197112195608e-05, + "loss": 0.4932, + "step": 2997 + }, + { + "epoch": 0.51, + "learning_rate": 1.898338833662813e-05, + "loss": 0.5362, + "step": 2998 + }, + { + "epoch": 0.51, + "learning_rate": 1.8982579256459915e-05, + "loss": 0.5172, + "step": 2999 + }, + { + "epoch": 0.51, + "learning_rate": 1.898176987171839e-05, + "loss": 0.5035, + "step": 3000 + }, + { + "epoch": 0.51, + "learning_rate": 1.898096018243101e-05, + "loss": 0.5059, + "step": 3001 + }, + { + "epoch": 0.51, + "learning_rate": 1.8980150188625216e-05, + "loss": 0.5432, + "step": 3002 + }, + { + "epoch": 0.51, + "learning_rate": 1.8979339890328484e-05, + "loss": 0.5379, + "step": 3003 + }, + { + "epoch": 0.51, + "learning_rate": 1.8978529287568283e-05, + "loss": 0.5137, + "step": 3004 + }, + { + "epoch": 0.51, + "learning_rate": 1.89777183803721e-05, + "loss": 0.512, + "step": 3005 + }, + { + "epoch": 0.51, + "learning_rate": 1.8976907168767432e-05, + "loss": 0.5255, + "step": 3006 + }, + { + "epoch": 0.51, + "learning_rate": 1.8976095652781783e-05, + "loss": 0.5262, + "step": 3007 + }, + { + "epoch": 0.51, + "learning_rate": 1.8975283832442667e-05, + "loss": 0.5192, + "step": 3008 + }, + { + "epoch": 0.51, + "learning_rate": 1.8974471707777614e-05, + "loss": 0.558, + "step": 3009 + }, + { + "epoch": 0.51, + "learning_rate": 1.8973659278814158e-05, + "loss": 0.5324, + "step": 3010 + }, + { + "epoch": 0.51, + "learning_rate": 1.897284654557985e-05, + "loss": 0.5043, + "step": 3011 + }, + { + "epoch": 0.51, + "learning_rate": 1.8972033508102242e-05, + "loss": 0.4835, + "step": 3012 + }, + { + "epoch": 0.51, + "learning_rate": 1.8971220166408906e-05, + "loss": 0.5377, + "step": 3013 + }, + { + "epoch": 0.51, + "learning_rate": 1.897040652052742e-05, + "loss": 0.5032, + "step": 3014 + }, + { + "epoch": 0.51, + "learning_rate": 1.896959257048537e-05, + "loss": 0.5202, + "step": 3015 + }, + { + "epoch": 0.51, + "learning_rate": 1.8968778316310356e-05, + "loss": 0.482, + "step": 3016 + }, + { + "epoch": 0.51, + "learning_rate": 1.896796375802999e-05, + "loss": 0.4962, + "step": 3017 + }, + { + "epoch": 0.51, + "learning_rate": 1.8967148895671884e-05, + "loss": 0.5007, + "step": 3018 + }, + { + "epoch": 0.51, + "learning_rate": 1.8966333729263674e-05, + "loss": 0.5898, + "step": 3019 + }, + { + "epoch": 0.52, + "learning_rate": 1.8965518258832997e-05, + "loss": 0.5199, + "step": 3020 + }, + { + "epoch": 0.52, + "learning_rate": 1.8964702484407508e-05, + "loss": 0.5202, + "step": 3021 + }, + { + "epoch": 0.52, + "learning_rate": 1.8963886406014857e-05, + "loss": 0.5148, + "step": 3022 + }, + { + "epoch": 0.52, + "learning_rate": 1.8963070023682725e-05, + "loss": 0.5316, + "step": 3023 + }, + { + "epoch": 0.52, + "learning_rate": 1.896225333743879e-05, + "loss": 0.4884, + "step": 3024 + }, + { + "epoch": 0.52, + "learning_rate": 1.8961436347310742e-05, + "loss": 0.5051, + "step": 3025 + }, + { + "epoch": 0.52, + "learning_rate": 1.8960619053326285e-05, + "loss": 0.5205, + "step": 3026 + }, + { + "epoch": 0.52, + "learning_rate": 1.8959801455513127e-05, + "loss": 0.5162, + "step": 3027 + }, + { + "epoch": 0.52, + "learning_rate": 1.8958983553898998e-05, + "loss": 0.5369, + "step": 3028 + }, + { + "epoch": 0.52, + "learning_rate": 1.8958165348511623e-05, + "loss": 0.5193, + "step": 3029 + }, + { + "epoch": 0.52, + "learning_rate": 1.895734683937875e-05, + "loss": 0.5857, + "step": 3030 + }, + { + "epoch": 0.52, + "learning_rate": 1.895652802652813e-05, + "loss": 0.5067, + "step": 3031 + }, + { + "epoch": 0.52, + "learning_rate": 1.8955708909987523e-05, + "loss": 0.5473, + "step": 3032 + }, + { + "epoch": 0.52, + "learning_rate": 1.895488948978471e-05, + "loss": 0.465, + "step": 3033 + }, + { + "epoch": 0.52, + "learning_rate": 1.8954069765947473e-05, + "loss": 0.5057, + "step": 3034 + }, + { + "epoch": 0.52, + "learning_rate": 1.8953249738503602e-05, + "loss": 0.4969, + "step": 3035 + }, + { + "epoch": 0.52, + "learning_rate": 1.8952429407480908e-05, + "loss": 0.4816, + "step": 3036 + }, + { + "epoch": 0.52, + "learning_rate": 1.8951608772907205e-05, + "loss": 0.5113, + "step": 3037 + }, + { + "epoch": 0.52, + "learning_rate": 1.8950787834810312e-05, + "loss": 0.5089, + "step": 3038 + }, + { + "epoch": 0.52, + "learning_rate": 1.894996659321807e-05, + "loss": 0.5468, + "step": 3039 + }, + { + "epoch": 0.52, + "learning_rate": 1.8949145048158327e-05, + "loss": 0.5067, + "step": 3040 + }, + { + "epoch": 0.52, + "learning_rate": 1.8948323199658933e-05, + "loss": 0.5399, + "step": 3041 + }, + { + "epoch": 0.52, + "learning_rate": 1.894750104774776e-05, + "loss": 0.4991, + "step": 3042 + }, + { + "epoch": 0.52, + "learning_rate": 1.894667859245268e-05, + "loss": 0.5296, + "step": 3043 + }, + { + "epoch": 0.52, + "learning_rate": 1.8945855833801586e-05, + "loss": 0.5425, + "step": 3044 + }, + { + "epoch": 0.52, + "learning_rate": 1.894503277182237e-05, + "loss": 0.4862, + "step": 3045 + }, + { + "epoch": 0.52, + "learning_rate": 1.8944209406542945e-05, + "loss": 0.5366, + "step": 3046 + }, + { + "epoch": 0.52, + "learning_rate": 1.894338573799122e-05, + "loss": 0.5259, + "step": 3047 + }, + { + "epoch": 0.52, + "learning_rate": 1.8942561766195132e-05, + "loss": 0.5041, + "step": 3048 + }, + { + "epoch": 0.52, + "learning_rate": 1.8941737491182615e-05, + "loss": 0.4829, + "step": 3049 + }, + { + "epoch": 0.52, + "learning_rate": 1.8940912912981624e-05, + "loss": 0.5236, + "step": 3050 + }, + { + "epoch": 0.52, + "learning_rate": 1.8940088031620108e-05, + "loss": 0.5047, + "step": 3051 + }, + { + "epoch": 0.52, + "learning_rate": 1.8939262847126046e-05, + "loss": 0.5627, + "step": 3052 + }, + { + "epoch": 0.52, + "learning_rate": 1.893843735952741e-05, + "loss": 0.5079, + "step": 3053 + }, + { + "epoch": 0.52, + "learning_rate": 1.8937611568852192e-05, + "loss": 0.5108, + "step": 3054 + }, + { + "epoch": 0.52, + "learning_rate": 1.8936785475128397e-05, + "loss": 0.5298, + "step": 3055 + }, + { + "epoch": 0.52, + "learning_rate": 1.893595907838403e-05, + "loss": 0.5048, + "step": 3056 + }, + { + "epoch": 0.52, + "learning_rate": 1.893513237864711e-05, + "loss": 0.5373, + "step": 3057 + }, + { + "epoch": 0.52, + "learning_rate": 1.893430537594568e-05, + "loss": 0.5223, + "step": 3058 + }, + { + "epoch": 0.52, + "learning_rate": 1.8933478070307764e-05, + "loss": 0.5016, + "step": 3059 + }, + { + "epoch": 0.52, + "learning_rate": 1.8932650461761426e-05, + "loss": 0.5077, + "step": 3060 + }, + { + "epoch": 0.52, + "learning_rate": 1.8931822550334725e-05, + "loss": 0.5437, + "step": 3061 + }, + { + "epoch": 0.52, + "learning_rate": 1.8930994336055733e-05, + "loss": 0.47, + "step": 3062 + }, + { + "epoch": 0.52, + "learning_rate": 1.893016581895253e-05, + "loss": 0.5458, + "step": 3063 + }, + { + "epoch": 0.52, + "learning_rate": 1.8929336999053207e-05, + "loss": 0.5285, + "step": 3064 + }, + { + "epoch": 0.52, + "learning_rate": 1.8928507876385874e-05, + "loss": 0.5027, + "step": 3065 + }, + { + "epoch": 0.52, + "learning_rate": 1.892767845097864e-05, + "loss": 0.5161, + "step": 3066 + }, + { + "epoch": 0.52, + "learning_rate": 1.8926848722859626e-05, + "loss": 0.4967, + "step": 3067 + }, + { + "epoch": 0.52, + "learning_rate": 1.8926018692056967e-05, + "loss": 0.4989, + "step": 3068 + }, + { + "epoch": 0.52, + "learning_rate": 1.8925188358598815e-05, + "loss": 0.5053, + "step": 3069 + }, + { + "epoch": 0.52, + "learning_rate": 1.8924357722513314e-05, + "loss": 0.5347, + "step": 3070 + }, + { + "epoch": 0.52, + "learning_rate": 1.892352678382863e-05, + "loss": 0.5372, + "step": 3071 + }, + { + "epoch": 0.52, + "learning_rate": 1.8922695542572942e-05, + "loss": 0.5077, + "step": 3072 + }, + { + "epoch": 0.52, + "learning_rate": 1.892186399877443e-05, + "loss": 0.5095, + "step": 3073 + }, + { + "epoch": 0.52, + "learning_rate": 1.8921032152461294e-05, + "loss": 0.485, + "step": 3074 + }, + { + "epoch": 0.52, + "learning_rate": 1.8920200003661736e-05, + "loss": 0.4913, + "step": 3075 + }, + { + "epoch": 0.52, + "learning_rate": 1.8919367552403975e-05, + "loss": 0.4886, + "step": 3076 + }, + { + "epoch": 0.52, + "learning_rate": 1.8918534798716235e-05, + "loss": 0.5173, + "step": 3077 + }, + { + "epoch": 0.52, + "learning_rate": 1.8917701742626754e-05, + "loss": 0.5332, + "step": 3078 + }, + { + "epoch": 0.53, + "learning_rate": 1.8916868384163774e-05, + "loss": 0.5295, + "step": 3079 + }, + { + "epoch": 0.53, + "learning_rate": 1.8916034723355555e-05, + "loss": 0.48, + "step": 3080 + }, + { + "epoch": 0.53, + "learning_rate": 1.8915200760230364e-05, + "loss": 0.5166, + "step": 3081 + }, + { + "epoch": 0.53, + "learning_rate": 1.8914366494816478e-05, + "loss": 0.4948, + "step": 3082 + }, + { + "epoch": 0.53, + "learning_rate": 1.8913531927142185e-05, + "loss": 0.4922, + "step": 3083 + }, + { + "epoch": 0.53, + "learning_rate": 1.8912697057235783e-05, + "loss": 0.5084, + "step": 3084 + }, + { + "epoch": 0.53, + "learning_rate": 1.891186188512558e-05, + "loss": 0.5474, + "step": 3085 + }, + { + "epoch": 0.53, + "learning_rate": 1.891102641083989e-05, + "loss": 0.5177, + "step": 3086 + }, + { + "epoch": 0.53, + "learning_rate": 1.8910190634407047e-05, + "loss": 0.4791, + "step": 3087 + }, + { + "epoch": 0.53, + "learning_rate": 1.890935455585539e-05, + "loss": 0.4932, + "step": 3088 + }, + { + "epoch": 0.53, + "learning_rate": 1.8908518175213264e-05, + "loss": 0.4871, + "step": 3089 + }, + { + "epoch": 0.53, + "learning_rate": 1.8907681492509028e-05, + "loss": 0.5318, + "step": 3090 + }, + { + "epoch": 0.53, + "learning_rate": 1.8906844507771058e-05, + "loss": 0.5221, + "step": 3091 + }, + { + "epoch": 0.53, + "learning_rate": 1.8906007221027726e-05, + "loss": 0.5021, + "step": 3092 + }, + { + "epoch": 0.53, + "learning_rate": 1.8905169632307427e-05, + "loss": 0.5231, + "step": 3093 + }, + { + "epoch": 0.53, + "learning_rate": 1.890433174163856e-05, + "loss": 0.5354, + "step": 3094 + }, + { + "epoch": 0.53, + "learning_rate": 1.8903493549049535e-05, + "loss": 0.5326, + "step": 3095 + }, + { + "epoch": 0.53, + "learning_rate": 1.8902655054568774e-05, + "loss": 0.5095, + "step": 3096 + }, + { + "epoch": 0.53, + "learning_rate": 1.8901816258224705e-05, + "loss": 0.5491, + "step": 3097 + }, + { + "epoch": 0.53, + "learning_rate": 1.890097716004577e-05, + "loss": 0.4671, + "step": 3098 + }, + { + "epoch": 0.53, + "learning_rate": 1.890013776006042e-05, + "loss": 0.4953, + "step": 3099 + }, + { + "epoch": 0.53, + "learning_rate": 1.8899298058297122e-05, + "loss": 0.5515, + "step": 3100 + }, + { + "epoch": 0.53, + "learning_rate": 1.889845805478434e-05, + "loss": 0.5275, + "step": 3101 + }, + { + "epoch": 0.53, + "learning_rate": 1.8897617749550565e-05, + "loss": 0.5689, + "step": 3102 + }, + { + "epoch": 0.53, + "learning_rate": 1.8896777142624277e-05, + "loss": 0.4978, + "step": 3103 + }, + { + "epoch": 0.53, + "learning_rate": 1.8895936234033992e-05, + "loss": 0.5741, + "step": 3104 + }, + { + "epoch": 0.53, + "learning_rate": 1.8895095023808213e-05, + "loss": 0.4808, + "step": 3105 + }, + { + "epoch": 0.53, + "learning_rate": 1.889425351197547e-05, + "loss": 0.5499, + "step": 3106 + }, + { + "epoch": 0.53, + "learning_rate": 1.8893411698564285e-05, + "loss": 0.4996, + "step": 3107 + }, + { + "epoch": 0.53, + "learning_rate": 1.8892569583603215e-05, + "loss": 0.5397, + "step": 3108 + }, + { + "epoch": 0.53, + "learning_rate": 1.8891727167120808e-05, + "loss": 0.5414, + "step": 3109 + }, + { + "epoch": 0.53, + "learning_rate": 1.889088444914563e-05, + "loss": 0.5397, + "step": 3110 + }, + { + "epoch": 0.53, + "learning_rate": 1.8890041429706247e-05, + "loss": 0.5328, + "step": 3111 + }, + { + "epoch": 0.53, + "learning_rate": 1.8889198108831252e-05, + "loss": 0.4887, + "step": 3112 + }, + { + "epoch": 0.53, + "learning_rate": 1.8888354486549238e-05, + "loss": 0.5077, + "step": 3113 + }, + { + "epoch": 0.53, + "learning_rate": 1.8887510562888805e-05, + "loss": 0.5164, + "step": 3114 + }, + { + "epoch": 0.53, + "learning_rate": 1.8886666337878573e-05, + "loss": 0.5383, + "step": 3115 + }, + { + "epoch": 0.53, + "learning_rate": 1.8885821811547168e-05, + "loss": 0.5536, + "step": 3116 + }, + { + "epoch": 0.53, + "learning_rate": 1.8884976983923223e-05, + "loss": 0.5048, + "step": 3117 + }, + { + "epoch": 0.53, + "learning_rate": 1.8884131855035385e-05, + "loss": 0.5083, + "step": 3118 + }, + { + "epoch": 0.53, + "learning_rate": 1.8883286424912304e-05, + "loss": 0.561, + "step": 3119 + }, + { + "epoch": 0.53, + "learning_rate": 1.8882440693582653e-05, + "loss": 0.5542, + "step": 3120 + }, + { + "epoch": 0.53, + "learning_rate": 1.888159466107511e-05, + "loss": 0.5557, + "step": 3121 + }, + { + "epoch": 0.53, + "learning_rate": 1.888074832741835e-05, + "loss": 0.5322, + "step": 3122 + }, + { + "epoch": 0.53, + "learning_rate": 1.8879901692641085e-05, + "loss": 0.5334, + "step": 3123 + }, + { + "epoch": 0.53, + "learning_rate": 1.8879054756772013e-05, + "loss": 0.5303, + "step": 3124 + }, + { + "epoch": 0.53, + "learning_rate": 1.887820751983985e-05, + "loss": 0.558, + "step": 3125 + }, + { + "epoch": 0.53, + "learning_rate": 1.8877359981873326e-05, + "loss": 0.5242, + "step": 3126 + }, + { + "epoch": 0.53, + "learning_rate": 1.887651214290118e-05, + "loss": 0.5503, + "step": 3127 + }, + { + "epoch": 0.53, + "learning_rate": 1.8875664002952158e-05, + "loss": 0.4914, + "step": 3128 + }, + { + "epoch": 0.53, + "learning_rate": 1.887481556205502e-05, + "loss": 0.5003, + "step": 3129 + }, + { + "epoch": 0.53, + "learning_rate": 1.887396682023853e-05, + "loss": 0.4925, + "step": 3130 + }, + { + "epoch": 0.53, + "learning_rate": 1.887311777753147e-05, + "loss": 0.4949, + "step": 3131 + }, + { + "epoch": 0.53, + "learning_rate": 1.8872268433962624e-05, + "loss": 0.4706, + "step": 3132 + }, + { + "epoch": 0.53, + "learning_rate": 1.88714187895608e-05, + "loss": 0.5439, + "step": 3133 + }, + { + "epoch": 0.53, + "learning_rate": 1.8870568844354796e-05, + "loss": 0.4931, + "step": 3134 + }, + { + "epoch": 0.53, + "learning_rate": 1.8869718598373438e-05, + "loss": 0.496, + "step": 3135 + }, + { + "epoch": 0.53, + "learning_rate": 1.8868868051645554e-05, + "loss": 0.5586, + "step": 3136 + }, + { + "epoch": 0.54, + "learning_rate": 1.8868017204199985e-05, + "loss": 0.5124, + "step": 3137 + }, + { + "epoch": 0.54, + "learning_rate": 1.8867166056065577e-05, + "loss": 0.557, + "step": 3138 + }, + { + "epoch": 0.54, + "learning_rate": 1.8866314607271192e-05, + "loss": 0.5067, + "step": 3139 + }, + { + "epoch": 0.54, + "learning_rate": 1.88654628578457e-05, + "loss": 0.5001, + "step": 3140 + }, + { + "epoch": 0.54, + "learning_rate": 1.8864610807817984e-05, + "loss": 0.5382, + "step": 3141 + }, + { + "epoch": 0.54, + "learning_rate": 1.886375845721693e-05, + "loss": 0.5345, + "step": 3142 + }, + { + "epoch": 0.54, + "learning_rate": 1.8862905806071443e-05, + "loss": 0.5011, + "step": 3143 + }, + { + "epoch": 0.54, + "learning_rate": 1.886205285441043e-05, + "loss": 0.516, + "step": 3144 + }, + { + "epoch": 0.54, + "learning_rate": 1.8861199602262813e-05, + "loss": 0.531, + "step": 3145 + }, + { + "epoch": 0.54, + "learning_rate": 1.8860346049657526e-05, + "loss": 0.5353, + "step": 3146 + }, + { + "epoch": 0.54, + "learning_rate": 1.885949219662351e-05, + "loss": 0.534, + "step": 3147 + }, + { + "epoch": 0.54, + "learning_rate": 1.8858638043189712e-05, + "loss": 0.5465, + "step": 3148 + }, + { + "epoch": 0.54, + "learning_rate": 1.88577835893851e-05, + "loss": 0.5407, + "step": 3149 + }, + { + "epoch": 0.54, + "learning_rate": 1.8856928835238643e-05, + "loss": 0.5172, + "step": 3150 + }, + { + "epoch": 0.54, + "learning_rate": 1.885607378077932e-05, + "loss": 0.5352, + "step": 3151 + }, + { + "epoch": 0.54, + "learning_rate": 1.885521842603613e-05, + "loss": 0.5874, + "step": 3152 + }, + { + "epoch": 0.54, + "learning_rate": 1.8854362771038072e-05, + "loss": 0.5292, + "step": 3153 + }, + { + "epoch": 0.54, + "learning_rate": 1.8853506815814164e-05, + "loss": 0.5288, + "step": 3154 + }, + { + "epoch": 0.54, + "learning_rate": 1.8852650560393416e-05, + "loss": 0.5439, + "step": 3155 + }, + { + "epoch": 0.54, + "learning_rate": 1.8851794004804874e-05, + "loss": 0.4918, + "step": 3156 + }, + { + "epoch": 0.54, + "learning_rate": 1.885093714907758e-05, + "loss": 0.5176, + "step": 3157 + }, + { + "epoch": 0.54, + "learning_rate": 1.8850079993240582e-05, + "loss": 0.479, + "step": 3158 + }, + { + "epoch": 0.54, + "learning_rate": 1.8849222537322945e-05, + "loss": 0.5178, + "step": 3159 + }, + { + "epoch": 0.54, + "learning_rate": 1.8848364781353744e-05, + "loss": 0.5247, + "step": 3160 + }, + { + "epoch": 0.54, + "learning_rate": 1.8847506725362063e-05, + "loss": 0.5099, + "step": 3161 + }, + { + "epoch": 0.54, + "learning_rate": 1.8846648369376995e-05, + "loss": 0.5312, + "step": 3162 + }, + { + "epoch": 0.54, + "learning_rate": 1.884578971342765e-05, + "loss": 0.5489, + "step": 3163 + }, + { + "epoch": 0.54, + "learning_rate": 1.8844930757543136e-05, + "loss": 0.5469, + "step": 3164 + }, + { + "epoch": 0.54, + "learning_rate": 1.8844071501752576e-05, + "loss": 0.5618, + "step": 3165 + }, + { + "epoch": 0.54, + "learning_rate": 1.8843211946085112e-05, + "loss": 0.4965, + "step": 3166 + }, + { + "epoch": 0.54, + "learning_rate": 1.8842352090569885e-05, + "loss": 0.525, + "step": 3167 + }, + { + "epoch": 0.54, + "learning_rate": 1.884149193523605e-05, + "loss": 0.504, + "step": 3168 + }, + { + "epoch": 0.54, + "learning_rate": 1.8840631480112778e-05, + "loss": 0.5246, + "step": 3169 + }, + { + "epoch": 0.54, + "learning_rate": 1.8839770725229233e-05, + "loss": 0.4938, + "step": 3170 + }, + { + "epoch": 0.54, + "learning_rate": 1.8838909670614614e-05, + "loss": 0.53, + "step": 3171 + }, + { + "epoch": 0.54, + "learning_rate": 1.8838048316298104e-05, + "loss": 0.5072, + "step": 3172 + }, + { + "epoch": 0.54, + "learning_rate": 1.883718666230892e-05, + "loss": 0.5184, + "step": 3173 + }, + { + "epoch": 0.54, + "learning_rate": 1.8836324708676272e-05, + "loss": 0.5457, + "step": 3174 + }, + { + "epoch": 0.54, + "learning_rate": 1.883546245542939e-05, + "loss": 0.5093, + "step": 3175 + }, + { + "epoch": 0.54, + "learning_rate": 1.8834599902597507e-05, + "loss": 0.482, + "step": 3176 + }, + { + "epoch": 0.54, + "learning_rate": 1.883373705020987e-05, + "loss": 0.5069, + "step": 3177 + }, + { + "epoch": 0.54, + "learning_rate": 1.8832873898295737e-05, + "loss": 0.5242, + "step": 3178 + }, + { + "epoch": 0.54, + "learning_rate": 1.8832010446884377e-05, + "loss": 0.5188, + "step": 3179 + }, + { + "epoch": 0.54, + "learning_rate": 1.8831146696005068e-05, + "loss": 0.4855, + "step": 3180 + }, + { + "epoch": 0.54, + "learning_rate": 1.883028264568709e-05, + "loss": 0.518, + "step": 3181 + }, + { + "epoch": 0.54, + "learning_rate": 1.8829418295959745e-05, + "loss": 0.4935, + "step": 3182 + }, + { + "epoch": 0.54, + "learning_rate": 1.8828553646852342e-05, + "loss": 0.509, + "step": 3183 + }, + { + "epoch": 0.54, + "learning_rate": 1.8827688698394196e-05, + "loss": 0.5161, + "step": 3184 + }, + { + "epoch": 0.54, + "learning_rate": 1.8826823450614637e-05, + "loss": 0.5014, + "step": 3185 + }, + { + "epoch": 0.54, + "learning_rate": 1.8825957903543002e-05, + "loss": 0.4928, + "step": 3186 + }, + { + "epoch": 0.54, + "learning_rate": 1.8825092057208638e-05, + "loss": 0.503, + "step": 3187 + }, + { + "epoch": 0.54, + "learning_rate": 1.8824225911640907e-05, + "loss": 0.5114, + "step": 3188 + }, + { + "epoch": 0.54, + "learning_rate": 1.8823359466869173e-05, + "loss": 0.5311, + "step": 3189 + }, + { + "epoch": 0.54, + "learning_rate": 1.882249272292282e-05, + "loss": 0.5266, + "step": 3190 + }, + { + "epoch": 0.54, + "learning_rate": 1.882162567983123e-05, + "loss": 0.494, + "step": 3191 + }, + { + "epoch": 0.54, + "learning_rate": 1.882075833762381e-05, + "loss": 0.5696, + "step": 3192 + }, + { + "epoch": 0.54, + "learning_rate": 1.8819890696329962e-05, + "loss": 0.4818, + "step": 3193 + }, + { + "epoch": 0.54, + "learning_rate": 1.881902275597911e-05, + "loss": 0.5081, + "step": 3194 + }, + { + "epoch": 0.54, + "learning_rate": 1.8818154516600678e-05, + "loss": 0.5141, + "step": 3195 + }, + { + "epoch": 0.55, + "learning_rate": 1.881728597822411e-05, + "loss": 0.527, + "step": 3196 + }, + { + "epoch": 0.55, + "learning_rate": 1.8816417140878857e-05, + "loss": 0.5432, + "step": 3197 + }, + { + "epoch": 0.55, + "learning_rate": 1.8815548004594372e-05, + "loss": 0.5387, + "step": 3198 + }, + { + "epoch": 0.55, + "learning_rate": 1.8814678569400133e-05, + "loss": 0.5064, + "step": 3199 + }, + { + "epoch": 0.55, + "learning_rate": 1.881380883532561e-05, + "loss": 0.5482, + "step": 3200 + }, + { + "epoch": 0.55, + "learning_rate": 1.8812938802400303e-05, + "loss": 0.4763, + "step": 3201 + }, + { + "epoch": 0.55, + "learning_rate": 1.881206847065371e-05, + "loss": 0.5195, + "step": 3202 + }, + { + "epoch": 0.55, + "learning_rate": 1.8811197840115336e-05, + "loss": 0.556, + "step": 3203 + }, + { + "epoch": 0.55, + "learning_rate": 1.881032691081471e-05, + "loss": 0.4895, + "step": 3204 + }, + { + "epoch": 0.55, + "learning_rate": 1.8809455682781353e-05, + "loss": 0.5259, + "step": 3205 + }, + { + "epoch": 0.55, + "learning_rate": 1.880858415604482e-05, + "loss": 0.5161, + "step": 3206 + }, + { + "epoch": 0.55, + "learning_rate": 1.8807712330634645e-05, + "loss": 0.4961, + "step": 3207 + }, + { + "epoch": 0.55, + "learning_rate": 1.88068402065804e-05, + "loss": 0.5146, + "step": 3208 + }, + { + "epoch": 0.55, + "learning_rate": 1.880596778391165e-05, + "loss": 0.5016, + "step": 3209 + }, + { + "epoch": 0.55, + "learning_rate": 1.8805095062657982e-05, + "loss": 0.5419, + "step": 3210 + }, + { + "epoch": 0.55, + "learning_rate": 1.8804222042848987e-05, + "loss": 0.4993, + "step": 3211 + }, + { + "epoch": 0.55, + "learning_rate": 1.8803348724514264e-05, + "loss": 0.462, + "step": 3212 + }, + { + "epoch": 0.55, + "learning_rate": 1.8802475107683424e-05, + "loss": 0.5135, + "step": 3213 + }, + { + "epoch": 0.55, + "learning_rate": 1.880160119238609e-05, + "loss": 0.495, + "step": 3214 + }, + { + "epoch": 0.55, + "learning_rate": 1.8800726978651895e-05, + "loss": 0.4936, + "step": 3215 + }, + { + "epoch": 0.55, + "learning_rate": 1.879985246651048e-05, + "loss": 0.5341, + "step": 3216 + }, + { + "epoch": 0.55, + "learning_rate": 1.8798977655991498e-05, + "loss": 0.5052, + "step": 3217 + }, + { + "epoch": 0.55, + "learning_rate": 1.879810254712461e-05, + "loss": 0.5219, + "step": 3218 + }, + { + "epoch": 0.55, + "learning_rate": 1.8797227139939486e-05, + "loss": 0.5382, + "step": 3219 + }, + { + "epoch": 0.55, + "learning_rate": 1.8796351434465812e-05, + "loss": 0.518, + "step": 3220 + }, + { + "epoch": 0.55, + "learning_rate": 1.8795475430733287e-05, + "loss": 0.5161, + "step": 3221 + }, + { + "epoch": 0.55, + "learning_rate": 1.87945991287716e-05, + "loss": 0.5058, + "step": 3222 + }, + { + "epoch": 0.55, + "learning_rate": 1.8793722528610473e-05, + "loss": 0.4922, + "step": 3223 + }, + { + "epoch": 0.55, + "learning_rate": 1.879284563027963e-05, + "loss": 0.4983, + "step": 3224 + }, + { + "epoch": 0.55, + "learning_rate": 1.87919684338088e-05, + "loss": 0.47, + "step": 3225 + }, + { + "epoch": 0.55, + "learning_rate": 1.8791090939227723e-05, + "loss": 0.5061, + "step": 3226 + }, + { + "epoch": 0.55, + "learning_rate": 1.8790213146566157e-05, + "loss": 0.5068, + "step": 3227 + }, + { + "epoch": 0.55, + "learning_rate": 1.878933505585387e-05, + "loss": 0.4857, + "step": 3228 + }, + { + "epoch": 0.55, + "learning_rate": 1.8788456667120626e-05, + "loss": 0.5734, + "step": 3229 + }, + { + "epoch": 0.55, + "learning_rate": 1.8787577980396215e-05, + "loss": 0.5035, + "step": 3230 + }, + { + "epoch": 0.55, + "learning_rate": 1.8786698995710427e-05, + "loss": 0.5226, + "step": 3231 + }, + { + "epoch": 0.55, + "learning_rate": 1.8785819713093068e-05, + "loss": 0.5801, + "step": 3232 + }, + { + "epoch": 0.55, + "learning_rate": 1.878494013257395e-05, + "loss": 0.5227, + "step": 3233 + }, + { + "epoch": 0.55, + "learning_rate": 1.8784060254182904e-05, + "loss": 0.4925, + "step": 3234 + }, + { + "epoch": 0.55, + "learning_rate": 1.878318007794975e-05, + "loss": 0.5163, + "step": 3235 + }, + { + "epoch": 0.55, + "learning_rate": 1.878229960390435e-05, + "loss": 0.5121, + "step": 3236 + }, + { + "epoch": 0.55, + "learning_rate": 1.8781418832076546e-05, + "loss": 0.4937, + "step": 3237 + }, + { + "epoch": 0.55, + "learning_rate": 1.87805377624962e-05, + "loss": 0.5227, + "step": 3238 + }, + { + "epoch": 0.55, + "learning_rate": 1.87796563951932e-05, + "loss": 0.4851, + "step": 3239 + }, + { + "epoch": 0.55, + "learning_rate": 1.877877473019742e-05, + "loss": 0.5288, + "step": 3240 + }, + { + "epoch": 0.55, + "learning_rate": 1.8777892767538756e-05, + "loss": 0.4927, + "step": 3241 + }, + { + "epoch": 0.55, + "learning_rate": 1.8777010507247114e-05, + "loss": 0.5121, + "step": 3242 + }, + { + "epoch": 0.55, + "learning_rate": 1.877612794935241e-05, + "loss": 0.4994, + "step": 3243 + }, + { + "epoch": 0.55, + "learning_rate": 1.8775245093884572e-05, + "loss": 0.5171, + "step": 3244 + }, + { + "epoch": 0.55, + "learning_rate": 1.8774361940873523e-05, + "loss": 0.5126, + "step": 3245 + }, + { + "epoch": 0.55, + "learning_rate": 1.8773478490349225e-05, + "loss": 0.5117, + "step": 3246 + }, + { + "epoch": 0.55, + "learning_rate": 1.877259474234162e-05, + "loss": 0.5351, + "step": 3247 + }, + { + "epoch": 0.55, + "learning_rate": 1.877171069688068e-05, + "loss": 0.5171, + "step": 3248 + }, + { + "epoch": 0.55, + "learning_rate": 1.8770826353996378e-05, + "loss": 0.5359, + "step": 3249 + }, + { + "epoch": 0.55, + "learning_rate": 1.87699417137187e-05, + "loss": 0.5078, + "step": 3250 + }, + { + "epoch": 0.55, + "learning_rate": 1.876905677607764e-05, + "loss": 0.5875, + "step": 3251 + }, + { + "epoch": 0.55, + "learning_rate": 1.8768171541103205e-05, + "loss": 0.5259, + "step": 3252 + }, + { + "epoch": 0.55, + "learning_rate": 1.8767286008825413e-05, + "loss": 0.5772, + "step": 3253 + }, + { + "epoch": 0.55, + "learning_rate": 1.8766400179274287e-05, + "loss": 0.554, + "step": 3254 + }, + { + "epoch": 0.56, + "learning_rate": 1.8765514052479864e-05, + "loss": 0.5012, + "step": 3255 + }, + { + "epoch": 0.56, + "learning_rate": 1.876462762847219e-05, + "loss": 0.4833, + "step": 3256 + }, + { + "epoch": 0.56, + "learning_rate": 1.876374090728132e-05, + "loss": 0.4956, + "step": 3257 + }, + { + "epoch": 0.56, + "learning_rate": 1.876285388893732e-05, + "loss": 0.549, + "step": 3258 + }, + { + "epoch": 0.56, + "learning_rate": 1.876196657347027e-05, + "loss": 0.5023, + "step": 3259 + }, + { + "epoch": 0.56, + "learning_rate": 1.8761078960910254e-05, + "loss": 0.5322, + "step": 3260 + }, + { + "epoch": 0.56, + "learning_rate": 1.8760191051287363e-05, + "loss": 0.5676, + "step": 3261 + }, + { + "epoch": 0.56, + "learning_rate": 1.875930284463171e-05, + "loss": 0.4421, + "step": 3262 + }, + { + "epoch": 0.56, + "learning_rate": 1.8758414340973413e-05, + "loss": 0.5086, + "step": 3263 + }, + { + "epoch": 0.56, + "learning_rate": 1.8757525540342595e-05, + "loss": 0.531, + "step": 3264 + }, + { + "epoch": 0.56, + "learning_rate": 1.8756636442769388e-05, + "loss": 0.4497, + "step": 3265 + }, + { + "epoch": 0.56, + "learning_rate": 1.875574704828395e-05, + "loss": 0.5176, + "step": 3266 + }, + { + "epoch": 0.56, + "learning_rate": 1.875485735691643e-05, + "loss": 0.5107, + "step": 3267 + }, + { + "epoch": 0.56, + "learning_rate": 1.8753967368696996e-05, + "loss": 0.5136, + "step": 3268 + }, + { + "epoch": 0.56, + "learning_rate": 1.8753077083655825e-05, + "loss": 0.5198, + "step": 3269 + }, + { + "epoch": 0.56, + "learning_rate": 1.8752186501823102e-05, + "loss": 0.4797, + "step": 3270 + }, + { + "epoch": 0.56, + "learning_rate": 1.8751295623229032e-05, + "loss": 0.5211, + "step": 3271 + }, + { + "epoch": 0.56, + "learning_rate": 1.8750404447903813e-05, + "loss": 0.5052, + "step": 3272 + }, + { + "epoch": 0.56, + "learning_rate": 1.8749512975877663e-05, + "loss": 0.5216, + "step": 3273 + }, + { + "epoch": 0.56, + "learning_rate": 1.8748621207180816e-05, + "loss": 0.5171, + "step": 3274 + }, + { + "epoch": 0.56, + "learning_rate": 1.8747729141843507e-05, + "loss": 0.5169, + "step": 3275 + }, + { + "epoch": 0.56, + "learning_rate": 1.874683677989598e-05, + "loss": 0.5256, + "step": 3276 + }, + { + "epoch": 0.56, + "learning_rate": 1.8745944121368495e-05, + "loss": 0.4864, + "step": 3277 + }, + { + "epoch": 0.56, + "learning_rate": 1.8745051166291315e-05, + "loss": 0.4909, + "step": 3278 + }, + { + "epoch": 0.56, + "learning_rate": 1.8744157914694723e-05, + "loss": 0.5097, + "step": 3279 + }, + { + "epoch": 0.56, + "learning_rate": 1.8743264366609004e-05, + "loss": 0.4975, + "step": 3280 + }, + { + "epoch": 0.56, + "learning_rate": 1.874237052206446e-05, + "loss": 0.538, + "step": 3281 + }, + { + "epoch": 0.56, + "learning_rate": 1.874147638109139e-05, + "loss": 0.513, + "step": 3282 + }, + { + "epoch": 0.56, + "learning_rate": 1.8740581943720118e-05, + "loss": 0.5658, + "step": 3283 + }, + { + "epoch": 0.56, + "learning_rate": 1.873968720998097e-05, + "loss": 0.5013, + "step": 3284 + }, + { + "epoch": 0.56, + "learning_rate": 1.8738792179904284e-05, + "loss": 0.526, + "step": 3285 + }, + { + "epoch": 0.56, + "learning_rate": 1.8737896853520415e-05, + "loss": 0.5136, + "step": 3286 + }, + { + "epoch": 0.56, + "learning_rate": 1.8737001230859707e-05, + "loss": 0.4923, + "step": 3287 + }, + { + "epoch": 0.56, + "learning_rate": 1.8736105311952536e-05, + "loss": 0.5175, + "step": 3288 + }, + { + "epoch": 0.56, + "learning_rate": 1.873520909682928e-05, + "loss": 0.5241, + "step": 3289 + }, + { + "epoch": 0.56, + "learning_rate": 1.8734312585520328e-05, + "loss": 0.5407, + "step": 3290 + }, + { + "epoch": 0.56, + "learning_rate": 1.8733415778056073e-05, + "loss": 0.5201, + "step": 3291 + }, + { + "epoch": 0.56, + "learning_rate": 1.873251867446693e-05, + "loss": 0.5288, + "step": 3292 + }, + { + "epoch": 0.56, + "learning_rate": 1.8731621274783312e-05, + "loss": 0.5523, + "step": 3293 + }, + { + "epoch": 0.56, + "learning_rate": 1.873072357903565e-05, + "loss": 0.517, + "step": 3294 + }, + { + "epoch": 0.56, + "learning_rate": 1.872982558725438e-05, + "loss": 0.5556, + "step": 3295 + }, + { + "epoch": 0.56, + "learning_rate": 1.8728927299469952e-05, + "loss": 0.486, + "step": 3296 + }, + { + "epoch": 0.56, + "learning_rate": 1.872802871571282e-05, + "loss": 0.4842, + "step": 3297 + }, + { + "epoch": 0.56, + "learning_rate": 1.8727129836013464e-05, + "loss": 0.5197, + "step": 3298 + }, + { + "epoch": 0.56, + "learning_rate": 1.872623066040235e-05, + "loss": 0.5476, + "step": 3299 + }, + { + "epoch": 0.56, + "learning_rate": 1.872533118890997e-05, + "loss": 0.5063, + "step": 3300 + }, + { + "epoch": 0.56, + "learning_rate": 1.8724431421566822e-05, + "loss": 0.5695, + "step": 3301 + }, + { + "epoch": 0.56, + "learning_rate": 1.872353135840342e-05, + "loss": 0.5194, + "step": 3302 + }, + { + "epoch": 0.56, + "learning_rate": 1.8722630999450276e-05, + "loss": 0.5368, + "step": 3303 + }, + { + "epoch": 0.56, + "learning_rate": 1.8721730344737926e-05, + "loss": 0.5135, + "step": 3304 + }, + { + "epoch": 0.56, + "learning_rate": 1.8720829394296898e-05, + "loss": 0.5235, + "step": 3305 + }, + { + "epoch": 0.56, + "learning_rate": 1.8719928148157748e-05, + "loss": 0.4849, + "step": 3306 + }, + { + "epoch": 0.56, + "learning_rate": 1.8719026606351035e-05, + "loss": 0.483, + "step": 3307 + }, + { + "epoch": 0.56, + "learning_rate": 1.8718124768907323e-05, + "loss": 0.5025, + "step": 3308 + }, + { + "epoch": 0.56, + "learning_rate": 1.8717222635857196e-05, + "loss": 0.4949, + "step": 3309 + }, + { + "epoch": 0.56, + "learning_rate": 1.871632020723124e-05, + "loss": 0.5083, + "step": 3310 + }, + { + "epoch": 0.56, + "learning_rate": 1.871541748306005e-05, + "loss": 0.5288, + "step": 3311 + }, + { + "epoch": 0.56, + "learning_rate": 1.8714514463374243e-05, + "loss": 0.516, + "step": 3312 + }, + { + "epoch": 0.57, + "learning_rate": 1.871361114820443e-05, + "loss": 0.5, + "step": 3313 + }, + { + "epoch": 0.57, + "learning_rate": 1.8712707537581244e-05, + "loss": 0.5249, + "step": 3314 + }, + { + "epoch": 0.57, + "learning_rate": 1.8711803631535325e-05, + "loss": 0.551, + "step": 3315 + }, + { + "epoch": 0.57, + "learning_rate": 1.871089943009732e-05, + "loss": 0.5059, + "step": 3316 + }, + { + "epoch": 0.57, + "learning_rate": 1.8709994933297885e-05, + "loss": 0.5521, + "step": 3317 + }, + { + "epoch": 0.57, + "learning_rate": 1.8709090141167696e-05, + "loss": 0.5212, + "step": 3318 + }, + { + "epoch": 0.57, + "learning_rate": 1.8708185053737424e-05, + "loss": 0.5079, + "step": 3319 + }, + { + "epoch": 0.57, + "learning_rate": 1.8707279671037765e-05, + "loss": 0.4696, + "step": 3320 + }, + { + "epoch": 0.57, + "learning_rate": 1.8706373993099412e-05, + "loss": 0.5205, + "step": 3321 + }, + { + "epoch": 0.57, + "learning_rate": 1.8705468019953076e-05, + "loss": 0.5546, + "step": 3322 + }, + { + "epoch": 0.57, + "learning_rate": 1.8704561751629472e-05, + "loss": 0.4753, + "step": 3323 + }, + { + "epoch": 0.57, + "learning_rate": 1.870365518815934e-05, + "loss": 0.5276, + "step": 3324 + }, + { + "epoch": 0.57, + "learning_rate": 1.870274832957341e-05, + "loss": 0.5044, + "step": 3325 + }, + { + "epoch": 0.57, + "learning_rate": 1.870184117590243e-05, + "loss": 0.4897, + "step": 3326 + }, + { + "epoch": 0.57, + "learning_rate": 1.8700933727177165e-05, + "loss": 0.4991, + "step": 3327 + }, + { + "epoch": 0.57, + "learning_rate": 1.870002598342838e-05, + "loss": 0.5269, + "step": 3328 + }, + { + "epoch": 0.57, + "learning_rate": 1.869911794468686e-05, + "loss": 0.4687, + "step": 3329 + }, + { + "epoch": 0.57, + "learning_rate": 1.8698209610983382e-05, + "loss": 0.5062, + "step": 3330 + }, + { + "epoch": 0.57, + "learning_rate": 1.8697300982348752e-05, + "loss": 0.5232, + "step": 3331 + }, + { + "epoch": 0.57, + "learning_rate": 1.869639205881378e-05, + "loss": 0.4986, + "step": 3332 + }, + { + "epoch": 0.57, + "learning_rate": 1.8695482840409287e-05, + "loss": 0.5073, + "step": 3333 + }, + { + "epoch": 0.57, + "learning_rate": 1.8694573327166098e-05, + "loss": 0.4893, + "step": 3334 + }, + { + "epoch": 0.57, + "learning_rate": 1.8693663519115047e-05, + "loss": 0.5224, + "step": 3335 + }, + { + "epoch": 0.57, + "learning_rate": 1.8692753416286996e-05, + "loss": 0.5062, + "step": 3336 + }, + { + "epoch": 0.57, + "learning_rate": 1.8691843018712795e-05, + "loss": 0.5265, + "step": 3337 + }, + { + "epoch": 0.57, + "learning_rate": 1.869093232642331e-05, + "loss": 0.4649, + "step": 3338 + }, + { + "epoch": 0.57, + "learning_rate": 1.869002133944943e-05, + "loss": 0.4939, + "step": 3339 + }, + { + "epoch": 0.57, + "learning_rate": 1.8689110057822034e-05, + "loss": 0.539, + "step": 3340 + }, + { + "epoch": 0.57, + "learning_rate": 1.868819848157203e-05, + "loss": 0.5055, + "step": 3341 + }, + { + "epoch": 0.57, + "learning_rate": 1.868728661073032e-05, + "loss": 0.5138, + "step": 3342 + }, + { + "epoch": 0.57, + "learning_rate": 1.8686374445327823e-05, + "loss": 0.524, + "step": 3343 + }, + { + "epoch": 0.57, + "learning_rate": 1.8685461985395473e-05, + "loss": 0.5037, + "step": 3344 + }, + { + "epoch": 0.57, + "learning_rate": 1.8684549230964205e-05, + "loss": 0.5107, + "step": 3345 + }, + { + "epoch": 0.57, + "learning_rate": 1.8683636182064972e-05, + "loss": 0.5481, + "step": 3346 + }, + { + "epoch": 0.57, + "learning_rate": 1.8682722838728728e-05, + "loss": 0.4834, + "step": 3347 + }, + { + "epoch": 0.57, + "learning_rate": 1.868180920098644e-05, + "loss": 0.524, + "step": 3348 + }, + { + "epoch": 0.57, + "learning_rate": 1.8680895268869093e-05, + "loss": 0.5415, + "step": 3349 + }, + { + "epoch": 0.57, + "learning_rate": 1.8679981042407672e-05, + "loss": 0.5283, + "step": 3350 + }, + { + "epoch": 0.57, + "learning_rate": 1.8679066521633178e-05, + "loss": 0.5358, + "step": 3351 + }, + { + "epoch": 0.57, + "learning_rate": 1.867815170657662e-05, + "loss": 0.5474, + "step": 3352 + }, + { + "epoch": 0.57, + "learning_rate": 1.8677236597269016e-05, + "loss": 0.4823, + "step": 3353 + }, + { + "epoch": 0.57, + "learning_rate": 1.8676321193741392e-05, + "loss": 0.4993, + "step": 3354 + }, + { + "epoch": 0.57, + "learning_rate": 1.867540549602479e-05, + "loss": 0.5095, + "step": 3355 + }, + { + "epoch": 0.57, + "learning_rate": 1.8674489504150255e-05, + "loss": 0.484, + "step": 3356 + }, + { + "epoch": 0.57, + "learning_rate": 1.867357321814885e-05, + "loss": 0.4798, + "step": 3357 + }, + { + "epoch": 0.57, + "learning_rate": 1.867265663805164e-05, + "loss": 0.4965, + "step": 3358 + }, + { + "epoch": 0.57, + "learning_rate": 1.867173976388971e-05, + "loss": 0.4799, + "step": 3359 + }, + { + "epoch": 0.57, + "learning_rate": 1.867082259569414e-05, + "loss": 0.4995, + "step": 3360 + }, + { + "epoch": 0.57, + "learning_rate": 1.866990513349603e-05, + "loss": 0.54, + "step": 3361 + }, + { + "epoch": 0.57, + "learning_rate": 1.8668987377326498e-05, + "loss": 0.4798, + "step": 3362 + }, + { + "epoch": 0.57, + "learning_rate": 1.8668069327216648e-05, + "loss": 0.4822, + "step": 3363 + }, + { + "epoch": 0.57, + "learning_rate": 1.866715098319762e-05, + "loss": 0.5133, + "step": 3364 + }, + { + "epoch": 0.57, + "learning_rate": 1.866623234530055e-05, + "loss": 0.4907, + "step": 3365 + }, + { + "epoch": 0.57, + "learning_rate": 1.866531341355658e-05, + "loss": 0.497, + "step": 3366 + }, + { + "epoch": 0.57, + "learning_rate": 1.8664394187996876e-05, + "loss": 0.5, + "step": 3367 + }, + { + "epoch": 0.57, + "learning_rate": 1.86634746686526e-05, + "loss": 0.5114, + "step": 3368 + }, + { + "epoch": 0.57, + "learning_rate": 1.8662554855554936e-05, + "loss": 0.4923, + "step": 3369 + }, + { + "epoch": 0.57, + "learning_rate": 1.8661634748735073e-05, + "loss": 0.544, + "step": 3370 + }, + { + "epoch": 0.57, + "learning_rate": 1.8660714348224197e-05, + "loss": 0.5328, + "step": 3371 + }, + { + "epoch": 0.58, + "learning_rate": 1.865979365405353e-05, + "loss": 0.479, + "step": 3372 + }, + { + "epoch": 0.58, + "learning_rate": 1.8658872666254285e-05, + "loss": 0.5332, + "step": 3373 + }, + { + "epoch": 0.58, + "learning_rate": 1.8657951384857693e-05, + "loss": 0.5254, + "step": 3374 + }, + { + "epoch": 0.58, + "learning_rate": 1.8657029809894985e-05, + "loss": 0.5021, + "step": 3375 + }, + { + "epoch": 0.58, + "learning_rate": 1.865610794139741e-05, + "loss": 0.5013, + "step": 3376 + }, + { + "epoch": 0.58, + "learning_rate": 1.865518577939623e-05, + "loss": 0.4913, + "step": 3377 + }, + { + "epoch": 0.58, + "learning_rate": 1.8654263323922717e-05, + "loss": 0.4941, + "step": 3378 + }, + { + "epoch": 0.58, + "learning_rate": 1.865334057500814e-05, + "loss": 0.5101, + "step": 3379 + }, + { + "epoch": 0.58, + "learning_rate": 1.8652417532683792e-05, + "loss": 0.5282, + "step": 3380 + }, + { + "epoch": 0.58, + "learning_rate": 1.8651494196980967e-05, + "loss": 0.5664, + "step": 3381 + }, + { + "epoch": 0.58, + "learning_rate": 1.8650570567930975e-05, + "loss": 0.4802, + "step": 3382 + }, + { + "epoch": 0.58, + "learning_rate": 1.8649646645565132e-05, + "loss": 0.536, + "step": 3383 + }, + { + "epoch": 0.58, + "learning_rate": 1.864872242991477e-05, + "loss": 0.5487, + "step": 3384 + }, + { + "epoch": 0.58, + "learning_rate": 1.864779792101122e-05, + "loss": 0.5516, + "step": 3385 + }, + { + "epoch": 0.58, + "learning_rate": 1.8646873118885833e-05, + "loss": 0.5282, + "step": 3386 + }, + { + "epoch": 0.58, + "learning_rate": 1.8645948023569968e-05, + "loss": 0.4939, + "step": 3387 + }, + { + "epoch": 0.58, + "learning_rate": 1.8645022635094986e-05, + "loss": 0.5494, + "step": 3388 + }, + { + "epoch": 0.58, + "learning_rate": 1.8644096953492272e-05, + "loss": 0.525, + "step": 3389 + }, + { + "epoch": 0.58, + "learning_rate": 1.864317097879321e-05, + "loss": 0.5115, + "step": 3390 + }, + { + "epoch": 0.58, + "learning_rate": 1.8642244711029196e-05, + "loss": 0.5277, + "step": 3391 + }, + { + "epoch": 0.58, + "learning_rate": 1.8641318150231637e-05, + "loss": 0.5125, + "step": 3392 + }, + { + "epoch": 0.58, + "learning_rate": 1.8640391296431954e-05, + "loss": 0.4836, + "step": 3393 + }, + { + "epoch": 0.58, + "learning_rate": 1.8639464149661565e-05, + "loss": 0.5208, + "step": 3394 + }, + { + "epoch": 0.58, + "learning_rate": 1.8638536709951916e-05, + "loss": 0.5224, + "step": 3395 + }, + { + "epoch": 0.58, + "learning_rate": 1.8637608977334453e-05, + "loss": 0.5245, + "step": 3396 + }, + { + "epoch": 0.58, + "learning_rate": 1.8636680951840626e-05, + "loss": 0.4786, + "step": 3397 + }, + { + "epoch": 0.58, + "learning_rate": 1.8635752633501912e-05, + "loss": 0.5417, + "step": 3398 + }, + { + "epoch": 0.58, + "learning_rate": 1.8634824022349773e-05, + "loss": 0.4839, + "step": 3399 + }, + { + "epoch": 0.58, + "learning_rate": 1.8633895118415712e-05, + "loss": 0.5123, + "step": 3400 + }, + { + "epoch": 0.58, + "learning_rate": 1.863296592173121e-05, + "loss": 0.5196, + "step": 3401 + }, + { + "epoch": 0.58, + "learning_rate": 1.8632036432327787e-05, + "loss": 0.5444, + "step": 3402 + }, + { + "epoch": 0.58, + "learning_rate": 1.8631106650236953e-05, + "loss": 0.518, + "step": 3403 + }, + { + "epoch": 0.58, + "learning_rate": 1.863017657549023e-05, + "loss": 0.5136, + "step": 3404 + }, + { + "epoch": 0.58, + "learning_rate": 1.862924620811916e-05, + "loss": 0.5299, + "step": 3405 + }, + { + "epoch": 0.58, + "learning_rate": 1.862831554815529e-05, + "loss": 0.5381, + "step": 3406 + }, + { + "epoch": 0.58, + "learning_rate": 1.862738459563017e-05, + "loss": 0.5304, + "step": 3407 + }, + { + "epoch": 0.58, + "learning_rate": 1.862645335057537e-05, + "loss": 0.4877, + "step": 3408 + }, + { + "epoch": 0.58, + "learning_rate": 1.8625521813022468e-05, + "loss": 0.5015, + "step": 3409 + }, + { + "epoch": 0.58, + "learning_rate": 1.8624589983003044e-05, + "loss": 0.5004, + "step": 3410 + }, + { + "epoch": 0.58, + "learning_rate": 1.8623657860548698e-05, + "loss": 0.5234, + "step": 3411 + }, + { + "epoch": 0.58, + "learning_rate": 1.8622725445691036e-05, + "loss": 0.5559, + "step": 3412 + }, + { + "epoch": 0.58, + "learning_rate": 1.862179273846167e-05, + "loss": 0.4913, + "step": 3413 + }, + { + "epoch": 0.58, + "learning_rate": 1.8620859738892228e-05, + "loss": 0.5322, + "step": 3414 + }, + { + "epoch": 0.58, + "learning_rate": 1.8619926447014345e-05, + "loss": 0.5262, + "step": 3415 + }, + { + "epoch": 0.58, + "learning_rate": 1.8618992862859664e-05, + "loss": 0.5123, + "step": 3416 + }, + { + "epoch": 0.58, + "learning_rate": 1.8618058986459845e-05, + "loss": 0.5054, + "step": 3417 + }, + { + "epoch": 0.58, + "learning_rate": 1.8617124817846544e-05, + "loss": 0.4682, + "step": 3418 + }, + { + "epoch": 0.58, + "learning_rate": 1.8616190357051447e-05, + "loss": 0.5058, + "step": 3419 + }, + { + "epoch": 0.58, + "learning_rate": 1.8615255604106234e-05, + "loss": 0.4996, + "step": 3420 + }, + { + "epoch": 0.58, + "learning_rate": 1.8614320559042593e-05, + "loss": 0.4568, + "step": 3421 + }, + { + "epoch": 0.58, + "learning_rate": 1.8613385221892243e-05, + "loss": 0.5528, + "step": 3422 + }, + { + "epoch": 0.58, + "learning_rate": 1.861244959268689e-05, + "loss": 0.4947, + "step": 3423 + }, + { + "epoch": 0.58, + "learning_rate": 1.8611513671458256e-05, + "loss": 0.482, + "step": 3424 + }, + { + "epoch": 0.58, + "learning_rate": 1.8610577458238077e-05, + "loss": 0.5032, + "step": 3425 + }, + { + "epoch": 0.58, + "learning_rate": 1.8609640953058103e-05, + "loss": 0.4953, + "step": 3426 + }, + { + "epoch": 0.58, + "learning_rate": 1.860870415595008e-05, + "loss": 0.496, + "step": 3427 + }, + { + "epoch": 0.58, + "learning_rate": 1.8607767066945782e-05, + "loss": 0.5104, + "step": 3428 + }, + { + "epoch": 0.58, + "learning_rate": 1.860682968607697e-05, + "loss": 0.5295, + "step": 3429 + }, + { + "epoch": 0.58, + "learning_rate": 1.8605892013375442e-05, + "loss": 0.508, + "step": 3430 + }, + { + "epoch": 0.59, + "learning_rate": 1.860495404887298e-05, + "loss": 0.496, + "step": 3431 + }, + { + "epoch": 0.59, + "learning_rate": 1.8604015792601395e-05, + "loss": 0.5337, + "step": 3432 + }, + { + "epoch": 0.59, + "learning_rate": 1.86030772445925e-05, + "loss": 0.5341, + "step": 3433 + }, + { + "epoch": 0.59, + "learning_rate": 1.8602138404878113e-05, + "loss": 0.5239, + "step": 3434 + }, + { + "epoch": 0.59, + "learning_rate": 1.860119927349007e-05, + "loss": 0.5357, + "step": 3435 + }, + { + "epoch": 0.59, + "learning_rate": 1.8600259850460215e-05, + "loss": 0.5507, + "step": 3436 + }, + { + "epoch": 0.59, + "learning_rate": 1.8599320135820406e-05, + "loss": 0.5215, + "step": 3437 + }, + { + "epoch": 0.59, + "learning_rate": 1.85983801296025e-05, + "loss": 0.5201, + "step": 3438 + }, + { + "epoch": 0.59, + "learning_rate": 1.8597439831838368e-05, + "loss": 0.506, + "step": 3439 + }, + { + "epoch": 0.59, + "learning_rate": 1.8596499242559897e-05, + "loss": 0.5321, + "step": 3440 + }, + { + "epoch": 0.59, + "learning_rate": 1.8595558361798977e-05, + "loss": 0.5062, + "step": 3441 + }, + { + "epoch": 0.59, + "learning_rate": 1.8594617189587515e-05, + "loss": 0.5081, + "step": 3442 + }, + { + "epoch": 0.59, + "learning_rate": 1.8593675725957417e-05, + "loss": 0.5407, + "step": 3443 + }, + { + "epoch": 0.59, + "learning_rate": 1.8592733970940612e-05, + "loss": 0.4833, + "step": 3444 + }, + { + "epoch": 0.59, + "learning_rate": 1.8591791924569028e-05, + "loss": 0.5035, + "step": 3445 + }, + { + "epoch": 0.59, + "learning_rate": 1.859084958687461e-05, + "loss": 0.4863, + "step": 3446 + }, + { + "epoch": 0.59, + "learning_rate": 1.8589906957889305e-05, + "loss": 0.4843, + "step": 3447 + }, + { + "epoch": 0.59, + "learning_rate": 1.8588964037645078e-05, + "loss": 0.4851, + "step": 3448 + }, + { + "epoch": 0.59, + "learning_rate": 1.8588020826173904e-05, + "loss": 0.4912, + "step": 3449 + }, + { + "epoch": 0.59, + "learning_rate": 1.8587077323507755e-05, + "loss": 0.5515, + "step": 3450 + }, + { + "epoch": 0.59, + "learning_rate": 1.8586133529678635e-05, + "loss": 0.5078, + "step": 3451 + }, + { + "epoch": 0.59, + "learning_rate": 1.8585189444718538e-05, + "loss": 0.5772, + "step": 3452 + }, + { + "epoch": 0.59, + "learning_rate": 1.858424506865947e-05, + "loss": 0.4801, + "step": 3453 + }, + { + "epoch": 0.59, + "learning_rate": 1.8583300401533467e-05, + "loss": 0.4734, + "step": 3454 + }, + { + "epoch": 0.59, + "learning_rate": 1.8582355443372546e-05, + "loss": 0.5339, + "step": 3455 + }, + { + "epoch": 0.59, + "learning_rate": 1.8581410194208753e-05, + "loss": 0.4834, + "step": 3456 + }, + { + "epoch": 0.59, + "learning_rate": 1.858046465407414e-05, + "loss": 0.5602, + "step": 3457 + }, + { + "epoch": 0.59, + "learning_rate": 1.8579518823000762e-05, + "loss": 0.5083, + "step": 3458 + }, + { + "epoch": 0.59, + "learning_rate": 1.85785727010207e-05, + "loss": 0.5215, + "step": 3459 + }, + { + "epoch": 0.59, + "learning_rate": 1.857762628816602e-05, + "loss": 0.5277, + "step": 3460 + }, + { + "epoch": 0.59, + "learning_rate": 1.8576679584468828e-05, + "loss": 0.5407, + "step": 3461 + }, + { + "epoch": 0.59, + "learning_rate": 1.857573258996121e-05, + "loss": 0.4848, + "step": 3462 + }, + { + "epoch": 0.59, + "learning_rate": 1.8574785304675287e-05, + "loss": 0.5536, + "step": 3463 + }, + { + "epoch": 0.59, + "learning_rate": 1.857383772864317e-05, + "loss": 0.4815, + "step": 3464 + }, + { + "epoch": 0.59, + "learning_rate": 1.8572889861896993e-05, + "loss": 0.5652, + "step": 3465 + }, + { + "epoch": 0.59, + "learning_rate": 1.8571941704468893e-05, + "loss": 0.5556, + "step": 3466 + }, + { + "epoch": 0.59, + "learning_rate": 1.857099325639102e-05, + "loss": 0.5037, + "step": 3467 + }, + { + "epoch": 0.59, + "learning_rate": 1.8570044517695537e-05, + "loss": 0.5089, + "step": 3468 + }, + { + "epoch": 0.59, + "learning_rate": 1.8569095488414608e-05, + "loss": 0.5311, + "step": 3469 + }, + { + "epoch": 0.59, + "learning_rate": 1.8568146168580412e-05, + "loss": 0.5216, + "step": 3470 + }, + { + "epoch": 0.59, + "learning_rate": 1.856719655822514e-05, + "loss": 0.5066, + "step": 3471 + }, + { + "epoch": 0.59, + "learning_rate": 1.8566246657380994e-05, + "loss": 0.5114, + "step": 3472 + }, + { + "epoch": 0.59, + "learning_rate": 1.8565296466080176e-05, + "loss": 0.5412, + "step": 3473 + }, + { + "epoch": 0.59, + "learning_rate": 1.8564345984354902e-05, + "loss": 0.4583, + "step": 3474 + }, + { + "epoch": 0.59, + "learning_rate": 1.856339521223741e-05, + "loss": 0.4873, + "step": 3475 + }, + { + "epoch": 0.59, + "learning_rate": 1.8562444149759932e-05, + "loss": 0.5057, + "step": 3476 + }, + { + "epoch": 0.59, + "learning_rate": 1.8561492796954713e-05, + "loss": 0.511, + "step": 3477 + }, + { + "epoch": 0.59, + "learning_rate": 1.8560541153854016e-05, + "loss": 0.5514, + "step": 3478 + }, + { + "epoch": 0.59, + "learning_rate": 1.8559589220490107e-05, + "loss": 0.5352, + "step": 3479 + }, + { + "epoch": 0.59, + "learning_rate": 1.8558636996895262e-05, + "loss": 0.5244, + "step": 3480 + }, + { + "epoch": 0.59, + "learning_rate": 1.855768448310177e-05, + "loss": 0.5307, + "step": 3481 + }, + { + "epoch": 0.59, + "learning_rate": 1.855673167914193e-05, + "loss": 0.4926, + "step": 3482 + }, + { + "epoch": 0.59, + "learning_rate": 1.8555778585048038e-05, + "loss": 0.4922, + "step": 3483 + }, + { + "epoch": 0.59, + "learning_rate": 1.8554825200852426e-05, + "loss": 0.5258, + "step": 3484 + }, + { + "epoch": 0.59, + "learning_rate": 1.855387152658741e-05, + "loss": 0.5393, + "step": 3485 + }, + { + "epoch": 0.59, + "learning_rate": 1.8552917562285325e-05, + "loss": 0.5127, + "step": 3486 + }, + { + "epoch": 0.59, + "learning_rate": 1.8551963307978523e-05, + "loss": 0.5291, + "step": 3487 + }, + { + "epoch": 0.59, + "learning_rate": 1.855100876369936e-05, + "loss": 0.5207, + "step": 3488 + }, + { + "epoch": 0.6, + "learning_rate": 1.8550053929480202e-05, + "loss": 0.4939, + "step": 3489 + }, + { + "epoch": 0.6, + "learning_rate": 1.854909880535342e-05, + "loss": 0.5196, + "step": 3490 + }, + { + "epoch": 0.6, + "learning_rate": 1.8548143391351404e-05, + "loss": 0.4996, + "step": 3491 + }, + { + "epoch": 0.6, + "learning_rate": 1.854718768750655e-05, + "loss": 0.5338, + "step": 3492 + }, + { + "epoch": 0.6, + "learning_rate": 1.8546231693851254e-05, + "loss": 0.5124, + "step": 3493 + }, + { + "epoch": 0.6, + "learning_rate": 1.854527541041794e-05, + "loss": 0.5299, + "step": 3494 + }, + { + "epoch": 0.6, + "learning_rate": 1.8544318837239034e-05, + "loss": 0.477, + "step": 3495 + }, + { + "epoch": 0.6, + "learning_rate": 1.854336197434697e-05, + "loss": 0.5291, + "step": 3496 + }, + { + "epoch": 0.6, + "learning_rate": 1.8542404821774186e-05, + "loss": 0.5019, + "step": 3497 + }, + { + "epoch": 0.6, + "learning_rate": 1.8541447379553136e-05, + "loss": 0.552, + "step": 3498 + }, + { + "epoch": 0.6, + "learning_rate": 1.8540489647716293e-05, + "loss": 0.4654, + "step": 3499 + }, + { + "epoch": 0.6, + "learning_rate": 1.8539531626296123e-05, + "loss": 0.4986, + "step": 3500 + }, + { + "epoch": 0.6, + "learning_rate": 1.8538573315325113e-05, + "loss": 0.515, + "step": 3501 + }, + { + "epoch": 0.6, + "learning_rate": 1.853761471483576e-05, + "loss": 0.4862, + "step": 3502 + }, + { + "epoch": 0.6, + "learning_rate": 1.8536655824860564e-05, + "loss": 0.565, + "step": 3503 + }, + { + "epoch": 0.6, + "learning_rate": 1.8535696645432037e-05, + "loss": 0.5142, + "step": 3504 + }, + { + "epoch": 0.6, + "learning_rate": 1.85347371765827e-05, + "loss": 0.5117, + "step": 3505 + }, + { + "epoch": 0.6, + "learning_rate": 1.853377741834509e-05, + "loss": 0.5302, + "step": 3506 + }, + { + "epoch": 0.6, + "learning_rate": 1.853281737075175e-05, + "loss": 0.5187, + "step": 3507 + }, + { + "epoch": 0.6, + "learning_rate": 1.8531857033835226e-05, + "loss": 0.5204, + "step": 3508 + }, + { + "epoch": 0.6, + "learning_rate": 1.853089640762809e-05, + "loss": 0.5612, + "step": 3509 + }, + { + "epoch": 0.6, + "learning_rate": 1.8529935492162906e-05, + "loss": 0.5244, + "step": 3510 + }, + { + "epoch": 0.6, + "learning_rate": 1.8528974287472263e-05, + "loss": 0.4825, + "step": 3511 + }, + { + "epoch": 0.6, + "learning_rate": 1.8528012793588742e-05, + "loss": 0.4849, + "step": 3512 + }, + { + "epoch": 0.6, + "learning_rate": 1.8527051010544957e-05, + "loss": 0.5399, + "step": 3513 + }, + { + "epoch": 0.6, + "learning_rate": 1.852608893837351e-05, + "loss": 0.5112, + "step": 3514 + }, + { + "epoch": 0.6, + "learning_rate": 1.8525126577107028e-05, + "loss": 0.4987, + "step": 3515 + }, + { + "epoch": 0.6, + "learning_rate": 1.8524163926778138e-05, + "loss": 0.5257, + "step": 3516 + }, + { + "epoch": 0.6, + "learning_rate": 1.8523200987419482e-05, + "loss": 0.5148, + "step": 3517 + }, + { + "epoch": 0.6, + "learning_rate": 1.8522237759063712e-05, + "loss": 0.4957, + "step": 3518 + }, + { + "epoch": 0.6, + "learning_rate": 1.8521274241743485e-05, + "loss": 0.4947, + "step": 3519 + }, + { + "epoch": 0.6, + "learning_rate": 1.8520310435491477e-05, + "loss": 0.4693, + "step": 3520 + }, + { + "epoch": 0.6, + "learning_rate": 1.8519346340340362e-05, + "loss": 0.4816, + "step": 3521 + }, + { + "epoch": 0.6, + "learning_rate": 1.8518381956322827e-05, + "loss": 0.4785, + "step": 3522 + }, + { + "epoch": 0.6, + "learning_rate": 1.851741728347158e-05, + "loss": 0.5159, + "step": 3523 + }, + { + "epoch": 0.6, + "learning_rate": 1.8516452321819326e-05, + "loss": 0.5322, + "step": 3524 + }, + { + "epoch": 0.6, + "learning_rate": 1.8515487071398782e-05, + "loss": 0.4837, + "step": 3525 + }, + { + "epoch": 0.6, + "learning_rate": 1.851452153224268e-05, + "loss": 0.4835, + "step": 3526 + }, + { + "epoch": 0.6, + "learning_rate": 1.8513555704383758e-05, + "loss": 0.485, + "step": 3527 + }, + { + "epoch": 0.6, + "learning_rate": 1.8512589587854766e-05, + "loss": 0.5001, + "step": 3528 + }, + { + "epoch": 0.6, + "learning_rate": 1.851162318268846e-05, + "loss": 0.5299, + "step": 3529 + }, + { + "epoch": 0.6, + "learning_rate": 1.8510656488917607e-05, + "loss": 0.4894, + "step": 3530 + }, + { + "epoch": 0.6, + "learning_rate": 1.8509689506574986e-05, + "loss": 0.5168, + "step": 3531 + }, + { + "epoch": 0.6, + "learning_rate": 1.8508722235693387e-05, + "loss": 0.508, + "step": 3532 + }, + { + "epoch": 0.6, + "learning_rate": 1.8507754676305603e-05, + "loss": 0.5066, + "step": 3533 + }, + { + "epoch": 0.6, + "learning_rate": 1.8506786828444445e-05, + "loss": 0.5185, + "step": 3534 + }, + { + "epoch": 0.6, + "learning_rate": 1.8505818692142728e-05, + "loss": 0.5619, + "step": 3535 + }, + { + "epoch": 0.6, + "learning_rate": 1.8504850267433278e-05, + "loss": 0.5405, + "step": 3536 + }, + { + "epoch": 0.6, + "learning_rate": 1.8503881554348934e-05, + "loss": 0.5411, + "step": 3537 + }, + { + "epoch": 0.6, + "learning_rate": 1.850291255292254e-05, + "loss": 0.5382, + "step": 3538 + }, + { + "epoch": 0.6, + "learning_rate": 1.850194326318695e-05, + "loss": 0.5268, + "step": 3539 + }, + { + "epoch": 0.6, + "learning_rate": 1.8500973685175038e-05, + "loss": 0.5127, + "step": 3540 + }, + { + "epoch": 0.6, + "learning_rate": 1.850000381891967e-05, + "loss": 0.4831, + "step": 3541 + }, + { + "epoch": 0.6, + "learning_rate": 1.8499033664453736e-05, + "loss": 0.5583, + "step": 3542 + }, + { + "epoch": 0.6, + "learning_rate": 1.8498063221810133e-05, + "loss": 0.5214, + "step": 3543 + }, + { + "epoch": 0.6, + "learning_rate": 1.849709249102176e-05, + "loss": 0.5468, + "step": 3544 + }, + { + "epoch": 0.6, + "learning_rate": 1.8496121472121536e-05, + "loss": 0.5128, + "step": 3545 + }, + { + "epoch": 0.6, + "learning_rate": 1.8495150165142387e-05, + "loss": 0.5046, + "step": 3546 + }, + { + "epoch": 0.6, + "learning_rate": 1.8494178570117243e-05, + "loss": 0.5015, + "step": 3547 + }, + { + "epoch": 0.61, + "learning_rate": 1.8493206687079053e-05, + "loss": 0.5202, + "step": 3548 + }, + { + "epoch": 0.61, + "learning_rate": 1.8492234516060764e-05, + "loss": 0.4975, + "step": 3549 + }, + { + "epoch": 0.61, + "learning_rate": 1.8491262057095346e-05, + "loss": 0.5504, + "step": 3550 + }, + { + "epoch": 0.61, + "learning_rate": 1.8490289310215766e-05, + "loss": 0.5024, + "step": 3551 + }, + { + "epoch": 0.61, + "learning_rate": 1.848931627545501e-05, + "loss": 0.5333, + "step": 3552 + }, + { + "epoch": 0.61, + "learning_rate": 1.8488342952846074e-05, + "loss": 0.5552, + "step": 3553 + }, + { + "epoch": 0.61, + "learning_rate": 1.8487369342421956e-05, + "loss": 0.4902, + "step": 3554 + }, + { + "epoch": 0.61, + "learning_rate": 1.848639544421567e-05, + "loss": 0.5514, + "step": 3555 + }, + { + "epoch": 0.61, + "learning_rate": 1.8485421258260242e-05, + "loss": 0.4767, + "step": 3556 + }, + { + "epoch": 0.61, + "learning_rate": 1.8484446784588697e-05, + "loss": 0.5282, + "step": 3557 + }, + { + "epoch": 0.61, + "learning_rate": 1.848347202323408e-05, + "loss": 0.528, + "step": 3558 + }, + { + "epoch": 0.61, + "learning_rate": 1.848249697422944e-05, + "loss": 0.4869, + "step": 3559 + }, + { + "epoch": 0.61, + "learning_rate": 1.8481521637607842e-05, + "loss": 0.4846, + "step": 3560 + }, + { + "epoch": 0.61, + "learning_rate": 1.8480546013402354e-05, + "loss": 0.5274, + "step": 3561 + }, + { + "epoch": 0.61, + "learning_rate": 1.8479570101646057e-05, + "loss": 0.5475, + "step": 3562 + }, + { + "epoch": 0.61, + "learning_rate": 1.8478593902372046e-05, + "loss": 0.4953, + "step": 3563 + }, + { + "epoch": 0.61, + "learning_rate": 1.8477617415613413e-05, + "loss": 0.49, + "step": 3564 + }, + { + "epoch": 0.61, + "learning_rate": 1.8476640641403273e-05, + "loss": 0.4593, + "step": 3565 + }, + { + "epoch": 0.61, + "learning_rate": 1.8475663579774742e-05, + "loss": 0.4809, + "step": 3566 + }, + { + "epoch": 0.61, + "learning_rate": 1.8474686230760953e-05, + "loss": 0.529, + "step": 3567 + }, + { + "epoch": 0.61, + "learning_rate": 1.8473708594395045e-05, + "loss": 0.5458, + "step": 3568 + }, + { + "epoch": 0.61, + "learning_rate": 1.8472730670710167e-05, + "loss": 0.4615, + "step": 3569 + }, + { + "epoch": 0.61, + "learning_rate": 1.8471752459739472e-05, + "loss": 0.5171, + "step": 3570 + }, + { + "epoch": 0.61, + "learning_rate": 1.8470773961516134e-05, + "loss": 0.518, + "step": 3571 + }, + { + "epoch": 0.61, + "learning_rate": 1.8469795176073332e-05, + "loss": 0.5285, + "step": 3572 + }, + { + "epoch": 0.61, + "learning_rate": 1.846881610344425e-05, + "loss": 0.5206, + "step": 3573 + }, + { + "epoch": 0.61, + "learning_rate": 1.8467836743662088e-05, + "loss": 0.524, + "step": 3574 + }, + { + "epoch": 0.61, + "learning_rate": 1.8466857096760046e-05, + "loss": 0.5023, + "step": 3575 + }, + { + "epoch": 0.61, + "learning_rate": 1.846587716277135e-05, + "loss": 0.5185, + "step": 3576 + }, + { + "epoch": 0.61, + "learning_rate": 1.8464896941729227e-05, + "loss": 0.5215, + "step": 3577 + }, + { + "epoch": 0.61, + "learning_rate": 1.8463916433666908e-05, + "loss": 0.4945, + "step": 3578 + }, + { + "epoch": 0.61, + "learning_rate": 1.8462935638617643e-05, + "loss": 0.5101, + "step": 3579 + }, + { + "epoch": 0.61, + "learning_rate": 1.8461954556614685e-05, + "loss": 0.5171, + "step": 3580 + }, + { + "epoch": 0.61, + "learning_rate": 1.84609731876913e-05, + "loss": 0.5037, + "step": 3581 + }, + { + "epoch": 0.61, + "learning_rate": 1.8459991531880765e-05, + "loss": 0.512, + "step": 3582 + }, + { + "epoch": 0.61, + "learning_rate": 1.8459009589216364e-05, + "loss": 0.5092, + "step": 3583 + }, + { + "epoch": 0.61, + "learning_rate": 1.8458027359731395e-05, + "loss": 0.5297, + "step": 3584 + }, + { + "epoch": 0.61, + "learning_rate": 1.845704484345916e-05, + "loss": 0.5389, + "step": 3585 + }, + { + "epoch": 0.61, + "learning_rate": 1.8456062040432972e-05, + "loss": 0.5206, + "step": 3586 + }, + { + "epoch": 0.61, + "learning_rate": 1.8455078950686153e-05, + "loss": 0.4672, + "step": 3587 + }, + { + "epoch": 0.61, + "learning_rate": 1.8454095574252044e-05, + "loss": 0.5376, + "step": 3588 + }, + { + "epoch": 0.61, + "learning_rate": 1.8453111911163985e-05, + "loss": 0.5211, + "step": 3589 + }, + { + "epoch": 0.61, + "learning_rate": 1.8452127961455328e-05, + "loss": 0.5047, + "step": 3590 + }, + { + "epoch": 0.61, + "learning_rate": 1.8451143725159432e-05, + "loss": 0.4631, + "step": 3591 + }, + { + "epoch": 0.61, + "learning_rate": 1.845015920230968e-05, + "loss": 0.5136, + "step": 3592 + }, + { + "epoch": 0.61, + "learning_rate": 1.8449174392939443e-05, + "loss": 0.5265, + "step": 3593 + }, + { + "epoch": 0.61, + "learning_rate": 1.8448189297082123e-05, + "loss": 0.5099, + "step": 3594 + }, + { + "epoch": 0.61, + "learning_rate": 1.8447203914771114e-05, + "loss": 0.5469, + "step": 3595 + }, + { + "epoch": 0.61, + "learning_rate": 1.8446218246039834e-05, + "loss": 0.532, + "step": 3596 + }, + { + "epoch": 0.61, + "learning_rate": 1.84452322909217e-05, + "loss": 0.4736, + "step": 3597 + }, + { + "epoch": 0.61, + "learning_rate": 1.844424604945014e-05, + "loss": 0.5338, + "step": 3598 + }, + { + "epoch": 0.61, + "learning_rate": 1.84432595216586e-05, + "loss": 0.4732, + "step": 3599 + }, + { + "epoch": 0.61, + "learning_rate": 1.844227270758053e-05, + "loss": 0.4841, + "step": 3600 + }, + { + "epoch": 0.61, + "learning_rate": 1.844128560724939e-05, + "loss": 0.5181, + "step": 3601 + }, + { + "epoch": 0.61, + "learning_rate": 1.8440298220698647e-05, + "loss": 0.4934, + "step": 3602 + }, + { + "epoch": 0.61, + "learning_rate": 1.843931054796178e-05, + "loss": 0.5052, + "step": 3603 + }, + { + "epoch": 0.61, + "learning_rate": 1.843832258907228e-05, + "loss": 0.5066, + "step": 3604 + }, + { + "epoch": 0.61, + "learning_rate": 1.843733434406365e-05, + "loss": 0.4921, + "step": 3605 + }, + { + "epoch": 0.61, + "learning_rate": 1.8436345812969393e-05, + "loss": 0.4985, + "step": 3606 + }, + { + "epoch": 0.62, + "learning_rate": 1.8435356995823025e-05, + "loss": 0.4702, + "step": 3607 + }, + { + "epoch": 0.62, + "learning_rate": 1.843436789265808e-05, + "loss": 0.5013, + "step": 3608 + }, + { + "epoch": 0.62, + "learning_rate": 1.8433378503508095e-05, + "loss": 0.4747, + "step": 3609 + }, + { + "epoch": 0.62, + "learning_rate": 1.8432388828406612e-05, + "loss": 0.5314, + "step": 3610 + }, + { + "epoch": 0.62, + "learning_rate": 1.8431398867387192e-05, + "loss": 0.5395, + "step": 3611 + }, + { + "epoch": 0.62, + "learning_rate": 1.8430408620483402e-05, + "loss": 0.4988, + "step": 3612 + }, + { + "epoch": 0.62, + "learning_rate": 1.8429418087728818e-05, + "loss": 0.4982, + "step": 3613 + }, + { + "epoch": 0.62, + "learning_rate": 1.8428427269157027e-05, + "loss": 0.5012, + "step": 3614 + }, + { + "epoch": 0.62, + "learning_rate": 1.8427436164801618e-05, + "loss": 0.4847, + "step": 3615 + }, + { + "epoch": 0.62, + "learning_rate": 1.8426444774696205e-05, + "loss": 0.5318, + "step": 3616 + }, + { + "epoch": 0.62, + "learning_rate": 1.8425453098874403e-05, + "loss": 0.4826, + "step": 3617 + }, + { + "epoch": 0.62, + "learning_rate": 1.8424461137369828e-05, + "loss": 0.5231, + "step": 3618 + }, + { + "epoch": 0.62, + "learning_rate": 1.8423468890216127e-05, + "loss": 0.5028, + "step": 3619 + }, + { + "epoch": 0.62, + "learning_rate": 1.8422476357446934e-05, + "loss": 0.5401, + "step": 3620 + }, + { + "epoch": 0.62, + "learning_rate": 1.8421483539095905e-05, + "loss": 0.5313, + "step": 3621 + }, + { + "epoch": 0.62, + "learning_rate": 1.842049043519671e-05, + "loss": 0.4824, + "step": 3622 + }, + { + "epoch": 0.62, + "learning_rate": 1.8419497045783016e-05, + "loss": 0.5166, + "step": 3623 + }, + { + "epoch": 0.62, + "learning_rate": 1.841850337088851e-05, + "loss": 0.5055, + "step": 3624 + }, + { + "epoch": 0.62, + "learning_rate": 1.8417509410546874e-05, + "loss": 0.5154, + "step": 3625 + }, + { + "epoch": 0.62, + "learning_rate": 1.8416515164791825e-05, + "loss": 0.5047, + "step": 3626 + }, + { + "epoch": 0.62, + "learning_rate": 1.8415520633657065e-05, + "loss": 0.4782, + "step": 3627 + }, + { + "epoch": 0.62, + "learning_rate": 1.8414525817176323e-05, + "loss": 0.5119, + "step": 3628 + }, + { + "epoch": 0.62, + "learning_rate": 1.8413530715383327e-05, + "loss": 0.5045, + "step": 3629 + }, + { + "epoch": 0.62, + "learning_rate": 1.8412535328311813e-05, + "loss": 0.5165, + "step": 3630 + }, + { + "epoch": 0.62, + "learning_rate": 1.841153965599554e-05, + "loss": 0.5343, + "step": 3631 + }, + { + "epoch": 0.62, + "learning_rate": 1.841054369846826e-05, + "loss": 0.4701, + "step": 3632 + }, + { + "epoch": 0.62, + "learning_rate": 1.8409547455763756e-05, + "loss": 0.5044, + "step": 3633 + }, + { + "epoch": 0.62, + "learning_rate": 1.840855092791579e-05, + "loss": 0.5203, + "step": 3634 + }, + { + "epoch": 0.62, + "learning_rate": 1.8407554114958167e-05, + "loss": 0.5152, + "step": 3635 + }, + { + "epoch": 0.62, + "learning_rate": 1.8406557016924677e-05, + "loss": 0.462, + "step": 3636 + }, + { + "epoch": 0.62, + "learning_rate": 1.8405559633849134e-05, + "loss": 0.5057, + "step": 3637 + }, + { + "epoch": 0.62, + "learning_rate": 1.840456196576535e-05, + "loss": 0.4858, + "step": 3638 + }, + { + "epoch": 0.62, + "learning_rate": 1.840356401270716e-05, + "loss": 0.5073, + "step": 3639 + }, + { + "epoch": 0.62, + "learning_rate": 1.84025657747084e-05, + "loss": 0.4763, + "step": 3640 + }, + { + "epoch": 0.62, + "learning_rate": 1.8401567251802912e-05, + "loss": 0.5151, + "step": 3641 + }, + { + "epoch": 0.62, + "learning_rate": 1.8400568444024558e-05, + "loss": 0.5224, + "step": 3642 + }, + { + "epoch": 0.62, + "learning_rate": 1.8399569351407205e-05, + "loss": 0.5125, + "step": 3643 + }, + { + "epoch": 0.62, + "learning_rate": 1.8398569973984726e-05, + "loss": 0.4806, + "step": 3644 + }, + { + "epoch": 0.62, + "learning_rate": 1.8397570311791015e-05, + "loss": 0.5099, + "step": 3645 + }, + { + "epoch": 0.62, + "learning_rate": 1.8396570364859954e-05, + "loss": 0.5387, + "step": 3646 + }, + { + "epoch": 0.62, + "learning_rate": 1.839557013322546e-05, + "loss": 0.5227, + "step": 3647 + }, + { + "epoch": 0.62, + "learning_rate": 1.8394569616921445e-05, + "loss": 0.5196, + "step": 3648 + }, + { + "epoch": 0.62, + "learning_rate": 1.839356881598183e-05, + "loss": 0.54, + "step": 3649 + }, + { + "epoch": 0.62, + "learning_rate": 1.8392567730440553e-05, + "loss": 0.5307, + "step": 3650 + }, + { + "epoch": 0.62, + "learning_rate": 1.8391566360331555e-05, + "loss": 0.5146, + "step": 3651 + }, + { + "epoch": 0.62, + "learning_rate": 1.8390564705688794e-05, + "loss": 0.4967, + "step": 3652 + }, + { + "epoch": 0.62, + "learning_rate": 1.838956276654623e-05, + "loss": 0.5232, + "step": 3653 + }, + { + "epoch": 0.62, + "learning_rate": 1.8388560542937836e-05, + "loss": 0.5054, + "step": 3654 + }, + { + "epoch": 0.62, + "learning_rate": 1.8387558034897597e-05, + "loss": 0.5678, + "step": 3655 + }, + { + "epoch": 0.62, + "learning_rate": 1.83865552424595e-05, + "loss": 0.487, + "step": 3656 + }, + { + "epoch": 0.62, + "learning_rate": 1.838555216565755e-05, + "loss": 0.5096, + "step": 3657 + }, + { + "epoch": 0.62, + "learning_rate": 1.8384548804525762e-05, + "loss": 0.494, + "step": 3658 + }, + { + "epoch": 0.62, + "learning_rate": 1.8383545159098152e-05, + "loss": 0.5705, + "step": 3659 + }, + { + "epoch": 0.62, + "learning_rate": 1.838254122940875e-05, + "loss": 0.5194, + "step": 3660 + }, + { + "epoch": 0.62, + "learning_rate": 1.83815370154916e-05, + "loss": 0.4929, + "step": 3661 + }, + { + "epoch": 0.62, + "learning_rate": 1.838053251738075e-05, + "loss": 0.537, + "step": 3662 + }, + { + "epoch": 0.62, + "learning_rate": 1.837952773511026e-05, + "loss": 0.5051, + "step": 3663 + }, + { + "epoch": 0.62, + "learning_rate": 1.83785226687142e-05, + "loss": 0.5058, + "step": 3664 + }, + { + "epoch": 0.63, + "learning_rate": 1.8377517318226648e-05, + "loss": 0.5026, + "step": 3665 + }, + { + "epoch": 0.63, + "learning_rate": 1.837651168368169e-05, + "loss": 0.4881, + "step": 3666 + }, + { + "epoch": 0.63, + "learning_rate": 1.837550576511343e-05, + "loss": 0.5089, + "step": 3667 + }, + { + "epoch": 0.63, + "learning_rate": 1.8374499562555972e-05, + "loss": 0.5112, + "step": 3668 + }, + { + "epoch": 0.63, + "learning_rate": 1.8373493076043437e-05, + "loss": 0.4889, + "step": 3669 + }, + { + "epoch": 0.63, + "learning_rate": 1.8372486305609944e-05, + "loss": 0.4566, + "step": 3670 + }, + { + "epoch": 0.63, + "learning_rate": 1.8371479251289638e-05, + "loss": 0.4917, + "step": 3671 + }, + { + "epoch": 0.63, + "learning_rate": 1.8370471913116662e-05, + "loss": 0.5415, + "step": 3672 + }, + { + "epoch": 0.63, + "learning_rate": 1.836946429112517e-05, + "loss": 0.5221, + "step": 3673 + }, + { + "epoch": 0.63, + "learning_rate": 1.8368456385349333e-05, + "loss": 0.5, + "step": 3674 + }, + { + "epoch": 0.63, + "learning_rate": 1.836744819582332e-05, + "loss": 0.4963, + "step": 3675 + }, + { + "epoch": 0.63, + "learning_rate": 1.836643972258132e-05, + "loss": 0.4966, + "step": 3676 + }, + { + "epoch": 0.63, + "learning_rate": 1.8365430965657527e-05, + "loss": 0.4989, + "step": 3677 + }, + { + "epoch": 0.63, + "learning_rate": 1.8364421925086143e-05, + "loss": 0.5332, + "step": 3678 + }, + { + "epoch": 0.63, + "learning_rate": 1.836341260090138e-05, + "loss": 0.4946, + "step": 3679 + }, + { + "epoch": 0.63, + "learning_rate": 1.8362402993137468e-05, + "loss": 0.4999, + "step": 3680 + }, + { + "epoch": 0.63, + "learning_rate": 1.836139310182863e-05, + "loss": 0.5179, + "step": 3681 + }, + { + "epoch": 0.63, + "learning_rate": 1.8360382927009123e-05, + "loss": 0.5191, + "step": 3682 + }, + { + "epoch": 0.63, + "learning_rate": 1.8359372468713185e-05, + "loss": 0.518, + "step": 3683 + }, + { + "epoch": 0.63, + "learning_rate": 1.8358361726975084e-05, + "loss": 0.5124, + "step": 3684 + }, + { + "epoch": 0.63, + "learning_rate": 1.8357350701829092e-05, + "loss": 0.5044, + "step": 3685 + }, + { + "epoch": 0.63, + "learning_rate": 1.835633939330949e-05, + "loss": 0.4945, + "step": 3686 + }, + { + "epoch": 0.63, + "learning_rate": 1.8355327801450565e-05, + "loss": 0.486, + "step": 3687 + }, + { + "epoch": 0.63, + "learning_rate": 1.8354315926286617e-05, + "loss": 0.4868, + "step": 3688 + }, + { + "epoch": 0.63, + "learning_rate": 1.835330376785196e-05, + "loss": 0.4977, + "step": 3689 + }, + { + "epoch": 0.63, + "learning_rate": 1.835229132618091e-05, + "loss": 0.5406, + "step": 3690 + }, + { + "epoch": 0.63, + "learning_rate": 1.83512786013078e-05, + "loss": 0.4984, + "step": 3691 + }, + { + "epoch": 0.63, + "learning_rate": 1.8350265593266962e-05, + "loss": 0.4808, + "step": 3692 + }, + { + "epoch": 0.63, + "learning_rate": 1.834925230209275e-05, + "loss": 0.5101, + "step": 3693 + }, + { + "epoch": 0.63, + "learning_rate": 1.8348238727819523e-05, + "loss": 0.5525, + "step": 3694 + }, + { + "epoch": 0.63, + "learning_rate": 1.834722487048164e-05, + "loss": 0.5044, + "step": 3695 + }, + { + "epoch": 0.63, + "learning_rate": 1.8346210730113484e-05, + "loss": 0.5168, + "step": 3696 + }, + { + "epoch": 0.63, + "learning_rate": 1.8345196306749446e-05, + "loss": 0.5255, + "step": 3697 + }, + { + "epoch": 0.63, + "learning_rate": 1.834418160042391e-05, + "loss": 0.5248, + "step": 3698 + }, + { + "epoch": 0.63, + "learning_rate": 1.834316661117129e-05, + "loss": 0.4947, + "step": 3699 + }, + { + "epoch": 0.63, + "learning_rate": 1.8342151339026006e-05, + "loss": 0.5111, + "step": 3700 + }, + { + "epoch": 0.63, + "learning_rate": 1.834113578402247e-05, + "loss": 0.4709, + "step": 3701 + }, + { + "epoch": 0.63, + "learning_rate": 1.8340119946195128e-05, + "loss": 0.5023, + "step": 3702 + }, + { + "epoch": 0.63, + "learning_rate": 1.8339103825578416e-05, + "loss": 0.5128, + "step": 3703 + }, + { + "epoch": 0.63, + "learning_rate": 1.8338087422206796e-05, + "loss": 0.5068, + "step": 3704 + }, + { + "epoch": 0.63, + "learning_rate": 1.833707073611472e-05, + "loss": 0.5084, + "step": 3705 + }, + { + "epoch": 0.63, + "learning_rate": 1.833605376733667e-05, + "loss": 0.5172, + "step": 3706 + }, + { + "epoch": 0.63, + "learning_rate": 1.833503651590713e-05, + "loss": 0.478, + "step": 3707 + }, + { + "epoch": 0.63, + "learning_rate": 1.8334018981860582e-05, + "loss": 0.4895, + "step": 3708 + }, + { + "epoch": 0.63, + "learning_rate": 1.833300116523154e-05, + "loss": 0.5139, + "step": 3709 + }, + { + "epoch": 0.63, + "learning_rate": 1.8331983066054503e-05, + "loss": 0.5154, + "step": 3710 + }, + { + "epoch": 0.63, + "learning_rate": 1.8330964684364003e-05, + "loss": 0.5301, + "step": 3711 + }, + { + "epoch": 0.63, + "learning_rate": 1.832994602019456e-05, + "loss": 0.5198, + "step": 3712 + }, + { + "epoch": 0.63, + "learning_rate": 1.8328927073580723e-05, + "loss": 0.5055, + "step": 3713 + }, + { + "epoch": 0.63, + "learning_rate": 1.8327907844557036e-05, + "loss": 0.4809, + "step": 3714 + }, + { + "epoch": 0.63, + "learning_rate": 1.8326888333158058e-05, + "loss": 0.5606, + "step": 3715 + }, + { + "epoch": 0.63, + "learning_rate": 1.8325868539418363e-05, + "loss": 0.4869, + "step": 3716 + }, + { + "epoch": 0.63, + "learning_rate": 1.8324848463372524e-05, + "loss": 0.5266, + "step": 3717 + }, + { + "epoch": 0.63, + "learning_rate": 1.832382810505513e-05, + "loss": 0.5182, + "step": 3718 + }, + { + "epoch": 0.63, + "learning_rate": 1.832280746450078e-05, + "loss": 0.4936, + "step": 3719 + }, + { + "epoch": 0.63, + "learning_rate": 1.832178654174408e-05, + "loss": 0.5101, + "step": 3720 + }, + { + "epoch": 0.63, + "learning_rate": 1.8320765336819643e-05, + "loss": 0.5026, + "step": 3721 + }, + { + "epoch": 0.63, + "learning_rate": 1.83197438497621e-05, + "loss": 0.4866, + "step": 3722 + }, + { + "epoch": 0.63, + "learning_rate": 1.8318722080606086e-05, + "loss": 0.5685, + "step": 3723 + }, + { + "epoch": 0.64, + "learning_rate": 1.8317700029386245e-05, + "loss": 0.515, + "step": 3724 + }, + { + "epoch": 0.64, + "learning_rate": 1.8316677696137234e-05, + "loss": 0.509, + "step": 3725 + }, + { + "epoch": 0.64, + "learning_rate": 1.831565508089371e-05, + "loss": 0.5345, + "step": 3726 + }, + { + "epoch": 0.64, + "learning_rate": 1.8314632183690357e-05, + "loss": 0.5052, + "step": 3727 + }, + { + "epoch": 0.64, + "learning_rate": 1.8313609004561853e-05, + "loss": 0.4985, + "step": 3728 + }, + { + "epoch": 0.64, + "learning_rate": 1.8312585543542893e-05, + "loss": 0.5443, + "step": 3729 + }, + { + "epoch": 0.64, + "learning_rate": 1.831156180066818e-05, + "loss": 0.514, + "step": 3730 + }, + { + "epoch": 0.64, + "learning_rate": 1.831053777597242e-05, + "loss": 0.5221, + "step": 3731 + }, + { + "epoch": 0.64, + "learning_rate": 1.8309513469490343e-05, + "loss": 0.5079, + "step": 3732 + }, + { + "epoch": 0.64, + "learning_rate": 1.830848888125668e-05, + "loss": 0.4976, + "step": 3733 + }, + { + "epoch": 0.64, + "learning_rate": 1.8307464011306164e-05, + "loss": 0.5421, + "step": 3734 + }, + { + "epoch": 0.64, + "learning_rate": 1.830643885967355e-05, + "loss": 0.5551, + "step": 3735 + }, + { + "epoch": 0.64, + "learning_rate": 1.83054134263936e-05, + "loss": 0.4877, + "step": 3736 + }, + { + "epoch": 0.64, + "learning_rate": 1.830438771150108e-05, + "loss": 0.4798, + "step": 3737 + }, + { + "epoch": 0.64, + "learning_rate": 1.830336171503077e-05, + "loss": 0.4931, + "step": 3738 + }, + { + "epoch": 0.64, + "learning_rate": 1.830233543701746e-05, + "loss": 0.5249, + "step": 3739 + }, + { + "epoch": 0.64, + "learning_rate": 1.8301308877495944e-05, + "loss": 0.51, + "step": 3740 + }, + { + "epoch": 0.64, + "learning_rate": 1.8300282036501037e-05, + "loss": 0.4801, + "step": 3741 + }, + { + "epoch": 0.64, + "learning_rate": 1.829925491406755e-05, + "loss": 0.4876, + "step": 3742 + }, + { + "epoch": 0.64, + "learning_rate": 1.829822751023031e-05, + "loss": 0.5123, + "step": 3743 + }, + { + "epoch": 0.64, + "learning_rate": 1.829719982502416e-05, + "loss": 0.5976, + "step": 3744 + }, + { + "epoch": 0.64, + "learning_rate": 1.8296171858483935e-05, + "loss": 0.5354, + "step": 3745 + }, + { + "epoch": 0.64, + "learning_rate": 1.82951436106445e-05, + "loss": 0.4883, + "step": 3746 + }, + { + "epoch": 0.64, + "learning_rate": 1.8294115081540717e-05, + "loss": 0.5285, + "step": 3747 + }, + { + "epoch": 0.64, + "learning_rate": 1.829308627120746e-05, + "loss": 0.5378, + "step": 3748 + }, + { + "epoch": 0.64, + "learning_rate": 1.8292057179679608e-05, + "loss": 0.5161, + "step": 3749 + }, + { + "epoch": 0.64, + "learning_rate": 1.8291027806992062e-05, + "loss": 0.4616, + "step": 3750 + }, + { + "epoch": 0.64, + "learning_rate": 1.8289998153179722e-05, + "loss": 0.4854, + "step": 3751 + }, + { + "epoch": 0.64, + "learning_rate": 1.82889682182775e-05, + "loss": 0.5101, + "step": 3752 + }, + { + "epoch": 0.64, + "learning_rate": 1.828793800232032e-05, + "loss": 0.4829, + "step": 3753 + }, + { + "epoch": 0.64, + "learning_rate": 1.828690750534311e-05, + "loss": 0.5033, + "step": 3754 + }, + { + "epoch": 0.64, + "learning_rate": 1.8285876727380817e-05, + "loss": 0.5213, + "step": 3755 + }, + { + "epoch": 0.64, + "learning_rate": 1.8284845668468388e-05, + "loss": 0.5137, + "step": 3756 + }, + { + "epoch": 0.64, + "learning_rate": 1.8283814328640782e-05, + "loss": 0.5282, + "step": 3757 + }, + { + "epoch": 0.64, + "learning_rate": 1.8282782707932974e-05, + "loss": 0.5342, + "step": 3758 + }, + { + "epoch": 0.64, + "learning_rate": 1.8281750806379938e-05, + "loss": 0.4756, + "step": 3759 + }, + { + "epoch": 0.64, + "learning_rate": 1.8280718624016662e-05, + "loss": 0.4791, + "step": 3760 + }, + { + "epoch": 0.64, + "learning_rate": 1.827968616087815e-05, + "loss": 0.4944, + "step": 3761 + }, + { + "epoch": 0.64, + "learning_rate": 1.8278653416999402e-05, + "loss": 0.508, + "step": 3762 + }, + { + "epoch": 0.64, + "learning_rate": 1.8277620392415444e-05, + "loss": 0.4912, + "step": 3763 + }, + { + "epoch": 0.64, + "learning_rate": 1.8276587087161298e-05, + "loss": 0.5202, + "step": 3764 + }, + { + "epoch": 0.64, + "learning_rate": 1.8275553501272e-05, + "loss": 0.5579, + "step": 3765 + }, + { + "epoch": 0.64, + "learning_rate": 1.82745196347826e-05, + "loss": 0.5439, + "step": 3766 + }, + { + "epoch": 0.64, + "learning_rate": 1.8273485487728147e-05, + "loss": 0.5161, + "step": 3767 + }, + { + "epoch": 0.64, + "learning_rate": 1.827245106014371e-05, + "loss": 0.5257, + "step": 3768 + }, + { + "epoch": 0.64, + "learning_rate": 1.8271416352064365e-05, + "loss": 0.4553, + "step": 3769 + }, + { + "epoch": 0.64, + "learning_rate": 1.827038136352519e-05, + "loss": 0.4717, + "step": 3770 + }, + { + "epoch": 0.64, + "learning_rate": 1.826934609456129e-05, + "loss": 0.5238, + "step": 3771 + }, + { + "epoch": 0.64, + "learning_rate": 1.8268310545207756e-05, + "loss": 0.5256, + "step": 3772 + }, + { + "epoch": 0.64, + "learning_rate": 1.8267274715499704e-05, + "loss": 0.5039, + "step": 3773 + }, + { + "epoch": 0.64, + "learning_rate": 1.8266238605472258e-05, + "loss": 0.5571, + "step": 3774 + }, + { + "epoch": 0.64, + "learning_rate": 1.8265202215160546e-05, + "loss": 0.4873, + "step": 3775 + }, + { + "epoch": 0.64, + "learning_rate": 1.8264165544599716e-05, + "loss": 0.5301, + "step": 3776 + }, + { + "epoch": 0.64, + "learning_rate": 1.8263128593824907e-05, + "loss": 0.5057, + "step": 3777 + }, + { + "epoch": 0.64, + "learning_rate": 1.826209136287129e-05, + "loss": 0.5278, + "step": 3778 + }, + { + "epoch": 0.64, + "learning_rate": 1.8261053851774033e-05, + "loss": 0.4751, + "step": 3779 + }, + { + "epoch": 0.64, + "learning_rate": 1.8260016060568312e-05, + "loss": 0.4931, + "step": 3780 + }, + { + "epoch": 0.64, + "learning_rate": 1.825897798928931e-05, + "loss": 0.4713, + "step": 3781 + }, + { + "epoch": 0.65, + "learning_rate": 1.8257939637972236e-05, + "loss": 0.478, + "step": 3782 + }, + { + "epoch": 0.65, + "learning_rate": 1.8256901006652293e-05, + "loss": 0.4576, + "step": 3783 + }, + { + "epoch": 0.65, + "learning_rate": 1.8255862095364697e-05, + "loss": 0.5176, + "step": 3784 + }, + { + "epoch": 0.65, + "learning_rate": 1.8254822904144673e-05, + "loss": 0.5151, + "step": 3785 + }, + { + "epoch": 0.65, + "learning_rate": 1.8253783433027456e-05, + "loss": 0.5221, + "step": 3786 + }, + { + "epoch": 0.65, + "learning_rate": 1.8252743682048297e-05, + "loss": 0.5063, + "step": 3787 + }, + { + "epoch": 0.65, + "learning_rate": 1.825170365124245e-05, + "loss": 0.5105, + "step": 3788 + }, + { + "epoch": 0.65, + "learning_rate": 1.8250663340645176e-05, + "loss": 0.4843, + "step": 3789 + }, + { + "epoch": 0.65, + "learning_rate": 1.8249622750291748e-05, + "loss": 0.5411, + "step": 3790 + }, + { + "epoch": 0.65, + "learning_rate": 1.8248581880217457e-05, + "loss": 0.5213, + "step": 3791 + }, + { + "epoch": 0.65, + "learning_rate": 1.824754073045759e-05, + "loss": 0.5704, + "step": 3792 + }, + { + "epoch": 0.65, + "learning_rate": 1.824649930104745e-05, + "loss": 0.522, + "step": 3793 + }, + { + "epoch": 0.65, + "learning_rate": 1.8245457592022348e-05, + "loss": 0.496, + "step": 3794 + }, + { + "epoch": 0.65, + "learning_rate": 1.8244415603417603e-05, + "loss": 0.5285, + "step": 3795 + }, + { + "epoch": 0.65, + "learning_rate": 1.8243373335268554e-05, + "loss": 0.4931, + "step": 3796 + }, + { + "epoch": 0.65, + "learning_rate": 1.8242330787610535e-05, + "loss": 0.4927, + "step": 3797 + }, + { + "epoch": 0.65, + "learning_rate": 1.82412879604789e-05, + "loss": 0.4979, + "step": 3798 + }, + { + "epoch": 0.65, + "learning_rate": 1.8240244853909e-05, + "loss": 0.4763, + "step": 3799 + }, + { + "epoch": 0.65, + "learning_rate": 1.8239201467936215e-05, + "loss": 0.4777, + "step": 3800 + }, + { + "epoch": 0.65, + "learning_rate": 1.8238157802595915e-05, + "loss": 0.5188, + "step": 3801 + }, + { + "epoch": 0.65, + "learning_rate": 1.823711385792349e-05, + "loss": 0.524, + "step": 3802 + }, + { + "epoch": 0.65, + "learning_rate": 1.823606963395434e-05, + "loss": 0.5185, + "step": 3803 + }, + { + "epoch": 0.65, + "learning_rate": 1.8235025130723868e-05, + "loss": 0.4877, + "step": 3804 + }, + { + "epoch": 0.65, + "learning_rate": 1.8233980348267487e-05, + "loss": 0.5245, + "step": 3805 + }, + { + "epoch": 0.65, + "learning_rate": 1.8232935286620634e-05, + "loss": 0.5468, + "step": 3806 + }, + { + "epoch": 0.65, + "learning_rate": 1.823188994581873e-05, + "loss": 0.501, + "step": 3807 + }, + { + "epoch": 0.65, + "learning_rate": 1.823084432589723e-05, + "loss": 0.4983, + "step": 3808 + }, + { + "epoch": 0.65, + "learning_rate": 1.8229798426891582e-05, + "loss": 0.5311, + "step": 3809 + }, + { + "epoch": 0.65, + "learning_rate": 1.8228752248837253e-05, + "loss": 0.5056, + "step": 3810 + }, + { + "epoch": 0.65, + "learning_rate": 1.822770579176971e-05, + "loss": 0.5386, + "step": 3811 + }, + { + "epoch": 0.65, + "learning_rate": 1.822665905572445e-05, + "loss": 0.5395, + "step": 3812 + }, + { + "epoch": 0.65, + "learning_rate": 1.8225612040736945e-05, + "loss": 0.5325, + "step": 3813 + }, + { + "epoch": 0.65, + "learning_rate": 1.822456474684271e-05, + "loss": 0.5146, + "step": 3814 + }, + { + "epoch": 0.65, + "learning_rate": 1.8223517174077247e-05, + "loss": 0.5027, + "step": 3815 + }, + { + "epoch": 0.65, + "learning_rate": 1.8222469322476084e-05, + "loss": 0.5303, + "step": 3816 + }, + { + "epoch": 0.65, + "learning_rate": 1.822142119207475e-05, + "loss": 0.5337, + "step": 3817 + }, + { + "epoch": 0.65, + "learning_rate": 1.8220372782908778e-05, + "loss": 0.4727, + "step": 3818 + }, + { + "epoch": 0.65, + "learning_rate": 1.821932409501372e-05, + "loss": 0.5118, + "step": 3819 + }, + { + "epoch": 0.65, + "learning_rate": 1.8218275128425133e-05, + "loss": 0.5548, + "step": 3820 + }, + { + "epoch": 0.65, + "learning_rate": 1.8217225883178588e-05, + "loss": 0.5385, + "step": 3821 + }, + { + "epoch": 0.65, + "learning_rate": 1.8216176359309656e-05, + "loss": 0.4818, + "step": 3822 + }, + { + "epoch": 0.65, + "learning_rate": 1.821512655685393e-05, + "loss": 0.5141, + "step": 3823 + }, + { + "epoch": 0.65, + "learning_rate": 1.8214076475847e-05, + "loss": 0.4876, + "step": 3824 + }, + { + "epoch": 0.65, + "learning_rate": 1.8213026116324472e-05, + "loss": 0.4784, + "step": 3825 + }, + { + "epoch": 0.65, + "learning_rate": 1.8211975478321964e-05, + "loss": 0.5297, + "step": 3826 + }, + { + "epoch": 0.65, + "learning_rate": 1.82109245618751e-05, + "loss": 0.5188, + "step": 3827 + }, + { + "epoch": 0.65, + "learning_rate": 1.820987336701951e-05, + "loss": 0.5743, + "step": 3828 + }, + { + "epoch": 0.65, + "learning_rate": 1.8208821893790838e-05, + "loss": 0.4718, + "step": 3829 + }, + { + "epoch": 0.65, + "learning_rate": 1.8207770142224736e-05, + "loss": 0.5072, + "step": 3830 + }, + { + "epoch": 0.65, + "learning_rate": 1.820671811235687e-05, + "loss": 0.4983, + "step": 3831 + }, + { + "epoch": 0.65, + "learning_rate": 1.8205665804222908e-05, + "loss": 0.5197, + "step": 3832 + }, + { + "epoch": 0.65, + "learning_rate": 1.8204613217858528e-05, + "loss": 0.4716, + "step": 3833 + }, + { + "epoch": 0.65, + "learning_rate": 1.8203560353299425e-05, + "loss": 0.5256, + "step": 3834 + }, + { + "epoch": 0.65, + "learning_rate": 1.8202507210581298e-05, + "loss": 0.5254, + "step": 3835 + }, + { + "epoch": 0.65, + "learning_rate": 1.8201453789739852e-05, + "loss": 0.5275, + "step": 3836 + }, + { + "epoch": 0.65, + "learning_rate": 1.820040009081081e-05, + "loss": 0.5166, + "step": 3837 + }, + { + "epoch": 0.65, + "learning_rate": 1.8199346113829898e-05, + "loss": 0.5298, + "step": 3838 + }, + { + "epoch": 0.65, + "learning_rate": 1.8198291858832852e-05, + "loss": 0.4936, + "step": 3839 + }, + { + "epoch": 0.65, + "learning_rate": 1.8197237325855425e-05, + "loss": 0.5206, + "step": 3840 + }, + { + "epoch": 0.66, + "learning_rate": 1.8196182514933366e-05, + "loss": 0.5048, + "step": 3841 + }, + { + "epoch": 0.66, + "learning_rate": 1.819512742610244e-05, + "loss": 0.5033, + "step": 3842 + }, + { + "epoch": 0.66, + "learning_rate": 1.8194072059398425e-05, + "loss": 0.5249, + "step": 3843 + }, + { + "epoch": 0.66, + "learning_rate": 1.819301641485711e-05, + "loss": 0.5102, + "step": 3844 + }, + { + "epoch": 0.66, + "learning_rate": 1.8191960492514282e-05, + "loss": 0.5355, + "step": 3845 + }, + { + "epoch": 0.66, + "learning_rate": 1.8190904292405745e-05, + "loss": 0.523, + "step": 3846 + }, + { + "epoch": 0.66, + "learning_rate": 1.8189847814567315e-05, + "loss": 0.5091, + "step": 3847 + }, + { + "epoch": 0.66, + "learning_rate": 1.818879105903481e-05, + "loss": 0.4886, + "step": 3848 + }, + { + "epoch": 0.66, + "learning_rate": 1.8187734025844068e-05, + "loss": 0.5221, + "step": 3849 + }, + { + "epoch": 0.66, + "learning_rate": 1.8186676715030924e-05, + "loss": 0.4868, + "step": 3850 + }, + { + "epoch": 0.66, + "learning_rate": 1.818561912663123e-05, + "loss": 0.5337, + "step": 3851 + }, + { + "epoch": 0.66, + "learning_rate": 1.8184561260680843e-05, + "loss": 0.5431, + "step": 3852 + }, + { + "epoch": 0.66, + "learning_rate": 1.818350311721564e-05, + "loss": 0.5112, + "step": 3853 + }, + { + "epoch": 0.66, + "learning_rate": 1.8182444696271492e-05, + "loss": 0.4897, + "step": 3854 + }, + { + "epoch": 0.66, + "learning_rate": 1.818138599788429e-05, + "loss": 0.5039, + "step": 3855 + }, + { + "epoch": 0.66, + "learning_rate": 1.818032702208993e-05, + "loss": 0.518, + "step": 3856 + }, + { + "epoch": 0.66, + "learning_rate": 1.8179267768924323e-05, + "loss": 0.496, + "step": 3857 + }, + { + "epoch": 0.66, + "learning_rate": 1.817820823842338e-05, + "loss": 0.4729, + "step": 3858 + }, + { + "epoch": 0.66, + "learning_rate": 1.817714843062303e-05, + "loss": 0.5072, + "step": 3859 + }, + { + "epoch": 0.66, + "learning_rate": 1.8176088345559204e-05, + "loss": 0.506, + "step": 3860 + }, + { + "epoch": 0.66, + "learning_rate": 1.817502798326785e-05, + "loss": 0.5637, + "step": 3861 + }, + { + "epoch": 0.66, + "learning_rate": 1.8173967343784922e-05, + "loss": 0.5108, + "step": 3862 + }, + { + "epoch": 0.66, + "learning_rate": 1.817290642714638e-05, + "loss": 0.4926, + "step": 3863 + }, + { + "epoch": 0.66, + "learning_rate": 1.81718452333882e-05, + "loss": 0.522, + "step": 3864 + }, + { + "epoch": 0.66, + "learning_rate": 1.8170783762546363e-05, + "loss": 0.4993, + "step": 3865 + }, + { + "epoch": 0.66, + "learning_rate": 1.816972201465686e-05, + "loss": 0.5141, + "step": 3866 + }, + { + "epoch": 0.66, + "learning_rate": 1.8168659989755694e-05, + "loss": 0.4938, + "step": 3867 + }, + { + "epoch": 0.66, + "learning_rate": 1.8167597687878876e-05, + "loss": 0.4815, + "step": 3868 + }, + { + "epoch": 0.66, + "learning_rate": 1.8166535109062416e-05, + "loss": 0.5249, + "step": 3869 + }, + { + "epoch": 0.66, + "learning_rate": 1.8165472253342355e-05, + "loss": 0.5348, + "step": 3870 + }, + { + "epoch": 0.66, + "learning_rate": 1.816440912075472e-05, + "loss": 0.5074, + "step": 3871 + }, + { + "epoch": 0.66, + "learning_rate": 1.8163345711335575e-05, + "loss": 0.5194, + "step": 3872 + }, + { + "epoch": 0.66, + "learning_rate": 1.816228202512096e-05, + "loss": 0.5167, + "step": 3873 + }, + { + "epoch": 0.66, + "learning_rate": 1.8161218062146947e-05, + "loss": 0.5301, + "step": 3874 + }, + { + "epoch": 0.66, + "learning_rate": 1.816015382244962e-05, + "loss": 0.4905, + "step": 3875 + }, + { + "epoch": 0.66, + "learning_rate": 1.8159089306065052e-05, + "loss": 0.5118, + "step": 3876 + }, + { + "epoch": 0.66, + "learning_rate": 1.8158024513029344e-05, + "loss": 0.5083, + "step": 3877 + }, + { + "epoch": 0.66, + "learning_rate": 1.81569594433786e-05, + "loss": 0.5361, + "step": 3878 + }, + { + "epoch": 0.66, + "learning_rate": 1.8155894097148933e-05, + "loss": 0.5233, + "step": 3879 + }, + { + "epoch": 0.66, + "learning_rate": 1.815482847437647e-05, + "loss": 0.4999, + "step": 3880 + }, + { + "epoch": 0.66, + "learning_rate": 1.8153762575097334e-05, + "loss": 0.5117, + "step": 3881 + }, + { + "epoch": 0.66, + "learning_rate": 1.8152696399347675e-05, + "loss": 0.519, + "step": 3882 + }, + { + "epoch": 0.66, + "learning_rate": 1.8151629947163636e-05, + "loss": 0.548, + "step": 3883 + }, + { + "epoch": 0.66, + "learning_rate": 1.8150563218581386e-05, + "loss": 0.5034, + "step": 3884 + }, + { + "epoch": 0.66, + "learning_rate": 1.8149496213637087e-05, + "loss": 0.4856, + "step": 3885 + }, + { + "epoch": 0.66, + "learning_rate": 1.8148428932366923e-05, + "loss": 0.5367, + "step": 3886 + }, + { + "epoch": 0.66, + "learning_rate": 1.8147361374807083e-05, + "loss": 0.4977, + "step": 3887 + }, + { + "epoch": 0.66, + "learning_rate": 1.814629354099376e-05, + "loss": 0.4933, + "step": 3888 + }, + { + "epoch": 0.66, + "learning_rate": 1.8145225430963166e-05, + "loss": 0.5405, + "step": 3889 + }, + { + "epoch": 0.66, + "learning_rate": 1.8144157044751513e-05, + "loss": 0.4753, + "step": 3890 + }, + { + "epoch": 0.66, + "learning_rate": 1.814308838239503e-05, + "loss": 0.5009, + "step": 3891 + }, + { + "epoch": 0.66, + "learning_rate": 1.8142019443929953e-05, + "loss": 0.529, + "step": 3892 + }, + { + "epoch": 0.66, + "learning_rate": 1.8140950229392525e-05, + "loss": 0.565, + "step": 3893 + }, + { + "epoch": 0.66, + "learning_rate": 1.8139880738819e-05, + "loss": 0.5716, + "step": 3894 + }, + { + "epoch": 0.66, + "learning_rate": 1.813881097224564e-05, + "loss": 0.526, + "step": 3895 + }, + { + "epoch": 0.66, + "learning_rate": 1.813774092970872e-05, + "loss": 0.51, + "step": 3896 + }, + { + "epoch": 0.66, + "learning_rate": 1.813667061124452e-05, + "loss": 0.5286, + "step": 3897 + }, + { + "epoch": 0.66, + "learning_rate": 1.8135600016889335e-05, + "loss": 0.4968, + "step": 3898 + }, + { + "epoch": 0.66, + "learning_rate": 1.8134529146679462e-05, + "loss": 0.5088, + "step": 3899 + }, + { + "epoch": 0.67, + "learning_rate": 1.8133458000651213e-05, + "loss": 0.4553, + "step": 3900 + }, + { + "epoch": 0.67, + "learning_rate": 1.8132386578840904e-05, + "loss": 0.5219, + "step": 3901 + }, + { + "epoch": 0.67, + "learning_rate": 1.8131314881284875e-05, + "loss": 0.5648, + "step": 3902 + }, + { + "epoch": 0.67, + "learning_rate": 1.8130242908019448e-05, + "loss": 0.4746, + "step": 3903 + }, + { + "epoch": 0.67, + "learning_rate": 1.8129170659080982e-05, + "loss": 0.4871, + "step": 3904 + }, + { + "epoch": 0.67, + "learning_rate": 1.8128098134505828e-05, + "loss": 0.5326, + "step": 3905 + }, + { + "epoch": 0.67, + "learning_rate": 1.8127025334330358e-05, + "loss": 0.5055, + "step": 3906 + }, + { + "epoch": 0.67, + "learning_rate": 1.8125952258590944e-05, + "loss": 0.4986, + "step": 3907 + }, + { + "epoch": 0.67, + "learning_rate": 1.8124878907323966e-05, + "loss": 0.5162, + "step": 3908 + }, + { + "epoch": 0.67, + "learning_rate": 1.8123805280565828e-05, + "loss": 0.5498, + "step": 3909 + }, + { + "epoch": 0.67, + "learning_rate": 1.8122731378352924e-05, + "loss": 0.5367, + "step": 3910 + }, + { + "epoch": 0.67, + "learning_rate": 1.8121657200721677e-05, + "loss": 0.4797, + "step": 3911 + }, + { + "epoch": 0.67, + "learning_rate": 1.8120582747708503e-05, + "loss": 0.5202, + "step": 3912 + }, + { + "epoch": 0.67, + "learning_rate": 1.811950801934983e-05, + "loss": 0.4922, + "step": 3913 + }, + { + "epoch": 0.67, + "learning_rate": 1.811843301568211e-05, + "loss": 0.4888, + "step": 3914 + }, + { + "epoch": 0.67, + "learning_rate": 1.8117357736741783e-05, + "loss": 0.496, + "step": 3915 + }, + { + "epoch": 0.67, + "learning_rate": 1.8116282182565313e-05, + "loss": 0.5016, + "step": 3916 + }, + { + "epoch": 0.67, + "learning_rate": 1.811520635318917e-05, + "loss": 0.4928, + "step": 3917 + }, + { + "epoch": 0.67, + "learning_rate": 1.8114130248649827e-05, + "loss": 0.5257, + "step": 3918 + }, + { + "epoch": 0.67, + "learning_rate": 1.8113053868983778e-05, + "loss": 0.511, + "step": 3919 + }, + { + "epoch": 0.67, + "learning_rate": 1.8111977214227513e-05, + "loss": 0.4746, + "step": 3920 + }, + { + "epoch": 0.67, + "learning_rate": 1.811090028441755e-05, + "loss": 0.5063, + "step": 3921 + }, + { + "epoch": 0.67, + "learning_rate": 1.810982307959039e-05, + "loss": 0.4894, + "step": 3922 + }, + { + "epoch": 0.67, + "learning_rate": 1.8108745599782568e-05, + "loss": 0.4688, + "step": 3923 + }, + { + "epoch": 0.67, + "learning_rate": 1.8107667845030615e-05, + "loss": 0.5326, + "step": 3924 + }, + { + "epoch": 0.67, + "learning_rate": 1.8106589815371073e-05, + "loss": 0.4959, + "step": 3925 + }, + { + "epoch": 0.67, + "learning_rate": 1.81055115108405e-05, + "loss": 0.4602, + "step": 3926 + }, + { + "epoch": 0.67, + "learning_rate": 1.8104432931475454e-05, + "loss": 0.5171, + "step": 3927 + }, + { + "epoch": 0.67, + "learning_rate": 1.8103354077312506e-05, + "loss": 0.5255, + "step": 3928 + }, + { + "epoch": 0.67, + "learning_rate": 1.8102274948388235e-05, + "loss": 0.4822, + "step": 3929 + }, + { + "epoch": 0.67, + "learning_rate": 1.8101195544739237e-05, + "loss": 0.4982, + "step": 3930 + }, + { + "epoch": 0.67, + "learning_rate": 1.8100115866402108e-05, + "loss": 0.569, + "step": 3931 + }, + { + "epoch": 0.67, + "learning_rate": 1.8099035913413455e-05, + "loss": 0.4873, + "step": 3932 + }, + { + "epoch": 0.67, + "learning_rate": 1.80979556858099e-05, + "loss": 0.5476, + "step": 3933 + }, + { + "epoch": 0.67, + "learning_rate": 1.8096875183628074e-05, + "loss": 0.52, + "step": 3934 + }, + { + "epoch": 0.67, + "learning_rate": 1.8095794406904606e-05, + "loss": 0.4613, + "step": 3935 + }, + { + "epoch": 0.67, + "learning_rate": 1.809471335567614e-05, + "loss": 0.5294, + "step": 3936 + }, + { + "epoch": 0.67, + "learning_rate": 1.809363202997934e-05, + "loss": 0.4987, + "step": 3937 + }, + { + "epoch": 0.67, + "learning_rate": 1.8092550429850867e-05, + "loss": 0.4951, + "step": 3938 + }, + { + "epoch": 0.67, + "learning_rate": 1.809146855532739e-05, + "loss": 0.5114, + "step": 3939 + }, + { + "epoch": 0.67, + "learning_rate": 1.8090386406445603e-05, + "loss": 0.4679, + "step": 3940 + }, + { + "epoch": 0.67, + "learning_rate": 1.808930398324219e-05, + "loss": 0.5296, + "step": 3941 + }, + { + "epoch": 0.67, + "learning_rate": 1.8088221285753854e-05, + "loss": 0.5254, + "step": 3942 + }, + { + "epoch": 0.67, + "learning_rate": 1.8087138314017308e-05, + "loss": 0.4956, + "step": 3943 + }, + { + "epoch": 0.67, + "learning_rate": 1.808605506806927e-05, + "loss": 0.4889, + "step": 3944 + }, + { + "epoch": 0.67, + "learning_rate": 1.8084971547946475e-05, + "loss": 0.4905, + "step": 3945 + }, + { + "epoch": 0.67, + "learning_rate": 1.8083887753685653e-05, + "loss": 0.4785, + "step": 3946 + }, + { + "epoch": 0.67, + "learning_rate": 1.8082803685323564e-05, + "loss": 0.4789, + "step": 3947 + }, + { + "epoch": 0.67, + "learning_rate": 1.8081719342896954e-05, + "loss": 0.4901, + "step": 3948 + }, + { + "epoch": 0.67, + "learning_rate": 1.80806347264426e-05, + "loss": 0.5174, + "step": 3949 + }, + { + "epoch": 0.67, + "learning_rate": 1.807954983599727e-05, + "loss": 0.5192, + "step": 3950 + }, + { + "epoch": 0.67, + "learning_rate": 1.8078464671597755e-05, + "loss": 0.4715, + "step": 3951 + }, + { + "epoch": 0.67, + "learning_rate": 1.8077379233280846e-05, + "loss": 0.4979, + "step": 3952 + }, + { + "epoch": 0.67, + "learning_rate": 1.807629352108335e-05, + "loss": 0.4962, + "step": 3953 + }, + { + "epoch": 0.67, + "learning_rate": 1.8075207535042078e-05, + "loss": 0.5117, + "step": 3954 + }, + { + "epoch": 0.67, + "learning_rate": 1.8074121275193857e-05, + "loss": 0.5123, + "step": 3955 + }, + { + "epoch": 0.67, + "learning_rate": 1.8073034741575513e-05, + "loss": 0.4998, + "step": 3956 + }, + { + "epoch": 0.67, + "learning_rate": 1.8071947934223894e-05, + "loss": 0.5097, + "step": 3957 + }, + { + "epoch": 0.68, + "learning_rate": 1.807086085317584e-05, + "loss": 0.5043, + "step": 3958 + }, + { + "epoch": 0.68, + "learning_rate": 1.8069773498468224e-05, + "loss": 0.5086, + "step": 3959 + }, + { + "epoch": 0.68, + "learning_rate": 1.8068685870137906e-05, + "loss": 0.5118, + "step": 3960 + }, + { + "epoch": 0.68, + "learning_rate": 1.8067597968221764e-05, + "loss": 0.4602, + "step": 3961 + }, + { + "epoch": 0.68, + "learning_rate": 1.8066509792756694e-05, + "loss": 0.5597, + "step": 3962 + }, + { + "epoch": 0.68, + "learning_rate": 1.8065421343779583e-05, + "loss": 0.5523, + "step": 3963 + }, + { + "epoch": 0.68, + "learning_rate": 1.806433262132734e-05, + "loss": 0.4805, + "step": 3964 + }, + { + "epoch": 0.68, + "learning_rate": 1.8063243625436885e-05, + "loss": 0.5575, + "step": 3965 + }, + { + "epoch": 0.68, + "learning_rate": 1.806215435614514e-05, + "loss": 0.5242, + "step": 3966 + }, + { + "epoch": 0.68, + "learning_rate": 1.8061064813489037e-05, + "loss": 0.5003, + "step": 3967 + }, + { + "epoch": 0.68, + "learning_rate": 1.805997499750552e-05, + "loss": 0.5464, + "step": 3968 + }, + { + "epoch": 0.68, + "learning_rate": 1.8058884908231546e-05, + "loss": 0.5109, + "step": 3969 + }, + { + "epoch": 0.68, + "learning_rate": 1.805779454570407e-05, + "loss": 0.4902, + "step": 3970 + }, + { + "epoch": 0.68, + "learning_rate": 1.8056703909960066e-05, + "loss": 0.5223, + "step": 3971 + }, + { + "epoch": 0.68, + "learning_rate": 1.8055613001036513e-05, + "loss": 0.554, + "step": 3972 + }, + { + "epoch": 0.68, + "learning_rate": 1.8054521818970406e-05, + "loss": 0.4866, + "step": 3973 + }, + { + "epoch": 0.68, + "learning_rate": 1.8053430363798738e-05, + "loss": 0.5329, + "step": 3974 + }, + { + "epoch": 0.68, + "learning_rate": 1.8052338635558514e-05, + "loss": 0.5099, + "step": 3975 + }, + { + "epoch": 0.68, + "learning_rate": 1.8051246634286762e-05, + "loss": 0.5078, + "step": 3976 + }, + { + "epoch": 0.68, + "learning_rate": 1.80501543600205e-05, + "loss": 0.4544, + "step": 3977 + }, + { + "epoch": 0.68, + "learning_rate": 1.804906181279677e-05, + "loss": 0.5126, + "step": 3978 + }, + { + "epoch": 0.68, + "learning_rate": 1.8047968992652612e-05, + "loss": 0.5205, + "step": 3979 + }, + { + "epoch": 0.68, + "learning_rate": 1.8046875899625084e-05, + "loss": 0.5042, + "step": 3980 + }, + { + "epoch": 0.68, + "learning_rate": 1.8045782533751242e-05, + "loss": 0.5146, + "step": 3981 + }, + { + "epoch": 0.68, + "learning_rate": 1.804468889506817e-05, + "loss": 0.492, + "step": 3982 + }, + { + "epoch": 0.68, + "learning_rate": 1.8043594983612946e-05, + "loss": 0.5148, + "step": 3983 + }, + { + "epoch": 0.68, + "learning_rate": 1.8042500799422654e-05, + "loss": 0.5015, + "step": 3984 + }, + { + "epoch": 0.68, + "learning_rate": 1.8041406342534404e-05, + "loss": 0.5235, + "step": 3985 + }, + { + "epoch": 0.68, + "learning_rate": 1.8040311612985304e-05, + "loss": 0.4775, + "step": 3986 + }, + { + "epoch": 0.68, + "learning_rate": 1.8039216610812472e-05, + "loss": 0.5176, + "step": 3987 + }, + { + "epoch": 0.68, + "learning_rate": 1.8038121336053037e-05, + "loss": 0.4784, + "step": 3988 + }, + { + "epoch": 0.68, + "learning_rate": 1.803702578874413e-05, + "loss": 0.5435, + "step": 3989 + }, + { + "epoch": 0.68, + "learning_rate": 1.803592996892291e-05, + "loss": 0.4502, + "step": 3990 + }, + { + "epoch": 0.68, + "learning_rate": 1.8034833876626522e-05, + "loss": 0.4743, + "step": 3991 + }, + { + "epoch": 0.68, + "learning_rate": 1.803373751189214e-05, + "loss": 0.5283, + "step": 3992 + }, + { + "epoch": 0.68, + "learning_rate": 1.8032640874756932e-05, + "loss": 0.5452, + "step": 3993 + }, + { + "epoch": 0.68, + "learning_rate": 1.8031543965258085e-05, + "loss": 0.5063, + "step": 3994 + }, + { + "epoch": 0.68, + "learning_rate": 1.8030446783432793e-05, + "loss": 0.5553, + "step": 3995 + }, + { + "epoch": 0.68, + "learning_rate": 1.8029349329318252e-05, + "loss": 0.5112, + "step": 3996 + }, + { + "epoch": 0.68, + "learning_rate": 1.8028251602951683e-05, + "loss": 0.5415, + "step": 3997 + }, + { + "epoch": 0.68, + "learning_rate": 1.80271536043703e-05, + "loss": 0.5188, + "step": 3998 + }, + { + "epoch": 0.68, + "learning_rate": 1.8026055333611336e-05, + "loss": 0.5292, + "step": 3999 + }, + { + "epoch": 0.68, + "learning_rate": 1.802495679071203e-05, + "loss": 0.5216, + "step": 4000 + }, + { + "epoch": 0.68, + "learning_rate": 1.8023857975709626e-05, + "loss": 0.5155, + "step": 4001 + }, + { + "epoch": 0.68, + "learning_rate": 1.802275888864139e-05, + "loss": 0.4463, + "step": 4002 + }, + { + "epoch": 0.68, + "learning_rate": 1.8021659529544577e-05, + "loss": 0.513, + "step": 4003 + }, + { + "epoch": 0.68, + "learning_rate": 1.8020559898456476e-05, + "loss": 0.4973, + "step": 4004 + }, + { + "epoch": 0.68, + "learning_rate": 1.8019459995414363e-05, + "loss": 0.4977, + "step": 4005 + }, + { + "epoch": 0.68, + "learning_rate": 1.8018359820455535e-05, + "loss": 0.5106, + "step": 4006 + }, + { + "epoch": 0.68, + "learning_rate": 1.80172593736173e-05, + "loss": 0.5277, + "step": 4007 + }, + { + "epoch": 0.68, + "learning_rate": 1.8016158654936965e-05, + "loss": 0.5194, + "step": 4008 + }, + { + "epoch": 0.68, + "learning_rate": 1.8015057664451857e-05, + "loss": 0.519, + "step": 4009 + }, + { + "epoch": 0.68, + "learning_rate": 1.8013956402199304e-05, + "loss": 0.5466, + "step": 4010 + }, + { + "epoch": 0.68, + "learning_rate": 1.801285486821665e-05, + "loss": 0.5254, + "step": 4011 + }, + { + "epoch": 0.68, + "learning_rate": 1.801175306254124e-05, + "loss": 0.513, + "step": 4012 + }, + { + "epoch": 0.68, + "learning_rate": 1.8010650985210435e-05, + "loss": 0.5043, + "step": 4013 + }, + { + "epoch": 0.68, + "learning_rate": 1.8009548636261608e-05, + "loss": 0.4934, + "step": 4014 + }, + { + "epoch": 0.68, + "learning_rate": 1.8008446015732127e-05, + "loss": 0.5131, + "step": 4015 + }, + { + "epoch": 0.68, + "learning_rate": 1.8007343123659387e-05, + "loss": 0.5514, + "step": 4016 + }, + { + "epoch": 0.69, + "learning_rate": 1.8006239960080784e-05, + "loss": 0.4817, + "step": 4017 + }, + { + "epoch": 0.69, + "learning_rate": 1.8005136525033714e-05, + "loss": 0.4854, + "step": 4018 + }, + { + "epoch": 0.69, + "learning_rate": 1.8004032818555605e-05, + "loss": 0.4966, + "step": 4019 + }, + { + "epoch": 0.69, + "learning_rate": 1.800292884068387e-05, + "loss": 0.4683, + "step": 4020 + }, + { + "epoch": 0.69, + "learning_rate": 1.800182459145594e-05, + "loss": 0.5504, + "step": 4021 + }, + { + "epoch": 0.69, + "learning_rate": 1.8000720070909265e-05, + "loss": 0.493, + "step": 4022 + }, + { + "epoch": 0.69, + "learning_rate": 1.799961527908129e-05, + "loss": 0.5163, + "step": 4023 + }, + { + "epoch": 0.69, + "learning_rate": 1.7998510216009483e-05, + "loss": 0.5291, + "step": 4024 + }, + { + "epoch": 0.69, + "learning_rate": 1.7997404881731305e-05, + "loss": 0.547, + "step": 4025 + }, + { + "epoch": 0.69, + "learning_rate": 1.799629927628424e-05, + "loss": 0.4984, + "step": 4026 + }, + { + "epoch": 0.69, + "learning_rate": 1.7995193399705772e-05, + "loss": 0.5194, + "step": 4027 + }, + { + "epoch": 0.69, + "learning_rate": 1.79940872520334e-05, + "loss": 0.5013, + "step": 4028 + }, + { + "epoch": 0.69, + "learning_rate": 1.799298083330463e-05, + "loss": 0.5153, + "step": 4029 + }, + { + "epoch": 0.69, + "learning_rate": 1.799187414355698e-05, + "loss": 0.4636, + "step": 4030 + }, + { + "epoch": 0.69, + "learning_rate": 1.799076718282797e-05, + "loss": 0.5082, + "step": 4031 + }, + { + "epoch": 0.69, + "learning_rate": 1.7989659951155136e-05, + "loss": 0.5112, + "step": 4032 + }, + { + "epoch": 0.69, + "learning_rate": 1.7988552448576026e-05, + "loss": 0.5084, + "step": 4033 + }, + { + "epoch": 0.69, + "learning_rate": 1.798744467512818e-05, + "loss": 0.4995, + "step": 4034 + }, + { + "epoch": 0.69, + "learning_rate": 1.7986336630849173e-05, + "loss": 0.5023, + "step": 4035 + }, + { + "epoch": 0.69, + "learning_rate": 1.7985228315776568e-05, + "loss": 0.5206, + "step": 4036 + }, + { + "epoch": 0.69, + "learning_rate": 1.7984119729947944e-05, + "loss": 0.4983, + "step": 4037 + }, + { + "epoch": 0.69, + "learning_rate": 1.7983010873400894e-05, + "loss": 0.5267, + "step": 4038 + }, + { + "epoch": 0.69, + "learning_rate": 1.7981901746173015e-05, + "loss": 0.4787, + "step": 4039 + }, + { + "epoch": 0.69, + "learning_rate": 1.7980792348301914e-05, + "loss": 0.5163, + "step": 4040 + }, + { + "epoch": 0.69, + "learning_rate": 1.7979682679825202e-05, + "loss": 0.4981, + "step": 4041 + }, + { + "epoch": 0.69, + "learning_rate": 1.7978572740780515e-05, + "loss": 0.5149, + "step": 4042 + }, + { + "epoch": 0.69, + "learning_rate": 1.7977462531205483e-05, + "loss": 0.5098, + "step": 4043 + }, + { + "epoch": 0.69, + "learning_rate": 1.7976352051137746e-05, + "loss": 0.5038, + "step": 4044 + }, + { + "epoch": 0.69, + "learning_rate": 1.7975241300614963e-05, + "loss": 0.4899, + "step": 4045 + }, + { + "epoch": 0.69, + "learning_rate": 1.7974130279674792e-05, + "loss": 0.5065, + "step": 4046 + }, + { + "epoch": 0.69, + "learning_rate": 1.797301898835491e-05, + "loss": 0.5355, + "step": 4047 + }, + { + "epoch": 0.69, + "learning_rate": 1.7971907426692992e-05, + "loss": 0.5137, + "step": 4048 + }, + { + "epoch": 0.69, + "learning_rate": 1.797079559472673e-05, + "loss": 0.4689, + "step": 4049 + }, + { + "epoch": 0.69, + "learning_rate": 1.7969683492493824e-05, + "loss": 0.4792, + "step": 4050 + }, + { + "epoch": 0.69, + "learning_rate": 1.7968571120031982e-05, + "loss": 0.4956, + "step": 4051 + }, + { + "epoch": 0.69, + "learning_rate": 1.7967458477378918e-05, + "loss": 0.5406, + "step": 4052 + }, + { + "epoch": 0.69, + "learning_rate": 1.796634556457236e-05, + "loss": 0.5312, + "step": 4053 + }, + { + "epoch": 0.69, + "learning_rate": 1.7965232381650047e-05, + "loss": 0.527, + "step": 4054 + }, + { + "epoch": 0.69, + "learning_rate": 1.7964118928649724e-05, + "loss": 0.5016, + "step": 4055 + }, + { + "epoch": 0.69, + "learning_rate": 1.796300520560914e-05, + "loss": 0.4931, + "step": 4056 + }, + { + "epoch": 0.69, + "learning_rate": 1.7961891212566063e-05, + "loss": 0.5478, + "step": 4057 + }, + { + "epoch": 0.69, + "learning_rate": 1.796077694955826e-05, + "loss": 0.5089, + "step": 4058 + }, + { + "epoch": 0.69, + "learning_rate": 1.7959662416623516e-05, + "loss": 0.5322, + "step": 4059 + }, + { + "epoch": 0.69, + "learning_rate": 1.7958547613799623e-05, + "loss": 0.4937, + "step": 4060 + }, + { + "epoch": 0.69, + "learning_rate": 1.7957432541124375e-05, + "loss": 0.4889, + "step": 4061 + }, + { + "epoch": 0.69, + "learning_rate": 1.795631719863559e-05, + "loss": 0.5152, + "step": 4062 + }, + { + "epoch": 0.69, + "learning_rate": 1.7955201586371077e-05, + "loss": 0.5737, + "step": 4063 + }, + { + "epoch": 0.69, + "learning_rate": 1.7954085704368666e-05, + "loss": 0.5202, + "step": 4064 + }, + { + "epoch": 0.69, + "learning_rate": 1.7952969552666194e-05, + "loss": 0.4854, + "step": 4065 + }, + { + "epoch": 0.69, + "learning_rate": 1.795185313130151e-05, + "loss": 0.5295, + "step": 4066 + }, + { + "epoch": 0.69, + "learning_rate": 1.7950736440312462e-05, + "loss": 0.5156, + "step": 4067 + }, + { + "epoch": 0.69, + "learning_rate": 1.794961947973692e-05, + "loss": 0.5302, + "step": 4068 + }, + { + "epoch": 0.69, + "learning_rate": 1.794850224961275e-05, + "loss": 0.5028, + "step": 4069 + }, + { + "epoch": 0.69, + "learning_rate": 1.7947384749977842e-05, + "loss": 0.5026, + "step": 4070 + }, + { + "epoch": 0.69, + "learning_rate": 1.794626698087008e-05, + "loss": 0.487, + "step": 4071 + }, + { + "epoch": 0.69, + "learning_rate": 1.794514894232737e-05, + "loss": 0.5476, + "step": 4072 + }, + { + "epoch": 0.69, + "learning_rate": 1.7944030634387618e-05, + "loss": 0.5017, + "step": 4073 + }, + { + "epoch": 0.69, + "learning_rate": 1.7942912057088743e-05, + "loss": 0.5116, + "step": 4074 + }, + { + "epoch": 0.69, + "learning_rate": 1.7941793210468676e-05, + "loss": 0.4893, + "step": 4075 + }, + { + "epoch": 0.7, + "learning_rate": 1.7940674094565348e-05, + "loss": 0.4679, + "step": 4076 + }, + { + "epoch": 0.7, + "learning_rate": 1.793955470941671e-05, + "loss": 0.5176, + "step": 4077 + }, + { + "epoch": 0.7, + "learning_rate": 1.7938435055060716e-05, + "loss": 0.4778, + "step": 4078 + }, + { + "epoch": 0.7, + "learning_rate": 1.793731513153533e-05, + "loss": 0.5199, + "step": 4079 + }, + { + "epoch": 0.7, + "learning_rate": 1.7936194938878522e-05, + "loss": 0.5117, + "step": 4080 + }, + { + "epoch": 0.7, + "learning_rate": 1.7935074477128275e-05, + "loss": 0.5424, + "step": 4081 + }, + { + "epoch": 0.7, + "learning_rate": 1.7933953746322586e-05, + "loss": 0.5264, + "step": 4082 + }, + { + "epoch": 0.7, + "learning_rate": 1.7932832746499453e-05, + "loss": 0.5079, + "step": 4083 + }, + { + "epoch": 0.7, + "learning_rate": 1.793171147769689e-05, + "loss": 0.4999, + "step": 4084 + }, + { + "epoch": 0.7, + "learning_rate": 1.7930589939952906e-05, + "loss": 0.4996, + "step": 4085 + }, + { + "epoch": 0.7, + "learning_rate": 1.7929468133305534e-05, + "loss": 0.5026, + "step": 4086 + }, + { + "epoch": 0.7, + "learning_rate": 1.7928346057792812e-05, + "loss": 0.545, + "step": 4087 + }, + { + "epoch": 0.7, + "learning_rate": 1.792722371345279e-05, + "loss": 0.4926, + "step": 4088 + }, + { + "epoch": 0.7, + "learning_rate": 1.7926101100323518e-05, + "loss": 0.5595, + "step": 4089 + }, + { + "epoch": 0.7, + "learning_rate": 1.792497821844306e-05, + "loss": 0.5177, + "step": 4090 + }, + { + "epoch": 0.7, + "learning_rate": 1.7923855067849497e-05, + "loss": 0.5459, + "step": 4091 + }, + { + "epoch": 0.7, + "learning_rate": 1.7922731648580902e-05, + "loss": 0.496, + "step": 4092 + }, + { + "epoch": 0.7, + "learning_rate": 1.7921607960675374e-05, + "loss": 0.534, + "step": 4093 + }, + { + "epoch": 0.7, + "learning_rate": 1.792048400417101e-05, + "loss": 0.5024, + "step": 4094 + }, + { + "epoch": 0.7, + "learning_rate": 1.7919359779105924e-05, + "loss": 0.4809, + "step": 4095 + }, + { + "epoch": 0.7, + "learning_rate": 1.7918235285518228e-05, + "loss": 0.5223, + "step": 4096 + }, + { + "epoch": 0.7, + "learning_rate": 1.791711052344606e-05, + "loss": 0.5002, + "step": 4097 + }, + { + "epoch": 0.7, + "learning_rate": 1.791598549292755e-05, + "loss": 0.5083, + "step": 4098 + }, + { + "epoch": 0.7, + "learning_rate": 1.7914860194000853e-05, + "loss": 0.4641, + "step": 4099 + }, + { + "epoch": 0.7, + "learning_rate": 1.791373462670411e-05, + "loss": 0.5153, + "step": 4100 + }, + { + "epoch": 0.7, + "learning_rate": 1.79126087910755e-05, + "loss": 0.5348, + "step": 4101 + }, + { + "epoch": 0.7, + "learning_rate": 1.7911482687153192e-05, + "loss": 0.5551, + "step": 4102 + }, + { + "epoch": 0.7, + "learning_rate": 1.7910356314975365e-05, + "loss": 0.5156, + "step": 4103 + }, + { + "epoch": 0.7, + "learning_rate": 1.7909229674580218e-05, + "loss": 0.5209, + "step": 4104 + }, + { + "epoch": 0.7, + "learning_rate": 1.7908102766005946e-05, + "loss": 0.518, + "step": 4105 + }, + { + "epoch": 0.7, + "learning_rate": 1.7906975589290764e-05, + "loss": 0.504, + "step": 4106 + }, + { + "epoch": 0.7, + "learning_rate": 1.790584814447289e-05, + "loss": 0.5296, + "step": 4107 + }, + { + "epoch": 0.7, + "learning_rate": 1.7904720431590547e-05, + "loss": 0.5038, + "step": 4108 + }, + { + "epoch": 0.7, + "learning_rate": 1.790359245068198e-05, + "loss": 0.5139, + "step": 4109 + }, + { + "epoch": 0.7, + "learning_rate": 1.790246420178543e-05, + "loss": 0.5112, + "step": 4110 + }, + { + "epoch": 0.7, + "learning_rate": 1.790133568493916e-05, + "loss": 0.4623, + "step": 4111 + }, + { + "epoch": 0.7, + "learning_rate": 1.7900206900181424e-05, + "loss": 0.4715, + "step": 4112 + }, + { + "epoch": 0.7, + "learning_rate": 1.7899077847550505e-05, + "loss": 0.5187, + "step": 4113 + }, + { + "epoch": 0.7, + "learning_rate": 1.7897948527084684e-05, + "loss": 0.5012, + "step": 4114 + }, + { + "epoch": 0.7, + "learning_rate": 1.789681893882225e-05, + "loss": 0.4953, + "step": 4115 + }, + { + "epoch": 0.7, + "learning_rate": 1.7895689082801505e-05, + "loss": 0.4882, + "step": 4116 + }, + { + "epoch": 0.7, + "learning_rate": 1.789455895906076e-05, + "loss": 0.4896, + "step": 4117 + }, + { + "epoch": 0.7, + "learning_rate": 1.7893428567638334e-05, + "loss": 0.4886, + "step": 4118 + }, + { + "epoch": 0.7, + "learning_rate": 1.7892297908572557e-05, + "loss": 0.4638, + "step": 4119 + }, + { + "epoch": 0.7, + "learning_rate": 1.7891166981901764e-05, + "loss": 0.5302, + "step": 4120 + }, + { + "epoch": 0.7, + "learning_rate": 1.7890035787664302e-05, + "loss": 0.5157, + "step": 4121 + }, + { + "epoch": 0.7, + "learning_rate": 1.7888904325898527e-05, + "loss": 0.5088, + "step": 4122 + }, + { + "epoch": 0.7, + "learning_rate": 1.7887772596642807e-05, + "loss": 0.5044, + "step": 4123 + }, + { + "epoch": 0.7, + "learning_rate": 1.7886640599935505e-05, + "loss": 0.4429, + "step": 4124 + }, + { + "epoch": 0.7, + "learning_rate": 1.7885508335815013e-05, + "loss": 0.5471, + "step": 4125 + }, + { + "epoch": 0.7, + "learning_rate": 1.788437580431972e-05, + "loss": 0.5088, + "step": 4126 + }, + { + "epoch": 0.7, + "learning_rate": 1.788324300548803e-05, + "loss": 0.4951, + "step": 4127 + }, + { + "epoch": 0.7, + "learning_rate": 1.7882109939358353e-05, + "loss": 0.5582, + "step": 4128 + }, + { + "epoch": 0.7, + "learning_rate": 1.78809766059691e-05, + "loss": 0.4977, + "step": 4129 + }, + { + "epoch": 0.7, + "learning_rate": 1.787984300535871e-05, + "loss": 0.4933, + "step": 4130 + }, + { + "epoch": 0.7, + "learning_rate": 1.7878709137565608e-05, + "loss": 0.5372, + "step": 4131 + }, + { + "epoch": 0.7, + "learning_rate": 1.787757500262825e-05, + "loss": 0.4896, + "step": 4132 + }, + { + "epoch": 0.7, + "learning_rate": 1.787644060058509e-05, + "loss": 0.5145, + "step": 4133 + }, + { + "epoch": 0.71, + "learning_rate": 1.787530593147459e-05, + "loss": 0.5005, + "step": 4134 + }, + { + "epoch": 0.71, + "learning_rate": 1.787417099533522e-05, + "loss": 0.5334, + "step": 4135 + }, + { + "epoch": 0.71, + "learning_rate": 1.7873035792205466e-05, + "loss": 0.5348, + "step": 4136 + }, + { + "epoch": 0.71, + "learning_rate": 1.7871900322123822e-05, + "loss": 0.5401, + "step": 4137 + }, + { + "epoch": 0.71, + "learning_rate": 1.7870764585128784e-05, + "loss": 0.5001, + "step": 4138 + }, + { + "epoch": 0.71, + "learning_rate": 1.7869628581258863e-05, + "loss": 0.4793, + "step": 4139 + }, + { + "epoch": 0.71, + "learning_rate": 1.786849231055258e-05, + "loss": 0.5505, + "step": 4140 + }, + { + "epoch": 0.71, + "learning_rate": 1.7867355773048463e-05, + "loss": 0.4882, + "step": 4141 + }, + { + "epoch": 0.71, + "learning_rate": 1.7866218968785044e-05, + "loss": 0.5313, + "step": 4142 + }, + { + "epoch": 0.71, + "learning_rate": 1.786508189780087e-05, + "loss": 0.4928, + "step": 4143 + }, + { + "epoch": 0.71, + "learning_rate": 1.78639445601345e-05, + "loss": 0.4873, + "step": 4144 + }, + { + "epoch": 0.71, + "learning_rate": 1.786280695582449e-05, + "loss": 0.5321, + "step": 4145 + }, + { + "epoch": 0.71, + "learning_rate": 1.7861669084909426e-05, + "loss": 0.5004, + "step": 4146 + }, + { + "epoch": 0.71, + "learning_rate": 1.7860530947427878e-05, + "loss": 0.5391, + "step": 4147 + }, + { + "epoch": 0.71, + "learning_rate": 1.7859392543418438e-05, + "loss": 0.4983, + "step": 4148 + }, + { + "epoch": 0.71, + "learning_rate": 1.785825387291971e-05, + "loss": 0.4907, + "step": 4149 + }, + { + "epoch": 0.71, + "learning_rate": 1.7857114935970303e-05, + "loss": 0.5315, + "step": 4150 + }, + { + "epoch": 0.71, + "learning_rate": 1.7855975732608833e-05, + "loss": 0.4981, + "step": 4151 + }, + { + "epoch": 0.71, + "learning_rate": 1.7854836262873926e-05, + "loss": 0.5197, + "step": 4152 + }, + { + "epoch": 0.71, + "learning_rate": 1.785369652680422e-05, + "loss": 0.5184, + "step": 4153 + }, + { + "epoch": 0.71, + "learning_rate": 1.785255652443836e-05, + "loss": 0.5299, + "step": 4154 + }, + { + "epoch": 0.71, + "learning_rate": 1.7851416255815006e-05, + "loss": 0.5156, + "step": 4155 + }, + { + "epoch": 0.71, + "learning_rate": 1.785027572097281e-05, + "loss": 0.5095, + "step": 4156 + }, + { + "epoch": 0.71, + "learning_rate": 1.7849134919950454e-05, + "loss": 0.5433, + "step": 4157 + }, + { + "epoch": 0.71, + "learning_rate": 1.7847993852786612e-05, + "loss": 0.5466, + "step": 4158 + }, + { + "epoch": 0.71, + "learning_rate": 1.7846852519519976e-05, + "loss": 0.5345, + "step": 4159 + }, + { + "epoch": 0.71, + "learning_rate": 1.7845710920189247e-05, + "loss": 0.5241, + "step": 4160 + }, + { + "epoch": 0.71, + "learning_rate": 1.7844569054833136e-05, + "loss": 0.5334, + "step": 4161 + }, + { + "epoch": 0.71, + "learning_rate": 1.7843426923490356e-05, + "loss": 0.5386, + "step": 4162 + }, + { + "epoch": 0.71, + "learning_rate": 1.7842284526199635e-05, + "loss": 0.5041, + "step": 4163 + }, + { + "epoch": 0.71, + "learning_rate": 1.7841141862999704e-05, + "loss": 0.4913, + "step": 4164 + }, + { + "epoch": 0.71, + "learning_rate": 1.7839998933929315e-05, + "loss": 0.5337, + "step": 4165 + }, + { + "epoch": 0.71, + "learning_rate": 1.783885573902722e-05, + "loss": 0.5226, + "step": 4166 + }, + { + "epoch": 0.71, + "learning_rate": 1.7837712278332177e-05, + "loss": 0.4979, + "step": 4167 + }, + { + "epoch": 0.71, + "learning_rate": 1.783656855188296e-05, + "loss": 0.5145, + "step": 4168 + }, + { + "epoch": 0.71, + "learning_rate": 1.783542455971835e-05, + "loss": 0.4608, + "step": 4169 + }, + { + "epoch": 0.71, + "learning_rate": 1.7834280301877133e-05, + "loss": 0.5133, + "step": 4170 + }, + { + "epoch": 0.71, + "learning_rate": 1.783313577839811e-05, + "loss": 0.5407, + "step": 4171 + }, + { + "epoch": 0.71, + "learning_rate": 1.783199098932009e-05, + "loss": 0.4604, + "step": 4172 + }, + { + "epoch": 0.71, + "learning_rate": 1.783084593468189e-05, + "loss": 0.5147, + "step": 4173 + }, + { + "epoch": 0.71, + "learning_rate": 1.7829700614522332e-05, + "loss": 0.4648, + "step": 4174 + }, + { + "epoch": 0.71, + "learning_rate": 1.7828555028880253e-05, + "loss": 0.521, + "step": 4175 + }, + { + "epoch": 0.71, + "learning_rate": 1.7827409177794495e-05, + "loss": 0.5079, + "step": 4176 + }, + { + "epoch": 0.71, + "learning_rate": 1.782626306130391e-05, + "loss": 0.5326, + "step": 4177 + }, + { + "epoch": 0.71, + "learning_rate": 1.782511667944736e-05, + "loss": 0.5311, + "step": 4178 + }, + { + "epoch": 0.71, + "learning_rate": 1.782397003226372e-05, + "loss": 0.4917, + "step": 4179 + }, + { + "epoch": 0.71, + "learning_rate": 1.7822823119791866e-05, + "loss": 0.4916, + "step": 4180 + }, + { + "epoch": 0.71, + "learning_rate": 1.7821675942070678e-05, + "loss": 0.5255, + "step": 4181 + }, + { + "epoch": 0.71, + "learning_rate": 1.782052849913907e-05, + "loss": 0.4837, + "step": 4182 + }, + { + "epoch": 0.71, + "learning_rate": 1.7819380791035937e-05, + "loss": 0.5141, + "step": 4183 + }, + { + "epoch": 0.71, + "learning_rate": 1.78182328178002e-05, + "loss": 0.5054, + "step": 4184 + }, + { + "epoch": 0.71, + "learning_rate": 1.781708457947078e-05, + "loss": 0.5188, + "step": 4185 + }, + { + "epoch": 0.71, + "learning_rate": 1.781593607608661e-05, + "loss": 0.5013, + "step": 4186 + }, + { + "epoch": 0.71, + "learning_rate": 1.7814787307686634e-05, + "loss": 0.4516, + "step": 4187 + }, + { + "epoch": 0.71, + "learning_rate": 1.7813638274309803e-05, + "loss": 0.5001, + "step": 4188 + }, + { + "epoch": 0.71, + "learning_rate": 1.781248897599508e-05, + "loss": 0.532, + "step": 4189 + }, + { + "epoch": 0.71, + "learning_rate": 1.781133941278143e-05, + "loss": 0.512, + "step": 4190 + }, + { + "epoch": 0.71, + "learning_rate": 1.7810189584707834e-05, + "loss": 0.5682, + "step": 4191 + }, + { + "epoch": 0.71, + "learning_rate": 1.7809039491813277e-05, + "loss": 0.5382, + "step": 4192 + }, + { + "epoch": 0.72, + "learning_rate": 1.7807889134136757e-05, + "loss": 0.4647, + "step": 4193 + }, + { + "epoch": 0.72, + "learning_rate": 1.780673851171728e-05, + "loss": 0.5093, + "step": 4194 + }, + { + "epoch": 0.72, + "learning_rate": 1.7805587624593864e-05, + "loss": 0.506, + "step": 4195 + }, + { + "epoch": 0.72, + "learning_rate": 1.7804436472805524e-05, + "loss": 0.5147, + "step": 4196 + }, + { + "epoch": 0.72, + "learning_rate": 1.7803285056391298e-05, + "loss": 0.5097, + "step": 4197 + }, + { + "epoch": 0.72, + "learning_rate": 1.7802133375390224e-05, + "loss": 0.4737, + "step": 4198 + }, + { + "epoch": 0.72, + "learning_rate": 1.7800981429841353e-05, + "loss": 0.486, + "step": 4199 + }, + { + "epoch": 0.72, + "learning_rate": 1.7799829219783746e-05, + "loss": 0.5167, + "step": 4200 + }, + { + "epoch": 0.72, + "learning_rate": 1.7798676745256467e-05, + "loss": 0.4877, + "step": 4201 + }, + { + "epoch": 0.72, + "learning_rate": 1.7797524006298597e-05, + "loss": 0.5179, + "step": 4202 + }, + { + "epoch": 0.72, + "learning_rate": 1.7796371002949225e-05, + "loss": 0.5146, + "step": 4203 + }, + { + "epoch": 0.72, + "learning_rate": 1.779521773524744e-05, + "loss": 0.5293, + "step": 4204 + }, + { + "epoch": 0.72, + "learning_rate": 1.7794064203232347e-05, + "loss": 0.5077, + "step": 4205 + }, + { + "epoch": 0.72, + "learning_rate": 1.779291040694306e-05, + "loss": 0.5426, + "step": 4206 + }, + { + "epoch": 0.72, + "learning_rate": 1.77917563464187e-05, + "loss": 0.5515, + "step": 4207 + }, + { + "epoch": 0.72, + "learning_rate": 1.77906020216984e-05, + "loss": 0.4934, + "step": 4208 + }, + { + "epoch": 0.72, + "learning_rate": 1.7789447432821298e-05, + "loss": 0.5102, + "step": 4209 + }, + { + "epoch": 0.72, + "learning_rate": 1.7788292579826543e-05, + "loss": 0.5275, + "step": 4210 + }, + { + "epoch": 0.72, + "learning_rate": 1.7787137462753292e-05, + "loss": 0.5384, + "step": 4211 + }, + { + "epoch": 0.72, + "learning_rate": 1.7785982081640715e-05, + "loss": 0.4989, + "step": 4212 + }, + { + "epoch": 0.72, + "learning_rate": 1.778482643652798e-05, + "loss": 0.5007, + "step": 4213 + }, + { + "epoch": 0.72, + "learning_rate": 1.7783670527454286e-05, + "loss": 0.4872, + "step": 4214 + }, + { + "epoch": 0.72, + "learning_rate": 1.778251435445881e-05, + "loss": 0.5083, + "step": 4215 + }, + { + "epoch": 0.72, + "learning_rate": 1.7781357917580764e-05, + "loss": 0.5336, + "step": 4216 + }, + { + "epoch": 0.72, + "learning_rate": 1.778020121685936e-05, + "loss": 0.5301, + "step": 4217 + }, + { + "epoch": 0.72, + "learning_rate": 1.777904425233381e-05, + "loss": 0.4981, + "step": 4218 + }, + { + "epoch": 0.72, + "learning_rate": 1.7777887024043355e-05, + "loss": 0.5144, + "step": 4219 + }, + { + "epoch": 0.72, + "learning_rate": 1.7776729532027223e-05, + "loss": 0.5094, + "step": 4220 + }, + { + "epoch": 0.72, + "learning_rate": 1.7775571776324663e-05, + "loss": 0.4807, + "step": 4221 + }, + { + "epoch": 0.72, + "learning_rate": 1.777441375697494e-05, + "loss": 0.4856, + "step": 4222 + }, + { + "epoch": 0.72, + "learning_rate": 1.777325547401731e-05, + "loss": 0.5199, + "step": 4223 + }, + { + "epoch": 0.72, + "learning_rate": 1.777209692749105e-05, + "loss": 0.5035, + "step": 4224 + }, + { + "epoch": 0.72, + "learning_rate": 1.7770938117435443e-05, + "loss": 0.4794, + "step": 4225 + }, + { + "epoch": 0.72, + "learning_rate": 1.776977904388978e-05, + "loss": 0.4991, + "step": 4226 + }, + { + "epoch": 0.72, + "learning_rate": 1.7768619706893365e-05, + "loss": 0.5296, + "step": 4227 + }, + { + "epoch": 0.72, + "learning_rate": 1.77674601064855e-05, + "loss": 0.4966, + "step": 4228 + }, + { + "epoch": 0.72, + "learning_rate": 1.7766300242705514e-05, + "loss": 0.5169, + "step": 4229 + }, + { + "epoch": 0.72, + "learning_rate": 1.7765140115592726e-05, + "loss": 0.5516, + "step": 4230 + }, + { + "epoch": 0.72, + "learning_rate": 1.776397972518648e-05, + "loss": 0.5403, + "step": 4231 + }, + { + "epoch": 0.72, + "learning_rate": 1.7762819071526112e-05, + "loss": 0.5221, + "step": 4232 + }, + { + "epoch": 0.72, + "learning_rate": 1.7761658154650986e-05, + "loss": 0.5323, + "step": 4233 + }, + { + "epoch": 0.72, + "learning_rate": 1.776049697460046e-05, + "loss": 0.5199, + "step": 4234 + }, + { + "epoch": 0.72, + "learning_rate": 1.775933553141391e-05, + "loss": 0.4682, + "step": 4235 + }, + { + "epoch": 0.72, + "learning_rate": 1.775817382513071e-05, + "loss": 0.4692, + "step": 4236 + }, + { + "epoch": 0.72, + "learning_rate": 1.7757011855790258e-05, + "loss": 0.5328, + "step": 4237 + }, + { + "epoch": 0.72, + "learning_rate": 1.7755849623431947e-05, + "loss": 0.4822, + "step": 4238 + }, + { + "epoch": 0.72, + "learning_rate": 1.7754687128095192e-05, + "loss": 0.491, + "step": 4239 + }, + { + "epoch": 0.72, + "learning_rate": 1.7753524369819404e-05, + "loss": 0.4947, + "step": 4240 + }, + { + "epoch": 0.72, + "learning_rate": 1.7752361348644012e-05, + "loss": 0.501, + "step": 4241 + }, + { + "epoch": 0.72, + "learning_rate": 1.7751198064608446e-05, + "loss": 0.4636, + "step": 4242 + }, + { + "epoch": 0.72, + "learning_rate": 1.7750034517752156e-05, + "loss": 0.529, + "step": 4243 + }, + { + "epoch": 0.72, + "learning_rate": 1.7748870708114587e-05, + "loss": 0.5251, + "step": 4244 + }, + { + "epoch": 0.72, + "learning_rate": 1.7747706635735208e-05, + "loss": 0.475, + "step": 4245 + }, + { + "epoch": 0.72, + "learning_rate": 1.7746542300653486e-05, + "loss": 0.541, + "step": 4246 + }, + { + "epoch": 0.72, + "learning_rate": 1.77453777029089e-05, + "loss": 0.4817, + "step": 4247 + }, + { + "epoch": 0.72, + "learning_rate": 1.7744212842540936e-05, + "loss": 0.4926, + "step": 4248 + }, + { + "epoch": 0.72, + "learning_rate": 1.7743047719589096e-05, + "loss": 0.4974, + "step": 4249 + }, + { + "epoch": 0.72, + "learning_rate": 1.7741882334092886e-05, + "loss": 0.5397, + "step": 4250 + }, + { + "epoch": 0.72, + "learning_rate": 1.7740716686091813e-05, + "loss": 0.4938, + "step": 4251 + }, + { + "epoch": 0.73, + "learning_rate": 1.773955077562541e-05, + "loss": 0.5145, + "step": 4252 + }, + { + "epoch": 0.73, + "learning_rate": 1.7738384602733205e-05, + "loss": 0.5425, + "step": 4253 + }, + { + "epoch": 0.73, + "learning_rate": 1.7737218167454737e-05, + "loss": 0.4792, + "step": 4254 + }, + { + "epoch": 0.73, + "learning_rate": 1.7736051469829562e-05, + "loss": 0.4993, + "step": 4255 + }, + { + "epoch": 0.73, + "learning_rate": 1.7734884509897237e-05, + "loss": 0.5036, + "step": 4256 + }, + { + "epoch": 0.73, + "learning_rate": 1.7733717287697328e-05, + "loss": 0.5157, + "step": 4257 + }, + { + "epoch": 0.73, + "learning_rate": 1.773254980326942e-05, + "loss": 0.5487, + "step": 4258 + }, + { + "epoch": 0.73, + "learning_rate": 1.773138205665309e-05, + "loss": 0.5102, + "step": 4259 + }, + { + "epoch": 0.73, + "learning_rate": 1.7730214047887936e-05, + "loss": 0.5167, + "step": 4260 + }, + { + "epoch": 0.73, + "learning_rate": 1.772904577701356e-05, + "loss": 0.4915, + "step": 4261 + }, + { + "epoch": 0.73, + "learning_rate": 1.772787724406958e-05, + "loss": 0.5015, + "step": 4262 + }, + { + "epoch": 0.73, + "learning_rate": 1.7726708449095614e-05, + "loss": 0.4905, + "step": 4263 + }, + { + "epoch": 0.73, + "learning_rate": 1.7725539392131292e-05, + "loss": 0.5362, + "step": 4264 + }, + { + "epoch": 0.73, + "learning_rate": 1.7724370073216255e-05, + "loss": 0.5297, + "step": 4265 + }, + { + "epoch": 0.73, + "learning_rate": 1.772320049239015e-05, + "loss": 0.4883, + "step": 4266 + }, + { + "epoch": 0.73, + "learning_rate": 1.7722030649692634e-05, + "loss": 0.5269, + "step": 4267 + }, + { + "epoch": 0.73, + "learning_rate": 1.7720860545163377e-05, + "loss": 0.5191, + "step": 4268 + }, + { + "epoch": 0.73, + "learning_rate": 1.7719690178842044e-05, + "loss": 0.4639, + "step": 4269 + }, + { + "epoch": 0.73, + "learning_rate": 1.7718519550768332e-05, + "loss": 0.4838, + "step": 4270 + }, + { + "epoch": 0.73, + "learning_rate": 1.7717348660981922e-05, + "loss": 0.5079, + "step": 4271 + }, + { + "epoch": 0.73, + "learning_rate": 1.771617750952252e-05, + "loss": 0.5174, + "step": 4272 + }, + { + "epoch": 0.73, + "learning_rate": 1.7715006096429838e-05, + "loss": 0.4842, + "step": 4273 + }, + { + "epoch": 0.73, + "learning_rate": 1.7713834421743596e-05, + "loss": 0.4842, + "step": 4274 + }, + { + "epoch": 0.73, + "learning_rate": 1.7712662485503517e-05, + "loss": 0.5127, + "step": 4275 + }, + { + "epoch": 0.73, + "learning_rate": 1.771149028774934e-05, + "loss": 0.4932, + "step": 4276 + }, + { + "epoch": 0.73, + "learning_rate": 1.7710317828520814e-05, + "loss": 0.5473, + "step": 4277 + }, + { + "epoch": 0.73, + "learning_rate": 1.7709145107857692e-05, + "loss": 0.4758, + "step": 4278 + }, + { + "epoch": 0.73, + "learning_rate": 1.7707972125799738e-05, + "loss": 0.5045, + "step": 4279 + }, + { + "epoch": 0.73, + "learning_rate": 1.770679888238672e-05, + "loss": 0.4859, + "step": 4280 + }, + { + "epoch": 0.73, + "learning_rate": 1.7705625377658422e-05, + "loss": 0.4897, + "step": 4281 + }, + { + "epoch": 0.73, + "learning_rate": 1.770445161165464e-05, + "loss": 0.465, + "step": 4282 + }, + { + "epoch": 0.73, + "learning_rate": 1.7703277584415163e-05, + "loss": 0.524, + "step": 4283 + }, + { + "epoch": 0.73, + "learning_rate": 1.7702103295979806e-05, + "loss": 0.4856, + "step": 4284 + }, + { + "epoch": 0.73, + "learning_rate": 1.7700928746388382e-05, + "loss": 0.5444, + "step": 4285 + }, + { + "epoch": 0.73, + "learning_rate": 1.7699753935680723e-05, + "loss": 0.4788, + "step": 4286 + }, + { + "epoch": 0.73, + "learning_rate": 1.7698578863896655e-05, + "loss": 0.5357, + "step": 4287 + }, + { + "epoch": 0.73, + "learning_rate": 1.769740353107602e-05, + "loss": 0.4985, + "step": 4288 + }, + { + "epoch": 0.73, + "learning_rate": 1.7696227937258682e-05, + "loss": 0.5171, + "step": 4289 + }, + { + "epoch": 0.73, + "learning_rate": 1.7695052082484493e-05, + "loss": 0.4772, + "step": 4290 + }, + { + "epoch": 0.73, + "learning_rate": 1.7693875966793325e-05, + "loss": 0.5238, + "step": 4291 + }, + { + "epoch": 0.73, + "learning_rate": 1.7692699590225055e-05, + "loss": 0.5267, + "step": 4292 + }, + { + "epoch": 0.73, + "learning_rate": 1.769152295281957e-05, + "loss": 0.5175, + "step": 4293 + }, + { + "epoch": 0.73, + "learning_rate": 1.769034605461677e-05, + "loss": 0.5033, + "step": 4294 + }, + { + "epoch": 0.73, + "learning_rate": 1.768916889565656e-05, + "loss": 0.4894, + "step": 4295 + }, + { + "epoch": 0.73, + "learning_rate": 1.768799147597885e-05, + "loss": 0.509, + "step": 4296 + }, + { + "epoch": 0.73, + "learning_rate": 1.768681379562357e-05, + "loss": 0.4797, + "step": 4297 + }, + { + "epoch": 0.73, + "learning_rate": 1.7685635854630648e-05, + "loss": 0.5328, + "step": 4298 + }, + { + "epoch": 0.73, + "learning_rate": 1.768445765304002e-05, + "loss": 0.4999, + "step": 4299 + }, + { + "epoch": 0.73, + "learning_rate": 1.7683279190891638e-05, + "loss": 0.5194, + "step": 4300 + }, + { + "epoch": 0.73, + "learning_rate": 1.7682100468225462e-05, + "loss": 0.5383, + "step": 4301 + }, + { + "epoch": 0.73, + "learning_rate": 1.768092148508146e-05, + "loss": 0.5057, + "step": 4302 + }, + { + "epoch": 0.73, + "learning_rate": 1.7679742241499608e-05, + "loss": 0.5286, + "step": 4303 + }, + { + "epoch": 0.73, + "learning_rate": 1.7678562737519888e-05, + "loss": 0.4821, + "step": 4304 + }, + { + "epoch": 0.73, + "learning_rate": 1.7677382973182295e-05, + "loss": 0.5146, + "step": 4305 + }, + { + "epoch": 0.73, + "learning_rate": 1.767620294852683e-05, + "loss": 0.5103, + "step": 4306 + }, + { + "epoch": 0.73, + "learning_rate": 1.7675022663593505e-05, + "loss": 0.4744, + "step": 4307 + }, + { + "epoch": 0.73, + "learning_rate": 1.7673842118422343e-05, + "loss": 0.5307, + "step": 4308 + }, + { + "epoch": 0.73, + "learning_rate": 1.767266131305337e-05, + "loss": 0.4927, + "step": 4309 + }, + { + "epoch": 0.74, + "learning_rate": 1.7671480247526626e-05, + "loss": 0.5065, + "step": 4310 + }, + { + "epoch": 0.74, + "learning_rate": 1.767029892188215e-05, + "loss": 0.5133, + "step": 4311 + }, + { + "epoch": 0.74, + "learning_rate": 1.7669117336160007e-05, + "loss": 0.5177, + "step": 4312 + }, + { + "epoch": 0.74, + "learning_rate": 1.7667935490400255e-05, + "loss": 0.5218, + "step": 4313 + }, + { + "epoch": 0.74, + "learning_rate": 1.766675338464297e-05, + "loss": 0.5198, + "step": 4314 + }, + { + "epoch": 0.74, + "learning_rate": 1.7665571018928235e-05, + "loss": 0.5224, + "step": 4315 + }, + { + "epoch": 0.74, + "learning_rate": 1.7664388393296134e-05, + "loss": 0.5345, + "step": 4316 + }, + { + "epoch": 0.74, + "learning_rate": 1.7663205507786773e-05, + "loss": 0.4984, + "step": 4317 + }, + { + "epoch": 0.74, + "learning_rate": 1.7662022362440256e-05, + "loss": 0.5007, + "step": 4318 + }, + { + "epoch": 0.74, + "learning_rate": 1.7660838957296707e-05, + "loss": 0.4767, + "step": 4319 + }, + { + "epoch": 0.74, + "learning_rate": 1.7659655292396244e-05, + "loss": 0.4989, + "step": 4320 + }, + { + "epoch": 0.74, + "learning_rate": 1.7658471367779003e-05, + "loss": 0.4861, + "step": 4321 + }, + { + "epoch": 0.74, + "learning_rate": 1.765728718348513e-05, + "loss": 0.5101, + "step": 4322 + }, + { + "epoch": 0.74, + "learning_rate": 1.765610273955478e-05, + "loss": 0.5168, + "step": 4323 + }, + { + "epoch": 0.74, + "learning_rate": 1.7654918036028104e-05, + "loss": 0.4787, + "step": 4324 + }, + { + "epoch": 0.74, + "learning_rate": 1.7653733072945285e-05, + "loss": 0.4914, + "step": 4325 + }, + { + "epoch": 0.74, + "learning_rate": 1.7652547850346492e-05, + "loss": 0.4844, + "step": 4326 + }, + { + "epoch": 0.74, + "learning_rate": 1.7651362368271915e-05, + "loss": 0.4778, + "step": 4327 + }, + { + "epoch": 0.74, + "learning_rate": 1.765017662676175e-05, + "loss": 0.4942, + "step": 4328 + }, + { + "epoch": 0.74, + "learning_rate": 1.7648990625856205e-05, + "loss": 0.5098, + "step": 4329 + }, + { + "epoch": 0.74, + "learning_rate": 1.764780436559549e-05, + "loss": 0.5051, + "step": 4330 + }, + { + "epoch": 0.74, + "learning_rate": 1.764661784601983e-05, + "loss": 0.494, + "step": 4331 + }, + { + "epoch": 0.74, + "learning_rate": 1.7645431067169456e-05, + "loss": 0.4832, + "step": 4332 + }, + { + "epoch": 0.74, + "learning_rate": 1.7644244029084604e-05, + "loss": 0.4602, + "step": 4333 + }, + { + "epoch": 0.74, + "learning_rate": 1.7643056731805533e-05, + "loss": 0.5139, + "step": 4334 + }, + { + "epoch": 0.74, + "learning_rate": 1.7641869175372493e-05, + "loss": 0.5112, + "step": 4335 + }, + { + "epoch": 0.74, + "learning_rate": 1.7640681359825752e-05, + "loss": 0.503, + "step": 4336 + }, + { + "epoch": 0.74, + "learning_rate": 1.7639493285205585e-05, + "loss": 0.5064, + "step": 4337 + }, + { + "epoch": 0.74, + "learning_rate": 1.7638304951552277e-05, + "loss": 0.5122, + "step": 4338 + }, + { + "epoch": 0.74, + "learning_rate": 1.7637116358906123e-05, + "loss": 0.4979, + "step": 4339 + }, + { + "epoch": 0.74, + "learning_rate": 1.763592750730742e-05, + "loss": 0.5453, + "step": 4340 + }, + { + "epoch": 0.74, + "learning_rate": 1.7634738396796484e-05, + "loss": 0.5199, + "step": 4341 + }, + { + "epoch": 0.74, + "learning_rate": 1.7633549027413632e-05, + "loss": 0.5054, + "step": 4342 + }, + { + "epoch": 0.74, + "learning_rate": 1.763235939919919e-05, + "loss": 0.5558, + "step": 4343 + }, + { + "epoch": 0.74, + "learning_rate": 1.7631169512193495e-05, + "loss": 0.5172, + "step": 4344 + }, + { + "epoch": 0.74, + "learning_rate": 1.7629979366436897e-05, + "loss": 0.4705, + "step": 4345 + }, + { + "epoch": 0.74, + "learning_rate": 1.7628788961969748e-05, + "loss": 0.514, + "step": 4346 + }, + { + "epoch": 0.74, + "learning_rate": 1.7627598298832406e-05, + "loss": 0.5409, + "step": 4347 + }, + { + "epoch": 0.74, + "learning_rate": 1.762640737706525e-05, + "loss": 0.4608, + "step": 4348 + }, + { + "epoch": 0.74, + "learning_rate": 1.762521619670866e-05, + "loss": 0.5089, + "step": 4349 + }, + { + "epoch": 0.74, + "learning_rate": 1.7624024757803022e-05, + "loss": 0.5098, + "step": 4350 + }, + { + "epoch": 0.74, + "learning_rate": 1.7622833060388737e-05, + "loss": 0.5623, + "step": 4351 + }, + { + "epoch": 0.74, + "learning_rate": 1.762164110450621e-05, + "loss": 0.4916, + "step": 4352 + }, + { + "epoch": 0.74, + "learning_rate": 1.7620448890195862e-05, + "loss": 0.502, + "step": 4353 + }, + { + "epoch": 0.74, + "learning_rate": 1.761925641749811e-05, + "loss": 0.5236, + "step": 4354 + }, + { + "epoch": 0.74, + "learning_rate": 1.761806368645339e-05, + "loss": 0.4731, + "step": 4355 + }, + { + "epoch": 0.74, + "learning_rate": 1.7616870697102144e-05, + "loss": 0.5261, + "step": 4356 + }, + { + "epoch": 0.74, + "learning_rate": 1.7615677449484825e-05, + "loss": 0.5209, + "step": 4357 + }, + { + "epoch": 0.74, + "learning_rate": 1.7614483943641892e-05, + "loss": 0.4892, + "step": 4358 + }, + { + "epoch": 0.74, + "learning_rate": 1.7613290179613813e-05, + "loss": 0.4952, + "step": 4359 + }, + { + "epoch": 0.74, + "learning_rate": 1.7612096157441066e-05, + "loss": 0.4678, + "step": 4360 + }, + { + "epoch": 0.74, + "learning_rate": 1.761090187716413e-05, + "loss": 0.4853, + "step": 4361 + }, + { + "epoch": 0.74, + "learning_rate": 1.760970733882351e-05, + "loss": 0.5035, + "step": 4362 + }, + { + "epoch": 0.74, + "learning_rate": 1.7608512542459702e-05, + "loss": 0.497, + "step": 4363 + }, + { + "epoch": 0.74, + "learning_rate": 1.7607317488113222e-05, + "loss": 0.5177, + "step": 4364 + }, + { + "epoch": 0.74, + "learning_rate": 1.7606122175824586e-05, + "loss": 0.5175, + "step": 4365 + }, + { + "epoch": 0.74, + "learning_rate": 1.760492660563433e-05, + "loss": 0.5104, + "step": 4366 + }, + { + "epoch": 0.74, + "learning_rate": 1.760373077758299e-05, + "loss": 0.4943, + "step": 4367 + }, + { + "epoch": 0.74, + "learning_rate": 1.7602534691711112e-05, + "loss": 0.5197, + "step": 4368 + }, + { + "epoch": 0.75, + "learning_rate": 1.760133834805925e-05, + "loss": 0.487, + "step": 4369 + }, + { + "epoch": 0.75, + "learning_rate": 1.7600141746667973e-05, + "loss": 0.5081, + "step": 4370 + }, + { + "epoch": 0.75, + "learning_rate": 1.759894488757785e-05, + "loss": 0.4937, + "step": 4371 + }, + { + "epoch": 0.75, + "learning_rate": 1.7597747770829463e-05, + "loss": 0.5382, + "step": 4372 + }, + { + "epoch": 0.75, + "learning_rate": 1.759655039646341e-05, + "loss": 0.5303, + "step": 4373 + }, + { + "epoch": 0.75, + "learning_rate": 1.7595352764520283e-05, + "loss": 0.4913, + "step": 4374 + }, + { + "epoch": 0.75, + "learning_rate": 1.759415487504069e-05, + "loss": 0.4912, + "step": 4375 + }, + { + "epoch": 0.75, + "learning_rate": 1.7592956728065254e-05, + "loss": 0.5132, + "step": 4376 + }, + { + "epoch": 0.75, + "learning_rate": 1.7591758323634594e-05, + "loss": 0.5039, + "step": 4377 + }, + { + "epoch": 0.75, + "learning_rate": 1.7590559661789354e-05, + "loss": 0.494, + "step": 4378 + }, + { + "epoch": 0.75, + "learning_rate": 1.7589360742570165e-05, + "loss": 0.5294, + "step": 4379 + }, + { + "epoch": 0.75, + "learning_rate": 1.7588161566017684e-05, + "loss": 0.5464, + "step": 4380 + }, + { + "epoch": 0.75, + "learning_rate": 1.7586962132172575e-05, + "loss": 0.5068, + "step": 4381 + }, + { + "epoch": 0.75, + "learning_rate": 1.7585762441075504e-05, + "loss": 0.5188, + "step": 4382 + }, + { + "epoch": 0.75, + "learning_rate": 1.7584562492767154e-05, + "loss": 0.5073, + "step": 4383 + }, + { + "epoch": 0.75, + "learning_rate": 1.75833622872882e-05, + "loss": 0.4722, + "step": 4384 + }, + { + "epoch": 0.75, + "learning_rate": 1.758216182467935e-05, + "loss": 0.4989, + "step": 4385 + }, + { + "epoch": 0.75, + "learning_rate": 1.75809611049813e-05, + "loss": 0.4971, + "step": 4386 + }, + { + "epoch": 0.75, + "learning_rate": 1.757976012823477e-05, + "loss": 0.5183, + "step": 4387 + }, + { + "epoch": 0.75, + "learning_rate": 1.7578558894480473e-05, + "loss": 0.5103, + "step": 4388 + }, + { + "epoch": 0.75, + "learning_rate": 1.7577357403759147e-05, + "loss": 0.5138, + "step": 4389 + }, + { + "epoch": 0.75, + "learning_rate": 1.7576155656111528e-05, + "loss": 0.4899, + "step": 4390 + }, + { + "epoch": 0.75, + "learning_rate": 1.7574953651578362e-05, + "loss": 0.5124, + "step": 4391 + }, + { + "epoch": 0.75, + "learning_rate": 1.757375139020041e-05, + "loss": 0.5052, + "step": 4392 + }, + { + "epoch": 0.75, + "learning_rate": 1.7572548872018433e-05, + "loss": 0.5114, + "step": 4393 + }, + { + "epoch": 0.75, + "learning_rate": 1.757134609707321e-05, + "loss": 0.5245, + "step": 4394 + }, + { + "epoch": 0.75, + "learning_rate": 1.7570143065405517e-05, + "loss": 0.5065, + "step": 4395 + }, + { + "epoch": 0.75, + "learning_rate": 1.7568939777056148e-05, + "loss": 0.5269, + "step": 4396 + }, + { + "epoch": 0.75, + "learning_rate": 1.7567736232065906e-05, + "loss": 0.5191, + "step": 4397 + }, + { + "epoch": 0.75, + "learning_rate": 1.7566532430475594e-05, + "loss": 0.5178, + "step": 4398 + }, + { + "epoch": 0.75, + "learning_rate": 1.7565328372326037e-05, + "loss": 0.5127, + "step": 4399 + }, + { + "epoch": 0.75, + "learning_rate": 1.7564124057658057e-05, + "loss": 0.5052, + "step": 4400 + }, + { + "epoch": 0.75, + "learning_rate": 1.7562919486512485e-05, + "loss": 0.4783, + "step": 4401 + }, + { + "epoch": 0.75, + "learning_rate": 1.756171465893017e-05, + "loss": 0.5, + "step": 4402 + }, + { + "epoch": 0.75, + "learning_rate": 1.7560509574951962e-05, + "loss": 0.4809, + "step": 4403 + }, + { + "epoch": 0.75, + "learning_rate": 1.755930423461872e-05, + "loss": 0.5279, + "step": 4404 + }, + { + "epoch": 0.75, + "learning_rate": 1.7558098637971323e-05, + "loss": 0.4744, + "step": 4405 + }, + { + "epoch": 0.75, + "learning_rate": 1.7556892785050636e-05, + "loss": 0.5417, + "step": 4406 + }, + { + "epoch": 0.75, + "learning_rate": 1.7555686675897554e-05, + "loss": 0.4659, + "step": 4407 + }, + { + "epoch": 0.75, + "learning_rate": 1.7554480310552973e-05, + "loss": 0.4856, + "step": 4408 + }, + { + "epoch": 0.75, + "learning_rate": 1.7553273689057792e-05, + "loss": 0.4408, + "step": 4409 + }, + { + "epoch": 0.75, + "learning_rate": 1.755206681145293e-05, + "loss": 0.4871, + "step": 4410 + }, + { + "epoch": 0.75, + "learning_rate": 1.7550859677779308e-05, + "loss": 0.5153, + "step": 4411 + }, + { + "epoch": 0.75, + "learning_rate": 1.754965228807785e-05, + "loss": 0.5079, + "step": 4412 + }, + { + "epoch": 0.75, + "learning_rate": 1.75484446423895e-05, + "loss": 0.5191, + "step": 4413 + }, + { + "epoch": 0.75, + "learning_rate": 1.7547236740755207e-05, + "loss": 0.5342, + "step": 4414 + }, + { + "epoch": 0.75, + "learning_rate": 1.7546028583215928e-05, + "loss": 0.4989, + "step": 4415 + }, + { + "epoch": 0.75, + "learning_rate": 1.7544820169812623e-05, + "loss": 0.4771, + "step": 4416 + }, + { + "epoch": 0.75, + "learning_rate": 1.754361150058627e-05, + "loss": 0.4948, + "step": 4417 + }, + { + "epoch": 0.75, + "learning_rate": 1.754240257557785e-05, + "loss": 0.4622, + "step": 4418 + }, + { + "epoch": 0.75, + "learning_rate": 1.7541193394828354e-05, + "loss": 0.4987, + "step": 4419 + }, + { + "epoch": 0.75, + "learning_rate": 1.753998395837878e-05, + "loss": 0.543, + "step": 4420 + }, + { + "epoch": 0.75, + "learning_rate": 1.753877426627014e-05, + "loss": 0.5225, + "step": 4421 + }, + { + "epoch": 0.75, + "learning_rate": 1.7537564318543455e-05, + "loss": 0.479, + "step": 4422 + }, + { + "epoch": 0.75, + "learning_rate": 1.753635411523974e-05, + "loss": 0.5261, + "step": 4423 + }, + { + "epoch": 0.75, + "learning_rate": 1.753514365640004e-05, + "loss": 0.4565, + "step": 4424 + }, + { + "epoch": 0.75, + "learning_rate": 1.7533932942065388e-05, + "loss": 0.513, + "step": 4425 + }, + { + "epoch": 0.75, + "learning_rate": 1.7532721972276845e-05, + "loss": 0.5214, + "step": 4426 + }, + { + "epoch": 0.76, + "learning_rate": 1.7531510747075464e-05, + "loss": 0.5237, + "step": 4427 + }, + { + "epoch": 0.76, + "learning_rate": 1.753029926650232e-05, + "loss": 0.5336, + "step": 4428 + }, + { + "epoch": 0.76, + "learning_rate": 1.752908753059849e-05, + "loss": 0.4841, + "step": 4429 + }, + { + "epoch": 0.76, + "learning_rate": 1.752787553940506e-05, + "loss": 0.511, + "step": 4430 + }, + { + "epoch": 0.76, + "learning_rate": 1.752666329296312e-05, + "loss": 0.5004, + "step": 4431 + }, + { + "epoch": 0.76, + "learning_rate": 1.7525450791313782e-05, + "loss": 0.502, + "step": 4432 + }, + { + "epoch": 0.76, + "learning_rate": 1.7524238034498152e-05, + "loss": 0.5275, + "step": 4433 + }, + { + "epoch": 0.76, + "learning_rate": 1.7523025022557355e-05, + "loss": 0.5497, + "step": 4434 + }, + { + "epoch": 0.76, + "learning_rate": 1.7521811755532517e-05, + "loss": 0.4862, + "step": 4435 + }, + { + "epoch": 0.76, + "learning_rate": 1.7520598233464778e-05, + "loss": 0.4755, + "step": 4436 + }, + { + "epoch": 0.76, + "learning_rate": 1.751938445639529e-05, + "loss": 0.5049, + "step": 4437 + }, + { + "epoch": 0.76, + "learning_rate": 1.7518170424365203e-05, + "loss": 0.5264, + "step": 4438 + }, + { + "epoch": 0.76, + "learning_rate": 1.751695613741568e-05, + "loss": 0.4929, + "step": 4439 + }, + { + "epoch": 0.76, + "learning_rate": 1.7515741595587898e-05, + "loss": 0.5078, + "step": 4440 + }, + { + "epoch": 0.76, + "learning_rate": 1.751452679892304e-05, + "loss": 0.501, + "step": 4441 + }, + { + "epoch": 0.76, + "learning_rate": 1.751331174746229e-05, + "loss": 0.4879, + "step": 4442 + }, + { + "epoch": 0.76, + "learning_rate": 1.7512096441246852e-05, + "loss": 0.4982, + "step": 4443 + }, + { + "epoch": 0.76, + "learning_rate": 1.7510880880317927e-05, + "loss": 0.4883, + "step": 4444 + }, + { + "epoch": 0.76, + "learning_rate": 1.7509665064716743e-05, + "loss": 0.4493, + "step": 4445 + }, + { + "epoch": 0.76, + "learning_rate": 1.7508448994484515e-05, + "loss": 0.484, + "step": 4446 + }, + { + "epoch": 0.76, + "learning_rate": 1.750723266966248e-05, + "loss": 0.4751, + "step": 4447 + }, + { + "epoch": 0.76, + "learning_rate": 1.7506016090291878e-05, + "loss": 0.4751, + "step": 4448 + }, + { + "epoch": 0.76, + "learning_rate": 1.7504799256413962e-05, + "loss": 0.5233, + "step": 4449 + }, + { + "epoch": 0.76, + "learning_rate": 1.750358216806999e-05, + "loss": 0.5122, + "step": 4450 + }, + { + "epoch": 0.76, + "learning_rate": 1.750236482530123e-05, + "loss": 0.4966, + "step": 4451 + }, + { + "epoch": 0.76, + "learning_rate": 1.7501147228148957e-05, + "loss": 0.4794, + "step": 4452 + }, + { + "epoch": 0.76, + "learning_rate": 1.749992937665446e-05, + "loss": 0.4836, + "step": 4453 + }, + { + "epoch": 0.76, + "learning_rate": 1.7498711270859035e-05, + "loss": 0.4692, + "step": 4454 + }, + { + "epoch": 0.76, + "learning_rate": 1.7497492910803972e-05, + "loss": 0.4958, + "step": 4455 + }, + { + "epoch": 0.76, + "learning_rate": 1.7496274296530598e-05, + "loss": 0.4825, + "step": 4456 + }, + { + "epoch": 0.76, + "learning_rate": 1.749505542808022e-05, + "loss": 0.4446, + "step": 4457 + }, + { + "epoch": 0.76, + "learning_rate": 1.7493836305494173e-05, + "loss": 0.5057, + "step": 4458 + }, + { + "epoch": 0.76, + "learning_rate": 1.749261692881379e-05, + "loss": 0.5174, + "step": 4459 + }, + { + "epoch": 0.76, + "learning_rate": 1.7491397298080423e-05, + "loss": 0.4992, + "step": 4460 + }, + { + "epoch": 0.76, + "learning_rate": 1.749017741333542e-05, + "loss": 0.5035, + "step": 4461 + }, + { + "epoch": 0.76, + "learning_rate": 1.7488957274620147e-05, + "loss": 0.4852, + "step": 4462 + }, + { + "epoch": 0.76, + "learning_rate": 1.748773688197597e-05, + "loss": 0.4894, + "step": 4463 + }, + { + "epoch": 0.76, + "learning_rate": 1.748651623544428e-05, + "loss": 0.5253, + "step": 4464 + }, + { + "epoch": 0.76, + "learning_rate": 1.7485295335066454e-05, + "loss": 0.4797, + "step": 4465 + }, + { + "epoch": 0.76, + "learning_rate": 1.7484074180883896e-05, + "loss": 0.5583, + "step": 4466 + }, + { + "epoch": 0.76, + "learning_rate": 1.7482852772938005e-05, + "loss": 0.5651, + "step": 4467 + }, + { + "epoch": 0.76, + "learning_rate": 1.7481631111270206e-05, + "loss": 0.4932, + "step": 4468 + }, + { + "epoch": 0.76, + "learning_rate": 1.7480409195921913e-05, + "loss": 0.5079, + "step": 4469 + }, + { + "epoch": 0.76, + "learning_rate": 1.747918702693456e-05, + "loss": 0.5288, + "step": 4470 + }, + { + "epoch": 0.76, + "learning_rate": 1.7477964604349594e-05, + "loss": 0.4823, + "step": 4471 + }, + { + "epoch": 0.76, + "learning_rate": 1.7476741928208452e-05, + "loss": 0.5058, + "step": 4472 + }, + { + "epoch": 0.76, + "learning_rate": 1.7475518998552598e-05, + "loss": 0.4877, + "step": 4473 + }, + { + "epoch": 0.76, + "learning_rate": 1.74742958154235e-05, + "loss": 0.5285, + "step": 4474 + }, + { + "epoch": 0.76, + "learning_rate": 1.7473072378862625e-05, + "loss": 0.4862, + "step": 4475 + }, + { + "epoch": 0.76, + "learning_rate": 1.7471848688911465e-05, + "loss": 0.5429, + "step": 4476 + }, + { + "epoch": 0.76, + "learning_rate": 1.7470624745611506e-05, + "loss": 0.4703, + "step": 4477 + }, + { + "epoch": 0.76, + "learning_rate": 1.746940054900425e-05, + "loss": 0.4865, + "step": 4478 + }, + { + "epoch": 0.76, + "learning_rate": 1.7468176099131208e-05, + "loss": 0.5275, + "step": 4479 + }, + { + "epoch": 0.76, + "learning_rate": 1.7466951396033897e-05, + "loss": 0.4661, + "step": 4480 + }, + { + "epoch": 0.76, + "learning_rate": 1.746572643975384e-05, + "loss": 0.5046, + "step": 4481 + }, + { + "epoch": 0.76, + "learning_rate": 1.7464501230332573e-05, + "loss": 0.5382, + "step": 4482 + }, + { + "epoch": 0.76, + "learning_rate": 1.7463275767811637e-05, + "loss": 0.5212, + "step": 4483 + }, + { + "epoch": 0.76, + "learning_rate": 1.746205005223259e-05, + "loss": 0.4841, + "step": 4484 + }, + { + "epoch": 0.76, + "learning_rate": 1.7460824083636995e-05, + "loss": 0.5417, + "step": 4485 + }, + { + "epoch": 0.77, + "learning_rate": 1.745959786206641e-05, + "loss": 0.4876, + "step": 4486 + }, + { + "epoch": 0.77, + "learning_rate": 1.7458371387562415e-05, + "loss": 0.4832, + "step": 4487 + }, + { + "epoch": 0.77, + "learning_rate": 1.74571446601666e-05, + "loss": 0.4984, + "step": 4488 + }, + { + "epoch": 0.77, + "learning_rate": 1.7455917679920563e-05, + "loss": 0.5171, + "step": 4489 + }, + { + "epoch": 0.77, + "learning_rate": 1.74546904468659e-05, + "loss": 0.4874, + "step": 4490 + }, + { + "epoch": 0.77, + "learning_rate": 1.7453462961044227e-05, + "loss": 0.5501, + "step": 4491 + }, + { + "epoch": 0.77, + "learning_rate": 1.7452235222497166e-05, + "loss": 0.5124, + "step": 4492 + }, + { + "epoch": 0.77, + "learning_rate": 1.7451007231266343e-05, + "loss": 0.4989, + "step": 4493 + }, + { + "epoch": 0.77, + "learning_rate": 1.7449778987393395e-05, + "loss": 0.5446, + "step": 4494 + }, + { + "epoch": 0.77, + "learning_rate": 1.7448550490919965e-05, + "loss": 0.5267, + "step": 4495 + }, + { + "epoch": 0.77, + "learning_rate": 1.744732174188772e-05, + "loss": 0.5153, + "step": 4496 + }, + { + "epoch": 0.77, + "learning_rate": 1.7446092740338313e-05, + "loss": 0.5309, + "step": 4497 + }, + { + "epoch": 0.77, + "learning_rate": 1.7444863486313418e-05, + "loss": 0.5275, + "step": 4498 + }, + { + "epoch": 0.77, + "learning_rate": 1.7443633979854717e-05, + "loss": 0.4719, + "step": 4499 + }, + { + "epoch": 0.77, + "learning_rate": 1.7442404221003898e-05, + "loss": 0.4896, + "step": 4500 + }, + { + "epoch": 0.77, + "learning_rate": 1.7441174209802662e-05, + "loss": 0.4831, + "step": 4501 + }, + { + "epoch": 0.77, + "learning_rate": 1.7439943946292706e-05, + "loss": 0.4874, + "step": 4502 + }, + { + "epoch": 0.77, + "learning_rate": 1.7438713430515753e-05, + "loss": 0.4972, + "step": 4503 + }, + { + "epoch": 0.77, + "learning_rate": 1.7437482662513523e-05, + "loss": 0.5104, + "step": 4504 + }, + { + "epoch": 0.77, + "learning_rate": 1.743625164232775e-05, + "loss": 0.4892, + "step": 4505 + }, + { + "epoch": 0.77, + "learning_rate": 1.7435020370000173e-05, + "loss": 0.4896, + "step": 4506 + }, + { + "epoch": 0.77, + "learning_rate": 1.743378884557254e-05, + "loss": 0.5162, + "step": 4507 + }, + { + "epoch": 0.77, + "learning_rate": 1.7432557069086608e-05, + "loss": 0.4956, + "step": 4508 + }, + { + "epoch": 0.77, + "learning_rate": 1.7431325040584142e-05, + "loss": 0.5521, + "step": 4509 + }, + { + "epoch": 0.77, + "learning_rate": 1.7430092760106925e-05, + "loss": 0.5475, + "step": 4510 + }, + { + "epoch": 0.77, + "learning_rate": 1.742886022769673e-05, + "loss": 0.5172, + "step": 4511 + }, + { + "epoch": 0.77, + "learning_rate": 1.742762744339535e-05, + "loss": 0.5019, + "step": 4512 + }, + { + "epoch": 0.77, + "learning_rate": 1.7426394407244595e-05, + "loss": 0.4814, + "step": 4513 + }, + { + "epoch": 0.77, + "learning_rate": 1.7425161119286258e-05, + "loss": 0.4813, + "step": 4514 + }, + { + "epoch": 0.77, + "learning_rate": 1.7423927579562167e-05, + "loss": 0.5243, + "step": 4515 + }, + { + "epoch": 0.77, + "learning_rate": 1.7422693788114148e-05, + "loss": 0.5163, + "step": 4516 + }, + { + "epoch": 0.77, + "learning_rate": 1.7421459744984027e-05, + "loss": 0.4942, + "step": 4517 + }, + { + "epoch": 0.77, + "learning_rate": 1.7420225450213656e-05, + "loss": 0.5163, + "step": 4518 + }, + { + "epoch": 0.77, + "learning_rate": 1.7418990903844883e-05, + "loss": 0.5035, + "step": 4519 + }, + { + "epoch": 0.77, + "learning_rate": 1.7417756105919567e-05, + "loss": 0.492, + "step": 4520 + }, + { + "epoch": 0.77, + "learning_rate": 1.7416521056479577e-05, + "loss": 0.4732, + "step": 4521 + }, + { + "epoch": 0.77, + "learning_rate": 1.741528575556679e-05, + "loss": 0.5092, + "step": 4522 + }, + { + "epoch": 0.77, + "learning_rate": 1.7414050203223092e-05, + "loss": 0.5457, + "step": 4523 + }, + { + "epoch": 0.77, + "learning_rate": 1.7412814399490376e-05, + "loss": 0.4728, + "step": 4524 + }, + { + "epoch": 0.77, + "learning_rate": 1.7411578344410543e-05, + "loss": 0.4696, + "step": 4525 + }, + { + "epoch": 0.77, + "learning_rate": 1.741034203802551e-05, + "loss": 0.5305, + "step": 4526 + }, + { + "epoch": 0.77, + "learning_rate": 1.7409105480377188e-05, + "loss": 0.4729, + "step": 4527 + }, + { + "epoch": 0.77, + "learning_rate": 1.7407868671507513e-05, + "loss": 0.5007, + "step": 4528 + }, + { + "epoch": 0.77, + "learning_rate": 1.740663161145842e-05, + "loss": 0.5068, + "step": 4529 + }, + { + "epoch": 0.77, + "learning_rate": 1.7405394300271846e-05, + "loss": 0.4707, + "step": 4530 + }, + { + "epoch": 0.77, + "learning_rate": 1.7404156737989756e-05, + "loss": 0.5388, + "step": 4531 + }, + { + "epoch": 0.77, + "learning_rate": 1.7402918924654106e-05, + "loss": 0.519, + "step": 4532 + }, + { + "epoch": 0.77, + "learning_rate": 1.740168086030687e-05, + "loss": 0.4724, + "step": 4533 + }, + { + "epoch": 0.77, + "learning_rate": 1.7400442544990023e-05, + "loss": 0.4712, + "step": 4534 + }, + { + "epoch": 0.77, + "learning_rate": 1.7399203978745557e-05, + "loss": 0.4736, + "step": 4535 + }, + { + "epoch": 0.77, + "learning_rate": 1.739796516161546e-05, + "loss": 0.4956, + "step": 4536 + }, + { + "epoch": 0.77, + "learning_rate": 1.739672609364175e-05, + "loss": 0.5034, + "step": 4537 + }, + { + "epoch": 0.77, + "learning_rate": 1.739548677486643e-05, + "loss": 0.5272, + "step": 4538 + }, + { + "epoch": 0.77, + "learning_rate": 1.7394247205331526e-05, + "loss": 0.5282, + "step": 4539 + }, + { + "epoch": 0.77, + "learning_rate": 1.7393007385079064e-05, + "loss": 0.5343, + "step": 4540 + }, + { + "epoch": 0.77, + "learning_rate": 1.7391767314151088e-05, + "loss": 0.4599, + "step": 4541 + }, + { + "epoch": 0.77, + "learning_rate": 1.7390526992589644e-05, + "loss": 0.489, + "step": 4542 + }, + { + "epoch": 0.77, + "learning_rate": 1.7389286420436782e-05, + "loss": 0.5032, + "step": 4543 + }, + { + "epoch": 0.77, + "learning_rate": 1.7388045597734573e-05, + "loss": 0.5079, + "step": 4544 + }, + { + "epoch": 0.78, + "learning_rate": 1.738680452452509e-05, + "loss": 0.4588, + "step": 4545 + }, + { + "epoch": 0.78, + "learning_rate": 1.7385563200850408e-05, + "loss": 0.4803, + "step": 4546 + }, + { + "epoch": 0.78, + "learning_rate": 1.7384321626752624e-05, + "loss": 0.5244, + "step": 4547 + }, + { + "epoch": 0.78, + "learning_rate": 1.7383079802273828e-05, + "loss": 0.4683, + "step": 4548 + }, + { + "epoch": 0.78, + "learning_rate": 1.7381837727456135e-05, + "loss": 0.5034, + "step": 4549 + }, + { + "epoch": 0.78, + "learning_rate": 1.7380595402341653e-05, + "loss": 0.5052, + "step": 4550 + }, + { + "epoch": 0.78, + "learning_rate": 1.7379352826972514e-05, + "loss": 0.4874, + "step": 4551 + }, + { + "epoch": 0.78, + "learning_rate": 1.737811000139084e-05, + "loss": 0.5123, + "step": 4552 + }, + { + "epoch": 0.78, + "learning_rate": 1.737686692563878e-05, + "loss": 0.5163, + "step": 4553 + }, + { + "epoch": 0.78, + "learning_rate": 1.737562359975848e-05, + "loss": 0.52, + "step": 4554 + }, + { + "epoch": 0.78, + "learning_rate": 1.7374380023792098e-05, + "loss": 0.5154, + "step": 4555 + }, + { + "epoch": 0.78, + "learning_rate": 1.7373136197781797e-05, + "loss": 0.4613, + "step": 4556 + }, + { + "epoch": 0.78, + "learning_rate": 1.7371892121769755e-05, + "loss": 0.4744, + "step": 4557 + }, + { + "epoch": 0.78, + "learning_rate": 1.737064779579816e-05, + "loss": 0.5327, + "step": 4558 + }, + { + "epoch": 0.78, + "learning_rate": 1.7369403219909186e-05, + "loss": 0.4831, + "step": 4559 + }, + { + "epoch": 0.78, + "learning_rate": 1.7368158394145053e-05, + "loss": 0.49, + "step": 4560 + }, + { + "epoch": 0.78, + "learning_rate": 1.736691331854796e-05, + "loss": 0.498, + "step": 4561 + }, + { + "epoch": 0.78, + "learning_rate": 1.7365667993160126e-05, + "loss": 0.4772, + "step": 4562 + }, + { + "epoch": 0.78, + "learning_rate": 1.7364422418023773e-05, + "loss": 0.541, + "step": 4563 + }, + { + "epoch": 0.78, + "learning_rate": 1.7363176593181138e-05, + "loss": 0.4887, + "step": 4564 + }, + { + "epoch": 0.78, + "learning_rate": 1.736193051867446e-05, + "loss": 0.5022, + "step": 4565 + }, + { + "epoch": 0.78, + "learning_rate": 1.7360684194545996e-05, + "loss": 0.4867, + "step": 4566 + }, + { + "epoch": 0.78, + "learning_rate": 1.7359437620838e-05, + "loss": 0.4788, + "step": 4567 + }, + { + "epoch": 0.78, + "learning_rate": 1.735819079759274e-05, + "loss": 0.5251, + "step": 4568 + }, + { + "epoch": 0.78, + "learning_rate": 1.7356943724852498e-05, + "loss": 0.5076, + "step": 4569 + }, + { + "epoch": 0.78, + "learning_rate": 1.735569640265955e-05, + "loss": 0.5002, + "step": 4570 + }, + { + "epoch": 0.78, + "learning_rate": 1.7354448831056197e-05, + "loss": 0.4794, + "step": 4571 + }, + { + "epoch": 0.78, + "learning_rate": 1.735320101008473e-05, + "loss": 0.5002, + "step": 4572 + }, + { + "epoch": 0.78, + "learning_rate": 1.7351952939787474e-05, + "loss": 0.5171, + "step": 4573 + }, + { + "epoch": 0.78, + "learning_rate": 1.7350704620206734e-05, + "loss": 0.4906, + "step": 4574 + }, + { + "epoch": 0.78, + "learning_rate": 1.7349456051384844e-05, + "loss": 0.4745, + "step": 4575 + }, + { + "epoch": 0.78, + "learning_rate": 1.7348207233364137e-05, + "loss": 0.5082, + "step": 4576 + }, + { + "epoch": 0.78, + "learning_rate": 1.7346958166186956e-05, + "loss": 0.494, + "step": 4577 + }, + { + "epoch": 0.78, + "learning_rate": 1.7345708849895655e-05, + "loss": 0.5136, + "step": 4578 + }, + { + "epoch": 0.78, + "learning_rate": 1.7344459284532598e-05, + "loss": 0.4981, + "step": 4579 + }, + { + "epoch": 0.78, + "learning_rate": 1.7343209470140147e-05, + "loss": 0.4855, + "step": 4580 + }, + { + "epoch": 0.78, + "learning_rate": 1.7341959406760684e-05, + "loss": 0.4811, + "step": 4581 + }, + { + "epoch": 0.78, + "learning_rate": 1.734070909443659e-05, + "loss": 0.4944, + "step": 4582 + }, + { + "epoch": 0.78, + "learning_rate": 1.733945853321027e-05, + "loss": 0.5379, + "step": 4583 + }, + { + "epoch": 0.78, + "learning_rate": 1.7338207723124118e-05, + "loss": 0.5332, + "step": 4584 + }, + { + "epoch": 0.78, + "learning_rate": 1.7336956664220547e-05, + "loss": 0.5306, + "step": 4585 + }, + { + "epoch": 0.78, + "learning_rate": 1.733570535654198e-05, + "loss": 0.49, + "step": 4586 + }, + { + "epoch": 0.78, + "learning_rate": 1.733445380013084e-05, + "loss": 0.4966, + "step": 4587 + }, + { + "epoch": 0.78, + "learning_rate": 1.7333201995029564e-05, + "loss": 0.5203, + "step": 4588 + }, + { + "epoch": 0.78, + "learning_rate": 1.7331949941280602e-05, + "loss": 0.5192, + "step": 4589 + }, + { + "epoch": 0.78, + "learning_rate": 1.7330697638926407e-05, + "loss": 0.5239, + "step": 4590 + }, + { + "epoch": 0.78, + "learning_rate": 1.7329445088009437e-05, + "loss": 0.5511, + "step": 4591 + }, + { + "epoch": 0.78, + "learning_rate": 1.7328192288572164e-05, + "loss": 0.4862, + "step": 4592 + }, + { + "epoch": 0.78, + "learning_rate": 1.732693924065707e-05, + "loss": 0.4878, + "step": 4593 + }, + { + "epoch": 0.78, + "learning_rate": 1.732568594430664e-05, + "loss": 0.5209, + "step": 4594 + }, + { + "epoch": 0.78, + "learning_rate": 1.732443239956337e-05, + "loss": 0.484, + "step": 4595 + }, + { + "epoch": 0.78, + "learning_rate": 1.7323178606469756e-05, + "loss": 0.537, + "step": 4596 + }, + { + "epoch": 0.78, + "learning_rate": 1.7321924565068324e-05, + "loss": 0.4803, + "step": 4597 + }, + { + "epoch": 0.78, + "learning_rate": 1.7320670275401585e-05, + "loss": 0.4747, + "step": 4598 + }, + { + "epoch": 0.78, + "learning_rate": 1.7319415737512075e-05, + "loss": 0.4788, + "step": 4599 + }, + { + "epoch": 0.78, + "learning_rate": 1.7318160951442328e-05, + "loss": 0.4667, + "step": 4600 + }, + { + "epoch": 0.78, + "learning_rate": 1.731690591723489e-05, + "loss": 0.4694, + "step": 4601 + }, + { + "epoch": 0.78, + "learning_rate": 1.7315650634932313e-05, + "loss": 0.4964, + "step": 4602 + }, + { + "epoch": 0.79, + "learning_rate": 1.7314395104577172e-05, + "loss": 0.4925, + "step": 4603 + }, + { + "epoch": 0.79, + "learning_rate": 1.7313139326212024e-05, + "loss": 0.5112, + "step": 4604 + }, + { + "epoch": 0.79, + "learning_rate": 1.7311883299879455e-05, + "loss": 0.4966, + "step": 4605 + }, + { + "epoch": 0.79, + "learning_rate": 1.7310627025622056e-05, + "loss": 0.4987, + "step": 4606 + }, + { + "epoch": 0.79, + "learning_rate": 1.7309370503482417e-05, + "loss": 0.4597, + "step": 4607 + }, + { + "epoch": 0.79, + "learning_rate": 1.730811373350315e-05, + "loss": 0.5165, + "step": 4608 + }, + { + "epoch": 0.79, + "learning_rate": 1.730685671572686e-05, + "loss": 0.4766, + "step": 4609 + }, + { + "epoch": 0.79, + "learning_rate": 1.7305599450196178e-05, + "loss": 0.5176, + "step": 4610 + }, + { + "epoch": 0.79, + "learning_rate": 1.730434193695373e-05, + "loss": 0.5087, + "step": 4611 + }, + { + "epoch": 0.79, + "learning_rate": 1.7303084176042154e-05, + "loss": 0.5035, + "step": 4612 + }, + { + "epoch": 0.79, + "learning_rate": 1.7301826167504098e-05, + "loss": 0.5224, + "step": 4613 + }, + { + "epoch": 0.79, + "learning_rate": 1.7300567911382214e-05, + "loss": 0.4729, + "step": 4614 + }, + { + "epoch": 0.79, + "learning_rate": 1.729930940771917e-05, + "loss": 0.5075, + "step": 4615 + }, + { + "epoch": 0.79, + "learning_rate": 1.729805065655764e-05, + "loss": 0.4956, + "step": 4616 + }, + { + "epoch": 0.79, + "learning_rate": 1.72967916579403e-05, + "loss": 0.4468, + "step": 4617 + }, + { + "epoch": 0.79, + "learning_rate": 1.7295532411909844e-05, + "loss": 0.4974, + "step": 4618 + }, + { + "epoch": 0.79, + "learning_rate": 1.7294272918508963e-05, + "loss": 0.4896, + "step": 4619 + }, + { + "epoch": 0.79, + "learning_rate": 1.7293013177780368e-05, + "loss": 0.4814, + "step": 4620 + }, + { + "epoch": 0.79, + "learning_rate": 1.729175318976677e-05, + "loss": 0.5048, + "step": 4621 + }, + { + "epoch": 0.79, + "learning_rate": 1.7290492954510893e-05, + "loss": 0.4964, + "step": 4622 + }, + { + "epoch": 0.79, + "learning_rate": 1.7289232472055468e-05, + "loss": 0.4957, + "step": 4623 + }, + { + "epoch": 0.79, + "learning_rate": 1.728797174244323e-05, + "loss": 0.4631, + "step": 4624 + }, + { + "epoch": 0.79, + "learning_rate": 1.728671076571694e-05, + "loss": 0.498, + "step": 4625 + }, + { + "epoch": 0.79, + "learning_rate": 1.7285449541919336e-05, + "loss": 0.4883, + "step": 4626 + }, + { + "epoch": 0.79, + "learning_rate": 1.7284188071093197e-05, + "loss": 0.5023, + "step": 4627 + }, + { + "epoch": 0.79, + "learning_rate": 1.7282926353281286e-05, + "loss": 0.4721, + "step": 4628 + }, + { + "epoch": 0.79, + "learning_rate": 1.7281664388526392e-05, + "loss": 0.5433, + "step": 4629 + }, + { + "epoch": 0.79, + "learning_rate": 1.7280402176871298e-05, + "loss": 0.5224, + "step": 4630 + }, + { + "epoch": 0.79, + "learning_rate": 1.7279139718358805e-05, + "loss": 0.5124, + "step": 4631 + }, + { + "epoch": 0.79, + "learning_rate": 1.727787701303172e-05, + "loss": 0.5314, + "step": 4632 + }, + { + "epoch": 0.79, + "learning_rate": 1.7276614060932858e-05, + "loss": 0.5179, + "step": 4633 + }, + { + "epoch": 0.79, + "learning_rate": 1.727535086210504e-05, + "loss": 0.4888, + "step": 4634 + }, + { + "epoch": 0.79, + "learning_rate": 1.7274087416591094e-05, + "loss": 0.4402, + "step": 4635 + }, + { + "epoch": 0.79, + "learning_rate": 1.727282372443387e-05, + "loss": 0.5385, + "step": 4636 + }, + { + "epoch": 0.79, + "learning_rate": 1.727155978567621e-05, + "loss": 0.4738, + "step": 4637 + }, + { + "epoch": 0.79, + "learning_rate": 1.7270295600360967e-05, + "loss": 0.4933, + "step": 4638 + }, + { + "epoch": 0.79, + "learning_rate": 1.7269031168531013e-05, + "loss": 0.4846, + "step": 4639 + }, + { + "epoch": 0.79, + "learning_rate": 1.7267766490229213e-05, + "loss": 0.4796, + "step": 4640 + }, + { + "epoch": 0.79, + "learning_rate": 1.7266501565498458e-05, + "loss": 0.5034, + "step": 4641 + }, + { + "epoch": 0.79, + "learning_rate": 1.7265236394381634e-05, + "loss": 0.5136, + "step": 4642 + }, + { + "epoch": 0.79, + "learning_rate": 1.7263970976921635e-05, + "loss": 0.5473, + "step": 4643 + }, + { + "epoch": 0.79, + "learning_rate": 1.7262705313161376e-05, + "loss": 0.4875, + "step": 4644 + }, + { + "epoch": 0.79, + "learning_rate": 1.7261439403143763e-05, + "loss": 0.5414, + "step": 4645 + }, + { + "epoch": 0.79, + "learning_rate": 1.7260173246911724e-05, + "loss": 0.4995, + "step": 4646 + }, + { + "epoch": 0.79, + "learning_rate": 1.7258906844508194e-05, + "loss": 0.4802, + "step": 4647 + }, + { + "epoch": 0.79, + "learning_rate": 1.7257640195976107e-05, + "loss": 0.5132, + "step": 4648 + }, + { + "epoch": 0.79, + "learning_rate": 1.7256373301358414e-05, + "loss": 0.512, + "step": 4649 + }, + { + "epoch": 0.79, + "learning_rate": 1.725510616069807e-05, + "loss": 0.5524, + "step": 4650 + }, + { + "epoch": 0.79, + "learning_rate": 1.7253838774038045e-05, + "loss": 0.5112, + "step": 4651 + }, + { + "epoch": 0.79, + "learning_rate": 1.7252571141421307e-05, + "loss": 0.51, + "step": 4652 + }, + { + "epoch": 0.79, + "learning_rate": 1.7251303262890838e-05, + "loss": 0.4766, + "step": 4653 + }, + { + "epoch": 0.79, + "learning_rate": 1.7250035138489634e-05, + "loss": 0.519, + "step": 4654 + }, + { + "epoch": 0.79, + "learning_rate": 1.7248766768260686e-05, + "loss": 0.5338, + "step": 4655 + }, + { + "epoch": 0.79, + "learning_rate": 1.7247498152247008e-05, + "loss": 0.5053, + "step": 4656 + }, + { + "epoch": 0.79, + "learning_rate": 1.7246229290491606e-05, + "loss": 0.4891, + "step": 4657 + }, + { + "epoch": 0.79, + "learning_rate": 1.7244960183037514e-05, + "loss": 0.4803, + "step": 4658 + }, + { + "epoch": 0.79, + "learning_rate": 1.7243690829927752e-05, + "loss": 0.4988, + "step": 4659 + }, + { + "epoch": 0.79, + "learning_rate": 1.724242123120537e-05, + "loss": 0.5153, + "step": 4660 + }, + { + "epoch": 0.79, + "learning_rate": 1.7241151386913415e-05, + "loss": 0.5013, + "step": 4661 + }, + { + "epoch": 0.8, + "learning_rate": 1.7239881297094938e-05, + "loss": 0.5261, + "step": 4662 + }, + { + "epoch": 0.8, + "learning_rate": 1.723861096179301e-05, + "loss": 0.5041, + "step": 4663 + }, + { + "epoch": 0.8, + "learning_rate": 1.72373403810507e-05, + "loss": 0.486, + "step": 4664 + }, + { + "epoch": 0.8, + "learning_rate": 1.7236069554911095e-05, + "loss": 0.4768, + "step": 4665 + }, + { + "epoch": 0.8, + "learning_rate": 1.7234798483417283e-05, + "loss": 0.4848, + "step": 4666 + }, + { + "epoch": 0.8, + "learning_rate": 1.7233527166612357e-05, + "loss": 0.5055, + "step": 4667 + }, + { + "epoch": 0.8, + "learning_rate": 1.7232255604539433e-05, + "loss": 0.5057, + "step": 4668 + }, + { + "epoch": 0.8, + "learning_rate": 1.7230983797241615e-05, + "loss": 0.5118, + "step": 4669 + }, + { + "epoch": 0.8, + "learning_rate": 1.7229711744762036e-05, + "loss": 0.4894, + "step": 4670 + }, + { + "epoch": 0.8, + "learning_rate": 1.7228439447143823e-05, + "loss": 0.4551, + "step": 4671 + }, + { + "epoch": 0.8, + "learning_rate": 1.7227166904430116e-05, + "loss": 0.5364, + "step": 4672 + }, + { + "epoch": 0.8, + "learning_rate": 1.7225894116664064e-05, + "loss": 0.4981, + "step": 4673 + }, + { + "epoch": 0.8, + "learning_rate": 1.7224621083888827e-05, + "loss": 0.4777, + "step": 4674 + }, + { + "epoch": 0.8, + "learning_rate": 1.722334780614756e-05, + "loss": 0.5163, + "step": 4675 + }, + { + "epoch": 0.8, + "learning_rate": 1.722207428348345e-05, + "loss": 0.4995, + "step": 4676 + }, + { + "epoch": 0.8, + "learning_rate": 1.7220800515939667e-05, + "loss": 0.4745, + "step": 4677 + }, + { + "epoch": 0.8, + "learning_rate": 1.7219526503559408e-05, + "loss": 0.4834, + "step": 4678 + }, + { + "epoch": 0.8, + "learning_rate": 1.7218252246385865e-05, + "loss": 0.4781, + "step": 4679 + }, + { + "epoch": 0.8, + "learning_rate": 1.721697774446225e-05, + "loss": 0.5412, + "step": 4680 + }, + { + "epoch": 0.8, + "learning_rate": 1.721570299783177e-05, + "loss": 0.5152, + "step": 4681 + }, + { + "epoch": 0.8, + "learning_rate": 1.7214428006537657e-05, + "loss": 0.4941, + "step": 4682 + }, + { + "epoch": 0.8, + "learning_rate": 1.7213152770623137e-05, + "loss": 0.4816, + "step": 4683 + }, + { + "epoch": 0.8, + "learning_rate": 1.7211877290131452e-05, + "loss": 0.5319, + "step": 4684 + }, + { + "epoch": 0.8, + "learning_rate": 1.7210601565105847e-05, + "loss": 0.5106, + "step": 4685 + }, + { + "epoch": 0.8, + "learning_rate": 1.720932559558958e-05, + "loss": 0.5368, + "step": 4686 + }, + { + "epoch": 0.8, + "learning_rate": 1.7208049381625916e-05, + "loss": 0.5209, + "step": 4687 + }, + { + "epoch": 0.8, + "learning_rate": 1.720677292325813e-05, + "loss": 0.5173, + "step": 4688 + }, + { + "epoch": 0.8, + "learning_rate": 1.7205496220529496e-05, + "loss": 0.4668, + "step": 4689 + }, + { + "epoch": 0.8, + "learning_rate": 1.7204219273483305e-05, + "loss": 0.524, + "step": 4690 + }, + { + "epoch": 0.8, + "learning_rate": 1.7202942082162862e-05, + "loss": 0.5228, + "step": 4691 + }, + { + "epoch": 0.8, + "learning_rate": 1.720166464661147e-05, + "loss": 0.4956, + "step": 4692 + }, + { + "epoch": 0.8, + "learning_rate": 1.7200386966872436e-05, + "loss": 0.5282, + "step": 4693 + }, + { + "epoch": 0.8, + "learning_rate": 1.7199109042989088e-05, + "loss": 0.5302, + "step": 4694 + }, + { + "epoch": 0.8, + "learning_rate": 1.7197830875004755e-05, + "loss": 0.4798, + "step": 4695 + }, + { + "epoch": 0.8, + "learning_rate": 1.719655246296278e-05, + "loss": 0.4923, + "step": 4696 + }, + { + "epoch": 0.8, + "learning_rate": 1.7195273806906504e-05, + "loss": 0.4958, + "step": 4697 + }, + { + "epoch": 0.8, + "learning_rate": 1.719399490687929e-05, + "loss": 0.5146, + "step": 4698 + }, + { + "epoch": 0.8, + "learning_rate": 1.7192715762924493e-05, + "loss": 0.4776, + "step": 4699 + }, + { + "epoch": 0.8, + "learning_rate": 1.7191436375085496e-05, + "loss": 0.4792, + "step": 4700 + }, + { + "epoch": 0.8, + "learning_rate": 1.719015674340567e-05, + "loss": 0.4878, + "step": 4701 + }, + { + "epoch": 0.8, + "learning_rate": 1.7188876867928405e-05, + "loss": 0.5118, + "step": 4702 + }, + { + "epoch": 0.8, + "learning_rate": 1.7187596748697098e-05, + "loss": 0.5119, + "step": 4703 + }, + { + "epoch": 0.8, + "learning_rate": 1.718631638575516e-05, + "loss": 0.51, + "step": 4704 + }, + { + "epoch": 0.8, + "learning_rate": 1.7185035779146e-05, + "loss": 0.5116, + "step": 4705 + }, + { + "epoch": 0.8, + "learning_rate": 1.7183754928913038e-05, + "loss": 0.5248, + "step": 4706 + }, + { + "epoch": 0.8, + "learning_rate": 1.7182473835099703e-05, + "loss": 0.5226, + "step": 4707 + }, + { + "epoch": 0.8, + "learning_rate": 1.718119249774944e-05, + "loss": 0.4859, + "step": 4708 + }, + { + "epoch": 0.8, + "learning_rate": 1.717991091690569e-05, + "loss": 0.5013, + "step": 4709 + }, + { + "epoch": 0.8, + "learning_rate": 1.717862909261191e-05, + "loss": 0.5117, + "step": 4710 + }, + { + "epoch": 0.8, + "learning_rate": 1.7177347024911562e-05, + "loss": 0.489, + "step": 4711 + }, + { + "epoch": 0.8, + "learning_rate": 1.7176064713848117e-05, + "loss": 0.4801, + "step": 4712 + }, + { + "epoch": 0.8, + "learning_rate": 1.7174782159465056e-05, + "loss": 0.5095, + "step": 4713 + }, + { + "epoch": 0.8, + "learning_rate": 1.7173499361805863e-05, + "loss": 0.5294, + "step": 4714 + }, + { + "epoch": 0.8, + "learning_rate": 1.7172216320914036e-05, + "loss": 0.483, + "step": 4715 + }, + { + "epoch": 0.8, + "learning_rate": 1.717093303683308e-05, + "loss": 0.5317, + "step": 4716 + }, + { + "epoch": 0.8, + "learning_rate": 1.7169649509606507e-05, + "loss": 0.4796, + "step": 4717 + }, + { + "epoch": 0.8, + "learning_rate": 1.7168365739277838e-05, + "loss": 0.49, + "step": 4718 + }, + { + "epoch": 0.8, + "learning_rate": 1.7167081725890602e-05, + "loss": 0.4899, + "step": 4719 + }, + { + "epoch": 0.8, + "learning_rate": 1.7165797469488336e-05, + "loss": 0.5081, + "step": 4720 + }, + { + "epoch": 0.81, + "learning_rate": 1.7164512970114587e-05, + "loss": 0.5111, + "step": 4721 + }, + { + "epoch": 0.81, + "learning_rate": 1.71632282278129e-05, + "loss": 0.4581, + "step": 4722 + }, + { + "epoch": 0.81, + "learning_rate": 1.7161943242626852e-05, + "loss": 0.5184, + "step": 4723 + }, + { + "epoch": 0.81, + "learning_rate": 1.7160658014600004e-05, + "loss": 0.5124, + "step": 4724 + }, + { + "epoch": 0.81, + "learning_rate": 1.7159372543775932e-05, + "loss": 0.5225, + "step": 4725 + }, + { + "epoch": 0.81, + "learning_rate": 1.7158086830198228e-05, + "loss": 0.498, + "step": 4726 + }, + { + "epoch": 0.81, + "learning_rate": 1.7156800873910483e-05, + "loss": 0.5011, + "step": 4727 + }, + { + "epoch": 0.81, + "learning_rate": 1.7155514674956304e-05, + "loss": 0.5367, + "step": 4728 + }, + { + "epoch": 0.81, + "learning_rate": 1.7154228233379297e-05, + "loss": 0.5107, + "step": 4729 + }, + { + "epoch": 0.81, + "learning_rate": 1.7152941549223087e-05, + "loss": 0.4844, + "step": 4730 + }, + { + "epoch": 0.81, + "learning_rate": 1.71516546225313e-05, + "loss": 0.4981, + "step": 4731 + }, + { + "epoch": 0.81, + "learning_rate": 1.715036745334757e-05, + "loss": 0.4802, + "step": 4732 + }, + { + "epoch": 0.81, + "learning_rate": 1.7149080041715543e-05, + "loss": 0.5289, + "step": 4733 + }, + { + "epoch": 0.81, + "learning_rate": 1.714779238767887e-05, + "loss": 0.5515, + "step": 4734 + }, + { + "epoch": 0.81, + "learning_rate": 1.7146504491281214e-05, + "loss": 0.4748, + "step": 4735 + }, + { + "epoch": 0.81, + "learning_rate": 1.714521635256624e-05, + "loss": 0.5169, + "step": 4736 + }, + { + "epoch": 0.81, + "learning_rate": 1.7143927971577627e-05, + "loss": 0.4944, + "step": 4737 + }, + { + "epoch": 0.81, + "learning_rate": 1.7142639348359065e-05, + "loss": 0.5033, + "step": 4738 + }, + { + "epoch": 0.81, + "learning_rate": 1.714135048295424e-05, + "loss": 0.5333, + "step": 4739 + }, + { + "epoch": 0.81, + "learning_rate": 1.714006137540686e-05, + "loss": 0.4922, + "step": 4740 + }, + { + "epoch": 0.81, + "learning_rate": 1.713877202576063e-05, + "loss": 0.5423, + "step": 4741 + }, + { + "epoch": 0.81, + "learning_rate": 1.713748243405927e-05, + "loss": 0.4992, + "step": 4742 + }, + { + "epoch": 0.81, + "learning_rate": 1.7136192600346502e-05, + "loss": 0.4996, + "step": 4743 + }, + { + "epoch": 0.81, + "learning_rate": 1.713490252466607e-05, + "loss": 0.5187, + "step": 4744 + }, + { + "epoch": 0.81, + "learning_rate": 1.713361220706171e-05, + "loss": 0.4991, + "step": 4745 + }, + { + "epoch": 0.81, + "learning_rate": 1.7132321647577176e-05, + "loss": 0.4911, + "step": 4746 + }, + { + "epoch": 0.81, + "learning_rate": 1.7131030846256226e-05, + "loss": 0.48, + "step": 4747 + }, + { + "epoch": 0.81, + "learning_rate": 1.7129739803142627e-05, + "loss": 0.4946, + "step": 4748 + }, + { + "epoch": 0.81, + "learning_rate": 1.7128448518280153e-05, + "loss": 0.5494, + "step": 4749 + }, + { + "epoch": 0.81, + "learning_rate": 1.712715699171259e-05, + "loss": 0.5294, + "step": 4750 + }, + { + "epoch": 0.81, + "learning_rate": 1.7125865223483726e-05, + "loss": 0.4656, + "step": 4751 + }, + { + "epoch": 0.81, + "learning_rate": 1.7124573213637367e-05, + "loss": 0.536, + "step": 4752 + }, + { + "epoch": 0.81, + "learning_rate": 1.712328096221732e-05, + "loss": 0.5114, + "step": 4753 + }, + { + "epoch": 0.81, + "learning_rate": 1.71219884692674e-05, + "loss": 0.519, + "step": 4754 + }, + { + "epoch": 0.81, + "learning_rate": 1.712069573483143e-05, + "loss": 0.4594, + "step": 4755 + }, + { + "epoch": 0.81, + "learning_rate": 1.7119402758953247e-05, + "loss": 0.4628, + "step": 4756 + }, + { + "epoch": 0.81, + "learning_rate": 1.7118109541676688e-05, + "loss": 0.4997, + "step": 4757 + }, + { + "epoch": 0.81, + "learning_rate": 1.7116816083045603e-05, + "loss": 0.5229, + "step": 4758 + }, + { + "epoch": 0.81, + "learning_rate": 1.7115522383103853e-05, + "loss": 0.5296, + "step": 4759 + }, + { + "epoch": 0.81, + "learning_rate": 1.71142284418953e-05, + "loss": 0.5072, + "step": 4760 + }, + { + "epoch": 0.81, + "learning_rate": 1.7112934259463817e-05, + "loss": 0.4948, + "step": 4761 + }, + { + "epoch": 0.81, + "learning_rate": 1.7111639835853292e-05, + "loss": 0.5053, + "step": 4762 + }, + { + "epoch": 0.81, + "learning_rate": 1.711034517110761e-05, + "loss": 0.4995, + "step": 4763 + }, + { + "epoch": 0.81, + "learning_rate": 1.7109050265270667e-05, + "loss": 0.5084, + "step": 4764 + }, + { + "epoch": 0.81, + "learning_rate": 1.710775511838638e-05, + "loss": 0.5304, + "step": 4765 + }, + { + "epoch": 0.81, + "learning_rate": 1.710645973049865e-05, + "loss": 0.5161, + "step": 4766 + }, + { + "epoch": 0.81, + "learning_rate": 1.710516410165141e-05, + "loss": 0.4693, + "step": 4767 + }, + { + "epoch": 0.81, + "learning_rate": 1.7103868231888588e-05, + "loss": 0.5098, + "step": 4768 + }, + { + "epoch": 0.81, + "learning_rate": 1.710257212125412e-05, + "loss": 0.5004, + "step": 4769 + }, + { + "epoch": 0.81, + "learning_rate": 1.7101275769791955e-05, + "loss": 0.534, + "step": 4770 + }, + { + "epoch": 0.81, + "learning_rate": 1.7099979177546052e-05, + "loss": 0.5279, + "step": 4771 + }, + { + "epoch": 0.81, + "learning_rate": 1.7098682344560376e-05, + "loss": 0.5175, + "step": 4772 + }, + { + "epoch": 0.81, + "learning_rate": 1.709738527087889e-05, + "loss": 0.5108, + "step": 4773 + }, + { + "epoch": 0.81, + "learning_rate": 1.709608795654558e-05, + "loss": 0.5175, + "step": 4774 + }, + { + "epoch": 0.81, + "learning_rate": 1.7094790401604436e-05, + "loss": 0.5136, + "step": 4775 + }, + { + "epoch": 0.81, + "learning_rate": 1.709349260609945e-05, + "loss": 0.5276, + "step": 4776 + }, + { + "epoch": 0.81, + "learning_rate": 1.7092194570074626e-05, + "loss": 0.507, + "step": 4777 + }, + { + "epoch": 0.81, + "learning_rate": 1.709089629357398e-05, + "loss": 0.5166, + "step": 4778 + }, + { + "epoch": 0.82, + "learning_rate": 1.7089597776641533e-05, + "loss": 0.4457, + "step": 4779 + }, + { + "epoch": 0.82, + "learning_rate": 1.7088299019321314e-05, + "loss": 0.5307, + "step": 4780 + }, + { + "epoch": 0.82, + "learning_rate": 1.7087000021657356e-05, + "loss": 0.526, + "step": 4781 + }, + { + "epoch": 0.82, + "learning_rate": 1.7085700783693712e-05, + "loss": 0.4982, + "step": 4782 + }, + { + "epoch": 0.82, + "learning_rate": 1.7084401305474424e-05, + "loss": 0.4878, + "step": 4783 + }, + { + "epoch": 0.82, + "learning_rate": 1.7083101587043565e-05, + "loss": 0.4931, + "step": 4784 + }, + { + "epoch": 0.82, + "learning_rate": 1.7081801628445195e-05, + "loss": 0.5143, + "step": 4785 + }, + { + "epoch": 0.82, + "learning_rate": 1.70805014297234e-05, + "loss": 0.5289, + "step": 4786 + }, + { + "epoch": 0.82, + "learning_rate": 1.7079200990922263e-05, + "loss": 0.4799, + "step": 4787 + }, + { + "epoch": 0.82, + "learning_rate": 1.7077900312085875e-05, + "loss": 0.5207, + "step": 4788 + }, + { + "epoch": 0.82, + "learning_rate": 1.707659939325834e-05, + "loss": 0.4944, + "step": 4789 + }, + { + "epoch": 0.82, + "learning_rate": 1.7075298234483772e-05, + "loss": 0.5511, + "step": 4790 + }, + { + "epoch": 0.82, + "learning_rate": 1.7073996835806286e-05, + "loss": 0.5214, + "step": 4791 + }, + { + "epoch": 0.82, + "learning_rate": 1.707269519727001e-05, + "loss": 0.5214, + "step": 4792 + }, + { + "epoch": 0.82, + "learning_rate": 1.7071393318919077e-05, + "loss": 0.5451, + "step": 4793 + }, + { + "epoch": 0.82, + "learning_rate": 1.7070091200797634e-05, + "loss": 0.5169, + "step": 4794 + }, + { + "epoch": 0.82, + "learning_rate": 1.7068788842949824e-05, + "loss": 0.4933, + "step": 4795 + }, + { + "epoch": 0.82, + "learning_rate": 1.7067486245419815e-05, + "loss": 0.4753, + "step": 4796 + }, + { + "epoch": 0.82, + "learning_rate": 1.706618340825177e-05, + "loss": 0.5496, + "step": 4797 + }, + { + "epoch": 0.82, + "learning_rate": 1.7064880331489866e-05, + "loss": 0.5073, + "step": 4798 + }, + { + "epoch": 0.82, + "learning_rate": 1.7063577015178286e-05, + "loss": 0.5408, + "step": 4799 + }, + { + "epoch": 0.82, + "learning_rate": 1.706227345936122e-05, + "loss": 0.5677, + "step": 4800 + }, + { + "epoch": 0.82, + "learning_rate": 1.7060969664082874e-05, + "loss": 0.4563, + "step": 4801 + }, + { + "epoch": 0.82, + "learning_rate": 1.7059665629387445e-05, + "loss": 0.4693, + "step": 4802 + }, + { + "epoch": 0.82, + "learning_rate": 1.7058361355319157e-05, + "loss": 0.4978, + "step": 4803 + }, + { + "epoch": 0.82, + "learning_rate": 1.7057056841922234e-05, + "loss": 0.5049, + "step": 4804 + }, + { + "epoch": 0.82, + "learning_rate": 1.7055752089240907e-05, + "loss": 0.5, + "step": 4805 + }, + { + "epoch": 0.82, + "learning_rate": 1.7054447097319413e-05, + "loss": 0.5121, + "step": 4806 + }, + { + "epoch": 0.82, + "learning_rate": 1.7053141866202007e-05, + "loss": 0.5155, + "step": 4807 + }, + { + "epoch": 0.82, + "learning_rate": 1.705183639593294e-05, + "loss": 0.4953, + "step": 4808 + }, + { + "epoch": 0.82, + "learning_rate": 1.705053068655648e-05, + "loss": 0.5096, + "step": 4809 + }, + { + "epoch": 0.82, + "learning_rate": 1.70492247381169e-05, + "loss": 0.5255, + "step": 4810 + }, + { + "epoch": 0.82, + "learning_rate": 1.7047918550658476e-05, + "loss": 0.4506, + "step": 4811 + }, + { + "epoch": 0.82, + "learning_rate": 1.7046612124225506e-05, + "loss": 0.5029, + "step": 4812 + }, + { + "epoch": 0.82, + "learning_rate": 1.704530545886228e-05, + "loss": 0.5101, + "step": 4813 + }, + { + "epoch": 0.82, + "learning_rate": 1.70439985546131e-05, + "loss": 0.5137, + "step": 4814 + }, + { + "epoch": 0.82, + "learning_rate": 1.704269141152229e-05, + "loss": 0.4822, + "step": 4815 + }, + { + "epoch": 0.82, + "learning_rate": 1.7041384029634165e-05, + "loss": 0.486, + "step": 4816 + }, + { + "epoch": 0.82, + "learning_rate": 1.7040076408993054e-05, + "loss": 0.4927, + "step": 4817 + }, + { + "epoch": 0.82, + "learning_rate": 1.7038768549643297e-05, + "loss": 0.477, + "step": 4818 + }, + { + "epoch": 0.82, + "learning_rate": 1.703746045162924e-05, + "loss": 0.5241, + "step": 4819 + }, + { + "epoch": 0.82, + "learning_rate": 1.7036152114995236e-05, + "loss": 0.4819, + "step": 4820 + }, + { + "epoch": 0.82, + "learning_rate": 1.7034843539785646e-05, + "loss": 0.4984, + "step": 4821 + }, + { + "epoch": 0.82, + "learning_rate": 1.703353472604484e-05, + "loss": 0.5173, + "step": 4822 + }, + { + "epoch": 0.82, + "learning_rate": 1.7032225673817194e-05, + "loss": 0.4951, + "step": 4823 + }, + { + "epoch": 0.82, + "learning_rate": 1.7030916383147104e-05, + "loss": 0.4742, + "step": 4824 + }, + { + "epoch": 0.82, + "learning_rate": 1.702960685407895e-05, + "loss": 0.5435, + "step": 4825 + }, + { + "epoch": 0.82, + "learning_rate": 1.7028297086657145e-05, + "loss": 0.4897, + "step": 4826 + }, + { + "epoch": 0.82, + "learning_rate": 1.7026987080926093e-05, + "loss": 0.5023, + "step": 4827 + }, + { + "epoch": 0.82, + "learning_rate": 1.702567683693022e-05, + "loss": 0.4945, + "step": 4828 + }, + { + "epoch": 0.82, + "learning_rate": 1.7024366354713945e-05, + "loss": 0.5021, + "step": 4829 + }, + { + "epoch": 0.82, + "learning_rate": 1.7023055634321707e-05, + "loss": 0.4542, + "step": 4830 + }, + { + "epoch": 0.82, + "learning_rate": 1.7021744675797946e-05, + "loss": 0.5311, + "step": 4831 + }, + { + "epoch": 0.82, + "learning_rate": 1.7020433479187115e-05, + "loss": 0.5231, + "step": 4832 + }, + { + "epoch": 0.82, + "learning_rate": 1.701912204453367e-05, + "loss": 0.4801, + "step": 4833 + }, + { + "epoch": 0.82, + "learning_rate": 1.7017810371882083e-05, + "loss": 0.5051, + "step": 4834 + }, + { + "epoch": 0.82, + "learning_rate": 1.7016498461276826e-05, + "loss": 0.5153, + "step": 4835 + }, + { + "epoch": 0.82, + "learning_rate": 1.701518631276238e-05, + "loss": 0.4838, + "step": 4836 + }, + { + "epoch": 0.82, + "learning_rate": 1.701387392638324e-05, + "loss": 0.5008, + "step": 4837 + }, + { + "epoch": 0.83, + "learning_rate": 1.7012561302183904e-05, + "loss": 0.5534, + "step": 4838 + }, + { + "epoch": 0.83, + "learning_rate": 1.701124844020888e-05, + "loss": 0.4903, + "step": 4839 + }, + { + "epoch": 0.83, + "learning_rate": 1.7009935340502678e-05, + "loss": 0.507, + "step": 4840 + }, + { + "epoch": 0.83, + "learning_rate": 1.700862200310983e-05, + "loss": 0.4935, + "step": 4841 + }, + { + "epoch": 0.83, + "learning_rate": 1.7007308428074857e-05, + "loss": 0.5307, + "step": 4842 + }, + { + "epoch": 0.83, + "learning_rate": 1.7005994615442315e-05, + "loss": 0.512, + "step": 4843 + }, + { + "epoch": 0.83, + "learning_rate": 1.7004680565256733e-05, + "loss": 0.4993, + "step": 4844 + }, + { + "epoch": 0.83, + "learning_rate": 1.7003366277562676e-05, + "loss": 0.5011, + "step": 4845 + }, + { + "epoch": 0.83, + "learning_rate": 1.700205175240471e-05, + "loss": 0.4946, + "step": 4846 + }, + { + "epoch": 0.83, + "learning_rate": 1.70007369898274e-05, + "loss": 0.4873, + "step": 4847 + }, + { + "epoch": 0.83, + "learning_rate": 1.699942198987533e-05, + "loss": 0.5242, + "step": 4848 + }, + { + "epoch": 0.83, + "learning_rate": 1.6998106752593086e-05, + "loss": 0.4998, + "step": 4849 + }, + { + "epoch": 0.83, + "learning_rate": 1.6996791278025265e-05, + "loss": 0.5004, + "step": 4850 + }, + { + "epoch": 0.83, + "learning_rate": 1.6995475566216475e-05, + "loss": 0.4949, + "step": 4851 + }, + { + "epoch": 0.83, + "learning_rate": 1.6994159617211318e-05, + "loss": 0.499, + "step": 4852 + }, + { + "epoch": 0.83, + "learning_rate": 1.6992843431054425e-05, + "loss": 0.4995, + "step": 4853 + }, + { + "epoch": 0.83, + "learning_rate": 1.6991527007790415e-05, + "loss": 0.5055, + "step": 4854 + }, + { + "epoch": 0.83, + "learning_rate": 1.6990210347463933e-05, + "loss": 0.4858, + "step": 4855 + }, + { + "epoch": 0.83, + "learning_rate": 1.6988893450119614e-05, + "loss": 0.4666, + "step": 4856 + }, + { + "epoch": 0.83, + "learning_rate": 1.6987576315802113e-05, + "loss": 0.4907, + "step": 4857 + }, + { + "epoch": 0.83, + "learning_rate": 1.6986258944556095e-05, + "loss": 0.4983, + "step": 4858 + }, + { + "epoch": 0.83, + "learning_rate": 1.6984941336426223e-05, + "loss": 0.5017, + "step": 4859 + }, + { + "epoch": 0.83, + "learning_rate": 1.6983623491457177e-05, + "loss": 0.5263, + "step": 4860 + }, + { + "epoch": 0.83, + "learning_rate": 1.6982305409693637e-05, + "loss": 0.4992, + "step": 4861 + }, + { + "epoch": 0.83, + "learning_rate": 1.6980987091180298e-05, + "loss": 0.4835, + "step": 4862 + }, + { + "epoch": 0.83, + "learning_rate": 1.697966853596186e-05, + "loss": 0.4863, + "step": 4863 + }, + { + "epoch": 0.83, + "learning_rate": 1.6978349744083035e-05, + "loss": 0.4767, + "step": 4864 + }, + { + "epoch": 0.83, + "learning_rate": 1.6977030715588534e-05, + "loss": 0.5116, + "step": 4865 + }, + { + "epoch": 0.83, + "learning_rate": 1.697571145052308e-05, + "loss": 0.5256, + "step": 4866 + }, + { + "epoch": 0.83, + "learning_rate": 1.6974391948931414e-05, + "loss": 0.5118, + "step": 4867 + }, + { + "epoch": 0.83, + "learning_rate": 1.6973072210858266e-05, + "loss": 0.4963, + "step": 4868 + }, + { + "epoch": 0.83, + "learning_rate": 1.6971752236348397e-05, + "loss": 0.4953, + "step": 4869 + }, + { + "epoch": 0.83, + "learning_rate": 1.6970432025446547e-05, + "loss": 0.4698, + "step": 4870 + }, + { + "epoch": 0.83, + "learning_rate": 1.69691115781975e-05, + "loss": 0.4775, + "step": 4871 + }, + { + "epoch": 0.83, + "learning_rate": 1.696779089464601e-05, + "loss": 0.4808, + "step": 4872 + }, + { + "epoch": 0.83, + "learning_rate": 1.6966469974836873e-05, + "loss": 0.4642, + "step": 4873 + }, + { + "epoch": 0.83, + "learning_rate": 1.6965148818814867e-05, + "loss": 0.5145, + "step": 4874 + }, + { + "epoch": 0.83, + "learning_rate": 1.6963827426624792e-05, + "loss": 0.5204, + "step": 4875 + }, + { + "epoch": 0.83, + "learning_rate": 1.6962505798311456e-05, + "loss": 0.5019, + "step": 4876 + }, + { + "epoch": 0.83, + "learning_rate": 1.6961183933919662e-05, + "loss": 0.5476, + "step": 4877 + }, + { + "epoch": 0.83, + "learning_rate": 1.6959861833494244e-05, + "loss": 0.4955, + "step": 4878 + }, + { + "epoch": 0.83, + "learning_rate": 1.6958539497080018e-05, + "loss": 0.5077, + "step": 4879 + }, + { + "epoch": 0.83, + "learning_rate": 1.6957216924721828e-05, + "loss": 0.5207, + "step": 4880 + }, + { + "epoch": 0.83, + "learning_rate": 1.6955894116464518e-05, + "loss": 0.4963, + "step": 4881 + }, + { + "epoch": 0.83, + "learning_rate": 1.695457107235294e-05, + "loss": 0.5271, + "step": 4882 + }, + { + "epoch": 0.83, + "learning_rate": 1.695324779243195e-05, + "loss": 0.4894, + "step": 4883 + }, + { + "epoch": 0.83, + "learning_rate": 1.6951924276746425e-05, + "loss": 0.4485, + "step": 4884 + }, + { + "epoch": 0.83, + "learning_rate": 1.6950600525341235e-05, + "loss": 0.52, + "step": 4885 + }, + { + "epoch": 0.83, + "learning_rate": 1.6949276538261267e-05, + "loss": 0.5147, + "step": 4886 + }, + { + "epoch": 0.83, + "learning_rate": 1.6947952315551414e-05, + "loss": 0.4711, + "step": 4887 + }, + { + "epoch": 0.83, + "learning_rate": 1.6946627857256576e-05, + "loss": 0.4683, + "step": 4888 + }, + { + "epoch": 0.83, + "learning_rate": 1.694530316342166e-05, + "loss": 0.5019, + "step": 4889 + }, + { + "epoch": 0.83, + "learning_rate": 1.6943978234091584e-05, + "loss": 0.48, + "step": 4890 + }, + { + "epoch": 0.83, + "learning_rate": 1.6942653069311274e-05, + "loss": 0.516, + "step": 4891 + }, + { + "epoch": 0.83, + "learning_rate": 1.694132766912566e-05, + "loss": 0.5459, + "step": 4892 + }, + { + "epoch": 0.83, + "learning_rate": 1.6940002033579685e-05, + "loss": 0.524, + "step": 4893 + }, + { + "epoch": 0.83, + "learning_rate": 1.6938676162718294e-05, + "loss": 0.4835, + "step": 4894 + }, + { + "epoch": 0.83, + "learning_rate": 1.693735005658645e-05, + "loss": 0.4942, + "step": 4895 + }, + { + "epoch": 0.83, + "learning_rate": 1.693602371522911e-05, + "loss": 0.5073, + "step": 4896 + }, + { + "epoch": 0.84, + "learning_rate": 1.6934697138691248e-05, + "loss": 0.52, + "step": 4897 + }, + { + "epoch": 0.84, + "learning_rate": 1.6933370327017843e-05, + "loss": 0.4957, + "step": 4898 + }, + { + "epoch": 0.84, + "learning_rate": 1.6932043280253892e-05, + "loss": 0.4907, + "step": 4899 + }, + { + "epoch": 0.84, + "learning_rate": 1.6930715998444384e-05, + "loss": 0.5144, + "step": 4900 + }, + { + "epoch": 0.84, + "learning_rate": 1.6929388481634323e-05, + "loss": 0.491, + "step": 4901 + }, + { + "epoch": 0.84, + "learning_rate": 1.6928060729868723e-05, + "loss": 0.5012, + "step": 4902 + }, + { + "epoch": 0.84, + "learning_rate": 1.6926732743192606e-05, + "loss": 0.5472, + "step": 4903 + }, + { + "epoch": 0.84, + "learning_rate": 1.6925404521650993e-05, + "loss": 0.5028, + "step": 4904 + }, + { + "epoch": 0.84, + "learning_rate": 1.692407606528893e-05, + "loss": 0.4887, + "step": 4905 + }, + { + "epoch": 0.84, + "learning_rate": 1.6922747374151456e-05, + "loss": 0.5318, + "step": 4906 + }, + { + "epoch": 0.84, + "learning_rate": 1.6921418448283625e-05, + "loss": 0.5062, + "step": 4907 + }, + { + "epoch": 0.84, + "learning_rate": 1.6920089287730493e-05, + "loss": 0.4912, + "step": 4908 + }, + { + "epoch": 0.84, + "learning_rate": 1.6918759892537132e-05, + "loss": 0.5284, + "step": 4909 + }, + { + "epoch": 0.84, + "learning_rate": 1.6917430262748618e-05, + "loss": 0.4584, + "step": 4910 + }, + { + "epoch": 0.84, + "learning_rate": 1.691610039841003e-05, + "loss": 0.5049, + "step": 4911 + }, + { + "epoch": 0.84, + "learning_rate": 1.6914770299566467e-05, + "loss": 0.4751, + "step": 4912 + }, + { + "epoch": 0.84, + "learning_rate": 1.6913439966263022e-05, + "loss": 0.4792, + "step": 4913 + }, + { + "epoch": 0.84, + "learning_rate": 1.691210939854481e-05, + "loss": 0.4893, + "step": 4914 + }, + { + "epoch": 0.84, + "learning_rate": 1.691077859645694e-05, + "loss": 0.489, + "step": 4915 + }, + { + "epoch": 0.84, + "learning_rate": 1.6909447560044537e-05, + "loss": 0.5318, + "step": 4916 + }, + { + "epoch": 0.84, + "learning_rate": 1.6908116289352735e-05, + "loss": 0.5107, + "step": 4917 + }, + { + "epoch": 0.84, + "learning_rate": 1.6906784784426674e-05, + "loss": 0.5491, + "step": 4918 + }, + { + "epoch": 0.84, + "learning_rate": 1.6905453045311496e-05, + "loss": 0.4916, + "step": 4919 + }, + { + "epoch": 0.84, + "learning_rate": 1.6904121072052363e-05, + "loss": 0.4953, + "step": 4920 + }, + { + "epoch": 0.84, + "learning_rate": 1.6902788864694435e-05, + "loss": 0.5143, + "step": 4921 + }, + { + "epoch": 0.84, + "learning_rate": 1.6901456423282886e-05, + "loss": 0.5212, + "step": 4922 + }, + { + "epoch": 0.84, + "learning_rate": 1.6900123747862893e-05, + "loss": 0.4996, + "step": 4923 + }, + { + "epoch": 0.84, + "learning_rate": 1.6898790838479643e-05, + "loss": 0.5317, + "step": 4924 + }, + { + "epoch": 0.84, + "learning_rate": 1.6897457695178332e-05, + "loss": 0.4619, + "step": 4925 + }, + { + "epoch": 0.84, + "learning_rate": 1.689612431800416e-05, + "loss": 0.5036, + "step": 4926 + }, + { + "epoch": 0.84, + "learning_rate": 1.6894790707002344e-05, + "loss": 0.5137, + "step": 4927 + }, + { + "epoch": 0.84, + "learning_rate": 1.6893456862218096e-05, + "loss": 0.49, + "step": 4928 + }, + { + "epoch": 0.84, + "learning_rate": 1.689212278369665e-05, + "loss": 0.4661, + "step": 4929 + }, + { + "epoch": 0.84, + "learning_rate": 1.6890788471483236e-05, + "loss": 0.4509, + "step": 4930 + }, + { + "epoch": 0.84, + "learning_rate": 1.6889453925623093e-05, + "loss": 0.458, + "step": 4931 + }, + { + "epoch": 0.84, + "learning_rate": 1.688811914616148e-05, + "loss": 0.4767, + "step": 4932 + }, + { + "epoch": 0.84, + "learning_rate": 1.6886784133143654e-05, + "loss": 0.5114, + "step": 4933 + }, + { + "epoch": 0.84, + "learning_rate": 1.6885448886614875e-05, + "loss": 0.4565, + "step": 4934 + }, + { + "epoch": 0.84, + "learning_rate": 1.6884113406620425e-05, + "loss": 0.518, + "step": 4935 + }, + { + "epoch": 0.84, + "learning_rate": 1.6882777693205578e-05, + "loss": 0.5419, + "step": 4936 + }, + { + "epoch": 0.84, + "learning_rate": 1.6881441746415634e-05, + "loss": 0.4807, + "step": 4937 + }, + { + "epoch": 0.84, + "learning_rate": 1.6880105566295886e-05, + "loss": 0.4742, + "step": 4938 + }, + { + "epoch": 0.84, + "learning_rate": 1.687876915289164e-05, + "loss": 0.4837, + "step": 4939 + }, + { + "epoch": 0.84, + "learning_rate": 1.6877432506248207e-05, + "loss": 0.5435, + "step": 4940 + }, + { + "epoch": 0.84, + "learning_rate": 1.6876095626410913e-05, + "loss": 0.4834, + "step": 4941 + }, + { + "epoch": 0.84, + "learning_rate": 1.6874758513425086e-05, + "loss": 0.4836, + "step": 4942 + }, + { + "epoch": 0.84, + "learning_rate": 1.6873421167336066e-05, + "loss": 0.5191, + "step": 4943 + }, + { + "epoch": 0.84, + "learning_rate": 1.6872083588189197e-05, + "loss": 0.4842, + "step": 4944 + }, + { + "epoch": 0.84, + "learning_rate": 1.6870745776029832e-05, + "loss": 0.4758, + "step": 4945 + }, + { + "epoch": 0.84, + "learning_rate": 1.686940773090333e-05, + "loss": 0.504, + "step": 4946 + }, + { + "epoch": 0.84, + "learning_rate": 1.6868069452855066e-05, + "loss": 0.5045, + "step": 4947 + }, + { + "epoch": 0.84, + "learning_rate": 1.6866730941930413e-05, + "loss": 0.5082, + "step": 4948 + }, + { + "epoch": 0.84, + "learning_rate": 1.6865392198174757e-05, + "loss": 0.5044, + "step": 4949 + }, + { + "epoch": 0.84, + "learning_rate": 1.686405322163349e-05, + "loss": 0.485, + "step": 4950 + }, + { + "epoch": 0.84, + "learning_rate": 1.6862714012352016e-05, + "loss": 0.4994, + "step": 4951 + }, + { + "epoch": 0.84, + "learning_rate": 1.686137457037574e-05, + "loss": 0.495, + "step": 4952 + }, + { + "epoch": 0.84, + "learning_rate": 1.686003489575008e-05, + "loss": 0.4723, + "step": 4953 + }, + { + "epoch": 0.84, + "learning_rate": 1.685869498852046e-05, + "loss": 0.5055, + "step": 4954 + }, + { + "epoch": 0.85, + "learning_rate": 1.6857354848732312e-05, + "loss": 0.5043, + "step": 4955 + }, + { + "epoch": 0.85, + "learning_rate": 1.685601447643108e-05, + "loss": 0.4734, + "step": 4956 + }, + { + "epoch": 0.85, + "learning_rate": 1.6854673871662205e-05, + "loss": 0.4958, + "step": 4957 + }, + { + "epoch": 0.85, + "learning_rate": 1.6853333034471155e-05, + "loss": 0.4894, + "step": 4958 + }, + { + "epoch": 0.85, + "learning_rate": 1.685199196490338e-05, + "loss": 0.5216, + "step": 4959 + }, + { + "epoch": 0.85, + "learning_rate": 1.685065066300436e-05, + "loss": 0.4897, + "step": 4960 + }, + { + "epoch": 0.85, + "learning_rate": 1.6849309128819572e-05, + "loss": 0.5107, + "step": 4961 + }, + { + "epoch": 0.85, + "learning_rate": 1.6847967362394503e-05, + "loss": 0.4803, + "step": 4962 + }, + { + "epoch": 0.85, + "learning_rate": 1.6846625363774654e-05, + "loss": 0.4572, + "step": 4963 + }, + { + "epoch": 0.85, + "learning_rate": 1.684528313300552e-05, + "loss": 0.5144, + "step": 4964 + }, + { + "epoch": 0.85, + "learning_rate": 1.684394067013262e-05, + "loss": 0.5044, + "step": 4965 + }, + { + "epoch": 0.85, + "learning_rate": 1.6842597975201465e-05, + "loss": 0.4568, + "step": 4966 + }, + { + "epoch": 0.85, + "learning_rate": 1.684125504825759e-05, + "loss": 0.4641, + "step": 4967 + }, + { + "epoch": 0.85, + "learning_rate": 1.6839911889346527e-05, + "loss": 0.4687, + "step": 4968 + }, + { + "epoch": 0.85, + "learning_rate": 1.6838568498513812e-05, + "loss": 0.5064, + "step": 4969 + }, + { + "epoch": 0.85, + "learning_rate": 1.6837224875805004e-05, + "loss": 0.5184, + "step": 4970 + }, + { + "epoch": 0.85, + "learning_rate": 1.6835881021265657e-05, + "loss": 0.4742, + "step": 4971 + }, + { + "epoch": 0.85, + "learning_rate": 1.6834536934941345e-05, + "loss": 0.5146, + "step": 4972 + }, + { + "epoch": 0.85, + "learning_rate": 1.6833192616877627e-05, + "loss": 0.4898, + "step": 4973 + }, + { + "epoch": 0.85, + "learning_rate": 1.68318480671201e-05, + "loss": 0.4987, + "step": 4974 + }, + { + "epoch": 0.85, + "learning_rate": 1.6830503285714343e-05, + "loss": 0.5107, + "step": 4975 + }, + { + "epoch": 0.85, + "learning_rate": 1.682915827270596e-05, + "loss": 0.4565, + "step": 4976 + }, + { + "epoch": 0.85, + "learning_rate": 1.6827813028140553e-05, + "loss": 0.4867, + "step": 4977 + }, + { + "epoch": 0.85, + "learning_rate": 1.682646755206374e-05, + "loss": 0.4817, + "step": 4978 + }, + { + "epoch": 0.85, + "learning_rate": 1.682512184452114e-05, + "loss": 0.4969, + "step": 4979 + }, + { + "epoch": 0.85, + "learning_rate": 1.682377590555838e-05, + "loss": 0.4825, + "step": 4980 + }, + { + "epoch": 0.85, + "learning_rate": 1.6822429735221094e-05, + "loss": 0.5153, + "step": 4981 + }, + { + "epoch": 0.85, + "learning_rate": 1.6821083333554935e-05, + "loss": 0.4867, + "step": 4982 + }, + { + "epoch": 0.85, + "learning_rate": 1.6819736700605548e-05, + "loss": 0.4992, + "step": 4983 + }, + { + "epoch": 0.85, + "learning_rate": 1.68183898364186e-05, + "loss": 0.4932, + "step": 4984 + }, + { + "epoch": 0.85, + "learning_rate": 1.6817042741039757e-05, + "loss": 0.5086, + "step": 4985 + }, + { + "epoch": 0.85, + "learning_rate": 1.681569541451469e-05, + "loss": 0.4936, + "step": 4986 + }, + { + "epoch": 0.85, + "learning_rate": 1.6814347856889095e-05, + "loss": 0.5258, + "step": 4987 + }, + { + "epoch": 0.85, + "learning_rate": 1.681300006820865e-05, + "loss": 0.526, + "step": 4988 + }, + { + "epoch": 0.85, + "learning_rate": 1.6811652048519063e-05, + "loss": 0.4818, + "step": 4989 + }, + { + "epoch": 0.85, + "learning_rate": 1.6810303797866038e-05, + "loss": 0.5077, + "step": 4990 + }, + { + "epoch": 0.85, + "learning_rate": 1.680895531629529e-05, + "loss": 0.4435, + "step": 4991 + }, + { + "epoch": 0.85, + "learning_rate": 1.6807606603852544e-05, + "loss": 0.5345, + "step": 4992 + }, + { + "epoch": 0.85, + "learning_rate": 1.6806257660583534e-05, + "loss": 0.4729, + "step": 4993 + }, + { + "epoch": 0.85, + "learning_rate": 1.680490848653399e-05, + "loss": 0.491, + "step": 4994 + }, + { + "epoch": 0.85, + "learning_rate": 1.6803559081749668e-05, + "loss": 0.5028, + "step": 4995 + }, + { + "epoch": 0.85, + "learning_rate": 1.6802209446276318e-05, + "loss": 0.496, + "step": 4996 + }, + { + "epoch": 0.85, + "learning_rate": 1.6800859580159698e-05, + "loss": 0.547, + "step": 4997 + }, + { + "epoch": 0.85, + "learning_rate": 1.6799509483445585e-05, + "loss": 0.4788, + "step": 4998 + }, + { + "epoch": 0.85, + "learning_rate": 1.6798159156179756e-05, + "loss": 0.4757, + "step": 4999 + }, + { + "epoch": 0.85, + "learning_rate": 1.6796808598407995e-05, + "loss": 0.4825, + "step": 5000 + }, + { + "epoch": 0.85, + "learning_rate": 1.6795457810176095e-05, + "loss": 0.5077, + "step": 5001 + }, + { + "epoch": 0.85, + "learning_rate": 1.6794106791529855e-05, + "loss": 0.496, + "step": 5002 + }, + { + "epoch": 0.85, + "learning_rate": 1.6792755542515092e-05, + "loss": 0.5199, + "step": 5003 + }, + { + "epoch": 0.85, + "learning_rate": 1.6791404063177617e-05, + "loss": 0.496, + "step": 5004 + }, + { + "epoch": 0.85, + "learning_rate": 1.6790052353563254e-05, + "loss": 0.4794, + "step": 5005 + }, + { + "epoch": 0.85, + "learning_rate": 1.6788700413717835e-05, + "loss": 0.4497, + "step": 5006 + }, + { + "epoch": 0.85, + "learning_rate": 1.6787348243687204e-05, + "loss": 0.5031, + "step": 5007 + }, + { + "epoch": 0.85, + "learning_rate": 1.678599584351721e-05, + "loss": 0.4775, + "step": 5008 + }, + { + "epoch": 0.85, + "learning_rate": 1.6784643213253707e-05, + "loss": 0.4966, + "step": 5009 + }, + { + "epoch": 0.85, + "learning_rate": 1.6783290352942558e-05, + "loss": 0.5084, + "step": 5010 + }, + { + "epoch": 0.85, + "learning_rate": 1.6781937262629634e-05, + "loss": 0.5141, + "step": 5011 + }, + { + "epoch": 0.85, + "learning_rate": 1.6780583942360817e-05, + "loss": 0.5089, + "step": 5012 + }, + { + "epoch": 0.85, + "learning_rate": 1.6779230392181993e-05, + "loss": 0.5098, + "step": 5013 + }, + { + "epoch": 0.86, + "learning_rate": 1.6777876612139058e-05, + "loss": 0.5115, + "step": 5014 + }, + { + "epoch": 0.86, + "learning_rate": 1.6776522602277913e-05, + "loss": 0.5427, + "step": 5015 + }, + { + "epoch": 0.86, + "learning_rate": 1.6775168362644465e-05, + "loss": 0.5359, + "step": 5016 + }, + { + "epoch": 0.86, + "learning_rate": 1.677381389328464e-05, + "loss": 0.5119, + "step": 5017 + }, + { + "epoch": 0.86, + "learning_rate": 1.677245919424436e-05, + "loss": 0.4792, + "step": 5018 + }, + { + "epoch": 0.86, + "learning_rate": 1.677110426556956e-05, + "loss": 0.4803, + "step": 5019 + }, + { + "epoch": 0.86, + "learning_rate": 1.6769749107306183e-05, + "loss": 0.5283, + "step": 5020 + }, + { + "epoch": 0.86, + "learning_rate": 1.6768393719500175e-05, + "loss": 0.4812, + "step": 5021 + }, + { + "epoch": 0.86, + "learning_rate": 1.676703810219749e-05, + "loss": 0.5367, + "step": 5022 + }, + { + "epoch": 0.86, + "learning_rate": 1.6765682255444105e-05, + "loss": 0.4933, + "step": 5023 + }, + { + "epoch": 0.86, + "learning_rate": 1.6764326179285985e-05, + "loss": 0.4724, + "step": 5024 + }, + { + "epoch": 0.86, + "learning_rate": 1.676296987376911e-05, + "loss": 0.4746, + "step": 5025 + }, + { + "epoch": 0.86, + "learning_rate": 1.676161333893947e-05, + "loss": 0.4921, + "step": 5026 + }, + { + "epoch": 0.86, + "learning_rate": 1.676025657484306e-05, + "loss": 0.528, + "step": 5027 + }, + { + "epoch": 0.86, + "learning_rate": 1.6758899581525886e-05, + "loss": 0.4967, + "step": 5028 + }, + { + "epoch": 0.86, + "learning_rate": 1.675754235903396e-05, + "loss": 0.5032, + "step": 5029 + }, + { + "epoch": 0.86, + "learning_rate": 1.67561849074133e-05, + "loss": 0.4801, + "step": 5030 + }, + { + "epoch": 0.86, + "learning_rate": 1.6754827226709932e-05, + "loss": 0.49, + "step": 5031 + }, + { + "epoch": 0.86, + "learning_rate": 1.6753469316969893e-05, + "loss": 0.487, + "step": 5032 + }, + { + "epoch": 0.86, + "learning_rate": 1.6752111178239227e-05, + "loss": 0.4845, + "step": 5033 + }, + { + "epoch": 0.86, + "learning_rate": 1.6750752810563982e-05, + "loss": 0.5154, + "step": 5034 + }, + { + "epoch": 0.86, + "learning_rate": 1.674939421399022e-05, + "loss": 0.4969, + "step": 5035 + }, + { + "epoch": 0.86, + "learning_rate": 1.6748035388564e-05, + "loss": 0.5108, + "step": 5036 + }, + { + "epoch": 0.86, + "learning_rate": 1.67466763343314e-05, + "loss": 0.4915, + "step": 5037 + }, + { + "epoch": 0.86, + "learning_rate": 1.6745317051338505e-05, + "loss": 0.4646, + "step": 5038 + }, + { + "epoch": 0.86, + "learning_rate": 1.67439575396314e-05, + "loss": 0.464, + "step": 5039 + }, + { + "epoch": 0.86, + "learning_rate": 1.6742597799256182e-05, + "loss": 0.5051, + "step": 5040 + }, + { + "epoch": 0.86, + "learning_rate": 1.6741237830258957e-05, + "loss": 0.5249, + "step": 5041 + }, + { + "epoch": 0.86, + "learning_rate": 1.6739877632685838e-05, + "loss": 0.5187, + "step": 5042 + }, + { + "epoch": 0.86, + "learning_rate": 1.673851720658295e-05, + "loss": 0.5339, + "step": 5043 + }, + { + "epoch": 0.86, + "learning_rate": 1.673715655199641e-05, + "loss": 0.5433, + "step": 5044 + }, + { + "epoch": 0.86, + "learning_rate": 1.673579566897236e-05, + "loss": 0.4831, + "step": 5045 + }, + { + "epoch": 0.86, + "learning_rate": 1.673443455755695e-05, + "loss": 0.4754, + "step": 5046 + }, + { + "epoch": 0.86, + "learning_rate": 1.673307321779632e-05, + "loss": 0.5259, + "step": 5047 + }, + { + "epoch": 0.86, + "learning_rate": 1.6731711649736637e-05, + "loss": 0.5068, + "step": 5048 + }, + { + "epoch": 0.86, + "learning_rate": 1.6730349853424064e-05, + "loss": 0.5028, + "step": 5049 + }, + { + "epoch": 0.86, + "learning_rate": 1.6728987828904775e-05, + "loss": 0.5068, + "step": 5050 + }, + { + "epoch": 0.86, + "learning_rate": 1.6727625576224954e-05, + "loss": 0.4898, + "step": 5051 + }, + { + "epoch": 0.86, + "learning_rate": 1.6726263095430792e-05, + "loss": 0.5221, + "step": 5052 + }, + { + "epoch": 0.86, + "learning_rate": 1.6724900386568486e-05, + "loss": 0.4882, + "step": 5053 + }, + { + "epoch": 0.86, + "learning_rate": 1.6723537449684243e-05, + "loss": 0.4604, + "step": 5054 + }, + { + "epoch": 0.86, + "learning_rate": 1.672217428482427e-05, + "loss": 0.4581, + "step": 5055 + }, + { + "epoch": 0.86, + "learning_rate": 1.6720810892034796e-05, + "loss": 0.4938, + "step": 5056 + }, + { + "epoch": 0.86, + "learning_rate": 1.6719447271362044e-05, + "loss": 0.4991, + "step": 5057 + }, + { + "epoch": 0.86, + "learning_rate": 1.6718083422852256e-05, + "loss": 0.4933, + "step": 5058 + }, + { + "epoch": 0.86, + "learning_rate": 1.671671934655167e-05, + "loss": 0.5454, + "step": 5059 + }, + { + "epoch": 0.86, + "learning_rate": 1.671535504250654e-05, + "loss": 0.5275, + "step": 5060 + }, + { + "epoch": 0.86, + "learning_rate": 1.671399051076313e-05, + "loss": 0.4589, + "step": 5061 + }, + { + "epoch": 0.86, + "learning_rate": 1.67126257513677e-05, + "loss": 0.4439, + "step": 5062 + }, + { + "epoch": 0.86, + "learning_rate": 1.6711260764366527e-05, + "loss": 0.5181, + "step": 5063 + }, + { + "epoch": 0.86, + "learning_rate": 1.67098955498059e-05, + "loss": 0.5017, + "step": 5064 + }, + { + "epoch": 0.86, + "learning_rate": 1.6708530107732104e-05, + "loss": 0.5141, + "step": 5065 + }, + { + "epoch": 0.86, + "learning_rate": 1.6707164438191438e-05, + "loss": 0.5426, + "step": 5066 + }, + { + "epoch": 0.86, + "learning_rate": 1.6705798541230207e-05, + "loss": 0.5357, + "step": 5067 + }, + { + "epoch": 0.86, + "learning_rate": 1.6704432416894726e-05, + "loss": 0.4889, + "step": 5068 + }, + { + "epoch": 0.86, + "learning_rate": 1.6703066065231318e-05, + "loss": 0.4868, + "step": 5069 + }, + { + "epoch": 0.86, + "learning_rate": 1.6701699486286308e-05, + "loss": 0.4428, + "step": 5070 + }, + { + "epoch": 0.86, + "learning_rate": 1.6700332680106034e-05, + "loss": 0.5154, + "step": 5071 + }, + { + "epoch": 0.87, + "learning_rate": 1.6698965646736843e-05, + "loss": 0.4707, + "step": 5072 + }, + { + "epoch": 0.87, + "learning_rate": 1.669759838622508e-05, + "loss": 0.4928, + "step": 5073 + }, + { + "epoch": 0.87, + "learning_rate": 1.6696230898617115e-05, + "loss": 0.5435, + "step": 5074 + }, + { + "epoch": 0.87, + "learning_rate": 1.669486318395931e-05, + "loss": 0.5282, + "step": 5075 + }, + { + "epoch": 0.87, + "learning_rate": 1.6693495242298045e-05, + "loss": 0.4982, + "step": 5076 + }, + { + "epoch": 0.87, + "learning_rate": 1.6692127073679692e-05, + "loss": 0.474, + "step": 5077 + }, + { + "epoch": 0.87, + "learning_rate": 1.6690758678150652e-05, + "loss": 0.4495, + "step": 5078 + }, + { + "epoch": 0.87, + "learning_rate": 1.6689390055757325e-05, + "loss": 0.4835, + "step": 5079 + }, + { + "epoch": 0.87, + "learning_rate": 1.6688021206546102e-05, + "loss": 0.5003, + "step": 5080 + }, + { + "epoch": 0.87, + "learning_rate": 1.6686652130563413e-05, + "loss": 0.4954, + "step": 5081 + }, + { + "epoch": 0.87, + "learning_rate": 1.6685282827855672e-05, + "loss": 0.5036, + "step": 5082 + }, + { + "epoch": 0.87, + "learning_rate": 1.6683913298469308e-05, + "loss": 0.526, + "step": 5083 + }, + { + "epoch": 0.87, + "learning_rate": 1.668254354245076e-05, + "loss": 0.4763, + "step": 5084 + }, + { + "epoch": 0.87, + "learning_rate": 1.6681173559846472e-05, + "loss": 0.508, + "step": 5085 + }, + { + "epoch": 0.87, + "learning_rate": 1.6679803350702893e-05, + "loss": 0.4751, + "step": 5086 + }, + { + "epoch": 0.87, + "learning_rate": 1.6678432915066488e-05, + "loss": 0.4875, + "step": 5087 + }, + { + "epoch": 0.87, + "learning_rate": 1.6677062252983723e-05, + "loss": 0.4956, + "step": 5088 + }, + { + "epoch": 0.87, + "learning_rate": 1.6675691364501068e-05, + "loss": 0.5006, + "step": 5089 + }, + { + "epoch": 0.87, + "learning_rate": 1.6674320249665013e-05, + "loss": 0.4723, + "step": 5090 + }, + { + "epoch": 0.87, + "learning_rate": 1.6672948908522046e-05, + "loss": 0.4809, + "step": 5091 + }, + { + "epoch": 0.87, + "learning_rate": 1.6671577341118662e-05, + "loss": 0.5486, + "step": 5092 + }, + { + "epoch": 0.87, + "learning_rate": 1.667020554750137e-05, + "loss": 0.4938, + "step": 5093 + }, + { + "epoch": 0.87, + "learning_rate": 1.6668833527716687e-05, + "loss": 0.5196, + "step": 5094 + }, + { + "epoch": 0.87, + "learning_rate": 1.6667461281811127e-05, + "loss": 0.4889, + "step": 5095 + }, + { + "epoch": 0.87, + "learning_rate": 1.6666088809831223e-05, + "loss": 0.5072, + "step": 5096 + }, + { + "epoch": 0.87, + "learning_rate": 1.666471611182351e-05, + "loss": 0.5139, + "step": 5097 + }, + { + "epoch": 0.87, + "learning_rate": 1.6663343187834534e-05, + "loss": 0.4857, + "step": 5098 + }, + { + "epoch": 0.87, + "learning_rate": 1.666197003791085e-05, + "loss": 0.5225, + "step": 5099 + }, + { + "epoch": 0.87, + "learning_rate": 1.6660596662099007e-05, + "loss": 0.4887, + "step": 5100 + }, + { + "epoch": 0.87, + "learning_rate": 1.6659223060445583e-05, + "loss": 0.4531, + "step": 5101 + }, + { + "epoch": 0.87, + "learning_rate": 1.6657849232997147e-05, + "loss": 0.4924, + "step": 5102 + }, + { + "epoch": 0.87, + "learning_rate": 1.665647517980028e-05, + "loss": 0.5111, + "step": 5103 + }, + { + "epoch": 0.87, + "learning_rate": 1.665510090090158e-05, + "loss": 0.5628, + "step": 5104 + }, + { + "epoch": 0.87, + "learning_rate": 1.6653726396347642e-05, + "loss": 0.4869, + "step": 5105 + }, + { + "epoch": 0.87, + "learning_rate": 1.6652351666185063e-05, + "loss": 0.5145, + "step": 5106 + }, + { + "epoch": 0.87, + "learning_rate": 1.6650976710460467e-05, + "loss": 0.5116, + "step": 5107 + }, + { + "epoch": 0.87, + "learning_rate": 1.664960152922047e-05, + "loss": 0.5065, + "step": 5108 + }, + { + "epoch": 0.87, + "learning_rate": 1.66482261225117e-05, + "loss": 0.4745, + "step": 5109 + }, + { + "epoch": 0.87, + "learning_rate": 1.6646850490380795e-05, + "loss": 0.4958, + "step": 5110 + }, + { + "epoch": 0.87, + "learning_rate": 1.6645474632874396e-05, + "loss": 0.4777, + "step": 5111 + }, + { + "epoch": 0.87, + "learning_rate": 1.6644098550039157e-05, + "loss": 0.5119, + "step": 5112 + }, + { + "epoch": 0.87, + "learning_rate": 1.6642722241921737e-05, + "loss": 0.5083, + "step": 5113 + }, + { + "epoch": 0.87, + "learning_rate": 1.66413457085688e-05, + "loss": 0.4862, + "step": 5114 + }, + { + "epoch": 0.87, + "learning_rate": 1.6639968950027023e-05, + "loss": 0.5119, + "step": 5115 + }, + { + "epoch": 0.87, + "learning_rate": 1.6638591966343087e-05, + "loss": 0.449, + "step": 5116 + }, + { + "epoch": 0.87, + "learning_rate": 1.663721475756368e-05, + "loss": 0.5156, + "step": 5117 + }, + { + "epoch": 0.87, + "learning_rate": 1.6635837323735503e-05, + "loss": 0.5103, + "step": 5118 + }, + { + "epoch": 0.87, + "learning_rate": 1.6634459664905257e-05, + "loss": 0.4958, + "step": 5119 + }, + { + "epoch": 0.87, + "learning_rate": 1.6633081781119652e-05, + "loss": 0.4608, + "step": 5120 + }, + { + "epoch": 0.87, + "learning_rate": 1.663170367242542e-05, + "loss": 0.4852, + "step": 5121 + }, + { + "epoch": 0.87, + "learning_rate": 1.6630325338869277e-05, + "loss": 0.5037, + "step": 5122 + }, + { + "epoch": 0.87, + "learning_rate": 1.6628946780497955e-05, + "loss": 0.5008, + "step": 5123 + }, + { + "epoch": 0.87, + "learning_rate": 1.662756799735821e-05, + "loss": 0.518, + "step": 5124 + }, + { + "epoch": 0.87, + "learning_rate": 1.662618898949679e-05, + "loss": 0.496, + "step": 5125 + }, + { + "epoch": 0.87, + "learning_rate": 1.6624809756960445e-05, + "loss": 0.4736, + "step": 5126 + }, + { + "epoch": 0.87, + "learning_rate": 1.6623430299795944e-05, + "loss": 0.518, + "step": 5127 + }, + { + "epoch": 0.87, + "learning_rate": 1.6622050618050066e-05, + "loss": 0.5081, + "step": 5128 + }, + { + "epoch": 0.87, + "learning_rate": 1.662067071176958e-05, + "loss": 0.5249, + "step": 5129 + }, + { + "epoch": 0.87, + "learning_rate": 1.661929058100129e-05, + "loss": 0.4985, + "step": 5130 + }, + { + "epoch": 0.88, + "learning_rate": 1.6617910225791983e-05, + "loss": 0.5027, + "step": 5131 + }, + { + "epoch": 0.88, + "learning_rate": 1.6616529646188464e-05, + "loss": 0.532, + "step": 5132 + }, + { + "epoch": 0.88, + "learning_rate": 1.6615148842237542e-05, + "loss": 0.5293, + "step": 5133 + }, + { + "epoch": 0.88, + "learning_rate": 1.6613767813986045e-05, + "loss": 0.4875, + "step": 5134 + }, + { + "epoch": 0.88, + "learning_rate": 1.6612386561480785e-05, + "loss": 0.5316, + "step": 5135 + }, + { + "epoch": 0.88, + "learning_rate": 1.6611005084768616e-05, + "loss": 0.501, + "step": 5136 + }, + { + "epoch": 0.88, + "learning_rate": 1.660962338389636e-05, + "loss": 0.4918, + "step": 5137 + }, + { + "epoch": 0.88, + "learning_rate": 1.6608241458910877e-05, + "loss": 0.4998, + "step": 5138 + }, + { + "epoch": 0.88, + "learning_rate": 1.660685930985902e-05, + "loss": 0.5184, + "step": 5139 + }, + { + "epoch": 0.88, + "learning_rate": 1.6605476936787662e-05, + "loss": 0.461, + "step": 5140 + }, + { + "epoch": 0.88, + "learning_rate": 1.6604094339743668e-05, + "loss": 0.4869, + "step": 5141 + }, + { + "epoch": 0.88, + "learning_rate": 1.660271151877392e-05, + "loss": 0.4605, + "step": 5142 + }, + { + "epoch": 0.88, + "learning_rate": 1.6601328473925305e-05, + "loss": 0.4971, + "step": 5143 + }, + { + "epoch": 0.88, + "learning_rate": 1.6599945205244713e-05, + "loss": 0.4672, + "step": 5144 + }, + { + "epoch": 0.88, + "learning_rate": 1.6598561712779052e-05, + "loss": 0.5168, + "step": 5145 + }, + { + "epoch": 0.88, + "learning_rate": 1.6597177996575236e-05, + "loss": 0.4847, + "step": 5146 + }, + { + "epoch": 0.88, + "learning_rate": 1.6595794056680173e-05, + "loss": 0.4823, + "step": 5147 + }, + { + "epoch": 0.88, + "learning_rate": 1.6594409893140796e-05, + "loss": 0.5051, + "step": 5148 + }, + { + "epoch": 0.88, + "learning_rate": 1.659302550600404e-05, + "loss": 0.4538, + "step": 5149 + }, + { + "epoch": 0.88, + "learning_rate": 1.6591640895316836e-05, + "loss": 0.5368, + "step": 5150 + }, + { + "epoch": 0.88, + "learning_rate": 1.6590256061126142e-05, + "loss": 0.4885, + "step": 5151 + }, + { + "epoch": 0.88, + "learning_rate": 1.6588871003478907e-05, + "loss": 0.4978, + "step": 5152 + }, + { + "epoch": 0.88, + "learning_rate": 1.6587485722422094e-05, + "loss": 0.4834, + "step": 5153 + }, + { + "epoch": 0.88, + "learning_rate": 1.658610021800268e-05, + "loss": 0.5028, + "step": 5154 + }, + { + "epoch": 0.88, + "learning_rate": 1.658471449026764e-05, + "loss": 0.4667, + "step": 5155 + }, + { + "epoch": 0.88, + "learning_rate": 1.6583328539263957e-05, + "loss": 0.5057, + "step": 5156 + }, + { + "epoch": 0.88, + "learning_rate": 1.658194236503863e-05, + "loss": 0.494, + "step": 5157 + }, + { + "epoch": 0.88, + "learning_rate": 1.6580555967638657e-05, + "loss": 0.5459, + "step": 5158 + }, + { + "epoch": 0.88, + "learning_rate": 1.6579169347111043e-05, + "loss": 0.4784, + "step": 5159 + }, + { + "epoch": 0.88, + "learning_rate": 1.6577782503502815e-05, + "loss": 0.4862, + "step": 5160 + }, + { + "epoch": 0.88, + "learning_rate": 1.6576395436860985e-05, + "loss": 0.5026, + "step": 5161 + }, + { + "epoch": 0.88, + "learning_rate": 1.6575008147232597e-05, + "loss": 0.4668, + "step": 5162 + }, + { + "epoch": 0.88, + "learning_rate": 1.657362063466468e-05, + "loss": 0.5483, + "step": 5163 + }, + { + "epoch": 0.88, + "learning_rate": 1.657223289920428e-05, + "loss": 0.5061, + "step": 5164 + }, + { + "epoch": 0.88, + "learning_rate": 1.657084494089846e-05, + "loss": 0.5057, + "step": 5165 + }, + { + "epoch": 0.88, + "learning_rate": 1.656945675979427e-05, + "loss": 0.5269, + "step": 5166 + }, + { + "epoch": 0.88, + "learning_rate": 1.656806835593879e-05, + "loss": 0.4762, + "step": 5167 + }, + { + "epoch": 0.88, + "learning_rate": 1.6566679729379093e-05, + "loss": 0.472, + "step": 5168 + }, + { + "epoch": 0.88, + "learning_rate": 1.656529088016226e-05, + "loss": 0.507, + "step": 5169 + }, + { + "epoch": 0.88, + "learning_rate": 1.6563901808335385e-05, + "loss": 0.5086, + "step": 5170 + }, + { + "epoch": 0.88, + "learning_rate": 1.656251251394557e-05, + "loss": 0.4415, + "step": 5171 + }, + { + "epoch": 0.88, + "learning_rate": 1.6561122997039917e-05, + "loss": 0.5155, + "step": 5172 + }, + { + "epoch": 0.88, + "learning_rate": 1.6559733257665546e-05, + "loss": 0.508, + "step": 5173 + }, + { + "epoch": 0.88, + "learning_rate": 1.6558343295869575e-05, + "loss": 0.5446, + "step": 5174 + }, + { + "epoch": 0.88, + "learning_rate": 1.655695311169914e-05, + "loss": 0.52, + "step": 5175 + }, + { + "epoch": 0.88, + "learning_rate": 1.6555562705201364e-05, + "loss": 0.4755, + "step": 5176 + }, + { + "epoch": 0.88, + "learning_rate": 1.6554172076423404e-05, + "loss": 0.4938, + "step": 5177 + }, + { + "epoch": 0.88, + "learning_rate": 1.655278122541241e-05, + "loss": 0.496, + "step": 5178 + }, + { + "epoch": 0.88, + "learning_rate": 1.6551390152215542e-05, + "loss": 0.4806, + "step": 5179 + }, + { + "epoch": 0.88, + "learning_rate": 1.6549998856879963e-05, + "loss": 0.5012, + "step": 5180 + }, + { + "epoch": 0.88, + "learning_rate": 1.6548607339452853e-05, + "loss": 0.5601, + "step": 5181 + }, + { + "epoch": 0.88, + "learning_rate": 1.654721559998139e-05, + "loss": 0.4931, + "step": 5182 + }, + { + "epoch": 0.88, + "learning_rate": 1.6545823638512768e-05, + "loss": 0.4996, + "step": 5183 + }, + { + "epoch": 0.88, + "learning_rate": 1.6544431455094182e-05, + "loss": 0.4876, + "step": 5184 + }, + { + "epoch": 0.88, + "learning_rate": 1.6543039049772836e-05, + "loss": 0.4783, + "step": 5185 + }, + { + "epoch": 0.88, + "learning_rate": 1.6541646422595942e-05, + "loss": 0.5166, + "step": 5186 + }, + { + "epoch": 0.88, + "learning_rate": 1.6540253573610725e-05, + "loss": 0.4936, + "step": 5187 + }, + { + "epoch": 0.88, + "learning_rate": 1.653886050286441e-05, + "loss": 0.5193, + "step": 5188 + }, + { + "epoch": 0.88, + "learning_rate": 1.653746721040423e-05, + "loss": 0.5198, + "step": 5189 + }, + { + "epoch": 0.89, + "learning_rate": 1.6536073696277422e-05, + "loss": 0.4358, + "step": 5190 + }, + { + "epoch": 0.89, + "learning_rate": 1.653467996053125e-05, + "loss": 0.5103, + "step": 5191 + }, + { + "epoch": 0.89, + "learning_rate": 1.6533286003212963e-05, + "loss": 0.4859, + "step": 5192 + }, + { + "epoch": 0.89, + "learning_rate": 1.6531891824369827e-05, + "loss": 0.5186, + "step": 5193 + }, + { + "epoch": 0.89, + "learning_rate": 1.6530497424049117e-05, + "loss": 0.4613, + "step": 5194 + }, + { + "epoch": 0.89, + "learning_rate": 1.6529102802298104e-05, + "loss": 0.4771, + "step": 5195 + }, + { + "epoch": 0.89, + "learning_rate": 1.652770795916409e-05, + "loss": 0.5435, + "step": 5196 + }, + { + "epoch": 0.89, + "learning_rate": 1.652631289469436e-05, + "loss": 0.4991, + "step": 5197 + }, + { + "epoch": 0.89, + "learning_rate": 1.652491760893622e-05, + "loss": 0.5093, + "step": 5198 + }, + { + "epoch": 0.89, + "learning_rate": 1.652352210193698e-05, + "loss": 0.4593, + "step": 5199 + }, + { + "epoch": 0.89, + "learning_rate": 1.652212637374395e-05, + "loss": 0.5177, + "step": 5200 + }, + { + "epoch": 0.89, + "learning_rate": 1.652073042440447e-05, + "loss": 0.4936, + "step": 5201 + }, + { + "epoch": 0.89, + "learning_rate": 1.651933425396586e-05, + "loss": 0.5192, + "step": 5202 + }, + { + "epoch": 0.89, + "learning_rate": 1.6517937862475467e-05, + "loss": 0.4575, + "step": 5203 + }, + { + "epoch": 0.89, + "learning_rate": 1.6516541249980638e-05, + "loss": 0.4355, + "step": 5204 + }, + { + "epoch": 0.89, + "learning_rate": 1.6515144416528724e-05, + "loss": 0.5053, + "step": 5205 + }, + { + "epoch": 0.89, + "learning_rate": 1.6513747362167093e-05, + "loss": 0.5183, + "step": 5206 + }, + { + "epoch": 0.89, + "learning_rate": 1.651235008694311e-05, + "loss": 0.542, + "step": 5207 + }, + { + "epoch": 0.89, + "learning_rate": 1.6510952590904154e-05, + "loss": 0.4909, + "step": 5208 + }, + { + "epoch": 0.89, + "learning_rate": 1.650955487409761e-05, + "loss": 0.4977, + "step": 5209 + }, + { + "epoch": 0.89, + "learning_rate": 1.6508156936570874e-05, + "loss": 0.5482, + "step": 5210 + }, + { + "epoch": 0.89, + "learning_rate": 1.6506758778371343e-05, + "loss": 0.4667, + "step": 5211 + }, + { + "epoch": 0.89, + "learning_rate": 1.650536039954642e-05, + "loss": 0.4761, + "step": 5212 + }, + { + "epoch": 0.89, + "learning_rate": 1.650396180014353e-05, + "loss": 0.518, + "step": 5213 + }, + { + "epoch": 0.89, + "learning_rate": 1.650256298021009e-05, + "loss": 0.4914, + "step": 5214 + }, + { + "epoch": 0.89, + "learning_rate": 1.650116393979353e-05, + "loss": 0.4735, + "step": 5215 + }, + { + "epoch": 0.89, + "learning_rate": 1.6499764678941287e-05, + "loss": 0.5046, + "step": 5216 + }, + { + "epoch": 0.89, + "learning_rate": 1.6498365197700806e-05, + "loss": 0.5184, + "step": 5217 + }, + { + "epoch": 0.89, + "learning_rate": 1.649696549611954e-05, + "loss": 0.5299, + "step": 5218 + }, + { + "epoch": 0.89, + "learning_rate": 1.649556557424495e-05, + "loss": 0.4979, + "step": 5219 + }, + { + "epoch": 0.89, + "learning_rate": 1.64941654321245e-05, + "loss": 0.5132, + "step": 5220 + }, + { + "epoch": 0.89, + "learning_rate": 1.649276506980567e-05, + "loss": 0.5082, + "step": 5221 + }, + { + "epoch": 0.89, + "learning_rate": 1.6491364487335935e-05, + "loss": 0.4879, + "step": 5222 + }, + { + "epoch": 0.89, + "learning_rate": 1.6489963684762792e-05, + "loss": 0.4918, + "step": 5223 + }, + { + "epoch": 0.89, + "learning_rate": 1.648856266213373e-05, + "loss": 0.4941, + "step": 5224 + }, + { + "epoch": 0.89, + "learning_rate": 1.6487161419496262e-05, + "loss": 0.4715, + "step": 5225 + }, + { + "epoch": 0.89, + "learning_rate": 1.64857599568979e-05, + "loss": 0.486, + "step": 5226 + }, + { + "epoch": 0.89, + "learning_rate": 1.6484358274386155e-05, + "loss": 0.5043, + "step": 5227 + }, + { + "epoch": 0.89, + "learning_rate": 1.648295637200856e-05, + "loss": 0.4839, + "step": 5228 + }, + { + "epoch": 0.89, + "learning_rate": 1.648155424981265e-05, + "loss": 0.5145, + "step": 5229 + }, + { + "epoch": 0.89, + "learning_rate": 1.6480151907845967e-05, + "loss": 0.5263, + "step": 5230 + }, + { + "epoch": 0.89, + "learning_rate": 1.6478749346156056e-05, + "loss": 0.4649, + "step": 5231 + }, + { + "epoch": 0.89, + "learning_rate": 1.6477346564790478e-05, + "loss": 0.5016, + "step": 5232 + }, + { + "epoch": 0.89, + "learning_rate": 1.6475943563796795e-05, + "loss": 0.4602, + "step": 5233 + }, + { + "epoch": 0.89, + "learning_rate": 1.6474540343222584e-05, + "loss": 0.528, + "step": 5234 + }, + { + "epoch": 0.89, + "learning_rate": 1.6473136903115414e-05, + "loss": 0.5181, + "step": 5235 + }, + { + "epoch": 0.89, + "learning_rate": 1.6471733243522877e-05, + "loss": 0.5463, + "step": 5236 + }, + { + "epoch": 0.89, + "learning_rate": 1.647032936449257e-05, + "loss": 0.5051, + "step": 5237 + }, + { + "epoch": 0.89, + "learning_rate": 1.64689252660721e-05, + "loss": 0.5291, + "step": 5238 + }, + { + "epoch": 0.89, + "learning_rate": 1.6467520948309057e-05, + "loss": 0.4706, + "step": 5239 + }, + { + "epoch": 0.89, + "learning_rate": 1.646611641125107e-05, + "loss": 0.5007, + "step": 5240 + }, + { + "epoch": 0.89, + "learning_rate": 1.6464711654945765e-05, + "loss": 0.5211, + "step": 5241 + }, + { + "epoch": 0.89, + "learning_rate": 1.6463306679440765e-05, + "loss": 0.4948, + "step": 5242 + }, + { + "epoch": 0.89, + "learning_rate": 1.6461901484783714e-05, + "loss": 0.5132, + "step": 5243 + }, + { + "epoch": 0.89, + "learning_rate": 1.6460496071022258e-05, + "loss": 0.4724, + "step": 5244 + }, + { + "epoch": 0.89, + "learning_rate": 1.6459090438204048e-05, + "loss": 0.479, + "step": 5245 + }, + { + "epoch": 0.89, + "learning_rate": 1.6457684586376748e-05, + "loss": 0.5423, + "step": 5246 + }, + { + "epoch": 0.89, + "learning_rate": 1.6456278515588023e-05, + "loss": 0.4932, + "step": 5247 + }, + { + "epoch": 0.9, + "learning_rate": 1.6454872225885554e-05, + "loss": 0.4742, + "step": 5248 + }, + { + "epoch": 0.9, + "learning_rate": 1.6453465717317023e-05, + "loss": 0.5275, + "step": 5249 + }, + { + "epoch": 0.9, + "learning_rate": 1.6452058989930112e-05, + "loss": 0.5314, + "step": 5250 + }, + { + "epoch": 0.9, + "learning_rate": 1.645065204377253e-05, + "loss": 0.519, + "step": 5251 + }, + { + "epoch": 0.9, + "learning_rate": 1.6449244878891976e-05, + "loss": 0.531, + "step": 5252 + }, + { + "epoch": 0.9, + "learning_rate": 1.6447837495336163e-05, + "loss": 0.4896, + "step": 5253 + }, + { + "epoch": 0.9, + "learning_rate": 1.6446429893152815e-05, + "loss": 0.5122, + "step": 5254 + }, + { + "epoch": 0.9, + "learning_rate": 1.6445022072389658e-05, + "loss": 0.5037, + "step": 5255 + }, + { + "epoch": 0.9, + "learning_rate": 1.644361403309443e-05, + "loss": 0.4997, + "step": 5256 + }, + { + "epoch": 0.9, + "learning_rate": 1.6442205775314864e-05, + "loss": 0.4883, + "step": 5257 + }, + { + "epoch": 0.9, + "learning_rate": 1.6440797299098723e-05, + "loss": 0.4433, + "step": 5258 + }, + { + "epoch": 0.9, + "learning_rate": 1.6439388604493758e-05, + "loss": 0.5093, + "step": 5259 + }, + { + "epoch": 0.9, + "learning_rate": 1.643797969154773e-05, + "loss": 0.4562, + "step": 5260 + }, + { + "epoch": 0.9, + "learning_rate": 1.6436570560308414e-05, + "loss": 0.4748, + "step": 5261 + }, + { + "epoch": 0.9, + "learning_rate": 1.6435161210823595e-05, + "loss": 0.4899, + "step": 5262 + }, + { + "epoch": 0.9, + "learning_rate": 1.6433751643141052e-05, + "loss": 0.4731, + "step": 5263 + }, + { + "epoch": 0.9, + "learning_rate": 1.6432341857308586e-05, + "loss": 0.5177, + "step": 5264 + }, + { + "epoch": 0.9, + "learning_rate": 1.6430931853373995e-05, + "loss": 0.4978, + "step": 5265 + }, + { + "epoch": 0.9, + "learning_rate": 1.6429521631385088e-05, + "loss": 0.502, + "step": 5266 + }, + { + "epoch": 0.9, + "learning_rate": 1.6428111191389683e-05, + "loss": 0.5156, + "step": 5267 + }, + { + "epoch": 0.9, + "learning_rate": 1.64267005334356e-05, + "loss": 0.4778, + "step": 5268 + }, + { + "epoch": 0.9, + "learning_rate": 1.6425289657570674e-05, + "loss": 0.5049, + "step": 5269 + }, + { + "epoch": 0.9, + "learning_rate": 1.6423878563842745e-05, + "loss": 0.4928, + "step": 5270 + }, + { + "epoch": 0.9, + "learning_rate": 1.642246725229966e-05, + "loss": 0.505, + "step": 5271 + }, + { + "epoch": 0.9, + "learning_rate": 1.6421055722989266e-05, + "loss": 0.4776, + "step": 5272 + }, + { + "epoch": 0.9, + "learning_rate": 1.6419643975959428e-05, + "loss": 0.5328, + "step": 5273 + }, + { + "epoch": 0.9, + "learning_rate": 1.6418232011258016e-05, + "loss": 0.5328, + "step": 5274 + }, + { + "epoch": 0.9, + "learning_rate": 1.64168198289329e-05, + "loss": 0.4722, + "step": 5275 + }, + { + "epoch": 0.9, + "learning_rate": 1.6415407429031974e-05, + "loss": 0.5075, + "step": 5276 + }, + { + "epoch": 0.9, + "learning_rate": 1.6413994811603117e-05, + "loss": 0.4743, + "step": 5277 + }, + { + "epoch": 0.9, + "learning_rate": 1.6412581976694227e-05, + "loss": 0.5077, + "step": 5278 + }, + { + "epoch": 0.9, + "learning_rate": 1.6411168924353217e-05, + "loss": 0.5014, + "step": 5279 + }, + { + "epoch": 0.9, + "learning_rate": 1.6409755654627994e-05, + "loss": 0.4766, + "step": 5280 + }, + { + "epoch": 0.9, + "learning_rate": 1.6408342167566483e-05, + "loss": 0.5037, + "step": 5281 + }, + { + "epoch": 0.9, + "learning_rate": 1.6406928463216603e-05, + "loss": 0.4715, + "step": 5282 + }, + { + "epoch": 0.9, + "learning_rate": 1.6405514541626296e-05, + "loss": 0.5061, + "step": 5283 + }, + { + "epoch": 0.9, + "learning_rate": 1.64041004028435e-05, + "loss": 0.4595, + "step": 5284 + }, + { + "epoch": 0.9, + "learning_rate": 1.6402686046916165e-05, + "loss": 0.5071, + "step": 5285 + }, + { + "epoch": 0.9, + "learning_rate": 1.640127147389225e-05, + "loss": 0.4598, + "step": 5286 + }, + { + "epoch": 0.9, + "learning_rate": 1.639985668381972e-05, + "loss": 0.5075, + "step": 5287 + }, + { + "epoch": 0.9, + "learning_rate": 1.639844167674654e-05, + "loss": 0.5066, + "step": 5288 + }, + { + "epoch": 0.9, + "learning_rate": 1.6397026452720693e-05, + "loss": 0.5192, + "step": 5289 + }, + { + "epoch": 0.9, + "learning_rate": 1.6395611011790166e-05, + "loss": 0.5044, + "step": 5290 + }, + { + "epoch": 0.9, + "learning_rate": 1.6394195354002952e-05, + "loss": 0.534, + "step": 5291 + }, + { + "epoch": 0.9, + "learning_rate": 1.6392779479407047e-05, + "loss": 0.4723, + "step": 5292 + }, + { + "epoch": 0.9, + "learning_rate": 1.639136338805047e-05, + "loss": 0.5471, + "step": 5293 + }, + { + "epoch": 0.9, + "learning_rate": 1.6389947079981224e-05, + "loss": 0.4761, + "step": 5294 + }, + { + "epoch": 0.9, + "learning_rate": 1.6388530555247342e-05, + "loss": 0.4751, + "step": 5295 + }, + { + "epoch": 0.9, + "learning_rate": 1.638711381389685e-05, + "loss": 0.4797, + "step": 5296 + }, + { + "epoch": 0.9, + "learning_rate": 1.6385696855977785e-05, + "loss": 0.5144, + "step": 5297 + }, + { + "epoch": 0.9, + "learning_rate": 1.638427968153819e-05, + "loss": 0.5221, + "step": 5298 + }, + { + "epoch": 0.9, + "learning_rate": 1.6382862290626126e-05, + "loss": 0.5964, + "step": 5299 + }, + { + "epoch": 0.9, + "learning_rate": 1.6381444683289643e-05, + "loss": 0.4915, + "step": 5300 + }, + { + "epoch": 0.9, + "learning_rate": 1.638002685957681e-05, + "loss": 0.5192, + "step": 5301 + }, + { + "epoch": 0.9, + "learning_rate": 1.6378608819535708e-05, + "loss": 0.5037, + "step": 5302 + }, + { + "epoch": 0.9, + "learning_rate": 1.6377190563214408e-05, + "loss": 0.4905, + "step": 5303 + }, + { + "epoch": 0.9, + "learning_rate": 1.6375772090661006e-05, + "loss": 0.489, + "step": 5304 + }, + { + "epoch": 0.9, + "learning_rate": 1.63743534019236e-05, + "loss": 0.5468, + "step": 5305 + }, + { + "epoch": 0.9, + "learning_rate": 1.637293449705029e-05, + "loss": 0.4733, + "step": 5306 + }, + { + "epoch": 0.91, + "learning_rate": 1.6371515376089183e-05, + "loss": 0.5184, + "step": 5307 + }, + { + "epoch": 0.91, + "learning_rate": 1.6370096039088406e-05, + "loss": 0.4721, + "step": 5308 + }, + { + "epoch": 0.91, + "learning_rate": 1.636867648609608e-05, + "loss": 0.4876, + "step": 5309 + }, + { + "epoch": 0.91, + "learning_rate": 1.6367256717160338e-05, + "loss": 0.4899, + "step": 5310 + }, + { + "epoch": 0.91, + "learning_rate": 1.6365836732329318e-05, + "loss": 0.5085, + "step": 5311 + }, + { + "epoch": 0.91, + "learning_rate": 1.6364416531651172e-05, + "loss": 0.5218, + "step": 5312 + }, + { + "epoch": 0.91, + "learning_rate": 1.6362996115174056e-05, + "loss": 0.4539, + "step": 5313 + }, + { + "epoch": 0.91, + "learning_rate": 1.6361575482946126e-05, + "loss": 0.5093, + "step": 5314 + }, + { + "epoch": 0.91, + "learning_rate": 1.6360154635015556e-05, + "loss": 0.5179, + "step": 5315 + }, + { + "epoch": 0.91, + "learning_rate": 1.635873357143052e-05, + "loss": 0.5114, + "step": 5316 + }, + { + "epoch": 0.91, + "learning_rate": 1.6357312292239204e-05, + "loss": 0.4529, + "step": 5317 + }, + { + "epoch": 0.91, + "learning_rate": 1.63558907974898e-05, + "loss": 0.5057, + "step": 5318 + }, + { + "epoch": 0.91, + "learning_rate": 1.6354469087230508e-05, + "loss": 0.5177, + "step": 5319 + }, + { + "epoch": 0.91, + "learning_rate": 1.635304716150953e-05, + "loss": 0.5585, + "step": 5320 + }, + { + "epoch": 0.91, + "learning_rate": 1.6351625020375082e-05, + "loss": 0.4842, + "step": 5321 + }, + { + "epoch": 0.91, + "learning_rate": 1.6350202663875385e-05, + "loss": 0.4629, + "step": 5322 + }, + { + "epoch": 0.91, + "learning_rate": 1.6348780092058665e-05, + "loss": 0.5384, + "step": 5323 + }, + { + "epoch": 0.91, + "learning_rate": 1.634735730497316e-05, + "loss": 0.522, + "step": 5324 + }, + { + "epoch": 0.91, + "learning_rate": 1.634593430266711e-05, + "loss": 0.4586, + "step": 5325 + }, + { + "epoch": 0.91, + "learning_rate": 1.6344511085188766e-05, + "loss": 0.5103, + "step": 5326 + }, + { + "epoch": 0.91, + "learning_rate": 1.6343087652586387e-05, + "loss": 0.5043, + "step": 5327 + }, + { + "epoch": 0.91, + "learning_rate": 1.6341664004908232e-05, + "loss": 0.4955, + "step": 5328 + }, + { + "epoch": 0.91, + "learning_rate": 1.634024014220258e-05, + "loss": 0.5072, + "step": 5329 + }, + { + "epoch": 0.91, + "learning_rate": 1.6338816064517707e-05, + "loss": 0.4963, + "step": 5330 + }, + { + "epoch": 0.91, + "learning_rate": 1.6337391771901897e-05, + "loss": 0.4839, + "step": 5331 + }, + { + "epoch": 0.91, + "learning_rate": 1.6335967264403446e-05, + "loss": 0.564, + "step": 5332 + }, + { + "epoch": 0.91, + "learning_rate": 1.6334542542070653e-05, + "loss": 0.5362, + "step": 5333 + }, + { + "epoch": 0.91, + "learning_rate": 1.633311760495183e-05, + "loss": 0.5015, + "step": 5334 + }, + { + "epoch": 0.91, + "learning_rate": 1.633169245309529e-05, + "loss": 0.4913, + "step": 5335 + }, + { + "epoch": 0.91, + "learning_rate": 1.6330267086549354e-05, + "loss": 0.4944, + "step": 5336 + }, + { + "epoch": 0.91, + "learning_rate": 1.6328841505362358e-05, + "loss": 0.4293, + "step": 5337 + }, + { + "epoch": 0.91, + "learning_rate": 1.6327415709582634e-05, + "loss": 0.4908, + "step": 5338 + }, + { + "epoch": 0.91, + "learning_rate": 1.6325989699258525e-05, + "loss": 0.4827, + "step": 5339 + }, + { + "epoch": 0.91, + "learning_rate": 1.6324563474438385e-05, + "loss": 0.4943, + "step": 5340 + }, + { + "epoch": 0.91, + "learning_rate": 1.632313703517058e-05, + "loss": 0.4852, + "step": 5341 + }, + { + "epoch": 0.91, + "learning_rate": 1.6321710381503467e-05, + "loss": 0.4964, + "step": 5342 + }, + { + "epoch": 0.91, + "learning_rate": 1.6320283513485424e-05, + "loss": 0.4851, + "step": 5343 + }, + { + "epoch": 0.91, + "learning_rate": 1.631885643116483e-05, + "loss": 0.4651, + "step": 5344 + }, + { + "epoch": 0.91, + "learning_rate": 1.6317429134590076e-05, + "loss": 0.5274, + "step": 5345 + }, + { + "epoch": 0.91, + "learning_rate": 1.6316001623809557e-05, + "loss": 0.4981, + "step": 5346 + }, + { + "epoch": 0.91, + "learning_rate": 1.631457389887167e-05, + "loss": 0.5011, + "step": 5347 + }, + { + "epoch": 0.91, + "learning_rate": 1.631314595982483e-05, + "loss": 0.5214, + "step": 5348 + }, + { + "epoch": 0.91, + "learning_rate": 1.631171780671746e-05, + "loss": 0.4922, + "step": 5349 + }, + { + "epoch": 0.91, + "learning_rate": 1.631028943959798e-05, + "loss": 0.4902, + "step": 5350 + }, + { + "epoch": 0.91, + "learning_rate": 1.630886085851481e-05, + "loss": 0.4931, + "step": 5351 + }, + { + "epoch": 0.91, + "learning_rate": 1.6307432063516405e-05, + "loss": 0.4917, + "step": 5352 + }, + { + "epoch": 0.91, + "learning_rate": 1.6306003054651207e-05, + "loss": 0.4952, + "step": 5353 + }, + { + "epoch": 0.91, + "learning_rate": 1.6304573831967666e-05, + "loss": 0.473, + "step": 5354 + }, + { + "epoch": 0.91, + "learning_rate": 1.6303144395514246e-05, + "loss": 0.5271, + "step": 5355 + }, + { + "epoch": 0.91, + "learning_rate": 1.6301714745339413e-05, + "loss": 0.5004, + "step": 5356 + }, + { + "epoch": 0.91, + "learning_rate": 1.6300284881491647e-05, + "loss": 0.4925, + "step": 5357 + }, + { + "epoch": 0.91, + "learning_rate": 1.6298854804019423e-05, + "loss": 0.4909, + "step": 5358 + }, + { + "epoch": 0.91, + "learning_rate": 1.6297424512971237e-05, + "loss": 0.5413, + "step": 5359 + }, + { + "epoch": 0.91, + "learning_rate": 1.6295994008395585e-05, + "loss": 0.5178, + "step": 5360 + }, + { + "epoch": 0.91, + "learning_rate": 1.6294563290340967e-05, + "loss": 0.5185, + "step": 5361 + }, + { + "epoch": 0.91, + "learning_rate": 1.62931323588559e-05, + "loss": 0.5083, + "step": 5362 + }, + { + "epoch": 0.91, + "learning_rate": 1.6291701213988896e-05, + "loss": 0.4914, + "step": 5363 + }, + { + "epoch": 0.91, + "learning_rate": 1.629026985578849e-05, + "loss": 0.539, + "step": 5364 + }, + { + "epoch": 0.91, + "learning_rate": 1.628883828430321e-05, + "loss": 0.4963, + "step": 5365 + }, + { + "epoch": 0.92, + "learning_rate": 1.6287406499581597e-05, + "loss": 0.4845, + "step": 5366 + }, + { + "epoch": 0.92, + "learning_rate": 1.62859745016722e-05, + "loss": 0.4611, + "step": 5367 + }, + { + "epoch": 0.92, + "learning_rate": 1.6284542290623568e-05, + "loss": 0.467, + "step": 5368 + }, + { + "epoch": 0.92, + "learning_rate": 1.628310986648427e-05, + "loss": 0.5135, + "step": 5369 + }, + { + "epoch": 0.92, + "learning_rate": 1.6281677229302875e-05, + "loss": 0.472, + "step": 5370 + }, + { + "epoch": 0.92, + "learning_rate": 1.6280244379127953e-05, + "loss": 0.4549, + "step": 5371 + }, + { + "epoch": 0.92, + "learning_rate": 1.62788113160081e-05, + "loss": 0.4916, + "step": 5372 + }, + { + "epoch": 0.92, + "learning_rate": 1.6277378039991892e-05, + "loss": 0.469, + "step": 5373 + }, + { + "epoch": 0.92, + "learning_rate": 1.627594455112794e-05, + "loss": 0.5227, + "step": 5374 + }, + { + "epoch": 0.92, + "learning_rate": 1.6274510849464842e-05, + "loss": 0.4802, + "step": 5375 + }, + { + "epoch": 0.92, + "learning_rate": 1.6273076935051217e-05, + "loss": 0.5131, + "step": 5376 + }, + { + "epoch": 0.92, + "learning_rate": 1.6271642807935672e-05, + "loss": 0.4754, + "step": 5377 + }, + { + "epoch": 0.92, + "learning_rate": 1.627020846816685e-05, + "loss": 0.4717, + "step": 5378 + }, + { + "epoch": 0.92, + "learning_rate": 1.6268773915793376e-05, + "loss": 0.4746, + "step": 5379 + }, + { + "epoch": 0.92, + "learning_rate": 1.6267339150863896e-05, + "loss": 0.4905, + "step": 5380 + }, + { + "epoch": 0.92, + "learning_rate": 1.6265904173427056e-05, + "loss": 0.4802, + "step": 5381 + }, + { + "epoch": 0.92, + "learning_rate": 1.626446898353151e-05, + "loss": 0.4956, + "step": 5382 + }, + { + "epoch": 0.92, + "learning_rate": 1.626303358122592e-05, + "loss": 0.4949, + "step": 5383 + }, + { + "epoch": 0.92, + "learning_rate": 1.6261597966558968e-05, + "loss": 0.4548, + "step": 5384 + }, + { + "epoch": 0.92, + "learning_rate": 1.6260162139579324e-05, + "loss": 0.5199, + "step": 5385 + }, + { + "epoch": 0.92, + "learning_rate": 1.6258726100335666e-05, + "loss": 0.4822, + "step": 5386 + }, + { + "epoch": 0.92, + "learning_rate": 1.6257289848876695e-05, + "loss": 0.474, + "step": 5387 + }, + { + "epoch": 0.92, + "learning_rate": 1.625585338525111e-05, + "loss": 0.4794, + "step": 5388 + }, + { + "epoch": 0.92, + "learning_rate": 1.625441670950761e-05, + "loss": 0.5229, + "step": 5389 + }, + { + "epoch": 0.92, + "learning_rate": 1.6252979821694913e-05, + "loss": 0.5165, + "step": 5390 + }, + { + "epoch": 0.92, + "learning_rate": 1.625154272186174e-05, + "loss": 0.4844, + "step": 5391 + }, + { + "epoch": 0.92, + "learning_rate": 1.625010541005682e-05, + "loss": 0.5227, + "step": 5392 + }, + { + "epoch": 0.92, + "learning_rate": 1.624866788632889e-05, + "loss": 0.471, + "step": 5393 + }, + { + "epoch": 0.92, + "learning_rate": 1.6247230150726685e-05, + "loss": 0.4566, + "step": 5394 + }, + { + "epoch": 0.92, + "learning_rate": 1.6245792203298964e-05, + "loss": 0.4752, + "step": 5395 + }, + { + "epoch": 0.92, + "learning_rate": 1.6244354044094473e-05, + "loss": 0.4771, + "step": 5396 + }, + { + "epoch": 0.92, + "learning_rate": 1.624291567316198e-05, + "loss": 0.5299, + "step": 5397 + }, + { + "epoch": 0.92, + "learning_rate": 1.6241477090550262e-05, + "loss": 0.5047, + "step": 5398 + }, + { + "epoch": 0.92, + "learning_rate": 1.624003829630809e-05, + "loss": 0.4993, + "step": 5399 + }, + { + "epoch": 0.92, + "learning_rate": 1.623859929048425e-05, + "loss": 0.4906, + "step": 5400 + }, + { + "epoch": 0.92, + "learning_rate": 1.6237160073127537e-05, + "loss": 0.4804, + "step": 5401 + }, + { + "epoch": 0.92, + "learning_rate": 1.623572064428675e-05, + "loss": 0.4777, + "step": 5402 + }, + { + "epoch": 0.92, + "learning_rate": 1.62342810040107e-05, + "loss": 0.475, + "step": 5403 + }, + { + "epoch": 0.92, + "learning_rate": 1.623284115234819e-05, + "loss": 0.4933, + "step": 5404 + }, + { + "epoch": 0.92, + "learning_rate": 1.6231401089348052e-05, + "loss": 0.4962, + "step": 5405 + }, + { + "epoch": 0.92, + "learning_rate": 1.6229960815059106e-05, + "loss": 0.5126, + "step": 5406 + }, + { + "epoch": 0.92, + "learning_rate": 1.6228520329530196e-05, + "loss": 0.498, + "step": 5407 + }, + { + "epoch": 0.92, + "learning_rate": 1.622707963281016e-05, + "loss": 0.4984, + "step": 5408 + }, + { + "epoch": 0.92, + "learning_rate": 1.622563872494785e-05, + "loss": 0.5, + "step": 5409 + }, + { + "epoch": 0.92, + "learning_rate": 1.622419760599212e-05, + "loss": 0.5062, + "step": 5410 + }, + { + "epoch": 0.92, + "learning_rate": 1.6222756275991835e-05, + "loss": 0.5206, + "step": 5411 + }, + { + "epoch": 0.92, + "learning_rate": 1.6221314734995867e-05, + "loss": 0.4988, + "step": 5412 + }, + { + "epoch": 0.92, + "learning_rate": 1.6219872983053093e-05, + "loss": 0.4911, + "step": 5413 + }, + { + "epoch": 0.92, + "learning_rate": 1.62184310202124e-05, + "loss": 0.4504, + "step": 5414 + }, + { + "epoch": 0.92, + "learning_rate": 1.6216988846522686e-05, + "loss": 0.4951, + "step": 5415 + }, + { + "epoch": 0.92, + "learning_rate": 1.621554646203284e-05, + "loss": 0.4937, + "step": 5416 + }, + { + "epoch": 0.92, + "learning_rate": 1.6214103866791776e-05, + "loss": 0.4749, + "step": 5417 + }, + { + "epoch": 0.92, + "learning_rate": 1.6212661060848412e-05, + "loss": 0.4737, + "step": 5418 + }, + { + "epoch": 0.92, + "learning_rate": 1.621121804425166e-05, + "loss": 0.4788, + "step": 5419 + }, + { + "epoch": 0.92, + "learning_rate": 1.6209774817050455e-05, + "loss": 0.4976, + "step": 5420 + }, + { + "epoch": 0.92, + "learning_rate": 1.620833137929373e-05, + "loss": 0.4732, + "step": 5421 + }, + { + "epoch": 0.92, + "learning_rate": 1.620688773103043e-05, + "loss": 0.5089, + "step": 5422 + }, + { + "epoch": 0.92, + "learning_rate": 1.62054438723095e-05, + "loss": 0.537, + "step": 5423 + }, + { + "epoch": 0.93, + "learning_rate": 1.6203999803179903e-05, + "loss": 0.467, + "step": 5424 + }, + { + "epoch": 0.93, + "learning_rate": 1.62025555236906e-05, + "loss": 0.4533, + "step": 5425 + }, + { + "epoch": 0.93, + "learning_rate": 1.6201111033890564e-05, + "loss": 0.4841, + "step": 5426 + }, + { + "epoch": 0.93, + "learning_rate": 1.6199666333828773e-05, + "loss": 0.4968, + "step": 5427 + }, + { + "epoch": 0.93, + "learning_rate": 1.619822142355421e-05, + "loss": 0.5045, + "step": 5428 + }, + { + "epoch": 0.93, + "learning_rate": 1.619677630311587e-05, + "loss": 0.4981, + "step": 5429 + }, + { + "epoch": 0.93, + "learning_rate": 1.6195330972562755e-05, + "loss": 0.5407, + "step": 5430 + }, + { + "epoch": 0.93, + "learning_rate": 1.619388543194387e-05, + "loss": 0.4954, + "step": 5431 + }, + { + "epoch": 0.93, + "learning_rate": 1.6192439681308228e-05, + "loss": 0.5007, + "step": 5432 + }, + { + "epoch": 0.93, + "learning_rate": 1.619099372070485e-05, + "loss": 0.4914, + "step": 5433 + }, + { + "epoch": 0.93, + "learning_rate": 1.6189547550182766e-05, + "loss": 0.5007, + "step": 5434 + }, + { + "epoch": 0.93, + "learning_rate": 1.618810116979101e-05, + "loss": 0.5422, + "step": 5435 + }, + { + "epoch": 0.93, + "learning_rate": 1.6186654579578628e-05, + "loss": 0.4598, + "step": 5436 + }, + { + "epoch": 0.93, + "learning_rate": 1.6185207779594665e-05, + "loss": 0.4845, + "step": 5437 + }, + { + "epoch": 0.93, + "learning_rate": 1.6183760769888178e-05, + "loss": 0.4965, + "step": 5438 + }, + { + "epoch": 0.93, + "learning_rate": 1.618231355050824e-05, + "loss": 0.4921, + "step": 5439 + }, + { + "epoch": 0.93, + "learning_rate": 1.6180866121503908e-05, + "loss": 0.4921, + "step": 5440 + }, + { + "epoch": 0.93, + "learning_rate": 1.617941848292427e-05, + "loss": 0.4986, + "step": 5441 + }, + { + "epoch": 0.93, + "learning_rate": 1.6177970634818408e-05, + "loss": 0.4478, + "step": 5442 + }, + { + "epoch": 0.93, + "learning_rate": 1.6176522577235413e-05, + "loss": 0.4974, + "step": 5443 + }, + { + "epoch": 0.93, + "learning_rate": 1.617507431022439e-05, + "loss": 0.494, + "step": 5444 + }, + { + "epoch": 0.93, + "learning_rate": 1.6173625833834438e-05, + "loss": 0.5114, + "step": 5445 + }, + { + "epoch": 0.93, + "learning_rate": 1.6172177148114676e-05, + "loss": 0.5225, + "step": 5446 + }, + { + "epoch": 0.93, + "learning_rate": 1.617072825311422e-05, + "loss": 0.5015, + "step": 5447 + }, + { + "epoch": 0.93, + "learning_rate": 1.6169279148882206e-05, + "loss": 0.4939, + "step": 5448 + }, + { + "epoch": 0.93, + "learning_rate": 1.616782983546776e-05, + "loss": 0.5134, + "step": 5449 + }, + { + "epoch": 0.93, + "learning_rate": 1.616638031292003e-05, + "loss": 0.5006, + "step": 5450 + }, + { + "epoch": 0.93, + "learning_rate": 1.616493058128816e-05, + "loss": 0.5176, + "step": 5451 + }, + { + "epoch": 0.93, + "learning_rate": 1.6163480640621312e-05, + "loss": 0.4978, + "step": 5452 + }, + { + "epoch": 0.93, + "learning_rate": 1.6162030490968646e-05, + "loss": 0.4749, + "step": 5453 + }, + { + "epoch": 0.93, + "learning_rate": 1.6160580132379335e-05, + "loss": 0.5308, + "step": 5454 + }, + { + "epoch": 0.93, + "learning_rate": 1.6159129564902548e-05, + "loss": 0.5084, + "step": 5455 + }, + { + "epoch": 0.93, + "learning_rate": 1.615767878858748e-05, + "loss": 0.4594, + "step": 5456 + }, + { + "epoch": 0.93, + "learning_rate": 1.615622780348332e-05, + "loss": 0.5166, + "step": 5457 + }, + { + "epoch": 0.93, + "learning_rate": 1.6154776609639263e-05, + "loss": 0.4913, + "step": 5458 + }, + { + "epoch": 0.93, + "learning_rate": 1.6153325207104518e-05, + "loss": 0.488, + "step": 5459 + }, + { + "epoch": 0.93, + "learning_rate": 1.6151873595928295e-05, + "loss": 0.4984, + "step": 5460 + }, + { + "epoch": 0.93, + "learning_rate": 1.6150421776159818e-05, + "loss": 0.4894, + "step": 5461 + }, + { + "epoch": 0.93, + "learning_rate": 1.6148969747848312e-05, + "loss": 0.4855, + "step": 5462 + }, + { + "epoch": 0.93, + "learning_rate": 1.614751751104301e-05, + "loss": 0.4914, + "step": 5463 + }, + { + "epoch": 0.93, + "learning_rate": 1.6146065065793154e-05, + "loss": 0.4837, + "step": 5464 + }, + { + "epoch": 0.93, + "learning_rate": 1.6144612412147995e-05, + "loss": 0.5254, + "step": 5465 + }, + { + "epoch": 0.93, + "learning_rate": 1.6143159550156782e-05, + "loss": 0.4593, + "step": 5466 + }, + { + "epoch": 0.93, + "learning_rate": 1.614170647986878e-05, + "loss": 0.4989, + "step": 5467 + }, + { + "epoch": 0.93, + "learning_rate": 1.6140253201333264e-05, + "loss": 0.4907, + "step": 5468 + }, + { + "epoch": 0.93, + "learning_rate": 1.61387997145995e-05, + "loss": 0.5153, + "step": 5469 + }, + { + "epoch": 0.93, + "learning_rate": 1.6137346019716784e-05, + "loss": 0.5518, + "step": 5470 + }, + { + "epoch": 0.93, + "learning_rate": 1.6135892116734395e-05, + "loss": 0.5136, + "step": 5471 + }, + { + "epoch": 0.93, + "learning_rate": 1.6134438005701636e-05, + "loss": 0.5022, + "step": 5472 + }, + { + "epoch": 0.93, + "learning_rate": 1.613298368666781e-05, + "loss": 0.5406, + "step": 5473 + }, + { + "epoch": 0.93, + "learning_rate": 1.6131529159682237e-05, + "loss": 0.5311, + "step": 5474 + }, + { + "epoch": 0.93, + "learning_rate": 1.613007442479422e-05, + "loss": 0.5059, + "step": 5475 + }, + { + "epoch": 0.93, + "learning_rate": 1.61286194820531e-05, + "loss": 0.4703, + "step": 5476 + }, + { + "epoch": 0.93, + "learning_rate": 1.61271643315082e-05, + "loss": 0.475, + "step": 5477 + }, + { + "epoch": 0.93, + "learning_rate": 1.6125708973208868e-05, + "loss": 0.5249, + "step": 5478 + }, + { + "epoch": 0.93, + "learning_rate": 1.6124253407204444e-05, + "loss": 0.4684, + "step": 5479 + }, + { + "epoch": 0.93, + "learning_rate": 1.6122797633544282e-05, + "loss": 0.5045, + "step": 5480 + }, + { + "epoch": 0.93, + "learning_rate": 1.6121341652277748e-05, + "loss": 0.4671, + "step": 5481 + }, + { + "epoch": 0.93, + "learning_rate": 1.611988546345421e-05, + "loss": 0.4691, + "step": 5482 + }, + { + "epoch": 0.94, + "learning_rate": 1.611842906712304e-05, + "loss": 0.5263, + "step": 5483 + }, + { + "epoch": 0.94, + "learning_rate": 1.6116972463333617e-05, + "loss": 0.4749, + "step": 5484 + }, + { + "epoch": 0.94, + "learning_rate": 1.6115515652135338e-05, + "loss": 0.5123, + "step": 5485 + }, + { + "epoch": 0.94, + "learning_rate": 1.6114058633577596e-05, + "loss": 0.4699, + "step": 5486 + }, + { + "epoch": 0.94, + "learning_rate": 1.611260140770979e-05, + "loss": 0.5024, + "step": 5487 + }, + { + "epoch": 0.94, + "learning_rate": 1.611114397458134e-05, + "loss": 0.4865, + "step": 5488 + }, + { + "epoch": 0.94, + "learning_rate": 1.6109686334241655e-05, + "loss": 0.4943, + "step": 5489 + }, + { + "epoch": 0.94, + "learning_rate": 1.6108228486740164e-05, + "loss": 0.4855, + "step": 5490 + }, + { + "epoch": 0.94, + "learning_rate": 1.610677043212629e-05, + "loss": 0.4881, + "step": 5491 + }, + { + "epoch": 0.94, + "learning_rate": 1.6105312170449483e-05, + "loss": 0.4967, + "step": 5492 + }, + { + "epoch": 0.94, + "learning_rate": 1.6103853701759185e-05, + "loss": 0.4797, + "step": 5493 + }, + { + "epoch": 0.94, + "learning_rate": 1.610239502610484e-05, + "loss": 0.4929, + "step": 5494 + }, + { + "epoch": 0.94, + "learning_rate": 1.610093614353592e-05, + "loss": 0.497, + "step": 5495 + }, + { + "epoch": 0.94, + "learning_rate": 1.6099477054101883e-05, + "loss": 0.4778, + "step": 5496 + }, + { + "epoch": 0.94, + "learning_rate": 1.6098017757852206e-05, + "loss": 0.5079, + "step": 5497 + }, + { + "epoch": 0.94, + "learning_rate": 1.6096558254836367e-05, + "loss": 0.4985, + "step": 5498 + }, + { + "epoch": 0.94, + "learning_rate": 1.6095098545103858e-05, + "loss": 0.5185, + "step": 5499 + }, + { + "epoch": 0.94, + "learning_rate": 1.6093638628704166e-05, + "loss": 0.536, + "step": 5500 + }, + { + "epoch": 0.94, + "learning_rate": 1.6092178505686802e-05, + "loss": 0.4912, + "step": 5501 + }, + { + "epoch": 0.94, + "learning_rate": 1.609071817610127e-05, + "loss": 0.4671, + "step": 5502 + }, + { + "epoch": 0.94, + "learning_rate": 1.6089257639997083e-05, + "loss": 0.4508, + "step": 5503 + }, + { + "epoch": 0.94, + "learning_rate": 1.6087796897423765e-05, + "loss": 0.4589, + "step": 5504 + }, + { + "epoch": 0.94, + "learning_rate": 1.608633594843085e-05, + "loss": 0.4729, + "step": 5505 + }, + { + "epoch": 0.94, + "learning_rate": 1.608487479306787e-05, + "loss": 0.4676, + "step": 5506 + }, + { + "epoch": 0.94, + "learning_rate": 1.6083413431384368e-05, + "loss": 0.4685, + "step": 5507 + }, + { + "epoch": 0.94, + "learning_rate": 1.6081951863429898e-05, + "loss": 0.4949, + "step": 5508 + }, + { + "epoch": 0.94, + "learning_rate": 1.6080490089254012e-05, + "loss": 0.4826, + "step": 5509 + }, + { + "epoch": 0.94, + "learning_rate": 1.607902810890628e-05, + "loss": 0.5005, + "step": 5510 + }, + { + "epoch": 0.94, + "learning_rate": 1.6077565922436272e-05, + "loss": 0.4893, + "step": 5511 + }, + { + "epoch": 0.94, + "learning_rate": 1.607610352989357e-05, + "loss": 0.5009, + "step": 5512 + }, + { + "epoch": 0.94, + "learning_rate": 1.6074640931327755e-05, + "loss": 0.5068, + "step": 5513 + }, + { + "epoch": 0.94, + "learning_rate": 1.6073178126788415e-05, + "loss": 0.5361, + "step": 5514 + }, + { + "epoch": 0.94, + "learning_rate": 1.6071715116325157e-05, + "loss": 0.4965, + "step": 5515 + }, + { + "epoch": 0.94, + "learning_rate": 1.6070251899987587e-05, + "loss": 0.5113, + "step": 5516 + }, + { + "epoch": 0.94, + "learning_rate": 1.6068788477825317e-05, + "loss": 0.4947, + "step": 5517 + }, + { + "epoch": 0.94, + "learning_rate": 1.6067324849887967e-05, + "loss": 0.4859, + "step": 5518 + }, + { + "epoch": 0.94, + "learning_rate": 1.6065861016225165e-05, + "loss": 0.4858, + "step": 5519 + }, + { + "epoch": 0.94, + "learning_rate": 1.606439697688654e-05, + "loss": 0.5204, + "step": 5520 + }, + { + "epoch": 0.94, + "learning_rate": 1.6062932731921745e-05, + "loss": 0.4804, + "step": 5521 + }, + { + "epoch": 0.94, + "learning_rate": 1.6061468281380416e-05, + "loss": 0.4645, + "step": 5522 + }, + { + "epoch": 0.94, + "learning_rate": 1.6060003625312214e-05, + "loss": 0.544, + "step": 5523 + }, + { + "epoch": 0.94, + "learning_rate": 1.6058538763766806e-05, + "loss": 0.5022, + "step": 5524 + }, + { + "epoch": 0.94, + "learning_rate": 1.605707369679385e-05, + "loss": 0.5124, + "step": 5525 + }, + { + "epoch": 0.94, + "learning_rate": 1.605560842444303e-05, + "loss": 0.4598, + "step": 5526 + }, + { + "epoch": 0.94, + "learning_rate": 1.605414294676403e-05, + "loss": 0.5063, + "step": 5527 + }, + { + "epoch": 0.94, + "learning_rate": 1.605267726380654e-05, + "loss": 0.4588, + "step": 5528 + }, + { + "epoch": 0.94, + "learning_rate": 1.605121137562025e-05, + "loss": 0.4922, + "step": 5529 + }, + { + "epoch": 0.94, + "learning_rate": 1.604974528225487e-05, + "loss": 0.4561, + "step": 5530 + }, + { + "epoch": 0.94, + "learning_rate": 1.604827898376011e-05, + "loss": 0.4924, + "step": 5531 + }, + { + "epoch": 0.94, + "learning_rate": 1.6046812480185688e-05, + "loss": 0.5116, + "step": 5532 + }, + { + "epoch": 0.94, + "learning_rate": 1.604534577158133e-05, + "loss": 0.4563, + "step": 5533 + }, + { + "epoch": 0.94, + "learning_rate": 1.6043878857996763e-05, + "loss": 0.4745, + "step": 5534 + }, + { + "epoch": 0.94, + "learning_rate": 1.6042411739481732e-05, + "loss": 0.4908, + "step": 5535 + }, + { + "epoch": 0.94, + "learning_rate": 1.6040944416085983e-05, + "loss": 0.4975, + "step": 5536 + }, + { + "epoch": 0.94, + "learning_rate": 1.603947688785926e-05, + "loss": 0.4855, + "step": 5537 + }, + { + "epoch": 0.94, + "learning_rate": 1.6038009154851334e-05, + "loss": 0.523, + "step": 5538 + }, + { + "epoch": 0.94, + "learning_rate": 1.603654121711196e-05, + "loss": 0.4707, + "step": 5539 + }, + { + "epoch": 0.94, + "learning_rate": 1.6035073074690923e-05, + "loss": 0.4954, + "step": 5540 + }, + { + "epoch": 0.94, + "learning_rate": 1.603360472763799e-05, + "loss": 0.4745, + "step": 5541 + }, + { + "epoch": 0.95, + "learning_rate": 1.6032136176002964e-05, + "loss": 0.5078, + "step": 5542 + }, + { + "epoch": 0.95, + "learning_rate": 1.603066741983563e-05, + "loss": 0.5118, + "step": 5543 + }, + { + "epoch": 0.95, + "learning_rate": 1.602919845918579e-05, + "loss": 0.515, + "step": 5544 + }, + { + "epoch": 0.95, + "learning_rate": 1.602772929410325e-05, + "loss": 0.4768, + "step": 5545 + }, + { + "epoch": 0.95, + "learning_rate": 1.6026259924637835e-05, + "loss": 0.482, + "step": 5546 + }, + { + "epoch": 0.95, + "learning_rate": 1.6024790350839358e-05, + "loss": 0.4822, + "step": 5547 + }, + { + "epoch": 0.95, + "learning_rate": 1.602332057275765e-05, + "loss": 0.4825, + "step": 5548 + }, + { + "epoch": 0.95, + "learning_rate": 1.6021850590442547e-05, + "loss": 0.4892, + "step": 5549 + }, + { + "epoch": 0.95, + "learning_rate": 1.6020380403943892e-05, + "loss": 0.4923, + "step": 5550 + }, + { + "epoch": 0.95, + "learning_rate": 1.6018910013311533e-05, + "loss": 0.506, + "step": 5551 + }, + { + "epoch": 0.95, + "learning_rate": 1.601743941859533e-05, + "loss": 0.4866, + "step": 5552 + }, + { + "epoch": 0.95, + "learning_rate": 1.6015968619845148e-05, + "loss": 0.4877, + "step": 5553 + }, + { + "epoch": 0.95, + "learning_rate": 1.6014497617110854e-05, + "loss": 0.5098, + "step": 5554 + }, + { + "epoch": 0.95, + "learning_rate": 1.6013026410442325e-05, + "loss": 0.4942, + "step": 5555 + }, + { + "epoch": 0.95, + "learning_rate": 1.6011554999889446e-05, + "loss": 0.4898, + "step": 5556 + }, + { + "epoch": 0.95, + "learning_rate": 1.601008338550211e-05, + "loss": 0.4716, + "step": 5557 + }, + { + "epoch": 0.95, + "learning_rate": 1.6008611567330215e-05, + "loss": 0.5055, + "step": 5558 + }, + { + "epoch": 0.95, + "learning_rate": 1.6007139545423666e-05, + "loss": 0.5118, + "step": 5559 + }, + { + "epoch": 0.95, + "learning_rate": 1.600566731983237e-05, + "loss": 0.5484, + "step": 5560 + }, + { + "epoch": 0.95, + "learning_rate": 1.6004194890606252e-05, + "loss": 0.492, + "step": 5561 + }, + { + "epoch": 0.95, + "learning_rate": 1.6002722257795235e-05, + "loss": 0.5453, + "step": 5562 + }, + { + "epoch": 0.95, + "learning_rate": 1.6001249421449255e-05, + "loss": 0.521, + "step": 5563 + }, + { + "epoch": 0.95, + "learning_rate": 1.599977638161825e-05, + "loss": 0.4639, + "step": 5564 + }, + { + "epoch": 0.95, + "learning_rate": 1.5998303138352163e-05, + "loss": 0.4802, + "step": 5565 + }, + { + "epoch": 0.95, + "learning_rate": 1.5996829691700954e-05, + "loss": 0.4942, + "step": 5566 + }, + { + "epoch": 0.95, + "learning_rate": 1.5995356041714576e-05, + "loss": 0.5034, + "step": 5567 + }, + { + "epoch": 0.95, + "learning_rate": 1.5993882188443e-05, + "loss": 0.5273, + "step": 5568 + }, + { + "epoch": 0.95, + "learning_rate": 1.59924081319362e-05, + "loss": 0.4745, + "step": 5569 + }, + { + "epoch": 0.95, + "learning_rate": 1.599093387224416e-05, + "loss": 0.4995, + "step": 5570 + }, + { + "epoch": 0.95, + "learning_rate": 1.5989459409416863e-05, + "loss": 0.5094, + "step": 5571 + }, + { + "epoch": 0.95, + "learning_rate": 1.5987984743504304e-05, + "loss": 0.45, + "step": 5572 + }, + { + "epoch": 0.95, + "learning_rate": 1.5986509874556488e-05, + "loss": 0.4861, + "step": 5573 + }, + { + "epoch": 0.95, + "learning_rate": 1.598503480262342e-05, + "loss": 0.4891, + "step": 5574 + }, + { + "epoch": 0.95, + "learning_rate": 1.5983559527755118e-05, + "loss": 0.481, + "step": 5575 + }, + { + "epoch": 0.95, + "learning_rate": 1.5982084050001605e-05, + "loss": 0.51, + "step": 5576 + }, + { + "epoch": 0.95, + "learning_rate": 1.5980608369412908e-05, + "loss": 0.4935, + "step": 5577 + }, + { + "epoch": 0.95, + "learning_rate": 1.597913248603906e-05, + "loss": 0.4509, + "step": 5578 + }, + { + "epoch": 0.95, + "learning_rate": 1.5977656399930114e-05, + "loss": 0.4728, + "step": 5579 + }, + { + "epoch": 0.95, + "learning_rate": 1.597618011113611e-05, + "loss": 0.5311, + "step": 5580 + }, + { + "epoch": 0.95, + "learning_rate": 1.597470361970711e-05, + "loss": 0.5155, + "step": 5581 + }, + { + "epoch": 0.95, + "learning_rate": 1.5973226925693173e-05, + "loss": 0.507, + "step": 5582 + }, + { + "epoch": 0.95, + "learning_rate": 1.5971750029144373e-05, + "loss": 0.5089, + "step": 5583 + }, + { + "epoch": 0.95, + "learning_rate": 1.5970272930110788e-05, + "loss": 0.4781, + "step": 5584 + }, + { + "epoch": 0.95, + "learning_rate": 1.59687956286425e-05, + "loss": 0.5002, + "step": 5585 + }, + { + "epoch": 0.95, + "learning_rate": 1.5967318124789598e-05, + "loss": 0.5063, + "step": 5586 + }, + { + "epoch": 0.95, + "learning_rate": 1.5965840418602187e-05, + "loss": 0.4913, + "step": 5587 + }, + { + "epoch": 0.95, + "learning_rate": 1.5964362510130365e-05, + "loss": 0.5062, + "step": 5588 + }, + { + "epoch": 0.95, + "learning_rate": 1.5962884399424243e-05, + "loss": 0.4652, + "step": 5589 + }, + { + "epoch": 0.95, + "learning_rate": 1.5961406086533945e-05, + "loss": 0.4868, + "step": 5590 + }, + { + "epoch": 0.95, + "learning_rate": 1.5959927571509596e-05, + "loss": 0.4529, + "step": 5591 + }, + { + "epoch": 0.95, + "learning_rate": 1.5958448854401325e-05, + "loss": 0.4919, + "step": 5592 + }, + { + "epoch": 0.95, + "learning_rate": 1.5956969935259267e-05, + "loss": 0.5022, + "step": 5593 + }, + { + "epoch": 0.95, + "learning_rate": 1.5955490814133577e-05, + "loss": 0.48, + "step": 5594 + }, + { + "epoch": 0.95, + "learning_rate": 1.5954011491074405e-05, + "loss": 0.4583, + "step": 5595 + }, + { + "epoch": 0.95, + "learning_rate": 1.5952531966131907e-05, + "loss": 0.4758, + "step": 5596 + }, + { + "epoch": 0.95, + "learning_rate": 1.595105223935625e-05, + "loss": 0.4611, + "step": 5597 + }, + { + "epoch": 0.95, + "learning_rate": 1.5949572310797612e-05, + "loss": 0.4713, + "step": 5598 + }, + { + "epoch": 0.95, + "learning_rate": 1.5948092180506166e-05, + "loss": 0.5429, + "step": 5599 + }, + { + "epoch": 0.96, + "learning_rate": 1.5946611848532105e-05, + "loss": 0.4905, + "step": 5600 + }, + { + "epoch": 0.96, + "learning_rate": 1.594513131492562e-05, + "loss": 0.5086, + "step": 5601 + }, + { + "epoch": 0.96, + "learning_rate": 1.5943650579736913e-05, + "loss": 0.5074, + "step": 5602 + }, + { + "epoch": 0.96, + "learning_rate": 1.5942169643016188e-05, + "loss": 0.4853, + "step": 5603 + }, + { + "epoch": 0.96, + "learning_rate": 1.5940688504813664e-05, + "loss": 0.4542, + "step": 5604 + }, + { + "epoch": 0.96, + "learning_rate": 1.593920716517956e-05, + "loss": 0.5127, + "step": 5605 + }, + { + "epoch": 0.96, + "learning_rate": 1.5937725624164105e-05, + "loss": 0.476, + "step": 5606 + }, + { + "epoch": 0.96, + "learning_rate": 1.5936243881817527e-05, + "loss": 0.4729, + "step": 5607 + }, + { + "epoch": 0.96, + "learning_rate": 1.593476193819008e-05, + "loss": 0.5012, + "step": 5608 + }, + { + "epoch": 0.96, + "learning_rate": 1.5933279793332e-05, + "loss": 0.479, + "step": 5609 + }, + { + "epoch": 0.96, + "learning_rate": 1.5931797447293553e-05, + "loss": 0.4929, + "step": 5610 + }, + { + "epoch": 0.96, + "learning_rate": 1.5930314900124993e-05, + "loss": 0.4599, + "step": 5611 + }, + { + "epoch": 0.96, + "learning_rate": 1.5928832151876595e-05, + "loss": 0.5027, + "step": 5612 + }, + { + "epoch": 0.96, + "learning_rate": 1.5927349202598632e-05, + "loss": 0.511, + "step": 5613 + }, + { + "epoch": 0.96, + "learning_rate": 1.5925866052341384e-05, + "loss": 0.4765, + "step": 5614 + }, + { + "epoch": 0.96, + "learning_rate": 1.592438270115514e-05, + "loss": 0.4928, + "step": 5615 + }, + { + "epoch": 0.96, + "learning_rate": 1.5922899149090203e-05, + "loss": 0.4474, + "step": 5616 + }, + { + "epoch": 0.96, + "learning_rate": 1.592141539619687e-05, + "loss": 0.5115, + "step": 5617 + }, + { + "epoch": 0.96, + "learning_rate": 1.5919931442525453e-05, + "loss": 0.501, + "step": 5618 + }, + { + "epoch": 0.96, + "learning_rate": 1.5918447288126265e-05, + "loss": 0.4983, + "step": 5619 + }, + { + "epoch": 0.96, + "learning_rate": 1.5916962933049634e-05, + "loss": 0.5123, + "step": 5620 + }, + { + "epoch": 0.96, + "learning_rate": 1.5915478377345894e-05, + "loss": 0.4935, + "step": 5621 + }, + { + "epoch": 0.96, + "learning_rate": 1.5913993621065368e-05, + "loss": 0.454, + "step": 5622 + }, + { + "epoch": 0.96, + "learning_rate": 1.5912508664258412e-05, + "loss": 0.4702, + "step": 5623 + }, + { + "epoch": 0.96, + "learning_rate": 1.5911023506975372e-05, + "loss": 0.4971, + "step": 5624 + }, + { + "epoch": 0.96, + "learning_rate": 1.590953814926661e-05, + "loss": 0.4861, + "step": 5625 + }, + { + "epoch": 0.96, + "learning_rate": 1.5908052591182485e-05, + "loss": 0.4726, + "step": 5626 + }, + { + "epoch": 0.96, + "learning_rate": 1.5906566832773366e-05, + "loss": 0.5358, + "step": 5627 + }, + { + "epoch": 0.96, + "learning_rate": 1.590508087408964e-05, + "loss": 0.5212, + "step": 5628 + }, + { + "epoch": 0.96, + "learning_rate": 1.590359471518168e-05, + "loss": 0.5092, + "step": 5629 + }, + { + "epoch": 0.96, + "learning_rate": 1.5902108356099885e-05, + "loss": 0.4688, + "step": 5630 + }, + { + "epoch": 0.96, + "learning_rate": 1.5900621796894654e-05, + "loss": 0.4634, + "step": 5631 + }, + { + "epoch": 0.96, + "learning_rate": 1.589913503761639e-05, + "loss": 0.5054, + "step": 5632 + }, + { + "epoch": 0.96, + "learning_rate": 1.58976480783155e-05, + "loss": 0.5125, + "step": 5633 + }, + { + "epoch": 0.96, + "learning_rate": 1.5896160919042406e-05, + "loss": 0.5179, + "step": 5634 + }, + { + "epoch": 0.96, + "learning_rate": 1.5894673559847536e-05, + "loss": 0.4938, + "step": 5635 + }, + { + "epoch": 0.96, + "learning_rate": 1.5893186000781317e-05, + "loss": 0.5454, + "step": 5636 + }, + { + "epoch": 0.96, + "learning_rate": 1.5891698241894195e-05, + "loss": 0.4844, + "step": 5637 + }, + { + "epoch": 0.96, + "learning_rate": 1.589021028323661e-05, + "loss": 0.5037, + "step": 5638 + }, + { + "epoch": 0.96, + "learning_rate": 1.5888722124859012e-05, + "loss": 0.4469, + "step": 5639 + }, + { + "epoch": 0.96, + "learning_rate": 1.588723376681187e-05, + "loss": 0.4996, + "step": 5640 + }, + { + "epoch": 0.96, + "learning_rate": 1.588574520914564e-05, + "loss": 0.489, + "step": 5641 + }, + { + "epoch": 0.96, + "learning_rate": 1.5884256451910796e-05, + "loss": 0.4354, + "step": 5642 + }, + { + "epoch": 0.96, + "learning_rate": 1.5882767495157825e-05, + "loss": 0.4757, + "step": 5643 + }, + { + "epoch": 0.96, + "learning_rate": 1.5881278338937205e-05, + "loss": 0.4625, + "step": 5644 + }, + { + "epoch": 0.96, + "learning_rate": 1.5879788983299434e-05, + "loss": 0.4973, + "step": 5645 + }, + { + "epoch": 0.96, + "learning_rate": 1.5878299428295004e-05, + "loss": 0.4866, + "step": 5646 + }, + { + "epoch": 0.96, + "learning_rate": 1.5876809673974434e-05, + "loss": 0.4782, + "step": 5647 + }, + { + "epoch": 0.96, + "learning_rate": 1.5875319720388232e-05, + "loss": 0.5195, + "step": 5648 + }, + { + "epoch": 0.96, + "learning_rate": 1.5873829567586908e-05, + "loss": 0.4762, + "step": 5649 + }, + { + "epoch": 0.96, + "learning_rate": 1.5872339215621006e-05, + "loss": 0.5041, + "step": 5650 + }, + { + "epoch": 0.96, + "learning_rate": 1.5870848664541046e-05, + "loss": 0.4733, + "step": 5651 + }, + { + "epoch": 0.96, + "learning_rate": 1.5869357914397572e-05, + "loss": 0.4965, + "step": 5652 + }, + { + "epoch": 0.96, + "learning_rate": 1.5867866965241136e-05, + "loss": 0.5068, + "step": 5653 + }, + { + "epoch": 0.96, + "learning_rate": 1.5866375817122285e-05, + "loss": 0.4555, + "step": 5654 + }, + { + "epoch": 0.96, + "learning_rate": 1.5864884470091584e-05, + "loss": 0.5119, + "step": 5655 + }, + { + "epoch": 0.96, + "learning_rate": 1.58633929241996e-05, + "loss": 0.5338, + "step": 5656 + }, + { + "epoch": 0.96, + "learning_rate": 1.5861901179496904e-05, + "loss": 0.4858, + "step": 5657 + }, + { + "epoch": 0.96, + "learning_rate": 1.586040923603408e-05, + "loss": 0.5078, + "step": 5658 + }, + { + "epoch": 0.97, + "learning_rate": 1.585891709386171e-05, + "loss": 0.492, + "step": 5659 + }, + { + "epoch": 0.97, + "learning_rate": 1.5857424753030397e-05, + "loss": 0.4783, + "step": 5660 + }, + { + "epoch": 0.97, + "learning_rate": 1.5855932213590734e-05, + "loss": 0.4688, + "step": 5661 + }, + { + "epoch": 0.97, + "learning_rate": 1.5854439475593335e-05, + "loss": 0.4822, + "step": 5662 + }, + { + "epoch": 0.97, + "learning_rate": 1.5852946539088808e-05, + "loss": 0.4785, + "step": 5663 + }, + { + "epoch": 0.97, + "learning_rate": 1.5851453404127778e-05, + "loss": 0.4803, + "step": 5664 + }, + { + "epoch": 0.97, + "learning_rate": 1.5849960070760875e-05, + "loss": 0.4376, + "step": 5665 + }, + { + "epoch": 0.97, + "learning_rate": 1.5848466539038733e-05, + "loss": 0.4994, + "step": 5666 + }, + { + "epoch": 0.97, + "learning_rate": 1.5846972809011986e-05, + "loss": 0.4886, + "step": 5667 + }, + { + "epoch": 0.97, + "learning_rate": 1.5845478880731285e-05, + "loss": 0.5311, + "step": 5668 + }, + { + "epoch": 0.97, + "learning_rate": 1.5843984754247294e-05, + "loss": 0.5121, + "step": 5669 + }, + { + "epoch": 0.97, + "learning_rate": 1.5842490429610668e-05, + "loss": 0.4999, + "step": 5670 + }, + { + "epoch": 0.97, + "learning_rate": 1.584099590687207e-05, + "loss": 0.4592, + "step": 5671 + }, + { + "epoch": 0.97, + "learning_rate": 1.5839501186082184e-05, + "loss": 0.5048, + "step": 5672 + }, + { + "epoch": 0.97, + "learning_rate": 1.5838006267291685e-05, + "loss": 0.4492, + "step": 5673 + }, + { + "epoch": 0.97, + "learning_rate": 1.583651115055126e-05, + "loss": 0.4816, + "step": 5674 + }, + { + "epoch": 0.97, + "learning_rate": 1.5835015835911612e-05, + "loss": 0.4862, + "step": 5675 + }, + { + "epoch": 0.97, + "learning_rate": 1.5833520323423442e-05, + "loss": 0.4804, + "step": 5676 + }, + { + "epoch": 0.97, + "learning_rate": 1.5832024613137453e-05, + "loss": 0.5004, + "step": 5677 + }, + { + "epoch": 0.97, + "learning_rate": 1.583052870510436e-05, + "loss": 0.5148, + "step": 5678 + }, + { + "epoch": 0.97, + "learning_rate": 1.582903259937489e-05, + "loss": 0.471, + "step": 5679 + }, + { + "epoch": 0.97, + "learning_rate": 1.582753629599977e-05, + "loss": 0.4901, + "step": 5680 + }, + { + "epoch": 0.97, + "learning_rate": 1.5826039795029733e-05, + "loss": 0.4749, + "step": 5681 + }, + { + "epoch": 0.97, + "learning_rate": 1.5824543096515523e-05, + "loss": 0.5195, + "step": 5682 + }, + { + "epoch": 0.97, + "learning_rate": 1.5823046200507888e-05, + "loss": 0.4778, + "step": 5683 + }, + { + "epoch": 0.97, + "learning_rate": 1.582154910705758e-05, + "loss": 0.4946, + "step": 5684 + }, + { + "epoch": 0.97, + "learning_rate": 1.5820051816215372e-05, + "loss": 0.4777, + "step": 5685 + }, + { + "epoch": 0.97, + "learning_rate": 1.5818554328032017e-05, + "loss": 0.5504, + "step": 5686 + }, + { + "epoch": 0.97, + "learning_rate": 1.5817056642558307e-05, + "loss": 0.4878, + "step": 5687 + }, + { + "epoch": 0.97, + "learning_rate": 1.5815558759845013e-05, + "loss": 0.5231, + "step": 5688 + }, + { + "epoch": 0.97, + "learning_rate": 1.5814060679942927e-05, + "loss": 0.5116, + "step": 5689 + }, + { + "epoch": 0.97, + "learning_rate": 1.5812562402902844e-05, + "loss": 0.5061, + "step": 5690 + }, + { + "epoch": 0.97, + "learning_rate": 1.5811063928775564e-05, + "loss": 0.4923, + "step": 5691 + }, + { + "epoch": 0.97, + "learning_rate": 1.5809565257611904e-05, + "loss": 0.5113, + "step": 5692 + }, + { + "epoch": 0.97, + "learning_rate": 1.580806638946267e-05, + "loss": 0.4975, + "step": 5693 + }, + { + "epoch": 0.97, + "learning_rate": 1.5806567324378696e-05, + "loss": 0.4753, + "step": 5694 + }, + { + "epoch": 0.97, + "learning_rate": 1.5805068062410795e-05, + "loss": 0.4822, + "step": 5695 + }, + { + "epoch": 0.97, + "learning_rate": 1.580356860360982e-05, + "loss": 0.5131, + "step": 5696 + }, + { + "epoch": 0.97, + "learning_rate": 1.5802068948026598e-05, + "loss": 0.5049, + "step": 5697 + }, + { + "epoch": 0.97, + "learning_rate": 1.5800569095711983e-05, + "loss": 0.4824, + "step": 5698 + }, + { + "epoch": 0.97, + "learning_rate": 1.5799069046716836e-05, + "loss": 0.463, + "step": 5699 + }, + { + "epoch": 0.97, + "learning_rate": 1.5797568801092017e-05, + "loss": 0.4887, + "step": 5700 + }, + { + "epoch": 0.97, + "learning_rate": 1.5796068358888387e-05, + "loss": 0.4951, + "step": 5701 + }, + { + "epoch": 0.97, + "learning_rate": 1.5794567720156833e-05, + "loss": 0.4816, + "step": 5702 + }, + { + "epoch": 0.97, + "learning_rate": 1.5793066884948232e-05, + "loss": 0.5291, + "step": 5703 + }, + { + "epoch": 0.97, + "learning_rate": 1.5791565853313477e-05, + "loss": 0.4959, + "step": 5704 + }, + { + "epoch": 0.97, + "learning_rate": 1.5790064625303453e-05, + "loss": 0.4911, + "step": 5705 + }, + { + "epoch": 0.97, + "learning_rate": 1.5788563200969072e-05, + "loss": 0.4981, + "step": 5706 + }, + { + "epoch": 0.97, + "learning_rate": 1.5787061580361246e-05, + "loss": 0.503, + "step": 5707 + }, + { + "epoch": 0.97, + "learning_rate": 1.578555976353088e-05, + "loss": 0.4907, + "step": 5708 + }, + { + "epoch": 0.97, + "learning_rate": 1.5784057750528904e-05, + "loss": 0.5002, + "step": 5709 + }, + { + "epoch": 0.97, + "learning_rate": 1.5782555541406242e-05, + "loss": 0.5077, + "step": 5710 + }, + { + "epoch": 0.97, + "learning_rate": 1.578105313621383e-05, + "loss": 0.5056, + "step": 5711 + }, + { + "epoch": 0.97, + "learning_rate": 1.5779550535002615e-05, + "loss": 0.4953, + "step": 5712 + }, + { + "epoch": 0.97, + "learning_rate": 1.577804773782354e-05, + "loss": 0.4989, + "step": 5713 + }, + { + "epoch": 0.97, + "learning_rate": 1.5776544744727567e-05, + "loss": 0.4852, + "step": 5714 + }, + { + "epoch": 0.97, + "learning_rate": 1.5775041555765655e-05, + "loss": 0.5068, + "step": 5715 + }, + { + "epoch": 0.97, + "learning_rate": 1.577353817098877e-05, + "loss": 0.5093, + "step": 5716 + }, + { + "epoch": 0.98, + "learning_rate": 1.5772034590447888e-05, + "loss": 0.5069, + "step": 5717 + }, + { + "epoch": 0.98, + "learning_rate": 1.5770530814193994e-05, + "loss": 0.4914, + "step": 5718 + }, + { + "epoch": 0.98, + "learning_rate": 1.576902684227808e-05, + "loss": 0.4798, + "step": 5719 + }, + { + "epoch": 0.98, + "learning_rate": 1.5767522674751133e-05, + "loss": 0.5082, + "step": 5720 + }, + { + "epoch": 0.98, + "learning_rate": 1.576601831166416e-05, + "loss": 0.4822, + "step": 5721 + }, + { + "epoch": 0.98, + "learning_rate": 1.576451375306817e-05, + "loss": 0.4794, + "step": 5722 + }, + { + "epoch": 0.98, + "learning_rate": 1.5763008999014175e-05, + "loss": 0.4919, + "step": 5723 + }, + { + "epoch": 0.98, + "learning_rate": 1.5761504049553195e-05, + "loss": 0.4643, + "step": 5724 + }, + { + "epoch": 0.98, + "learning_rate": 1.5759998904736263e-05, + "loss": 0.501, + "step": 5725 + }, + { + "epoch": 0.98, + "learning_rate": 1.5758493564614418e-05, + "loss": 0.4991, + "step": 5726 + }, + { + "epoch": 0.98, + "learning_rate": 1.5756988029238692e-05, + "loss": 0.4949, + "step": 5727 + }, + { + "epoch": 0.98, + "learning_rate": 1.5755482298660144e-05, + "loss": 0.4852, + "step": 5728 + }, + { + "epoch": 0.98, + "learning_rate": 1.575397637292982e-05, + "loss": 0.4955, + "step": 5729 + }, + { + "epoch": 0.98, + "learning_rate": 1.5752470252098784e-05, + "loss": 0.4707, + "step": 5730 + }, + { + "epoch": 0.98, + "learning_rate": 1.5750963936218104e-05, + "loss": 0.4769, + "step": 5731 + }, + { + "epoch": 0.98, + "learning_rate": 1.574945742533886e-05, + "loss": 0.5206, + "step": 5732 + }, + { + "epoch": 0.98, + "learning_rate": 1.5747950719512126e-05, + "loss": 0.4802, + "step": 5733 + }, + { + "epoch": 0.98, + "learning_rate": 1.5746443818788996e-05, + "loss": 0.4993, + "step": 5734 + }, + { + "epoch": 0.98, + "learning_rate": 1.574493672322056e-05, + "loss": 0.5039, + "step": 5735 + }, + { + "epoch": 0.98, + "learning_rate": 1.574342943285792e-05, + "loss": 0.4715, + "step": 5736 + }, + { + "epoch": 0.98, + "learning_rate": 1.5741921947752188e-05, + "loss": 0.512, + "step": 5737 + }, + { + "epoch": 0.98, + "learning_rate": 1.5740414267954473e-05, + "loss": 0.5115, + "step": 5738 + }, + { + "epoch": 0.98, + "learning_rate": 1.57389063935159e-05, + "loss": 0.4628, + "step": 5739 + }, + { + "epoch": 0.98, + "learning_rate": 1.5737398324487593e-05, + "loss": 0.4917, + "step": 5740 + }, + { + "epoch": 0.98, + "learning_rate": 1.5735890060920692e-05, + "loss": 0.4824, + "step": 5741 + }, + { + "epoch": 0.98, + "learning_rate": 1.5734381602866333e-05, + "loss": 0.5364, + "step": 5742 + }, + { + "epoch": 0.98, + "learning_rate": 1.5732872950375663e-05, + "loss": 0.4582, + "step": 5743 + }, + { + "epoch": 0.98, + "learning_rate": 1.573136410349984e-05, + "loss": 0.4727, + "step": 5744 + }, + { + "epoch": 0.98, + "learning_rate": 1.5729855062290024e-05, + "loss": 0.5175, + "step": 5745 + }, + { + "epoch": 0.98, + "learning_rate": 1.5728345826797377e-05, + "loss": 0.5011, + "step": 5746 + }, + { + "epoch": 0.98, + "learning_rate": 1.572683639707308e-05, + "loss": 0.507, + "step": 5747 + }, + { + "epoch": 0.98, + "learning_rate": 1.5725326773168304e-05, + "loss": 0.4797, + "step": 5748 + }, + { + "epoch": 0.98, + "learning_rate": 1.572381695513425e-05, + "loss": 0.4325, + "step": 5749 + }, + { + "epoch": 0.98, + "learning_rate": 1.57223069430221e-05, + "loss": 0.5122, + "step": 5750 + }, + { + "epoch": 0.98, + "learning_rate": 1.5720796736883058e-05, + "loss": 0.5004, + "step": 5751 + }, + { + "epoch": 0.98, + "learning_rate": 1.5719286336768332e-05, + "loss": 0.5258, + "step": 5752 + }, + { + "epoch": 0.98, + "learning_rate": 1.571777574272913e-05, + "loss": 0.5033, + "step": 5753 + }, + { + "epoch": 0.98, + "learning_rate": 1.5716264954816676e-05, + "loss": 0.4953, + "step": 5754 + }, + { + "epoch": 0.98, + "learning_rate": 1.5714753973082196e-05, + "loss": 0.4692, + "step": 5755 + }, + { + "epoch": 0.98, + "learning_rate": 1.571324279757693e-05, + "loss": 0.4876, + "step": 5756 + }, + { + "epoch": 0.98, + "learning_rate": 1.5711731428352102e-05, + "loss": 0.4977, + "step": 5757 + }, + { + "epoch": 0.98, + "learning_rate": 1.571021986545897e-05, + "loss": 0.5211, + "step": 5758 + }, + { + "epoch": 0.98, + "learning_rate": 1.5708708108948782e-05, + "loss": 0.5044, + "step": 5759 + }, + { + "epoch": 0.98, + "learning_rate": 1.5707196158872803e-05, + "loss": 0.5371, + "step": 5760 + }, + { + "epoch": 0.98, + "learning_rate": 1.570568401528229e-05, + "loss": 0.4775, + "step": 5761 + }, + { + "epoch": 0.98, + "learning_rate": 1.5704171678228522e-05, + "loss": 0.5372, + "step": 5762 + }, + { + "epoch": 0.98, + "learning_rate": 1.5702659147762776e-05, + "loss": 0.4799, + "step": 5763 + }, + { + "epoch": 0.98, + "learning_rate": 1.570114642393634e-05, + "loss": 0.4938, + "step": 5764 + }, + { + "epoch": 0.98, + "learning_rate": 1.5699633506800502e-05, + "loss": 0.4729, + "step": 5765 + }, + { + "epoch": 0.98, + "learning_rate": 1.569812039640656e-05, + "loss": 0.5624, + "step": 5766 + }, + { + "epoch": 0.98, + "learning_rate": 1.5696607092805826e-05, + "loss": 0.4868, + "step": 5767 + }, + { + "epoch": 0.98, + "learning_rate": 1.5695093596049603e-05, + "loss": 0.5454, + "step": 5768 + }, + { + "epoch": 0.98, + "learning_rate": 1.5693579906189214e-05, + "loss": 0.5073, + "step": 5769 + }, + { + "epoch": 0.98, + "learning_rate": 1.5692066023275986e-05, + "loss": 0.5036, + "step": 5770 + }, + { + "epoch": 0.98, + "learning_rate": 1.5690551947361246e-05, + "loss": 0.4873, + "step": 5771 + }, + { + "epoch": 0.98, + "learning_rate": 1.5689037678496335e-05, + "loss": 0.4752, + "step": 5772 + }, + { + "epoch": 0.98, + "learning_rate": 1.5687523216732595e-05, + "loss": 0.503, + "step": 5773 + }, + { + "epoch": 0.98, + "learning_rate": 1.568600856212138e-05, + "loss": 0.5151, + "step": 5774 + }, + { + "epoch": 0.98, + "learning_rate": 1.5684493714714047e-05, + "loss": 0.4974, + "step": 5775 + }, + { + "epoch": 0.99, + "learning_rate": 1.5682978674561957e-05, + "loss": 0.4889, + "step": 5776 + }, + { + "epoch": 0.99, + "learning_rate": 1.5681463441716482e-05, + "loss": 0.4339, + "step": 5777 + }, + { + "epoch": 0.99, + "learning_rate": 1.5679948016229002e-05, + "loss": 0.4792, + "step": 5778 + }, + { + "epoch": 0.99, + "learning_rate": 1.5678432398150896e-05, + "loss": 0.4913, + "step": 5779 + }, + { + "epoch": 0.99, + "learning_rate": 1.5676916587533558e-05, + "loss": 0.5153, + "step": 5780 + }, + { + "epoch": 0.99, + "learning_rate": 1.5675400584428386e-05, + "loss": 0.4874, + "step": 5781 + }, + { + "epoch": 0.99, + "learning_rate": 1.5673884388886774e-05, + "loss": 0.5055, + "step": 5782 + }, + { + "epoch": 0.99, + "learning_rate": 1.5672368000960144e-05, + "loss": 0.5182, + "step": 5783 + }, + { + "epoch": 0.99, + "learning_rate": 1.5670851420699905e-05, + "loss": 0.4897, + "step": 5784 + }, + { + "epoch": 0.99, + "learning_rate": 1.5669334648157484e-05, + "loss": 0.459, + "step": 5785 + }, + { + "epoch": 0.99, + "learning_rate": 1.5667817683384304e-05, + "loss": 0.5346, + "step": 5786 + }, + { + "epoch": 0.99, + "learning_rate": 1.566630052643181e-05, + "loss": 0.5055, + "step": 5787 + }, + { + "epoch": 0.99, + "learning_rate": 1.5664783177351436e-05, + "loss": 0.5088, + "step": 5788 + }, + { + "epoch": 0.99, + "learning_rate": 1.5663265636194633e-05, + "loss": 0.4955, + "step": 5789 + }, + { + "epoch": 0.99, + "learning_rate": 1.566174790301286e-05, + "loss": 0.4965, + "step": 5790 + }, + { + "epoch": 0.99, + "learning_rate": 1.5660229977857578e-05, + "loss": 0.4985, + "step": 5791 + }, + { + "epoch": 0.99, + "learning_rate": 1.565871186078025e-05, + "loss": 0.4905, + "step": 5792 + }, + { + "epoch": 0.99, + "learning_rate": 1.565719355183236e-05, + "loss": 0.5189, + "step": 5793 + }, + { + "epoch": 0.99, + "learning_rate": 1.5655675051065382e-05, + "loss": 0.5096, + "step": 5794 + }, + { + "epoch": 0.99, + "learning_rate": 1.5654156358530808e-05, + "loss": 0.5435, + "step": 5795 + }, + { + "epoch": 0.99, + "learning_rate": 1.565263747428013e-05, + "loss": 0.4978, + "step": 5796 + }, + { + "epoch": 0.99, + "learning_rate": 1.565111839836485e-05, + "loss": 0.4861, + "step": 5797 + }, + { + "epoch": 0.99, + "learning_rate": 1.5649599130836474e-05, + "loss": 0.4897, + "step": 5798 + }, + { + "epoch": 0.99, + "learning_rate": 1.5648079671746522e-05, + "loss": 0.4629, + "step": 5799 + }, + { + "epoch": 0.99, + "learning_rate": 1.5646560021146507e-05, + "loss": 0.5322, + "step": 5800 + }, + { + "epoch": 0.99, + "learning_rate": 1.564504017908796e-05, + "loss": 0.4641, + "step": 5801 + }, + { + "epoch": 0.99, + "learning_rate": 1.564352014562241e-05, + "loss": 0.4749, + "step": 5802 + }, + { + "epoch": 0.99, + "learning_rate": 1.5641999920801403e-05, + "loss": 0.4796, + "step": 5803 + }, + { + "epoch": 0.99, + "learning_rate": 1.5640479504676483e-05, + "loss": 0.4993, + "step": 5804 + }, + { + "epoch": 0.99, + "learning_rate": 1.5638958897299205e-05, + "loss": 0.5285, + "step": 5805 + }, + { + "epoch": 0.99, + "learning_rate": 1.5637438098721122e-05, + "loss": 0.4916, + "step": 5806 + }, + { + "epoch": 0.99, + "learning_rate": 1.5635917108993805e-05, + "loss": 0.5458, + "step": 5807 + }, + { + "epoch": 0.99, + "learning_rate": 1.563439592816882e-05, + "loss": 0.485, + "step": 5808 + }, + { + "epoch": 0.99, + "learning_rate": 1.563287455629776e-05, + "loss": 0.4966, + "step": 5809 + }, + { + "epoch": 0.99, + "learning_rate": 1.5631352993432197e-05, + "loss": 0.4723, + "step": 5810 + }, + { + "epoch": 0.99, + "learning_rate": 1.562983123962373e-05, + "loss": 0.486, + "step": 5811 + }, + { + "epoch": 0.99, + "learning_rate": 1.5628309294923952e-05, + "loss": 0.4603, + "step": 5812 + }, + { + "epoch": 0.99, + "learning_rate": 1.5626787159384474e-05, + "loss": 0.5243, + "step": 5813 + }, + { + "epoch": 0.99, + "learning_rate": 1.56252648330569e-05, + "loss": 0.4671, + "step": 5814 + }, + { + "epoch": 0.99, + "learning_rate": 1.562374231599285e-05, + "loss": 0.4965, + "step": 5815 + }, + { + "epoch": 0.99, + "learning_rate": 1.5622219608243954e-05, + "loss": 0.4792, + "step": 5816 + }, + { + "epoch": 0.99, + "learning_rate": 1.5620696709861833e-05, + "loss": 0.5252, + "step": 5817 + }, + { + "epoch": 0.99, + "learning_rate": 1.5619173620898133e-05, + "loss": 0.5375, + "step": 5818 + }, + { + "epoch": 0.99, + "learning_rate": 1.561765034140449e-05, + "loss": 0.5283, + "step": 5819 + }, + { + "epoch": 0.99, + "learning_rate": 1.561612687143256e-05, + "loss": 0.4474, + "step": 5820 + }, + { + "epoch": 0.99, + "learning_rate": 1.5614603211033996e-05, + "loss": 0.4987, + "step": 5821 + }, + { + "epoch": 0.99, + "learning_rate": 1.561307936026046e-05, + "loss": 0.4948, + "step": 5822 + }, + { + "epoch": 0.99, + "learning_rate": 1.561155531916363e-05, + "loss": 0.462, + "step": 5823 + }, + { + "epoch": 0.99, + "learning_rate": 1.561003108779517e-05, + "loss": 0.5018, + "step": 5824 + }, + { + "epoch": 0.99, + "learning_rate": 1.560850666620677e-05, + "loss": 0.4643, + "step": 5825 + }, + { + "epoch": 0.99, + "learning_rate": 1.5606982054450113e-05, + "loss": 0.5005, + "step": 5826 + }, + { + "epoch": 0.99, + "learning_rate": 1.5605457252576897e-05, + "loss": 0.5048, + "step": 5827 + }, + { + "epoch": 0.99, + "learning_rate": 1.5603932260638827e-05, + "loss": 0.5104, + "step": 5828 + }, + { + "epoch": 0.99, + "learning_rate": 1.560240707868761e-05, + "loss": 0.5216, + "step": 5829 + }, + { + "epoch": 0.99, + "learning_rate": 1.5600881706774953e-05, + "loss": 0.504, + "step": 5830 + }, + { + "epoch": 0.99, + "learning_rate": 1.5599356144952587e-05, + "loss": 0.4775, + "step": 5831 + }, + { + "epoch": 0.99, + "learning_rate": 1.5597830393272234e-05, + "loss": 0.5122, + "step": 5832 + }, + { + "epoch": 0.99, + "learning_rate": 1.5596304451785625e-05, + "loss": 0.5074, + "step": 5833 + }, + { + "epoch": 0.99, + "learning_rate": 1.559477832054451e-05, + "loss": 0.4799, + "step": 5834 + }, + { + "epoch": 1.0, + "learning_rate": 1.5593251999600628e-05, + "loss": 0.4992, + "step": 5835 + }, + { + "epoch": 1.0, + "learning_rate": 1.559172548900573e-05, + "loss": 0.4515, + "step": 5836 + }, + { + "epoch": 1.0, + "learning_rate": 1.5590198788811585e-05, + "loss": 0.4755, + "step": 5837 + }, + { + "epoch": 1.0, + "learning_rate": 1.5588671899069948e-05, + "loss": 0.5369, + "step": 5838 + }, + { + "epoch": 1.0, + "learning_rate": 1.55871448198326e-05, + "loss": 0.4684, + "step": 5839 + }, + { + "epoch": 1.0, + "learning_rate": 1.5585617551151317e-05, + "loss": 0.4783, + "step": 5840 + }, + { + "epoch": 1.0, + "learning_rate": 1.558409009307788e-05, + "loss": 0.5128, + "step": 5841 + }, + { + "epoch": 1.0, + "learning_rate": 1.558256244566409e-05, + "loss": 0.5476, + "step": 5842 + }, + { + "epoch": 1.0, + "learning_rate": 1.558103460896174e-05, + "loss": 0.515, + "step": 5843 + }, + { + "epoch": 1.0, + "learning_rate": 1.557950658302263e-05, + "loss": 0.5359, + "step": 5844 + }, + { + "epoch": 1.0, + "learning_rate": 1.5577978367898575e-05, + "loss": 0.498, + "step": 5845 + }, + { + "epoch": 1.0, + "learning_rate": 1.5576449963641394e-05, + "loss": 0.4853, + "step": 5846 + }, + { + "epoch": 1.0, + "learning_rate": 1.5574921370302912e-05, + "loss": 0.5174, + "step": 5847 + }, + { + "epoch": 1.0, + "learning_rate": 1.5573392587934954e-05, + "loss": 0.5056, + "step": 5848 + }, + { + "epoch": 1.0, + "learning_rate": 1.557186361658936e-05, + "loss": 0.4901, + "step": 5849 + }, + { + "epoch": 1.0, + "learning_rate": 1.5570334456317972e-05, + "loss": 0.4887, + "step": 5850 + }, + { + "epoch": 1.0, + "learning_rate": 1.556880510717264e-05, + "loss": 0.4829, + "step": 5851 + }, + { + "epoch": 1.0, + "learning_rate": 1.5567275569205216e-05, + "loss": 0.5136, + "step": 5852 + }, + { + "epoch": 1.0, + "learning_rate": 1.5565745842467567e-05, + "loss": 0.4997, + "step": 5853 + }, + { + "epoch": 1.0, + "learning_rate": 1.5564215927011567e-05, + "loss": 0.4977, + "step": 5854 + }, + { + "epoch": 1.0, + "learning_rate": 1.5562685822889078e-05, + "loss": 0.4962, + "step": 5855 + }, + { + "epoch": 1.0, + "learning_rate": 1.5561155530151985e-05, + "loss": 0.4834, + "step": 5856 + }, + { + "epoch": 1.0, + "learning_rate": 1.5559625048852182e-05, + "loss": 0.4712, + "step": 5857 + }, + { + "epoch": 1.0, + "learning_rate": 1.5558094379041558e-05, + "loss": 0.4838, + "step": 5858 + }, + { + "epoch": 1.0, + "learning_rate": 1.555656352077202e-05, + "loss": 0.5168, + "step": 5859 + }, + { + "epoch": 1.0, + "learning_rate": 1.5555032474095464e-05, + "loss": 0.4696, + "step": 5860 + }, + { + "epoch": 1.0, + "learning_rate": 1.5553501239063813e-05, + "loss": 0.4395, + "step": 5861 + }, + { + "epoch": 1.0, + "learning_rate": 1.5551969815728983e-05, + "loss": 0.5154, + "step": 5862 + }, + { + "epoch": 1.0, + "learning_rate": 1.55504382041429e-05, + "loss": 0.5074, + "step": 5863 + }, + { + "epoch": 1.0, + "learning_rate": 1.5548906404357492e-05, + "loss": 0.4608, + "step": 5864 + }, + { + "epoch": 1.0, + "learning_rate": 1.5547374416424712e-05, + "loss": 0.452, + "step": 5865 + }, + { + "epoch": 1.0, + "learning_rate": 1.554584224039649e-05, + "loss": 0.4298, + "step": 5866 + }, + { + "epoch": 1.0, + "learning_rate": 1.5544309876324787e-05, + "loss": 0.4495, + "step": 5867 + }, + { + "epoch": 1.0, + "learning_rate": 1.5542777324261556e-05, + "loss": 0.4225, + "step": 5868 + }, + { + "epoch": 1.0, + "learning_rate": 1.5541244584258763e-05, + "loss": 0.4532, + "step": 5869 + }, + { + "epoch": 1.0, + "learning_rate": 1.553971165636838e-05, + "loss": 0.3927, + "step": 5870 + }, + { + "epoch": 1.0, + "learning_rate": 1.553817854064238e-05, + "loss": 0.4641, + "step": 5871 + }, + { + "epoch": 1.0, + "learning_rate": 1.5536645237132755e-05, + "loss": 0.4496, + "step": 5872 + }, + { + "epoch": 1.0, + "learning_rate": 1.553511174589149e-05, + "loss": 0.4155, + "step": 5873 + }, + { + "epoch": 1.0, + "learning_rate": 1.5533578066970575e-05, + "loss": 0.4013, + "step": 5874 + }, + { + "epoch": 1.0, + "learning_rate": 1.5532044200422022e-05, + "loss": 0.4346, + "step": 5875 + }, + { + "epoch": 1.0, + "learning_rate": 1.5530510146297834e-05, + "loss": 0.4279, + "step": 5876 + }, + { + "epoch": 1.0, + "learning_rate": 1.5528975904650034e-05, + "loss": 0.4295, + "step": 5877 + }, + { + "epoch": 1.0, + "learning_rate": 1.5527441475530634e-05, + "loss": 0.4279, + "step": 5878 + }, + { + "epoch": 1.0, + "learning_rate": 1.552590685899167e-05, + "loss": 0.4373, + "step": 5879 + }, + { + "epoch": 1.0, + "learning_rate": 1.5524372055085168e-05, + "loss": 0.4519, + "step": 5880 + }, + { + "epoch": 1.0, + "learning_rate": 1.5522837063863175e-05, + "loss": 0.3929, + "step": 5881 + }, + { + "epoch": 1.0, + "learning_rate": 1.552130188537774e-05, + "loss": 0.4235, + "step": 5882 + }, + { + "epoch": 1.0, + "learning_rate": 1.551976651968091e-05, + "loss": 0.4325, + "step": 5883 + }, + { + "epoch": 1.0, + "learning_rate": 1.5518230966824753e-05, + "loss": 0.4319, + "step": 5884 + }, + { + "epoch": 1.0, + "learning_rate": 1.5516695226861325e-05, + "loss": 0.4319, + "step": 5885 + }, + { + "epoch": 1.0, + "learning_rate": 1.551515929984271e-05, + "loss": 0.4013, + "step": 5886 + }, + { + "epoch": 1.0, + "learning_rate": 1.5513623185820973e-05, + "loss": 0.4415, + "step": 5887 + }, + { + "epoch": 1.0, + "learning_rate": 1.5512086884848213e-05, + "loss": 0.428, + "step": 5888 + }, + { + "epoch": 1.0, + "learning_rate": 1.5510550396976512e-05, + "loss": 0.424, + "step": 5889 + }, + { + "epoch": 1.0, + "learning_rate": 1.550901372225797e-05, + "loss": 0.4263, + "step": 5890 + }, + { + "epoch": 1.0, + "learning_rate": 1.5507476860744697e-05, + "loss": 0.4003, + "step": 5891 + }, + { + "epoch": 1.0, + "learning_rate": 1.5505939812488796e-05, + "loss": 0.4193, + "step": 5892 + }, + { + "epoch": 1.01, + "learning_rate": 1.5504402577542387e-05, + "loss": 0.4343, + "step": 5893 + }, + { + "epoch": 1.01, + "learning_rate": 1.5502865155957595e-05, + "loss": 0.4646, + "step": 5894 + }, + { + "epoch": 1.01, + "learning_rate": 1.5501327547786545e-05, + "loss": 0.4444, + "step": 5895 + }, + { + "epoch": 1.01, + "learning_rate": 1.5499789753081378e-05, + "loss": 0.4502, + "step": 5896 + }, + { + "epoch": 1.01, + "learning_rate": 1.549825177189423e-05, + "loss": 0.4778, + "step": 5897 + }, + { + "epoch": 1.01, + "learning_rate": 1.5496713604277254e-05, + "loss": 0.4251, + "step": 5898 + }, + { + "epoch": 1.01, + "learning_rate": 1.5495175250282605e-05, + "loss": 0.4261, + "step": 5899 + }, + { + "epoch": 1.01, + "learning_rate": 1.5493636709962442e-05, + "loss": 0.4357, + "step": 5900 + }, + { + "epoch": 1.01, + "learning_rate": 1.5492097983368933e-05, + "loss": 0.4347, + "step": 5901 + }, + { + "epoch": 1.01, + "learning_rate": 1.5490559070554255e-05, + "loss": 0.4509, + "step": 5902 + }, + { + "epoch": 1.01, + "learning_rate": 1.5489019971570588e-05, + "loss": 0.4325, + "step": 5903 + }, + { + "epoch": 1.01, + "learning_rate": 1.548748068647011e-05, + "loss": 0.4474, + "step": 5904 + }, + { + "epoch": 1.01, + "learning_rate": 1.5485941215305017e-05, + "loss": 0.4157, + "step": 5905 + }, + { + "epoch": 1.01, + "learning_rate": 1.5484401558127515e-05, + "loss": 0.4379, + "step": 5906 + }, + { + "epoch": 1.01, + "learning_rate": 1.5482861714989808e-05, + "loss": 0.4386, + "step": 5907 + }, + { + "epoch": 1.01, + "learning_rate": 1.54813216859441e-05, + "loss": 0.4074, + "step": 5908 + }, + { + "epoch": 1.01, + "learning_rate": 1.5479781471042613e-05, + "loss": 0.4538, + "step": 5909 + }, + { + "epoch": 1.01, + "learning_rate": 1.5478241070337574e-05, + "loss": 0.4458, + "step": 5910 + }, + { + "epoch": 1.01, + "learning_rate": 1.547670048388121e-05, + "loss": 0.389, + "step": 5911 + }, + { + "epoch": 1.01, + "learning_rate": 1.5475159711725758e-05, + "loss": 0.4099, + "step": 5912 + }, + { + "epoch": 1.01, + "learning_rate": 1.5473618753923463e-05, + "loss": 0.4074, + "step": 5913 + }, + { + "epoch": 1.01, + "learning_rate": 1.5472077610526574e-05, + "loss": 0.4315, + "step": 5914 + }, + { + "epoch": 1.01, + "learning_rate": 1.5470536281587344e-05, + "loss": 0.3973, + "step": 5915 + }, + { + "epoch": 1.01, + "learning_rate": 1.546899476715804e-05, + "loss": 0.43, + "step": 5916 + }, + { + "epoch": 1.01, + "learning_rate": 1.5467453067290925e-05, + "loss": 0.445, + "step": 5917 + }, + { + "epoch": 1.01, + "learning_rate": 1.546591118203828e-05, + "loss": 0.4111, + "step": 5918 + }, + { + "epoch": 1.01, + "learning_rate": 1.546436911145238e-05, + "loss": 0.4324, + "step": 5919 + }, + { + "epoch": 1.01, + "learning_rate": 1.546282685558551e-05, + "loss": 0.4485, + "step": 5920 + }, + { + "epoch": 1.01, + "learning_rate": 1.5461284414489972e-05, + "loss": 0.4106, + "step": 5921 + }, + { + "epoch": 1.01, + "learning_rate": 1.5459741788218062e-05, + "loss": 0.4137, + "step": 5922 + }, + { + "epoch": 1.01, + "learning_rate": 1.5458198976822086e-05, + "loss": 0.4367, + "step": 5923 + }, + { + "epoch": 1.01, + "learning_rate": 1.5456655980354352e-05, + "loss": 0.4158, + "step": 5924 + }, + { + "epoch": 1.01, + "learning_rate": 1.545511279886718e-05, + "loss": 0.4337, + "step": 5925 + }, + { + "epoch": 1.01, + "learning_rate": 1.5453569432412904e-05, + "loss": 0.4599, + "step": 5926 + }, + { + "epoch": 1.01, + "learning_rate": 1.5452025881043846e-05, + "loss": 0.4573, + "step": 5927 + }, + { + "epoch": 1.01, + "learning_rate": 1.5450482144812342e-05, + "loss": 0.4386, + "step": 5928 + }, + { + "epoch": 1.01, + "learning_rate": 1.544893822377074e-05, + "loss": 0.4516, + "step": 5929 + }, + { + "epoch": 1.01, + "learning_rate": 1.5447394117971392e-05, + "loss": 0.4154, + "step": 5930 + }, + { + "epoch": 1.01, + "learning_rate": 1.544584982746665e-05, + "loss": 0.4277, + "step": 5931 + }, + { + "epoch": 1.01, + "learning_rate": 1.5444305352308874e-05, + "loss": 0.4302, + "step": 5932 + }, + { + "epoch": 1.01, + "learning_rate": 1.5442760692550443e-05, + "loss": 0.4262, + "step": 5933 + }, + { + "epoch": 1.01, + "learning_rate": 1.544121584824372e-05, + "loss": 0.4644, + "step": 5934 + }, + { + "epoch": 1.01, + "learning_rate": 1.543967081944109e-05, + "loss": 0.4475, + "step": 5935 + }, + { + "epoch": 1.01, + "learning_rate": 1.543812560619495e-05, + "loss": 0.4308, + "step": 5936 + }, + { + "epoch": 1.01, + "learning_rate": 1.5436580208557678e-05, + "loss": 0.3842, + "step": 5937 + }, + { + "epoch": 1.01, + "learning_rate": 1.5435034626581683e-05, + "loss": 0.386, + "step": 5938 + }, + { + "epoch": 1.01, + "learning_rate": 1.543348886031937e-05, + "loss": 0.4108, + "step": 5939 + }, + { + "epoch": 1.01, + "learning_rate": 1.5431942909823153e-05, + "loss": 0.439, + "step": 5940 + }, + { + "epoch": 1.01, + "learning_rate": 1.543039677514545e-05, + "loss": 0.431, + "step": 5941 + }, + { + "epoch": 1.01, + "learning_rate": 1.5428850456338682e-05, + "loss": 0.3973, + "step": 5942 + }, + { + "epoch": 1.01, + "learning_rate": 1.5427303953455284e-05, + "loss": 0.402, + "step": 5943 + }, + { + "epoch": 1.01, + "learning_rate": 1.5425757266547695e-05, + "loss": 0.4429, + "step": 5944 + }, + { + "epoch": 1.01, + "learning_rate": 1.5424210395668352e-05, + "loss": 0.3946, + "step": 5945 + }, + { + "epoch": 1.01, + "learning_rate": 1.542266334086971e-05, + "loss": 0.4371, + "step": 5946 + }, + { + "epoch": 1.01, + "learning_rate": 1.5421116102204226e-05, + "loss": 0.427, + "step": 5947 + }, + { + "epoch": 1.01, + "learning_rate": 1.5419568679724363e-05, + "loss": 0.4146, + "step": 5948 + }, + { + "epoch": 1.01, + "learning_rate": 1.541802107348258e-05, + "loss": 0.4322, + "step": 5949 + }, + { + "epoch": 1.01, + "learning_rate": 1.5416473283531367e-05, + "loss": 0.429, + "step": 5950 + }, + { + "epoch": 1.01, + "learning_rate": 1.541492530992319e-05, + "loss": 0.4506, + "step": 5951 + }, + { + "epoch": 1.02, + "learning_rate": 1.5413377152710548e-05, + "loss": 0.429, + "step": 5952 + }, + { + "epoch": 1.02, + "learning_rate": 1.541182881194593e-05, + "loss": 0.4253, + "step": 5953 + }, + { + "epoch": 1.02, + "learning_rate": 1.5410280287681834e-05, + "loss": 0.4259, + "step": 5954 + }, + { + "epoch": 1.02, + "learning_rate": 1.5408731579970772e-05, + "loss": 0.4141, + "step": 5955 + }, + { + "epoch": 1.02, + "learning_rate": 1.5407182688865246e-05, + "loss": 0.4444, + "step": 5956 + }, + { + "epoch": 1.02, + "learning_rate": 1.5405633614417785e-05, + "loss": 0.4176, + "step": 5957 + }, + { + "epoch": 1.02, + "learning_rate": 1.540408435668091e-05, + "loss": 0.4426, + "step": 5958 + }, + { + "epoch": 1.02, + "learning_rate": 1.540253491570715e-05, + "loss": 0.4224, + "step": 5959 + }, + { + "epoch": 1.02, + "learning_rate": 1.540098529154904e-05, + "loss": 0.4129, + "step": 5960 + }, + { + "epoch": 1.02, + "learning_rate": 1.539943548425913e-05, + "loss": 0.428, + "step": 5961 + }, + { + "epoch": 1.02, + "learning_rate": 1.5397885493889962e-05, + "loss": 0.4487, + "step": 5962 + }, + { + "epoch": 1.02, + "learning_rate": 1.53963353204941e-05, + "loss": 0.4405, + "step": 5963 + }, + { + "epoch": 1.02, + "learning_rate": 1.53947849641241e-05, + "loss": 0.4419, + "step": 5964 + }, + { + "epoch": 1.02, + "learning_rate": 1.539323442483253e-05, + "loss": 0.4411, + "step": 5965 + }, + { + "epoch": 1.02, + "learning_rate": 1.5391683702671967e-05, + "loss": 0.4316, + "step": 5966 + }, + { + "epoch": 1.02, + "learning_rate": 1.539013279769499e-05, + "loss": 0.4289, + "step": 5967 + }, + { + "epoch": 1.02, + "learning_rate": 1.5388581709954185e-05, + "loss": 0.4012, + "step": 5968 + }, + { + "epoch": 1.02, + "learning_rate": 1.5387030439502146e-05, + "loss": 0.4187, + "step": 5969 + }, + { + "epoch": 1.02, + "learning_rate": 1.538547898639147e-05, + "loss": 0.4301, + "step": 5970 + }, + { + "epoch": 1.02, + "learning_rate": 1.538392735067477e-05, + "loss": 0.4046, + "step": 5971 + }, + { + "epoch": 1.02, + "learning_rate": 1.5382375532404648e-05, + "loss": 0.4072, + "step": 5972 + }, + { + "epoch": 1.02, + "learning_rate": 1.5380823531633727e-05, + "loss": 0.483, + "step": 5973 + }, + { + "epoch": 1.02, + "learning_rate": 1.5379271348414634e-05, + "loss": 0.4145, + "step": 5974 + }, + { + "epoch": 1.02, + "learning_rate": 1.537771898279999e-05, + "loss": 0.414, + "step": 5975 + }, + { + "epoch": 1.02, + "learning_rate": 1.537616643484243e-05, + "loss": 0.4116, + "step": 5976 + }, + { + "epoch": 1.02, + "learning_rate": 1.5374613704594605e-05, + "loss": 0.4302, + "step": 5977 + }, + { + "epoch": 1.02, + "learning_rate": 1.5373060792109167e-05, + "loss": 0.4215, + "step": 5978 + }, + { + "epoch": 1.02, + "learning_rate": 1.537150769743876e-05, + "loss": 0.4331, + "step": 5979 + }, + { + "epoch": 1.02, + "learning_rate": 1.5369954420636048e-05, + "loss": 0.4635, + "step": 5980 + }, + { + "epoch": 1.02, + "learning_rate": 1.53684009617537e-05, + "loss": 0.4363, + "step": 5981 + }, + { + "epoch": 1.02, + "learning_rate": 1.536684732084439e-05, + "loss": 0.4141, + "step": 5982 + }, + { + "epoch": 1.02, + "learning_rate": 1.5365293497960798e-05, + "loss": 0.4291, + "step": 5983 + }, + { + "epoch": 1.02, + "learning_rate": 1.5363739493155602e-05, + "loss": 0.4182, + "step": 5984 + }, + { + "epoch": 1.02, + "learning_rate": 1.5362185306481507e-05, + "loss": 0.4359, + "step": 5985 + }, + { + "epoch": 1.02, + "learning_rate": 1.53606309379912e-05, + "loss": 0.4295, + "step": 5986 + }, + { + "epoch": 1.02, + "learning_rate": 1.535907638773739e-05, + "loss": 0.4352, + "step": 5987 + }, + { + "epoch": 1.02, + "learning_rate": 1.5357521655772783e-05, + "loss": 0.4224, + "step": 5988 + }, + { + "epoch": 1.02, + "learning_rate": 1.5355966742150103e-05, + "loss": 0.4215, + "step": 5989 + }, + { + "epoch": 1.02, + "learning_rate": 1.5354411646922067e-05, + "loss": 0.4253, + "step": 5990 + }, + { + "epoch": 1.02, + "learning_rate": 1.5352856370141404e-05, + "loss": 0.4076, + "step": 5991 + }, + { + "epoch": 1.02, + "learning_rate": 1.5351300911860848e-05, + "loss": 0.413, + "step": 5992 + }, + { + "epoch": 1.02, + "learning_rate": 1.5349745272133146e-05, + "loss": 0.4099, + "step": 5993 + }, + { + "epoch": 1.02, + "learning_rate": 1.5348189451011038e-05, + "loss": 0.4298, + "step": 5994 + }, + { + "epoch": 1.02, + "learning_rate": 1.534663344854728e-05, + "loss": 0.4118, + "step": 5995 + }, + { + "epoch": 1.02, + "learning_rate": 1.534507726479464e-05, + "loss": 0.4219, + "step": 5996 + }, + { + "epoch": 1.02, + "learning_rate": 1.534352089980587e-05, + "loss": 0.402, + "step": 5997 + }, + { + "epoch": 1.02, + "learning_rate": 1.5341964353633746e-05, + "loss": 0.4154, + "step": 5998 + }, + { + "epoch": 1.02, + "learning_rate": 1.534040762633105e-05, + "loss": 0.4757, + "step": 5999 + }, + { + "epoch": 1.02, + "learning_rate": 1.5338850717950564e-05, + "loss": 0.3858, + "step": 6000 + }, + { + "epoch": 1.02, + "learning_rate": 1.533729362854508e-05, + "loss": 0.4262, + "step": 6001 + }, + { + "epoch": 1.02, + "learning_rate": 1.533573635816739e-05, + "loss": 0.4137, + "step": 6002 + }, + { + "epoch": 1.02, + "learning_rate": 1.53341789068703e-05, + "loss": 0.4061, + "step": 6003 + }, + { + "epoch": 1.02, + "learning_rate": 1.533262127470662e-05, + "loss": 0.4324, + "step": 6004 + }, + { + "epoch": 1.02, + "learning_rate": 1.5331063461729157e-05, + "loss": 0.4287, + "step": 6005 + }, + { + "epoch": 1.02, + "learning_rate": 1.532950546799074e-05, + "loss": 0.4126, + "step": 6006 + }, + { + "epoch": 1.02, + "learning_rate": 1.5327947293544196e-05, + "loss": 0.4069, + "step": 6007 + }, + { + "epoch": 1.02, + "learning_rate": 1.5326388938442354e-05, + "loss": 0.4533, + "step": 6008 + }, + { + "epoch": 1.02, + "learning_rate": 1.5324830402738056e-05, + "loss": 0.416, + "step": 6009 + }, + { + "epoch": 1.02, + "learning_rate": 1.5323271686484144e-05, + "loss": 0.4347, + "step": 6010 + }, + { + "epoch": 1.03, + "learning_rate": 1.5321712789733475e-05, + "loss": 0.4419, + "step": 6011 + }, + { + "epoch": 1.03, + "learning_rate": 1.5320153712538902e-05, + "loss": 0.4318, + "step": 6012 + }, + { + "epoch": 1.03, + "learning_rate": 1.5318594454953295e-05, + "loss": 0.4003, + "step": 6013 + }, + { + "epoch": 1.03, + "learning_rate": 1.5317035017029514e-05, + "loss": 0.4689, + "step": 6014 + }, + { + "epoch": 1.03, + "learning_rate": 1.5315475398820443e-05, + "loss": 0.4147, + "step": 6015 + }, + { + "epoch": 1.03, + "learning_rate": 1.5313915600378962e-05, + "loss": 0.4304, + "step": 6016 + }, + { + "epoch": 1.03, + "learning_rate": 1.5312355621757955e-05, + "loss": 0.4259, + "step": 6017 + }, + { + "epoch": 1.03, + "learning_rate": 1.5310795463010326e-05, + "loss": 0.413, + "step": 6018 + }, + { + "epoch": 1.03, + "learning_rate": 1.5309235124188967e-05, + "loss": 0.4375, + "step": 6019 + }, + { + "epoch": 1.03, + "learning_rate": 1.5307674605346785e-05, + "loss": 0.4492, + "step": 6020 + }, + { + "epoch": 1.03, + "learning_rate": 1.5306113906536698e-05, + "loss": 0.4164, + "step": 6021 + }, + { + "epoch": 1.03, + "learning_rate": 1.5304553027811623e-05, + "loss": 0.3834, + "step": 6022 + }, + { + "epoch": 1.03, + "learning_rate": 1.5302991969224485e-05, + "loss": 0.4337, + "step": 6023 + }, + { + "epoch": 1.03, + "learning_rate": 1.5301430730828207e-05, + "loss": 0.4092, + "step": 6024 + }, + { + "epoch": 1.03, + "learning_rate": 1.5299869312675735e-05, + "loss": 0.4287, + "step": 6025 + }, + { + "epoch": 1.03, + "learning_rate": 1.5298307714820013e-05, + "loss": 0.4207, + "step": 6026 + }, + { + "epoch": 1.03, + "learning_rate": 1.529674593731399e-05, + "loss": 0.3975, + "step": 6027 + }, + { + "epoch": 1.03, + "learning_rate": 1.5295183980210612e-05, + "loss": 0.4201, + "step": 6028 + }, + { + "epoch": 1.03, + "learning_rate": 1.529362184356285e-05, + "loss": 0.4325, + "step": 6029 + }, + { + "epoch": 1.03, + "learning_rate": 1.5292059527423665e-05, + "loss": 0.4664, + "step": 6030 + }, + { + "epoch": 1.03, + "learning_rate": 1.529049703184604e-05, + "loss": 0.4002, + "step": 6031 + }, + { + "epoch": 1.03, + "learning_rate": 1.5288934356882945e-05, + "loss": 0.4702, + "step": 6032 + }, + { + "epoch": 1.03, + "learning_rate": 1.528737150258737e-05, + "loss": 0.4032, + "step": 6033 + }, + { + "epoch": 1.03, + "learning_rate": 1.528580846901231e-05, + "loss": 0.4058, + "step": 6034 + }, + { + "epoch": 1.03, + "learning_rate": 1.5284245256210758e-05, + "loss": 0.4332, + "step": 6035 + }, + { + "epoch": 1.03, + "learning_rate": 1.528268186423572e-05, + "loss": 0.4269, + "step": 6036 + }, + { + "epoch": 1.03, + "learning_rate": 1.52811182931402e-05, + "loss": 0.4416, + "step": 6037 + }, + { + "epoch": 1.03, + "learning_rate": 1.527955454297723e-05, + "loss": 0.4114, + "step": 6038 + }, + { + "epoch": 1.03, + "learning_rate": 1.5277990613799814e-05, + "loss": 0.4305, + "step": 6039 + }, + { + "epoch": 1.03, + "learning_rate": 1.527642650566099e-05, + "loss": 0.4072, + "step": 6040 + }, + { + "epoch": 1.03, + "learning_rate": 1.5274862218613794e-05, + "loss": 0.4598, + "step": 6041 + }, + { + "epoch": 1.03, + "learning_rate": 1.527329775271126e-05, + "loss": 0.4138, + "step": 6042 + }, + { + "epoch": 1.03, + "learning_rate": 1.527173310800644e-05, + "loss": 0.4555, + "step": 6043 + }, + { + "epoch": 1.03, + "learning_rate": 1.5270168284552385e-05, + "loss": 0.4276, + "step": 6044 + }, + { + "epoch": 1.03, + "learning_rate": 1.5268603282402146e-05, + "loss": 0.4222, + "step": 6045 + }, + { + "epoch": 1.03, + "learning_rate": 1.52670381016088e-05, + "loss": 0.4291, + "step": 6046 + }, + { + "epoch": 1.03, + "learning_rate": 1.5265472742225415e-05, + "loss": 0.4489, + "step": 6047 + }, + { + "epoch": 1.03, + "learning_rate": 1.5263907204305062e-05, + "loss": 0.4159, + "step": 6048 + }, + { + "epoch": 1.03, + "learning_rate": 1.526234148790083e-05, + "loss": 0.4313, + "step": 6049 + }, + { + "epoch": 1.03, + "learning_rate": 1.52607755930658e-05, + "loss": 0.4535, + "step": 6050 + }, + { + "epoch": 1.03, + "learning_rate": 1.5259209519853074e-05, + "loss": 0.4364, + "step": 6051 + }, + { + "epoch": 1.03, + "learning_rate": 1.5257643268315754e-05, + "loss": 0.4187, + "step": 6052 + }, + { + "epoch": 1.03, + "learning_rate": 1.5256076838506943e-05, + "loss": 0.4177, + "step": 6053 + }, + { + "epoch": 1.03, + "learning_rate": 1.5254510230479756e-05, + "loss": 0.4222, + "step": 6054 + }, + { + "epoch": 1.03, + "learning_rate": 1.5252943444287307e-05, + "loss": 0.4456, + "step": 6055 + }, + { + "epoch": 1.03, + "learning_rate": 1.525137647998273e-05, + "loss": 0.4063, + "step": 6056 + }, + { + "epoch": 1.03, + "learning_rate": 1.5249809337619151e-05, + "loss": 0.4128, + "step": 6057 + }, + { + "epoch": 1.03, + "learning_rate": 1.5248242017249706e-05, + "loss": 0.4239, + "step": 6058 + }, + { + "epoch": 1.03, + "learning_rate": 1.524667451892754e-05, + "loss": 0.4262, + "step": 6059 + }, + { + "epoch": 1.03, + "learning_rate": 1.5245106842705805e-05, + "loss": 0.4112, + "step": 6060 + }, + { + "epoch": 1.03, + "learning_rate": 1.5243538988637653e-05, + "loss": 0.4316, + "step": 6061 + }, + { + "epoch": 1.03, + "learning_rate": 1.5241970956776249e-05, + "loss": 0.4119, + "step": 6062 + }, + { + "epoch": 1.03, + "learning_rate": 1.5240402747174754e-05, + "loss": 0.4445, + "step": 6063 + }, + { + "epoch": 1.03, + "learning_rate": 1.5238834359886347e-05, + "loss": 0.3898, + "step": 6064 + }, + { + "epoch": 1.03, + "learning_rate": 1.5237265794964207e-05, + "loss": 0.4231, + "step": 6065 + }, + { + "epoch": 1.03, + "learning_rate": 1.5235697052461515e-05, + "loss": 0.4231, + "step": 6066 + }, + { + "epoch": 1.03, + "learning_rate": 1.5234128132431469e-05, + "loss": 0.4041, + "step": 6067 + }, + { + "epoch": 1.03, + "learning_rate": 1.5232559034927263e-05, + "loss": 0.4265, + "step": 6068 + }, + { + "epoch": 1.04, + "learning_rate": 1.5230989760002101e-05, + "loss": 0.4061, + "step": 6069 + }, + { + "epoch": 1.04, + "learning_rate": 1.5229420307709192e-05, + "loss": 0.4534, + "step": 6070 + }, + { + "epoch": 1.04, + "learning_rate": 1.5227850678101752e-05, + "loss": 0.4186, + "step": 6071 + }, + { + "epoch": 1.04, + "learning_rate": 1.5226280871233006e-05, + "loss": 0.3836, + "step": 6072 + }, + { + "epoch": 1.04, + "learning_rate": 1.5224710887156175e-05, + "loss": 0.3952, + "step": 6073 + }, + { + "epoch": 1.04, + "learning_rate": 1.5223140725924494e-05, + "loss": 0.4453, + "step": 6074 + }, + { + "epoch": 1.04, + "learning_rate": 1.5221570387591209e-05, + "loss": 0.4175, + "step": 6075 + }, + { + "epoch": 1.04, + "learning_rate": 1.5219999872209559e-05, + "loss": 0.4437, + "step": 6076 + }, + { + "epoch": 1.04, + "learning_rate": 1.5218429179832798e-05, + "loss": 0.4733, + "step": 6077 + }, + { + "epoch": 1.04, + "learning_rate": 1.5216858310514184e-05, + "loss": 0.3979, + "step": 6078 + }, + { + "epoch": 1.04, + "learning_rate": 1.521528726430698e-05, + "loss": 0.4095, + "step": 6079 + }, + { + "epoch": 1.04, + "learning_rate": 1.5213716041264457e-05, + "loss": 0.4212, + "step": 6080 + }, + { + "epoch": 1.04, + "learning_rate": 1.5212144641439887e-05, + "loss": 0.4189, + "step": 6081 + }, + { + "epoch": 1.04, + "learning_rate": 1.5210573064886553e-05, + "loss": 0.42, + "step": 6082 + }, + { + "epoch": 1.04, + "learning_rate": 1.5209001311657749e-05, + "loss": 0.4179, + "step": 6083 + }, + { + "epoch": 1.04, + "learning_rate": 1.5207429381806762e-05, + "loss": 0.4044, + "step": 6084 + }, + { + "epoch": 1.04, + "learning_rate": 1.5205857275386891e-05, + "loss": 0.4334, + "step": 6085 + }, + { + "epoch": 1.04, + "learning_rate": 1.5204284992451444e-05, + "loss": 0.4431, + "step": 6086 + }, + { + "epoch": 1.04, + "learning_rate": 1.520271253305373e-05, + "loss": 0.4211, + "step": 6087 + }, + { + "epoch": 1.04, + "learning_rate": 1.520113989724707e-05, + "loss": 0.4091, + "step": 6088 + }, + { + "epoch": 1.04, + "learning_rate": 1.5199567085084787e-05, + "loss": 0.4295, + "step": 6089 + }, + { + "epoch": 1.04, + "learning_rate": 1.5197994096620207e-05, + "loss": 0.3945, + "step": 6090 + }, + { + "epoch": 1.04, + "learning_rate": 1.5196420931906672e-05, + "loss": 0.4096, + "step": 6091 + }, + { + "epoch": 1.04, + "learning_rate": 1.519484759099752e-05, + "loss": 0.3973, + "step": 6092 + }, + { + "epoch": 1.04, + "learning_rate": 1.5193274073946094e-05, + "loss": 0.4301, + "step": 6093 + }, + { + "epoch": 1.04, + "learning_rate": 1.5191700380805754e-05, + "loss": 0.4135, + "step": 6094 + }, + { + "epoch": 1.04, + "learning_rate": 1.5190126511629859e-05, + "loss": 0.4426, + "step": 6095 + }, + { + "epoch": 1.04, + "learning_rate": 1.5188552466471768e-05, + "loss": 0.4065, + "step": 6096 + }, + { + "epoch": 1.04, + "learning_rate": 1.5186978245384856e-05, + "loss": 0.468, + "step": 6097 + }, + { + "epoch": 1.04, + "learning_rate": 1.5185403848422503e-05, + "loss": 0.4296, + "step": 6098 + }, + { + "epoch": 1.04, + "learning_rate": 1.5183829275638093e-05, + "loss": 0.4615, + "step": 6099 + }, + { + "epoch": 1.04, + "learning_rate": 1.518225452708501e-05, + "loss": 0.4202, + "step": 6100 + }, + { + "epoch": 1.04, + "learning_rate": 1.5180679602816648e-05, + "loss": 0.4234, + "step": 6101 + }, + { + "epoch": 1.04, + "learning_rate": 1.5179104502886416e-05, + "loss": 0.4565, + "step": 6102 + }, + { + "epoch": 1.04, + "learning_rate": 1.5177529227347717e-05, + "loss": 0.4326, + "step": 6103 + }, + { + "epoch": 1.04, + "learning_rate": 1.517595377625396e-05, + "loss": 0.4369, + "step": 6104 + }, + { + "epoch": 1.04, + "learning_rate": 1.5174378149658568e-05, + "loss": 0.4211, + "step": 6105 + }, + { + "epoch": 1.04, + "learning_rate": 1.517280234761497e-05, + "loss": 0.4226, + "step": 6106 + }, + { + "epoch": 1.04, + "learning_rate": 1.5171226370176589e-05, + "loss": 0.4395, + "step": 6107 + }, + { + "epoch": 1.04, + "learning_rate": 1.5169650217396867e-05, + "loss": 0.4089, + "step": 6108 + }, + { + "epoch": 1.04, + "learning_rate": 1.5168073889329245e-05, + "loss": 0.4305, + "step": 6109 + }, + { + "epoch": 1.04, + "learning_rate": 1.5166497386027173e-05, + "loss": 0.4043, + "step": 6110 + }, + { + "epoch": 1.04, + "learning_rate": 1.5164920707544101e-05, + "loss": 0.4055, + "step": 6111 + }, + { + "epoch": 1.04, + "learning_rate": 1.5163343853933495e-05, + "loss": 0.4388, + "step": 6112 + }, + { + "epoch": 1.04, + "learning_rate": 1.5161766825248823e-05, + "loss": 0.4397, + "step": 6113 + }, + { + "epoch": 1.04, + "learning_rate": 1.5160189621543548e-05, + "loss": 0.4128, + "step": 6114 + }, + { + "epoch": 1.04, + "learning_rate": 1.5158612242871158e-05, + "loss": 0.4084, + "step": 6115 + }, + { + "epoch": 1.04, + "learning_rate": 1.5157034689285137e-05, + "loss": 0.4179, + "step": 6116 + }, + { + "epoch": 1.04, + "learning_rate": 1.5155456960838968e-05, + "loss": 0.4268, + "step": 6117 + }, + { + "epoch": 1.04, + "learning_rate": 1.5153879057586153e-05, + "loss": 0.4285, + "step": 6118 + }, + { + "epoch": 1.04, + "learning_rate": 1.5152300979580192e-05, + "loss": 0.4285, + "step": 6119 + }, + { + "epoch": 1.04, + "learning_rate": 1.5150722726874594e-05, + "loss": 0.417, + "step": 6120 + }, + { + "epoch": 1.04, + "learning_rate": 1.5149144299522874e-05, + "loss": 0.4156, + "step": 6121 + }, + { + "epoch": 1.04, + "learning_rate": 1.5147565697578547e-05, + "loss": 0.4208, + "step": 6122 + }, + { + "epoch": 1.04, + "learning_rate": 1.5145986921095145e-05, + "loss": 0.4207, + "step": 6123 + }, + { + "epoch": 1.04, + "learning_rate": 1.51444079701262e-05, + "loss": 0.4331, + "step": 6124 + }, + { + "epoch": 1.04, + "learning_rate": 1.5142828844725243e-05, + "loss": 0.419, + "step": 6125 + }, + { + "epoch": 1.04, + "learning_rate": 1.5141249544945823e-05, + "loss": 0.4042, + "step": 6126 + }, + { + "epoch": 1.04, + "learning_rate": 1.5139670070841485e-05, + "loss": 0.41, + "step": 6127 + }, + { + "epoch": 1.05, + "learning_rate": 1.5138090422465793e-05, + "loss": 0.4043, + "step": 6128 + }, + { + "epoch": 1.05, + "learning_rate": 1.5136510599872298e-05, + "loss": 0.4173, + "step": 6129 + }, + { + "epoch": 1.05, + "learning_rate": 1.5134930603114572e-05, + "loss": 0.4302, + "step": 6130 + }, + { + "epoch": 1.05, + "learning_rate": 1.5133350432246191e-05, + "loss": 0.4345, + "step": 6131 + }, + { + "epoch": 1.05, + "learning_rate": 1.5131770087320727e-05, + "loss": 0.4562, + "step": 6132 + }, + { + "epoch": 1.05, + "learning_rate": 1.5130189568391774e-05, + "loss": 0.4272, + "step": 6133 + }, + { + "epoch": 1.05, + "learning_rate": 1.5128608875512914e-05, + "loss": 0.4267, + "step": 6134 + }, + { + "epoch": 1.05, + "learning_rate": 1.5127028008737747e-05, + "loss": 0.4275, + "step": 6135 + }, + { + "epoch": 1.05, + "learning_rate": 1.5125446968119878e-05, + "loss": 0.4289, + "step": 6136 + }, + { + "epoch": 1.05, + "learning_rate": 1.5123865753712914e-05, + "loss": 0.4227, + "step": 6137 + }, + { + "epoch": 1.05, + "learning_rate": 1.5122284365570467e-05, + "loss": 0.4181, + "step": 6138 + }, + { + "epoch": 1.05, + "learning_rate": 1.5120702803746162e-05, + "loss": 0.4431, + "step": 6139 + }, + { + "epoch": 1.05, + "learning_rate": 1.511912106829362e-05, + "loss": 0.455, + "step": 6140 + }, + { + "epoch": 1.05, + "learning_rate": 1.5117539159266477e-05, + "loss": 0.4473, + "step": 6141 + }, + { + "epoch": 1.05, + "learning_rate": 1.511595707671837e-05, + "loss": 0.4235, + "step": 6142 + }, + { + "epoch": 1.05, + "learning_rate": 1.5114374820702944e-05, + "loss": 0.4257, + "step": 6143 + }, + { + "epoch": 1.05, + "learning_rate": 1.5112792391273843e-05, + "loss": 0.4193, + "step": 6144 + }, + { + "epoch": 1.05, + "learning_rate": 1.5111209788484729e-05, + "loss": 0.4448, + "step": 6145 + }, + { + "epoch": 1.05, + "learning_rate": 1.5109627012389259e-05, + "loss": 0.4363, + "step": 6146 + }, + { + "epoch": 1.05, + "learning_rate": 1.5108044063041107e-05, + "loss": 0.4318, + "step": 6147 + }, + { + "epoch": 1.05, + "learning_rate": 1.5106460940493942e-05, + "loss": 0.4194, + "step": 6148 + }, + { + "epoch": 1.05, + "learning_rate": 1.5104877644801436e-05, + "loss": 0.4134, + "step": 6149 + }, + { + "epoch": 1.05, + "learning_rate": 1.5103294176017287e-05, + "loss": 0.4049, + "step": 6150 + }, + { + "epoch": 1.05, + "learning_rate": 1.510171053419518e-05, + "loss": 0.4058, + "step": 6151 + }, + { + "epoch": 1.05, + "learning_rate": 1.5100126719388808e-05, + "loss": 0.4235, + "step": 6152 + }, + { + "epoch": 1.05, + "learning_rate": 1.5098542731651877e-05, + "loss": 0.4383, + "step": 6153 + }, + { + "epoch": 1.05, + "learning_rate": 1.5096958571038096e-05, + "loss": 0.4637, + "step": 6154 + }, + { + "epoch": 1.05, + "learning_rate": 1.5095374237601183e-05, + "loss": 0.4025, + "step": 6155 + }, + { + "epoch": 1.05, + "learning_rate": 1.5093789731394852e-05, + "loss": 0.4233, + "step": 6156 + }, + { + "epoch": 1.05, + "learning_rate": 1.509220505247283e-05, + "loss": 0.4733, + "step": 6157 + }, + { + "epoch": 1.05, + "learning_rate": 1.509062020088885e-05, + "loss": 0.4362, + "step": 6158 + }, + { + "epoch": 1.05, + "learning_rate": 1.5089035176696651e-05, + "loss": 0.4472, + "step": 6159 + }, + { + "epoch": 1.05, + "learning_rate": 1.5087449979949974e-05, + "loss": 0.3919, + "step": 6160 + }, + { + "epoch": 1.05, + "learning_rate": 1.508586461070257e-05, + "loss": 0.4247, + "step": 6161 + }, + { + "epoch": 1.05, + "learning_rate": 1.5084279069008194e-05, + "loss": 0.4283, + "step": 6162 + }, + { + "epoch": 1.05, + "learning_rate": 1.5082693354920608e-05, + "loss": 0.4266, + "step": 6163 + }, + { + "epoch": 1.05, + "learning_rate": 1.5081107468493577e-05, + "loss": 0.446, + "step": 6164 + }, + { + "epoch": 1.05, + "learning_rate": 1.5079521409780875e-05, + "loss": 0.43, + "step": 6165 + }, + { + "epoch": 1.05, + "learning_rate": 1.5077935178836281e-05, + "loss": 0.4194, + "step": 6166 + }, + { + "epoch": 1.05, + "learning_rate": 1.5076348775713579e-05, + "loss": 0.4501, + "step": 6167 + }, + { + "epoch": 1.05, + "learning_rate": 1.5074762200466557e-05, + "loss": 0.4352, + "step": 6168 + }, + { + "epoch": 1.05, + "learning_rate": 1.5073175453149017e-05, + "loss": 0.4489, + "step": 6169 + }, + { + "epoch": 1.05, + "learning_rate": 1.5071588533814758e-05, + "loss": 0.4048, + "step": 6170 + }, + { + "epoch": 1.05, + "learning_rate": 1.5070001442517583e-05, + "loss": 0.4541, + "step": 6171 + }, + { + "epoch": 1.05, + "learning_rate": 1.5068414179311316e-05, + "loss": 0.3945, + "step": 6172 + }, + { + "epoch": 1.05, + "learning_rate": 1.5066826744249767e-05, + "loss": 0.4301, + "step": 6173 + }, + { + "epoch": 1.05, + "learning_rate": 1.506523913738677e-05, + "loss": 0.4191, + "step": 6174 + }, + { + "epoch": 1.05, + "learning_rate": 1.5063651358776143e-05, + "loss": 0.4216, + "step": 6175 + }, + { + "epoch": 1.05, + "learning_rate": 1.5062063408471737e-05, + "loss": 0.4439, + "step": 6176 + }, + { + "epoch": 1.05, + "learning_rate": 1.5060475286527388e-05, + "loss": 0.4159, + "step": 6177 + }, + { + "epoch": 1.05, + "learning_rate": 1.5058886992996944e-05, + "loss": 0.4244, + "step": 6178 + }, + { + "epoch": 1.05, + "learning_rate": 1.505729852793426e-05, + "loss": 0.4394, + "step": 6179 + }, + { + "epoch": 1.05, + "learning_rate": 1.5055709891393201e-05, + "loss": 0.4435, + "step": 6180 + }, + { + "epoch": 1.05, + "learning_rate": 1.505412108342763e-05, + "loss": 0.4279, + "step": 6181 + }, + { + "epoch": 1.05, + "learning_rate": 1.5052532104091416e-05, + "loss": 0.4462, + "step": 6182 + }, + { + "epoch": 1.05, + "learning_rate": 1.5050942953438439e-05, + "loss": 0.4314, + "step": 6183 + }, + { + "epoch": 1.05, + "learning_rate": 1.5049353631522582e-05, + "loss": 0.4119, + "step": 6184 + }, + { + "epoch": 1.05, + "learning_rate": 1.5047764138397736e-05, + "loss": 0.4223, + "step": 6185 + }, + { + "epoch": 1.06, + "learning_rate": 1.5046174474117794e-05, + "loss": 0.4188, + "step": 6186 + }, + { + "epoch": 1.06, + "learning_rate": 1.5044584638736659e-05, + "loss": 0.4188, + "step": 6187 + }, + { + "epoch": 1.06, + "learning_rate": 1.5042994632308235e-05, + "loss": 0.4242, + "step": 6188 + }, + { + "epoch": 1.06, + "learning_rate": 1.5041404454886436e-05, + "loss": 0.4412, + "step": 6189 + }, + { + "epoch": 1.06, + "learning_rate": 1.503981410652518e-05, + "loss": 0.4291, + "step": 6190 + }, + { + "epoch": 1.06, + "learning_rate": 1.5038223587278394e-05, + "loss": 0.4322, + "step": 6191 + }, + { + "epoch": 1.06, + "learning_rate": 1.5036632897200005e-05, + "loss": 0.4378, + "step": 6192 + }, + { + "epoch": 1.06, + "learning_rate": 1.5035042036343948e-05, + "loss": 0.3909, + "step": 6193 + }, + { + "epoch": 1.06, + "learning_rate": 1.5033451004764166e-05, + "loss": 0.431, + "step": 6194 + }, + { + "epoch": 1.06, + "learning_rate": 1.5031859802514605e-05, + "loss": 0.4639, + "step": 6195 + }, + { + "epoch": 1.06, + "learning_rate": 1.5030268429649222e-05, + "loss": 0.3974, + "step": 6196 + }, + { + "epoch": 1.06, + "learning_rate": 1.5028676886221968e-05, + "loss": 0.4343, + "step": 6197 + }, + { + "epoch": 1.06, + "learning_rate": 1.5027085172286814e-05, + "loss": 0.4586, + "step": 6198 + }, + { + "epoch": 1.06, + "learning_rate": 1.502549328789773e-05, + "loss": 0.4448, + "step": 6199 + }, + { + "epoch": 1.06, + "learning_rate": 1.5023901233108693e-05, + "loss": 0.4276, + "step": 6200 + }, + { + "epoch": 1.06, + "learning_rate": 1.5022309007973682e-05, + "loss": 0.4344, + "step": 6201 + }, + { + "epoch": 1.06, + "learning_rate": 1.5020716612546684e-05, + "loss": 0.4685, + "step": 6202 + }, + { + "epoch": 1.06, + "learning_rate": 1.5019124046881697e-05, + "loss": 0.4353, + "step": 6203 + }, + { + "epoch": 1.06, + "learning_rate": 1.5017531311032715e-05, + "loss": 0.4273, + "step": 6204 + }, + { + "epoch": 1.06, + "learning_rate": 1.5015938405053748e-05, + "loss": 0.4324, + "step": 6205 + }, + { + "epoch": 1.06, + "learning_rate": 1.50143453289988e-05, + "loss": 0.3963, + "step": 6206 + }, + { + "epoch": 1.06, + "learning_rate": 1.50127520829219e-05, + "loss": 0.4697, + "step": 6207 + }, + { + "epoch": 1.06, + "learning_rate": 1.5011158666877057e-05, + "loss": 0.4071, + "step": 6208 + }, + { + "epoch": 1.06, + "learning_rate": 1.5009565080918305e-05, + "loss": 0.4299, + "step": 6209 + }, + { + "epoch": 1.06, + "learning_rate": 1.5007971325099675e-05, + "loss": 0.4033, + "step": 6210 + }, + { + "epoch": 1.06, + "learning_rate": 1.5006377399475213e-05, + "loss": 0.4398, + "step": 6211 + }, + { + "epoch": 1.06, + "learning_rate": 1.5004783304098963e-05, + "loss": 0.4229, + "step": 6212 + }, + { + "epoch": 1.06, + "learning_rate": 1.500318903902497e-05, + "loss": 0.3973, + "step": 6213 + }, + { + "epoch": 1.06, + "learning_rate": 1.5001594604307295e-05, + "loss": 0.3829, + "step": 6214 + }, + { + "epoch": 1.06, + "learning_rate": 1.5000000000000002e-05, + "loss": 0.4198, + "step": 6215 + }, + { + "epoch": 1.06, + "learning_rate": 1.4998405226157155e-05, + "loss": 0.4198, + "step": 6216 + }, + { + "epoch": 1.06, + "learning_rate": 1.4996810282832832e-05, + "loss": 0.421, + "step": 6217 + }, + { + "epoch": 1.06, + "learning_rate": 1.4995215170081112e-05, + "loss": 0.4125, + "step": 6218 + }, + { + "epoch": 1.06, + "learning_rate": 1.4993619887956084e-05, + "loss": 0.4171, + "step": 6219 + }, + { + "epoch": 1.06, + "learning_rate": 1.4992024436511833e-05, + "loss": 0.3927, + "step": 6220 + }, + { + "epoch": 1.06, + "learning_rate": 1.4990428815802461e-05, + "loss": 0.4383, + "step": 6221 + }, + { + "epoch": 1.06, + "learning_rate": 1.4988833025882067e-05, + "loss": 0.4358, + "step": 6222 + }, + { + "epoch": 1.06, + "learning_rate": 1.4987237066804762e-05, + "loss": 0.4276, + "step": 6223 + }, + { + "epoch": 1.06, + "learning_rate": 1.498564093862466e-05, + "loss": 0.4468, + "step": 6224 + }, + { + "epoch": 1.06, + "learning_rate": 1.498404464139588e-05, + "loss": 0.4292, + "step": 6225 + }, + { + "epoch": 1.06, + "learning_rate": 1.498244817517255e-05, + "loss": 0.405, + "step": 6226 + }, + { + "epoch": 1.06, + "learning_rate": 1.49808515400088e-05, + "loss": 0.4411, + "step": 6227 + }, + { + "epoch": 1.06, + "learning_rate": 1.497925473595877e-05, + "loss": 0.3995, + "step": 6228 + }, + { + "epoch": 1.06, + "learning_rate": 1.4977657763076598e-05, + "loss": 0.4381, + "step": 6229 + }, + { + "epoch": 1.06, + "learning_rate": 1.497606062141644e-05, + "loss": 0.403, + "step": 6230 + }, + { + "epoch": 1.06, + "learning_rate": 1.497446331103244e-05, + "loss": 0.4502, + "step": 6231 + }, + { + "epoch": 1.06, + "learning_rate": 1.4972865831978767e-05, + "loss": 0.4554, + "step": 6232 + }, + { + "epoch": 1.06, + "learning_rate": 1.4971268184309585e-05, + "loss": 0.4394, + "step": 6233 + }, + { + "epoch": 1.06, + "learning_rate": 1.4969670368079063e-05, + "loss": 0.4114, + "step": 6234 + }, + { + "epoch": 1.06, + "learning_rate": 1.496807238334138e-05, + "loss": 0.3981, + "step": 6235 + }, + { + "epoch": 1.06, + "learning_rate": 1.496647423015072e-05, + "loss": 0.4214, + "step": 6236 + }, + { + "epoch": 1.06, + "learning_rate": 1.4964875908561271e-05, + "loss": 0.4477, + "step": 6237 + }, + { + "epoch": 1.06, + "learning_rate": 1.4963277418627227e-05, + "loss": 0.4527, + "step": 6238 + }, + { + "epoch": 1.06, + "learning_rate": 1.4961678760402787e-05, + "loss": 0.4033, + "step": 6239 + }, + { + "epoch": 1.06, + "learning_rate": 1.4960079933942158e-05, + "loss": 0.452, + "step": 6240 + }, + { + "epoch": 1.06, + "learning_rate": 1.4958480939299556e-05, + "loss": 0.3861, + "step": 6241 + }, + { + "epoch": 1.06, + "learning_rate": 1.4956881776529188e-05, + "loss": 0.3888, + "step": 6242 + }, + { + "epoch": 1.06, + "learning_rate": 1.4955282445685289e-05, + "loss": 0.4427, + "step": 6243 + }, + { + "epoch": 1.06, + "learning_rate": 1.495368294682208e-05, + "loss": 0.4174, + "step": 6244 + }, + { + "epoch": 1.07, + "learning_rate": 1.4952083279993797e-05, + "loss": 0.4376, + "step": 6245 + }, + { + "epoch": 1.07, + "learning_rate": 1.495048344525468e-05, + "loss": 0.4283, + "step": 6246 + }, + { + "epoch": 1.07, + "learning_rate": 1.4948883442658973e-05, + "loss": 0.4055, + "step": 6247 + }, + { + "epoch": 1.07, + "learning_rate": 1.4947283272260931e-05, + "loss": 0.4336, + "step": 6248 + }, + { + "epoch": 1.07, + "learning_rate": 1.4945682934114811e-05, + "loss": 0.4807, + "step": 6249 + }, + { + "epoch": 1.07, + "learning_rate": 1.4944082428274873e-05, + "loss": 0.4051, + "step": 6250 + }, + { + "epoch": 1.07, + "learning_rate": 1.494248175479539e-05, + "loss": 0.4388, + "step": 6251 + }, + { + "epoch": 1.07, + "learning_rate": 1.4940880913730634e-05, + "loss": 0.4004, + "step": 6252 + }, + { + "epoch": 1.07, + "learning_rate": 1.493927990513488e-05, + "loss": 0.3953, + "step": 6253 + }, + { + "epoch": 1.07, + "learning_rate": 1.4937678729062417e-05, + "loss": 0.4438, + "step": 6254 + }, + { + "epoch": 1.07, + "learning_rate": 1.493607738556754e-05, + "loss": 0.4601, + "step": 6255 + }, + { + "epoch": 1.07, + "learning_rate": 1.4934475874704544e-05, + "loss": 0.4271, + "step": 6256 + }, + { + "epoch": 1.07, + "learning_rate": 1.4932874196527727e-05, + "loss": 0.4465, + "step": 6257 + }, + { + "epoch": 1.07, + "learning_rate": 1.4931272351091401e-05, + "loss": 0.4672, + "step": 6258 + }, + { + "epoch": 1.07, + "learning_rate": 1.4929670338449885e-05, + "loss": 0.4129, + "step": 6259 + }, + { + "epoch": 1.07, + "learning_rate": 1.492806815865749e-05, + "loss": 0.4079, + "step": 6260 + }, + { + "epoch": 1.07, + "learning_rate": 1.4926465811768544e-05, + "loss": 0.426, + "step": 6261 + }, + { + "epoch": 1.07, + "learning_rate": 1.4924863297837378e-05, + "loss": 0.4208, + "step": 6262 + }, + { + "epoch": 1.07, + "learning_rate": 1.492326061691833e-05, + "loss": 0.4379, + "step": 6263 + }, + { + "epoch": 1.07, + "learning_rate": 1.4921657769065744e-05, + "loss": 0.4234, + "step": 6264 + }, + { + "epoch": 1.07, + "learning_rate": 1.4920054754333962e-05, + "loss": 0.4299, + "step": 6265 + }, + { + "epoch": 1.07, + "learning_rate": 1.4918451572777342e-05, + "loss": 0.425, + "step": 6266 + }, + { + "epoch": 1.07, + "learning_rate": 1.4916848224450245e-05, + "loss": 0.4297, + "step": 6267 + }, + { + "epoch": 1.07, + "learning_rate": 1.491524470940703e-05, + "loss": 0.4054, + "step": 6268 + }, + { + "epoch": 1.07, + "learning_rate": 1.4913641027702073e-05, + "loss": 0.4367, + "step": 6269 + }, + { + "epoch": 1.07, + "learning_rate": 1.4912037179389747e-05, + "loss": 0.4627, + "step": 6270 + }, + { + "epoch": 1.07, + "learning_rate": 1.4910433164524435e-05, + "loss": 0.3984, + "step": 6271 + }, + { + "epoch": 1.07, + "learning_rate": 1.4908828983160524e-05, + "loss": 0.4352, + "step": 6272 + }, + { + "epoch": 1.07, + "learning_rate": 1.4907224635352406e-05, + "loss": 0.4338, + "step": 6273 + }, + { + "epoch": 1.07, + "learning_rate": 1.4905620121154483e-05, + "loss": 0.4009, + "step": 6274 + }, + { + "epoch": 1.07, + "learning_rate": 1.4904015440621158e-05, + "loss": 0.4258, + "step": 6275 + }, + { + "epoch": 1.07, + "learning_rate": 1.4902410593806843e-05, + "loss": 0.4047, + "step": 6276 + }, + { + "epoch": 1.07, + "learning_rate": 1.4900805580765947e-05, + "loss": 0.4219, + "step": 6277 + }, + { + "epoch": 1.07, + "learning_rate": 1.4899200401552899e-05, + "loss": 0.4204, + "step": 6278 + }, + { + "epoch": 1.07, + "learning_rate": 1.4897595056222123e-05, + "loss": 0.4215, + "step": 6279 + }, + { + "epoch": 1.07, + "learning_rate": 1.4895989544828049e-05, + "loss": 0.4296, + "step": 6280 + }, + { + "epoch": 1.07, + "learning_rate": 1.489438386742512e-05, + "loss": 0.3946, + "step": 6281 + }, + { + "epoch": 1.07, + "learning_rate": 1.4892778024067778e-05, + "loss": 0.4198, + "step": 6282 + }, + { + "epoch": 1.07, + "learning_rate": 1.4891172014810472e-05, + "loss": 0.4261, + "step": 6283 + }, + { + "epoch": 1.07, + "learning_rate": 1.4889565839707655e-05, + "loss": 0.4133, + "step": 6284 + }, + { + "epoch": 1.07, + "learning_rate": 1.4887959498813791e-05, + "loss": 0.4143, + "step": 6285 + }, + { + "epoch": 1.07, + "learning_rate": 1.4886352992183348e-05, + "loss": 0.389, + "step": 6286 + }, + { + "epoch": 1.07, + "learning_rate": 1.4884746319870794e-05, + "loss": 0.4328, + "step": 6287 + }, + { + "epoch": 1.07, + "learning_rate": 1.4883139481930607e-05, + "loss": 0.4049, + "step": 6288 + }, + { + "epoch": 1.07, + "learning_rate": 1.488153247841727e-05, + "loss": 0.43, + "step": 6289 + }, + { + "epoch": 1.07, + "learning_rate": 1.4879925309385277e-05, + "loss": 0.4026, + "step": 6290 + }, + { + "epoch": 1.07, + "learning_rate": 1.4878317974889117e-05, + "loss": 0.4091, + "step": 6291 + }, + { + "epoch": 1.07, + "learning_rate": 1.4876710474983292e-05, + "loss": 0.4326, + "step": 6292 + }, + { + "epoch": 1.07, + "learning_rate": 1.4875102809722304e-05, + "loss": 0.4444, + "step": 6293 + }, + { + "epoch": 1.07, + "learning_rate": 1.4873494979160672e-05, + "loss": 0.4537, + "step": 6294 + }, + { + "epoch": 1.07, + "learning_rate": 1.4871886983352908e-05, + "loss": 0.4313, + "step": 6295 + }, + { + "epoch": 1.07, + "learning_rate": 1.4870278822353531e-05, + "loss": 0.4273, + "step": 6296 + }, + { + "epoch": 1.07, + "learning_rate": 1.4868670496217074e-05, + "loss": 0.4926, + "step": 6297 + }, + { + "epoch": 1.07, + "learning_rate": 1.4867062004998073e-05, + "loss": 0.3831, + "step": 6298 + }, + { + "epoch": 1.07, + "learning_rate": 1.4865453348751064e-05, + "loss": 0.4163, + "step": 6299 + }, + { + "epoch": 1.07, + "learning_rate": 1.4863844527530592e-05, + "loss": 0.4225, + "step": 6300 + }, + { + "epoch": 1.07, + "learning_rate": 1.4862235541391207e-05, + "loss": 0.4265, + "step": 6301 + }, + { + "epoch": 1.07, + "learning_rate": 1.4860626390387464e-05, + "loss": 0.4472, + "step": 6302 + }, + { + "epoch": 1.07, + "learning_rate": 1.4859017074573925e-05, + "loss": 0.4059, + "step": 6303 + }, + { + "epoch": 1.08, + "learning_rate": 1.485740759400516e-05, + "loss": 0.4538, + "step": 6304 + }, + { + "epoch": 1.08, + "learning_rate": 1.4855797948735742e-05, + "loss": 0.4777, + "step": 6305 + }, + { + "epoch": 1.08, + "learning_rate": 1.4854188138820245e-05, + "loss": 0.4395, + "step": 6306 + }, + { + "epoch": 1.08, + "learning_rate": 1.4852578164313259e-05, + "loss": 0.441, + "step": 6307 + }, + { + "epoch": 1.08, + "learning_rate": 1.4850968025269366e-05, + "loss": 0.4501, + "step": 6308 + }, + { + "epoch": 1.08, + "learning_rate": 1.4849357721743169e-05, + "loss": 0.4336, + "step": 6309 + }, + { + "epoch": 1.08, + "learning_rate": 1.4847747253789262e-05, + "loss": 0.4801, + "step": 6310 + }, + { + "epoch": 1.08, + "learning_rate": 1.4846136621462254e-05, + "loss": 0.4454, + "step": 6311 + }, + { + "epoch": 1.08, + "learning_rate": 1.484452582481676e-05, + "loss": 0.4447, + "step": 6312 + }, + { + "epoch": 1.08, + "learning_rate": 1.4842914863907397e-05, + "loss": 0.3976, + "step": 6313 + }, + { + "epoch": 1.08, + "learning_rate": 1.4841303738788782e-05, + "loss": 0.3711, + "step": 6314 + }, + { + "epoch": 1.08, + "learning_rate": 1.4839692449515548e-05, + "loss": 0.448, + "step": 6315 + }, + { + "epoch": 1.08, + "learning_rate": 1.4838080996142332e-05, + "loss": 0.4058, + "step": 6316 + }, + { + "epoch": 1.08, + "learning_rate": 1.4836469378723765e-05, + "loss": 0.396, + "step": 6317 + }, + { + "epoch": 1.08, + "learning_rate": 1.48348575973145e-05, + "loss": 0.4209, + "step": 6318 + }, + { + "epoch": 1.08, + "learning_rate": 1.4833245651969186e-05, + "loss": 0.4458, + "step": 6319 + }, + { + "epoch": 1.08, + "learning_rate": 1.4831633542742479e-05, + "loss": 0.4049, + "step": 6320 + }, + { + "epoch": 1.08, + "learning_rate": 1.4830021269689039e-05, + "loss": 0.4459, + "step": 6321 + }, + { + "epoch": 1.08, + "learning_rate": 1.4828408832863537e-05, + "loss": 0.4041, + "step": 6322 + }, + { + "epoch": 1.08, + "learning_rate": 1.4826796232320643e-05, + "loss": 0.4209, + "step": 6323 + }, + { + "epoch": 1.08, + "learning_rate": 1.482518346811504e-05, + "loss": 0.4435, + "step": 6324 + }, + { + "epoch": 1.08, + "learning_rate": 1.4823570540301408e-05, + "loss": 0.4248, + "step": 6325 + }, + { + "epoch": 1.08, + "learning_rate": 1.4821957448934437e-05, + "loss": 0.4111, + "step": 6326 + }, + { + "epoch": 1.08, + "learning_rate": 1.4820344194068823e-05, + "loss": 0.4349, + "step": 6327 + }, + { + "epoch": 1.08, + "learning_rate": 1.481873077575927e-05, + "loss": 0.4498, + "step": 6328 + }, + { + "epoch": 1.08, + "learning_rate": 1.4817117194060475e-05, + "loss": 0.405, + "step": 6329 + }, + { + "epoch": 1.08, + "learning_rate": 1.4815503449027162e-05, + "loss": 0.4044, + "step": 6330 + }, + { + "epoch": 1.08, + "learning_rate": 1.4813889540714041e-05, + "loss": 0.4286, + "step": 6331 + }, + { + "epoch": 1.08, + "learning_rate": 1.481227546917584e-05, + "loss": 0.4401, + "step": 6332 + }, + { + "epoch": 1.08, + "learning_rate": 1.4810661234467279e-05, + "loss": 0.4397, + "step": 6333 + }, + { + "epoch": 1.08, + "learning_rate": 1.4809046836643098e-05, + "loss": 0.4336, + "step": 6334 + }, + { + "epoch": 1.08, + "learning_rate": 1.4807432275758038e-05, + "loss": 0.3975, + "step": 6335 + }, + { + "epoch": 1.08, + "learning_rate": 1.4805817551866839e-05, + "loss": 0.426, + "step": 6336 + }, + { + "epoch": 1.08, + "learning_rate": 1.4804202665024256e-05, + "loss": 0.4366, + "step": 6337 + }, + { + "epoch": 1.08, + "learning_rate": 1.4802587615285045e-05, + "loss": 0.4347, + "step": 6338 + }, + { + "epoch": 1.08, + "learning_rate": 1.4800972402703964e-05, + "loss": 0.3975, + "step": 6339 + }, + { + "epoch": 1.08, + "learning_rate": 1.4799357027335786e-05, + "loss": 0.415, + "step": 6340 + }, + { + "epoch": 1.08, + "learning_rate": 1.4797741489235276e-05, + "loss": 0.4129, + "step": 6341 + }, + { + "epoch": 1.08, + "learning_rate": 1.4796125788457218e-05, + "loss": 0.4195, + "step": 6342 + }, + { + "epoch": 1.08, + "learning_rate": 1.4794509925056394e-05, + "loss": 0.4347, + "step": 6343 + }, + { + "epoch": 1.08, + "learning_rate": 1.4792893899087595e-05, + "loss": 0.4195, + "step": 6344 + }, + { + "epoch": 1.08, + "learning_rate": 1.4791277710605612e-05, + "loss": 0.4591, + "step": 6345 + }, + { + "epoch": 1.08, + "learning_rate": 1.478966135966525e-05, + "loss": 0.4349, + "step": 6346 + }, + { + "epoch": 1.08, + "learning_rate": 1.4788044846321313e-05, + "loss": 0.4334, + "step": 6347 + }, + { + "epoch": 1.08, + "learning_rate": 1.4786428170628608e-05, + "loss": 0.427, + "step": 6348 + }, + { + "epoch": 1.08, + "learning_rate": 1.4784811332641957e-05, + "loss": 0.4421, + "step": 6349 + }, + { + "epoch": 1.08, + "learning_rate": 1.4783194332416182e-05, + "loss": 0.4503, + "step": 6350 + }, + { + "epoch": 1.08, + "learning_rate": 1.4781577170006105e-05, + "loss": 0.4093, + "step": 6351 + }, + { + "epoch": 1.08, + "learning_rate": 1.4779959845466567e-05, + "loss": 0.4146, + "step": 6352 + }, + { + "epoch": 1.08, + "learning_rate": 1.4778342358852405e-05, + "loss": 0.4249, + "step": 6353 + }, + { + "epoch": 1.08, + "learning_rate": 1.477672471021846e-05, + "loss": 0.4384, + "step": 6354 + }, + { + "epoch": 1.08, + "learning_rate": 1.4775106899619585e-05, + "loss": 0.4119, + "step": 6355 + }, + { + "epoch": 1.08, + "learning_rate": 1.4773488927110633e-05, + "loss": 0.4201, + "step": 6356 + }, + { + "epoch": 1.08, + "learning_rate": 1.4771870792746463e-05, + "loss": 0.4498, + "step": 6357 + }, + { + "epoch": 1.08, + "learning_rate": 1.477025249658195e-05, + "loss": 0.4179, + "step": 6358 + }, + { + "epoch": 1.08, + "learning_rate": 1.4768634038671956e-05, + "loss": 0.4174, + "step": 6359 + }, + { + "epoch": 1.08, + "learning_rate": 1.476701541907136e-05, + "loss": 0.4144, + "step": 6360 + }, + { + "epoch": 1.08, + "learning_rate": 1.476539663783505e-05, + "loss": 0.4284, + "step": 6361 + }, + { + "epoch": 1.09, + "learning_rate": 1.476377769501791e-05, + "loss": 0.4465, + "step": 6362 + }, + { + "epoch": 1.09, + "learning_rate": 1.4762158590674835e-05, + "loss": 0.3803, + "step": 6363 + }, + { + "epoch": 1.09, + "learning_rate": 1.4760539324860724e-05, + "loss": 0.4396, + "step": 6364 + }, + { + "epoch": 1.09, + "learning_rate": 1.4758919897630481e-05, + "loss": 0.4259, + "step": 6365 + }, + { + "epoch": 1.09, + "learning_rate": 1.4757300309039015e-05, + "loss": 0.4391, + "step": 6366 + }, + { + "epoch": 1.09, + "learning_rate": 1.4755680559141243e-05, + "loss": 0.449, + "step": 6367 + }, + { + "epoch": 1.09, + "learning_rate": 1.4754060647992087e-05, + "loss": 0.3917, + "step": 6368 + }, + { + "epoch": 1.09, + "learning_rate": 1.4752440575646473e-05, + "loss": 0.4221, + "step": 6369 + }, + { + "epoch": 1.09, + "learning_rate": 1.4750820342159333e-05, + "loss": 0.3936, + "step": 6370 + }, + { + "epoch": 1.09, + "learning_rate": 1.4749199947585604e-05, + "loss": 0.4495, + "step": 6371 + }, + { + "epoch": 1.09, + "learning_rate": 1.4747579391980228e-05, + "loss": 0.4323, + "step": 6372 + }, + { + "epoch": 1.09, + "learning_rate": 1.4745958675398156e-05, + "loss": 0.4552, + "step": 6373 + }, + { + "epoch": 1.09, + "learning_rate": 1.4744337797894335e-05, + "loss": 0.4104, + "step": 6374 + }, + { + "epoch": 1.09, + "learning_rate": 1.4742716759523731e-05, + "loss": 0.4399, + "step": 6375 + }, + { + "epoch": 1.09, + "learning_rate": 1.474109556034131e-05, + "loss": 0.4298, + "step": 6376 + }, + { + "epoch": 1.09, + "learning_rate": 1.4739474200402038e-05, + "loss": 0.451, + "step": 6377 + }, + { + "epoch": 1.09, + "learning_rate": 1.4737852679760891e-05, + "loss": 0.4404, + "step": 6378 + }, + { + "epoch": 1.09, + "learning_rate": 1.4736230998472852e-05, + "loss": 0.431, + "step": 6379 + }, + { + "epoch": 1.09, + "learning_rate": 1.4734609156592908e-05, + "loss": 0.4336, + "step": 6380 + }, + { + "epoch": 1.09, + "learning_rate": 1.4732987154176046e-05, + "loss": 0.4505, + "step": 6381 + }, + { + "epoch": 1.09, + "learning_rate": 1.4731364991277267e-05, + "loss": 0.4599, + "step": 6382 + }, + { + "epoch": 1.09, + "learning_rate": 1.4729742667951575e-05, + "loss": 0.4379, + "step": 6383 + }, + { + "epoch": 1.09, + "learning_rate": 1.4728120184253978e-05, + "loss": 0.3983, + "step": 6384 + }, + { + "epoch": 1.09, + "learning_rate": 1.4726497540239486e-05, + "loss": 0.423, + "step": 6385 + }, + { + "epoch": 1.09, + "learning_rate": 1.4724874735963124e-05, + "loss": 0.4052, + "step": 6386 + }, + { + "epoch": 1.09, + "learning_rate": 1.472325177147991e-05, + "loss": 0.4645, + "step": 6387 + }, + { + "epoch": 1.09, + "learning_rate": 1.472162864684488e-05, + "loss": 0.4143, + "step": 6388 + }, + { + "epoch": 1.09, + "learning_rate": 1.4720005362113066e-05, + "loss": 0.4098, + "step": 6389 + }, + { + "epoch": 1.09, + "learning_rate": 1.471838191733951e-05, + "loss": 0.4356, + "step": 6390 + }, + { + "epoch": 1.09, + "learning_rate": 1.4716758312579255e-05, + "loss": 0.3974, + "step": 6391 + }, + { + "epoch": 1.09, + "learning_rate": 1.4715134547887362e-05, + "loss": 0.4208, + "step": 6392 + }, + { + "epoch": 1.09, + "learning_rate": 1.4713510623318879e-05, + "loss": 0.3975, + "step": 6393 + }, + { + "epoch": 1.09, + "learning_rate": 1.4711886538928872e-05, + "loss": 0.443, + "step": 6394 + }, + { + "epoch": 1.09, + "learning_rate": 1.471026229477241e-05, + "loss": 0.4081, + "step": 6395 + }, + { + "epoch": 1.09, + "learning_rate": 1.4708637890904564e-05, + "loss": 0.4083, + "step": 6396 + }, + { + "epoch": 1.09, + "learning_rate": 1.4707013327380413e-05, + "loss": 0.4113, + "step": 6397 + }, + { + "epoch": 1.09, + "learning_rate": 1.4705388604255041e-05, + "loss": 0.4153, + "step": 6398 + }, + { + "epoch": 1.09, + "learning_rate": 1.4703763721583544e-05, + "loss": 0.4231, + "step": 6399 + }, + { + "epoch": 1.09, + "learning_rate": 1.4702138679421007e-05, + "loss": 0.4196, + "step": 6400 + }, + { + "epoch": 1.09, + "learning_rate": 1.4700513477822537e-05, + "loss": 0.4208, + "step": 6401 + }, + { + "epoch": 1.09, + "learning_rate": 1.469888811684324e-05, + "loss": 0.4133, + "step": 6402 + }, + { + "epoch": 1.09, + "learning_rate": 1.4697262596538227e-05, + "loss": 0.4169, + "step": 6403 + }, + { + "epoch": 1.09, + "learning_rate": 1.4695636916962607e-05, + "loss": 0.4186, + "step": 6404 + }, + { + "epoch": 1.09, + "learning_rate": 1.4694011078171512e-05, + "loss": 0.4089, + "step": 6405 + }, + { + "epoch": 1.09, + "learning_rate": 1.4692385080220067e-05, + "loss": 0.413, + "step": 6406 + }, + { + "epoch": 1.09, + "learning_rate": 1.4690758923163405e-05, + "loss": 0.4024, + "step": 6407 + }, + { + "epoch": 1.09, + "learning_rate": 1.4689132607056657e-05, + "loss": 0.4219, + "step": 6408 + }, + { + "epoch": 1.09, + "learning_rate": 1.4687506131954978e-05, + "loss": 0.445, + "step": 6409 + }, + { + "epoch": 1.09, + "learning_rate": 1.468587949791351e-05, + "loss": 0.4398, + "step": 6410 + }, + { + "epoch": 1.09, + "learning_rate": 1.4684252704987412e-05, + "loss": 0.3962, + "step": 6411 + }, + { + "epoch": 1.09, + "learning_rate": 1.4682625753231838e-05, + "loss": 0.4328, + "step": 6412 + }, + { + "epoch": 1.09, + "learning_rate": 1.4680998642701958e-05, + "loss": 0.3955, + "step": 6413 + }, + { + "epoch": 1.09, + "learning_rate": 1.4679371373452941e-05, + "loss": 0.4205, + "step": 6414 + }, + { + "epoch": 1.09, + "learning_rate": 1.4677743945539961e-05, + "loss": 0.4335, + "step": 6415 + }, + { + "epoch": 1.09, + "learning_rate": 1.4676116359018205e-05, + "loss": 0.3993, + "step": 6416 + }, + { + "epoch": 1.09, + "learning_rate": 1.4674488613942852e-05, + "loss": 0.4237, + "step": 6417 + }, + { + "epoch": 1.09, + "learning_rate": 1.4672860710369101e-05, + "loss": 0.4215, + "step": 6418 + }, + { + "epoch": 1.09, + "learning_rate": 1.4671232648352147e-05, + "loss": 0.4023, + "step": 6419 + }, + { + "epoch": 1.09, + "learning_rate": 1.466960442794719e-05, + "loss": 0.4219, + "step": 6420 + }, + { + "epoch": 1.1, + "learning_rate": 1.4667976049209443e-05, + "loss": 0.4197, + "step": 6421 + }, + { + "epoch": 1.1, + "learning_rate": 1.4666347512194117e-05, + "loss": 0.4372, + "step": 6422 + }, + { + "epoch": 1.1, + "learning_rate": 1.4664718816956432e-05, + "loss": 0.3946, + "step": 6423 + }, + { + "epoch": 1.1, + "learning_rate": 1.4663089963551611e-05, + "loss": 0.4494, + "step": 6424 + }, + { + "epoch": 1.1, + "learning_rate": 1.4661460952034884e-05, + "loss": 0.4265, + "step": 6425 + }, + { + "epoch": 1.1, + "learning_rate": 1.4659831782461489e-05, + "loss": 0.4373, + "step": 6426 + }, + { + "epoch": 1.1, + "learning_rate": 1.4658202454886663e-05, + "loss": 0.4641, + "step": 6427 + }, + { + "epoch": 1.1, + "learning_rate": 1.4656572969365652e-05, + "loss": 0.461, + "step": 6428 + }, + { + "epoch": 1.1, + "learning_rate": 1.465494332595371e-05, + "loss": 0.456, + "step": 6429 + }, + { + "epoch": 1.1, + "learning_rate": 1.465331352470609e-05, + "loss": 0.4444, + "step": 6430 + }, + { + "epoch": 1.1, + "learning_rate": 1.4651683565678054e-05, + "loss": 0.4033, + "step": 6431 + }, + { + "epoch": 1.1, + "learning_rate": 1.4650053448924872e-05, + "loss": 0.4549, + "step": 6432 + }, + { + "epoch": 1.1, + "learning_rate": 1.4648423174501816e-05, + "loss": 0.4116, + "step": 6433 + }, + { + "epoch": 1.1, + "learning_rate": 1.4646792742464162e-05, + "loss": 0.4408, + "step": 6434 + }, + { + "epoch": 1.1, + "learning_rate": 1.4645162152867193e-05, + "loss": 0.4408, + "step": 6435 + }, + { + "epoch": 1.1, + "learning_rate": 1.4643531405766203e-05, + "loss": 0.3923, + "step": 6436 + }, + { + "epoch": 1.1, + "learning_rate": 1.4641900501216477e-05, + "loss": 0.4264, + "step": 6437 + }, + { + "epoch": 1.1, + "learning_rate": 1.464026943927332e-05, + "loss": 0.4562, + "step": 6438 + }, + { + "epoch": 1.1, + "learning_rate": 1.4638638219992039e-05, + "loss": 0.4395, + "step": 6439 + }, + { + "epoch": 1.1, + "learning_rate": 1.4637006843427938e-05, + "loss": 0.3995, + "step": 6440 + }, + { + "epoch": 1.1, + "learning_rate": 1.4635375309636333e-05, + "loss": 0.4474, + "step": 6441 + }, + { + "epoch": 1.1, + "learning_rate": 1.4633743618672549e-05, + "loss": 0.4457, + "step": 6442 + }, + { + "epoch": 1.1, + "learning_rate": 1.4632111770591909e-05, + "loss": 0.435, + "step": 6443 + }, + { + "epoch": 1.1, + "learning_rate": 1.4630479765449745e-05, + "loss": 0.4155, + "step": 6444 + }, + { + "epoch": 1.1, + "learning_rate": 1.4628847603301393e-05, + "loss": 0.4108, + "step": 6445 + }, + { + "epoch": 1.1, + "learning_rate": 1.4627215284202192e-05, + "loss": 0.4385, + "step": 6446 + }, + { + "epoch": 1.1, + "learning_rate": 1.4625582808207495e-05, + "loss": 0.436, + "step": 6447 + }, + { + "epoch": 1.1, + "learning_rate": 1.4623950175372652e-05, + "loss": 0.4143, + "step": 6448 + }, + { + "epoch": 1.1, + "learning_rate": 1.462231738575302e-05, + "loss": 0.4079, + "step": 6449 + }, + { + "epoch": 1.1, + "learning_rate": 1.4620684439403962e-05, + "loss": 0.4374, + "step": 6450 + }, + { + "epoch": 1.1, + "learning_rate": 1.4619051336380849e-05, + "loss": 0.4324, + "step": 6451 + }, + { + "epoch": 1.1, + "learning_rate": 1.4617418076739053e-05, + "loss": 0.4277, + "step": 6452 + }, + { + "epoch": 1.1, + "learning_rate": 1.461578466053395e-05, + "loss": 0.4126, + "step": 6453 + }, + { + "epoch": 1.1, + "learning_rate": 1.461415108782093e-05, + "loss": 0.4092, + "step": 6454 + }, + { + "epoch": 1.1, + "learning_rate": 1.4612517358655382e-05, + "loss": 0.4369, + "step": 6455 + }, + { + "epoch": 1.1, + "learning_rate": 1.46108834730927e-05, + "loss": 0.4558, + "step": 6456 + }, + { + "epoch": 1.1, + "learning_rate": 1.460924943118828e-05, + "loss": 0.4163, + "step": 6457 + }, + { + "epoch": 1.1, + "learning_rate": 1.4607615232997537e-05, + "loss": 0.3878, + "step": 6458 + }, + { + "epoch": 1.1, + "learning_rate": 1.4605980878575872e-05, + "loss": 0.4471, + "step": 6459 + }, + { + "epoch": 1.1, + "learning_rate": 1.4604346367978706e-05, + "loss": 0.406, + "step": 6460 + }, + { + "epoch": 1.1, + "learning_rate": 1.460271170126146e-05, + "loss": 0.4434, + "step": 6461 + }, + { + "epoch": 1.1, + "learning_rate": 1.4601076878479561e-05, + "loss": 0.4234, + "step": 6462 + }, + { + "epoch": 1.1, + "learning_rate": 1.4599441899688442e-05, + "loss": 0.4475, + "step": 6463 + }, + { + "epoch": 1.1, + "learning_rate": 1.4597806764943538e-05, + "loss": 0.4035, + "step": 6464 + }, + { + "epoch": 1.1, + "learning_rate": 1.4596171474300293e-05, + "loss": 0.4457, + "step": 6465 + }, + { + "epoch": 1.1, + "learning_rate": 1.4594536027814159e-05, + "loss": 0.4316, + "step": 6466 + }, + { + "epoch": 1.1, + "learning_rate": 1.4592900425540583e-05, + "loss": 0.391, + "step": 6467 + }, + { + "epoch": 1.1, + "learning_rate": 1.4591264667535026e-05, + "loss": 0.3968, + "step": 6468 + }, + { + "epoch": 1.1, + "learning_rate": 1.4589628753852952e-05, + "loss": 0.4605, + "step": 6469 + }, + { + "epoch": 1.1, + "learning_rate": 1.4587992684549827e-05, + "loss": 0.4281, + "step": 6470 + }, + { + "epoch": 1.1, + "learning_rate": 1.4586356459681133e-05, + "loss": 0.4248, + "step": 6471 + }, + { + "epoch": 1.1, + "learning_rate": 1.458472007930234e-05, + "loss": 0.4425, + "step": 6472 + }, + { + "epoch": 1.1, + "learning_rate": 1.458308354346894e-05, + "loss": 0.408, + "step": 6473 + }, + { + "epoch": 1.1, + "learning_rate": 1.4581446852236425e-05, + "loss": 0.4241, + "step": 6474 + }, + { + "epoch": 1.1, + "learning_rate": 1.4579810005660284e-05, + "loss": 0.4158, + "step": 6475 + }, + { + "epoch": 1.1, + "learning_rate": 1.4578173003796019e-05, + "loss": 0.4235, + "step": 6476 + }, + { + "epoch": 1.1, + "learning_rate": 1.4576535846699136e-05, + "loss": 0.44, + "step": 6477 + }, + { + "epoch": 1.1, + "learning_rate": 1.4574898534425153e-05, + "loss": 0.4507, + "step": 6478 + }, + { + "epoch": 1.1, + "learning_rate": 1.4573261067029574e-05, + "loss": 0.4533, + "step": 6479 + }, + { + "epoch": 1.11, + "learning_rate": 1.4571623444567933e-05, + "loss": 0.4452, + "step": 6480 + }, + { + "epoch": 1.11, + "learning_rate": 1.4569985667095748e-05, + "loss": 0.4087, + "step": 6481 + }, + { + "epoch": 1.11, + "learning_rate": 1.4568347734668558e-05, + "loss": 0.396, + "step": 6482 + }, + { + "epoch": 1.11, + "learning_rate": 1.45667096473419e-05, + "loss": 0.462, + "step": 6483 + }, + { + "epoch": 1.11, + "learning_rate": 1.456507140517131e-05, + "loss": 0.444, + "step": 6484 + }, + { + "epoch": 1.11, + "learning_rate": 1.4563433008212342e-05, + "loss": 0.4005, + "step": 6485 + }, + { + "epoch": 1.11, + "learning_rate": 1.456179445652055e-05, + "loss": 0.4447, + "step": 6486 + }, + { + "epoch": 1.11, + "learning_rate": 1.4560155750151487e-05, + "loss": 0.4278, + "step": 6487 + }, + { + "epoch": 1.11, + "learning_rate": 1.455851688916072e-05, + "loss": 0.4136, + "step": 6488 + }, + { + "epoch": 1.11, + "learning_rate": 1.4556877873603824e-05, + "loss": 0.4269, + "step": 6489 + }, + { + "epoch": 1.11, + "learning_rate": 1.4555238703536364e-05, + "loss": 0.4031, + "step": 6490 + }, + { + "epoch": 1.11, + "learning_rate": 1.4553599379013923e-05, + "loss": 0.4154, + "step": 6491 + }, + { + "epoch": 1.11, + "learning_rate": 1.4551959900092086e-05, + "loss": 0.4147, + "step": 6492 + }, + { + "epoch": 1.11, + "learning_rate": 1.4550320266826444e-05, + "loss": 0.4198, + "step": 6493 + }, + { + "epoch": 1.11, + "learning_rate": 1.4548680479272587e-05, + "loss": 0.4269, + "step": 6494 + }, + { + "epoch": 1.11, + "learning_rate": 1.4547040537486123e-05, + "loss": 0.417, + "step": 6495 + }, + { + "epoch": 1.11, + "learning_rate": 1.4545400441522652e-05, + "loss": 0.4501, + "step": 6496 + }, + { + "epoch": 1.11, + "learning_rate": 1.454376019143779e-05, + "loss": 0.4202, + "step": 6497 + }, + { + "epoch": 1.11, + "learning_rate": 1.4542119787287153e-05, + "loss": 0.4146, + "step": 6498 + }, + { + "epoch": 1.11, + "learning_rate": 1.4540479229126354e-05, + "loss": 0.4345, + "step": 6499 + }, + { + "epoch": 1.11, + "learning_rate": 1.4538838517011026e-05, + "loss": 0.3997, + "step": 6500 + }, + { + "epoch": 1.11, + "learning_rate": 1.4537197650996801e-05, + "loss": 0.4455, + "step": 6501 + }, + { + "epoch": 1.11, + "learning_rate": 1.4535556631139316e-05, + "loss": 0.3816, + "step": 6502 + }, + { + "epoch": 1.11, + "learning_rate": 1.4533915457494212e-05, + "loss": 0.4437, + "step": 6503 + }, + { + "epoch": 1.11, + "learning_rate": 1.4532274130117136e-05, + "loss": 0.486, + "step": 6504 + }, + { + "epoch": 1.11, + "learning_rate": 1.4530632649063743e-05, + "loss": 0.4282, + "step": 6505 + }, + { + "epoch": 1.11, + "learning_rate": 1.4528991014389688e-05, + "loss": 0.473, + "step": 6506 + }, + { + "epoch": 1.11, + "learning_rate": 1.4527349226150636e-05, + "loss": 0.426, + "step": 6507 + }, + { + "epoch": 1.11, + "learning_rate": 1.4525707284402255e-05, + "loss": 0.4438, + "step": 6508 + }, + { + "epoch": 1.11, + "learning_rate": 1.4524065189200216e-05, + "loss": 0.4133, + "step": 6509 + }, + { + "epoch": 1.11, + "learning_rate": 1.45224229406002e-05, + "loss": 0.4123, + "step": 6510 + }, + { + "epoch": 1.11, + "learning_rate": 1.4520780538657892e-05, + "loss": 0.4273, + "step": 6511 + }, + { + "epoch": 1.11, + "learning_rate": 1.4519137983428983e-05, + "loss": 0.4654, + "step": 6512 + }, + { + "epoch": 1.11, + "learning_rate": 1.451749527496916e-05, + "loss": 0.4151, + "step": 6513 + }, + { + "epoch": 1.11, + "learning_rate": 1.4515852413334131e-05, + "loss": 0.4112, + "step": 6514 + }, + { + "epoch": 1.11, + "learning_rate": 1.451420939857959e-05, + "loss": 0.425, + "step": 6515 + }, + { + "epoch": 1.11, + "learning_rate": 1.4512566230761258e-05, + "loss": 0.4238, + "step": 6516 + }, + { + "epoch": 1.11, + "learning_rate": 1.4510922909934846e-05, + "loss": 0.4186, + "step": 6517 + }, + { + "epoch": 1.11, + "learning_rate": 1.450927943615607e-05, + "loss": 0.439, + "step": 6518 + }, + { + "epoch": 1.11, + "learning_rate": 1.4507635809480662e-05, + "loss": 0.4631, + "step": 6519 + }, + { + "epoch": 1.11, + "learning_rate": 1.4505992029964349e-05, + "loss": 0.4487, + "step": 6520 + }, + { + "epoch": 1.11, + "learning_rate": 1.4504348097662866e-05, + "loss": 0.5023, + "step": 6521 + }, + { + "epoch": 1.11, + "learning_rate": 1.4502704012631958e-05, + "loss": 0.4149, + "step": 6522 + }, + { + "epoch": 1.11, + "learning_rate": 1.4501059774927369e-05, + "loss": 0.4297, + "step": 6523 + }, + { + "epoch": 1.11, + "learning_rate": 1.4499415384604847e-05, + "loss": 0.4301, + "step": 6524 + }, + { + "epoch": 1.11, + "learning_rate": 1.4497770841720152e-05, + "loss": 0.44, + "step": 6525 + }, + { + "epoch": 1.11, + "learning_rate": 1.4496126146329046e-05, + "loss": 0.4441, + "step": 6526 + }, + { + "epoch": 1.11, + "learning_rate": 1.4494481298487295e-05, + "loss": 0.4298, + "step": 6527 + }, + { + "epoch": 1.11, + "learning_rate": 1.4492836298250671e-05, + "loss": 0.4457, + "step": 6528 + }, + { + "epoch": 1.11, + "learning_rate": 1.4491191145674952e-05, + "loss": 0.4403, + "step": 6529 + }, + { + "epoch": 1.11, + "learning_rate": 1.4489545840815919e-05, + "loss": 0.4319, + "step": 6530 + }, + { + "epoch": 1.11, + "learning_rate": 1.4487900383729363e-05, + "loss": 0.4513, + "step": 6531 + }, + { + "epoch": 1.11, + "learning_rate": 1.4486254774471072e-05, + "loss": 0.4247, + "step": 6532 + }, + { + "epoch": 1.11, + "learning_rate": 1.4484609013096844e-05, + "loss": 0.416, + "step": 6533 + }, + { + "epoch": 1.11, + "learning_rate": 1.4482963099662488e-05, + "loss": 0.4015, + "step": 6534 + }, + { + "epoch": 1.11, + "learning_rate": 1.4481317034223803e-05, + "loss": 0.4444, + "step": 6535 + }, + { + "epoch": 1.11, + "learning_rate": 1.4479670816836612e-05, + "loss": 0.4205, + "step": 6536 + }, + { + "epoch": 1.11, + "learning_rate": 1.4478024447556722e-05, + "loss": 0.4066, + "step": 6537 + }, + { + "epoch": 1.12, + "learning_rate": 1.447637792643997e-05, + "loss": 0.4397, + "step": 6538 + }, + { + "epoch": 1.12, + "learning_rate": 1.4474731253542178e-05, + "loss": 0.3894, + "step": 6539 + }, + { + "epoch": 1.12, + "learning_rate": 1.4473084428919177e-05, + "loss": 0.4415, + "step": 6540 + }, + { + "epoch": 1.12, + "learning_rate": 1.447143745262681e-05, + "loss": 0.4439, + "step": 6541 + }, + { + "epoch": 1.12, + "learning_rate": 1.4469790324720923e-05, + "loss": 0.4147, + "step": 6542 + }, + { + "epoch": 1.12, + "learning_rate": 1.4468143045257359e-05, + "loss": 0.4513, + "step": 6543 + }, + { + "epoch": 1.12, + "learning_rate": 1.4466495614291977e-05, + "loss": 0.435, + "step": 6544 + }, + { + "epoch": 1.12, + "learning_rate": 1.4464848031880638e-05, + "loss": 0.4046, + "step": 6545 + }, + { + "epoch": 1.12, + "learning_rate": 1.4463200298079203e-05, + "loss": 0.4068, + "step": 6546 + }, + { + "epoch": 1.12, + "learning_rate": 1.4461552412943545e-05, + "loss": 0.4259, + "step": 6547 + }, + { + "epoch": 1.12, + "learning_rate": 1.4459904376529535e-05, + "loss": 0.4126, + "step": 6548 + }, + { + "epoch": 1.12, + "learning_rate": 1.4458256188893058e-05, + "loss": 0.424, + "step": 6549 + }, + { + "epoch": 1.12, + "learning_rate": 1.4456607850089995e-05, + "loss": 0.4326, + "step": 6550 + }, + { + "epoch": 1.12, + "learning_rate": 1.4454959360176237e-05, + "loss": 0.4199, + "step": 6551 + }, + { + "epoch": 1.12, + "learning_rate": 1.4453310719207683e-05, + "loss": 0.4296, + "step": 6552 + }, + { + "epoch": 1.12, + "learning_rate": 1.4451661927240233e-05, + "loss": 0.4355, + "step": 6553 + }, + { + "epoch": 1.12, + "learning_rate": 1.4450012984329792e-05, + "loss": 0.4555, + "step": 6554 + }, + { + "epoch": 1.12, + "learning_rate": 1.4448363890532266e-05, + "loss": 0.4376, + "step": 6555 + }, + { + "epoch": 1.12, + "learning_rate": 1.4446714645903576e-05, + "loss": 0.4271, + "step": 6556 + }, + { + "epoch": 1.12, + "learning_rate": 1.4445065250499646e-05, + "loss": 0.4059, + "step": 6557 + }, + { + "epoch": 1.12, + "learning_rate": 1.4443415704376396e-05, + "loss": 0.4043, + "step": 6558 + }, + { + "epoch": 1.12, + "learning_rate": 1.4441766007589757e-05, + "loss": 0.4156, + "step": 6559 + }, + { + "epoch": 1.12, + "learning_rate": 1.4440116160195672e-05, + "loss": 0.428, + "step": 6560 + }, + { + "epoch": 1.12, + "learning_rate": 1.4438466162250077e-05, + "loss": 0.3859, + "step": 6561 + }, + { + "epoch": 1.12, + "learning_rate": 1.4436816013808925e-05, + "loss": 0.4489, + "step": 6562 + }, + { + "epoch": 1.12, + "learning_rate": 1.4435165714928159e-05, + "loss": 0.4428, + "step": 6563 + }, + { + "epoch": 1.12, + "learning_rate": 1.443351526566374e-05, + "loss": 0.4104, + "step": 6564 + }, + { + "epoch": 1.12, + "learning_rate": 1.4431864666071634e-05, + "loss": 0.3995, + "step": 6565 + }, + { + "epoch": 1.12, + "learning_rate": 1.44302139162078e-05, + "loss": 0.4178, + "step": 6566 + }, + { + "epoch": 1.12, + "learning_rate": 1.4428563016128214e-05, + "loss": 0.428, + "step": 6567 + }, + { + "epoch": 1.12, + "learning_rate": 1.4426911965888859e-05, + "loss": 0.3939, + "step": 6568 + }, + { + "epoch": 1.12, + "learning_rate": 1.4425260765545707e-05, + "loss": 0.4323, + "step": 6569 + }, + { + "epoch": 1.12, + "learning_rate": 1.4423609415154754e-05, + "loss": 0.4127, + "step": 6570 + }, + { + "epoch": 1.12, + "learning_rate": 1.4421957914771984e-05, + "loss": 0.4298, + "step": 6571 + }, + { + "epoch": 1.12, + "learning_rate": 1.4420306264453405e-05, + "loss": 0.4495, + "step": 6572 + }, + { + "epoch": 1.12, + "learning_rate": 1.441865446425501e-05, + "loss": 0.4298, + "step": 6573 + }, + { + "epoch": 1.12, + "learning_rate": 1.4417002514232814e-05, + "loss": 0.391, + "step": 6574 + }, + { + "epoch": 1.12, + "learning_rate": 1.4415350414442824e-05, + "loss": 0.4421, + "step": 6575 + }, + { + "epoch": 1.12, + "learning_rate": 1.4413698164941064e-05, + "loss": 0.4352, + "step": 6576 + }, + { + "epoch": 1.12, + "learning_rate": 1.4412045765783551e-05, + "loss": 0.4109, + "step": 6577 + }, + { + "epoch": 1.12, + "learning_rate": 1.4410393217026317e-05, + "loss": 0.4384, + "step": 6578 + }, + { + "epoch": 1.12, + "learning_rate": 1.4408740518725392e-05, + "loss": 0.4433, + "step": 6579 + }, + { + "epoch": 1.12, + "learning_rate": 1.4407087670936821e-05, + "loss": 0.4458, + "step": 6580 + }, + { + "epoch": 1.12, + "learning_rate": 1.4405434673716639e-05, + "loss": 0.4346, + "step": 6581 + }, + { + "epoch": 1.12, + "learning_rate": 1.4403781527120898e-05, + "loss": 0.37, + "step": 6582 + }, + { + "epoch": 1.12, + "learning_rate": 1.4402128231205651e-05, + "loss": 0.4462, + "step": 6583 + }, + { + "epoch": 1.12, + "learning_rate": 1.4400474786026959e-05, + "loss": 0.4329, + "step": 6584 + }, + { + "epoch": 1.12, + "learning_rate": 1.4398821191640882e-05, + "loss": 0.4263, + "step": 6585 + }, + { + "epoch": 1.12, + "learning_rate": 1.439716744810349e-05, + "loss": 0.4282, + "step": 6586 + }, + { + "epoch": 1.12, + "learning_rate": 1.4395513555470856e-05, + "loss": 0.4591, + "step": 6587 + }, + { + "epoch": 1.12, + "learning_rate": 1.439385951379906e-05, + "loss": 0.4317, + "step": 6588 + }, + { + "epoch": 1.12, + "learning_rate": 1.4392205323144185e-05, + "loss": 0.4151, + "step": 6589 + }, + { + "epoch": 1.12, + "learning_rate": 1.439055098356232e-05, + "loss": 0.4366, + "step": 6590 + }, + { + "epoch": 1.12, + "learning_rate": 1.438889649510956e-05, + "loss": 0.4, + "step": 6591 + }, + { + "epoch": 1.12, + "learning_rate": 1.4387241857842e-05, + "loss": 0.4216, + "step": 6592 + }, + { + "epoch": 1.12, + "learning_rate": 1.4385587071815747e-05, + "loss": 0.4142, + "step": 6593 + }, + { + "epoch": 1.12, + "learning_rate": 1.4383932137086911e-05, + "loss": 0.4457, + "step": 6594 + }, + { + "epoch": 1.12, + "learning_rate": 1.4382277053711606e-05, + "loss": 0.4408, + "step": 6595 + }, + { + "epoch": 1.12, + "learning_rate": 1.4380621821745947e-05, + "loss": 0.4182, + "step": 6596 + }, + { + "epoch": 1.13, + "learning_rate": 1.4378966441246064e-05, + "loss": 0.4178, + "step": 6597 + }, + { + "epoch": 1.13, + "learning_rate": 1.4377310912268084e-05, + "loss": 0.427, + "step": 6598 + }, + { + "epoch": 1.13, + "learning_rate": 1.4375655234868135e-05, + "loss": 0.438, + "step": 6599 + }, + { + "epoch": 1.13, + "learning_rate": 1.4373999409102364e-05, + "loss": 0.4206, + "step": 6600 + }, + { + "epoch": 1.13, + "learning_rate": 1.4372343435026915e-05, + "loss": 0.3957, + "step": 6601 + }, + { + "epoch": 1.13, + "learning_rate": 1.4370687312697932e-05, + "loss": 0.4275, + "step": 6602 + }, + { + "epoch": 1.13, + "learning_rate": 1.4369031042171576e-05, + "loss": 0.4165, + "step": 6603 + }, + { + "epoch": 1.13, + "learning_rate": 1.4367374623504e-05, + "loss": 0.4399, + "step": 6604 + }, + { + "epoch": 1.13, + "learning_rate": 1.4365718056751371e-05, + "loss": 0.4287, + "step": 6605 + }, + { + "epoch": 1.13, + "learning_rate": 1.436406134196986e-05, + "loss": 0.4391, + "step": 6606 + }, + { + "epoch": 1.13, + "learning_rate": 1.436240447921564e-05, + "loss": 0.4526, + "step": 6607 + }, + { + "epoch": 1.13, + "learning_rate": 1.4360747468544889e-05, + "loss": 0.4144, + "step": 6608 + }, + { + "epoch": 1.13, + "learning_rate": 1.4359090310013795e-05, + "loss": 0.4399, + "step": 6609 + }, + { + "epoch": 1.13, + "learning_rate": 1.4357433003678545e-05, + "loss": 0.443, + "step": 6610 + }, + { + "epoch": 1.13, + "learning_rate": 1.4355775549595333e-05, + "loss": 0.4215, + "step": 6611 + }, + { + "epoch": 1.13, + "learning_rate": 1.4354117947820358e-05, + "loss": 0.4451, + "step": 6612 + }, + { + "epoch": 1.13, + "learning_rate": 1.435246019840983e-05, + "loss": 0.4262, + "step": 6613 + }, + { + "epoch": 1.13, + "learning_rate": 1.4350802301419951e-05, + "loss": 0.42, + "step": 6614 + }, + { + "epoch": 1.13, + "learning_rate": 1.4349144256906938e-05, + "loss": 0.4335, + "step": 6615 + }, + { + "epoch": 1.13, + "learning_rate": 1.434748606492701e-05, + "loss": 0.4039, + "step": 6616 + }, + { + "epoch": 1.13, + "learning_rate": 1.4345827725536396e-05, + "loss": 0.4739, + "step": 6617 + }, + { + "epoch": 1.13, + "learning_rate": 1.4344169238791323e-05, + "loss": 0.4217, + "step": 6618 + }, + { + "epoch": 1.13, + "learning_rate": 1.4342510604748024e-05, + "loss": 0.4629, + "step": 6619 + }, + { + "epoch": 1.13, + "learning_rate": 1.4340851823462735e-05, + "loss": 0.424, + "step": 6620 + }, + { + "epoch": 1.13, + "learning_rate": 1.4339192894991708e-05, + "loss": 0.4298, + "step": 6621 + }, + { + "epoch": 1.13, + "learning_rate": 1.4337533819391189e-05, + "loss": 0.4139, + "step": 6622 + }, + { + "epoch": 1.13, + "learning_rate": 1.4335874596717434e-05, + "loss": 0.4442, + "step": 6623 + }, + { + "epoch": 1.13, + "learning_rate": 1.4334215227026697e-05, + "loss": 0.4147, + "step": 6624 + }, + { + "epoch": 1.13, + "learning_rate": 1.4332555710375253e-05, + "loss": 0.4572, + "step": 6625 + }, + { + "epoch": 1.13, + "learning_rate": 1.4330896046819363e-05, + "loss": 0.4063, + "step": 6626 + }, + { + "epoch": 1.13, + "learning_rate": 1.43292362364153e-05, + "loss": 0.466, + "step": 6627 + }, + { + "epoch": 1.13, + "learning_rate": 1.432757627921935e-05, + "loss": 0.4192, + "step": 6628 + }, + { + "epoch": 1.13, + "learning_rate": 1.432591617528779e-05, + "loss": 0.4456, + "step": 6629 + }, + { + "epoch": 1.13, + "learning_rate": 1.4324255924676918e-05, + "loss": 0.4547, + "step": 6630 + }, + { + "epoch": 1.13, + "learning_rate": 1.4322595527443019e-05, + "loss": 0.4305, + "step": 6631 + }, + { + "epoch": 1.13, + "learning_rate": 1.43209349836424e-05, + "loss": 0.425, + "step": 6632 + }, + { + "epoch": 1.13, + "learning_rate": 1.4319274293331363e-05, + "loss": 0.4322, + "step": 6633 + }, + { + "epoch": 1.13, + "learning_rate": 1.4317613456566216e-05, + "loss": 0.4353, + "step": 6634 + }, + { + "epoch": 1.13, + "learning_rate": 1.4315952473403272e-05, + "loss": 0.4204, + "step": 6635 + }, + { + "epoch": 1.13, + "learning_rate": 1.4314291343898851e-05, + "loss": 0.4229, + "step": 6636 + }, + { + "epoch": 1.13, + "learning_rate": 1.4312630068109278e-05, + "loss": 0.4376, + "step": 6637 + }, + { + "epoch": 1.13, + "learning_rate": 1.4310968646090884e-05, + "loss": 0.461, + "step": 6638 + }, + { + "epoch": 1.13, + "learning_rate": 1.4309307077899998e-05, + "loss": 0.4349, + "step": 6639 + }, + { + "epoch": 1.13, + "learning_rate": 1.4307645363592965e-05, + "loss": 0.432, + "step": 6640 + }, + { + "epoch": 1.13, + "learning_rate": 1.4305983503226125e-05, + "loss": 0.4145, + "step": 6641 + }, + { + "epoch": 1.13, + "learning_rate": 1.4304321496855829e-05, + "loss": 0.4369, + "step": 6642 + }, + { + "epoch": 1.13, + "learning_rate": 1.4302659344538428e-05, + "loss": 0.4655, + "step": 6643 + }, + { + "epoch": 1.13, + "learning_rate": 1.430099704633028e-05, + "loss": 0.4102, + "step": 6644 + }, + { + "epoch": 1.13, + "learning_rate": 1.4299334602287753e-05, + "loss": 0.4268, + "step": 6645 + }, + { + "epoch": 1.13, + "learning_rate": 1.4297672012467212e-05, + "loss": 0.4072, + "step": 6646 + }, + { + "epoch": 1.13, + "learning_rate": 1.4296009276925036e-05, + "loss": 0.4383, + "step": 6647 + }, + { + "epoch": 1.13, + "learning_rate": 1.4294346395717598e-05, + "loss": 0.4337, + "step": 6648 + }, + { + "epoch": 1.13, + "learning_rate": 1.4292683368901283e-05, + "loss": 0.4129, + "step": 6649 + }, + { + "epoch": 1.13, + "learning_rate": 1.429102019653248e-05, + "loss": 0.427, + "step": 6650 + }, + { + "epoch": 1.13, + "learning_rate": 1.4289356878667584e-05, + "loss": 0.4118, + "step": 6651 + }, + { + "epoch": 1.13, + "learning_rate": 1.4287693415362988e-05, + "loss": 0.4085, + "step": 6652 + }, + { + "epoch": 1.13, + "learning_rate": 1.4286029806675101e-05, + "loss": 0.4642, + "step": 6653 + }, + { + "epoch": 1.13, + "learning_rate": 1.4284366052660325e-05, + "loss": 0.4055, + "step": 6654 + }, + { + "epoch": 1.13, + "learning_rate": 1.4282702153375083e-05, + "loss": 0.4234, + "step": 6655 + }, + { + "epoch": 1.14, + "learning_rate": 1.4281038108875784e-05, + "loss": 0.3871, + "step": 6656 + }, + { + "epoch": 1.14, + "learning_rate": 1.4279373919218855e-05, + "loss": 0.4247, + "step": 6657 + }, + { + "epoch": 1.14, + "learning_rate": 1.4277709584460721e-05, + "loss": 0.4455, + "step": 6658 + }, + { + "epoch": 1.14, + "learning_rate": 1.4276045104657819e-05, + "loss": 0.4501, + "step": 6659 + }, + { + "epoch": 1.14, + "learning_rate": 1.4274380479866583e-05, + "loss": 0.3968, + "step": 6660 + }, + { + "epoch": 1.14, + "learning_rate": 1.4272715710143454e-05, + "loss": 0.4177, + "step": 6661 + }, + { + "epoch": 1.14, + "learning_rate": 1.4271050795544887e-05, + "loss": 0.4305, + "step": 6662 + }, + { + "epoch": 1.14, + "learning_rate": 1.4269385736127329e-05, + "loss": 0.4421, + "step": 6663 + }, + { + "epoch": 1.14, + "learning_rate": 1.4267720531947235e-05, + "loss": 0.4472, + "step": 6664 + }, + { + "epoch": 1.14, + "learning_rate": 1.4266055183061073e-05, + "loss": 0.4262, + "step": 6665 + }, + { + "epoch": 1.14, + "learning_rate": 1.4264389689525309e-05, + "loss": 0.4374, + "step": 6666 + }, + { + "epoch": 1.14, + "learning_rate": 1.426272405139641e-05, + "loss": 0.3924, + "step": 6667 + }, + { + "epoch": 1.14, + "learning_rate": 1.4261058268730859e-05, + "loss": 0.4272, + "step": 6668 + }, + { + "epoch": 1.14, + "learning_rate": 1.4259392341585134e-05, + "loss": 0.4149, + "step": 6669 + }, + { + "epoch": 1.14, + "learning_rate": 1.4257726270015727e-05, + "loss": 0.4453, + "step": 6670 + }, + { + "epoch": 1.14, + "learning_rate": 1.4256060054079121e-05, + "loss": 0.3945, + "step": 6671 + }, + { + "epoch": 1.14, + "learning_rate": 1.425439369383182e-05, + "loss": 0.4244, + "step": 6672 + }, + { + "epoch": 1.14, + "learning_rate": 1.4252727189330324e-05, + "loss": 0.4014, + "step": 6673 + }, + { + "epoch": 1.14, + "learning_rate": 1.4251060540631139e-05, + "loss": 0.3956, + "step": 6674 + }, + { + "epoch": 1.14, + "learning_rate": 1.4249393747790774e-05, + "loss": 0.4521, + "step": 6675 + }, + { + "epoch": 1.14, + "learning_rate": 1.4247726810865745e-05, + "loss": 0.4247, + "step": 6676 + }, + { + "epoch": 1.14, + "learning_rate": 1.4246059729912579e-05, + "loss": 0.4285, + "step": 6677 + }, + { + "epoch": 1.14, + "learning_rate": 1.4244392504987794e-05, + "loss": 0.437, + "step": 6678 + }, + { + "epoch": 1.14, + "learning_rate": 1.4242725136147926e-05, + "loss": 0.4133, + "step": 6679 + }, + { + "epoch": 1.14, + "learning_rate": 1.4241057623449508e-05, + "loss": 0.4627, + "step": 6680 + }, + { + "epoch": 1.14, + "learning_rate": 1.4239389966949082e-05, + "loss": 0.393, + "step": 6681 + }, + { + "epoch": 1.14, + "learning_rate": 1.4237722166703197e-05, + "loss": 0.4307, + "step": 6682 + }, + { + "epoch": 1.14, + "learning_rate": 1.4236054222768395e-05, + "loss": 0.449, + "step": 6683 + }, + { + "epoch": 1.14, + "learning_rate": 1.4234386135201235e-05, + "loss": 0.4401, + "step": 6684 + }, + { + "epoch": 1.14, + "learning_rate": 1.423271790405828e-05, + "loss": 0.4688, + "step": 6685 + }, + { + "epoch": 1.14, + "learning_rate": 1.423104952939609e-05, + "loss": 0.4277, + "step": 6686 + }, + { + "epoch": 1.14, + "learning_rate": 1.422938101127124e-05, + "loss": 0.4198, + "step": 6687 + }, + { + "epoch": 1.14, + "learning_rate": 1.4227712349740297e-05, + "loss": 0.3969, + "step": 6688 + }, + { + "epoch": 1.14, + "learning_rate": 1.422604354485985e-05, + "loss": 0.4129, + "step": 6689 + }, + { + "epoch": 1.14, + "learning_rate": 1.4224374596686479e-05, + "loss": 0.4494, + "step": 6690 + }, + { + "epoch": 1.14, + "learning_rate": 1.422270550527677e-05, + "loss": 0.4172, + "step": 6691 + }, + { + "epoch": 1.14, + "learning_rate": 1.4221036270687324e-05, + "loss": 0.4148, + "step": 6692 + }, + { + "epoch": 1.14, + "learning_rate": 1.421936689297473e-05, + "loss": 0.4268, + "step": 6693 + }, + { + "epoch": 1.14, + "learning_rate": 1.42176973721956e-05, + "loss": 0.3966, + "step": 6694 + }, + { + "epoch": 1.14, + "learning_rate": 1.4216027708406542e-05, + "loss": 0.4279, + "step": 6695 + }, + { + "epoch": 1.14, + "learning_rate": 1.4214357901664166e-05, + "loss": 0.4284, + "step": 6696 + }, + { + "epoch": 1.14, + "learning_rate": 1.4212687952025093e-05, + "loss": 0.42, + "step": 6697 + }, + { + "epoch": 1.14, + "learning_rate": 1.4211017859545946e-05, + "loss": 0.4453, + "step": 6698 + }, + { + "epoch": 1.14, + "learning_rate": 1.4209347624283352e-05, + "loss": 0.4228, + "step": 6699 + }, + { + "epoch": 1.14, + "learning_rate": 1.4207677246293943e-05, + "loss": 0.4397, + "step": 6700 + }, + { + "epoch": 1.14, + "learning_rate": 1.4206006725634358e-05, + "loss": 0.4568, + "step": 6701 + }, + { + "epoch": 1.14, + "learning_rate": 1.4204336062361238e-05, + "loss": 0.4446, + "step": 6702 + }, + { + "epoch": 1.14, + "learning_rate": 1.4202665256531233e-05, + "loss": 0.4752, + "step": 6703 + }, + { + "epoch": 1.14, + "learning_rate": 1.4200994308200995e-05, + "loss": 0.4123, + "step": 6704 + }, + { + "epoch": 1.14, + "learning_rate": 1.4199323217427179e-05, + "loss": 0.4268, + "step": 6705 + }, + { + "epoch": 1.14, + "learning_rate": 1.4197651984266448e-05, + "loss": 0.4258, + "step": 6706 + }, + { + "epoch": 1.14, + "learning_rate": 1.4195980608775469e-05, + "loss": 0.4032, + "step": 6707 + }, + { + "epoch": 1.14, + "learning_rate": 1.4194309091010912e-05, + "loss": 0.4411, + "step": 6708 + }, + { + "epoch": 1.14, + "learning_rate": 1.4192637431029452e-05, + "loss": 0.42, + "step": 6709 + }, + { + "epoch": 1.14, + "learning_rate": 1.4190965628887775e-05, + "loss": 0.4246, + "step": 6710 + }, + { + "epoch": 1.14, + "learning_rate": 1.4189293684642564e-05, + "loss": 0.4274, + "step": 6711 + }, + { + "epoch": 1.14, + "learning_rate": 1.418762159835051e-05, + "loss": 0.4409, + "step": 6712 + }, + { + "epoch": 1.14, + "learning_rate": 1.418594937006831e-05, + "loss": 0.4354, + "step": 6713 + }, + { + "epoch": 1.15, + "learning_rate": 1.4184276999852661e-05, + "loss": 0.4051, + "step": 6714 + }, + { + "epoch": 1.15, + "learning_rate": 1.4182604487760275e-05, + "loss": 0.3952, + "step": 6715 + }, + { + "epoch": 1.15, + "learning_rate": 1.4180931833847853e-05, + "loss": 0.4494, + "step": 6716 + }, + { + "epoch": 1.15, + "learning_rate": 1.4179259038172115e-05, + "loss": 0.4303, + "step": 6717 + }, + { + "epoch": 1.15, + "learning_rate": 1.417758610078978e-05, + "loss": 0.4189, + "step": 6718 + }, + { + "epoch": 1.15, + "learning_rate": 1.4175913021757574e-05, + "loss": 0.4517, + "step": 6719 + }, + { + "epoch": 1.15, + "learning_rate": 1.4174239801132224e-05, + "loss": 0.4275, + "step": 6720 + }, + { + "epoch": 1.15, + "learning_rate": 1.4172566438970465e-05, + "loss": 0.4315, + "step": 6721 + }, + { + "epoch": 1.15, + "learning_rate": 1.4170892935329039e-05, + "loss": 0.418, + "step": 6722 + }, + { + "epoch": 1.15, + "learning_rate": 1.4169219290264683e-05, + "loss": 0.3845, + "step": 6723 + }, + { + "epoch": 1.15, + "learning_rate": 1.4167545503834148e-05, + "loss": 0.4068, + "step": 6724 + }, + { + "epoch": 1.15, + "learning_rate": 1.4165871576094187e-05, + "loss": 0.4204, + "step": 6725 + }, + { + "epoch": 1.15, + "learning_rate": 1.4164197507101565e-05, + "loss": 0.4496, + "step": 6726 + }, + { + "epoch": 1.15, + "learning_rate": 1.4162523296913036e-05, + "loss": 0.4344, + "step": 6727 + }, + { + "epoch": 1.15, + "learning_rate": 1.4160848945585368e-05, + "loss": 0.4278, + "step": 6728 + }, + { + "epoch": 1.15, + "learning_rate": 1.4159174453175341e-05, + "loss": 0.4351, + "step": 6729 + }, + { + "epoch": 1.15, + "learning_rate": 1.4157499819739726e-05, + "loss": 0.4024, + "step": 6730 + }, + { + "epoch": 1.15, + "learning_rate": 1.4155825045335305e-05, + "loss": 0.4135, + "step": 6731 + }, + { + "epoch": 1.15, + "learning_rate": 1.4154150130018867e-05, + "loss": 0.4181, + "step": 6732 + }, + { + "epoch": 1.15, + "learning_rate": 1.41524750738472e-05, + "loss": 0.4423, + "step": 6733 + }, + { + "epoch": 1.15, + "learning_rate": 1.4150799876877105e-05, + "loss": 0.4523, + "step": 6734 + }, + { + "epoch": 1.15, + "learning_rate": 1.4149124539165382e-05, + "loss": 0.4276, + "step": 6735 + }, + { + "epoch": 1.15, + "learning_rate": 1.4147449060768834e-05, + "loss": 0.4363, + "step": 6736 + }, + { + "epoch": 1.15, + "learning_rate": 1.4145773441744274e-05, + "loss": 0.3957, + "step": 6737 + }, + { + "epoch": 1.15, + "learning_rate": 1.4144097682148518e-05, + "loss": 0.4223, + "step": 6738 + }, + { + "epoch": 1.15, + "learning_rate": 1.4142421782038384e-05, + "loss": 0.4483, + "step": 6739 + }, + { + "epoch": 1.15, + "learning_rate": 1.4140745741470699e-05, + "loss": 0.4376, + "step": 6740 + }, + { + "epoch": 1.15, + "learning_rate": 1.4139069560502293e-05, + "loss": 0.4422, + "step": 6741 + }, + { + "epoch": 1.15, + "learning_rate": 1.4137393239189996e-05, + "loss": 0.3923, + "step": 6742 + }, + { + "epoch": 1.15, + "learning_rate": 1.4135716777590652e-05, + "loss": 0.3981, + "step": 6743 + }, + { + "epoch": 1.15, + "learning_rate": 1.4134040175761098e-05, + "loss": 0.4309, + "step": 6744 + }, + { + "epoch": 1.15, + "learning_rate": 1.4132363433758196e-05, + "loss": 0.3917, + "step": 6745 + }, + { + "epoch": 1.15, + "learning_rate": 1.4130686551638791e-05, + "loss": 0.4478, + "step": 6746 + }, + { + "epoch": 1.15, + "learning_rate": 1.4129009529459735e-05, + "loss": 0.4537, + "step": 6747 + }, + { + "epoch": 1.15, + "learning_rate": 1.4127332367277904e-05, + "loss": 0.4421, + "step": 6748 + }, + { + "epoch": 1.15, + "learning_rate": 1.4125655065150157e-05, + "loss": 0.4453, + "step": 6749 + }, + { + "epoch": 1.15, + "learning_rate": 1.4123977623133368e-05, + "loss": 0.4561, + "step": 6750 + }, + { + "epoch": 1.15, + "learning_rate": 1.4122300041284415e-05, + "loss": 0.4408, + "step": 6751 + }, + { + "epoch": 1.15, + "learning_rate": 1.4120622319660184e-05, + "loss": 0.4504, + "step": 6752 + }, + { + "epoch": 1.15, + "learning_rate": 1.4118944458317554e-05, + "loss": 0.4214, + "step": 6753 + }, + { + "epoch": 1.15, + "learning_rate": 1.4117266457313423e-05, + "loss": 0.4392, + "step": 6754 + }, + { + "epoch": 1.15, + "learning_rate": 1.4115588316704682e-05, + "loss": 0.4222, + "step": 6755 + }, + { + "epoch": 1.15, + "learning_rate": 1.4113910036548238e-05, + "loss": 0.4376, + "step": 6756 + }, + { + "epoch": 1.15, + "learning_rate": 1.4112231616900987e-05, + "loss": 0.4097, + "step": 6757 + }, + { + "epoch": 1.15, + "learning_rate": 1.4110553057819846e-05, + "loss": 0.4365, + "step": 6758 + }, + { + "epoch": 1.15, + "learning_rate": 1.4108874359361733e-05, + "loss": 0.3924, + "step": 6759 + }, + { + "epoch": 1.15, + "learning_rate": 1.4107195521583564e-05, + "loss": 0.4219, + "step": 6760 + }, + { + "epoch": 1.15, + "learning_rate": 1.4105516544542263e-05, + "loss": 0.4542, + "step": 6761 + }, + { + "epoch": 1.15, + "learning_rate": 1.4103837428294758e-05, + "loss": 0.427, + "step": 6762 + }, + { + "epoch": 1.15, + "learning_rate": 1.4102158172897987e-05, + "loss": 0.4479, + "step": 6763 + }, + { + "epoch": 1.15, + "learning_rate": 1.4100478778408885e-05, + "loss": 0.4568, + "step": 6764 + }, + { + "epoch": 1.15, + "learning_rate": 1.4098799244884398e-05, + "loss": 0.4296, + "step": 6765 + }, + { + "epoch": 1.15, + "learning_rate": 1.4097119572381472e-05, + "loss": 0.4007, + "step": 6766 + }, + { + "epoch": 1.15, + "learning_rate": 1.4095439760957062e-05, + "loss": 0.4303, + "step": 6767 + }, + { + "epoch": 1.15, + "learning_rate": 1.4093759810668124e-05, + "loss": 0.4303, + "step": 6768 + }, + { + "epoch": 1.15, + "learning_rate": 1.4092079721571622e-05, + "loss": 0.4331, + "step": 6769 + }, + { + "epoch": 1.15, + "learning_rate": 1.4090399493724523e-05, + "loss": 0.4128, + "step": 6770 + }, + { + "epoch": 1.15, + "learning_rate": 1.4088719127183796e-05, + "loss": 0.4346, + "step": 6771 + }, + { + "epoch": 1.15, + "learning_rate": 1.4087038622006417e-05, + "loss": 0.4045, + "step": 6772 + }, + { + "epoch": 1.16, + "learning_rate": 1.408535797824937e-05, + "loss": 0.3822, + "step": 6773 + }, + { + "epoch": 1.16, + "learning_rate": 1.4083677195969638e-05, + "loss": 0.4415, + "step": 6774 + }, + { + "epoch": 1.16, + "learning_rate": 1.4081996275224217e-05, + "loss": 0.4396, + "step": 6775 + }, + { + "epoch": 1.16, + "learning_rate": 1.4080315216070094e-05, + "loss": 0.4657, + "step": 6776 + }, + { + "epoch": 1.16, + "learning_rate": 1.407863401856428e-05, + "loss": 0.4337, + "step": 6777 + }, + { + "epoch": 1.16, + "learning_rate": 1.4076952682763766e-05, + "loss": 0.4075, + "step": 6778 + }, + { + "epoch": 1.16, + "learning_rate": 1.4075271208725572e-05, + "loss": 0.4139, + "step": 6779 + }, + { + "epoch": 1.16, + "learning_rate": 1.4073589596506705e-05, + "loss": 0.4159, + "step": 6780 + }, + { + "epoch": 1.16, + "learning_rate": 1.4071907846164189e-05, + "loss": 0.4496, + "step": 6781 + }, + { + "epoch": 1.16, + "learning_rate": 1.4070225957755042e-05, + "loss": 0.4094, + "step": 6782 + }, + { + "epoch": 1.16, + "learning_rate": 1.4068543931336299e-05, + "loss": 0.4397, + "step": 6783 + }, + { + "epoch": 1.16, + "learning_rate": 1.4066861766964984e-05, + "loss": 0.4814, + "step": 6784 + }, + { + "epoch": 1.16, + "learning_rate": 1.406517946469814e-05, + "loss": 0.4268, + "step": 6785 + }, + { + "epoch": 1.16, + "learning_rate": 1.4063497024592809e-05, + "loss": 0.4448, + "step": 6786 + }, + { + "epoch": 1.16, + "learning_rate": 1.4061814446706037e-05, + "loss": 0.4467, + "step": 6787 + }, + { + "epoch": 1.16, + "learning_rate": 1.406013173109487e-05, + "loss": 0.4261, + "step": 6788 + }, + { + "epoch": 1.16, + "learning_rate": 1.4058448877816373e-05, + "loss": 0.3964, + "step": 6789 + }, + { + "epoch": 1.16, + "learning_rate": 1.4056765886927604e-05, + "loss": 0.4551, + "step": 6790 + }, + { + "epoch": 1.16, + "learning_rate": 1.4055082758485623e-05, + "loss": 0.412, + "step": 6791 + }, + { + "epoch": 1.16, + "learning_rate": 1.4053399492547506e-05, + "loss": 0.4385, + "step": 6792 + }, + { + "epoch": 1.16, + "learning_rate": 1.4051716089170325e-05, + "loss": 0.398, + "step": 6793 + }, + { + "epoch": 1.16, + "learning_rate": 1.4050032548411162e-05, + "loss": 0.4173, + "step": 6794 + }, + { + "epoch": 1.16, + "learning_rate": 1.4048348870327096e-05, + "loss": 0.4449, + "step": 6795 + }, + { + "epoch": 1.16, + "learning_rate": 1.404666505497522e-05, + "loss": 0.4491, + "step": 6796 + }, + { + "epoch": 1.16, + "learning_rate": 1.4044981102412622e-05, + "loss": 0.4341, + "step": 6797 + }, + { + "epoch": 1.16, + "learning_rate": 1.4043297012696409e-05, + "loss": 0.456, + "step": 6798 + }, + { + "epoch": 1.16, + "learning_rate": 1.4041612785883676e-05, + "loss": 0.4162, + "step": 6799 + }, + { + "epoch": 1.16, + "learning_rate": 1.4039928422031534e-05, + "loss": 0.3907, + "step": 6800 + }, + { + "epoch": 1.16, + "learning_rate": 1.4038243921197094e-05, + "loss": 0.4145, + "step": 6801 + }, + { + "epoch": 1.16, + "learning_rate": 1.4036559283437472e-05, + "loss": 0.4003, + "step": 6802 + }, + { + "epoch": 1.16, + "learning_rate": 1.4034874508809788e-05, + "loss": 0.4506, + "step": 6803 + }, + { + "epoch": 1.16, + "learning_rate": 1.4033189597371168e-05, + "loss": 0.4295, + "step": 6804 + }, + { + "epoch": 1.16, + "learning_rate": 1.4031504549178747e-05, + "loss": 0.4518, + "step": 6805 + }, + { + "epoch": 1.16, + "learning_rate": 1.4029819364289656e-05, + "loss": 0.4317, + "step": 6806 + }, + { + "epoch": 1.16, + "learning_rate": 1.4028134042761035e-05, + "loss": 0.4113, + "step": 6807 + }, + { + "epoch": 1.16, + "learning_rate": 1.402644858465003e-05, + "loss": 0.4093, + "step": 6808 + }, + { + "epoch": 1.16, + "learning_rate": 1.4024762990013789e-05, + "loss": 0.443, + "step": 6809 + }, + { + "epoch": 1.16, + "learning_rate": 1.4023077258909463e-05, + "loss": 0.4392, + "step": 6810 + }, + { + "epoch": 1.16, + "learning_rate": 1.4021391391394214e-05, + "loss": 0.3986, + "step": 6811 + }, + { + "epoch": 1.16, + "learning_rate": 1.4019705387525204e-05, + "loss": 0.4401, + "step": 6812 + }, + { + "epoch": 1.16, + "learning_rate": 1.4018019247359601e-05, + "loss": 0.4328, + "step": 6813 + }, + { + "epoch": 1.16, + "learning_rate": 1.4016332970954576e-05, + "loss": 0.4437, + "step": 6814 + }, + { + "epoch": 1.16, + "learning_rate": 1.4014646558367306e-05, + "loss": 0.4218, + "step": 6815 + }, + { + "epoch": 1.16, + "learning_rate": 1.4012960009654975e-05, + "loss": 0.4309, + "step": 6816 + }, + { + "epoch": 1.16, + "learning_rate": 1.4011273324874766e-05, + "loss": 0.409, + "step": 6817 + }, + { + "epoch": 1.16, + "learning_rate": 1.4009586504083868e-05, + "loss": 0.4051, + "step": 6818 + }, + { + "epoch": 1.16, + "learning_rate": 1.4007899547339476e-05, + "loss": 0.4154, + "step": 6819 + }, + { + "epoch": 1.16, + "learning_rate": 1.4006212454698798e-05, + "loss": 0.4424, + "step": 6820 + }, + { + "epoch": 1.16, + "learning_rate": 1.4004525226219028e-05, + "loss": 0.4129, + "step": 6821 + }, + { + "epoch": 1.16, + "learning_rate": 1.4002837861957384e-05, + "loss": 0.4338, + "step": 6822 + }, + { + "epoch": 1.16, + "learning_rate": 1.400115036197107e-05, + "loss": 0.4149, + "step": 6823 + }, + { + "epoch": 1.16, + "learning_rate": 1.3999462726317315e-05, + "loss": 0.4243, + "step": 6824 + }, + { + "epoch": 1.16, + "learning_rate": 1.3997774955053337e-05, + "loss": 0.4336, + "step": 6825 + }, + { + "epoch": 1.16, + "learning_rate": 1.3996087048236357e-05, + "loss": 0.4153, + "step": 6826 + }, + { + "epoch": 1.16, + "learning_rate": 1.3994399005923616e-05, + "loss": 0.4102, + "step": 6827 + }, + { + "epoch": 1.16, + "learning_rate": 1.3992710828172347e-05, + "loss": 0.4309, + "step": 6828 + }, + { + "epoch": 1.16, + "learning_rate": 1.3991022515039792e-05, + "loss": 0.4296, + "step": 6829 + }, + { + "epoch": 1.16, + "learning_rate": 1.3989334066583196e-05, + "loss": 0.4328, + "step": 6830 + }, + { + "epoch": 1.17, + "learning_rate": 1.3987645482859813e-05, + "loss": 0.4351, + "step": 6831 + }, + { + "epoch": 1.17, + "learning_rate": 1.3985956763926893e-05, + "loss": 0.4217, + "step": 6832 + }, + { + "epoch": 1.17, + "learning_rate": 1.3984267909841698e-05, + "loss": 0.4182, + "step": 6833 + }, + { + "epoch": 1.17, + "learning_rate": 1.3982578920661492e-05, + "loss": 0.4518, + "step": 6834 + }, + { + "epoch": 1.17, + "learning_rate": 1.3980889796443542e-05, + "loss": 0.4005, + "step": 6835 + }, + { + "epoch": 1.17, + "learning_rate": 1.3979200537245126e-05, + "loss": 0.4178, + "step": 6836 + }, + { + "epoch": 1.17, + "learning_rate": 1.3977511143123514e-05, + "loss": 0.427, + "step": 6837 + }, + { + "epoch": 1.17, + "learning_rate": 1.3975821614135994e-05, + "loss": 0.425, + "step": 6838 + }, + { + "epoch": 1.17, + "learning_rate": 1.3974131950339856e-05, + "loss": 0.4136, + "step": 6839 + }, + { + "epoch": 1.17, + "learning_rate": 1.3972442151792385e-05, + "loss": 0.466, + "step": 6840 + }, + { + "epoch": 1.17, + "learning_rate": 1.3970752218550883e-05, + "loss": 0.4336, + "step": 6841 + }, + { + "epoch": 1.17, + "learning_rate": 1.3969062150672646e-05, + "loss": 0.4208, + "step": 6842 + }, + { + "epoch": 1.17, + "learning_rate": 1.396737194821498e-05, + "loss": 0.4609, + "step": 6843 + }, + { + "epoch": 1.17, + "learning_rate": 1.3965681611235198e-05, + "loss": 0.4071, + "step": 6844 + }, + { + "epoch": 1.17, + "learning_rate": 1.3963991139790608e-05, + "loss": 0.3959, + "step": 6845 + }, + { + "epoch": 1.17, + "learning_rate": 1.396230053393854e-05, + "loss": 0.4473, + "step": 6846 + }, + { + "epoch": 1.17, + "learning_rate": 1.3960609793736305e-05, + "loss": 0.39, + "step": 6847 + }, + { + "epoch": 1.17, + "learning_rate": 1.395891891924124e-05, + "loss": 0.4111, + "step": 6848 + }, + { + "epoch": 1.17, + "learning_rate": 1.3957227910510675e-05, + "loss": 0.438, + "step": 6849 + }, + { + "epoch": 1.17, + "learning_rate": 1.3955536767601949e-05, + "loss": 0.4441, + "step": 6850 + }, + { + "epoch": 1.17, + "learning_rate": 1.3953845490572398e-05, + "loss": 0.4397, + "step": 6851 + }, + { + "epoch": 1.17, + "learning_rate": 1.3952154079479372e-05, + "loss": 0.4432, + "step": 6852 + }, + { + "epoch": 1.17, + "learning_rate": 1.3950462534380224e-05, + "loss": 0.4597, + "step": 6853 + }, + { + "epoch": 1.17, + "learning_rate": 1.3948770855332307e-05, + "loss": 0.4435, + "step": 6854 + }, + { + "epoch": 1.17, + "learning_rate": 1.394707904239298e-05, + "loss": 0.4058, + "step": 6855 + }, + { + "epoch": 1.17, + "learning_rate": 1.3945387095619611e-05, + "loss": 0.4124, + "step": 6856 + }, + { + "epoch": 1.17, + "learning_rate": 1.3943695015069568e-05, + "loss": 0.432, + "step": 6857 + }, + { + "epoch": 1.17, + "learning_rate": 1.394200280080022e-05, + "loss": 0.3985, + "step": 6858 + }, + { + "epoch": 1.17, + "learning_rate": 1.394031045286895e-05, + "loss": 0.4297, + "step": 6859 + }, + { + "epoch": 1.17, + "learning_rate": 1.3938617971333137e-05, + "loss": 0.4061, + "step": 6860 + }, + { + "epoch": 1.17, + "learning_rate": 1.3936925356250172e-05, + "loss": 0.4387, + "step": 6861 + }, + { + "epoch": 1.17, + "learning_rate": 1.3935232607677446e-05, + "loss": 0.3914, + "step": 6862 + }, + { + "epoch": 1.17, + "learning_rate": 1.3933539725672352e-05, + "loss": 0.4331, + "step": 6863 + }, + { + "epoch": 1.17, + "learning_rate": 1.3931846710292295e-05, + "loss": 0.415, + "step": 6864 + }, + { + "epoch": 1.17, + "learning_rate": 1.393015356159468e-05, + "loss": 0.4399, + "step": 6865 + }, + { + "epoch": 1.17, + "learning_rate": 1.392846027963691e-05, + "loss": 0.4502, + "step": 6866 + }, + { + "epoch": 1.17, + "learning_rate": 1.3926766864476407e-05, + "loss": 0.4083, + "step": 6867 + }, + { + "epoch": 1.17, + "learning_rate": 1.3925073316170585e-05, + "loss": 0.3892, + "step": 6868 + }, + { + "epoch": 1.17, + "learning_rate": 1.3923379634776872e-05, + "loss": 0.4622, + "step": 6869 + }, + { + "epoch": 1.17, + "learning_rate": 1.3921685820352693e-05, + "loss": 0.4237, + "step": 6870 + }, + { + "epoch": 1.17, + "learning_rate": 1.3919991872955478e-05, + "loss": 0.4313, + "step": 6871 + }, + { + "epoch": 1.17, + "learning_rate": 1.391829779264267e-05, + "loss": 0.4418, + "step": 6872 + }, + { + "epoch": 1.17, + "learning_rate": 1.3916603579471705e-05, + "loss": 0.4144, + "step": 6873 + }, + { + "epoch": 1.17, + "learning_rate": 1.3914909233500028e-05, + "loss": 0.3927, + "step": 6874 + }, + { + "epoch": 1.17, + "learning_rate": 1.3913214754785095e-05, + "loss": 0.4266, + "step": 6875 + }, + { + "epoch": 1.17, + "learning_rate": 1.3911520143384359e-05, + "loss": 0.422, + "step": 6876 + }, + { + "epoch": 1.17, + "learning_rate": 1.3909825399355278e-05, + "loss": 0.4388, + "step": 6877 + }, + { + "epoch": 1.17, + "learning_rate": 1.3908130522755311e-05, + "loss": 0.4409, + "step": 6878 + }, + { + "epoch": 1.17, + "learning_rate": 1.3906435513641937e-05, + "loss": 0.4324, + "step": 6879 + }, + { + "epoch": 1.17, + "learning_rate": 1.3904740372072622e-05, + "loss": 0.4086, + "step": 6880 + }, + { + "epoch": 1.17, + "learning_rate": 1.3903045098104845e-05, + "loss": 0.4297, + "step": 6881 + }, + { + "epoch": 1.17, + "learning_rate": 1.3901349691796088e-05, + "loss": 0.4127, + "step": 6882 + }, + { + "epoch": 1.17, + "learning_rate": 1.3899654153203836e-05, + "loss": 0.4361, + "step": 6883 + }, + { + "epoch": 1.17, + "learning_rate": 1.3897958482385584e-05, + "loss": 0.4242, + "step": 6884 + }, + { + "epoch": 1.17, + "learning_rate": 1.3896262679398817e-05, + "loss": 0.405, + "step": 6885 + }, + { + "epoch": 1.17, + "learning_rate": 1.3894566744301049e-05, + "loss": 0.4928, + "step": 6886 + }, + { + "epoch": 1.17, + "learning_rate": 1.3892870677149773e-05, + "loss": 0.4204, + "step": 6887 + }, + { + "epoch": 1.17, + "learning_rate": 1.3891174478002502e-05, + "loss": 0.4018, + "step": 6888 + }, + { + "epoch": 1.17, + "learning_rate": 1.3889478146916751e-05, + "loss": 0.4432, + "step": 6889 + }, + { + "epoch": 1.18, + "learning_rate": 1.3887781683950034e-05, + "loss": 0.4132, + "step": 6890 + }, + { + "epoch": 1.18, + "learning_rate": 1.3886085089159875e-05, + "loss": 0.4338, + "step": 6891 + }, + { + "epoch": 1.18, + "learning_rate": 1.3884388362603803e-05, + "loss": 0.4009, + "step": 6892 + }, + { + "epoch": 1.18, + "learning_rate": 1.3882691504339343e-05, + "loss": 0.4414, + "step": 6893 + }, + { + "epoch": 1.18, + "learning_rate": 1.3880994514424036e-05, + "loss": 0.4004, + "step": 6894 + }, + { + "epoch": 1.18, + "learning_rate": 1.387929739291542e-05, + "loss": 0.4474, + "step": 6895 + }, + { + "epoch": 1.18, + "learning_rate": 1.3877600139871038e-05, + "loss": 0.4312, + "step": 6896 + }, + { + "epoch": 1.18, + "learning_rate": 1.3875902755348443e-05, + "loss": 0.4366, + "step": 6897 + }, + { + "epoch": 1.18, + "learning_rate": 1.3874205239405186e-05, + "loss": 0.4659, + "step": 6898 + }, + { + "epoch": 1.18, + "learning_rate": 1.3872507592098824e-05, + "loss": 0.4192, + "step": 6899 + }, + { + "epoch": 1.18, + "learning_rate": 1.3870809813486918e-05, + "loss": 0.3827, + "step": 6900 + }, + { + "epoch": 1.18, + "learning_rate": 1.386911190362704e-05, + "loss": 0.4458, + "step": 6901 + }, + { + "epoch": 1.18, + "learning_rate": 1.3867413862576755e-05, + "loss": 0.4412, + "step": 6902 + }, + { + "epoch": 1.18, + "learning_rate": 1.3865715690393646e-05, + "loss": 0.4689, + "step": 6903 + }, + { + "epoch": 1.18, + "learning_rate": 1.3864017387135287e-05, + "loss": 0.4055, + "step": 6904 + }, + { + "epoch": 1.18, + "learning_rate": 1.3862318952859265e-05, + "loss": 0.4053, + "step": 6905 + }, + { + "epoch": 1.18, + "learning_rate": 1.3860620387623167e-05, + "loss": 0.45, + "step": 6906 + }, + { + "epoch": 1.18, + "learning_rate": 1.385892169148459e-05, + "loss": 0.4227, + "step": 6907 + }, + { + "epoch": 1.18, + "learning_rate": 1.385722286450113e-05, + "loss": 0.419, + "step": 6908 + }, + { + "epoch": 1.18, + "learning_rate": 1.3855523906730385e-05, + "loss": 0.4163, + "step": 6909 + }, + { + "epoch": 1.18, + "learning_rate": 1.3853824818229971e-05, + "loss": 0.4074, + "step": 6910 + }, + { + "epoch": 1.18, + "learning_rate": 1.3852125599057492e-05, + "loss": 0.394, + "step": 6911 + }, + { + "epoch": 1.18, + "learning_rate": 1.3850426249270566e-05, + "loss": 0.3796, + "step": 6912 + }, + { + "epoch": 1.18, + "learning_rate": 1.3848726768926812e-05, + "loss": 0.3916, + "step": 6913 + }, + { + "epoch": 1.18, + "learning_rate": 1.3847027158083856e-05, + "loss": 0.4129, + "step": 6914 + }, + { + "epoch": 1.18, + "learning_rate": 1.3845327416799326e-05, + "loss": 0.435, + "step": 6915 + }, + { + "epoch": 1.18, + "learning_rate": 1.3843627545130853e-05, + "loss": 0.3965, + "step": 6916 + }, + { + "epoch": 1.18, + "learning_rate": 1.3841927543136078e-05, + "loss": 0.4304, + "step": 6917 + }, + { + "epoch": 1.18, + "learning_rate": 1.3840227410872644e-05, + "loss": 0.4345, + "step": 6918 + }, + { + "epoch": 1.18, + "learning_rate": 1.3838527148398196e-05, + "loss": 0.4339, + "step": 6919 + }, + { + "epoch": 1.18, + "learning_rate": 1.3836826755770386e-05, + "loss": 0.44, + "step": 6920 + }, + { + "epoch": 1.18, + "learning_rate": 1.3835126233046863e-05, + "loss": 0.381, + "step": 6921 + }, + { + "epoch": 1.18, + "learning_rate": 1.3833425580285298e-05, + "loss": 0.4181, + "step": 6922 + }, + { + "epoch": 1.18, + "learning_rate": 1.3831724797543344e-05, + "loss": 0.4105, + "step": 6923 + }, + { + "epoch": 1.18, + "learning_rate": 1.3830023884878678e-05, + "loss": 0.4134, + "step": 6924 + }, + { + "epoch": 1.18, + "learning_rate": 1.3828322842348972e-05, + "loss": 0.413, + "step": 6925 + }, + { + "epoch": 1.18, + "learning_rate": 1.3826621670011897e-05, + "loss": 0.4453, + "step": 6926 + }, + { + "epoch": 1.18, + "learning_rate": 1.3824920367925139e-05, + "loss": 0.4534, + "step": 6927 + }, + { + "epoch": 1.18, + "learning_rate": 1.3823218936146388e-05, + "loss": 0.4667, + "step": 6928 + }, + { + "epoch": 1.18, + "learning_rate": 1.382151737473333e-05, + "loss": 0.4854, + "step": 6929 + }, + { + "epoch": 1.18, + "learning_rate": 1.381981568374366e-05, + "loss": 0.4226, + "step": 6930 + }, + { + "epoch": 1.18, + "learning_rate": 1.381811386323508e-05, + "loss": 0.4314, + "step": 6931 + }, + { + "epoch": 1.18, + "learning_rate": 1.3816411913265288e-05, + "loss": 0.4433, + "step": 6932 + }, + { + "epoch": 1.18, + "learning_rate": 1.3814709833892002e-05, + "loss": 0.4258, + "step": 6933 + }, + { + "epoch": 1.18, + "learning_rate": 1.3813007625172927e-05, + "loss": 0.4227, + "step": 6934 + }, + { + "epoch": 1.18, + "learning_rate": 1.3811305287165782e-05, + "loss": 0.4089, + "step": 6935 + }, + { + "epoch": 1.18, + "learning_rate": 1.3809602819928289e-05, + "loss": 0.4333, + "step": 6936 + }, + { + "epoch": 1.18, + "learning_rate": 1.3807900223518173e-05, + "loss": 0.3825, + "step": 6937 + }, + { + "epoch": 1.18, + "learning_rate": 1.3806197497993165e-05, + "loss": 0.4165, + "step": 6938 + }, + { + "epoch": 1.18, + "learning_rate": 1.3804494643410997e-05, + "loss": 0.4336, + "step": 6939 + }, + { + "epoch": 1.18, + "learning_rate": 1.3802791659829413e-05, + "loss": 0.431, + "step": 6940 + }, + { + "epoch": 1.18, + "learning_rate": 1.3801088547306149e-05, + "loss": 0.437, + "step": 6941 + }, + { + "epoch": 1.18, + "learning_rate": 1.3799385305898959e-05, + "loss": 0.3898, + "step": 6942 + }, + { + "epoch": 1.18, + "learning_rate": 1.379768193566559e-05, + "loss": 0.4444, + "step": 6943 + }, + { + "epoch": 1.18, + "learning_rate": 1.3795978436663803e-05, + "loss": 0.4441, + "step": 6944 + }, + { + "epoch": 1.18, + "learning_rate": 1.379427480895136e-05, + "loss": 0.4096, + "step": 6945 + }, + { + "epoch": 1.18, + "learning_rate": 1.379257105258602e-05, + "loss": 0.4154, + "step": 6946 + }, + { + "epoch": 1.18, + "learning_rate": 1.3790867167625552e-05, + "loss": 0.4388, + "step": 6947 + }, + { + "epoch": 1.18, + "learning_rate": 1.3789163154127739e-05, + "loss": 0.4139, + "step": 6948 + }, + { + "epoch": 1.19, + "learning_rate": 1.3787459012150348e-05, + "loss": 0.4428, + "step": 6949 + }, + { + "epoch": 1.19, + "learning_rate": 1.3785754741751167e-05, + "loss": 0.4215, + "step": 6950 + }, + { + "epoch": 1.19, + "learning_rate": 1.3784050342987985e-05, + "loss": 0.4324, + "step": 6951 + }, + { + "epoch": 1.19, + "learning_rate": 1.3782345815918591e-05, + "loss": 0.4226, + "step": 6952 + }, + { + "epoch": 1.19, + "learning_rate": 1.3780641160600781e-05, + "loss": 0.3808, + "step": 6953 + }, + { + "epoch": 1.19, + "learning_rate": 1.377893637709235e-05, + "loss": 0.4337, + "step": 6954 + }, + { + "epoch": 1.19, + "learning_rate": 1.3777231465451114e-05, + "loss": 0.4225, + "step": 6955 + }, + { + "epoch": 1.19, + "learning_rate": 1.3775526425734868e-05, + "loss": 0.4229, + "step": 6956 + }, + { + "epoch": 1.19, + "learning_rate": 1.3773821258001434e-05, + "loss": 0.4246, + "step": 6957 + }, + { + "epoch": 1.19, + "learning_rate": 1.3772115962308625e-05, + "loss": 0.4473, + "step": 6958 + }, + { + "epoch": 1.19, + "learning_rate": 1.3770410538714268e-05, + "loss": 0.4245, + "step": 6959 + }, + { + "epoch": 1.19, + "learning_rate": 1.376870498727618e-05, + "loss": 0.4253, + "step": 6960 + }, + { + "epoch": 1.19, + "learning_rate": 1.37669993080522e-05, + "loss": 0.3941, + "step": 6961 + }, + { + "epoch": 1.19, + "learning_rate": 1.376529350110016e-05, + "loss": 0.4559, + "step": 6962 + }, + { + "epoch": 1.19, + "learning_rate": 1.3763587566477896e-05, + "loss": 0.4464, + "step": 6963 + }, + { + "epoch": 1.19, + "learning_rate": 1.3761881504243253e-05, + "loss": 0.4591, + "step": 6964 + }, + { + "epoch": 1.19, + "learning_rate": 1.3760175314454079e-05, + "loss": 0.4089, + "step": 6965 + }, + { + "epoch": 1.19, + "learning_rate": 1.3758468997168226e-05, + "loss": 0.4481, + "step": 6966 + }, + { + "epoch": 1.19, + "learning_rate": 1.3756762552443555e-05, + "loss": 0.4231, + "step": 6967 + }, + { + "epoch": 1.19, + "learning_rate": 1.3755055980337919e-05, + "loss": 0.4488, + "step": 6968 + }, + { + "epoch": 1.19, + "learning_rate": 1.3753349280909185e-05, + "loss": 0.4454, + "step": 6969 + }, + { + "epoch": 1.19, + "learning_rate": 1.3751642454215222e-05, + "loss": 0.4215, + "step": 6970 + }, + { + "epoch": 1.19, + "learning_rate": 1.374993550031391e-05, + "loss": 0.3819, + "step": 6971 + }, + { + "epoch": 1.19, + "learning_rate": 1.3748228419263116e-05, + "loss": 0.3891, + "step": 6972 + }, + { + "epoch": 1.19, + "learning_rate": 1.374652121112073e-05, + "loss": 0.4287, + "step": 6973 + }, + { + "epoch": 1.19, + "learning_rate": 1.3744813875944636e-05, + "loss": 0.4633, + "step": 6974 + }, + { + "epoch": 1.19, + "learning_rate": 1.3743106413792724e-05, + "loss": 0.416, + "step": 6975 + }, + { + "epoch": 1.19, + "learning_rate": 1.3741398824722894e-05, + "loss": 0.4562, + "step": 6976 + }, + { + "epoch": 1.19, + "learning_rate": 1.3739691108793039e-05, + "loss": 0.4538, + "step": 6977 + }, + { + "epoch": 1.19, + "learning_rate": 1.3737983266061067e-05, + "loss": 0.4368, + "step": 6978 + }, + { + "epoch": 1.19, + "learning_rate": 1.373627529658488e-05, + "loss": 0.4245, + "step": 6979 + }, + { + "epoch": 1.19, + "learning_rate": 1.3734567200422397e-05, + "loss": 0.4605, + "step": 6980 + }, + { + "epoch": 1.19, + "learning_rate": 1.3732858977631532e-05, + "loss": 0.4108, + "step": 6981 + }, + { + "epoch": 1.19, + "learning_rate": 1.3731150628270209e-05, + "loss": 0.4372, + "step": 6982 + }, + { + "epoch": 1.19, + "learning_rate": 1.3729442152396346e-05, + "loss": 0.4596, + "step": 6983 + }, + { + "epoch": 1.19, + "learning_rate": 1.3727733550067881e-05, + "loss": 0.419, + "step": 6984 + }, + { + "epoch": 1.19, + "learning_rate": 1.372602482134274e-05, + "loss": 0.4087, + "step": 6985 + }, + { + "epoch": 1.19, + "learning_rate": 1.372431596627887e-05, + "loss": 0.4225, + "step": 6986 + }, + { + "epoch": 1.19, + "learning_rate": 1.3722606984934202e-05, + "loss": 0.4402, + "step": 6987 + }, + { + "epoch": 1.19, + "learning_rate": 1.372089787736669e-05, + "loss": 0.4205, + "step": 6988 + }, + { + "epoch": 1.19, + "learning_rate": 1.3719188643634285e-05, + "loss": 0.4224, + "step": 6989 + }, + { + "epoch": 1.19, + "learning_rate": 1.371747928379494e-05, + "loss": 0.4342, + "step": 6990 + }, + { + "epoch": 1.19, + "learning_rate": 1.3715769797906614e-05, + "loss": 0.4015, + "step": 6991 + }, + { + "epoch": 1.19, + "learning_rate": 1.3714060186027274e-05, + "loss": 0.4379, + "step": 6992 + }, + { + "epoch": 1.19, + "learning_rate": 1.3712350448214886e-05, + "loss": 0.4385, + "step": 6993 + }, + { + "epoch": 1.19, + "learning_rate": 1.371064058452742e-05, + "loss": 0.3815, + "step": 6994 + }, + { + "epoch": 1.19, + "learning_rate": 1.3708930595022855e-05, + "loss": 0.4072, + "step": 6995 + }, + { + "epoch": 1.19, + "learning_rate": 1.3707220479759171e-05, + "loss": 0.3746, + "step": 6996 + }, + { + "epoch": 1.19, + "learning_rate": 1.3705510238794355e-05, + "loss": 0.4499, + "step": 6997 + }, + { + "epoch": 1.19, + "learning_rate": 1.3703799872186393e-05, + "loss": 0.4343, + "step": 6998 + }, + { + "epoch": 1.19, + "learning_rate": 1.3702089379993282e-05, + "loss": 0.4578, + "step": 6999 + }, + { + "epoch": 1.19, + "learning_rate": 1.3700378762273018e-05, + "loss": 0.3866, + "step": 7000 + }, + { + "epoch": 1.19, + "learning_rate": 1.3698668019083607e-05, + "loss": 0.4205, + "step": 7001 + }, + { + "epoch": 1.19, + "learning_rate": 1.3696957150483049e-05, + "loss": 0.4651, + "step": 7002 + }, + { + "epoch": 1.19, + "learning_rate": 1.3695246156529354e-05, + "loss": 0.3991, + "step": 7003 + }, + { + "epoch": 1.19, + "learning_rate": 1.3693535037280543e-05, + "loss": 0.3943, + "step": 7004 + }, + { + "epoch": 1.19, + "learning_rate": 1.3691823792794632e-05, + "loss": 0.401, + "step": 7005 + }, + { + "epoch": 1.19, + "learning_rate": 1.3690112423129645e-05, + "loss": 0.4669, + "step": 7006 + }, + { + "epoch": 1.2, + "learning_rate": 1.3688400928343608e-05, + "loss": 0.4134, + "step": 7007 + }, + { + "epoch": 1.2, + "learning_rate": 1.3686689308494557e-05, + "loss": 0.4702, + "step": 7008 + }, + { + "epoch": 1.2, + "learning_rate": 1.3684977563640525e-05, + "loss": 0.4518, + "step": 7009 + }, + { + "epoch": 1.2, + "learning_rate": 1.3683265693839548e-05, + "loss": 0.4431, + "step": 7010 + }, + { + "epoch": 1.2, + "learning_rate": 1.368155369914968e-05, + "loss": 0.4059, + "step": 7011 + }, + { + "epoch": 1.2, + "learning_rate": 1.3679841579628964e-05, + "loss": 0.4323, + "step": 7012 + }, + { + "epoch": 1.2, + "learning_rate": 1.3678129335335451e-05, + "loss": 0.4047, + "step": 7013 + }, + { + "epoch": 1.2, + "learning_rate": 1.3676416966327201e-05, + "loss": 0.4499, + "step": 7014 + }, + { + "epoch": 1.2, + "learning_rate": 1.367470447266228e-05, + "loss": 0.4407, + "step": 7015 + }, + { + "epoch": 1.2, + "learning_rate": 1.3672991854398747e-05, + "loss": 0.4502, + "step": 7016 + }, + { + "epoch": 1.2, + "learning_rate": 1.3671279111594675e-05, + "loss": 0.419, + "step": 7017 + }, + { + "epoch": 1.2, + "learning_rate": 1.3669566244308135e-05, + "loss": 0.4312, + "step": 7018 + }, + { + "epoch": 1.2, + "learning_rate": 1.3667853252597212e-05, + "loss": 0.4009, + "step": 7019 + }, + { + "epoch": 1.2, + "learning_rate": 1.366614013651998e-05, + "loss": 0.4246, + "step": 7020 + }, + { + "epoch": 1.2, + "learning_rate": 1.3664426896134533e-05, + "loss": 0.41, + "step": 7021 + }, + { + "epoch": 1.2, + "learning_rate": 1.3662713531498957e-05, + "loss": 0.4465, + "step": 7022 + }, + { + "epoch": 1.2, + "learning_rate": 1.3661000042671353e-05, + "loss": 0.471, + "step": 7023 + }, + { + "epoch": 1.2, + "learning_rate": 1.3659286429709817e-05, + "loss": 0.4445, + "step": 7024 + }, + { + "epoch": 1.2, + "learning_rate": 1.365757269267245e-05, + "loss": 0.3869, + "step": 7025 + }, + { + "epoch": 1.2, + "learning_rate": 1.3655858831617362e-05, + "loss": 0.4342, + "step": 7026 + }, + { + "epoch": 1.2, + "learning_rate": 1.3654144846602669e-05, + "loss": 0.4015, + "step": 7027 + }, + { + "epoch": 1.2, + "learning_rate": 1.365243073768648e-05, + "loss": 0.4267, + "step": 7028 + }, + { + "epoch": 1.2, + "learning_rate": 1.365071650492692e-05, + "loss": 0.4496, + "step": 7029 + }, + { + "epoch": 1.2, + "learning_rate": 1.3649002148382113e-05, + "loss": 0.4516, + "step": 7030 + }, + { + "epoch": 1.2, + "learning_rate": 1.364728766811019e-05, + "loss": 0.4122, + "step": 7031 + }, + { + "epoch": 1.2, + "learning_rate": 1.3645573064169281e-05, + "loss": 0.4266, + "step": 7032 + }, + { + "epoch": 1.2, + "learning_rate": 1.3643858336617526e-05, + "loss": 0.4261, + "step": 7033 + }, + { + "epoch": 1.2, + "learning_rate": 1.3642143485513062e-05, + "loss": 0.4397, + "step": 7034 + }, + { + "epoch": 1.2, + "learning_rate": 1.3640428510914039e-05, + "loss": 0.4386, + "step": 7035 + }, + { + "epoch": 1.2, + "learning_rate": 1.3638713412878603e-05, + "loss": 0.4454, + "step": 7036 + }, + { + "epoch": 1.2, + "learning_rate": 1.3636998191464913e-05, + "loss": 0.4292, + "step": 7037 + }, + { + "epoch": 1.2, + "learning_rate": 1.3635282846731121e-05, + "loss": 0.4329, + "step": 7038 + }, + { + "epoch": 1.2, + "learning_rate": 1.3633567378735394e-05, + "loss": 0.4308, + "step": 7039 + }, + { + "epoch": 1.2, + "learning_rate": 1.3631851787535901e-05, + "loss": 0.4119, + "step": 7040 + }, + { + "epoch": 1.2, + "learning_rate": 1.3630136073190804e-05, + "loss": 0.4392, + "step": 7041 + }, + { + "epoch": 1.2, + "learning_rate": 1.3628420235758288e-05, + "loss": 0.4581, + "step": 7042 + }, + { + "epoch": 1.2, + "learning_rate": 1.3626704275296523e-05, + "loss": 0.4386, + "step": 7043 + }, + { + "epoch": 1.2, + "learning_rate": 1.3624988191863698e-05, + "loss": 0.4255, + "step": 7044 + }, + { + "epoch": 1.2, + "learning_rate": 1.3623271985517997e-05, + "loss": 0.4278, + "step": 7045 + }, + { + "epoch": 1.2, + "learning_rate": 1.3621555656317615e-05, + "loss": 0.4672, + "step": 7046 + }, + { + "epoch": 1.2, + "learning_rate": 1.3619839204320747e-05, + "loss": 0.4362, + "step": 7047 + }, + { + "epoch": 1.2, + "learning_rate": 1.3618122629585591e-05, + "loss": 0.4045, + "step": 7048 + }, + { + "epoch": 1.2, + "learning_rate": 1.361640593217035e-05, + "loss": 0.4072, + "step": 7049 + }, + { + "epoch": 1.2, + "learning_rate": 1.3614689112133239e-05, + "loss": 0.4331, + "step": 7050 + }, + { + "epoch": 1.2, + "learning_rate": 1.3612972169532464e-05, + "loss": 0.4655, + "step": 7051 + }, + { + "epoch": 1.2, + "learning_rate": 1.3611255104426242e-05, + "loss": 0.4495, + "step": 7052 + }, + { + "epoch": 1.2, + "learning_rate": 1.3609537916872798e-05, + "loss": 0.443, + "step": 7053 + }, + { + "epoch": 1.2, + "learning_rate": 1.3607820606930349e-05, + "loss": 0.3716, + "step": 7054 + }, + { + "epoch": 1.2, + "learning_rate": 1.360610317465713e-05, + "loss": 0.4548, + "step": 7055 + }, + { + "epoch": 1.2, + "learning_rate": 1.3604385620111376e-05, + "loss": 0.3796, + "step": 7056 + }, + { + "epoch": 1.2, + "learning_rate": 1.3602667943351323e-05, + "loss": 0.4554, + "step": 7057 + }, + { + "epoch": 1.2, + "learning_rate": 1.3600950144435205e-05, + "loss": 0.4521, + "step": 7058 + }, + { + "epoch": 1.2, + "learning_rate": 1.3599232223421277e-05, + "loss": 0.4155, + "step": 7059 + }, + { + "epoch": 1.2, + "learning_rate": 1.3597514180367783e-05, + "loss": 0.4227, + "step": 7060 + }, + { + "epoch": 1.2, + "learning_rate": 1.3595796015332986e-05, + "loss": 0.4024, + "step": 7061 + }, + { + "epoch": 1.2, + "learning_rate": 1.3594077728375129e-05, + "loss": 0.4581, + "step": 7062 + }, + { + "epoch": 1.2, + "learning_rate": 1.3592359319552487e-05, + "loss": 0.4215, + "step": 7063 + }, + { + "epoch": 1.2, + "learning_rate": 1.359064078892332e-05, + "loss": 0.4238, + "step": 7064 + }, + { + "epoch": 1.2, + "learning_rate": 1.3588922136545902e-05, + "loss": 0.4047, + "step": 7065 + }, + { + "epoch": 1.21, + "learning_rate": 1.3587203362478501e-05, + "loss": 0.4091, + "step": 7066 + }, + { + "epoch": 1.21, + "learning_rate": 1.3585484466779402e-05, + "loss": 0.4075, + "step": 7067 + }, + { + "epoch": 1.21, + "learning_rate": 1.3583765449506886e-05, + "loss": 0.4148, + "step": 7068 + }, + { + "epoch": 1.21, + "learning_rate": 1.3582046310719241e-05, + "loss": 0.4783, + "step": 7069 + }, + { + "epoch": 1.21, + "learning_rate": 1.3580327050474756e-05, + "loss": 0.4801, + "step": 7070 + }, + { + "epoch": 1.21, + "learning_rate": 1.3578607668831726e-05, + "loss": 0.4251, + "step": 7071 + }, + { + "epoch": 1.21, + "learning_rate": 1.3576888165848456e-05, + "loss": 0.3974, + "step": 7072 + }, + { + "epoch": 1.21, + "learning_rate": 1.3575168541583242e-05, + "loss": 0.4355, + "step": 7073 + }, + { + "epoch": 1.21, + "learning_rate": 1.357344879609439e-05, + "loss": 0.4724, + "step": 7074 + }, + { + "epoch": 1.21, + "learning_rate": 1.3571728929440218e-05, + "loss": 0.3959, + "step": 7075 + }, + { + "epoch": 1.21, + "learning_rate": 1.3570008941679042e-05, + "loss": 0.4305, + "step": 7076 + }, + { + "epoch": 1.21, + "learning_rate": 1.3568288832869176e-05, + "loss": 0.4314, + "step": 7077 + }, + { + "epoch": 1.21, + "learning_rate": 1.356656860306895e-05, + "loss": 0.4076, + "step": 7078 + }, + { + "epoch": 1.21, + "learning_rate": 1.3564848252336689e-05, + "loss": 0.4083, + "step": 7079 + }, + { + "epoch": 1.21, + "learning_rate": 1.3563127780730728e-05, + "loss": 0.4143, + "step": 7080 + }, + { + "epoch": 1.21, + "learning_rate": 1.3561407188309395e-05, + "loss": 0.398, + "step": 7081 + }, + { + "epoch": 1.21, + "learning_rate": 1.3559686475131036e-05, + "loss": 0.4268, + "step": 7082 + }, + { + "epoch": 1.21, + "learning_rate": 1.3557965641253998e-05, + "loss": 0.4529, + "step": 7083 + }, + { + "epoch": 1.21, + "learning_rate": 1.3556244686736625e-05, + "loss": 0.4189, + "step": 7084 + }, + { + "epoch": 1.21, + "learning_rate": 1.3554523611637273e-05, + "loss": 0.4421, + "step": 7085 + }, + { + "epoch": 1.21, + "learning_rate": 1.3552802416014297e-05, + "loss": 0.454, + "step": 7086 + }, + { + "epoch": 1.21, + "learning_rate": 1.3551081099926058e-05, + "loss": 0.4412, + "step": 7087 + }, + { + "epoch": 1.21, + "learning_rate": 1.3549359663430921e-05, + "loss": 0.4211, + "step": 7088 + }, + { + "epoch": 1.21, + "learning_rate": 1.3547638106587253e-05, + "loss": 0.4496, + "step": 7089 + }, + { + "epoch": 1.21, + "learning_rate": 1.3545916429453428e-05, + "loss": 0.4265, + "step": 7090 + }, + { + "epoch": 1.21, + "learning_rate": 1.3544194632087825e-05, + "loss": 0.4151, + "step": 7091 + }, + { + "epoch": 1.21, + "learning_rate": 1.3542472714548825e-05, + "loss": 0.4308, + "step": 7092 + }, + { + "epoch": 1.21, + "learning_rate": 1.354075067689481e-05, + "loss": 0.4169, + "step": 7093 + }, + { + "epoch": 1.21, + "learning_rate": 1.353902851918417e-05, + "loss": 0.4263, + "step": 7094 + }, + { + "epoch": 1.21, + "learning_rate": 1.3537306241475305e-05, + "loss": 0.433, + "step": 7095 + }, + { + "epoch": 1.21, + "learning_rate": 1.3535583843826605e-05, + "loss": 0.4219, + "step": 7096 + }, + { + "epoch": 1.21, + "learning_rate": 1.3533861326296474e-05, + "loss": 0.3932, + "step": 7097 + }, + { + "epoch": 1.21, + "learning_rate": 1.3532138688943317e-05, + "loss": 0.4348, + "step": 7098 + }, + { + "epoch": 1.21, + "learning_rate": 1.3530415931825544e-05, + "loss": 0.4221, + "step": 7099 + }, + { + "epoch": 1.21, + "learning_rate": 1.352869305500157e-05, + "loss": 0.4107, + "step": 7100 + }, + { + "epoch": 1.21, + "learning_rate": 1.352697005852981e-05, + "loss": 0.4005, + "step": 7101 + }, + { + "epoch": 1.21, + "learning_rate": 1.3525246942468688e-05, + "loss": 0.4471, + "step": 7102 + }, + { + "epoch": 1.21, + "learning_rate": 1.3523523706876626e-05, + "loss": 0.3964, + "step": 7103 + }, + { + "epoch": 1.21, + "learning_rate": 1.3521800351812065e-05, + "loss": 0.4082, + "step": 7104 + }, + { + "epoch": 1.21, + "learning_rate": 1.3520076877333424e-05, + "loss": 0.4386, + "step": 7105 + }, + { + "epoch": 1.21, + "learning_rate": 1.3518353283499153e-05, + "loss": 0.4191, + "step": 7106 + }, + { + "epoch": 1.21, + "learning_rate": 1.3516629570367688e-05, + "loss": 0.4259, + "step": 7107 + }, + { + "epoch": 1.21, + "learning_rate": 1.3514905737997474e-05, + "loss": 0.4108, + "step": 7108 + }, + { + "epoch": 1.21, + "learning_rate": 1.3513181786446965e-05, + "loss": 0.436, + "step": 7109 + }, + { + "epoch": 1.21, + "learning_rate": 1.3511457715774614e-05, + "loss": 0.4636, + "step": 7110 + }, + { + "epoch": 1.21, + "learning_rate": 1.3509733526038879e-05, + "loss": 0.4509, + "step": 7111 + }, + { + "epoch": 1.21, + "learning_rate": 1.3508009217298225e-05, + "loss": 0.3911, + "step": 7112 + }, + { + "epoch": 1.21, + "learning_rate": 1.3506284789611118e-05, + "loss": 0.4144, + "step": 7113 + }, + { + "epoch": 1.21, + "learning_rate": 1.3504560243036018e-05, + "loss": 0.4314, + "step": 7114 + }, + { + "epoch": 1.21, + "learning_rate": 1.3502835577631413e-05, + "loss": 0.3946, + "step": 7115 + }, + { + "epoch": 1.21, + "learning_rate": 1.3501110793455771e-05, + "loss": 0.451, + "step": 7116 + }, + { + "epoch": 1.21, + "learning_rate": 1.3499385890567585e-05, + "loss": 0.4433, + "step": 7117 + }, + { + "epoch": 1.21, + "learning_rate": 1.3497660869025333e-05, + "loss": 0.3918, + "step": 7118 + }, + { + "epoch": 1.21, + "learning_rate": 1.349593572888751e-05, + "loss": 0.4234, + "step": 7119 + }, + { + "epoch": 1.21, + "learning_rate": 1.3494210470212608e-05, + "loss": 0.3883, + "step": 7120 + }, + { + "epoch": 1.21, + "learning_rate": 1.3492485093059126e-05, + "loss": 0.4407, + "step": 7121 + }, + { + "epoch": 1.21, + "learning_rate": 1.3490759597485568e-05, + "loss": 0.4157, + "step": 7122 + }, + { + "epoch": 1.21, + "learning_rate": 1.3489033983550438e-05, + "loss": 0.3937, + "step": 7123 + }, + { + "epoch": 1.21, + "learning_rate": 1.3487308251312249e-05, + "loss": 0.4237, + "step": 7124 + }, + { + "epoch": 1.22, + "learning_rate": 1.3485582400829514e-05, + "loss": 0.4506, + "step": 7125 + }, + { + "epoch": 1.22, + "learning_rate": 1.3483856432160751e-05, + "loss": 0.4102, + "step": 7126 + }, + { + "epoch": 1.22, + "learning_rate": 1.3482130345364489e-05, + "loss": 0.4206, + "step": 7127 + }, + { + "epoch": 1.22, + "learning_rate": 1.3480404140499245e-05, + "loss": 0.478, + "step": 7128 + }, + { + "epoch": 1.22, + "learning_rate": 1.3478677817623556e-05, + "loss": 0.4138, + "step": 7129 + }, + { + "epoch": 1.22, + "learning_rate": 1.3476951376795953e-05, + "loss": 0.4517, + "step": 7130 + }, + { + "epoch": 1.22, + "learning_rate": 1.3475224818074976e-05, + "loss": 0.4204, + "step": 7131 + }, + { + "epoch": 1.22, + "learning_rate": 1.3473498141519172e-05, + "loss": 0.3849, + "step": 7132 + }, + { + "epoch": 1.22, + "learning_rate": 1.3471771347187081e-05, + "loss": 0.4133, + "step": 7133 + }, + { + "epoch": 1.22, + "learning_rate": 1.3470044435137254e-05, + "loss": 0.4314, + "step": 7134 + }, + { + "epoch": 1.22, + "learning_rate": 1.3468317405428252e-05, + "loss": 0.423, + "step": 7135 + }, + { + "epoch": 1.22, + "learning_rate": 1.3466590258118629e-05, + "loss": 0.4511, + "step": 7136 + }, + { + "epoch": 1.22, + "learning_rate": 1.3464862993266948e-05, + "loss": 0.4458, + "step": 7137 + }, + { + "epoch": 1.22, + "learning_rate": 1.3463135610931772e-05, + "loss": 0.4065, + "step": 7138 + }, + { + "epoch": 1.22, + "learning_rate": 1.3461408111171675e-05, + "loss": 0.4386, + "step": 7139 + }, + { + "epoch": 1.22, + "learning_rate": 1.3459680494045236e-05, + "loss": 0.4294, + "step": 7140 + }, + { + "epoch": 1.22, + "learning_rate": 1.3457952759611027e-05, + "loss": 0.4321, + "step": 7141 + }, + { + "epoch": 1.22, + "learning_rate": 1.345622490792763e-05, + "loss": 0.4282, + "step": 7142 + }, + { + "epoch": 1.22, + "learning_rate": 1.3454496939053638e-05, + "loss": 0.4015, + "step": 7143 + }, + { + "epoch": 1.22, + "learning_rate": 1.3452768853047637e-05, + "loss": 0.4258, + "step": 7144 + }, + { + "epoch": 1.22, + "learning_rate": 1.3451040649968221e-05, + "loss": 0.4184, + "step": 7145 + }, + { + "epoch": 1.22, + "learning_rate": 1.3449312329873988e-05, + "loss": 0.4086, + "step": 7146 + }, + { + "epoch": 1.22, + "learning_rate": 1.3447583892823543e-05, + "loss": 0.4617, + "step": 7147 + }, + { + "epoch": 1.22, + "learning_rate": 1.3445855338875491e-05, + "loss": 0.4184, + "step": 7148 + }, + { + "epoch": 1.22, + "learning_rate": 1.344412666808844e-05, + "loss": 0.449, + "step": 7149 + }, + { + "epoch": 1.22, + "learning_rate": 1.3442397880521007e-05, + "loss": 0.466, + "step": 7150 + }, + { + "epoch": 1.22, + "learning_rate": 1.3440668976231812e-05, + "loss": 0.4183, + "step": 7151 + }, + { + "epoch": 1.22, + "learning_rate": 1.3438939955279475e-05, + "loss": 0.3868, + "step": 7152 + }, + { + "epoch": 1.22, + "learning_rate": 1.3437210817722619e-05, + "loss": 0.4327, + "step": 7153 + }, + { + "epoch": 1.22, + "learning_rate": 1.3435481563619879e-05, + "loss": 0.438, + "step": 7154 + }, + { + "epoch": 1.22, + "learning_rate": 1.3433752193029888e-05, + "loss": 0.4431, + "step": 7155 + }, + { + "epoch": 1.22, + "learning_rate": 1.3432022706011279e-05, + "loss": 0.4254, + "step": 7156 + }, + { + "epoch": 1.22, + "learning_rate": 1.3430293102622699e-05, + "loss": 0.4595, + "step": 7157 + }, + { + "epoch": 1.22, + "learning_rate": 1.3428563382922792e-05, + "loss": 0.3928, + "step": 7158 + }, + { + "epoch": 1.22, + "learning_rate": 1.3426833546970213e-05, + "loss": 0.3998, + "step": 7159 + }, + { + "epoch": 1.22, + "learning_rate": 1.3425103594823608e-05, + "loss": 0.4941, + "step": 7160 + }, + { + "epoch": 1.22, + "learning_rate": 1.3423373526541637e-05, + "loss": 0.4276, + "step": 7161 + }, + { + "epoch": 1.22, + "learning_rate": 1.3421643342182964e-05, + "loss": 0.4353, + "step": 7162 + }, + { + "epoch": 1.22, + "learning_rate": 1.3419913041806252e-05, + "loss": 0.3973, + "step": 7163 + }, + { + "epoch": 1.22, + "learning_rate": 1.341818262547017e-05, + "loss": 0.4082, + "step": 7164 + }, + { + "epoch": 1.22, + "learning_rate": 1.3416452093233394e-05, + "loss": 0.4658, + "step": 7165 + }, + { + "epoch": 1.22, + "learning_rate": 1.3414721445154604e-05, + "loss": 0.4543, + "step": 7166 + }, + { + "epoch": 1.22, + "learning_rate": 1.3412990681292473e-05, + "loss": 0.4232, + "step": 7167 + }, + { + "epoch": 1.22, + "learning_rate": 1.3411259801705694e-05, + "loss": 0.4146, + "step": 7168 + }, + { + "epoch": 1.22, + "learning_rate": 1.340952880645295e-05, + "loss": 0.451, + "step": 7169 + }, + { + "epoch": 1.22, + "learning_rate": 1.3407797695592939e-05, + "loss": 0.4428, + "step": 7170 + }, + { + "epoch": 1.22, + "learning_rate": 1.3406066469184354e-05, + "loss": 0.4597, + "step": 7171 + }, + { + "epoch": 1.22, + "learning_rate": 1.34043351272859e-05, + "loss": 0.4276, + "step": 7172 + }, + { + "epoch": 1.22, + "learning_rate": 1.3402603669956276e-05, + "loss": 0.3976, + "step": 7173 + }, + { + "epoch": 1.22, + "learning_rate": 1.3400872097254198e-05, + "loss": 0.442, + "step": 7174 + }, + { + "epoch": 1.22, + "learning_rate": 1.3399140409238374e-05, + "loss": 0.4029, + "step": 7175 + }, + { + "epoch": 1.22, + "learning_rate": 1.339740860596752e-05, + "loss": 0.4377, + "step": 7176 + }, + { + "epoch": 1.22, + "learning_rate": 1.339567668750036e-05, + "loss": 0.4404, + "step": 7177 + }, + { + "epoch": 1.22, + "learning_rate": 1.3393944653895614e-05, + "loss": 0.428, + "step": 7178 + }, + { + "epoch": 1.22, + "learning_rate": 1.3392212505212012e-05, + "loss": 0.4103, + "step": 7179 + }, + { + "epoch": 1.22, + "learning_rate": 1.3390480241508287e-05, + "loss": 0.4456, + "step": 7180 + }, + { + "epoch": 1.22, + "learning_rate": 1.338874786284318e-05, + "loss": 0.4268, + "step": 7181 + }, + { + "epoch": 1.22, + "learning_rate": 1.3387015369275418e-05, + "loss": 0.4329, + "step": 7182 + }, + { + "epoch": 1.23, + "learning_rate": 1.3385282760863758e-05, + "loss": 0.444, + "step": 7183 + }, + { + "epoch": 1.23, + "learning_rate": 1.3383550037666938e-05, + "loss": 0.4052, + "step": 7184 + }, + { + "epoch": 1.23, + "learning_rate": 1.338181719974372e-05, + "loss": 0.4302, + "step": 7185 + }, + { + "epoch": 1.23, + "learning_rate": 1.3380084247152847e-05, + "loss": 0.4519, + "step": 7186 + }, + { + "epoch": 1.23, + "learning_rate": 1.3378351179953086e-05, + "loss": 0.441, + "step": 7187 + }, + { + "epoch": 1.23, + "learning_rate": 1.33766179982032e-05, + "loss": 0.4407, + "step": 7188 + }, + { + "epoch": 1.23, + "learning_rate": 1.3374884701961956e-05, + "loss": 0.4266, + "step": 7189 + }, + { + "epoch": 1.23, + "learning_rate": 1.3373151291288124e-05, + "loss": 0.416, + "step": 7190 + }, + { + "epoch": 1.23, + "learning_rate": 1.337141776624048e-05, + "loss": 0.4298, + "step": 7191 + }, + { + "epoch": 1.23, + "learning_rate": 1.3369684126877802e-05, + "loss": 0.4123, + "step": 7192 + }, + { + "epoch": 1.23, + "learning_rate": 1.3367950373258873e-05, + "loss": 0.45, + "step": 7193 + }, + { + "epoch": 1.23, + "learning_rate": 1.3366216505442477e-05, + "loss": 0.437, + "step": 7194 + }, + { + "epoch": 1.23, + "learning_rate": 1.336448252348741e-05, + "loss": 0.4428, + "step": 7195 + }, + { + "epoch": 1.23, + "learning_rate": 1.3362748427452461e-05, + "loss": 0.4546, + "step": 7196 + }, + { + "epoch": 1.23, + "learning_rate": 1.3361014217396432e-05, + "loss": 0.4074, + "step": 7197 + }, + { + "epoch": 1.23, + "learning_rate": 1.3359279893378121e-05, + "loss": 0.4055, + "step": 7198 + }, + { + "epoch": 1.23, + "learning_rate": 1.335754545545634e-05, + "loss": 0.4145, + "step": 7199 + }, + { + "epoch": 1.23, + "learning_rate": 1.3355810903689894e-05, + "loss": 0.4369, + "step": 7200 + }, + { + "epoch": 1.23, + "learning_rate": 1.3354076238137596e-05, + "loss": 0.4292, + "step": 7201 + }, + { + "epoch": 1.23, + "learning_rate": 1.3352341458858264e-05, + "loss": 0.4668, + "step": 7202 + }, + { + "epoch": 1.23, + "learning_rate": 1.3350606565910724e-05, + "loss": 0.4604, + "step": 7203 + }, + { + "epoch": 1.23, + "learning_rate": 1.3348871559353798e-05, + "loss": 0.4502, + "step": 7204 + }, + { + "epoch": 1.23, + "learning_rate": 1.3347136439246314e-05, + "loss": 0.388, + "step": 7205 + }, + { + "epoch": 1.23, + "learning_rate": 1.3345401205647101e-05, + "loss": 0.4364, + "step": 7206 + }, + { + "epoch": 1.23, + "learning_rate": 1.3343665858615006e-05, + "loss": 0.4219, + "step": 7207 + }, + { + "epoch": 1.23, + "learning_rate": 1.3341930398208866e-05, + "loss": 0.4392, + "step": 7208 + }, + { + "epoch": 1.23, + "learning_rate": 1.3340194824487517e-05, + "loss": 0.4389, + "step": 7209 + }, + { + "epoch": 1.23, + "learning_rate": 1.3338459137509818e-05, + "loss": 0.4324, + "step": 7210 + }, + { + "epoch": 1.23, + "learning_rate": 1.3336723337334619e-05, + "loss": 0.4058, + "step": 7211 + }, + { + "epoch": 1.23, + "learning_rate": 1.333498742402077e-05, + "loss": 0.4542, + "step": 7212 + }, + { + "epoch": 1.23, + "learning_rate": 1.3333251397627136e-05, + "loss": 0.4186, + "step": 7213 + }, + { + "epoch": 1.23, + "learning_rate": 1.333151525821258e-05, + "loss": 0.4911, + "step": 7214 + }, + { + "epoch": 1.23, + "learning_rate": 1.3329779005835967e-05, + "loss": 0.4295, + "step": 7215 + }, + { + "epoch": 1.23, + "learning_rate": 1.3328042640556174e-05, + "loss": 0.4144, + "step": 7216 + }, + { + "epoch": 1.23, + "learning_rate": 1.3326306162432067e-05, + "loss": 0.4157, + "step": 7217 + }, + { + "epoch": 1.23, + "learning_rate": 1.3324569571522535e-05, + "loss": 0.4488, + "step": 7218 + }, + { + "epoch": 1.23, + "learning_rate": 1.3322832867886454e-05, + "loss": 0.3993, + "step": 7219 + }, + { + "epoch": 1.23, + "learning_rate": 1.3321096051582712e-05, + "loss": 0.446, + "step": 7220 + }, + { + "epoch": 1.23, + "learning_rate": 1.3319359122670201e-05, + "loss": 0.4418, + "step": 7221 + }, + { + "epoch": 1.23, + "learning_rate": 1.3317622081207815e-05, + "loss": 0.4629, + "step": 7222 + }, + { + "epoch": 1.23, + "learning_rate": 1.3315884927254452e-05, + "loss": 0.434, + "step": 7223 + }, + { + "epoch": 1.23, + "learning_rate": 1.3314147660869012e-05, + "loss": 0.4009, + "step": 7224 + }, + { + "epoch": 1.23, + "learning_rate": 1.3312410282110402e-05, + "loss": 0.421, + "step": 7225 + }, + { + "epoch": 1.23, + "learning_rate": 1.3310672791037532e-05, + "loss": 0.4363, + "step": 7226 + }, + { + "epoch": 1.23, + "learning_rate": 1.3308935187709313e-05, + "loss": 0.439, + "step": 7227 + }, + { + "epoch": 1.23, + "learning_rate": 1.3307197472184663e-05, + "loss": 0.379, + "step": 7228 + }, + { + "epoch": 1.23, + "learning_rate": 1.3305459644522505e-05, + "loss": 0.4577, + "step": 7229 + }, + { + "epoch": 1.23, + "learning_rate": 1.3303721704781765e-05, + "loss": 0.4081, + "step": 7230 + }, + { + "epoch": 1.23, + "learning_rate": 1.3301983653021368e-05, + "loss": 0.4429, + "step": 7231 + }, + { + "epoch": 1.23, + "learning_rate": 1.3300245489300246e-05, + "loss": 0.4342, + "step": 7232 + }, + { + "epoch": 1.23, + "learning_rate": 1.3298507213677335e-05, + "loss": 0.4368, + "step": 7233 + }, + { + "epoch": 1.23, + "learning_rate": 1.3296768826211578e-05, + "loss": 0.4396, + "step": 7234 + }, + { + "epoch": 1.23, + "learning_rate": 1.3295030326961917e-05, + "loss": 0.4373, + "step": 7235 + }, + { + "epoch": 1.23, + "learning_rate": 1.3293291715987298e-05, + "loss": 0.4183, + "step": 7236 + }, + { + "epoch": 1.23, + "learning_rate": 1.3291552993346676e-05, + "loss": 0.4247, + "step": 7237 + }, + { + "epoch": 1.23, + "learning_rate": 1.3289814159099e-05, + "loss": 0.4597, + "step": 7238 + }, + { + "epoch": 1.23, + "learning_rate": 1.3288075213303239e-05, + "loss": 0.4429, + "step": 7239 + }, + { + "epoch": 1.23, + "learning_rate": 1.3286336156018345e-05, + "loss": 0.4142, + "step": 7240 + }, + { + "epoch": 1.23, + "learning_rate": 1.3284596987303291e-05, + "loss": 0.4209, + "step": 7241 + }, + { + "epoch": 1.24, + "learning_rate": 1.3282857707217042e-05, + "loss": 0.4143, + "step": 7242 + }, + { + "epoch": 1.24, + "learning_rate": 1.3281118315818575e-05, + "loss": 0.4346, + "step": 7243 + }, + { + "epoch": 1.24, + "learning_rate": 1.327937881316687e-05, + "loss": 0.4369, + "step": 7244 + }, + { + "epoch": 1.24, + "learning_rate": 1.3277639199320906e-05, + "loss": 0.4143, + "step": 7245 + }, + { + "epoch": 1.24, + "learning_rate": 1.3275899474339667e-05, + "loss": 0.4615, + "step": 7246 + }, + { + "epoch": 1.24, + "learning_rate": 1.3274159638282145e-05, + "loss": 0.4521, + "step": 7247 + }, + { + "epoch": 1.24, + "learning_rate": 1.3272419691207327e-05, + "loss": 0.4235, + "step": 7248 + }, + { + "epoch": 1.24, + "learning_rate": 1.3270679633174219e-05, + "loss": 0.4284, + "step": 7249 + }, + { + "epoch": 1.24, + "learning_rate": 1.3268939464241812e-05, + "loss": 0.4361, + "step": 7250 + }, + { + "epoch": 1.24, + "learning_rate": 1.3267199184469117e-05, + "loss": 0.4083, + "step": 7251 + }, + { + "epoch": 1.24, + "learning_rate": 1.3265458793915135e-05, + "loss": 0.421, + "step": 7252 + }, + { + "epoch": 1.24, + "learning_rate": 1.3263718292638888e-05, + "loss": 0.3955, + "step": 7253 + }, + { + "epoch": 1.24, + "learning_rate": 1.3261977680699381e-05, + "loss": 0.4203, + "step": 7254 + }, + { + "epoch": 1.24, + "learning_rate": 1.3260236958155639e-05, + "loss": 0.4044, + "step": 7255 + }, + { + "epoch": 1.24, + "learning_rate": 1.3258496125066684e-05, + "loss": 0.4019, + "step": 7256 + }, + { + "epoch": 1.24, + "learning_rate": 1.3256755181491539e-05, + "loss": 0.4168, + "step": 7257 + }, + { + "epoch": 1.24, + "learning_rate": 1.3255014127489237e-05, + "loss": 0.4402, + "step": 7258 + }, + { + "epoch": 1.24, + "learning_rate": 1.325327296311881e-05, + "loss": 0.4397, + "step": 7259 + }, + { + "epoch": 1.24, + "learning_rate": 1.3251531688439302e-05, + "loss": 0.4448, + "step": 7260 + }, + { + "epoch": 1.24, + "learning_rate": 1.3249790303509751e-05, + "loss": 0.419, + "step": 7261 + }, + { + "epoch": 1.24, + "learning_rate": 1.32480488083892e-05, + "loss": 0.4592, + "step": 7262 + }, + { + "epoch": 1.24, + "learning_rate": 1.3246307203136701e-05, + "loss": 0.4404, + "step": 7263 + }, + { + "epoch": 1.24, + "learning_rate": 1.3244565487811307e-05, + "loss": 0.4347, + "step": 7264 + }, + { + "epoch": 1.24, + "learning_rate": 1.324282366247207e-05, + "loss": 0.4591, + "step": 7265 + }, + { + "epoch": 1.24, + "learning_rate": 1.3241081727178055e-05, + "loss": 0.4359, + "step": 7266 + }, + { + "epoch": 1.24, + "learning_rate": 1.3239339681988324e-05, + "loss": 0.4714, + "step": 7267 + }, + { + "epoch": 1.24, + "learning_rate": 1.3237597526961948e-05, + "loss": 0.4365, + "step": 7268 + }, + { + "epoch": 1.24, + "learning_rate": 1.3235855262157993e-05, + "loss": 0.4626, + "step": 7269 + }, + { + "epoch": 1.24, + "learning_rate": 1.323411288763554e-05, + "loss": 0.4253, + "step": 7270 + }, + { + "epoch": 1.24, + "learning_rate": 1.323237040345366e-05, + "loss": 0.4324, + "step": 7271 + }, + { + "epoch": 1.24, + "learning_rate": 1.3230627809671446e-05, + "loss": 0.4428, + "step": 7272 + }, + { + "epoch": 1.24, + "learning_rate": 1.3228885106347975e-05, + "loss": 0.4219, + "step": 7273 + }, + { + "epoch": 1.24, + "learning_rate": 1.3227142293542338e-05, + "loss": 0.44, + "step": 7274 + }, + { + "epoch": 1.24, + "learning_rate": 1.322539937131364e-05, + "loss": 0.4239, + "step": 7275 + }, + { + "epoch": 1.24, + "learning_rate": 1.3223656339720963e-05, + "loss": 0.4066, + "step": 7276 + }, + { + "epoch": 1.24, + "learning_rate": 1.3221913198823417e-05, + "loss": 0.4176, + "step": 7277 + }, + { + "epoch": 1.24, + "learning_rate": 1.3220169948680106e-05, + "loss": 0.433, + "step": 7278 + }, + { + "epoch": 1.24, + "learning_rate": 1.3218426589350137e-05, + "loss": 0.426, + "step": 7279 + }, + { + "epoch": 1.24, + "learning_rate": 1.3216683120892622e-05, + "loss": 0.393, + "step": 7280 + }, + { + "epoch": 1.24, + "learning_rate": 1.3214939543366678e-05, + "loss": 0.4107, + "step": 7281 + }, + { + "epoch": 1.24, + "learning_rate": 1.3213195856831425e-05, + "loss": 0.4377, + "step": 7282 + }, + { + "epoch": 1.24, + "learning_rate": 1.3211452061345986e-05, + "loss": 0.3945, + "step": 7283 + }, + { + "epoch": 1.24, + "learning_rate": 1.3209708156969488e-05, + "loss": 0.408, + "step": 7284 + }, + { + "epoch": 1.24, + "learning_rate": 1.320796414376106e-05, + "loss": 0.411, + "step": 7285 + }, + { + "epoch": 1.24, + "learning_rate": 1.3206220021779842e-05, + "loss": 0.4374, + "step": 7286 + }, + { + "epoch": 1.24, + "learning_rate": 1.3204475791084965e-05, + "loss": 0.4109, + "step": 7287 + }, + { + "epoch": 1.24, + "learning_rate": 1.3202731451735576e-05, + "loss": 0.3961, + "step": 7288 + }, + { + "epoch": 1.24, + "learning_rate": 1.3200987003790815e-05, + "loss": 0.4311, + "step": 7289 + }, + { + "epoch": 1.24, + "learning_rate": 1.319924244730984e-05, + "loss": 0.4067, + "step": 7290 + }, + { + "epoch": 1.24, + "learning_rate": 1.3197497782351794e-05, + "loss": 0.459, + "step": 7291 + }, + { + "epoch": 1.24, + "learning_rate": 1.319575300897584e-05, + "loss": 0.4371, + "step": 7292 + }, + { + "epoch": 1.24, + "learning_rate": 1.3194008127241136e-05, + "loss": 0.4099, + "step": 7293 + }, + { + "epoch": 1.24, + "learning_rate": 1.3192263137206847e-05, + "loss": 0.4389, + "step": 7294 + }, + { + "epoch": 1.24, + "learning_rate": 1.3190518038932143e-05, + "loss": 0.4258, + "step": 7295 + }, + { + "epoch": 1.24, + "learning_rate": 1.318877283247619e-05, + "loss": 0.4476, + "step": 7296 + }, + { + "epoch": 1.24, + "learning_rate": 1.3187027517898161e-05, + "loss": 0.4477, + "step": 7297 + }, + { + "epoch": 1.24, + "learning_rate": 1.3185282095257242e-05, + "loss": 0.4272, + "step": 7298 + }, + { + "epoch": 1.24, + "learning_rate": 1.3183536564612612e-05, + "loss": 0.4462, + "step": 7299 + }, + { + "epoch": 1.24, + "learning_rate": 1.3181790926023454e-05, + "loss": 0.4062, + "step": 7300 + }, + { + "epoch": 1.25, + "learning_rate": 1.3180045179548966e-05, + "loss": 0.4342, + "step": 7301 + }, + { + "epoch": 1.25, + "learning_rate": 1.3178299325248332e-05, + "loss": 0.4545, + "step": 7302 + }, + { + "epoch": 1.25, + "learning_rate": 1.3176553363180753e-05, + "loss": 0.3952, + "step": 7303 + }, + { + "epoch": 1.25, + "learning_rate": 1.3174807293405427e-05, + "loss": 0.427, + "step": 7304 + }, + { + "epoch": 1.25, + "learning_rate": 1.3173061115981563e-05, + "loss": 0.4453, + "step": 7305 + }, + { + "epoch": 1.25, + "learning_rate": 1.3171314830968365e-05, + "loss": 0.427, + "step": 7306 + }, + { + "epoch": 1.25, + "learning_rate": 1.3169568438425042e-05, + "loss": 0.4299, + "step": 7307 + }, + { + "epoch": 1.25, + "learning_rate": 1.3167821938410816e-05, + "loss": 0.4039, + "step": 7308 + }, + { + "epoch": 1.25, + "learning_rate": 1.3166075330984903e-05, + "loss": 0.4386, + "step": 7309 + }, + { + "epoch": 1.25, + "learning_rate": 1.3164328616206524e-05, + "loss": 0.4379, + "step": 7310 + }, + { + "epoch": 1.25, + "learning_rate": 1.3162581794134906e-05, + "loss": 0.4434, + "step": 7311 + }, + { + "epoch": 1.25, + "learning_rate": 1.3160834864829278e-05, + "loss": 0.4272, + "step": 7312 + }, + { + "epoch": 1.25, + "learning_rate": 1.3159087828348876e-05, + "loss": 0.421, + "step": 7313 + }, + { + "epoch": 1.25, + "learning_rate": 1.315734068475293e-05, + "loss": 0.4233, + "step": 7314 + }, + { + "epoch": 1.25, + "learning_rate": 1.3155593434100686e-05, + "loss": 0.4453, + "step": 7315 + }, + { + "epoch": 1.25, + "learning_rate": 1.3153846076451391e-05, + "loss": 0.404, + "step": 7316 + }, + { + "epoch": 1.25, + "learning_rate": 1.3152098611864289e-05, + "loss": 0.4423, + "step": 7317 + }, + { + "epoch": 1.25, + "learning_rate": 1.315035104039863e-05, + "loss": 0.4035, + "step": 7318 + }, + { + "epoch": 1.25, + "learning_rate": 1.3148603362113673e-05, + "loss": 0.419, + "step": 7319 + }, + { + "epoch": 1.25, + "learning_rate": 1.3146855577068673e-05, + "loss": 0.4334, + "step": 7320 + }, + { + "epoch": 1.25, + "learning_rate": 1.3145107685322894e-05, + "loss": 0.4025, + "step": 7321 + }, + { + "epoch": 1.25, + "learning_rate": 1.3143359686935602e-05, + "loss": 0.4351, + "step": 7322 + }, + { + "epoch": 1.25, + "learning_rate": 1.3141611581966067e-05, + "loss": 0.4449, + "step": 7323 + }, + { + "epoch": 1.25, + "learning_rate": 1.3139863370473563e-05, + "loss": 0.446, + "step": 7324 + }, + { + "epoch": 1.25, + "learning_rate": 1.3138115052517364e-05, + "loss": 0.4155, + "step": 7325 + }, + { + "epoch": 1.25, + "learning_rate": 1.3136366628156754e-05, + "loss": 0.4141, + "step": 7326 + }, + { + "epoch": 1.25, + "learning_rate": 1.313461809745101e-05, + "loss": 0.4471, + "step": 7327 + }, + { + "epoch": 1.25, + "learning_rate": 1.3132869460459432e-05, + "loss": 0.4213, + "step": 7328 + }, + { + "epoch": 1.25, + "learning_rate": 1.3131120717241298e-05, + "loss": 0.4138, + "step": 7329 + }, + { + "epoch": 1.25, + "learning_rate": 1.312937186785591e-05, + "loss": 0.4164, + "step": 7330 + }, + { + "epoch": 1.25, + "learning_rate": 1.3127622912362567e-05, + "loss": 0.4174, + "step": 7331 + }, + { + "epoch": 1.25, + "learning_rate": 1.3125873850820567e-05, + "loss": 0.4231, + "step": 7332 + }, + { + "epoch": 1.25, + "learning_rate": 1.3124124683289216e-05, + "loss": 0.455, + "step": 7333 + }, + { + "epoch": 1.25, + "learning_rate": 1.312237540982783e-05, + "loss": 0.4606, + "step": 7334 + }, + { + "epoch": 1.25, + "learning_rate": 1.3120626030495716e-05, + "loss": 0.4281, + "step": 7335 + }, + { + "epoch": 1.25, + "learning_rate": 1.3118876545352188e-05, + "loss": 0.431, + "step": 7336 + }, + { + "epoch": 1.25, + "learning_rate": 1.3117126954456572e-05, + "loss": 0.4252, + "step": 7337 + }, + { + "epoch": 1.25, + "learning_rate": 1.3115377257868185e-05, + "loss": 0.4366, + "step": 7338 + }, + { + "epoch": 1.25, + "learning_rate": 1.3113627455646363e-05, + "loss": 0.4304, + "step": 7339 + }, + { + "epoch": 1.25, + "learning_rate": 1.3111877547850427e-05, + "loss": 0.4039, + "step": 7340 + }, + { + "epoch": 1.25, + "learning_rate": 1.3110127534539717e-05, + "loss": 0.419, + "step": 7341 + }, + { + "epoch": 1.25, + "learning_rate": 1.3108377415773573e-05, + "loss": 0.4337, + "step": 7342 + }, + { + "epoch": 1.25, + "learning_rate": 1.3106627191611333e-05, + "loss": 0.4253, + "step": 7343 + }, + { + "epoch": 1.25, + "learning_rate": 1.310487686211234e-05, + "loss": 0.4323, + "step": 7344 + }, + { + "epoch": 1.25, + "learning_rate": 1.3103126427335941e-05, + "loss": 0.3988, + "step": 7345 + }, + { + "epoch": 1.25, + "learning_rate": 1.3101375887341497e-05, + "loss": 0.4373, + "step": 7346 + }, + { + "epoch": 1.25, + "learning_rate": 1.309962524218836e-05, + "loss": 0.3965, + "step": 7347 + }, + { + "epoch": 1.25, + "learning_rate": 1.3097874491935886e-05, + "loss": 0.4403, + "step": 7348 + }, + { + "epoch": 1.25, + "learning_rate": 1.309612363664344e-05, + "loss": 0.4169, + "step": 7349 + }, + { + "epoch": 1.25, + "learning_rate": 1.309437267637039e-05, + "loss": 0.4281, + "step": 7350 + }, + { + "epoch": 1.25, + "learning_rate": 1.3092621611176104e-05, + "loss": 0.3973, + "step": 7351 + }, + { + "epoch": 1.25, + "learning_rate": 1.3090870441119956e-05, + "loss": 0.4383, + "step": 7352 + }, + { + "epoch": 1.25, + "learning_rate": 1.308911916626132e-05, + "loss": 0.407, + "step": 7353 + }, + { + "epoch": 1.25, + "learning_rate": 1.3087367786659585e-05, + "loss": 0.418, + "step": 7354 + }, + { + "epoch": 1.25, + "learning_rate": 1.3085616302374129e-05, + "loss": 0.4446, + "step": 7355 + }, + { + "epoch": 1.25, + "learning_rate": 1.308386471346434e-05, + "loss": 0.4481, + "step": 7356 + }, + { + "epoch": 1.25, + "learning_rate": 1.3082113019989608e-05, + "loss": 0.3974, + "step": 7357 + }, + { + "epoch": 1.25, + "learning_rate": 1.3080361222009334e-05, + "loss": 0.4127, + "step": 7358 + }, + { + "epoch": 1.26, + "learning_rate": 1.3078609319582914e-05, + "loss": 0.4139, + "step": 7359 + }, + { + "epoch": 1.26, + "learning_rate": 1.3076857312769744e-05, + "loss": 0.4224, + "step": 7360 + }, + { + "epoch": 1.26, + "learning_rate": 1.3075105201629232e-05, + "loss": 0.4578, + "step": 7361 + }, + { + "epoch": 1.26, + "learning_rate": 1.3073352986220796e-05, + "loss": 0.4299, + "step": 7362 + }, + { + "epoch": 1.26, + "learning_rate": 1.3071600666603837e-05, + "loss": 0.4422, + "step": 7363 + }, + { + "epoch": 1.26, + "learning_rate": 1.3069848242837774e-05, + "loss": 0.3911, + "step": 7364 + }, + { + "epoch": 1.26, + "learning_rate": 1.3068095714982033e-05, + "loss": 0.4138, + "step": 7365 + }, + { + "epoch": 1.26, + "learning_rate": 1.3066343083096028e-05, + "loss": 0.4257, + "step": 7366 + }, + { + "epoch": 1.26, + "learning_rate": 1.3064590347239193e-05, + "loss": 0.4067, + "step": 7367 + }, + { + "epoch": 1.26, + "learning_rate": 1.3062837507470953e-05, + "loss": 0.4174, + "step": 7368 + }, + { + "epoch": 1.26, + "learning_rate": 1.3061084563850748e-05, + "loss": 0.4356, + "step": 7369 + }, + { + "epoch": 1.26, + "learning_rate": 1.3059331516438008e-05, + "loss": 0.4302, + "step": 7370 + }, + { + "epoch": 1.26, + "learning_rate": 1.3057578365292174e-05, + "loss": 0.4139, + "step": 7371 + }, + { + "epoch": 1.26, + "learning_rate": 1.3055825110472698e-05, + "loss": 0.4309, + "step": 7372 + }, + { + "epoch": 1.26, + "learning_rate": 1.3054071752039022e-05, + "loss": 0.424, + "step": 7373 + }, + { + "epoch": 1.26, + "learning_rate": 1.3052318290050598e-05, + "loss": 0.4424, + "step": 7374 + }, + { + "epoch": 1.26, + "learning_rate": 1.3050564724566884e-05, + "loss": 0.4166, + "step": 7375 + }, + { + "epoch": 1.26, + "learning_rate": 1.304881105564733e-05, + "loss": 0.4335, + "step": 7376 + }, + { + "epoch": 1.26, + "learning_rate": 1.304705728335141e-05, + "loss": 0.4475, + "step": 7377 + }, + { + "epoch": 1.26, + "learning_rate": 1.3045303407738576e-05, + "loss": 0.399, + "step": 7378 + }, + { + "epoch": 1.26, + "learning_rate": 1.3043549428868308e-05, + "loss": 0.4038, + "step": 7379 + }, + { + "epoch": 1.26, + "learning_rate": 1.3041795346800075e-05, + "loss": 0.4255, + "step": 7380 + }, + { + "epoch": 1.26, + "learning_rate": 1.3040041161593347e-05, + "loss": 0.4283, + "step": 7381 + }, + { + "epoch": 1.26, + "learning_rate": 1.3038286873307613e-05, + "loss": 0.4309, + "step": 7382 + }, + { + "epoch": 1.26, + "learning_rate": 1.303653248200235e-05, + "loss": 0.4119, + "step": 7383 + }, + { + "epoch": 1.26, + "learning_rate": 1.3034777987737044e-05, + "loss": 0.4443, + "step": 7384 + }, + { + "epoch": 1.26, + "learning_rate": 1.3033023390571186e-05, + "loss": 0.4267, + "step": 7385 + }, + { + "epoch": 1.26, + "learning_rate": 1.3031268690564269e-05, + "loss": 0.404, + "step": 7386 + }, + { + "epoch": 1.26, + "learning_rate": 1.3029513887775793e-05, + "loss": 0.3897, + "step": 7387 + }, + { + "epoch": 1.26, + "learning_rate": 1.3027758982265255e-05, + "loss": 0.4111, + "step": 7388 + }, + { + "epoch": 1.26, + "learning_rate": 1.3026003974092159e-05, + "loss": 0.3994, + "step": 7389 + }, + { + "epoch": 1.26, + "learning_rate": 1.3024248863316012e-05, + "loss": 0.4411, + "step": 7390 + }, + { + "epoch": 1.26, + "learning_rate": 1.3022493649996326e-05, + "loss": 0.4477, + "step": 7391 + }, + { + "epoch": 1.26, + "learning_rate": 1.3020738334192615e-05, + "loss": 0.416, + "step": 7392 + }, + { + "epoch": 1.26, + "learning_rate": 1.3018982915964395e-05, + "loss": 0.4184, + "step": 7393 + }, + { + "epoch": 1.26, + "learning_rate": 1.3017227395371188e-05, + "loss": 0.3981, + "step": 7394 + }, + { + "epoch": 1.26, + "learning_rate": 1.3015471772472523e-05, + "loss": 0.4003, + "step": 7395 + }, + { + "epoch": 1.26, + "learning_rate": 1.301371604732792e-05, + "loss": 0.4341, + "step": 7396 + }, + { + "epoch": 1.26, + "learning_rate": 1.3011960219996915e-05, + "loss": 0.3824, + "step": 7397 + }, + { + "epoch": 1.26, + "learning_rate": 1.3010204290539045e-05, + "loss": 0.4327, + "step": 7398 + }, + { + "epoch": 1.26, + "learning_rate": 1.3008448259013848e-05, + "loss": 0.4343, + "step": 7399 + }, + { + "epoch": 1.26, + "learning_rate": 1.300669212548086e-05, + "loss": 0.4184, + "step": 7400 + }, + { + "epoch": 1.26, + "learning_rate": 1.3004935889999631e-05, + "loss": 0.4367, + "step": 7401 + }, + { + "epoch": 1.26, + "learning_rate": 1.300317955262971e-05, + "loss": 0.4012, + "step": 7402 + }, + { + "epoch": 1.26, + "learning_rate": 1.3001423113430651e-05, + "loss": 0.4416, + "step": 7403 + }, + { + "epoch": 1.26, + "learning_rate": 1.2999666572462007e-05, + "loss": 0.4286, + "step": 7404 + }, + { + "epoch": 1.26, + "learning_rate": 1.2997909929783337e-05, + "loss": 0.4343, + "step": 7405 + }, + { + "epoch": 1.26, + "learning_rate": 1.2996153185454205e-05, + "loss": 0.4694, + "step": 7406 + }, + { + "epoch": 1.26, + "learning_rate": 1.2994396339534176e-05, + "loss": 0.4156, + "step": 7407 + }, + { + "epoch": 1.26, + "learning_rate": 1.2992639392082822e-05, + "loss": 0.4131, + "step": 7408 + }, + { + "epoch": 1.26, + "learning_rate": 1.299088234315971e-05, + "loss": 0.4544, + "step": 7409 + }, + { + "epoch": 1.26, + "learning_rate": 1.2989125192824425e-05, + "loss": 0.4556, + "step": 7410 + }, + { + "epoch": 1.26, + "learning_rate": 1.298736794113654e-05, + "loss": 0.4412, + "step": 7411 + }, + { + "epoch": 1.26, + "learning_rate": 1.2985610588155642e-05, + "loss": 0.4266, + "step": 7412 + }, + { + "epoch": 1.26, + "learning_rate": 1.2983853133941316e-05, + "loss": 0.4265, + "step": 7413 + }, + { + "epoch": 1.26, + "learning_rate": 1.2982095578553153e-05, + "loss": 0.4327, + "step": 7414 + }, + { + "epoch": 1.26, + "learning_rate": 1.298033792205075e-05, + "loss": 0.3988, + "step": 7415 + }, + { + "epoch": 1.26, + "learning_rate": 1.2978580164493697e-05, + "loss": 0.4491, + "step": 7416 + }, + { + "epoch": 1.26, + "learning_rate": 1.2976822305941596e-05, + "loss": 0.4124, + "step": 7417 + }, + { + "epoch": 1.27, + "learning_rate": 1.2975064346454058e-05, + "loss": 0.4537, + "step": 7418 + }, + { + "epoch": 1.27, + "learning_rate": 1.2973306286090682e-05, + "loss": 0.4323, + "step": 7419 + }, + { + "epoch": 1.27, + "learning_rate": 1.2971548124911082e-05, + "loss": 0.4401, + "step": 7420 + }, + { + "epoch": 1.27, + "learning_rate": 1.2969789862974874e-05, + "loss": 0.4321, + "step": 7421 + }, + { + "epoch": 1.27, + "learning_rate": 1.296803150034167e-05, + "loss": 0.4399, + "step": 7422 + }, + { + "epoch": 1.27, + "learning_rate": 1.29662730370711e-05, + "loss": 0.4362, + "step": 7423 + }, + { + "epoch": 1.27, + "learning_rate": 1.296451447322278e-05, + "loss": 0.4241, + "step": 7424 + }, + { + "epoch": 1.27, + "learning_rate": 1.2962755808856341e-05, + "loss": 0.4311, + "step": 7425 + }, + { + "epoch": 1.27, + "learning_rate": 1.2960997044031416e-05, + "loss": 0.3984, + "step": 7426 + }, + { + "epoch": 1.27, + "learning_rate": 1.2959238178807634e-05, + "loss": 0.432, + "step": 7427 + }, + { + "epoch": 1.27, + "learning_rate": 1.2957479213244639e-05, + "loss": 0.4218, + "step": 7428 + }, + { + "epoch": 1.27, + "learning_rate": 1.2955720147402073e-05, + "loss": 0.4185, + "step": 7429 + }, + { + "epoch": 1.27, + "learning_rate": 1.2953960981339578e-05, + "loss": 0.4064, + "step": 7430 + }, + { + "epoch": 1.27, + "learning_rate": 1.2952201715116797e-05, + "loss": 0.43, + "step": 7431 + }, + { + "epoch": 1.27, + "learning_rate": 1.295044234879339e-05, + "loss": 0.4428, + "step": 7432 + }, + { + "epoch": 1.27, + "learning_rate": 1.2948682882429008e-05, + "loss": 0.4175, + "step": 7433 + }, + { + "epoch": 1.27, + "learning_rate": 1.2946923316083313e-05, + "loss": 0.4322, + "step": 7434 + }, + { + "epoch": 1.27, + "learning_rate": 1.2945163649815962e-05, + "loss": 0.4157, + "step": 7435 + }, + { + "epoch": 1.27, + "learning_rate": 1.2943403883686623e-05, + "loss": 0.4179, + "step": 7436 + }, + { + "epoch": 1.27, + "learning_rate": 1.2941644017754964e-05, + "loss": 0.4552, + "step": 7437 + }, + { + "epoch": 1.27, + "learning_rate": 1.293988405208066e-05, + "loss": 0.3955, + "step": 7438 + }, + { + "epoch": 1.27, + "learning_rate": 1.2938123986723378e-05, + "loss": 0.4118, + "step": 7439 + }, + { + "epoch": 1.27, + "learning_rate": 1.2936363821742808e-05, + "loss": 0.4572, + "step": 7440 + }, + { + "epoch": 1.27, + "learning_rate": 1.2934603557198621e-05, + "loss": 0.4067, + "step": 7441 + }, + { + "epoch": 1.27, + "learning_rate": 1.293284319315051e-05, + "loss": 0.4315, + "step": 7442 + }, + { + "epoch": 1.27, + "learning_rate": 1.293108272965816e-05, + "loss": 0.449, + "step": 7443 + }, + { + "epoch": 1.27, + "learning_rate": 1.2929322166781269e-05, + "loss": 0.4242, + "step": 7444 + }, + { + "epoch": 1.27, + "learning_rate": 1.2927561504579522e-05, + "loss": 0.4064, + "step": 7445 + }, + { + "epoch": 1.27, + "learning_rate": 1.292580074311263e-05, + "loss": 0.4662, + "step": 7446 + }, + { + "epoch": 1.27, + "learning_rate": 1.2924039882440287e-05, + "loss": 0.4248, + "step": 7447 + }, + { + "epoch": 1.27, + "learning_rate": 1.2922278922622203e-05, + "loss": 0.4336, + "step": 7448 + }, + { + "epoch": 1.27, + "learning_rate": 1.2920517863718083e-05, + "loss": 0.4534, + "step": 7449 + }, + { + "epoch": 1.27, + "learning_rate": 1.2918756705787642e-05, + "loss": 0.4298, + "step": 7450 + }, + { + "epoch": 1.27, + "learning_rate": 1.2916995448890597e-05, + "loss": 0.4477, + "step": 7451 + }, + { + "epoch": 1.27, + "learning_rate": 1.2915234093086665e-05, + "loss": 0.4333, + "step": 7452 + }, + { + "epoch": 1.27, + "learning_rate": 1.2913472638435569e-05, + "loss": 0.4085, + "step": 7453 + }, + { + "epoch": 1.27, + "learning_rate": 1.2911711084997038e-05, + "loss": 0.4181, + "step": 7454 + }, + { + "epoch": 1.27, + "learning_rate": 1.2909949432830796e-05, + "loss": 0.4191, + "step": 7455 + }, + { + "epoch": 1.27, + "learning_rate": 1.2908187681996577e-05, + "loss": 0.4974, + "step": 7456 + }, + { + "epoch": 1.27, + "learning_rate": 1.2906425832554119e-05, + "loss": 0.4044, + "step": 7457 + }, + { + "epoch": 1.27, + "learning_rate": 1.290466388456316e-05, + "loss": 0.4436, + "step": 7458 + }, + { + "epoch": 1.27, + "learning_rate": 1.2902901838083442e-05, + "loss": 0.456, + "step": 7459 + }, + { + "epoch": 1.27, + "learning_rate": 1.290113969317471e-05, + "loss": 0.4349, + "step": 7460 + }, + { + "epoch": 1.27, + "learning_rate": 1.2899377449896714e-05, + "loss": 0.4464, + "step": 7461 + }, + { + "epoch": 1.27, + "learning_rate": 1.2897615108309211e-05, + "loss": 0.421, + "step": 7462 + }, + { + "epoch": 1.27, + "learning_rate": 1.2895852668471953e-05, + "loss": 0.4354, + "step": 7463 + }, + { + "epoch": 1.27, + "learning_rate": 1.2894090130444697e-05, + "loss": 0.4495, + "step": 7464 + }, + { + "epoch": 1.27, + "learning_rate": 1.2892327494287207e-05, + "loss": 0.4016, + "step": 7465 + }, + { + "epoch": 1.27, + "learning_rate": 1.2890564760059252e-05, + "loss": 0.4273, + "step": 7466 + }, + { + "epoch": 1.27, + "learning_rate": 1.28888019278206e-05, + "loss": 0.4242, + "step": 7467 + }, + { + "epoch": 1.27, + "learning_rate": 1.2887038997631019e-05, + "loss": 0.421, + "step": 7468 + }, + { + "epoch": 1.27, + "learning_rate": 1.2885275969550288e-05, + "loss": 0.406, + "step": 7469 + }, + { + "epoch": 1.27, + "learning_rate": 1.2883512843638192e-05, + "loss": 0.4344, + "step": 7470 + }, + { + "epoch": 1.27, + "learning_rate": 1.2881749619954507e-05, + "loss": 0.3948, + "step": 7471 + }, + { + "epoch": 1.27, + "learning_rate": 1.2879986298559018e-05, + "loss": 0.427, + "step": 7472 + }, + { + "epoch": 1.27, + "learning_rate": 1.2878222879511512e-05, + "loss": 0.3888, + "step": 7473 + }, + { + "epoch": 1.27, + "learning_rate": 1.2876459362871793e-05, + "loss": 0.4204, + "step": 7474 + }, + { + "epoch": 1.27, + "learning_rate": 1.2874695748699644e-05, + "loss": 0.4576, + "step": 7475 + }, + { + "epoch": 1.28, + "learning_rate": 1.287293203705487e-05, + "loss": 0.4129, + "step": 7476 + }, + { + "epoch": 1.28, + "learning_rate": 1.2871168227997277e-05, + "loss": 0.4421, + "step": 7477 + }, + { + "epoch": 1.28, + "learning_rate": 1.2869404321586663e-05, + "loss": 0.4477, + "step": 7478 + }, + { + "epoch": 1.28, + "learning_rate": 1.2867640317882842e-05, + "loss": 0.4164, + "step": 7479 + }, + { + "epoch": 1.28, + "learning_rate": 1.286587621694562e-05, + "loss": 0.4305, + "step": 7480 + }, + { + "epoch": 1.28, + "learning_rate": 1.2864112018834822e-05, + "loss": 0.4155, + "step": 7481 + }, + { + "epoch": 1.28, + "learning_rate": 1.2862347723610263e-05, + "loss": 0.4301, + "step": 7482 + }, + { + "epoch": 1.28, + "learning_rate": 1.2860583331331759e-05, + "loss": 0.4095, + "step": 7483 + }, + { + "epoch": 1.28, + "learning_rate": 1.2858818842059145e-05, + "loss": 0.4134, + "step": 7484 + }, + { + "epoch": 1.28, + "learning_rate": 1.2857054255852243e-05, + "loss": 0.3867, + "step": 7485 + }, + { + "epoch": 1.28, + "learning_rate": 1.2855289572770891e-05, + "loss": 0.4178, + "step": 7486 + }, + { + "epoch": 1.28, + "learning_rate": 1.2853524792874916e-05, + "loss": 0.4111, + "step": 7487 + }, + { + "epoch": 1.28, + "learning_rate": 1.2851759916224162e-05, + "loss": 0.4102, + "step": 7488 + }, + { + "epoch": 1.28, + "learning_rate": 1.2849994942878475e-05, + "loss": 0.3913, + "step": 7489 + }, + { + "epoch": 1.28, + "learning_rate": 1.2848229872897689e-05, + "loss": 0.411, + "step": 7490 + }, + { + "epoch": 1.28, + "learning_rate": 1.2846464706341662e-05, + "loss": 0.4412, + "step": 7491 + }, + { + "epoch": 1.28, + "learning_rate": 1.284469944327024e-05, + "loss": 0.4358, + "step": 7492 + }, + { + "epoch": 1.28, + "learning_rate": 1.2842934083743283e-05, + "loss": 0.4048, + "step": 7493 + }, + { + "epoch": 1.28, + "learning_rate": 1.2841168627820648e-05, + "loss": 0.4531, + "step": 7494 + }, + { + "epoch": 1.28, + "learning_rate": 1.283940307556219e-05, + "loss": 0.4291, + "step": 7495 + }, + { + "epoch": 1.28, + "learning_rate": 1.2837637427027783e-05, + "loss": 0.4328, + "step": 7496 + }, + { + "epoch": 1.28, + "learning_rate": 1.2835871682277293e-05, + "loss": 0.4347, + "step": 7497 + }, + { + "epoch": 1.28, + "learning_rate": 1.2834105841370583e-05, + "loss": 0.4348, + "step": 7498 + }, + { + "epoch": 1.28, + "learning_rate": 1.2832339904367536e-05, + "loss": 0.4373, + "step": 7499 + }, + { + "epoch": 1.28, + "learning_rate": 1.2830573871328029e-05, + "loss": 0.395, + "step": 7500 + }, + { + "epoch": 1.28, + "learning_rate": 1.2828807742311942e-05, + "loss": 0.4736, + "step": 7501 + }, + { + "epoch": 1.28, + "learning_rate": 1.2827041517379159e-05, + "loss": 0.4567, + "step": 7502 + }, + { + "epoch": 1.28, + "learning_rate": 1.2825275196589568e-05, + "loss": 0.4203, + "step": 7503 + }, + { + "epoch": 1.28, + "learning_rate": 1.282350878000306e-05, + "loss": 0.4195, + "step": 7504 + }, + { + "epoch": 1.28, + "learning_rate": 1.2821742267679524e-05, + "loss": 0.4374, + "step": 7505 + }, + { + "epoch": 1.28, + "learning_rate": 1.2819975659678866e-05, + "loss": 0.4155, + "step": 7506 + }, + { + "epoch": 1.28, + "learning_rate": 1.281820895606098e-05, + "loss": 0.4212, + "step": 7507 + }, + { + "epoch": 1.28, + "learning_rate": 1.2816442156885774e-05, + "loss": 0.4245, + "step": 7508 + }, + { + "epoch": 1.28, + "learning_rate": 1.2814675262213151e-05, + "loss": 0.444, + "step": 7509 + }, + { + "epoch": 1.28, + "learning_rate": 1.2812908272103028e-05, + "loss": 0.4245, + "step": 7510 + }, + { + "epoch": 1.28, + "learning_rate": 1.281114118661531e-05, + "loss": 0.4438, + "step": 7511 + }, + { + "epoch": 1.28, + "learning_rate": 1.2809374005809923e-05, + "loss": 0.4455, + "step": 7512 + }, + { + "epoch": 1.28, + "learning_rate": 1.2807606729746777e-05, + "loss": 0.3926, + "step": 7513 + }, + { + "epoch": 1.28, + "learning_rate": 1.28058393584858e-05, + "loss": 0.4498, + "step": 7514 + }, + { + "epoch": 1.28, + "learning_rate": 1.280407189208692e-05, + "loss": 0.4234, + "step": 7515 + }, + { + "epoch": 1.28, + "learning_rate": 1.2802304330610067e-05, + "loss": 0.4076, + "step": 7516 + }, + { + "epoch": 1.28, + "learning_rate": 1.280053667411517e-05, + "loss": 0.4146, + "step": 7517 + }, + { + "epoch": 1.28, + "learning_rate": 1.2798768922662169e-05, + "loss": 0.4412, + "step": 7518 + }, + { + "epoch": 1.28, + "learning_rate": 1.2797001076311e-05, + "loss": 0.4189, + "step": 7519 + }, + { + "epoch": 1.28, + "learning_rate": 1.2795233135121607e-05, + "loss": 0.4323, + "step": 7520 + }, + { + "epoch": 1.28, + "learning_rate": 1.2793465099153934e-05, + "loss": 0.4269, + "step": 7521 + }, + { + "epoch": 1.28, + "learning_rate": 1.2791696968467935e-05, + "loss": 0.4479, + "step": 7522 + }, + { + "epoch": 1.28, + "learning_rate": 1.278992874312356e-05, + "loss": 0.4277, + "step": 7523 + }, + { + "epoch": 1.28, + "learning_rate": 1.2788160423180762e-05, + "loss": 0.393, + "step": 7524 + }, + { + "epoch": 1.28, + "learning_rate": 1.27863920086995e-05, + "loss": 0.3981, + "step": 7525 + }, + { + "epoch": 1.28, + "learning_rate": 1.278462349973974e-05, + "loss": 0.4128, + "step": 7526 + }, + { + "epoch": 1.28, + "learning_rate": 1.2782854896361442e-05, + "loss": 0.419, + "step": 7527 + }, + { + "epoch": 1.28, + "learning_rate": 1.2781086198624576e-05, + "loss": 0.4098, + "step": 7528 + }, + { + "epoch": 1.28, + "learning_rate": 1.2779317406589113e-05, + "loss": 0.4456, + "step": 7529 + }, + { + "epoch": 1.28, + "learning_rate": 1.277754852031503e-05, + "loss": 0.4676, + "step": 7530 + }, + { + "epoch": 1.28, + "learning_rate": 1.2775779539862305e-05, + "loss": 0.3904, + "step": 7531 + }, + { + "epoch": 1.28, + "learning_rate": 1.2774010465290913e-05, + "loss": 0.4304, + "step": 7532 + }, + { + "epoch": 1.28, + "learning_rate": 1.2772241296660845e-05, + "loss": 0.3891, + "step": 7533 + }, + { + "epoch": 1.28, + "learning_rate": 1.2770472034032084e-05, + "loss": 0.4296, + "step": 7534 + }, + { + "epoch": 1.29, + "learning_rate": 1.2768702677464626e-05, + "loss": 0.422, + "step": 7535 + }, + { + "epoch": 1.29, + "learning_rate": 1.2766933227018457e-05, + "loss": 0.4337, + "step": 7536 + }, + { + "epoch": 1.29, + "learning_rate": 1.2765163682753578e-05, + "loss": 0.4249, + "step": 7537 + }, + { + "epoch": 1.29, + "learning_rate": 1.2763394044729993e-05, + "loss": 0.4097, + "step": 7538 + }, + { + "epoch": 1.29, + "learning_rate": 1.2761624313007699e-05, + "loss": 0.4232, + "step": 7539 + }, + { + "epoch": 1.29, + "learning_rate": 1.2759854487646704e-05, + "loss": 0.4016, + "step": 7540 + }, + { + "epoch": 1.29, + "learning_rate": 1.2758084568707022e-05, + "loss": 0.4293, + "step": 7541 + }, + { + "epoch": 1.29, + "learning_rate": 1.2756314556248661e-05, + "loss": 0.4195, + "step": 7542 + }, + { + "epoch": 1.29, + "learning_rate": 1.2754544450331636e-05, + "loss": 0.4241, + "step": 7543 + }, + { + "epoch": 1.29, + "learning_rate": 1.275277425101597e-05, + "loss": 0.4521, + "step": 7544 + }, + { + "epoch": 1.29, + "learning_rate": 1.2751003958361683e-05, + "loss": 0.4263, + "step": 7545 + }, + { + "epoch": 1.29, + "learning_rate": 1.2749233572428805e-05, + "loss": 0.3943, + "step": 7546 + }, + { + "epoch": 1.29, + "learning_rate": 1.2747463093277359e-05, + "loss": 0.4117, + "step": 7547 + }, + { + "epoch": 1.29, + "learning_rate": 1.2745692520967379e-05, + "loss": 0.4318, + "step": 7548 + }, + { + "epoch": 1.29, + "learning_rate": 1.27439218555589e-05, + "loss": 0.471, + "step": 7549 + }, + { + "epoch": 1.29, + "learning_rate": 1.274215109711196e-05, + "loss": 0.4358, + "step": 7550 + }, + { + "epoch": 1.29, + "learning_rate": 1.2740380245686602e-05, + "loss": 0.4292, + "step": 7551 + }, + { + "epoch": 1.29, + "learning_rate": 1.2738609301342865e-05, + "loss": 0.4379, + "step": 7552 + }, + { + "epoch": 1.29, + "learning_rate": 1.2736838264140808e-05, + "loss": 0.3965, + "step": 7553 + }, + { + "epoch": 1.29, + "learning_rate": 1.2735067134140468e-05, + "loss": 0.4508, + "step": 7554 + }, + { + "epoch": 1.29, + "learning_rate": 1.2733295911401907e-05, + "loss": 0.4188, + "step": 7555 + }, + { + "epoch": 1.29, + "learning_rate": 1.2731524595985176e-05, + "loss": 0.438, + "step": 7556 + }, + { + "epoch": 1.29, + "learning_rate": 1.2729753187950347e-05, + "loss": 0.4288, + "step": 7557 + }, + { + "epoch": 1.29, + "learning_rate": 1.2727981687357474e-05, + "loss": 0.4216, + "step": 7558 + }, + { + "epoch": 1.29, + "learning_rate": 1.2726210094266623e-05, + "loss": 0.4485, + "step": 7559 + }, + { + "epoch": 1.29, + "learning_rate": 1.2724438408737863e-05, + "loss": 0.3825, + "step": 7560 + }, + { + "epoch": 1.29, + "learning_rate": 1.2722666630831275e-05, + "loss": 0.4208, + "step": 7561 + }, + { + "epoch": 1.29, + "learning_rate": 1.2720894760606925e-05, + "loss": 0.4318, + "step": 7562 + }, + { + "epoch": 1.29, + "learning_rate": 1.2719122798124899e-05, + "loss": 0.4205, + "step": 7563 + }, + { + "epoch": 1.29, + "learning_rate": 1.2717350743445276e-05, + "loss": 0.4716, + "step": 7564 + }, + { + "epoch": 1.29, + "learning_rate": 1.2715578596628142e-05, + "loss": 0.4578, + "step": 7565 + }, + { + "epoch": 1.29, + "learning_rate": 1.2713806357733588e-05, + "loss": 0.4264, + "step": 7566 + }, + { + "epoch": 1.29, + "learning_rate": 1.2712034026821697e-05, + "loss": 0.46, + "step": 7567 + }, + { + "epoch": 1.29, + "learning_rate": 1.2710261603952573e-05, + "loss": 0.446, + "step": 7568 + }, + { + "epoch": 1.29, + "learning_rate": 1.2708489089186308e-05, + "loss": 0.4355, + "step": 7569 + }, + { + "epoch": 1.29, + "learning_rate": 1.2706716482583005e-05, + "loss": 0.4348, + "step": 7570 + }, + { + "epoch": 1.29, + "learning_rate": 1.2704943784202768e-05, + "loss": 0.4238, + "step": 7571 + }, + { + "epoch": 1.29, + "learning_rate": 1.2703170994105707e-05, + "loss": 0.427, + "step": 7572 + }, + { + "epoch": 1.29, + "learning_rate": 1.2701398112351925e-05, + "loss": 0.3921, + "step": 7573 + }, + { + "epoch": 1.29, + "learning_rate": 1.2699625139001543e-05, + "loss": 0.4459, + "step": 7574 + }, + { + "epoch": 1.29, + "learning_rate": 1.2697852074114671e-05, + "loss": 0.4431, + "step": 7575 + }, + { + "epoch": 1.29, + "learning_rate": 1.2696078917751433e-05, + "loss": 0.4232, + "step": 7576 + }, + { + "epoch": 1.29, + "learning_rate": 1.269430566997195e-05, + "loss": 0.4329, + "step": 7577 + }, + { + "epoch": 1.29, + "learning_rate": 1.2692532330836346e-05, + "loss": 0.4429, + "step": 7578 + }, + { + "epoch": 1.29, + "learning_rate": 1.2690758900404751e-05, + "loss": 0.4331, + "step": 7579 + }, + { + "epoch": 1.29, + "learning_rate": 1.26889853787373e-05, + "loss": 0.4176, + "step": 7580 + }, + { + "epoch": 1.29, + "learning_rate": 1.2687211765894126e-05, + "loss": 0.421, + "step": 7581 + }, + { + "epoch": 1.29, + "learning_rate": 1.2685438061935362e-05, + "loss": 0.4172, + "step": 7582 + }, + { + "epoch": 1.29, + "learning_rate": 1.268366426692116e-05, + "loss": 0.4397, + "step": 7583 + }, + { + "epoch": 1.29, + "learning_rate": 1.2681890380911652e-05, + "loss": 0.4607, + "step": 7584 + }, + { + "epoch": 1.29, + "learning_rate": 1.2680116403966993e-05, + "loss": 0.4223, + "step": 7585 + }, + { + "epoch": 1.29, + "learning_rate": 1.2678342336147331e-05, + "loss": 0.3884, + "step": 7586 + }, + { + "epoch": 1.29, + "learning_rate": 1.267656817751282e-05, + "loss": 0.4778, + "step": 7587 + }, + { + "epoch": 1.29, + "learning_rate": 1.267479392812362e-05, + "loss": 0.4523, + "step": 7588 + }, + { + "epoch": 1.29, + "learning_rate": 1.2673019588039885e-05, + "loss": 0.4083, + "step": 7589 + }, + { + "epoch": 1.29, + "learning_rate": 1.2671245157321779e-05, + "loss": 0.4189, + "step": 7590 + }, + { + "epoch": 1.29, + "learning_rate": 1.266947063602947e-05, + "loss": 0.4464, + "step": 7591 + }, + { + "epoch": 1.29, + "learning_rate": 1.2667696024223126e-05, + "loss": 0.4121, + "step": 7592 + }, + { + "epoch": 1.29, + "learning_rate": 1.266592132196292e-05, + "loss": 0.4227, + "step": 7593 + }, + { + "epoch": 1.3, + "learning_rate": 1.2664146529309022e-05, + "loss": 0.4562, + "step": 7594 + }, + { + "epoch": 1.3, + "learning_rate": 1.2662371646321621e-05, + "loss": 0.4405, + "step": 7595 + }, + { + "epoch": 1.3, + "learning_rate": 1.2660596673060885e-05, + "loss": 0.3898, + "step": 7596 + }, + { + "epoch": 1.3, + "learning_rate": 1.2658821609587007e-05, + "loss": 0.4294, + "step": 7597 + }, + { + "epoch": 1.3, + "learning_rate": 1.2657046455960172e-05, + "loss": 0.4134, + "step": 7598 + }, + { + "epoch": 1.3, + "learning_rate": 1.2655271212240567e-05, + "loss": 0.4534, + "step": 7599 + }, + { + "epoch": 1.3, + "learning_rate": 1.2653495878488391e-05, + "loss": 0.4435, + "step": 7600 + }, + { + "epoch": 1.3, + "learning_rate": 1.2651720454763833e-05, + "loss": 0.4671, + "step": 7601 + }, + { + "epoch": 1.3, + "learning_rate": 1.2649944941127104e-05, + "loss": 0.3905, + "step": 7602 + }, + { + "epoch": 1.3, + "learning_rate": 1.2648169337638396e-05, + "loss": 0.3865, + "step": 7603 + }, + { + "epoch": 1.3, + "learning_rate": 1.2646393644357918e-05, + "loss": 0.4221, + "step": 7604 + }, + { + "epoch": 1.3, + "learning_rate": 1.2644617861345882e-05, + "loss": 0.4387, + "step": 7605 + }, + { + "epoch": 1.3, + "learning_rate": 1.2642841988662495e-05, + "loss": 0.3855, + "step": 7606 + }, + { + "epoch": 1.3, + "learning_rate": 1.2641066026367973e-05, + "loss": 0.4286, + "step": 7607 + }, + { + "epoch": 1.3, + "learning_rate": 1.2639289974522533e-05, + "loss": 0.3975, + "step": 7608 + }, + { + "epoch": 1.3, + "learning_rate": 1.2637513833186397e-05, + "loss": 0.467, + "step": 7609 + }, + { + "epoch": 1.3, + "learning_rate": 1.2635737602419791e-05, + "loss": 0.4596, + "step": 7610 + }, + { + "epoch": 1.3, + "learning_rate": 1.2633961282282938e-05, + "loss": 0.3966, + "step": 7611 + }, + { + "epoch": 1.3, + "learning_rate": 1.2632184872836068e-05, + "loss": 0.4418, + "step": 7612 + }, + { + "epoch": 1.3, + "learning_rate": 1.263040837413942e-05, + "loss": 0.4074, + "step": 7613 + }, + { + "epoch": 1.3, + "learning_rate": 1.2628631786253223e-05, + "loss": 0.4254, + "step": 7614 + }, + { + "epoch": 1.3, + "learning_rate": 1.2626855109237717e-05, + "loss": 0.3889, + "step": 7615 + }, + { + "epoch": 1.3, + "learning_rate": 1.2625078343153146e-05, + "loss": 0.4274, + "step": 7616 + }, + { + "epoch": 1.3, + "learning_rate": 1.2623301488059753e-05, + "loss": 0.4159, + "step": 7617 + }, + { + "epoch": 1.3, + "learning_rate": 1.262152454401779e-05, + "loss": 0.4217, + "step": 7618 + }, + { + "epoch": 1.3, + "learning_rate": 1.26197475110875e-05, + "loss": 0.4053, + "step": 7619 + }, + { + "epoch": 1.3, + "learning_rate": 1.2617970389329148e-05, + "loss": 0.4013, + "step": 7620 + }, + { + "epoch": 1.3, + "learning_rate": 1.2616193178802984e-05, + "loss": 0.4402, + "step": 7621 + }, + { + "epoch": 1.3, + "learning_rate": 1.261441587956927e-05, + "loss": 0.3847, + "step": 7622 + }, + { + "epoch": 1.3, + "learning_rate": 1.2612638491688268e-05, + "loss": 0.3963, + "step": 7623 + }, + { + "epoch": 1.3, + "learning_rate": 1.2610861015220242e-05, + "loss": 0.4408, + "step": 7624 + }, + { + "epoch": 1.3, + "learning_rate": 1.2609083450225468e-05, + "loss": 0.4647, + "step": 7625 + }, + { + "epoch": 1.3, + "learning_rate": 1.2607305796764209e-05, + "loss": 0.4204, + "step": 7626 + }, + { + "epoch": 1.3, + "learning_rate": 1.2605528054896747e-05, + "loss": 0.3943, + "step": 7627 + }, + { + "epoch": 1.3, + "learning_rate": 1.260375022468336e-05, + "loss": 0.4169, + "step": 7628 + }, + { + "epoch": 1.3, + "learning_rate": 1.2601972306184327e-05, + "loss": 0.4429, + "step": 7629 + }, + { + "epoch": 1.3, + "learning_rate": 1.2600194299459932e-05, + "loss": 0.3881, + "step": 7630 + }, + { + "epoch": 1.3, + "learning_rate": 1.2598416204570461e-05, + "loss": 0.4041, + "step": 7631 + }, + { + "epoch": 1.3, + "learning_rate": 1.2596638021576206e-05, + "loss": 0.3849, + "step": 7632 + }, + { + "epoch": 1.3, + "learning_rate": 1.2594859750537458e-05, + "loss": 0.4275, + "step": 7633 + }, + { + "epoch": 1.3, + "learning_rate": 1.2593081391514515e-05, + "loss": 0.4198, + "step": 7634 + }, + { + "epoch": 1.3, + "learning_rate": 1.2591302944567674e-05, + "loss": 0.4179, + "step": 7635 + }, + { + "epoch": 1.3, + "learning_rate": 1.2589524409757241e-05, + "loss": 0.4228, + "step": 7636 + }, + { + "epoch": 1.3, + "learning_rate": 1.258774578714352e-05, + "loss": 0.4489, + "step": 7637 + }, + { + "epoch": 1.3, + "learning_rate": 1.2585967076786814e-05, + "loss": 0.4084, + "step": 7638 + }, + { + "epoch": 1.3, + "learning_rate": 1.2584188278747436e-05, + "loss": 0.4066, + "step": 7639 + }, + { + "epoch": 1.3, + "learning_rate": 1.2582409393085703e-05, + "loss": 0.4105, + "step": 7640 + }, + { + "epoch": 1.3, + "learning_rate": 1.2580630419861933e-05, + "loss": 0.4423, + "step": 7641 + }, + { + "epoch": 1.3, + "learning_rate": 1.2578851359136437e-05, + "loss": 0.4248, + "step": 7642 + }, + { + "epoch": 1.3, + "learning_rate": 1.2577072210969546e-05, + "loss": 0.4388, + "step": 7643 + }, + { + "epoch": 1.3, + "learning_rate": 1.2575292975421586e-05, + "loss": 0.3989, + "step": 7644 + }, + { + "epoch": 1.3, + "learning_rate": 1.2573513652552883e-05, + "loss": 0.4404, + "step": 7645 + }, + { + "epoch": 1.3, + "learning_rate": 1.2571734242423767e-05, + "loss": 0.4484, + "step": 7646 + }, + { + "epoch": 1.3, + "learning_rate": 1.2569954745094574e-05, + "loss": 0.4282, + "step": 7647 + }, + { + "epoch": 1.3, + "learning_rate": 1.2568175160625644e-05, + "loss": 0.4532, + "step": 7648 + }, + { + "epoch": 1.3, + "learning_rate": 1.2566395489077313e-05, + "loss": 0.418, + "step": 7649 + }, + { + "epoch": 1.3, + "learning_rate": 1.2564615730509927e-05, + "loss": 0.441, + "step": 7650 + }, + { + "epoch": 1.3, + "learning_rate": 1.2562835884983837e-05, + "loss": 0.4108, + "step": 7651 + }, + { + "epoch": 1.31, + "learning_rate": 1.2561055952559388e-05, + "loss": 0.4291, + "step": 7652 + }, + { + "epoch": 1.31, + "learning_rate": 1.255927593329693e-05, + "loss": 0.3753, + "step": 7653 + }, + { + "epoch": 1.31, + "learning_rate": 1.2557495827256821e-05, + "loss": 0.4134, + "step": 7654 + }, + { + "epoch": 1.31, + "learning_rate": 1.2555715634499421e-05, + "loss": 0.4286, + "step": 7655 + }, + { + "epoch": 1.31, + "learning_rate": 1.2553935355085084e-05, + "loss": 0.4537, + "step": 7656 + }, + { + "epoch": 1.31, + "learning_rate": 1.2552154989074183e-05, + "loss": 0.4095, + "step": 7657 + }, + { + "epoch": 1.31, + "learning_rate": 1.255037453652708e-05, + "loss": 0.4642, + "step": 7658 + }, + { + "epoch": 1.31, + "learning_rate": 1.254859399750415e-05, + "loss": 0.4481, + "step": 7659 + }, + { + "epoch": 1.31, + "learning_rate": 1.254681337206576e-05, + "loss": 0.4408, + "step": 7660 + }, + { + "epoch": 1.31, + "learning_rate": 1.254503266027229e-05, + "loss": 0.4455, + "step": 7661 + }, + { + "epoch": 1.31, + "learning_rate": 1.2543251862184115e-05, + "loss": 0.4231, + "step": 7662 + }, + { + "epoch": 1.31, + "learning_rate": 1.254147097786162e-05, + "loss": 0.4668, + "step": 7663 + }, + { + "epoch": 1.31, + "learning_rate": 1.2539690007365186e-05, + "loss": 0.4365, + "step": 7664 + }, + { + "epoch": 1.31, + "learning_rate": 1.2537908950755204e-05, + "loss": 0.4445, + "step": 7665 + }, + { + "epoch": 1.31, + "learning_rate": 1.2536127808092065e-05, + "loss": 0.4162, + "step": 7666 + }, + { + "epoch": 1.31, + "learning_rate": 1.2534346579436158e-05, + "loss": 0.3973, + "step": 7667 + }, + { + "epoch": 1.31, + "learning_rate": 1.2532565264847885e-05, + "loss": 0.3824, + "step": 7668 + }, + { + "epoch": 1.31, + "learning_rate": 1.2530783864387642e-05, + "loss": 0.4286, + "step": 7669 + }, + { + "epoch": 1.31, + "learning_rate": 1.2529002378115833e-05, + "loss": 0.4148, + "step": 7670 + }, + { + "epoch": 1.31, + "learning_rate": 1.2527220806092857e-05, + "loss": 0.4318, + "step": 7671 + }, + { + "epoch": 1.31, + "learning_rate": 1.2525439148379127e-05, + "loss": 0.4436, + "step": 7672 + }, + { + "epoch": 1.31, + "learning_rate": 1.2523657405035057e-05, + "loss": 0.4754, + "step": 7673 + }, + { + "epoch": 1.31, + "learning_rate": 1.2521875576121056e-05, + "loss": 0.3639, + "step": 7674 + }, + { + "epoch": 1.31, + "learning_rate": 1.252009366169754e-05, + "loss": 0.4237, + "step": 7675 + }, + { + "epoch": 1.31, + "learning_rate": 1.2518311661824931e-05, + "loss": 0.437, + "step": 7676 + }, + { + "epoch": 1.31, + "learning_rate": 1.2516529576563654e-05, + "loss": 0.4423, + "step": 7677 + }, + { + "epoch": 1.31, + "learning_rate": 1.251474740597413e-05, + "loss": 0.403, + "step": 7678 + }, + { + "epoch": 1.31, + "learning_rate": 1.2512965150116788e-05, + "loss": 0.4554, + "step": 7679 + }, + { + "epoch": 1.31, + "learning_rate": 1.2511182809052056e-05, + "loss": 0.3779, + "step": 7680 + }, + { + "epoch": 1.31, + "learning_rate": 1.2509400382840377e-05, + "loss": 0.4195, + "step": 7681 + }, + { + "epoch": 1.31, + "learning_rate": 1.250761787154218e-05, + "loss": 0.4025, + "step": 7682 + }, + { + "epoch": 1.31, + "learning_rate": 1.2505835275217905e-05, + "loss": 0.4632, + "step": 7683 + }, + { + "epoch": 1.31, + "learning_rate": 1.2504052593928001e-05, + "loss": 0.3914, + "step": 7684 + }, + { + "epoch": 1.31, + "learning_rate": 1.250226982773291e-05, + "loss": 0.4092, + "step": 7685 + }, + { + "epoch": 1.31, + "learning_rate": 1.2500486976693078e-05, + "loss": 0.408, + "step": 7686 + }, + { + "epoch": 1.31, + "learning_rate": 1.2498704040868959e-05, + "loss": 0.4491, + "step": 7687 + }, + { + "epoch": 1.31, + "learning_rate": 1.2496921020321004e-05, + "loss": 0.4193, + "step": 7688 + }, + { + "epoch": 1.31, + "learning_rate": 1.249513791510968e-05, + "loss": 0.4299, + "step": 7689 + }, + { + "epoch": 1.31, + "learning_rate": 1.2493354725295433e-05, + "loss": 0.4209, + "step": 7690 + }, + { + "epoch": 1.31, + "learning_rate": 1.2491571450938733e-05, + "loss": 0.47, + "step": 7691 + }, + { + "epoch": 1.31, + "learning_rate": 1.2489788092100048e-05, + "loss": 0.4162, + "step": 7692 + }, + { + "epoch": 1.31, + "learning_rate": 1.2488004648839844e-05, + "loss": 0.4095, + "step": 7693 + }, + { + "epoch": 1.31, + "learning_rate": 1.2486221121218589e-05, + "loss": 0.4214, + "step": 7694 + }, + { + "epoch": 1.31, + "learning_rate": 1.248443750929676e-05, + "loss": 0.4391, + "step": 7695 + }, + { + "epoch": 1.31, + "learning_rate": 1.248265381313484e-05, + "loss": 0.4648, + "step": 7696 + }, + { + "epoch": 1.31, + "learning_rate": 1.2480870032793298e-05, + "loss": 0.4476, + "step": 7697 + }, + { + "epoch": 1.31, + "learning_rate": 1.2479086168332623e-05, + "loss": 0.4445, + "step": 7698 + }, + { + "epoch": 1.31, + "learning_rate": 1.24773022198133e-05, + "loss": 0.4018, + "step": 7699 + }, + { + "epoch": 1.31, + "learning_rate": 1.247551818729582e-05, + "loss": 0.4133, + "step": 7700 + }, + { + "epoch": 1.31, + "learning_rate": 1.247373407084067e-05, + "loss": 0.4351, + "step": 7701 + }, + { + "epoch": 1.31, + "learning_rate": 1.247194987050835e-05, + "loss": 0.4689, + "step": 7702 + }, + { + "epoch": 1.31, + "learning_rate": 1.2470165586359346e-05, + "loss": 0.4375, + "step": 7703 + }, + { + "epoch": 1.31, + "learning_rate": 1.2468381218454172e-05, + "loss": 0.4341, + "step": 7704 + }, + { + "epoch": 1.31, + "learning_rate": 1.2466596766853321e-05, + "loss": 0.4191, + "step": 7705 + }, + { + "epoch": 1.31, + "learning_rate": 1.2464812231617298e-05, + "loss": 0.4325, + "step": 7706 + }, + { + "epoch": 1.31, + "learning_rate": 1.2463027612806622e-05, + "loss": 0.4399, + "step": 7707 + }, + { + "epoch": 1.31, + "learning_rate": 1.2461242910481792e-05, + "loss": 0.431, + "step": 7708 + }, + { + "epoch": 1.31, + "learning_rate": 1.245945812470333e-05, + "loss": 0.4214, + "step": 7709 + }, + { + "epoch": 1.31, + "learning_rate": 1.2457673255531748e-05, + "loss": 0.3986, + "step": 7710 + }, + { + "epoch": 1.32, + "learning_rate": 1.2455888303027573e-05, + "loss": 0.4385, + "step": 7711 + }, + { + "epoch": 1.32, + "learning_rate": 1.2454103267251316e-05, + "loss": 0.4531, + "step": 7712 + }, + { + "epoch": 1.32, + "learning_rate": 1.245231814826351e-05, + "loss": 0.3893, + "step": 7713 + }, + { + "epoch": 1.32, + "learning_rate": 1.2450532946124685e-05, + "loss": 0.4655, + "step": 7714 + }, + { + "epoch": 1.32, + "learning_rate": 1.244874766089537e-05, + "loss": 0.437, + "step": 7715 + }, + { + "epoch": 1.32, + "learning_rate": 1.2446962292636096e-05, + "loss": 0.4303, + "step": 7716 + }, + { + "epoch": 1.32, + "learning_rate": 1.2445176841407403e-05, + "loss": 0.4231, + "step": 7717 + }, + { + "epoch": 1.32, + "learning_rate": 1.2443391307269828e-05, + "loss": 0.4305, + "step": 7718 + }, + { + "epoch": 1.32, + "learning_rate": 1.2441605690283915e-05, + "loss": 0.4446, + "step": 7719 + }, + { + "epoch": 1.32, + "learning_rate": 1.243981999051021e-05, + "loss": 0.4252, + "step": 7720 + }, + { + "epoch": 1.32, + "learning_rate": 1.2438034208009257e-05, + "loss": 0.3941, + "step": 7721 + }, + { + "epoch": 1.32, + "learning_rate": 1.2436248342841613e-05, + "loss": 0.4519, + "step": 7722 + }, + { + "epoch": 1.32, + "learning_rate": 1.2434462395067824e-05, + "loss": 0.4254, + "step": 7723 + }, + { + "epoch": 1.32, + "learning_rate": 1.243267636474845e-05, + "loss": 0.4622, + "step": 7724 + }, + { + "epoch": 1.32, + "learning_rate": 1.2430890251944054e-05, + "loss": 0.4571, + "step": 7725 + }, + { + "epoch": 1.32, + "learning_rate": 1.2429104056715194e-05, + "loss": 0.4299, + "step": 7726 + }, + { + "epoch": 1.32, + "learning_rate": 1.2427317779122432e-05, + "loss": 0.415, + "step": 7727 + }, + { + "epoch": 1.32, + "learning_rate": 1.242553141922634e-05, + "loss": 0.4199, + "step": 7728 + }, + { + "epoch": 1.32, + "learning_rate": 1.2423744977087487e-05, + "loss": 0.403, + "step": 7729 + }, + { + "epoch": 1.32, + "learning_rate": 1.2421958452766445e-05, + "loss": 0.4337, + "step": 7730 + }, + { + "epoch": 1.32, + "learning_rate": 1.2420171846323792e-05, + "loss": 0.4064, + "step": 7731 + }, + { + "epoch": 1.32, + "learning_rate": 1.2418385157820104e-05, + "loss": 0.4206, + "step": 7732 + }, + { + "epoch": 1.32, + "learning_rate": 1.2416598387315966e-05, + "loss": 0.4004, + "step": 7733 + }, + { + "epoch": 1.32, + "learning_rate": 1.241481153487196e-05, + "loss": 0.4704, + "step": 7734 + }, + { + "epoch": 1.32, + "learning_rate": 1.241302460054867e-05, + "loss": 0.4075, + "step": 7735 + }, + { + "epoch": 1.32, + "learning_rate": 1.241123758440669e-05, + "loss": 0.4044, + "step": 7736 + }, + { + "epoch": 1.32, + "learning_rate": 1.2409450486506611e-05, + "loss": 0.4474, + "step": 7737 + }, + { + "epoch": 1.32, + "learning_rate": 1.2407663306909033e-05, + "loss": 0.4359, + "step": 7738 + }, + { + "epoch": 1.32, + "learning_rate": 1.2405876045674546e-05, + "loss": 0.4724, + "step": 7739 + }, + { + "epoch": 1.32, + "learning_rate": 1.2404088702863754e-05, + "loss": 0.3788, + "step": 7740 + }, + { + "epoch": 1.32, + "learning_rate": 1.2402301278537263e-05, + "loss": 0.4555, + "step": 7741 + }, + { + "epoch": 1.32, + "learning_rate": 1.2400513772755681e-05, + "loss": 0.4016, + "step": 7742 + }, + { + "epoch": 1.32, + "learning_rate": 1.2398726185579608e-05, + "loss": 0.4378, + "step": 7743 + }, + { + "epoch": 1.32, + "learning_rate": 1.2396938517069664e-05, + "loss": 0.4143, + "step": 7744 + }, + { + "epoch": 1.32, + "learning_rate": 1.2395150767286462e-05, + "loss": 0.4156, + "step": 7745 + }, + { + "epoch": 1.32, + "learning_rate": 1.2393362936290618e-05, + "loss": 0.4801, + "step": 7746 + }, + { + "epoch": 1.32, + "learning_rate": 1.2391575024142753e-05, + "loss": 0.4031, + "step": 7747 + }, + { + "epoch": 1.32, + "learning_rate": 1.2389787030903491e-05, + "loss": 0.4635, + "step": 7748 + }, + { + "epoch": 1.32, + "learning_rate": 1.2387998956633458e-05, + "loss": 0.4648, + "step": 7749 + }, + { + "epoch": 1.32, + "learning_rate": 1.238621080139328e-05, + "loss": 0.4463, + "step": 7750 + }, + { + "epoch": 1.32, + "learning_rate": 1.2384422565243588e-05, + "loss": 0.4231, + "step": 7751 + }, + { + "epoch": 1.32, + "learning_rate": 1.2382634248245017e-05, + "loss": 0.3972, + "step": 7752 + }, + { + "epoch": 1.32, + "learning_rate": 1.2380845850458204e-05, + "loss": 0.4375, + "step": 7753 + }, + { + "epoch": 1.32, + "learning_rate": 1.237905737194379e-05, + "loss": 0.4355, + "step": 7754 + }, + { + "epoch": 1.32, + "learning_rate": 1.2377268812762413e-05, + "loss": 0.3901, + "step": 7755 + }, + { + "epoch": 1.32, + "learning_rate": 1.2375480172974723e-05, + "loss": 0.4199, + "step": 7756 + }, + { + "epoch": 1.32, + "learning_rate": 1.2373691452641365e-05, + "loss": 0.4099, + "step": 7757 + }, + { + "epoch": 1.32, + "learning_rate": 1.237190265182299e-05, + "loss": 0.4031, + "step": 7758 + }, + { + "epoch": 1.32, + "learning_rate": 1.2370113770580246e-05, + "loss": 0.4321, + "step": 7759 + }, + { + "epoch": 1.32, + "learning_rate": 1.2368324808973797e-05, + "loss": 0.4143, + "step": 7760 + }, + { + "epoch": 1.32, + "learning_rate": 1.2366535767064294e-05, + "loss": 0.4444, + "step": 7761 + }, + { + "epoch": 1.32, + "learning_rate": 1.2364746644912405e-05, + "loss": 0.4109, + "step": 7762 + }, + { + "epoch": 1.32, + "learning_rate": 1.2362957442578788e-05, + "loss": 0.4335, + "step": 7763 + }, + { + "epoch": 1.32, + "learning_rate": 1.2361168160124115e-05, + "loss": 0.3962, + "step": 7764 + }, + { + "epoch": 1.32, + "learning_rate": 1.2359378797609055e-05, + "loss": 0.4209, + "step": 7765 + }, + { + "epoch": 1.32, + "learning_rate": 1.2357589355094275e-05, + "loss": 0.4453, + "step": 7766 + }, + { + "epoch": 1.32, + "learning_rate": 1.2355799832640452e-05, + "loss": 0.4012, + "step": 7767 + }, + { + "epoch": 1.32, + "learning_rate": 1.2354010230308266e-05, + "loss": 0.4279, + "step": 7768 + }, + { + "epoch": 1.32, + "learning_rate": 1.2352220548158393e-05, + "loss": 0.4097, + "step": 7769 + }, + { + "epoch": 1.33, + "learning_rate": 1.2350430786251521e-05, + "loss": 0.4326, + "step": 7770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2348640944648333e-05, + "loss": 0.4281, + "step": 7771 + }, + { + "epoch": 1.33, + "learning_rate": 1.2346851023409519e-05, + "loss": 0.4057, + "step": 7772 + }, + { + "epoch": 1.33, + "learning_rate": 1.2345061022595767e-05, + "loss": 0.4327, + "step": 7773 + }, + { + "epoch": 1.33, + "learning_rate": 1.2343270942267772e-05, + "loss": 0.4273, + "step": 7774 + }, + { + "epoch": 1.33, + "learning_rate": 1.2341480782486232e-05, + "loss": 0.4468, + "step": 7775 + }, + { + "epoch": 1.33, + "learning_rate": 1.2339690543311844e-05, + "loss": 0.4207, + "step": 7776 + }, + { + "epoch": 1.33, + "learning_rate": 1.233790022480531e-05, + "loss": 0.4231, + "step": 7777 + }, + { + "epoch": 1.33, + "learning_rate": 1.2336109827027337e-05, + "loss": 0.436, + "step": 7778 + }, + { + "epoch": 1.33, + "learning_rate": 1.2334319350038632e-05, + "loss": 0.4262, + "step": 7779 + }, + { + "epoch": 1.33, + "learning_rate": 1.2332528793899901e-05, + "loss": 0.4431, + "step": 7780 + }, + { + "epoch": 1.33, + "learning_rate": 1.233073815867186e-05, + "loss": 0.4252, + "step": 7781 + }, + { + "epoch": 1.33, + "learning_rate": 1.2328947444415223e-05, + "loss": 0.4132, + "step": 7782 + }, + { + "epoch": 1.33, + "learning_rate": 1.2327156651190708e-05, + "loss": 0.395, + "step": 7783 + }, + { + "epoch": 1.33, + "learning_rate": 1.2325365779059038e-05, + "loss": 0.4206, + "step": 7784 + }, + { + "epoch": 1.33, + "learning_rate": 1.2323574828080931e-05, + "loss": 0.4221, + "step": 7785 + }, + { + "epoch": 1.33, + "learning_rate": 1.2321783798317118e-05, + "loss": 0.4311, + "step": 7786 + }, + { + "epoch": 1.33, + "learning_rate": 1.2319992689828324e-05, + "loss": 0.44, + "step": 7787 + }, + { + "epoch": 1.33, + "learning_rate": 1.2318201502675285e-05, + "loss": 0.4436, + "step": 7788 + }, + { + "epoch": 1.33, + "learning_rate": 1.231641023691873e-05, + "loss": 0.4212, + "step": 7789 + }, + { + "epoch": 1.33, + "learning_rate": 1.2314618892619398e-05, + "loss": 0.4471, + "step": 7790 + }, + { + "epoch": 1.33, + "learning_rate": 1.231282746983803e-05, + "loss": 0.4705, + "step": 7791 + }, + { + "epoch": 1.33, + "learning_rate": 1.2311035968635362e-05, + "loss": 0.4484, + "step": 7792 + }, + { + "epoch": 1.33, + "learning_rate": 1.2309244389072146e-05, + "loss": 0.461, + "step": 7793 + }, + { + "epoch": 1.33, + "learning_rate": 1.2307452731209124e-05, + "loss": 0.4309, + "step": 7794 + }, + { + "epoch": 1.33, + "learning_rate": 1.230566099510705e-05, + "loss": 0.4774, + "step": 7795 + }, + { + "epoch": 1.33, + "learning_rate": 1.2303869180826673e-05, + "loss": 0.4463, + "step": 7796 + }, + { + "epoch": 1.33, + "learning_rate": 1.2302077288428751e-05, + "loss": 0.4323, + "step": 7797 + }, + { + "epoch": 1.33, + "learning_rate": 1.230028531797404e-05, + "loss": 0.4391, + "step": 7798 + }, + { + "epoch": 1.33, + "learning_rate": 1.2298493269523299e-05, + "loss": 0.4211, + "step": 7799 + }, + { + "epoch": 1.33, + "learning_rate": 1.2296701143137296e-05, + "loss": 0.4355, + "step": 7800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2294908938876792e-05, + "loss": 0.4485, + "step": 7801 + }, + { + "epoch": 1.33, + "learning_rate": 1.2293116656802559e-05, + "loss": 0.4432, + "step": 7802 + }, + { + "epoch": 1.33, + "learning_rate": 1.2291324296975366e-05, + "loss": 0.444, + "step": 7803 + }, + { + "epoch": 1.33, + "learning_rate": 1.228953185945599e-05, + "loss": 0.4283, + "step": 7804 + }, + { + "epoch": 1.33, + "learning_rate": 1.2287739344305204e-05, + "loss": 0.4051, + "step": 7805 + }, + { + "epoch": 1.33, + "learning_rate": 1.2285946751583786e-05, + "loss": 0.4247, + "step": 7806 + }, + { + "epoch": 1.33, + "learning_rate": 1.228415408135252e-05, + "loss": 0.3735, + "step": 7807 + }, + { + "epoch": 1.33, + "learning_rate": 1.228236133367219e-05, + "loss": 0.4403, + "step": 7808 + }, + { + "epoch": 1.33, + "learning_rate": 1.2280568508603582e-05, + "loss": 0.4153, + "step": 7809 + }, + { + "epoch": 1.33, + "learning_rate": 1.2278775606207487e-05, + "loss": 0.4124, + "step": 7810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2276982626544696e-05, + "loss": 0.4343, + "step": 7811 + }, + { + "epoch": 1.33, + "learning_rate": 1.2275189569676006e-05, + "loss": 0.4567, + "step": 7812 + }, + { + "epoch": 1.33, + "learning_rate": 1.2273396435662212e-05, + "loss": 0.4249, + "step": 7813 + }, + { + "epoch": 1.33, + "learning_rate": 1.2271603224564112e-05, + "loss": 0.4439, + "step": 7814 + }, + { + "epoch": 1.33, + "learning_rate": 1.2269809936442511e-05, + "loss": 0.4232, + "step": 7815 + }, + { + "epoch": 1.33, + "learning_rate": 1.2268016571358216e-05, + "loss": 0.4492, + "step": 7816 + }, + { + "epoch": 1.33, + "learning_rate": 1.2266223129372032e-05, + "loss": 0.4617, + "step": 7817 + }, + { + "epoch": 1.33, + "learning_rate": 1.2264429610544772e-05, + "loss": 0.4214, + "step": 7818 + }, + { + "epoch": 1.33, + "learning_rate": 1.2262636014937245e-05, + "loss": 0.4184, + "step": 7819 + }, + { + "epoch": 1.33, + "learning_rate": 1.2260842342610273e-05, + "loss": 0.45, + "step": 7820 + }, + { + "epoch": 1.33, + "learning_rate": 1.225904859362467e-05, + "loss": 0.4579, + "step": 7821 + }, + { + "epoch": 1.33, + "learning_rate": 1.2257254768041253e-05, + "loss": 0.431, + "step": 7822 + }, + { + "epoch": 1.33, + "learning_rate": 1.2255460865920854e-05, + "loss": 0.3993, + "step": 7823 + }, + { + "epoch": 1.33, + "learning_rate": 1.2253666887324293e-05, + "loss": 0.4366, + "step": 7824 + }, + { + "epoch": 1.33, + "learning_rate": 1.2251872832312401e-05, + "loss": 0.4572, + "step": 7825 + }, + { + "epoch": 1.33, + "learning_rate": 1.2250078700946006e-05, + "loss": 0.4599, + "step": 7826 + }, + { + "epoch": 1.33, + "learning_rate": 1.2248284493285946e-05, + "loss": 0.4217, + "step": 7827 + }, + { + "epoch": 1.34, + "learning_rate": 1.2246490209393058e-05, + "loss": 0.4133, + "step": 7828 + }, + { + "epoch": 1.34, + "learning_rate": 1.224469584932818e-05, + "loss": 0.4333, + "step": 7829 + }, + { + "epoch": 1.34, + "learning_rate": 1.2242901413152148e-05, + "loss": 0.4159, + "step": 7830 + }, + { + "epoch": 1.34, + "learning_rate": 1.2241106900925815e-05, + "loss": 0.4191, + "step": 7831 + }, + { + "epoch": 1.34, + "learning_rate": 1.2239312312710018e-05, + "loss": 0.4435, + "step": 7832 + }, + { + "epoch": 1.34, + "learning_rate": 1.2237517648565615e-05, + "loss": 0.4433, + "step": 7833 + }, + { + "epoch": 1.34, + "learning_rate": 1.2235722908553454e-05, + "loss": 0.4794, + "step": 7834 + }, + { + "epoch": 1.34, + "learning_rate": 1.223392809273439e-05, + "loss": 0.3971, + "step": 7835 + }, + { + "epoch": 1.34, + "learning_rate": 1.2232133201169278e-05, + "loss": 0.4037, + "step": 7836 + }, + { + "epoch": 1.34, + "learning_rate": 1.2230338233918983e-05, + "loss": 0.4241, + "step": 7837 + }, + { + "epoch": 1.34, + "learning_rate": 1.222854319104436e-05, + "loss": 0.4387, + "step": 7838 + }, + { + "epoch": 1.34, + "learning_rate": 1.2226748072606281e-05, + "loss": 0.4688, + "step": 7839 + }, + { + "epoch": 1.34, + "learning_rate": 1.2224952878665605e-05, + "loss": 0.4162, + "step": 7840 + }, + { + "epoch": 1.34, + "learning_rate": 1.2223157609283209e-05, + "loss": 0.4253, + "step": 7841 + }, + { + "epoch": 1.34, + "learning_rate": 1.2221362264519962e-05, + "loss": 0.4165, + "step": 7842 + }, + { + "epoch": 1.34, + "learning_rate": 1.2219566844436743e-05, + "loss": 0.4185, + "step": 7843 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217771349094425e-05, + "loss": 0.441, + "step": 7844 + }, + { + "epoch": 1.34, + "learning_rate": 1.2215975778553887e-05, + "loss": 0.4559, + "step": 7845 + }, + { + "epoch": 1.34, + "learning_rate": 1.2214180132876014e-05, + "loss": 0.4361, + "step": 7846 + }, + { + "epoch": 1.34, + "learning_rate": 1.2212384412121693e-05, + "loss": 0.4547, + "step": 7847 + }, + { + "epoch": 1.34, + "learning_rate": 1.221058861635181e-05, + "loss": 0.4414, + "step": 7848 + }, + { + "epoch": 1.34, + "learning_rate": 1.2208792745627254e-05, + "loss": 0.4294, + "step": 7849 + }, + { + "epoch": 1.34, + "learning_rate": 1.2206996800008921e-05, + "loss": 0.4393, + "step": 7850 + }, + { + "epoch": 1.34, + "learning_rate": 1.2205200779557703e-05, + "loss": 0.4373, + "step": 7851 + }, + { + "epoch": 1.34, + "learning_rate": 1.22034046843345e-05, + "loss": 0.4186, + "step": 7852 + }, + { + "epoch": 1.34, + "learning_rate": 1.2201608514400207e-05, + "loss": 0.4004, + "step": 7853 + }, + { + "epoch": 1.34, + "learning_rate": 1.2199812269815736e-05, + "loss": 0.431, + "step": 7854 + }, + { + "epoch": 1.34, + "learning_rate": 1.2198015950641985e-05, + "loss": 0.4245, + "step": 7855 + }, + { + "epoch": 1.34, + "learning_rate": 1.2196219556939867e-05, + "loss": 0.4113, + "step": 7856 + }, + { + "epoch": 1.34, + "learning_rate": 1.2194423088770289e-05, + "loss": 0.4309, + "step": 7857 + }, + { + "epoch": 1.34, + "learning_rate": 1.2192626546194167e-05, + "loss": 0.4451, + "step": 7858 + }, + { + "epoch": 1.34, + "learning_rate": 1.2190829929272414e-05, + "loss": 0.4092, + "step": 7859 + }, + { + "epoch": 1.34, + "learning_rate": 1.218903323806595e-05, + "loss": 0.4084, + "step": 7860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2187236472635693e-05, + "loss": 0.4125, + "step": 7861 + }, + { + "epoch": 1.34, + "learning_rate": 1.218543963304257e-05, + "loss": 0.4337, + "step": 7862 + }, + { + "epoch": 1.34, + "learning_rate": 1.2183642719347502e-05, + "loss": 0.4063, + "step": 7863 + }, + { + "epoch": 1.34, + "learning_rate": 1.2181845731611419e-05, + "loss": 0.4651, + "step": 7864 + }, + { + "epoch": 1.34, + "learning_rate": 1.2180048669895258e-05, + "loss": 0.4414, + "step": 7865 + }, + { + "epoch": 1.34, + "learning_rate": 1.2178251534259939e-05, + "loss": 0.3791, + "step": 7866 + }, + { + "epoch": 1.34, + "learning_rate": 1.217645432476641e-05, + "loss": 0.4386, + "step": 7867 + }, + { + "epoch": 1.34, + "learning_rate": 1.2174657041475601e-05, + "loss": 0.4122, + "step": 7868 + }, + { + "epoch": 1.34, + "learning_rate": 1.2172859684448462e-05, + "loss": 0.4462, + "step": 7869 + }, + { + "epoch": 1.34, + "learning_rate": 1.2171062253745925e-05, + "loss": 0.401, + "step": 7870 + }, + { + "epoch": 1.34, + "learning_rate": 1.216926474942894e-05, + "loss": 0.4159, + "step": 7871 + }, + { + "epoch": 1.34, + "learning_rate": 1.2167467171558456e-05, + "loss": 0.4193, + "step": 7872 + }, + { + "epoch": 1.34, + "learning_rate": 1.2165669520195426e-05, + "loss": 0.4251, + "step": 7873 + }, + { + "epoch": 1.34, + "learning_rate": 1.2163871795400802e-05, + "loss": 0.4311, + "step": 7874 + }, + { + "epoch": 1.34, + "learning_rate": 1.2162073997235536e-05, + "loss": 0.4333, + "step": 7875 + }, + { + "epoch": 1.34, + "learning_rate": 1.216027612576059e-05, + "loss": 0.46, + "step": 7876 + }, + { + "epoch": 1.34, + "learning_rate": 1.2158478181036923e-05, + "loss": 0.3989, + "step": 7877 + }, + { + "epoch": 1.34, + "learning_rate": 1.2156680163125498e-05, + "loss": 0.4587, + "step": 7878 + }, + { + "epoch": 1.34, + "learning_rate": 1.215488207208728e-05, + "loss": 0.4397, + "step": 7879 + }, + { + "epoch": 1.34, + "learning_rate": 1.215308390798324e-05, + "loss": 0.3925, + "step": 7880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2151285670874344e-05, + "loss": 0.4189, + "step": 7881 + }, + { + "epoch": 1.34, + "learning_rate": 1.214948736082157e-05, + "loss": 0.4232, + "step": 7882 + }, + { + "epoch": 1.34, + "learning_rate": 1.214768897788589e-05, + "loss": 0.4186, + "step": 7883 + }, + { + "epoch": 1.34, + "learning_rate": 1.2145890522128286e-05, + "loss": 0.4022, + "step": 7884 + }, + { + "epoch": 1.34, + "learning_rate": 1.2144091993609732e-05, + "loss": 0.427, + "step": 7885 + }, + { + "epoch": 1.34, + "learning_rate": 1.2142293392391216e-05, + "loss": 0.44, + "step": 7886 + }, + { + "epoch": 1.35, + "learning_rate": 1.214049471853372e-05, + "loss": 0.4293, + "step": 7887 + }, + { + "epoch": 1.35, + "learning_rate": 1.2138695972098235e-05, + "loss": 0.4284, + "step": 7888 + }, + { + "epoch": 1.35, + "learning_rate": 1.2136897153145748e-05, + "loss": 0.4546, + "step": 7889 + }, + { + "epoch": 1.35, + "learning_rate": 1.2135098261737255e-05, + "loss": 0.4371, + "step": 7890 + }, + { + "epoch": 1.35, + "learning_rate": 1.2133299297933751e-05, + "loss": 0.4449, + "step": 7891 + }, + { + "epoch": 1.35, + "learning_rate": 1.2131500261796231e-05, + "loss": 0.4262, + "step": 7892 + }, + { + "epoch": 1.35, + "learning_rate": 1.21297011533857e-05, + "loss": 0.4268, + "step": 7893 + }, + { + "epoch": 1.35, + "learning_rate": 1.2127901972763154e-05, + "loss": 0.4193, + "step": 7894 + }, + { + "epoch": 1.35, + "learning_rate": 1.2126102719989604e-05, + "loss": 0.4224, + "step": 7895 + }, + { + "epoch": 1.35, + "learning_rate": 1.2124303395126052e-05, + "loss": 0.4334, + "step": 7896 + }, + { + "epoch": 1.35, + "learning_rate": 1.212250399823351e-05, + "loss": 0.428, + "step": 7897 + }, + { + "epoch": 1.35, + "learning_rate": 1.2120704529372991e-05, + "loss": 0.4241, + "step": 7898 + }, + { + "epoch": 1.35, + "learning_rate": 1.2118904988605512e-05, + "loss": 0.4412, + "step": 7899 + }, + { + "epoch": 1.35, + "learning_rate": 1.2117105375992089e-05, + "loss": 0.4302, + "step": 7900 + }, + { + "epoch": 1.35, + "learning_rate": 1.2115305691593737e-05, + "loss": 0.4008, + "step": 7901 + }, + { + "epoch": 1.35, + "learning_rate": 1.2113505935471482e-05, + "loss": 0.4457, + "step": 7902 + }, + { + "epoch": 1.35, + "learning_rate": 1.2111706107686349e-05, + "loss": 0.4323, + "step": 7903 + }, + { + "epoch": 1.35, + "learning_rate": 1.2109906208299366e-05, + "loss": 0.4313, + "step": 7904 + }, + { + "epoch": 1.35, + "learning_rate": 1.2108106237371558e-05, + "loss": 0.4331, + "step": 7905 + }, + { + "epoch": 1.35, + "learning_rate": 1.210630619496396e-05, + "loss": 0.4355, + "step": 7906 + }, + { + "epoch": 1.35, + "learning_rate": 1.2104506081137608e-05, + "loss": 0.4505, + "step": 7907 + }, + { + "epoch": 1.35, + "learning_rate": 1.2102705895953536e-05, + "loss": 0.4424, + "step": 7908 + }, + { + "epoch": 1.35, + "learning_rate": 1.210090563947278e-05, + "loss": 0.4357, + "step": 7909 + }, + { + "epoch": 1.35, + "learning_rate": 1.209910531175639e-05, + "loss": 0.4291, + "step": 7910 + }, + { + "epoch": 1.35, + "learning_rate": 1.20973049128654e-05, + "loss": 0.4182, + "step": 7911 + }, + { + "epoch": 1.35, + "learning_rate": 1.2095504442860862e-05, + "loss": 0.449, + "step": 7912 + }, + { + "epoch": 1.35, + "learning_rate": 1.2093703901803825e-05, + "loss": 0.4097, + "step": 7913 + }, + { + "epoch": 1.35, + "learning_rate": 1.209190328975534e-05, + "loss": 0.4207, + "step": 7914 + }, + { + "epoch": 1.35, + "learning_rate": 1.2090102606776456e-05, + "loss": 0.3828, + "step": 7915 + }, + { + "epoch": 1.35, + "learning_rate": 1.2088301852928237e-05, + "loss": 0.4586, + "step": 7916 + }, + { + "epoch": 1.35, + "learning_rate": 1.2086501028271733e-05, + "loss": 0.4273, + "step": 7917 + }, + { + "epoch": 1.35, + "learning_rate": 1.2084700132868012e-05, + "loss": 0.4499, + "step": 7918 + }, + { + "epoch": 1.35, + "learning_rate": 1.2082899166778132e-05, + "loss": 0.4438, + "step": 7919 + }, + { + "epoch": 1.35, + "learning_rate": 1.208109813006316e-05, + "loss": 0.4229, + "step": 7920 + }, + { + "epoch": 1.35, + "learning_rate": 1.2079297022784165e-05, + "loss": 0.4061, + "step": 7921 + }, + { + "epoch": 1.35, + "learning_rate": 1.2077495845002217e-05, + "loss": 0.4094, + "step": 7922 + }, + { + "epoch": 1.35, + "learning_rate": 1.2075694596778385e-05, + "loss": 0.4444, + "step": 7923 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073893278173752e-05, + "loss": 0.4268, + "step": 7924 + }, + { + "epoch": 1.35, + "learning_rate": 1.207209188924939e-05, + "loss": 0.4128, + "step": 7925 + }, + { + "epoch": 1.35, + "learning_rate": 1.2070290430066377e-05, + "loss": 0.4056, + "step": 7926 + }, + { + "epoch": 1.35, + "learning_rate": 1.2068488900685802e-05, + "loss": 0.4048, + "step": 7927 + }, + { + "epoch": 1.35, + "learning_rate": 1.2066687301168743e-05, + "loss": 0.4078, + "step": 7928 + }, + { + "epoch": 1.35, + "learning_rate": 1.2064885631576293e-05, + "loss": 0.4162, + "step": 7929 + }, + { + "epoch": 1.35, + "learning_rate": 1.2063083891969534e-05, + "loss": 0.422, + "step": 7930 + }, + { + "epoch": 1.35, + "learning_rate": 1.2061282082409565e-05, + "loss": 0.4341, + "step": 7931 + }, + { + "epoch": 1.35, + "learning_rate": 1.2059480202957477e-05, + "loss": 0.4216, + "step": 7932 + }, + { + "epoch": 1.35, + "learning_rate": 1.2057678253674368e-05, + "loss": 0.4319, + "step": 7933 + }, + { + "epoch": 1.35, + "learning_rate": 1.2055876234621333e-05, + "loss": 0.4243, + "step": 7934 + }, + { + "epoch": 1.35, + "learning_rate": 1.2054074145859476e-05, + "loss": 0.3942, + "step": 7935 + }, + { + "epoch": 1.35, + "learning_rate": 1.2052271987449899e-05, + "loss": 0.4405, + "step": 7936 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050469759453714e-05, + "loss": 0.4314, + "step": 7937 + }, + { + "epoch": 1.35, + "learning_rate": 1.2048667461932022e-05, + "loss": 0.4248, + "step": 7938 + }, + { + "epoch": 1.35, + "learning_rate": 1.2046865094945936e-05, + "loss": 0.4104, + "step": 7939 + }, + { + "epoch": 1.35, + "learning_rate": 1.2045062658556572e-05, + "loss": 0.42, + "step": 7940 + }, + { + "epoch": 1.35, + "learning_rate": 1.2043260152825043e-05, + "loss": 0.4081, + "step": 7941 + }, + { + "epoch": 1.35, + "learning_rate": 1.2041457577812463e-05, + "loss": 0.4208, + "step": 7942 + }, + { + "epoch": 1.35, + "learning_rate": 1.2039654933579958e-05, + "loss": 0.4444, + "step": 7943 + }, + { + "epoch": 1.35, + "learning_rate": 1.203785222018865e-05, + "loss": 0.4209, + "step": 7944 + }, + { + "epoch": 1.35, + "learning_rate": 1.203604943769966e-05, + "loss": 0.4427, + "step": 7945 + }, + { + "epoch": 1.36, + "learning_rate": 1.2034246586174118e-05, + "loss": 0.4031, + "step": 7946 + }, + { + "epoch": 1.36, + "learning_rate": 1.2032443665673155e-05, + "loss": 0.4089, + "step": 7947 + }, + { + "epoch": 1.36, + "learning_rate": 1.2030640676257897e-05, + "loss": 0.4088, + "step": 7948 + }, + { + "epoch": 1.36, + "learning_rate": 1.2028837617989484e-05, + "loss": 0.4492, + "step": 7949 + }, + { + "epoch": 1.36, + "learning_rate": 1.2027034490929048e-05, + "loss": 0.3942, + "step": 7950 + }, + { + "epoch": 1.36, + "learning_rate": 1.2025231295137729e-05, + "loss": 0.3957, + "step": 7951 + }, + { + "epoch": 1.36, + "learning_rate": 1.2023428030676674e-05, + "loss": 0.4255, + "step": 7952 + }, + { + "epoch": 1.36, + "learning_rate": 1.2021624697607019e-05, + "loss": 0.4554, + "step": 7953 + }, + { + "epoch": 1.36, + "learning_rate": 1.2019821295989913e-05, + "loss": 0.3963, + "step": 7954 + }, + { + "epoch": 1.36, + "learning_rate": 1.2018017825886503e-05, + "loss": 0.4131, + "step": 7955 + }, + { + "epoch": 1.36, + "learning_rate": 1.2016214287357944e-05, + "loss": 0.4071, + "step": 7956 + }, + { + "epoch": 1.36, + "learning_rate": 1.2014410680465379e-05, + "loss": 0.42, + "step": 7957 + }, + { + "epoch": 1.36, + "learning_rate": 1.2012607005269971e-05, + "loss": 0.4058, + "step": 7958 + }, + { + "epoch": 1.36, + "learning_rate": 1.2010803261832877e-05, + "loss": 0.389, + "step": 7959 + }, + { + "epoch": 1.36, + "learning_rate": 1.2008999450215252e-05, + "loss": 0.4219, + "step": 7960 + }, + { + "epoch": 1.36, + "learning_rate": 1.2007195570478263e-05, + "loss": 0.4424, + "step": 7961 + }, + { + "epoch": 1.36, + "learning_rate": 1.2005391622683073e-05, + "loss": 0.4455, + "step": 7962 + }, + { + "epoch": 1.36, + "learning_rate": 1.2003587606890849e-05, + "loss": 0.4309, + "step": 7963 + }, + { + "epoch": 1.36, + "learning_rate": 1.200178352316276e-05, + "loss": 0.4345, + "step": 7964 + }, + { + "epoch": 1.36, + "learning_rate": 1.1999979371559974e-05, + "loss": 0.4232, + "step": 7965 + }, + { + "epoch": 1.36, + "learning_rate": 1.1998175152143665e-05, + "loss": 0.4472, + "step": 7966 + }, + { + "epoch": 1.36, + "learning_rate": 1.1996370864975015e-05, + "loss": 0.3975, + "step": 7967 + }, + { + "epoch": 1.36, + "learning_rate": 1.1994566510115195e-05, + "loss": 0.4424, + "step": 7968 + }, + { + "epoch": 1.36, + "learning_rate": 1.199276208762539e-05, + "loss": 0.4364, + "step": 7969 + }, + { + "epoch": 1.36, + "learning_rate": 1.1990957597566779e-05, + "loss": 0.4304, + "step": 7970 + }, + { + "epoch": 1.36, + "learning_rate": 1.1989153040000553e-05, + "loss": 0.4083, + "step": 7971 + }, + { + "epoch": 1.36, + "learning_rate": 1.1987348414987896e-05, + "loss": 0.4493, + "step": 7972 + }, + { + "epoch": 1.36, + "learning_rate": 1.1985543722589994e-05, + "loss": 0.4496, + "step": 7973 + }, + { + "epoch": 1.36, + "learning_rate": 1.1983738962868046e-05, + "loss": 0.4072, + "step": 7974 + }, + { + "epoch": 1.36, + "learning_rate": 1.1981934135883237e-05, + "loss": 0.4377, + "step": 7975 + }, + { + "epoch": 1.36, + "learning_rate": 1.1980129241696769e-05, + "loss": 0.4349, + "step": 7976 + }, + { + "epoch": 1.36, + "learning_rate": 1.1978324280369844e-05, + "loss": 0.452, + "step": 7977 + }, + { + "epoch": 1.36, + "learning_rate": 1.1976519251963658e-05, + "loss": 0.4178, + "step": 7978 + }, + { + "epoch": 1.36, + "learning_rate": 1.1974714156539418e-05, + "loss": 0.4487, + "step": 7979 + }, + { + "epoch": 1.36, + "learning_rate": 1.1972908994158325e-05, + "loss": 0.4196, + "step": 7980 + }, + { + "epoch": 1.36, + "learning_rate": 1.197110376488159e-05, + "loss": 0.3846, + "step": 7981 + }, + { + "epoch": 1.36, + "learning_rate": 1.1969298468770422e-05, + "loss": 0.4138, + "step": 7982 + }, + { + "epoch": 1.36, + "learning_rate": 1.1967493105886035e-05, + "loss": 0.3983, + "step": 7983 + }, + { + "epoch": 1.36, + "learning_rate": 1.1965687676289639e-05, + "loss": 0.4248, + "step": 7984 + }, + { + "epoch": 1.36, + "learning_rate": 1.1963882180042457e-05, + "loss": 0.4238, + "step": 7985 + }, + { + "epoch": 1.36, + "learning_rate": 1.1962076617205707e-05, + "loss": 0.4751, + "step": 7986 + }, + { + "epoch": 1.36, + "learning_rate": 1.1960270987840605e-05, + "loss": 0.4304, + "step": 7987 + }, + { + "epoch": 1.36, + "learning_rate": 1.1958465292008381e-05, + "loss": 0.4269, + "step": 7988 + }, + { + "epoch": 1.36, + "learning_rate": 1.195665952977026e-05, + "loss": 0.4034, + "step": 7989 + }, + { + "epoch": 1.36, + "learning_rate": 1.1954853701187465e-05, + "loss": 0.4021, + "step": 7990 + }, + { + "epoch": 1.36, + "learning_rate": 1.1953047806321231e-05, + "loss": 0.44, + "step": 7991 + }, + { + "epoch": 1.36, + "learning_rate": 1.1951241845232788e-05, + "loss": 0.4168, + "step": 7992 + }, + { + "epoch": 1.36, + "learning_rate": 1.1949435817983377e-05, + "loss": 0.401, + "step": 7993 + }, + { + "epoch": 1.36, + "learning_rate": 1.1947629724634228e-05, + "loss": 0.4078, + "step": 7994 + }, + { + "epoch": 1.36, + "learning_rate": 1.1945823565246586e-05, + "loss": 0.4071, + "step": 7995 + }, + { + "epoch": 1.36, + "learning_rate": 1.1944017339881687e-05, + "loss": 0.4439, + "step": 7996 + }, + { + "epoch": 1.36, + "learning_rate": 1.1942211048600781e-05, + "loss": 0.4135, + "step": 7997 + }, + { + "epoch": 1.36, + "learning_rate": 1.1940404691465107e-05, + "loss": 0.4164, + "step": 7998 + }, + { + "epoch": 1.36, + "learning_rate": 1.1938598268535916e-05, + "loss": 0.4411, + "step": 7999 + }, + { + "epoch": 1.36, + "learning_rate": 1.1936791779874462e-05, + "loss": 0.4518, + "step": 8000 + }, + { + "epoch": 1.36, + "learning_rate": 1.1934985225541998e-05, + "loss": 0.4176, + "step": 8001 + }, + { + "epoch": 1.36, + "learning_rate": 1.1933178605599773e-05, + "loss": 0.4315, + "step": 8002 + }, + { + "epoch": 1.36, + "learning_rate": 1.1931371920109051e-05, + "loss": 0.4368, + "step": 8003 + }, + { + "epoch": 1.37, + "learning_rate": 1.192956516913109e-05, + "loss": 0.4588, + "step": 8004 + }, + { + "epoch": 1.37, + "learning_rate": 1.1927758352727145e-05, + "loss": 0.4215, + "step": 8005 + }, + { + "epoch": 1.37, + "learning_rate": 1.1925951470958487e-05, + "loss": 0.4135, + "step": 8006 + }, + { + "epoch": 1.37, + "learning_rate": 1.192414452388638e-05, + "loss": 0.4279, + "step": 8007 + }, + { + "epoch": 1.37, + "learning_rate": 1.1922337511572093e-05, + "loss": 0.4253, + "step": 8008 + }, + { + "epoch": 1.37, + "learning_rate": 1.1920530434076895e-05, + "loss": 0.419, + "step": 8009 + }, + { + "epoch": 1.37, + "learning_rate": 1.191872329146206e-05, + "loss": 0.4326, + "step": 8010 + }, + { + "epoch": 1.37, + "learning_rate": 1.1916916083788865e-05, + "loss": 0.4683, + "step": 8011 + }, + { + "epoch": 1.37, + "learning_rate": 1.1915108811118585e-05, + "loss": 0.4376, + "step": 8012 + }, + { + "epoch": 1.37, + "learning_rate": 1.1913301473512498e-05, + "loss": 0.4313, + "step": 8013 + }, + { + "epoch": 1.37, + "learning_rate": 1.1911494071031886e-05, + "loss": 0.3889, + "step": 8014 + }, + { + "epoch": 1.37, + "learning_rate": 1.1909686603738036e-05, + "loss": 0.443, + "step": 8015 + }, + { + "epoch": 1.37, + "learning_rate": 1.1907879071692233e-05, + "loss": 0.4277, + "step": 8016 + }, + { + "epoch": 1.37, + "learning_rate": 1.1906071474955758e-05, + "loss": 0.4121, + "step": 8017 + }, + { + "epoch": 1.37, + "learning_rate": 1.190426381358991e-05, + "loss": 0.4673, + "step": 8018 + }, + { + "epoch": 1.37, + "learning_rate": 1.1902456087655982e-05, + "loss": 0.4039, + "step": 8019 + }, + { + "epoch": 1.37, + "learning_rate": 1.1900648297215268e-05, + "loss": 0.4158, + "step": 8020 + }, + { + "epoch": 1.37, + "learning_rate": 1.1898840442329056e-05, + "loss": 0.4451, + "step": 8021 + }, + { + "epoch": 1.37, + "learning_rate": 1.1897032523058653e-05, + "loss": 0.4475, + "step": 8022 + }, + { + "epoch": 1.37, + "learning_rate": 1.1895224539465362e-05, + "loss": 0.4061, + "step": 8023 + }, + { + "epoch": 1.37, + "learning_rate": 1.1893416491610481e-05, + "loss": 0.4313, + "step": 8024 + }, + { + "epoch": 1.37, + "learning_rate": 1.189160837955532e-05, + "loss": 0.4487, + "step": 8025 + }, + { + "epoch": 1.37, + "learning_rate": 1.1889800203361182e-05, + "loss": 0.4215, + "step": 8026 + }, + { + "epoch": 1.37, + "learning_rate": 1.1887991963089384e-05, + "loss": 0.3966, + "step": 8027 + }, + { + "epoch": 1.37, + "learning_rate": 1.1886183658801233e-05, + "loss": 0.4331, + "step": 8028 + }, + { + "epoch": 1.37, + "learning_rate": 1.1884375290558045e-05, + "loss": 0.4153, + "step": 8029 + }, + { + "epoch": 1.37, + "learning_rate": 1.1882566858421137e-05, + "loss": 0.4258, + "step": 8030 + }, + { + "epoch": 1.37, + "learning_rate": 1.1880758362451825e-05, + "loss": 0.4348, + "step": 8031 + }, + { + "epoch": 1.37, + "learning_rate": 1.1878949802711433e-05, + "loss": 0.3944, + "step": 8032 + }, + { + "epoch": 1.37, + "learning_rate": 1.1877141179261283e-05, + "loss": 0.404, + "step": 8033 + }, + { + "epoch": 1.37, + "learning_rate": 1.1875332492162699e-05, + "loss": 0.4382, + "step": 8034 + }, + { + "epoch": 1.37, + "learning_rate": 1.1873523741477011e-05, + "loss": 0.4343, + "step": 8035 + }, + { + "epoch": 1.37, + "learning_rate": 1.1871714927265548e-05, + "loss": 0.4232, + "step": 8036 + }, + { + "epoch": 1.37, + "learning_rate": 1.186990604958964e-05, + "loss": 0.4276, + "step": 8037 + }, + { + "epoch": 1.37, + "learning_rate": 1.1868097108510619e-05, + "loss": 0.4191, + "step": 8038 + }, + { + "epoch": 1.37, + "learning_rate": 1.1866288104089825e-05, + "loss": 0.4174, + "step": 8039 + }, + { + "epoch": 1.37, + "learning_rate": 1.1864479036388594e-05, + "loss": 0.4242, + "step": 8040 + }, + { + "epoch": 1.37, + "learning_rate": 1.1862669905468267e-05, + "loss": 0.4331, + "step": 8041 + }, + { + "epoch": 1.37, + "learning_rate": 1.1860860711390188e-05, + "loss": 0.3951, + "step": 8042 + }, + { + "epoch": 1.37, + "learning_rate": 1.1859051454215696e-05, + "loss": 0.4131, + "step": 8043 + }, + { + "epoch": 1.37, + "learning_rate": 1.1857242134006146e-05, + "loss": 0.4055, + "step": 8044 + }, + { + "epoch": 1.37, + "learning_rate": 1.1855432750822877e-05, + "loss": 0.434, + "step": 8045 + }, + { + "epoch": 1.37, + "learning_rate": 1.185362330472725e-05, + "loss": 0.3909, + "step": 8046 + }, + { + "epoch": 1.37, + "learning_rate": 1.1851813795780611e-05, + "loss": 0.4496, + "step": 8047 + }, + { + "epoch": 1.37, + "learning_rate": 1.1850004224044315e-05, + "loss": 0.4168, + "step": 8048 + }, + { + "epoch": 1.37, + "learning_rate": 1.1848194589579724e-05, + "loss": 0.4321, + "step": 8049 + }, + { + "epoch": 1.37, + "learning_rate": 1.18463848924482e-05, + "loss": 0.4534, + "step": 8050 + }, + { + "epoch": 1.37, + "learning_rate": 1.1844575132711096e-05, + "loss": 0.4245, + "step": 8051 + }, + { + "epoch": 1.37, + "learning_rate": 1.1842765310429778e-05, + "loss": 0.4275, + "step": 8052 + }, + { + "epoch": 1.37, + "learning_rate": 1.1840955425665617e-05, + "loss": 0.4211, + "step": 8053 + }, + { + "epoch": 1.37, + "learning_rate": 1.1839145478479974e-05, + "loss": 0.4331, + "step": 8054 + }, + { + "epoch": 1.37, + "learning_rate": 1.1837335468934223e-05, + "loss": 0.4396, + "step": 8055 + }, + { + "epoch": 1.37, + "learning_rate": 1.1835525397089736e-05, + "loss": 0.4295, + "step": 8056 + }, + { + "epoch": 1.37, + "learning_rate": 1.1833715263007888e-05, + "loss": 0.4163, + "step": 8057 + }, + { + "epoch": 1.37, + "learning_rate": 1.1831905066750053e-05, + "loss": 0.4637, + "step": 8058 + }, + { + "epoch": 1.37, + "learning_rate": 1.1830094808377615e-05, + "loss": 0.4671, + "step": 8059 + }, + { + "epoch": 1.37, + "learning_rate": 1.1828284487951947e-05, + "loss": 0.4651, + "step": 8060 + }, + { + "epoch": 1.37, + "learning_rate": 1.1826474105534435e-05, + "loss": 0.4125, + "step": 8061 + }, + { + "epoch": 1.37, + "learning_rate": 1.1824663661186464e-05, + "loss": 0.396, + "step": 8062 + }, + { + "epoch": 1.38, + "learning_rate": 1.1822853154969422e-05, + "loss": 0.45, + "step": 8063 + }, + { + "epoch": 1.38, + "learning_rate": 1.1821042586944698e-05, + "loss": 0.4274, + "step": 8064 + }, + { + "epoch": 1.38, + "learning_rate": 1.1819231957173682e-05, + "loss": 0.4645, + "step": 8065 + }, + { + "epoch": 1.38, + "learning_rate": 1.1817421265717765e-05, + "loss": 0.4404, + "step": 8066 + }, + { + "epoch": 1.38, + "learning_rate": 1.181561051263835e-05, + "loss": 0.4198, + "step": 8067 + }, + { + "epoch": 1.38, + "learning_rate": 1.1813799697996827e-05, + "loss": 0.4297, + "step": 8068 + }, + { + "epoch": 1.38, + "learning_rate": 1.1811988821854597e-05, + "loss": 0.4164, + "step": 8069 + }, + { + "epoch": 1.38, + "learning_rate": 1.181017788427306e-05, + "loss": 0.4069, + "step": 8070 + }, + { + "epoch": 1.38, + "learning_rate": 1.1808366885313623e-05, + "loss": 0.4461, + "step": 8071 + }, + { + "epoch": 1.38, + "learning_rate": 1.1806555825037694e-05, + "loss": 0.4023, + "step": 8072 + }, + { + "epoch": 1.38, + "learning_rate": 1.1804744703506671e-05, + "loss": 0.4348, + "step": 8073 + }, + { + "epoch": 1.38, + "learning_rate": 1.1802933520781975e-05, + "loss": 0.4332, + "step": 8074 + }, + { + "epoch": 1.38, + "learning_rate": 1.1801122276925014e-05, + "loss": 0.4164, + "step": 8075 + }, + { + "epoch": 1.38, + "learning_rate": 1.1799310971997202e-05, + "loss": 0.4521, + "step": 8076 + }, + { + "epoch": 1.38, + "learning_rate": 1.1797499606059952e-05, + "loss": 0.4399, + "step": 8077 + }, + { + "epoch": 1.38, + "learning_rate": 1.1795688179174685e-05, + "loss": 0.4209, + "step": 8078 + }, + { + "epoch": 1.38, + "learning_rate": 1.1793876691402822e-05, + "loss": 0.433, + "step": 8079 + }, + { + "epoch": 1.38, + "learning_rate": 1.1792065142805783e-05, + "loss": 0.396, + "step": 8080 + }, + { + "epoch": 1.38, + "learning_rate": 1.1790253533444993e-05, + "loss": 0.4205, + "step": 8081 + }, + { + "epoch": 1.38, + "learning_rate": 1.178844186338188e-05, + "loss": 0.4259, + "step": 8082 + }, + { + "epoch": 1.38, + "learning_rate": 1.1786630132677874e-05, + "loss": 0.4093, + "step": 8083 + }, + { + "epoch": 1.38, + "learning_rate": 1.1784818341394402e-05, + "loss": 0.4556, + "step": 8084 + }, + { + "epoch": 1.38, + "learning_rate": 1.1783006489592896e-05, + "loss": 0.4338, + "step": 8085 + }, + { + "epoch": 1.38, + "learning_rate": 1.1781194577334794e-05, + "loss": 0.3982, + "step": 8086 + }, + { + "epoch": 1.38, + "learning_rate": 1.177938260468153e-05, + "loss": 0.415, + "step": 8087 + }, + { + "epoch": 1.38, + "learning_rate": 1.1777570571694544e-05, + "loss": 0.471, + "step": 8088 + }, + { + "epoch": 1.38, + "learning_rate": 1.1775758478435274e-05, + "loss": 0.4231, + "step": 8089 + }, + { + "epoch": 1.38, + "learning_rate": 1.1773946324965171e-05, + "loss": 0.4219, + "step": 8090 + }, + { + "epoch": 1.38, + "learning_rate": 1.1772134111345673e-05, + "loss": 0.3975, + "step": 8091 + }, + { + "epoch": 1.38, + "learning_rate": 1.1770321837638228e-05, + "loss": 0.4332, + "step": 8092 + }, + { + "epoch": 1.38, + "learning_rate": 1.1768509503904283e-05, + "loss": 0.414, + "step": 8093 + }, + { + "epoch": 1.38, + "learning_rate": 1.1766697110205292e-05, + "loss": 0.4274, + "step": 8094 + }, + { + "epoch": 1.38, + "learning_rate": 1.1764884656602711e-05, + "loss": 0.4337, + "step": 8095 + }, + { + "epoch": 1.38, + "learning_rate": 1.1763072143157988e-05, + "loss": 0.4519, + "step": 8096 + }, + { + "epoch": 1.38, + "learning_rate": 1.1761259569932581e-05, + "loss": 0.4523, + "step": 8097 + }, + { + "epoch": 1.38, + "learning_rate": 1.1759446936987957e-05, + "loss": 0.4095, + "step": 8098 + }, + { + "epoch": 1.38, + "learning_rate": 1.175763424438557e-05, + "loss": 0.4192, + "step": 8099 + }, + { + "epoch": 1.38, + "learning_rate": 1.1755821492186886e-05, + "loss": 0.4407, + "step": 8100 + }, + { + "epoch": 1.38, + "learning_rate": 1.1754008680453364e-05, + "loss": 0.4299, + "step": 8101 + }, + { + "epoch": 1.38, + "learning_rate": 1.1752195809246483e-05, + "loss": 0.4516, + "step": 8102 + }, + { + "epoch": 1.38, + "learning_rate": 1.1750382878627699e-05, + "loss": 0.411, + "step": 8103 + }, + { + "epoch": 1.38, + "learning_rate": 1.1748569888658494e-05, + "loss": 0.4619, + "step": 8104 + }, + { + "epoch": 1.38, + "learning_rate": 1.1746756839400335e-05, + "loss": 0.4674, + "step": 8105 + }, + { + "epoch": 1.38, + "learning_rate": 1.1744943730914702e-05, + "loss": 0.4129, + "step": 8106 + }, + { + "epoch": 1.38, + "learning_rate": 1.174313056326307e-05, + "loss": 0.4161, + "step": 8107 + }, + { + "epoch": 1.38, + "learning_rate": 1.1741317336506914e-05, + "loss": 0.4199, + "step": 8108 + }, + { + "epoch": 1.38, + "learning_rate": 1.173950405070772e-05, + "loss": 0.4323, + "step": 8109 + }, + { + "epoch": 1.38, + "learning_rate": 1.1737690705926972e-05, + "loss": 0.4663, + "step": 8110 + }, + { + "epoch": 1.38, + "learning_rate": 1.1735877302226152e-05, + "loss": 0.4331, + "step": 8111 + }, + { + "epoch": 1.38, + "learning_rate": 1.173406383966675e-05, + "loss": 0.3978, + "step": 8112 + }, + { + "epoch": 1.38, + "learning_rate": 1.1732250318310256e-05, + "loss": 0.4301, + "step": 8113 + }, + { + "epoch": 1.38, + "learning_rate": 1.1730436738218155e-05, + "loss": 0.4364, + "step": 8114 + }, + { + "epoch": 1.38, + "learning_rate": 1.172862309945195e-05, + "loss": 0.44, + "step": 8115 + }, + { + "epoch": 1.38, + "learning_rate": 1.1726809402073127e-05, + "loss": 0.3888, + "step": 8116 + }, + { + "epoch": 1.38, + "learning_rate": 1.1724995646143192e-05, + "loss": 0.4205, + "step": 8117 + }, + { + "epoch": 1.38, + "learning_rate": 1.1723181831723633e-05, + "loss": 0.4091, + "step": 8118 + }, + { + "epoch": 1.38, + "learning_rate": 1.1721367958875963e-05, + "loss": 0.4161, + "step": 8119 + }, + { + "epoch": 1.38, + "learning_rate": 1.1719554027661675e-05, + "loss": 0.4372, + "step": 8120 + }, + { + "epoch": 1.39, + "learning_rate": 1.1717740038142282e-05, + "loss": 0.4105, + "step": 8121 + }, + { + "epoch": 1.39, + "learning_rate": 1.1715925990379288e-05, + "loss": 0.4337, + "step": 8122 + }, + { + "epoch": 1.39, + "learning_rate": 1.1714111884434202e-05, + "loss": 0.41, + "step": 8123 + }, + { + "epoch": 1.39, + "learning_rate": 1.1712297720368538e-05, + "loss": 0.4407, + "step": 8124 + }, + { + "epoch": 1.39, + "learning_rate": 1.1710483498243803e-05, + "loss": 0.4054, + "step": 8125 + }, + { + "epoch": 1.39, + "learning_rate": 1.1708669218121516e-05, + "loss": 0.3923, + "step": 8126 + }, + { + "epoch": 1.39, + "learning_rate": 1.1706854880063194e-05, + "loss": 0.4266, + "step": 8127 + }, + { + "epoch": 1.39, + "learning_rate": 1.1705040484130355e-05, + "loss": 0.4208, + "step": 8128 + }, + { + "epoch": 1.39, + "learning_rate": 1.170322603038452e-05, + "loss": 0.4557, + "step": 8129 + }, + { + "epoch": 1.39, + "learning_rate": 1.1701411518887213e-05, + "loss": 0.4286, + "step": 8130 + }, + { + "epoch": 1.39, + "learning_rate": 1.1699596949699959e-05, + "loss": 0.4329, + "step": 8131 + }, + { + "epoch": 1.39, + "learning_rate": 1.1697782322884286e-05, + "loss": 0.4701, + "step": 8132 + }, + { + "epoch": 1.39, + "learning_rate": 1.1695967638501715e-05, + "loss": 0.4568, + "step": 8133 + }, + { + "epoch": 1.39, + "learning_rate": 1.1694152896613785e-05, + "loss": 0.4043, + "step": 8134 + }, + { + "epoch": 1.39, + "learning_rate": 1.1692338097282026e-05, + "loss": 0.4331, + "step": 8135 + }, + { + "epoch": 1.39, + "learning_rate": 1.1690523240567973e-05, + "loss": 0.4272, + "step": 8136 + }, + { + "epoch": 1.39, + "learning_rate": 1.168870832653316e-05, + "loss": 0.4301, + "step": 8137 + }, + { + "epoch": 1.39, + "learning_rate": 1.1686893355239129e-05, + "loss": 0.429, + "step": 8138 + }, + { + "epoch": 1.39, + "learning_rate": 1.1685078326747419e-05, + "loss": 0.4387, + "step": 8139 + }, + { + "epoch": 1.39, + "learning_rate": 1.1683263241119573e-05, + "loss": 0.4167, + "step": 8140 + }, + { + "epoch": 1.39, + "learning_rate": 1.1681448098417137e-05, + "loss": 0.4047, + "step": 8141 + }, + { + "epoch": 1.39, + "learning_rate": 1.1679632898701649e-05, + "loss": 0.4512, + "step": 8142 + }, + { + "epoch": 1.39, + "learning_rate": 1.1677817642034665e-05, + "loss": 0.4186, + "step": 8143 + }, + { + "epoch": 1.39, + "learning_rate": 1.1676002328477737e-05, + "loss": 0.4237, + "step": 8144 + }, + { + "epoch": 1.39, + "learning_rate": 1.167418695809241e-05, + "loss": 0.4359, + "step": 8145 + }, + { + "epoch": 1.39, + "learning_rate": 1.167237153094024e-05, + "loss": 0.4393, + "step": 8146 + }, + { + "epoch": 1.39, + "learning_rate": 1.167055604708279e-05, + "loss": 0.3941, + "step": 8147 + }, + { + "epoch": 1.39, + "learning_rate": 1.1668740506581608e-05, + "loss": 0.4354, + "step": 8148 + }, + { + "epoch": 1.39, + "learning_rate": 1.1666924909498258e-05, + "loss": 0.4318, + "step": 8149 + }, + { + "epoch": 1.39, + "learning_rate": 1.16651092558943e-05, + "loss": 0.4113, + "step": 8150 + }, + { + "epoch": 1.39, + "learning_rate": 1.1663293545831302e-05, + "loss": 0.4394, + "step": 8151 + }, + { + "epoch": 1.39, + "learning_rate": 1.1661477779370824e-05, + "loss": 0.4163, + "step": 8152 + }, + { + "epoch": 1.39, + "learning_rate": 1.1659661956574436e-05, + "loss": 0.4461, + "step": 8153 + }, + { + "epoch": 1.39, + "learning_rate": 1.1657846077503709e-05, + "loss": 0.4259, + "step": 8154 + }, + { + "epoch": 1.39, + "learning_rate": 1.165603014222021e-05, + "loss": 0.4478, + "step": 8155 + }, + { + "epoch": 1.39, + "learning_rate": 1.1654214150785516e-05, + "loss": 0.4117, + "step": 8156 + }, + { + "epoch": 1.39, + "learning_rate": 1.16523981032612e-05, + "loss": 0.4397, + "step": 8157 + }, + { + "epoch": 1.39, + "learning_rate": 1.1650581999708836e-05, + "loss": 0.3983, + "step": 8158 + }, + { + "epoch": 1.39, + "learning_rate": 1.1648765840190012e-05, + "loss": 0.4151, + "step": 8159 + }, + { + "epoch": 1.39, + "learning_rate": 1.1646949624766299e-05, + "loss": 0.395, + "step": 8160 + }, + { + "epoch": 1.39, + "learning_rate": 1.1645133353499285e-05, + "loss": 0.3844, + "step": 8161 + }, + { + "epoch": 1.39, + "learning_rate": 1.1643317026450554e-05, + "loss": 0.4078, + "step": 8162 + }, + { + "epoch": 1.39, + "learning_rate": 1.164150064368169e-05, + "loss": 0.392, + "step": 8163 + }, + { + "epoch": 1.39, + "learning_rate": 1.1639684205254282e-05, + "loss": 0.4474, + "step": 8164 + }, + { + "epoch": 1.39, + "learning_rate": 1.1637867711229922e-05, + "loss": 0.4383, + "step": 8165 + }, + { + "epoch": 1.39, + "learning_rate": 1.1636051161670202e-05, + "loss": 0.4896, + "step": 8166 + }, + { + "epoch": 1.39, + "learning_rate": 1.1634234556636713e-05, + "loss": 0.4077, + "step": 8167 + }, + { + "epoch": 1.39, + "learning_rate": 1.163241789619105e-05, + "loss": 0.403, + "step": 8168 + }, + { + "epoch": 1.39, + "learning_rate": 1.1630601180394819e-05, + "loss": 0.4329, + "step": 8169 + }, + { + "epoch": 1.39, + "learning_rate": 1.1628784409309613e-05, + "loss": 0.3984, + "step": 8170 + }, + { + "epoch": 1.39, + "learning_rate": 1.1626967582997033e-05, + "loss": 0.4411, + "step": 8171 + }, + { + "epoch": 1.39, + "learning_rate": 1.1625150701518684e-05, + "loss": 0.4174, + "step": 8172 + }, + { + "epoch": 1.39, + "learning_rate": 1.162333376493617e-05, + "loss": 0.4344, + "step": 8173 + }, + { + "epoch": 1.39, + "learning_rate": 1.1621516773311102e-05, + "loss": 0.4312, + "step": 8174 + }, + { + "epoch": 1.39, + "learning_rate": 1.1619699726705082e-05, + "loss": 0.4051, + "step": 8175 + }, + { + "epoch": 1.39, + "learning_rate": 1.1617882625179726e-05, + "loss": 0.405, + "step": 8176 + }, + { + "epoch": 1.39, + "learning_rate": 1.1616065468796649e-05, + "loss": 0.4239, + "step": 8177 + }, + { + "epoch": 1.39, + "learning_rate": 1.1614248257617457e-05, + "loss": 0.4227, + "step": 8178 + }, + { + "epoch": 1.39, + "learning_rate": 1.1612430991703775e-05, + "loss": 0.4414, + "step": 8179 + }, + { + "epoch": 1.4, + "learning_rate": 1.1610613671117217e-05, + "loss": 0.4195, + "step": 8180 + }, + { + "epoch": 1.4, + "learning_rate": 1.1608796295919404e-05, + "loss": 0.4165, + "step": 8181 + }, + { + "epoch": 1.4, + "learning_rate": 1.1606978866171956e-05, + "loss": 0.4488, + "step": 8182 + }, + { + "epoch": 1.4, + "learning_rate": 1.1605161381936499e-05, + "loss": 0.4367, + "step": 8183 + }, + { + "epoch": 1.4, + "learning_rate": 1.1603343843274657e-05, + "loss": 0.416, + "step": 8184 + }, + { + "epoch": 1.4, + "learning_rate": 1.1601526250248063e-05, + "loss": 0.4057, + "step": 8185 + }, + { + "epoch": 1.4, + "learning_rate": 1.1599708602918338e-05, + "loss": 0.4286, + "step": 8186 + }, + { + "epoch": 1.4, + "learning_rate": 1.1597890901347118e-05, + "loss": 0.432, + "step": 8187 + }, + { + "epoch": 1.4, + "learning_rate": 1.1596073145596034e-05, + "loss": 0.4363, + "step": 8188 + }, + { + "epoch": 1.4, + "learning_rate": 1.1594255335726725e-05, + "loss": 0.4026, + "step": 8189 + }, + { + "epoch": 1.4, + "learning_rate": 1.1592437471800822e-05, + "loss": 0.4173, + "step": 8190 + }, + { + "epoch": 1.4, + "learning_rate": 1.1590619553879964e-05, + "loss": 0.4347, + "step": 8191 + }, + { + "epoch": 1.4, + "learning_rate": 1.1588801582025799e-05, + "loss": 0.4144, + "step": 8192 + }, + { + "epoch": 1.4, + "learning_rate": 1.158698355629996e-05, + "loss": 0.4117, + "step": 8193 + }, + { + "epoch": 1.4, + "learning_rate": 1.1585165476764093e-05, + "loss": 0.4301, + "step": 8194 + }, + { + "epoch": 1.4, + "learning_rate": 1.158334734347985e-05, + "loss": 0.4209, + "step": 8195 + }, + { + "epoch": 1.4, + "learning_rate": 1.1581529156508873e-05, + "loss": 0.4357, + "step": 8196 + }, + { + "epoch": 1.4, + "learning_rate": 1.157971091591281e-05, + "loss": 0.4007, + "step": 8197 + }, + { + "epoch": 1.4, + "learning_rate": 1.1577892621753315e-05, + "loss": 0.4578, + "step": 8198 + }, + { + "epoch": 1.4, + "learning_rate": 1.1576074274092042e-05, + "loss": 0.4158, + "step": 8199 + }, + { + "epoch": 1.4, + "learning_rate": 1.1574255872990645e-05, + "loss": 0.4106, + "step": 8200 + }, + { + "epoch": 1.4, + "learning_rate": 1.157243741851078e-05, + "loss": 0.443, + "step": 8201 + }, + { + "epoch": 1.4, + "learning_rate": 1.1570618910714109e-05, + "loss": 0.4287, + "step": 8202 + }, + { + "epoch": 1.4, + "learning_rate": 1.1568800349662285e-05, + "loss": 0.3918, + "step": 8203 + }, + { + "epoch": 1.4, + "learning_rate": 1.1566981735416977e-05, + "loss": 0.4055, + "step": 8204 + }, + { + "epoch": 1.4, + "learning_rate": 1.1565163068039847e-05, + "loss": 0.4416, + "step": 8205 + }, + { + "epoch": 1.4, + "learning_rate": 1.1563344347592557e-05, + "loss": 0.4657, + "step": 8206 + }, + { + "epoch": 1.4, + "learning_rate": 1.1561525574136783e-05, + "loss": 0.4625, + "step": 8207 + }, + { + "epoch": 1.4, + "learning_rate": 1.1559706747734186e-05, + "loss": 0.4294, + "step": 8208 + }, + { + "epoch": 1.4, + "learning_rate": 1.1557887868446441e-05, + "loss": 0.4035, + "step": 8209 + }, + { + "epoch": 1.4, + "learning_rate": 1.1556068936335221e-05, + "loss": 0.4038, + "step": 8210 + }, + { + "epoch": 1.4, + "learning_rate": 1.15542499514622e-05, + "loss": 0.4031, + "step": 8211 + }, + { + "epoch": 1.4, + "learning_rate": 1.1552430913889053e-05, + "loss": 0.439, + "step": 8212 + }, + { + "epoch": 1.4, + "learning_rate": 1.155061182367746e-05, + "loss": 0.4356, + "step": 8213 + }, + { + "epoch": 1.4, + "learning_rate": 1.15487926808891e-05, + "loss": 0.4601, + "step": 8214 + }, + { + "epoch": 1.4, + "learning_rate": 1.154697348558566e-05, + "loss": 0.4331, + "step": 8215 + }, + { + "epoch": 1.4, + "learning_rate": 1.1545154237828815e-05, + "loss": 0.4088, + "step": 8216 + }, + { + "epoch": 1.4, + "learning_rate": 1.1543334937680256e-05, + "loss": 0.4286, + "step": 8217 + }, + { + "epoch": 1.4, + "learning_rate": 1.1541515585201672e-05, + "loss": 0.4495, + "step": 8218 + }, + { + "epoch": 1.4, + "learning_rate": 1.153969618045475e-05, + "loss": 0.4273, + "step": 8219 + }, + { + "epoch": 1.4, + "learning_rate": 1.1537876723501175e-05, + "loss": 0.4101, + "step": 8220 + }, + { + "epoch": 1.4, + "learning_rate": 1.1536057214402645e-05, + "loss": 0.4195, + "step": 8221 + }, + { + "epoch": 1.4, + "learning_rate": 1.1534237653220857e-05, + "loss": 0.4247, + "step": 8222 + }, + { + "epoch": 1.4, + "learning_rate": 1.1532418040017502e-05, + "loss": 0.4135, + "step": 8223 + }, + { + "epoch": 1.4, + "learning_rate": 1.1530598374854279e-05, + "loss": 0.3991, + "step": 8224 + }, + { + "epoch": 1.4, + "learning_rate": 1.1528778657792887e-05, + "loss": 0.4278, + "step": 8225 + }, + { + "epoch": 1.4, + "learning_rate": 1.1526958888895032e-05, + "loss": 0.437, + "step": 8226 + }, + { + "epoch": 1.4, + "learning_rate": 1.1525139068222413e-05, + "loss": 0.4514, + "step": 8227 + }, + { + "epoch": 1.4, + "learning_rate": 1.1523319195836734e-05, + "loss": 0.441, + "step": 8228 + }, + { + "epoch": 1.4, + "learning_rate": 1.1521499271799703e-05, + "loss": 0.385, + "step": 8229 + }, + { + "epoch": 1.4, + "learning_rate": 1.151967929617303e-05, + "loss": 0.4693, + "step": 8230 + }, + { + "epoch": 1.4, + "learning_rate": 1.1517859269018421e-05, + "loss": 0.4261, + "step": 8231 + }, + { + "epoch": 1.4, + "learning_rate": 1.1516039190397592e-05, + "loss": 0.4084, + "step": 8232 + }, + { + "epoch": 1.4, + "learning_rate": 1.1514219060372254e-05, + "loss": 0.4066, + "step": 8233 + }, + { + "epoch": 1.4, + "learning_rate": 1.1512398879004126e-05, + "loss": 0.432, + "step": 8234 + }, + { + "epoch": 1.4, + "learning_rate": 1.1510578646354922e-05, + "loss": 0.4308, + "step": 8235 + }, + { + "epoch": 1.4, + "learning_rate": 1.1508758362486358e-05, + "loss": 0.4174, + "step": 8236 + }, + { + "epoch": 1.4, + "learning_rate": 1.150693802746016e-05, + "loss": 0.4126, + "step": 8237 + }, + { + "epoch": 1.4, + "learning_rate": 1.1505117641338051e-05, + "loss": 0.3827, + "step": 8238 + }, + { + "epoch": 1.41, + "learning_rate": 1.1503297204181746e-05, + "loss": 0.4688, + "step": 8239 + }, + { + "epoch": 1.41, + "learning_rate": 1.1501476716052982e-05, + "loss": 0.4515, + "step": 8240 + }, + { + "epoch": 1.41, + "learning_rate": 1.1499656177013481e-05, + "loss": 0.4147, + "step": 8241 + }, + { + "epoch": 1.41, + "learning_rate": 1.1497835587124971e-05, + "loss": 0.4068, + "step": 8242 + }, + { + "epoch": 1.41, + "learning_rate": 1.1496014946449185e-05, + "loss": 0.4365, + "step": 8243 + }, + { + "epoch": 1.41, + "learning_rate": 1.1494194255047854e-05, + "loss": 0.4109, + "step": 8244 + }, + { + "epoch": 1.41, + "learning_rate": 1.1492373512982717e-05, + "loss": 0.4521, + "step": 8245 + }, + { + "epoch": 1.41, + "learning_rate": 1.1490552720315504e-05, + "loss": 0.4364, + "step": 8246 + }, + { + "epoch": 1.41, + "learning_rate": 1.1488731877107957e-05, + "loss": 0.4488, + "step": 8247 + }, + { + "epoch": 1.41, + "learning_rate": 1.1486910983421812e-05, + "loss": 0.4235, + "step": 8248 + }, + { + "epoch": 1.41, + "learning_rate": 1.1485090039318816e-05, + "loss": 0.4381, + "step": 8249 + }, + { + "epoch": 1.41, + "learning_rate": 1.1483269044860706e-05, + "loss": 0.4374, + "step": 8250 + }, + { + "epoch": 1.41, + "learning_rate": 1.1481448000109233e-05, + "loss": 0.4458, + "step": 8251 + }, + { + "epoch": 1.41, + "learning_rate": 1.1479626905126133e-05, + "loss": 0.4379, + "step": 8252 + }, + { + "epoch": 1.41, + "learning_rate": 1.1477805759973164e-05, + "loss": 0.3979, + "step": 8253 + }, + { + "epoch": 1.41, + "learning_rate": 1.1475984564712071e-05, + "loss": 0.4166, + "step": 8254 + }, + { + "epoch": 1.41, + "learning_rate": 1.1474163319404608e-05, + "loss": 0.4093, + "step": 8255 + }, + { + "epoch": 1.41, + "learning_rate": 1.1472342024112526e-05, + "loss": 0.4342, + "step": 8256 + }, + { + "epoch": 1.41, + "learning_rate": 1.147052067889758e-05, + "loss": 0.4543, + "step": 8257 + }, + { + "epoch": 1.41, + "learning_rate": 1.146869928382153e-05, + "loss": 0.4279, + "step": 8258 + }, + { + "epoch": 1.41, + "learning_rate": 1.1466877838946126e-05, + "loss": 0.396, + "step": 8259 + }, + { + "epoch": 1.41, + "learning_rate": 1.1465056344333138e-05, + "loss": 0.4221, + "step": 8260 + }, + { + "epoch": 1.41, + "learning_rate": 1.1463234800044323e-05, + "loss": 0.4495, + "step": 8261 + }, + { + "epoch": 1.41, + "learning_rate": 1.146141320614144e-05, + "loss": 0.4331, + "step": 8262 + }, + { + "epoch": 1.41, + "learning_rate": 1.145959156268626e-05, + "loss": 0.4133, + "step": 8263 + }, + { + "epoch": 1.41, + "learning_rate": 1.1457769869740551e-05, + "loss": 0.4454, + "step": 8264 + }, + { + "epoch": 1.41, + "learning_rate": 1.1455948127366077e-05, + "loss": 0.4306, + "step": 8265 + }, + { + "epoch": 1.41, + "learning_rate": 1.1454126335624612e-05, + "loss": 0.3953, + "step": 8266 + }, + { + "epoch": 1.41, + "learning_rate": 1.145230449457792e-05, + "loss": 0.446, + "step": 8267 + }, + { + "epoch": 1.41, + "learning_rate": 1.1450482604287783e-05, + "loss": 0.4256, + "step": 8268 + }, + { + "epoch": 1.41, + "learning_rate": 1.1448660664815969e-05, + "loss": 0.4203, + "step": 8269 + }, + { + "epoch": 1.41, + "learning_rate": 1.1446838676224262e-05, + "loss": 0.4166, + "step": 8270 + }, + { + "epoch": 1.41, + "learning_rate": 1.1445016638574432e-05, + "loss": 0.4466, + "step": 8271 + }, + { + "epoch": 1.41, + "learning_rate": 1.1443194551928267e-05, + "loss": 0.4465, + "step": 8272 + }, + { + "epoch": 1.41, + "learning_rate": 1.1441372416347545e-05, + "loss": 0.4339, + "step": 8273 + }, + { + "epoch": 1.41, + "learning_rate": 1.143955023189405e-05, + "loss": 0.4265, + "step": 8274 + }, + { + "epoch": 1.41, + "learning_rate": 1.1437727998629566e-05, + "loss": 0.4296, + "step": 8275 + }, + { + "epoch": 1.41, + "learning_rate": 1.1435905716615878e-05, + "loss": 0.4137, + "step": 8276 + }, + { + "epoch": 1.41, + "learning_rate": 1.1434083385914778e-05, + "loss": 0.4328, + "step": 8277 + }, + { + "epoch": 1.41, + "learning_rate": 1.1432261006588055e-05, + "loss": 0.4375, + "step": 8278 + }, + { + "epoch": 1.41, + "learning_rate": 1.1430438578697501e-05, + "loss": 0.4005, + "step": 8279 + }, + { + "epoch": 1.41, + "learning_rate": 1.1428616102304905e-05, + "loss": 0.4327, + "step": 8280 + }, + { + "epoch": 1.41, + "learning_rate": 1.1426793577472069e-05, + "loss": 0.4134, + "step": 8281 + }, + { + "epoch": 1.41, + "learning_rate": 1.1424971004260785e-05, + "loss": 0.425, + "step": 8282 + }, + { + "epoch": 1.41, + "learning_rate": 1.1423148382732854e-05, + "loss": 0.3929, + "step": 8283 + }, + { + "epoch": 1.41, + "learning_rate": 1.1421325712950071e-05, + "loss": 0.4303, + "step": 8284 + }, + { + "epoch": 1.41, + "learning_rate": 1.1419502994974241e-05, + "loss": 0.4374, + "step": 8285 + }, + { + "epoch": 1.41, + "learning_rate": 1.1417680228867167e-05, + "loss": 0.3824, + "step": 8286 + }, + { + "epoch": 1.41, + "learning_rate": 1.1415857414690652e-05, + "loss": 0.3981, + "step": 8287 + }, + { + "epoch": 1.41, + "learning_rate": 1.1414034552506504e-05, + "loss": 0.4808, + "step": 8288 + }, + { + "epoch": 1.41, + "learning_rate": 1.141221164237653e-05, + "loss": 0.4273, + "step": 8289 + }, + { + "epoch": 1.41, + "learning_rate": 1.141038868436254e-05, + "loss": 0.4683, + "step": 8290 + }, + { + "epoch": 1.41, + "learning_rate": 1.1408565678526351e-05, + "loss": 0.4468, + "step": 8291 + }, + { + "epoch": 1.41, + "learning_rate": 1.1406742624929764e-05, + "loss": 0.4384, + "step": 8292 + }, + { + "epoch": 1.41, + "learning_rate": 1.1404919523634601e-05, + "loss": 0.4293, + "step": 8293 + }, + { + "epoch": 1.41, + "learning_rate": 1.1403096374702678e-05, + "loss": 0.403, + "step": 8294 + }, + { + "epoch": 1.41, + "learning_rate": 1.1401273178195811e-05, + "loss": 0.4273, + "step": 8295 + }, + { + "epoch": 1.41, + "learning_rate": 1.139944993417582e-05, + "loss": 0.446, + "step": 8296 + }, + { + "epoch": 1.42, + "learning_rate": 1.1397626642704523e-05, + "loss": 0.4386, + "step": 8297 + }, + { + "epoch": 1.42, + "learning_rate": 1.1395803303843749e-05, + "loss": 0.4382, + "step": 8298 + }, + { + "epoch": 1.42, + "learning_rate": 1.1393979917655318e-05, + "loss": 0.4414, + "step": 8299 + }, + { + "epoch": 1.42, + "learning_rate": 1.1392156484201055e-05, + "loss": 0.4545, + "step": 8300 + }, + { + "epoch": 1.42, + "learning_rate": 1.139033300354279e-05, + "loss": 0.4487, + "step": 8301 + }, + { + "epoch": 1.42, + "learning_rate": 1.138850947574235e-05, + "loss": 0.4075, + "step": 8302 + }, + { + "epoch": 1.42, + "learning_rate": 1.1386685900861565e-05, + "loss": 0.4403, + "step": 8303 + }, + { + "epoch": 1.42, + "learning_rate": 1.1384862278962269e-05, + "loss": 0.4314, + "step": 8304 + }, + { + "epoch": 1.42, + "learning_rate": 1.1383038610106297e-05, + "loss": 0.4423, + "step": 8305 + }, + { + "epoch": 1.42, + "learning_rate": 1.1381214894355479e-05, + "loss": 0.4391, + "step": 8306 + }, + { + "epoch": 1.42, + "learning_rate": 1.1379391131771657e-05, + "loss": 0.4536, + "step": 8307 + }, + { + "epoch": 1.42, + "learning_rate": 1.137756732241667e-05, + "loss": 0.4296, + "step": 8308 + }, + { + "epoch": 1.42, + "learning_rate": 1.1375743466352354e-05, + "loss": 0.4411, + "step": 8309 + }, + { + "epoch": 1.42, + "learning_rate": 1.1373919563640551e-05, + "loss": 0.4744, + "step": 8310 + }, + { + "epoch": 1.42, + "learning_rate": 1.1372095614343109e-05, + "loss": 0.422, + "step": 8311 + }, + { + "epoch": 1.42, + "learning_rate": 1.1370271618521867e-05, + "loss": 0.4138, + "step": 8312 + }, + { + "epoch": 1.42, + "learning_rate": 1.1368447576238675e-05, + "loss": 0.4312, + "step": 8313 + }, + { + "epoch": 1.42, + "learning_rate": 1.1366623487555382e-05, + "loss": 0.4364, + "step": 8314 + }, + { + "epoch": 1.42, + "learning_rate": 1.1364799352533833e-05, + "loss": 0.4034, + "step": 8315 + }, + { + "epoch": 1.42, + "learning_rate": 1.1362975171235884e-05, + "loss": 0.4039, + "step": 8316 + }, + { + "epoch": 1.42, + "learning_rate": 1.1361150943723385e-05, + "loss": 0.4527, + "step": 8317 + }, + { + "epoch": 1.42, + "learning_rate": 1.135932667005819e-05, + "loss": 0.3953, + "step": 8318 + }, + { + "epoch": 1.42, + "learning_rate": 1.1357502350302156e-05, + "loss": 0.3912, + "step": 8319 + }, + { + "epoch": 1.42, + "learning_rate": 1.1355677984517141e-05, + "loss": 0.425, + "step": 8320 + }, + { + "epoch": 1.42, + "learning_rate": 1.1353853572765002e-05, + "loss": 0.4299, + "step": 8321 + }, + { + "epoch": 1.42, + "learning_rate": 1.1352029115107603e-05, + "loss": 0.4181, + "step": 8322 + }, + { + "epoch": 1.42, + "learning_rate": 1.1350204611606802e-05, + "loss": 0.4332, + "step": 8323 + }, + { + "epoch": 1.42, + "learning_rate": 1.1348380062324464e-05, + "loss": 0.3934, + "step": 8324 + }, + { + "epoch": 1.42, + "learning_rate": 1.1346555467322456e-05, + "loss": 0.4221, + "step": 8325 + }, + { + "epoch": 1.42, + "learning_rate": 1.134473082666264e-05, + "loss": 0.4352, + "step": 8326 + }, + { + "epoch": 1.42, + "learning_rate": 1.1342906140406891e-05, + "loss": 0.4337, + "step": 8327 + }, + { + "epoch": 1.42, + "learning_rate": 1.1341081408617075e-05, + "loss": 0.4206, + "step": 8328 + }, + { + "epoch": 1.42, + "learning_rate": 1.1339256631355062e-05, + "loss": 0.4078, + "step": 8329 + }, + { + "epoch": 1.42, + "learning_rate": 1.133743180868273e-05, + "loss": 0.457, + "step": 8330 + }, + { + "epoch": 1.42, + "learning_rate": 1.1335606940661947e-05, + "loss": 0.431, + "step": 8331 + }, + { + "epoch": 1.42, + "learning_rate": 1.1333782027354596e-05, + "loss": 0.399, + "step": 8332 + }, + { + "epoch": 1.42, + "learning_rate": 1.1331957068822548e-05, + "loss": 0.3936, + "step": 8333 + }, + { + "epoch": 1.42, + "learning_rate": 1.1330132065127683e-05, + "loss": 0.4464, + "step": 8334 + }, + { + "epoch": 1.42, + "learning_rate": 1.1328307016331888e-05, + "loss": 0.4348, + "step": 8335 + }, + { + "epoch": 1.42, + "learning_rate": 1.1326481922497037e-05, + "loss": 0.4515, + "step": 8336 + }, + { + "epoch": 1.42, + "learning_rate": 1.1324656783685018e-05, + "loss": 0.415, + "step": 8337 + }, + { + "epoch": 1.42, + "learning_rate": 1.1322831599957715e-05, + "loss": 0.4119, + "step": 8338 + }, + { + "epoch": 1.42, + "learning_rate": 1.1321006371377019e-05, + "loss": 0.4334, + "step": 8339 + }, + { + "epoch": 1.42, + "learning_rate": 1.131918109800481e-05, + "loss": 0.4312, + "step": 8340 + }, + { + "epoch": 1.42, + "learning_rate": 1.1317355779902984e-05, + "loss": 0.4655, + "step": 8341 + }, + { + "epoch": 1.42, + "learning_rate": 1.1315530417133427e-05, + "loss": 0.4319, + "step": 8342 + }, + { + "epoch": 1.42, + "learning_rate": 1.131370500975804e-05, + "loss": 0.4444, + "step": 8343 + }, + { + "epoch": 1.42, + "learning_rate": 1.131187955783871e-05, + "loss": 0.4138, + "step": 8344 + }, + { + "epoch": 1.42, + "learning_rate": 1.1310054061437334e-05, + "loss": 0.4376, + "step": 8345 + }, + { + "epoch": 1.42, + "learning_rate": 1.1308228520615814e-05, + "loss": 0.4251, + "step": 8346 + }, + { + "epoch": 1.42, + "learning_rate": 1.1306402935436044e-05, + "loss": 0.4192, + "step": 8347 + }, + { + "epoch": 1.42, + "learning_rate": 1.1304577305959926e-05, + "loss": 0.4341, + "step": 8348 + }, + { + "epoch": 1.42, + "learning_rate": 1.1302751632249358e-05, + "loss": 0.4134, + "step": 8349 + }, + { + "epoch": 1.42, + "learning_rate": 1.130092591436625e-05, + "loss": 0.4272, + "step": 8350 + }, + { + "epoch": 1.42, + "learning_rate": 1.1299100152372503e-05, + "loss": 0.4366, + "step": 8351 + }, + { + "epoch": 1.42, + "learning_rate": 1.1297274346330022e-05, + "loss": 0.4236, + "step": 8352 + }, + { + "epoch": 1.42, + "learning_rate": 1.1295448496300719e-05, + "loss": 0.4188, + "step": 8353 + }, + { + "epoch": 1.42, + "learning_rate": 1.1293622602346501e-05, + "loss": 0.4364, + "step": 8354 + }, + { + "epoch": 1.42, + "learning_rate": 1.129179666452928e-05, + "loss": 0.4023, + "step": 8355 + }, + { + "epoch": 1.43, + "learning_rate": 1.1289970682910964e-05, + "loss": 0.4197, + "step": 8356 + }, + { + "epoch": 1.43, + "learning_rate": 1.1288144657553471e-05, + "loss": 0.3997, + "step": 8357 + }, + { + "epoch": 1.43, + "learning_rate": 1.1286318588518717e-05, + "loss": 0.4253, + "step": 8358 + }, + { + "epoch": 1.43, + "learning_rate": 1.1284492475868614e-05, + "loss": 0.3965, + "step": 8359 + }, + { + "epoch": 1.43, + "learning_rate": 1.1282666319665083e-05, + "loss": 0.4084, + "step": 8360 + }, + { + "epoch": 1.43, + "learning_rate": 1.1280840119970047e-05, + "loss": 0.4686, + "step": 8361 + }, + { + "epoch": 1.43, + "learning_rate": 1.1279013876845421e-05, + "loss": 0.4259, + "step": 8362 + }, + { + "epoch": 1.43, + "learning_rate": 1.1277187590353133e-05, + "loss": 0.409, + "step": 8363 + }, + { + "epoch": 1.43, + "learning_rate": 1.1275361260555102e-05, + "loss": 0.4319, + "step": 8364 + }, + { + "epoch": 1.43, + "learning_rate": 1.1273534887513258e-05, + "loss": 0.4212, + "step": 8365 + }, + { + "epoch": 1.43, + "learning_rate": 1.1271708471289524e-05, + "loss": 0.406, + "step": 8366 + }, + { + "epoch": 1.43, + "learning_rate": 1.1269882011945832e-05, + "loss": 0.4228, + "step": 8367 + }, + { + "epoch": 1.43, + "learning_rate": 1.126805550954411e-05, + "loss": 0.4315, + "step": 8368 + }, + { + "epoch": 1.43, + "learning_rate": 1.1266228964146291e-05, + "loss": 0.4345, + "step": 8369 + }, + { + "epoch": 1.43, + "learning_rate": 1.1264402375814308e-05, + "loss": 0.4082, + "step": 8370 + }, + { + "epoch": 1.43, + "learning_rate": 1.1262575744610092e-05, + "loss": 0.4593, + "step": 8371 + }, + { + "epoch": 1.43, + "learning_rate": 1.1260749070595583e-05, + "loss": 0.43, + "step": 8372 + }, + { + "epoch": 1.43, + "learning_rate": 1.1258922353832715e-05, + "loss": 0.4511, + "step": 8373 + }, + { + "epoch": 1.43, + "learning_rate": 1.1257095594383426e-05, + "loss": 0.4216, + "step": 8374 + }, + { + "epoch": 1.43, + "learning_rate": 1.1255268792309662e-05, + "loss": 0.4306, + "step": 8375 + }, + { + "epoch": 1.43, + "learning_rate": 1.1253441947673355e-05, + "loss": 0.4245, + "step": 8376 + }, + { + "epoch": 1.43, + "learning_rate": 1.125161506053646e-05, + "loss": 0.4236, + "step": 8377 + }, + { + "epoch": 1.43, + "learning_rate": 1.1249788130960915e-05, + "loss": 0.4118, + "step": 8378 + }, + { + "epoch": 1.43, + "learning_rate": 1.1247961159008662e-05, + "loss": 0.4417, + "step": 8379 + }, + { + "epoch": 1.43, + "learning_rate": 1.1246134144741656e-05, + "loss": 0.4192, + "step": 8380 + }, + { + "epoch": 1.43, + "learning_rate": 1.124430708822184e-05, + "loss": 0.4431, + "step": 8381 + }, + { + "epoch": 1.43, + "learning_rate": 1.1242479989511165e-05, + "loss": 0.3828, + "step": 8382 + }, + { + "epoch": 1.43, + "learning_rate": 1.1240652848671584e-05, + "loss": 0.4607, + "step": 8383 + }, + { + "epoch": 1.43, + "learning_rate": 1.1238825665765054e-05, + "loss": 0.4601, + "step": 8384 + }, + { + "epoch": 1.43, + "learning_rate": 1.1236998440853524e-05, + "loss": 0.4181, + "step": 8385 + }, + { + "epoch": 1.43, + "learning_rate": 1.1235171173998952e-05, + "loss": 0.3889, + "step": 8386 + }, + { + "epoch": 1.43, + "learning_rate": 1.1233343865263292e-05, + "loss": 0.438, + "step": 8387 + }, + { + "epoch": 1.43, + "learning_rate": 1.123151651470851e-05, + "loss": 0.4095, + "step": 8388 + }, + { + "epoch": 1.43, + "learning_rate": 1.122968912239656e-05, + "loss": 0.4233, + "step": 8389 + }, + { + "epoch": 1.43, + "learning_rate": 1.1227861688389404e-05, + "loss": 0.4173, + "step": 8390 + }, + { + "epoch": 1.43, + "learning_rate": 1.1226034212749005e-05, + "loss": 0.4211, + "step": 8391 + }, + { + "epoch": 1.43, + "learning_rate": 1.1224206695537332e-05, + "loss": 0.3963, + "step": 8392 + }, + { + "epoch": 1.43, + "learning_rate": 1.1222379136816347e-05, + "loss": 0.4374, + "step": 8393 + }, + { + "epoch": 1.43, + "learning_rate": 1.1220551536648017e-05, + "loss": 0.4651, + "step": 8394 + }, + { + "epoch": 1.43, + "learning_rate": 1.1218723895094313e-05, + "loss": 0.3996, + "step": 8395 + }, + { + "epoch": 1.43, + "learning_rate": 1.1216896212217202e-05, + "loss": 0.4248, + "step": 8396 + }, + { + "epoch": 1.43, + "learning_rate": 1.1215068488078656e-05, + "loss": 0.4649, + "step": 8397 + }, + { + "epoch": 1.43, + "learning_rate": 1.121324072274065e-05, + "loss": 0.4011, + "step": 8398 + }, + { + "epoch": 1.43, + "learning_rate": 1.1211412916265158e-05, + "loss": 0.4076, + "step": 8399 + }, + { + "epoch": 1.43, + "learning_rate": 1.1209585068714153e-05, + "loss": 0.4432, + "step": 8400 + }, + { + "epoch": 1.43, + "learning_rate": 1.1207757180149614e-05, + "loss": 0.4483, + "step": 8401 + }, + { + "epoch": 1.43, + "learning_rate": 1.120592925063352e-05, + "loss": 0.4537, + "step": 8402 + }, + { + "epoch": 1.43, + "learning_rate": 1.1204101280227853e-05, + "loss": 0.4193, + "step": 8403 + }, + { + "epoch": 1.43, + "learning_rate": 1.1202273268994586e-05, + "loss": 0.4538, + "step": 8404 + }, + { + "epoch": 1.43, + "learning_rate": 1.1200445216995708e-05, + "loss": 0.4614, + "step": 8405 + }, + { + "epoch": 1.43, + "learning_rate": 1.1198617124293201e-05, + "loss": 0.4085, + "step": 8406 + }, + { + "epoch": 1.43, + "learning_rate": 1.1196788990949054e-05, + "loss": 0.402, + "step": 8407 + }, + { + "epoch": 1.43, + "learning_rate": 1.119496081702525e-05, + "loss": 0.4232, + "step": 8408 + }, + { + "epoch": 1.43, + "learning_rate": 1.1193132602583776e-05, + "loss": 0.4327, + "step": 8409 + }, + { + "epoch": 1.43, + "learning_rate": 1.1191304347686625e-05, + "loss": 0.4379, + "step": 8410 + }, + { + "epoch": 1.43, + "learning_rate": 1.1189476052395787e-05, + "loss": 0.4739, + "step": 8411 + }, + { + "epoch": 1.43, + "learning_rate": 1.1187647716773249e-05, + "loss": 0.3899, + "step": 8412 + }, + { + "epoch": 1.43, + "learning_rate": 1.1185819340881012e-05, + "loss": 0.4431, + "step": 8413 + }, + { + "epoch": 1.43, + "learning_rate": 1.1183990924781069e-05, + "loss": 0.4099, + "step": 8414 + }, + { + "epoch": 1.44, + "learning_rate": 1.1182162468535413e-05, + "loss": 0.4265, + "step": 8415 + }, + { + "epoch": 1.44, + "learning_rate": 1.1180333972206044e-05, + "loss": 0.4128, + "step": 8416 + }, + { + "epoch": 1.44, + "learning_rate": 1.1178505435854962e-05, + "loss": 0.4545, + "step": 8417 + }, + { + "epoch": 1.44, + "learning_rate": 1.117667685954417e-05, + "loss": 0.4438, + "step": 8418 + }, + { + "epoch": 1.44, + "learning_rate": 1.1174848243335659e-05, + "loss": 0.4359, + "step": 8419 + }, + { + "epoch": 1.44, + "learning_rate": 1.117301958729144e-05, + "loss": 0.4314, + "step": 8420 + }, + { + "epoch": 1.44, + "learning_rate": 1.117119089147352e-05, + "loss": 0.411, + "step": 8421 + }, + { + "epoch": 1.44, + "learning_rate": 1.1169362155943902e-05, + "loss": 0.4176, + "step": 8422 + }, + { + "epoch": 1.44, + "learning_rate": 1.1167533380764588e-05, + "loss": 0.4256, + "step": 8423 + }, + { + "epoch": 1.44, + "learning_rate": 1.1165704565997593e-05, + "loss": 0.4193, + "step": 8424 + }, + { + "epoch": 1.44, + "learning_rate": 1.1163875711704926e-05, + "loss": 0.4317, + "step": 8425 + }, + { + "epoch": 1.44, + "learning_rate": 1.1162046817948597e-05, + "loss": 0.4606, + "step": 8426 + }, + { + "epoch": 1.44, + "learning_rate": 1.1160217884790617e-05, + "loss": 0.3992, + "step": 8427 + }, + { + "epoch": 1.44, + "learning_rate": 1.1158388912293e-05, + "loss": 0.4154, + "step": 8428 + }, + { + "epoch": 1.44, + "learning_rate": 1.1156559900517767e-05, + "loss": 0.4665, + "step": 8429 + }, + { + "epoch": 1.44, + "learning_rate": 1.1154730849526926e-05, + "loss": 0.417, + "step": 8430 + }, + { + "epoch": 1.44, + "learning_rate": 1.11529017593825e-05, + "loss": 0.4098, + "step": 8431 + }, + { + "epoch": 1.44, + "learning_rate": 1.1151072630146504e-05, + "loss": 0.4098, + "step": 8432 + }, + { + "epoch": 1.44, + "learning_rate": 1.1149243461880967e-05, + "loss": 0.4247, + "step": 8433 + }, + { + "epoch": 1.44, + "learning_rate": 1.1147414254647904e-05, + "loss": 0.4329, + "step": 8434 + }, + { + "epoch": 1.44, + "learning_rate": 1.1145585008509336e-05, + "loss": 0.4488, + "step": 8435 + }, + { + "epoch": 1.44, + "learning_rate": 1.1143755723527292e-05, + "loss": 0.4525, + "step": 8436 + }, + { + "epoch": 1.44, + "learning_rate": 1.11419263997638e-05, + "loss": 0.4189, + "step": 8437 + }, + { + "epoch": 1.44, + "learning_rate": 1.114009703728088e-05, + "loss": 0.4395, + "step": 8438 + }, + { + "epoch": 1.44, + "learning_rate": 1.1138267636140566e-05, + "loss": 0.3991, + "step": 8439 + }, + { + "epoch": 1.44, + "learning_rate": 1.1136438196404882e-05, + "loss": 0.4022, + "step": 8440 + }, + { + "epoch": 1.44, + "learning_rate": 1.1134608718135866e-05, + "loss": 0.4213, + "step": 8441 + }, + { + "epoch": 1.44, + "learning_rate": 1.113277920139555e-05, + "loss": 0.4115, + "step": 8442 + }, + { + "epoch": 1.44, + "learning_rate": 1.1130949646245962e-05, + "loss": 0.4463, + "step": 8443 + }, + { + "epoch": 1.44, + "learning_rate": 1.1129120052749139e-05, + "loss": 0.4612, + "step": 8444 + }, + { + "epoch": 1.44, + "learning_rate": 1.1127290420967118e-05, + "loss": 0.4054, + "step": 8445 + }, + { + "epoch": 1.44, + "learning_rate": 1.1125460750961936e-05, + "loss": 0.4362, + "step": 8446 + }, + { + "epoch": 1.44, + "learning_rate": 1.1123631042795631e-05, + "loss": 0.4769, + "step": 8447 + }, + { + "epoch": 1.44, + "learning_rate": 1.1121801296530248e-05, + "loss": 0.4488, + "step": 8448 + }, + { + "epoch": 1.44, + "learning_rate": 1.1119971512227822e-05, + "loss": 0.4296, + "step": 8449 + }, + { + "epoch": 1.44, + "learning_rate": 1.1118141689950401e-05, + "loss": 0.4089, + "step": 8450 + }, + { + "epoch": 1.44, + "learning_rate": 1.1116311829760025e-05, + "loss": 0.4397, + "step": 8451 + }, + { + "epoch": 1.44, + "learning_rate": 1.1114481931718742e-05, + "loss": 0.434, + "step": 8452 + }, + { + "epoch": 1.44, + "learning_rate": 1.1112651995888595e-05, + "loss": 0.4445, + "step": 8453 + }, + { + "epoch": 1.44, + "learning_rate": 1.1110822022331633e-05, + "loss": 0.4107, + "step": 8454 + }, + { + "epoch": 1.44, + "learning_rate": 1.1108992011109907e-05, + "loss": 0.3979, + "step": 8455 + }, + { + "epoch": 1.44, + "learning_rate": 1.110716196228547e-05, + "loss": 0.4307, + "step": 8456 + }, + { + "epoch": 1.44, + "learning_rate": 1.1105331875920369e-05, + "loss": 0.449, + "step": 8457 + }, + { + "epoch": 1.44, + "learning_rate": 1.1103501752076658e-05, + "loss": 0.4733, + "step": 8458 + }, + { + "epoch": 1.44, + "learning_rate": 1.1101671590816392e-05, + "loss": 0.4198, + "step": 8459 + }, + { + "epoch": 1.44, + "learning_rate": 1.1099841392201625e-05, + "loss": 0.4234, + "step": 8460 + }, + { + "epoch": 1.44, + "learning_rate": 1.1098011156294412e-05, + "loss": 0.3791, + "step": 8461 + }, + { + "epoch": 1.44, + "learning_rate": 1.1096180883156815e-05, + "loss": 0.423, + "step": 8462 + }, + { + "epoch": 1.44, + "learning_rate": 1.1094350572850896e-05, + "loss": 0.4249, + "step": 8463 + }, + { + "epoch": 1.44, + "learning_rate": 1.1092520225438707e-05, + "loss": 0.4147, + "step": 8464 + }, + { + "epoch": 1.44, + "learning_rate": 1.1090689840982318e-05, + "loss": 0.4212, + "step": 8465 + }, + { + "epoch": 1.44, + "learning_rate": 1.1088859419543785e-05, + "loss": 0.412, + "step": 8466 + }, + { + "epoch": 1.44, + "learning_rate": 1.1087028961185178e-05, + "loss": 0.4049, + "step": 8467 + }, + { + "epoch": 1.44, + "learning_rate": 1.1085198465968558e-05, + "loss": 0.4368, + "step": 8468 + }, + { + "epoch": 1.44, + "learning_rate": 1.1083367933955995e-05, + "loss": 0.4511, + "step": 8469 + }, + { + "epoch": 1.44, + "learning_rate": 1.1081537365209556e-05, + "loss": 0.4572, + "step": 8470 + }, + { + "epoch": 1.44, + "learning_rate": 1.1079706759791311e-05, + "loss": 0.4113, + "step": 8471 + }, + { + "epoch": 1.44, + "learning_rate": 1.1077876117763328e-05, + "loss": 0.4504, + "step": 8472 + }, + { + "epoch": 1.45, + "learning_rate": 1.1076045439187684e-05, + "loss": 0.4381, + "step": 8473 + }, + { + "epoch": 1.45, + "learning_rate": 1.1074214724126449e-05, + "loss": 0.4093, + "step": 8474 + }, + { + "epoch": 1.45, + "learning_rate": 1.1072383972641692e-05, + "loss": 0.4496, + "step": 8475 + }, + { + "epoch": 1.45, + "learning_rate": 1.1070553184795496e-05, + "loss": 0.4405, + "step": 8476 + }, + { + "epoch": 1.45, + "learning_rate": 1.1068722360649932e-05, + "loss": 0.4066, + "step": 8477 + }, + { + "epoch": 1.45, + "learning_rate": 1.1066891500267087e-05, + "loss": 0.3915, + "step": 8478 + }, + { + "epoch": 1.45, + "learning_rate": 1.106506060370903e-05, + "loss": 0.4235, + "step": 8479 + }, + { + "epoch": 1.45, + "learning_rate": 1.1063229671037848e-05, + "loss": 0.3889, + "step": 8480 + }, + { + "epoch": 1.45, + "learning_rate": 1.106139870231562e-05, + "loss": 0.3884, + "step": 8481 + }, + { + "epoch": 1.45, + "learning_rate": 1.1059567697604429e-05, + "loss": 0.4275, + "step": 8482 + }, + { + "epoch": 1.45, + "learning_rate": 1.105773665696636e-05, + "loss": 0.4142, + "step": 8483 + }, + { + "epoch": 1.45, + "learning_rate": 1.1055905580463495e-05, + "loss": 0.4405, + "step": 8484 + }, + { + "epoch": 1.45, + "learning_rate": 1.1054074468157924e-05, + "loss": 0.4009, + "step": 8485 + }, + { + "epoch": 1.45, + "learning_rate": 1.1052243320111737e-05, + "loss": 0.4192, + "step": 8486 + }, + { + "epoch": 1.45, + "learning_rate": 1.1050412136387017e-05, + "loss": 0.4344, + "step": 8487 + }, + { + "epoch": 1.45, + "learning_rate": 1.1048580917045855e-05, + "loss": 0.4335, + "step": 8488 + }, + { + "epoch": 1.45, + "learning_rate": 1.1046749662150349e-05, + "loss": 0.3996, + "step": 8489 + }, + { + "epoch": 1.45, + "learning_rate": 1.1044918371762585e-05, + "loss": 0.4222, + "step": 8490 + }, + { + "epoch": 1.45, + "learning_rate": 1.1043087045944657e-05, + "loss": 0.4234, + "step": 8491 + }, + { + "epoch": 1.45, + "learning_rate": 1.104125568475866e-05, + "loss": 0.3775, + "step": 8492 + }, + { + "epoch": 1.45, + "learning_rate": 1.1039424288266697e-05, + "loss": 0.4076, + "step": 8493 + }, + { + "epoch": 1.45, + "learning_rate": 1.1037592856530857e-05, + "loss": 0.4577, + "step": 8494 + }, + { + "epoch": 1.45, + "learning_rate": 1.103576138961324e-05, + "loss": 0.4114, + "step": 8495 + }, + { + "epoch": 1.45, + "learning_rate": 1.1033929887575948e-05, + "loss": 0.43, + "step": 8496 + }, + { + "epoch": 1.45, + "learning_rate": 1.1032098350481083e-05, + "loss": 0.4391, + "step": 8497 + }, + { + "epoch": 1.45, + "learning_rate": 1.1030266778390748e-05, + "loss": 0.4036, + "step": 8498 + }, + { + "epoch": 1.45, + "learning_rate": 1.102843517136704e-05, + "loss": 0.4284, + "step": 8499 + }, + { + "epoch": 1.45, + "learning_rate": 1.1026603529472066e-05, + "loss": 0.4087, + "step": 8500 + }, + { + "epoch": 1.45, + "learning_rate": 1.1024771852767937e-05, + "loss": 0.4481, + "step": 8501 + }, + { + "epoch": 1.45, + "learning_rate": 1.1022940141316753e-05, + "loss": 0.4097, + "step": 8502 + }, + { + "epoch": 1.45, + "learning_rate": 1.1021108395180623e-05, + "loss": 0.3914, + "step": 8503 + }, + { + "epoch": 1.45, + "learning_rate": 1.1019276614421663e-05, + "loss": 0.4303, + "step": 8504 + }, + { + "epoch": 1.45, + "learning_rate": 1.1017444799101974e-05, + "loss": 0.3986, + "step": 8505 + }, + { + "epoch": 1.45, + "learning_rate": 1.1015612949283678e-05, + "loss": 0.4273, + "step": 8506 + }, + { + "epoch": 1.45, + "learning_rate": 1.1013781065028877e-05, + "loss": 0.4453, + "step": 8507 + }, + { + "epoch": 1.45, + "learning_rate": 1.1011949146399692e-05, + "loss": 0.4213, + "step": 8508 + }, + { + "epoch": 1.45, + "learning_rate": 1.1010117193458234e-05, + "loss": 0.4047, + "step": 8509 + }, + { + "epoch": 1.45, + "learning_rate": 1.100828520626662e-05, + "loss": 0.4268, + "step": 8510 + }, + { + "epoch": 1.45, + "learning_rate": 1.1006453184886969e-05, + "loss": 0.4208, + "step": 8511 + }, + { + "epoch": 1.45, + "learning_rate": 1.1004621129381402e-05, + "loss": 0.4104, + "step": 8512 + }, + { + "epoch": 1.45, + "learning_rate": 1.1002789039812032e-05, + "loss": 0.4033, + "step": 8513 + }, + { + "epoch": 1.45, + "learning_rate": 1.1000956916240985e-05, + "loss": 0.4003, + "step": 8514 + }, + { + "epoch": 1.45, + "learning_rate": 1.0999124758730381e-05, + "loss": 0.3995, + "step": 8515 + }, + { + "epoch": 1.45, + "learning_rate": 1.0997292567342347e-05, + "loss": 0.4239, + "step": 8516 + }, + { + "epoch": 1.45, + "learning_rate": 1.0995460342138997e-05, + "loss": 0.4555, + "step": 8517 + }, + { + "epoch": 1.45, + "learning_rate": 1.0993628083182468e-05, + "loss": 0.4078, + "step": 8518 + }, + { + "epoch": 1.45, + "learning_rate": 1.0991795790534882e-05, + "loss": 0.3824, + "step": 8519 + }, + { + "epoch": 1.45, + "learning_rate": 1.0989963464258366e-05, + "loss": 0.3975, + "step": 8520 + }, + { + "epoch": 1.45, + "learning_rate": 1.0988131104415051e-05, + "loss": 0.4246, + "step": 8521 + }, + { + "epoch": 1.45, + "learning_rate": 1.0986298711067062e-05, + "loss": 0.4142, + "step": 8522 + }, + { + "epoch": 1.45, + "learning_rate": 1.098446628427654e-05, + "loss": 0.42, + "step": 8523 + }, + { + "epoch": 1.45, + "learning_rate": 1.0982633824105606e-05, + "loss": 0.4626, + "step": 8524 + }, + { + "epoch": 1.45, + "learning_rate": 1.09808013306164e-05, + "loss": 0.4047, + "step": 8525 + }, + { + "epoch": 1.45, + "learning_rate": 1.0978968803871055e-05, + "loss": 0.4736, + "step": 8526 + }, + { + "epoch": 1.45, + "learning_rate": 1.0977136243931708e-05, + "loss": 0.4311, + "step": 8527 + }, + { + "epoch": 1.45, + "learning_rate": 1.0975303650860494e-05, + "loss": 0.4324, + "step": 8528 + }, + { + "epoch": 1.45, + "learning_rate": 1.0973471024719554e-05, + "loss": 0.4054, + "step": 8529 + }, + { + "epoch": 1.45, + "learning_rate": 1.097163836557102e-05, + "loss": 0.4513, + "step": 8530 + }, + { + "epoch": 1.45, + "learning_rate": 1.0969805673477041e-05, + "loss": 0.3862, + "step": 8531 + }, + { + "epoch": 1.46, + "learning_rate": 1.096797294849975e-05, + "loss": 0.4052, + "step": 8532 + }, + { + "epoch": 1.46, + "learning_rate": 1.0966140190701295e-05, + "loss": 0.4026, + "step": 8533 + }, + { + "epoch": 1.46, + "learning_rate": 1.096430740014382e-05, + "loss": 0.4028, + "step": 8534 + }, + { + "epoch": 1.46, + "learning_rate": 1.0962474576889466e-05, + "loss": 0.3915, + "step": 8535 + }, + { + "epoch": 1.46, + "learning_rate": 1.0960641721000381e-05, + "loss": 0.4035, + "step": 8536 + }, + { + "epoch": 1.46, + "learning_rate": 1.0958808832538712e-05, + "loss": 0.4163, + "step": 8537 + }, + { + "epoch": 1.46, + "learning_rate": 1.0956975911566606e-05, + "loss": 0.4627, + "step": 8538 + }, + { + "epoch": 1.46, + "learning_rate": 1.095514295814621e-05, + "loss": 0.4304, + "step": 8539 + }, + { + "epoch": 1.46, + "learning_rate": 1.0953309972339678e-05, + "loss": 0.4187, + "step": 8540 + }, + { + "epoch": 1.46, + "learning_rate": 1.0951476954209159e-05, + "loss": 0.4155, + "step": 8541 + }, + { + "epoch": 1.46, + "learning_rate": 1.0949643903816808e-05, + "loss": 0.4307, + "step": 8542 + }, + { + "epoch": 1.46, + "learning_rate": 1.0947810821224774e-05, + "loss": 0.4389, + "step": 8543 + }, + { + "epoch": 1.46, + "learning_rate": 1.0945977706495215e-05, + "loss": 0.4321, + "step": 8544 + }, + { + "epoch": 1.46, + "learning_rate": 1.0944144559690286e-05, + "loss": 0.4301, + "step": 8545 + }, + { + "epoch": 1.46, + "learning_rate": 1.0942311380872146e-05, + "loss": 0.4481, + "step": 8546 + }, + { + "epoch": 1.46, + "learning_rate": 1.0940478170102947e-05, + "loss": 0.4443, + "step": 8547 + }, + { + "epoch": 1.46, + "learning_rate": 1.0938644927444849e-05, + "loss": 0.404, + "step": 8548 + }, + { + "epoch": 1.46, + "learning_rate": 1.0936811652960015e-05, + "loss": 0.4452, + "step": 8549 + }, + { + "epoch": 1.46, + "learning_rate": 1.0934978346710607e-05, + "loss": 0.4397, + "step": 8550 + }, + { + "epoch": 1.46, + "learning_rate": 1.0933145008758785e-05, + "loss": 0.4395, + "step": 8551 + }, + { + "epoch": 1.46, + "learning_rate": 1.093131163916671e-05, + "loss": 0.4106, + "step": 8552 + }, + { + "epoch": 1.46, + "learning_rate": 1.0929478237996552e-05, + "loss": 0.4279, + "step": 8553 + }, + { + "epoch": 1.46, + "learning_rate": 1.0927644805310473e-05, + "loss": 0.4118, + "step": 8554 + }, + { + "epoch": 1.46, + "learning_rate": 1.0925811341170636e-05, + "loss": 0.3986, + "step": 8555 + }, + { + "epoch": 1.46, + "learning_rate": 1.0923977845639213e-05, + "loss": 0.4041, + "step": 8556 + }, + { + "epoch": 1.46, + "learning_rate": 1.0922144318778373e-05, + "loss": 0.4166, + "step": 8557 + }, + { + "epoch": 1.46, + "learning_rate": 1.0920310760650281e-05, + "loss": 0.4081, + "step": 8558 + }, + { + "epoch": 1.46, + "learning_rate": 1.0918477171317112e-05, + "loss": 0.4071, + "step": 8559 + }, + { + "epoch": 1.46, + "learning_rate": 1.0916643550841037e-05, + "loss": 0.4303, + "step": 8560 + }, + { + "epoch": 1.46, + "learning_rate": 1.0914809899284228e-05, + "loss": 0.4418, + "step": 8561 + }, + { + "epoch": 1.46, + "learning_rate": 1.091297621670886e-05, + "loss": 0.4197, + "step": 8562 + }, + { + "epoch": 1.46, + "learning_rate": 1.0911142503177103e-05, + "loss": 0.415, + "step": 8563 + }, + { + "epoch": 1.46, + "learning_rate": 1.0909308758751141e-05, + "loss": 0.4067, + "step": 8564 + }, + { + "epoch": 1.46, + "learning_rate": 1.0907474983493144e-05, + "loss": 0.4151, + "step": 8565 + }, + { + "epoch": 1.46, + "learning_rate": 1.0905641177465293e-05, + "loss": 0.4433, + "step": 8566 + }, + { + "epoch": 1.46, + "learning_rate": 1.0903807340729767e-05, + "loss": 0.4254, + "step": 8567 + }, + { + "epoch": 1.46, + "learning_rate": 1.0901973473348746e-05, + "loss": 0.417, + "step": 8568 + }, + { + "epoch": 1.46, + "learning_rate": 1.0900139575384413e-05, + "loss": 0.4429, + "step": 8569 + }, + { + "epoch": 1.46, + "learning_rate": 1.0898305646898946e-05, + "loss": 0.4439, + "step": 8570 + }, + { + "epoch": 1.46, + "learning_rate": 1.0896471687954529e-05, + "loss": 0.4214, + "step": 8571 + }, + { + "epoch": 1.46, + "learning_rate": 1.0894637698613351e-05, + "loss": 0.4439, + "step": 8572 + }, + { + "epoch": 1.46, + "learning_rate": 1.0892803678937592e-05, + "loss": 0.4196, + "step": 8573 + }, + { + "epoch": 1.46, + "learning_rate": 1.0890969628989438e-05, + "loss": 0.3894, + "step": 8574 + }, + { + "epoch": 1.46, + "learning_rate": 1.088913554883108e-05, + "loss": 0.4277, + "step": 8575 + }, + { + "epoch": 1.46, + "learning_rate": 1.088730143852471e-05, + "loss": 0.4462, + "step": 8576 + }, + { + "epoch": 1.46, + "learning_rate": 1.0885467298132508e-05, + "loss": 0.4521, + "step": 8577 + }, + { + "epoch": 1.46, + "learning_rate": 1.0883633127716669e-05, + "loss": 0.4419, + "step": 8578 + }, + { + "epoch": 1.46, + "learning_rate": 1.0881798927339381e-05, + "loss": 0.4185, + "step": 8579 + }, + { + "epoch": 1.46, + "learning_rate": 1.0879964697062846e-05, + "loss": 0.4321, + "step": 8580 + }, + { + "epoch": 1.46, + "learning_rate": 1.0878130436949246e-05, + "loss": 0.4242, + "step": 8581 + }, + { + "epoch": 1.46, + "learning_rate": 1.0876296147060781e-05, + "loss": 0.4459, + "step": 8582 + }, + { + "epoch": 1.46, + "learning_rate": 1.0874461827459648e-05, + "loss": 0.4014, + "step": 8583 + }, + { + "epoch": 1.46, + "learning_rate": 1.087262747820804e-05, + "loss": 0.4496, + "step": 8584 + }, + { + "epoch": 1.46, + "learning_rate": 1.0870793099368158e-05, + "loss": 0.4269, + "step": 8585 + }, + { + "epoch": 1.46, + "learning_rate": 1.0868958691002196e-05, + "loss": 0.4577, + "step": 8586 + }, + { + "epoch": 1.46, + "learning_rate": 1.0867124253172358e-05, + "loss": 0.4734, + "step": 8587 + }, + { + "epoch": 1.46, + "learning_rate": 1.086528978594084e-05, + "loss": 0.4116, + "step": 8588 + }, + { + "epoch": 1.46, + "learning_rate": 1.0863455289369847e-05, + "loss": 0.4324, + "step": 8589 + }, + { + "epoch": 1.46, + "learning_rate": 1.0861620763521578e-05, + "loss": 0.4281, + "step": 8590 + }, + { + "epoch": 1.47, + "learning_rate": 1.0859786208458243e-05, + "loss": 0.4009, + "step": 8591 + }, + { + "epoch": 1.47, + "learning_rate": 1.0857951624242038e-05, + "loss": 0.4072, + "step": 8592 + }, + { + "epoch": 1.47, + "learning_rate": 1.0856117010935176e-05, + "loss": 0.4272, + "step": 8593 + }, + { + "epoch": 1.47, + "learning_rate": 1.085428236859986e-05, + "loss": 0.4369, + "step": 8594 + }, + { + "epoch": 1.47, + "learning_rate": 1.0852447697298296e-05, + "loss": 0.3947, + "step": 8595 + }, + { + "epoch": 1.47, + "learning_rate": 1.0850612997092696e-05, + "loss": 0.424, + "step": 8596 + }, + { + "epoch": 1.47, + "learning_rate": 1.0848778268045263e-05, + "loss": 0.4378, + "step": 8597 + }, + { + "epoch": 1.47, + "learning_rate": 1.0846943510218218e-05, + "loss": 0.4159, + "step": 8598 + }, + { + "epoch": 1.47, + "learning_rate": 1.0845108723673761e-05, + "loss": 0.4449, + "step": 8599 + }, + { + "epoch": 1.47, + "learning_rate": 1.084327390847411e-05, + "loss": 0.4031, + "step": 8600 + }, + { + "epoch": 1.47, + "learning_rate": 1.084143906468148e-05, + "loss": 0.4211, + "step": 8601 + }, + { + "epoch": 1.47, + "learning_rate": 1.0839604192358082e-05, + "loss": 0.4356, + "step": 8602 + }, + { + "epoch": 1.47, + "learning_rate": 1.0837769291566132e-05, + "loss": 0.4227, + "step": 8603 + }, + { + "epoch": 1.47, + "learning_rate": 1.0835934362367844e-05, + "loss": 0.4007, + "step": 8604 + }, + { + "epoch": 1.47, + "learning_rate": 1.083409940482544e-05, + "loss": 0.4117, + "step": 8605 + }, + { + "epoch": 1.47, + "learning_rate": 1.0832264419001135e-05, + "loss": 0.4162, + "step": 8606 + }, + { + "epoch": 1.47, + "learning_rate": 1.0830429404957148e-05, + "loss": 0.4586, + "step": 8607 + }, + { + "epoch": 1.47, + "learning_rate": 1.0828594362755697e-05, + "loss": 0.4345, + "step": 8608 + }, + { + "epoch": 1.47, + "learning_rate": 1.0826759292459012e-05, + "loss": 0.4353, + "step": 8609 + }, + { + "epoch": 1.47, + "learning_rate": 1.0824924194129307e-05, + "loss": 0.4278, + "step": 8610 + }, + { + "epoch": 1.47, + "learning_rate": 1.0823089067828802e-05, + "loss": 0.3992, + "step": 8611 + }, + { + "epoch": 1.47, + "learning_rate": 1.0821253913619727e-05, + "loss": 0.4577, + "step": 8612 + }, + { + "epoch": 1.47, + "learning_rate": 1.0819418731564309e-05, + "loss": 0.4131, + "step": 8613 + }, + { + "epoch": 1.47, + "learning_rate": 1.0817583521724763e-05, + "loss": 0.4227, + "step": 8614 + }, + { + "epoch": 1.47, + "learning_rate": 1.0815748284163328e-05, + "loss": 0.468, + "step": 8615 + }, + { + "epoch": 1.47, + "learning_rate": 1.0813913018942224e-05, + "loss": 0.4157, + "step": 8616 + }, + { + "epoch": 1.47, + "learning_rate": 1.0812077726123681e-05, + "loss": 0.4465, + "step": 8617 + }, + { + "epoch": 1.47, + "learning_rate": 1.081024240576993e-05, + "loss": 0.438, + "step": 8618 + }, + { + "epoch": 1.47, + "learning_rate": 1.08084070579432e-05, + "loss": 0.447, + "step": 8619 + }, + { + "epoch": 1.47, + "learning_rate": 1.0806571682705723e-05, + "loss": 0.4309, + "step": 8620 + }, + { + "epoch": 1.47, + "learning_rate": 1.0804736280119732e-05, + "loss": 0.4154, + "step": 8621 + }, + { + "epoch": 1.47, + "learning_rate": 1.0802900850247457e-05, + "loss": 0.443, + "step": 8622 + }, + { + "epoch": 1.47, + "learning_rate": 1.0801065393151133e-05, + "loss": 0.4388, + "step": 8623 + }, + { + "epoch": 1.47, + "learning_rate": 1.0799229908893001e-05, + "loss": 0.3966, + "step": 8624 + }, + { + "epoch": 1.47, + "learning_rate": 1.0797394397535293e-05, + "loss": 0.4221, + "step": 8625 + }, + { + "epoch": 1.47, + "learning_rate": 1.079555885914024e-05, + "loss": 0.4089, + "step": 8626 + }, + { + "epoch": 1.47, + "learning_rate": 1.0793723293770087e-05, + "loss": 0.4212, + "step": 8627 + }, + { + "epoch": 1.47, + "learning_rate": 1.079188770148707e-05, + "loss": 0.4546, + "step": 8628 + }, + { + "epoch": 1.47, + "learning_rate": 1.0790052082353433e-05, + "loss": 0.3907, + "step": 8629 + }, + { + "epoch": 1.47, + "learning_rate": 1.0788216436431409e-05, + "loss": 0.422, + "step": 8630 + }, + { + "epoch": 1.47, + "learning_rate": 1.0786380763783243e-05, + "loss": 0.4114, + "step": 8631 + }, + { + "epoch": 1.47, + "learning_rate": 1.0784545064471184e-05, + "loss": 0.3986, + "step": 8632 + }, + { + "epoch": 1.47, + "learning_rate": 1.0782709338557464e-05, + "loss": 0.4384, + "step": 8633 + }, + { + "epoch": 1.47, + "learning_rate": 1.0780873586104333e-05, + "loss": 0.3859, + "step": 8634 + }, + { + "epoch": 1.47, + "learning_rate": 1.0779037807174032e-05, + "loss": 0.4385, + "step": 8635 + }, + { + "epoch": 1.47, + "learning_rate": 1.0777202001828817e-05, + "loss": 0.4272, + "step": 8636 + }, + { + "epoch": 1.47, + "learning_rate": 1.0775366170130923e-05, + "loss": 0.424, + "step": 8637 + }, + { + "epoch": 1.47, + "learning_rate": 1.0773530312142603e-05, + "loss": 0.4387, + "step": 8638 + }, + { + "epoch": 1.47, + "learning_rate": 1.0771694427926108e-05, + "loss": 0.4152, + "step": 8639 + }, + { + "epoch": 1.47, + "learning_rate": 1.0769858517543684e-05, + "loss": 0.4467, + "step": 8640 + }, + { + "epoch": 1.47, + "learning_rate": 1.0768022581057583e-05, + "loss": 0.4614, + "step": 8641 + }, + { + "epoch": 1.47, + "learning_rate": 1.0766186618530055e-05, + "loss": 0.422, + "step": 8642 + }, + { + "epoch": 1.47, + "learning_rate": 1.076435063002335e-05, + "loss": 0.437, + "step": 8643 + }, + { + "epoch": 1.47, + "learning_rate": 1.0762514615599728e-05, + "loss": 0.3958, + "step": 8644 + }, + { + "epoch": 1.47, + "learning_rate": 1.0760678575321437e-05, + "loss": 0.3821, + "step": 8645 + }, + { + "epoch": 1.47, + "learning_rate": 1.0758842509250734e-05, + "loss": 0.4293, + "step": 8646 + }, + { + "epoch": 1.47, + "learning_rate": 1.0757006417449875e-05, + "loss": 0.4349, + "step": 8647 + }, + { + "epoch": 1.47, + "learning_rate": 1.0755170299981114e-05, + "loss": 0.4289, + "step": 8648 + }, + { + "epoch": 1.48, + "learning_rate": 1.0753334156906715e-05, + "loss": 0.3941, + "step": 8649 + }, + { + "epoch": 1.48, + "learning_rate": 1.0751497988288926e-05, + "loss": 0.4524, + "step": 8650 + }, + { + "epoch": 1.48, + "learning_rate": 1.0749661794190016e-05, + "loss": 0.4641, + "step": 8651 + }, + { + "epoch": 1.48, + "learning_rate": 1.074782557467224e-05, + "loss": 0.457, + "step": 8652 + }, + { + "epoch": 1.48, + "learning_rate": 1.0745989329797857e-05, + "loss": 0.4418, + "step": 8653 + }, + { + "epoch": 1.48, + "learning_rate": 1.0744153059629132e-05, + "loss": 0.4157, + "step": 8654 + }, + { + "epoch": 1.48, + "learning_rate": 1.074231676422833e-05, + "loss": 0.4372, + "step": 8655 + }, + { + "epoch": 1.48, + "learning_rate": 1.0740480443657711e-05, + "loss": 0.4619, + "step": 8656 + }, + { + "epoch": 1.48, + "learning_rate": 1.073864409797954e-05, + "loss": 0.446, + "step": 8657 + }, + { + "epoch": 1.48, + "learning_rate": 1.073680772725608e-05, + "loss": 0.4389, + "step": 8658 + }, + { + "epoch": 1.48, + "learning_rate": 1.0734971331549604e-05, + "loss": 0.3865, + "step": 8659 + }, + { + "epoch": 1.48, + "learning_rate": 1.073313491092237e-05, + "loss": 0.4067, + "step": 8660 + }, + { + "epoch": 1.48, + "learning_rate": 1.0731298465436649e-05, + "loss": 0.4131, + "step": 8661 + }, + { + "epoch": 1.48, + "learning_rate": 1.0729461995154713e-05, + "loss": 0.4083, + "step": 8662 + }, + { + "epoch": 1.48, + "learning_rate": 1.0727625500138827e-05, + "loss": 0.4501, + "step": 8663 + }, + { + "epoch": 1.48, + "learning_rate": 1.0725788980451262e-05, + "loss": 0.4455, + "step": 8664 + }, + { + "epoch": 1.48, + "learning_rate": 1.0723952436154295e-05, + "loss": 0.4539, + "step": 8665 + }, + { + "epoch": 1.48, + "learning_rate": 1.072211586731019e-05, + "loss": 0.415, + "step": 8666 + }, + { + "epoch": 1.48, + "learning_rate": 1.072027927398122e-05, + "loss": 0.4371, + "step": 8667 + }, + { + "epoch": 1.48, + "learning_rate": 1.0718442656229664e-05, + "loss": 0.4467, + "step": 8668 + }, + { + "epoch": 1.48, + "learning_rate": 1.0716606014117793e-05, + "loss": 0.4514, + "step": 8669 + }, + { + "epoch": 1.48, + "learning_rate": 1.0714769347707883e-05, + "loss": 0.4124, + "step": 8670 + }, + { + "epoch": 1.48, + "learning_rate": 1.071293265706221e-05, + "loss": 0.4324, + "step": 8671 + }, + { + "epoch": 1.48, + "learning_rate": 1.071109594224305e-05, + "loss": 0.4267, + "step": 8672 + }, + { + "epoch": 1.48, + "learning_rate": 1.0709259203312684e-05, + "loss": 0.429, + "step": 8673 + }, + { + "epoch": 1.48, + "learning_rate": 1.0707422440333387e-05, + "loss": 0.3928, + "step": 8674 + }, + { + "epoch": 1.48, + "learning_rate": 1.0705585653367436e-05, + "loss": 0.4236, + "step": 8675 + }, + { + "epoch": 1.48, + "learning_rate": 1.0703748842477118e-05, + "loss": 0.4333, + "step": 8676 + }, + { + "epoch": 1.48, + "learning_rate": 1.0701912007724711e-05, + "loss": 0.4386, + "step": 8677 + }, + { + "epoch": 1.48, + "learning_rate": 1.0700075149172493e-05, + "loss": 0.4072, + "step": 8678 + }, + { + "epoch": 1.48, + "learning_rate": 1.069823826688275e-05, + "loss": 0.4009, + "step": 8679 + }, + { + "epoch": 1.48, + "learning_rate": 1.0696401360917768e-05, + "loss": 0.4024, + "step": 8680 + }, + { + "epoch": 1.48, + "learning_rate": 1.0694564431339828e-05, + "loss": 0.4222, + "step": 8681 + }, + { + "epoch": 1.48, + "learning_rate": 1.0692727478211213e-05, + "loss": 0.423, + "step": 8682 + }, + { + "epoch": 1.48, + "learning_rate": 1.0690890501594211e-05, + "loss": 0.4086, + "step": 8683 + }, + { + "epoch": 1.48, + "learning_rate": 1.068905350155111e-05, + "loss": 0.3994, + "step": 8684 + }, + { + "epoch": 1.48, + "learning_rate": 1.0687216478144195e-05, + "loss": 0.412, + "step": 8685 + }, + { + "epoch": 1.48, + "learning_rate": 1.0685379431435756e-05, + "loss": 0.4287, + "step": 8686 + }, + { + "epoch": 1.48, + "learning_rate": 1.068354236148808e-05, + "loss": 0.4261, + "step": 8687 + }, + { + "epoch": 1.48, + "learning_rate": 1.068170526836346e-05, + "loss": 0.4517, + "step": 8688 + }, + { + "epoch": 1.48, + "learning_rate": 1.0679868152124185e-05, + "loss": 0.4342, + "step": 8689 + }, + { + "epoch": 1.48, + "learning_rate": 1.0678031012832542e-05, + "loss": 0.4435, + "step": 8690 + }, + { + "epoch": 1.48, + "learning_rate": 1.0676193850550826e-05, + "loss": 0.4412, + "step": 8691 + }, + { + "epoch": 1.48, + "learning_rate": 1.0674356665341336e-05, + "loss": 0.419, + "step": 8692 + }, + { + "epoch": 1.48, + "learning_rate": 1.0672519457266354e-05, + "loss": 0.4178, + "step": 8693 + }, + { + "epoch": 1.48, + "learning_rate": 1.0670682226388185e-05, + "loss": 0.4238, + "step": 8694 + }, + { + "epoch": 1.48, + "learning_rate": 1.0668844972769117e-05, + "loss": 0.4132, + "step": 8695 + }, + { + "epoch": 1.48, + "learning_rate": 1.0667007696471452e-05, + "loss": 0.4104, + "step": 8696 + }, + { + "epoch": 1.48, + "learning_rate": 1.0665170397557482e-05, + "loss": 0.4143, + "step": 8697 + }, + { + "epoch": 1.48, + "learning_rate": 1.0663333076089505e-05, + "loss": 0.4497, + "step": 8698 + }, + { + "epoch": 1.48, + "learning_rate": 1.066149573212982e-05, + "loss": 0.4412, + "step": 8699 + }, + { + "epoch": 1.48, + "learning_rate": 1.065965836574073e-05, + "loss": 0.4336, + "step": 8700 + }, + { + "epoch": 1.48, + "learning_rate": 1.0657820976984528e-05, + "loss": 0.4366, + "step": 8701 + }, + { + "epoch": 1.48, + "learning_rate": 1.0655983565923518e-05, + "loss": 0.4114, + "step": 8702 + }, + { + "epoch": 1.48, + "learning_rate": 1.0654146132619999e-05, + "loss": 0.4232, + "step": 8703 + }, + { + "epoch": 1.48, + "learning_rate": 1.0652308677136278e-05, + "loss": 0.4024, + "step": 8704 + }, + { + "epoch": 1.48, + "learning_rate": 1.0650471199534655e-05, + "loss": 0.384, + "step": 8705 + }, + { + "epoch": 1.48, + "learning_rate": 1.064863369987743e-05, + "loss": 0.4004, + "step": 8706 + }, + { + "epoch": 1.48, + "learning_rate": 1.0646796178226915e-05, + "loss": 0.4442, + "step": 8707 + }, + { + "epoch": 1.49, + "learning_rate": 1.0644958634645408e-05, + "loss": 0.4018, + "step": 8708 + }, + { + "epoch": 1.49, + "learning_rate": 1.0643121069195216e-05, + "loss": 0.4481, + "step": 8709 + }, + { + "epoch": 1.49, + "learning_rate": 1.064128348193865e-05, + "loss": 0.4205, + "step": 8710 + }, + { + "epoch": 1.49, + "learning_rate": 1.0639445872938012e-05, + "loss": 0.4265, + "step": 8711 + }, + { + "epoch": 1.49, + "learning_rate": 1.0637608242255616e-05, + "loss": 0.4506, + "step": 8712 + }, + { + "epoch": 1.49, + "learning_rate": 1.0635770589953764e-05, + "loss": 0.431, + "step": 8713 + }, + { + "epoch": 1.49, + "learning_rate": 1.0633932916094766e-05, + "loss": 0.4048, + "step": 8714 + }, + { + "epoch": 1.49, + "learning_rate": 1.063209522074094e-05, + "loss": 0.4045, + "step": 8715 + }, + { + "epoch": 1.49, + "learning_rate": 1.0630257503954588e-05, + "loss": 0.4458, + "step": 8716 + }, + { + "epoch": 1.49, + "learning_rate": 1.0628419765798026e-05, + "loss": 0.4266, + "step": 8717 + }, + { + "epoch": 1.49, + "learning_rate": 1.0626582006333565e-05, + "loss": 0.4114, + "step": 8718 + }, + { + "epoch": 1.49, + "learning_rate": 1.0624744225623523e-05, + "loss": 0.4235, + "step": 8719 + }, + { + "epoch": 1.49, + "learning_rate": 1.0622906423730206e-05, + "loss": 0.4236, + "step": 8720 + }, + { + "epoch": 1.49, + "learning_rate": 1.0621068600715935e-05, + "loss": 0.4309, + "step": 8721 + }, + { + "epoch": 1.49, + "learning_rate": 1.061923075664302e-05, + "loss": 0.4435, + "step": 8722 + }, + { + "epoch": 1.49, + "learning_rate": 1.0617392891573781e-05, + "loss": 0.4017, + "step": 8723 + }, + { + "epoch": 1.49, + "learning_rate": 1.0615555005570533e-05, + "loss": 0.4054, + "step": 8724 + }, + { + "epoch": 1.49, + "learning_rate": 1.061371709869559e-05, + "loss": 0.4263, + "step": 8725 + }, + { + "epoch": 1.49, + "learning_rate": 1.0611879171011278e-05, + "loss": 0.4127, + "step": 8726 + }, + { + "epoch": 1.49, + "learning_rate": 1.061004122257991e-05, + "loss": 0.4292, + "step": 8727 + }, + { + "epoch": 1.49, + "learning_rate": 1.060820325346381e-05, + "loss": 0.4266, + "step": 8728 + }, + { + "epoch": 1.49, + "learning_rate": 1.0606365263725289e-05, + "loss": 0.376, + "step": 8729 + }, + { + "epoch": 1.49, + "learning_rate": 1.060452725342668e-05, + "loss": 0.3899, + "step": 8730 + }, + { + "epoch": 1.49, + "learning_rate": 1.0602689222630295e-05, + "loss": 0.4129, + "step": 8731 + }, + { + "epoch": 1.49, + "learning_rate": 1.060085117139846e-05, + "loss": 0.3862, + "step": 8732 + }, + { + "epoch": 1.49, + "learning_rate": 1.0599013099793499e-05, + "loss": 0.4252, + "step": 8733 + }, + { + "epoch": 1.49, + "learning_rate": 1.0597175007877736e-05, + "loss": 0.3887, + "step": 8734 + }, + { + "epoch": 1.49, + "learning_rate": 1.0595336895713493e-05, + "loss": 0.4192, + "step": 8735 + }, + { + "epoch": 1.49, + "learning_rate": 1.05934987633631e-05, + "loss": 0.4364, + "step": 8736 + }, + { + "epoch": 1.49, + "learning_rate": 1.0591660610888872e-05, + "loss": 0.4187, + "step": 8737 + }, + { + "epoch": 1.49, + "learning_rate": 1.0589822438353148e-05, + "loss": 0.4263, + "step": 8738 + }, + { + "epoch": 1.49, + "learning_rate": 1.0587984245818249e-05, + "loss": 0.4329, + "step": 8739 + }, + { + "epoch": 1.49, + "learning_rate": 1.0586146033346499e-05, + "loss": 0.4097, + "step": 8740 + }, + { + "epoch": 1.49, + "learning_rate": 1.0584307801000236e-05, + "loss": 0.4122, + "step": 8741 + }, + { + "epoch": 1.49, + "learning_rate": 1.058246954884178e-05, + "loss": 0.3988, + "step": 8742 + }, + { + "epoch": 1.49, + "learning_rate": 1.0580631276933466e-05, + "loss": 0.4392, + "step": 8743 + }, + { + "epoch": 1.49, + "learning_rate": 1.0578792985337624e-05, + "loss": 0.4099, + "step": 8744 + }, + { + "epoch": 1.49, + "learning_rate": 1.0576954674116587e-05, + "loss": 0.4373, + "step": 8745 + }, + { + "epoch": 1.49, + "learning_rate": 1.0575116343332681e-05, + "loss": 0.4165, + "step": 8746 + }, + { + "epoch": 1.49, + "learning_rate": 1.057327799304824e-05, + "loss": 0.4283, + "step": 8747 + }, + { + "epoch": 1.49, + "learning_rate": 1.0571439623325602e-05, + "loss": 0.4132, + "step": 8748 + }, + { + "epoch": 1.49, + "learning_rate": 1.0569601234227098e-05, + "loss": 0.4361, + "step": 8749 + }, + { + "epoch": 1.49, + "learning_rate": 1.056776282581506e-05, + "loss": 0.4403, + "step": 8750 + }, + { + "epoch": 1.49, + "learning_rate": 1.0565924398151825e-05, + "loss": 0.4514, + "step": 8751 + }, + { + "epoch": 1.49, + "learning_rate": 1.0564085951299733e-05, + "loss": 0.4336, + "step": 8752 + }, + { + "epoch": 1.49, + "learning_rate": 1.0562247485321116e-05, + "loss": 0.4338, + "step": 8753 + }, + { + "epoch": 1.49, + "learning_rate": 1.0560409000278309e-05, + "loss": 0.4124, + "step": 8754 + }, + { + "epoch": 1.49, + "learning_rate": 1.0558570496233651e-05, + "loss": 0.4318, + "step": 8755 + }, + { + "epoch": 1.49, + "learning_rate": 1.0556731973249486e-05, + "loss": 0.4388, + "step": 8756 + }, + { + "epoch": 1.49, + "learning_rate": 1.0554893431388144e-05, + "loss": 0.4385, + "step": 8757 + }, + { + "epoch": 1.49, + "learning_rate": 1.055305487071197e-05, + "loss": 0.4027, + "step": 8758 + }, + { + "epoch": 1.49, + "learning_rate": 1.0551216291283304e-05, + "loss": 0.3978, + "step": 8759 + }, + { + "epoch": 1.49, + "learning_rate": 1.054937769316449e-05, + "loss": 0.4048, + "step": 8760 + }, + { + "epoch": 1.49, + "learning_rate": 1.0547539076417863e-05, + "loss": 0.4464, + "step": 8761 + }, + { + "epoch": 1.49, + "learning_rate": 1.0545700441105766e-05, + "loss": 0.3824, + "step": 8762 + }, + { + "epoch": 1.49, + "learning_rate": 1.0543861787290545e-05, + "loss": 0.4354, + "step": 8763 + }, + { + "epoch": 1.49, + "learning_rate": 1.0542023115034548e-05, + "loss": 0.4148, + "step": 8764 + }, + { + "epoch": 1.49, + "learning_rate": 1.0540184424400106e-05, + "loss": 0.4235, + "step": 8765 + }, + { + "epoch": 1.5, + "learning_rate": 1.0538345715449574e-05, + "loss": 0.4373, + "step": 8766 + }, + { + "epoch": 1.5, + "learning_rate": 1.0536506988245296e-05, + "loss": 0.4051, + "step": 8767 + }, + { + "epoch": 1.5, + "learning_rate": 1.0534668242849615e-05, + "loss": 0.4612, + "step": 8768 + }, + { + "epoch": 1.5, + "learning_rate": 1.0532829479324882e-05, + "loss": 0.4237, + "step": 8769 + }, + { + "epoch": 1.5, + "learning_rate": 1.0530990697733437e-05, + "loss": 0.4517, + "step": 8770 + }, + { + "epoch": 1.5, + "learning_rate": 1.0529151898137635e-05, + "loss": 0.442, + "step": 8771 + }, + { + "epoch": 1.5, + "learning_rate": 1.0527313080599817e-05, + "loss": 0.4273, + "step": 8772 + }, + { + "epoch": 1.5, + "learning_rate": 1.0525474245182338e-05, + "loss": 0.4461, + "step": 8773 + }, + { + "epoch": 1.5, + "learning_rate": 1.0523635391947547e-05, + "loss": 0.4229, + "step": 8774 + }, + { + "epoch": 1.5, + "learning_rate": 1.0521796520957794e-05, + "loss": 0.4403, + "step": 8775 + }, + { + "epoch": 1.5, + "learning_rate": 1.0519957632275428e-05, + "loss": 0.4503, + "step": 8776 + }, + { + "epoch": 1.5, + "learning_rate": 1.05181187259628e-05, + "loss": 0.4423, + "step": 8777 + }, + { + "epoch": 1.5, + "learning_rate": 1.0516279802082263e-05, + "loss": 0.4199, + "step": 8778 + }, + { + "epoch": 1.5, + "learning_rate": 1.0514440860696173e-05, + "loss": 0.4062, + "step": 8779 + }, + { + "epoch": 1.5, + "learning_rate": 1.0512601901866876e-05, + "loss": 0.4236, + "step": 8780 + }, + { + "epoch": 1.5, + "learning_rate": 1.051076292565673e-05, + "loss": 0.4154, + "step": 8781 + }, + { + "epoch": 1.5, + "learning_rate": 1.050892393212809e-05, + "loss": 0.4206, + "step": 8782 + }, + { + "epoch": 1.5, + "learning_rate": 1.0507084921343313e-05, + "loss": 0.4255, + "step": 8783 + }, + { + "epoch": 1.5, + "learning_rate": 1.050524589336475e-05, + "loss": 0.4164, + "step": 8784 + }, + { + "epoch": 1.5, + "learning_rate": 1.0503406848254758e-05, + "loss": 0.4128, + "step": 8785 + }, + { + "epoch": 1.5, + "learning_rate": 1.0501567786075697e-05, + "loss": 0.4359, + "step": 8786 + }, + { + "epoch": 1.5, + "learning_rate": 1.0499728706889918e-05, + "loss": 0.4228, + "step": 8787 + }, + { + "epoch": 1.5, + "learning_rate": 1.0497889610759784e-05, + "loss": 0.4249, + "step": 8788 + }, + { + "epoch": 1.5, + "learning_rate": 1.0496050497747652e-05, + "loss": 0.4193, + "step": 8789 + }, + { + "epoch": 1.5, + "learning_rate": 1.0494211367915883e-05, + "loss": 0.3972, + "step": 8790 + }, + { + "epoch": 1.5, + "learning_rate": 1.0492372221326832e-05, + "loss": 0.4777, + "step": 8791 + }, + { + "epoch": 1.5, + "learning_rate": 1.0490533058042864e-05, + "loss": 0.4009, + "step": 8792 + }, + { + "epoch": 1.5, + "learning_rate": 1.0488693878126337e-05, + "loss": 0.4131, + "step": 8793 + }, + { + "epoch": 1.5, + "learning_rate": 1.0486854681639614e-05, + "loss": 0.355, + "step": 8794 + }, + { + "epoch": 1.5, + "learning_rate": 1.0485015468645052e-05, + "loss": 0.4187, + "step": 8795 + }, + { + "epoch": 1.5, + "learning_rate": 1.0483176239205017e-05, + "loss": 0.4208, + "step": 8796 + }, + { + "epoch": 1.5, + "learning_rate": 1.0481336993381874e-05, + "loss": 0.4234, + "step": 8797 + }, + { + "epoch": 1.5, + "learning_rate": 1.0479497731237987e-05, + "loss": 0.4233, + "step": 8798 + }, + { + "epoch": 1.5, + "learning_rate": 1.0477658452835712e-05, + "loss": 0.4119, + "step": 8799 + }, + { + "epoch": 1.5, + "learning_rate": 1.0475819158237426e-05, + "loss": 0.4139, + "step": 8800 + }, + { + "epoch": 1.5, + "learning_rate": 1.0473979847505484e-05, + "loss": 0.392, + "step": 8801 + }, + { + "epoch": 1.5, + "learning_rate": 1.0472140520702254e-05, + "loss": 0.4495, + "step": 8802 + }, + { + "epoch": 1.5, + "learning_rate": 1.0470301177890104e-05, + "loss": 0.3972, + "step": 8803 + }, + { + "epoch": 1.5, + "learning_rate": 1.0468461819131397e-05, + "loss": 0.4222, + "step": 8804 + }, + { + "epoch": 1.5, + "learning_rate": 1.046662244448851e-05, + "loss": 0.4383, + "step": 8805 + }, + { + "epoch": 1.5, + "learning_rate": 1.0464783054023799e-05, + "loss": 0.4211, + "step": 8806 + }, + { + "epoch": 1.5, + "learning_rate": 1.046294364779964e-05, + "loss": 0.4392, + "step": 8807 + }, + { + "epoch": 1.5, + "learning_rate": 1.04611042258784e-05, + "loss": 0.4283, + "step": 8808 + }, + { + "epoch": 1.5, + "learning_rate": 1.0459264788322447e-05, + "loss": 0.4489, + "step": 8809 + }, + { + "epoch": 1.5, + "learning_rate": 1.0457425335194154e-05, + "loss": 0.4064, + "step": 8810 + }, + { + "epoch": 1.5, + "learning_rate": 1.0455585866555887e-05, + "loss": 0.4132, + "step": 8811 + }, + { + "epoch": 1.5, + "learning_rate": 1.045374638247002e-05, + "loss": 0.4221, + "step": 8812 + }, + { + "epoch": 1.5, + "learning_rate": 1.0451906882998927e-05, + "loss": 0.4378, + "step": 8813 + }, + { + "epoch": 1.5, + "learning_rate": 1.0450067368204975e-05, + "loss": 0.401, + "step": 8814 + }, + { + "epoch": 1.5, + "learning_rate": 1.044822783815054e-05, + "loss": 0.4215, + "step": 8815 + }, + { + "epoch": 1.5, + "learning_rate": 1.0446388292897995e-05, + "loss": 0.4022, + "step": 8816 + }, + { + "epoch": 1.5, + "learning_rate": 1.0444548732509715e-05, + "loss": 0.4097, + "step": 8817 + }, + { + "epoch": 1.5, + "learning_rate": 1.0442709157048067e-05, + "loss": 0.4177, + "step": 8818 + }, + { + "epoch": 1.5, + "learning_rate": 1.0440869566575433e-05, + "loss": 0.4171, + "step": 8819 + }, + { + "epoch": 1.5, + "learning_rate": 1.0439029961154188e-05, + "loss": 0.414, + "step": 8820 + }, + { + "epoch": 1.5, + "learning_rate": 1.0437190340846703e-05, + "loss": 0.4219, + "step": 8821 + }, + { + "epoch": 1.5, + "learning_rate": 1.043535070571536e-05, + "loss": 0.4417, + "step": 8822 + }, + { + "epoch": 1.5, + "learning_rate": 1.043351105582253e-05, + "loss": 0.4124, + "step": 8823 + }, + { + "epoch": 1.5, + "learning_rate": 1.0431671391230593e-05, + "loss": 0.4562, + "step": 8824 + }, + { + "epoch": 1.51, + "learning_rate": 1.0429831712001927e-05, + "loss": 0.425, + "step": 8825 + }, + { + "epoch": 1.51, + "learning_rate": 1.0427992018198912e-05, + "loss": 0.4174, + "step": 8826 + }, + { + "epoch": 1.51, + "learning_rate": 1.0426152309883921e-05, + "loss": 0.396, + "step": 8827 + }, + { + "epoch": 1.51, + "learning_rate": 1.0424312587119338e-05, + "loss": 0.4105, + "step": 8828 + }, + { + "epoch": 1.51, + "learning_rate": 1.042247284996754e-05, + "loss": 0.4136, + "step": 8829 + }, + { + "epoch": 1.51, + "learning_rate": 1.042063309849091e-05, + "loss": 0.4458, + "step": 8830 + }, + { + "epoch": 1.51, + "learning_rate": 1.0418793332751828e-05, + "loss": 0.4091, + "step": 8831 + }, + { + "epoch": 1.51, + "learning_rate": 1.0416953552812674e-05, + "loss": 0.4153, + "step": 8832 + }, + { + "epoch": 1.51, + "learning_rate": 1.0415113758735829e-05, + "loss": 0.4193, + "step": 8833 + }, + { + "epoch": 1.51, + "learning_rate": 1.0413273950583676e-05, + "loss": 0.4158, + "step": 8834 + }, + { + "epoch": 1.51, + "learning_rate": 1.0411434128418599e-05, + "loss": 0.3876, + "step": 8835 + }, + { + "epoch": 1.51, + "learning_rate": 1.0409594292302977e-05, + "loss": 0.4262, + "step": 8836 + }, + { + "epoch": 1.51, + "learning_rate": 1.0407754442299197e-05, + "loss": 0.4067, + "step": 8837 + }, + { + "epoch": 1.51, + "learning_rate": 1.0405914578469643e-05, + "loss": 0.4184, + "step": 8838 + }, + { + "epoch": 1.51, + "learning_rate": 1.0404074700876699e-05, + "loss": 0.4257, + "step": 8839 + }, + { + "epoch": 1.51, + "learning_rate": 1.0402234809582749e-05, + "loss": 0.4216, + "step": 8840 + }, + { + "epoch": 1.51, + "learning_rate": 1.040039490465018e-05, + "loss": 0.4446, + "step": 8841 + }, + { + "epoch": 1.51, + "learning_rate": 1.0398554986141373e-05, + "loss": 0.4478, + "step": 8842 + }, + { + "epoch": 1.51, + "learning_rate": 1.0396715054118725e-05, + "loss": 0.4221, + "step": 8843 + }, + { + "epoch": 1.51, + "learning_rate": 1.0394875108644609e-05, + "loss": 0.4424, + "step": 8844 + }, + { + "epoch": 1.51, + "learning_rate": 1.0393035149781423e-05, + "loss": 0.4176, + "step": 8845 + }, + { + "epoch": 1.51, + "learning_rate": 1.039119517759155e-05, + "loss": 0.457, + "step": 8846 + }, + { + "epoch": 1.51, + "learning_rate": 1.0389355192137379e-05, + "loss": 0.4102, + "step": 8847 + }, + { + "epoch": 1.51, + "learning_rate": 1.03875151934813e-05, + "loss": 0.4006, + "step": 8848 + }, + { + "epoch": 1.51, + "learning_rate": 1.0385675181685697e-05, + "loss": 0.4334, + "step": 8849 + }, + { + "epoch": 1.51, + "learning_rate": 1.0383835156812967e-05, + "loss": 0.4379, + "step": 8850 + }, + { + "epoch": 1.51, + "learning_rate": 1.0381995118925494e-05, + "loss": 0.4229, + "step": 8851 + }, + { + "epoch": 1.51, + "learning_rate": 1.0380155068085668e-05, + "loss": 0.4594, + "step": 8852 + }, + { + "epoch": 1.51, + "learning_rate": 1.0378315004355885e-05, + "loss": 0.4144, + "step": 8853 + }, + { + "epoch": 1.51, + "learning_rate": 1.0376474927798535e-05, + "loss": 0.4223, + "step": 8854 + }, + { + "epoch": 1.51, + "learning_rate": 1.0374634838476006e-05, + "loss": 0.4431, + "step": 8855 + }, + { + "epoch": 1.51, + "learning_rate": 1.0372794736450694e-05, + "loss": 0.417, + "step": 8856 + }, + { + "epoch": 1.51, + "learning_rate": 1.0370954621784988e-05, + "loss": 0.4661, + "step": 8857 + }, + { + "epoch": 1.51, + "learning_rate": 1.0369114494541284e-05, + "loss": 0.4341, + "step": 8858 + }, + { + "epoch": 1.51, + "learning_rate": 1.0367274354781971e-05, + "loss": 0.3996, + "step": 8859 + }, + { + "epoch": 1.51, + "learning_rate": 1.036543420256945e-05, + "loss": 0.4312, + "step": 8860 + }, + { + "epoch": 1.51, + "learning_rate": 1.0363594037966108e-05, + "loss": 0.4163, + "step": 8861 + }, + { + "epoch": 1.51, + "learning_rate": 1.0361753861034345e-05, + "loss": 0.4319, + "step": 8862 + }, + { + "epoch": 1.51, + "learning_rate": 1.0359913671836552e-05, + "loss": 0.4345, + "step": 8863 + }, + { + "epoch": 1.51, + "learning_rate": 1.0358073470435129e-05, + "loss": 0.4071, + "step": 8864 + }, + { + "epoch": 1.51, + "learning_rate": 1.035623325689247e-05, + "loss": 0.4103, + "step": 8865 + }, + { + "epoch": 1.51, + "learning_rate": 1.0354393031270969e-05, + "loss": 0.4013, + "step": 8866 + }, + { + "epoch": 1.51, + "learning_rate": 1.0352552793633022e-05, + "loss": 0.4406, + "step": 8867 + }, + { + "epoch": 1.51, + "learning_rate": 1.035071254404103e-05, + "loss": 0.4286, + "step": 8868 + }, + { + "epoch": 1.51, + "learning_rate": 1.0348872282557391e-05, + "loss": 0.4282, + "step": 8869 + }, + { + "epoch": 1.51, + "learning_rate": 1.03470320092445e-05, + "loss": 0.4288, + "step": 8870 + }, + { + "epoch": 1.51, + "learning_rate": 1.0345191724164754e-05, + "loss": 0.4384, + "step": 8871 + }, + { + "epoch": 1.51, + "learning_rate": 1.034335142738056e-05, + "loss": 0.4313, + "step": 8872 + }, + { + "epoch": 1.51, + "learning_rate": 1.0341511118954308e-05, + "loss": 0.4126, + "step": 8873 + }, + { + "epoch": 1.51, + "learning_rate": 1.03396707989484e-05, + "loss": 0.4171, + "step": 8874 + }, + { + "epoch": 1.51, + "learning_rate": 1.0337830467425237e-05, + "loss": 0.4188, + "step": 8875 + }, + { + "epoch": 1.51, + "learning_rate": 1.0335990124447217e-05, + "loss": 0.4057, + "step": 8876 + }, + { + "epoch": 1.51, + "learning_rate": 1.0334149770076747e-05, + "loss": 0.4477, + "step": 8877 + }, + { + "epoch": 1.51, + "learning_rate": 1.0332309404376222e-05, + "loss": 0.4061, + "step": 8878 + }, + { + "epoch": 1.51, + "learning_rate": 1.0330469027408047e-05, + "loss": 0.4474, + "step": 8879 + }, + { + "epoch": 1.51, + "learning_rate": 1.032862863923462e-05, + "loss": 0.4147, + "step": 8880 + }, + { + "epoch": 1.51, + "learning_rate": 1.0326788239918348e-05, + "loss": 0.4108, + "step": 8881 + }, + { + "epoch": 1.51, + "learning_rate": 1.0324947829521627e-05, + "loss": 0.396, + "step": 8882 + }, + { + "epoch": 1.51, + "learning_rate": 1.0323107408106868e-05, + "loss": 0.4144, + "step": 8883 + }, + { + "epoch": 1.52, + "learning_rate": 1.032126697573647e-05, + "loss": 0.4186, + "step": 8884 + }, + { + "epoch": 1.52, + "learning_rate": 1.0319426532472837e-05, + "loss": 0.4368, + "step": 8885 + }, + { + "epoch": 1.52, + "learning_rate": 1.031758607837837e-05, + "loss": 0.4252, + "step": 8886 + }, + { + "epoch": 1.52, + "learning_rate": 1.031574561351548e-05, + "loss": 0.4279, + "step": 8887 + }, + { + "epoch": 1.52, + "learning_rate": 1.0313905137946571e-05, + "loss": 0.4377, + "step": 8888 + }, + { + "epoch": 1.52, + "learning_rate": 1.0312064651734042e-05, + "loss": 0.4047, + "step": 8889 + }, + { + "epoch": 1.52, + "learning_rate": 1.0310224154940302e-05, + "loss": 0.3819, + "step": 8890 + }, + { + "epoch": 1.52, + "learning_rate": 1.0308383647627757e-05, + "loss": 0.413, + "step": 8891 + }, + { + "epoch": 1.52, + "learning_rate": 1.0306543129858819e-05, + "loss": 0.4337, + "step": 8892 + }, + { + "epoch": 1.52, + "learning_rate": 1.0304702601695884e-05, + "loss": 0.4352, + "step": 8893 + }, + { + "epoch": 1.52, + "learning_rate": 1.0302862063201367e-05, + "loss": 0.4424, + "step": 8894 + }, + { + "epoch": 1.52, + "learning_rate": 1.0301021514437672e-05, + "loss": 0.449, + "step": 8895 + }, + { + "epoch": 1.52, + "learning_rate": 1.029918095546721e-05, + "loss": 0.4153, + "step": 8896 + }, + { + "epoch": 1.52, + "learning_rate": 1.0297340386352381e-05, + "loss": 0.4458, + "step": 8897 + }, + { + "epoch": 1.52, + "learning_rate": 1.02954998071556e-05, + "loss": 0.3954, + "step": 8898 + }, + { + "epoch": 1.52, + "learning_rate": 1.0293659217939277e-05, + "loss": 0.4386, + "step": 8899 + }, + { + "epoch": 1.52, + "learning_rate": 1.029181861876582e-05, + "loss": 0.4096, + "step": 8900 + }, + { + "epoch": 1.52, + "learning_rate": 1.028997800969763e-05, + "loss": 0.4409, + "step": 8901 + }, + { + "epoch": 1.52, + "learning_rate": 1.0288137390797128e-05, + "loss": 0.4597, + "step": 8902 + }, + { + "epoch": 1.52, + "learning_rate": 1.0286296762126721e-05, + "loss": 0.4193, + "step": 8903 + }, + { + "epoch": 1.52, + "learning_rate": 1.0284456123748818e-05, + "loss": 0.4104, + "step": 8904 + }, + { + "epoch": 1.52, + "learning_rate": 1.0282615475725828e-05, + "loss": 0.4265, + "step": 8905 + }, + { + "epoch": 1.52, + "learning_rate": 1.0280774818120162e-05, + "loss": 0.431, + "step": 8906 + }, + { + "epoch": 1.52, + "learning_rate": 1.0278934150994238e-05, + "loss": 0.4317, + "step": 8907 + }, + { + "epoch": 1.52, + "learning_rate": 1.027709347441046e-05, + "loss": 0.4183, + "step": 8908 + }, + { + "epoch": 1.52, + "learning_rate": 1.0275252788431242e-05, + "loss": 0.4254, + "step": 8909 + }, + { + "epoch": 1.52, + "learning_rate": 1.0273412093119003e-05, + "loss": 0.404, + "step": 8910 + }, + { + "epoch": 1.52, + "learning_rate": 1.0271571388536143e-05, + "loss": 0.4069, + "step": 8911 + }, + { + "epoch": 1.52, + "learning_rate": 1.0269730674745089e-05, + "loss": 0.3952, + "step": 8912 + }, + { + "epoch": 1.52, + "learning_rate": 1.0267889951808241e-05, + "loss": 0.4366, + "step": 8913 + }, + { + "epoch": 1.52, + "learning_rate": 1.0266049219788023e-05, + "loss": 0.4441, + "step": 8914 + }, + { + "epoch": 1.52, + "learning_rate": 1.0264208478746843e-05, + "loss": 0.4333, + "step": 8915 + }, + { + "epoch": 1.52, + "learning_rate": 1.0262367728747115e-05, + "loss": 0.4127, + "step": 8916 + }, + { + "epoch": 1.52, + "learning_rate": 1.0260526969851256e-05, + "loss": 0.4194, + "step": 8917 + }, + { + "epoch": 1.52, + "learning_rate": 1.0258686202121684e-05, + "loss": 0.4192, + "step": 8918 + }, + { + "epoch": 1.52, + "learning_rate": 1.0256845425620808e-05, + "loss": 0.4475, + "step": 8919 + }, + { + "epoch": 1.52, + "learning_rate": 1.0255004640411049e-05, + "loss": 0.3988, + "step": 8920 + }, + { + "epoch": 1.52, + "learning_rate": 1.0253163846554815e-05, + "loss": 0.4272, + "step": 8921 + }, + { + "epoch": 1.52, + "learning_rate": 1.0251323044114529e-05, + "loss": 0.4148, + "step": 8922 + }, + { + "epoch": 1.52, + "learning_rate": 1.0249482233152603e-05, + "loss": 0.4269, + "step": 8923 + }, + { + "epoch": 1.52, + "learning_rate": 1.0247641413731455e-05, + "loss": 0.4298, + "step": 8924 + }, + { + "epoch": 1.52, + "learning_rate": 1.0245800585913504e-05, + "loss": 0.417, + "step": 8925 + }, + { + "epoch": 1.52, + "learning_rate": 1.0243959749761166e-05, + "loss": 0.4393, + "step": 8926 + }, + { + "epoch": 1.52, + "learning_rate": 1.0242118905336858e-05, + "loss": 0.417, + "step": 8927 + }, + { + "epoch": 1.52, + "learning_rate": 1.0240278052702997e-05, + "loss": 0.3935, + "step": 8928 + }, + { + "epoch": 1.52, + "learning_rate": 1.0238437191922002e-05, + "loss": 0.4281, + "step": 8929 + }, + { + "epoch": 1.52, + "learning_rate": 1.023659632305629e-05, + "loss": 0.4523, + "step": 8930 + }, + { + "epoch": 1.52, + "learning_rate": 1.0234755446168279e-05, + "loss": 0.4123, + "step": 8931 + }, + { + "epoch": 1.52, + "learning_rate": 1.023291456132039e-05, + "loss": 0.4328, + "step": 8932 + }, + { + "epoch": 1.52, + "learning_rate": 1.0231073668575044e-05, + "loss": 0.472, + "step": 8933 + }, + { + "epoch": 1.52, + "learning_rate": 1.0229232767994654e-05, + "loss": 0.3982, + "step": 8934 + }, + { + "epoch": 1.52, + "learning_rate": 1.0227391859641646e-05, + "loss": 0.4252, + "step": 8935 + }, + { + "epoch": 1.52, + "learning_rate": 1.0225550943578436e-05, + "loss": 0.4529, + "step": 8936 + }, + { + "epoch": 1.52, + "learning_rate": 1.0223710019867445e-05, + "loss": 0.4149, + "step": 8937 + }, + { + "epoch": 1.52, + "learning_rate": 1.0221869088571094e-05, + "loss": 0.4155, + "step": 8938 + }, + { + "epoch": 1.52, + "learning_rate": 1.0220028149751803e-05, + "loss": 0.4564, + "step": 8939 + }, + { + "epoch": 1.52, + "learning_rate": 1.0218187203471992e-05, + "loss": 0.416, + "step": 8940 + }, + { + "epoch": 1.52, + "learning_rate": 1.0216346249794087e-05, + "loss": 0.4438, + "step": 8941 + }, + { + "epoch": 1.53, + "learning_rate": 1.0214505288780503e-05, + "loss": 0.4317, + "step": 8942 + }, + { + "epoch": 1.53, + "learning_rate": 1.0212664320493668e-05, + "loss": 0.4148, + "step": 8943 + }, + { + "epoch": 1.53, + "learning_rate": 1.0210823344996e-05, + "loss": 0.4346, + "step": 8944 + }, + { + "epoch": 1.53, + "learning_rate": 1.0208982362349918e-05, + "loss": 0.3936, + "step": 8945 + }, + { + "epoch": 1.53, + "learning_rate": 1.0207141372617849e-05, + "loss": 0.4201, + "step": 8946 + }, + { + "epoch": 1.53, + "learning_rate": 1.0205300375862212e-05, + "loss": 0.3836, + "step": 8947 + }, + { + "epoch": 1.53, + "learning_rate": 1.020345937214544e-05, + "loss": 0.4634, + "step": 8948 + }, + { + "epoch": 1.53, + "learning_rate": 1.0201618361529942e-05, + "loss": 0.4306, + "step": 8949 + }, + { + "epoch": 1.53, + "learning_rate": 1.019977734407815e-05, + "loss": 0.4216, + "step": 8950 + }, + { + "epoch": 1.53, + "learning_rate": 1.0197936319852487e-05, + "loss": 0.4143, + "step": 8951 + }, + { + "epoch": 1.53, + "learning_rate": 1.0196095288915374e-05, + "loss": 0.4416, + "step": 8952 + }, + { + "epoch": 1.53, + "learning_rate": 1.0194254251329237e-05, + "loss": 0.4407, + "step": 8953 + }, + { + "epoch": 1.53, + "learning_rate": 1.01924132071565e-05, + "loss": 0.4289, + "step": 8954 + }, + { + "epoch": 1.53, + "learning_rate": 1.0190572156459585e-05, + "loss": 0.4135, + "step": 8955 + }, + { + "epoch": 1.53, + "learning_rate": 1.0188731099300921e-05, + "loss": 0.4251, + "step": 8956 + }, + { + "epoch": 1.53, + "learning_rate": 1.0186890035742932e-05, + "loss": 0.4061, + "step": 8957 + }, + { + "epoch": 1.53, + "learning_rate": 1.0185048965848037e-05, + "loss": 0.4459, + "step": 8958 + }, + { + "epoch": 1.53, + "learning_rate": 1.0183207889678673e-05, + "loss": 0.4268, + "step": 8959 + }, + { + "epoch": 1.53, + "learning_rate": 1.0181366807297255e-05, + "loss": 0.4506, + "step": 8960 + }, + { + "epoch": 1.53, + "learning_rate": 1.0179525718766212e-05, + "loss": 0.4448, + "step": 8961 + }, + { + "epoch": 1.53, + "learning_rate": 1.0177684624147973e-05, + "loss": 0.3937, + "step": 8962 + }, + { + "epoch": 1.53, + "learning_rate": 1.017584352350496e-05, + "loss": 0.3967, + "step": 8963 + }, + { + "epoch": 1.53, + "learning_rate": 1.0174002416899601e-05, + "loss": 0.4214, + "step": 8964 + }, + { + "epoch": 1.53, + "learning_rate": 1.0172161304394323e-05, + "loss": 0.4447, + "step": 8965 + }, + { + "epoch": 1.53, + "learning_rate": 1.0170320186051552e-05, + "loss": 0.4598, + "step": 8966 + }, + { + "epoch": 1.53, + "learning_rate": 1.0168479061933721e-05, + "loss": 0.4574, + "step": 8967 + }, + { + "epoch": 1.53, + "learning_rate": 1.0166637932103248e-05, + "loss": 0.4304, + "step": 8968 + }, + { + "epoch": 1.53, + "learning_rate": 1.0164796796622563e-05, + "loss": 0.4055, + "step": 8969 + }, + { + "epoch": 1.53, + "learning_rate": 1.0162955655554096e-05, + "loss": 0.4263, + "step": 8970 + }, + { + "epoch": 1.53, + "learning_rate": 1.0161114508960275e-05, + "loss": 0.3982, + "step": 8971 + }, + { + "epoch": 1.53, + "learning_rate": 1.0159273356903525e-05, + "loss": 0.4485, + "step": 8972 + }, + { + "epoch": 1.53, + "learning_rate": 1.0157432199446275e-05, + "loss": 0.4233, + "step": 8973 + }, + { + "epoch": 1.53, + "learning_rate": 1.0155591036650957e-05, + "loss": 0.4079, + "step": 8974 + }, + { + "epoch": 1.53, + "learning_rate": 1.0153749868579991e-05, + "loss": 0.3978, + "step": 8975 + }, + { + "epoch": 1.53, + "learning_rate": 1.0151908695295818e-05, + "loss": 0.4343, + "step": 8976 + }, + { + "epoch": 1.53, + "learning_rate": 1.0150067516860856e-05, + "loss": 0.4226, + "step": 8977 + }, + { + "epoch": 1.53, + "learning_rate": 1.0148226333337541e-05, + "loss": 0.4358, + "step": 8978 + }, + { + "epoch": 1.53, + "learning_rate": 1.0146385144788296e-05, + "loss": 0.4122, + "step": 8979 + }, + { + "epoch": 1.53, + "learning_rate": 1.0144543951275554e-05, + "loss": 0.4429, + "step": 8980 + }, + { + "epoch": 1.53, + "learning_rate": 1.0142702752861746e-05, + "loss": 0.4244, + "step": 8981 + }, + { + "epoch": 1.53, + "learning_rate": 1.01408615496093e-05, + "loss": 0.386, + "step": 8982 + }, + { + "epoch": 1.53, + "learning_rate": 1.0139020341580648e-05, + "loss": 0.442, + "step": 8983 + }, + { + "epoch": 1.53, + "learning_rate": 1.0137179128838215e-05, + "loss": 0.4001, + "step": 8984 + }, + { + "epoch": 1.53, + "learning_rate": 1.0135337911444433e-05, + "loss": 0.4073, + "step": 8985 + }, + { + "epoch": 1.53, + "learning_rate": 1.0133496689461737e-05, + "loss": 0.4101, + "step": 8986 + }, + { + "epoch": 1.53, + "learning_rate": 1.0131655462952552e-05, + "loss": 0.3998, + "step": 8987 + }, + { + "epoch": 1.53, + "learning_rate": 1.012981423197931e-05, + "loss": 0.4073, + "step": 8988 + }, + { + "epoch": 1.53, + "learning_rate": 1.0127972996604445e-05, + "loss": 0.404, + "step": 8989 + }, + { + "epoch": 1.53, + "learning_rate": 1.0126131756890382e-05, + "loss": 0.387, + "step": 8990 + }, + { + "epoch": 1.53, + "learning_rate": 1.012429051289956e-05, + "loss": 0.4191, + "step": 8991 + }, + { + "epoch": 1.53, + "learning_rate": 1.0122449264694403e-05, + "loss": 0.4248, + "step": 8992 + }, + { + "epoch": 1.53, + "learning_rate": 1.0120608012337348e-05, + "loss": 0.4498, + "step": 8993 + }, + { + "epoch": 1.53, + "learning_rate": 1.0118766755890819e-05, + "loss": 0.4312, + "step": 8994 + }, + { + "epoch": 1.53, + "learning_rate": 1.0116925495417253e-05, + "loss": 0.4053, + "step": 8995 + }, + { + "epoch": 1.53, + "learning_rate": 1.0115084230979083e-05, + "loss": 0.4577, + "step": 8996 + }, + { + "epoch": 1.53, + "learning_rate": 1.0113242962638743e-05, + "loss": 0.3929, + "step": 8997 + }, + { + "epoch": 1.53, + "learning_rate": 1.0111401690458655e-05, + "loss": 0.4327, + "step": 8998 + }, + { + "epoch": 1.53, + "learning_rate": 1.0109560414501263e-05, + "loss": 0.45, + "step": 8999 + }, + { + "epoch": 1.53, + "learning_rate": 1.010771913482899e-05, + "loss": 0.4259, + "step": 9000 + }, + { + "epoch": 1.54, + "learning_rate": 1.0105877851504278e-05, + "loss": 0.4172, + "step": 9001 + }, + { + "epoch": 1.54, + "learning_rate": 1.0104036564589547e-05, + "loss": 0.4571, + "step": 9002 + }, + { + "epoch": 1.54, + "learning_rate": 1.0102195274147239e-05, + "loss": 0.4113, + "step": 9003 + }, + { + "epoch": 1.54, + "learning_rate": 1.0100353980239787e-05, + "loss": 0.4207, + "step": 9004 + }, + { + "epoch": 1.54, + "learning_rate": 1.009851268292962e-05, + "loss": 0.4015, + "step": 9005 + }, + { + "epoch": 1.54, + "learning_rate": 1.0096671382279171e-05, + "loss": 0.4219, + "step": 9006 + }, + { + "epoch": 1.54, + "learning_rate": 1.0094830078350879e-05, + "loss": 0.4227, + "step": 9007 + }, + { + "epoch": 1.54, + "learning_rate": 1.0092988771207171e-05, + "loss": 0.4313, + "step": 9008 + }, + { + "epoch": 1.54, + "learning_rate": 1.0091147460910482e-05, + "loss": 0.4359, + "step": 9009 + }, + { + "epoch": 1.54, + "learning_rate": 1.0089306147523244e-05, + "loss": 0.401, + "step": 9010 + }, + { + "epoch": 1.54, + "learning_rate": 1.0087464831107894e-05, + "loss": 0.4399, + "step": 9011 + }, + { + "epoch": 1.54, + "learning_rate": 1.0085623511726866e-05, + "loss": 0.4199, + "step": 9012 + }, + { + "epoch": 1.54, + "learning_rate": 1.008378218944259e-05, + "loss": 0.3944, + "step": 9013 + }, + { + "epoch": 1.54, + "learning_rate": 1.0081940864317504e-05, + "loss": 0.4309, + "step": 9014 + }, + { + "epoch": 1.54, + "learning_rate": 1.0080099536414038e-05, + "loss": 0.4205, + "step": 9015 + }, + { + "epoch": 1.54, + "learning_rate": 1.0078258205794632e-05, + "loss": 0.4064, + "step": 9016 + }, + { + "epoch": 1.54, + "learning_rate": 1.0076416872521713e-05, + "loss": 0.4226, + "step": 9017 + }, + { + "epoch": 1.54, + "learning_rate": 1.0074575536657719e-05, + "loss": 0.3993, + "step": 9018 + }, + { + "epoch": 1.54, + "learning_rate": 1.0072734198265082e-05, + "loss": 0.3812, + "step": 9019 + }, + { + "epoch": 1.54, + "learning_rate": 1.0070892857406243e-05, + "loss": 0.4357, + "step": 9020 + }, + { + "epoch": 1.54, + "learning_rate": 1.0069051514143628e-05, + "loss": 0.4073, + "step": 9021 + }, + { + "epoch": 1.54, + "learning_rate": 1.0067210168539678e-05, + "loss": 0.4259, + "step": 9022 + }, + { + "epoch": 1.54, + "learning_rate": 1.0065368820656827e-05, + "loss": 0.4268, + "step": 9023 + }, + { + "epoch": 1.54, + "learning_rate": 1.0063527470557509e-05, + "loss": 0.4026, + "step": 9024 + }, + { + "epoch": 1.54, + "learning_rate": 1.0061686118304152e-05, + "loss": 0.442, + "step": 9025 + }, + { + "epoch": 1.54, + "learning_rate": 1.0059844763959202e-05, + "loss": 0.437, + "step": 9026 + }, + { + "epoch": 1.54, + "learning_rate": 1.0058003407585086e-05, + "loss": 0.4147, + "step": 9027 + }, + { + "epoch": 1.54, + "learning_rate": 1.0056162049244244e-05, + "loss": 0.4258, + "step": 9028 + }, + { + "epoch": 1.54, + "learning_rate": 1.005432068899911e-05, + "loss": 0.4032, + "step": 9029 + }, + { + "epoch": 1.54, + "learning_rate": 1.0052479326912117e-05, + "loss": 0.4266, + "step": 9030 + }, + { + "epoch": 1.54, + "learning_rate": 1.00506379630457e-05, + "loss": 0.4476, + "step": 9031 + }, + { + "epoch": 1.54, + "learning_rate": 1.0048796597462298e-05, + "loss": 0.4058, + "step": 9032 + }, + { + "epoch": 1.54, + "learning_rate": 1.0046955230224345e-05, + "loss": 0.4222, + "step": 9033 + }, + { + "epoch": 1.54, + "learning_rate": 1.0045113861394273e-05, + "loss": 0.4207, + "step": 9034 + }, + { + "epoch": 1.54, + "learning_rate": 1.0043272491034523e-05, + "loss": 0.3944, + "step": 9035 + }, + { + "epoch": 1.54, + "learning_rate": 1.0041431119207527e-05, + "loss": 0.4021, + "step": 9036 + }, + { + "epoch": 1.54, + "learning_rate": 1.0039589745975723e-05, + "loss": 0.4307, + "step": 9037 + }, + { + "epoch": 1.54, + "learning_rate": 1.0037748371401543e-05, + "loss": 0.4318, + "step": 9038 + }, + { + "epoch": 1.54, + "learning_rate": 1.0035906995547426e-05, + "loss": 0.4073, + "step": 9039 + }, + { + "epoch": 1.54, + "learning_rate": 1.0034065618475806e-05, + "loss": 0.4169, + "step": 9040 + }, + { + "epoch": 1.54, + "learning_rate": 1.003222424024912e-05, + "loss": 0.4274, + "step": 9041 + }, + { + "epoch": 1.54, + "learning_rate": 1.0030382860929804e-05, + "loss": 0.4386, + "step": 9042 + }, + { + "epoch": 1.54, + "learning_rate": 1.002854148058029e-05, + "loss": 0.4296, + "step": 9043 + }, + { + "epoch": 1.54, + "learning_rate": 1.0026700099263019e-05, + "loss": 0.4498, + "step": 9044 + }, + { + "epoch": 1.54, + "learning_rate": 1.0024858717040422e-05, + "loss": 0.4119, + "step": 9045 + }, + { + "epoch": 1.54, + "learning_rate": 1.002301733397494e-05, + "loss": 0.4679, + "step": 9046 + }, + { + "epoch": 1.54, + "learning_rate": 1.002117595012901e-05, + "loss": 0.4119, + "step": 9047 + }, + { + "epoch": 1.54, + "learning_rate": 1.001933456556506e-05, + "loss": 0.4457, + "step": 9048 + }, + { + "epoch": 1.54, + "learning_rate": 1.001749318034553e-05, + "loss": 0.4286, + "step": 9049 + }, + { + "epoch": 1.54, + "learning_rate": 1.0015651794532862e-05, + "loss": 0.4334, + "step": 9050 + }, + { + "epoch": 1.54, + "learning_rate": 1.0013810408189481e-05, + "loss": 0.4054, + "step": 9051 + }, + { + "epoch": 1.54, + "learning_rate": 1.0011969021377833e-05, + "loss": 0.4175, + "step": 9052 + }, + { + "epoch": 1.54, + "learning_rate": 1.0010127634160349e-05, + "loss": 0.4529, + "step": 9053 + }, + { + "epoch": 1.54, + "learning_rate": 1.0008286246599465e-05, + "loss": 0.3771, + "step": 9054 + }, + { + "epoch": 1.54, + "learning_rate": 1.000644485875762e-05, + "loss": 0.4065, + "step": 9055 + }, + { + "epoch": 1.54, + "learning_rate": 1.0004603470697248e-05, + "loss": 0.4265, + "step": 9056 + }, + { + "epoch": 1.54, + "learning_rate": 1.0002762082480787e-05, + "loss": 0.42, + "step": 9057 + }, + { + "epoch": 1.54, + "learning_rate": 1.0000920694170669e-05, + "loss": 0.4525, + "step": 9058 + }, + { + "epoch": 1.54, + "learning_rate": 9.999079305829336e-06, + "loss": 0.4412, + "step": 9059 + }, + { + "epoch": 1.55, + "learning_rate": 9.997237917519218e-06, + "loss": 0.3929, + "step": 9060 + }, + { + "epoch": 1.55, + "learning_rate": 9.995396529302754e-06, + "loss": 0.4699, + "step": 9061 + }, + { + "epoch": 1.55, + "learning_rate": 9.993555141242382e-06, + "loss": 0.4276, + "step": 9062 + }, + { + "epoch": 1.55, + "learning_rate": 9.991713753400536e-06, + "loss": 0.3972, + "step": 9063 + }, + { + "epoch": 1.55, + "learning_rate": 9.989872365839653e-06, + "loss": 0.4297, + "step": 9064 + }, + { + "epoch": 1.55, + "learning_rate": 9.988030978622169e-06, + "loss": 0.4183, + "step": 9065 + }, + { + "epoch": 1.55, + "learning_rate": 9.986189591810518e-06, + "loss": 0.4313, + "step": 9066 + }, + { + "epoch": 1.55, + "learning_rate": 9.984348205467143e-06, + "loss": 0.3841, + "step": 9067 + }, + { + "epoch": 1.55, + "learning_rate": 9.982506819654474e-06, + "loss": 0.4506, + "step": 9068 + }, + { + "epoch": 1.55, + "learning_rate": 9.980665434434944e-06, + "loss": 0.4293, + "step": 9069 + }, + { + "epoch": 1.55, + "learning_rate": 9.978824049870995e-06, + "loss": 0.4121, + "step": 9070 + }, + { + "epoch": 1.55, + "learning_rate": 9.976982666025063e-06, + "loss": 0.4343, + "step": 9071 + }, + { + "epoch": 1.55, + "learning_rate": 9.97514128295958e-06, + "loss": 0.4236, + "step": 9072 + }, + { + "epoch": 1.55, + "learning_rate": 9.973299900736983e-06, + "loss": 0.4257, + "step": 9073 + }, + { + "epoch": 1.55, + "learning_rate": 9.971458519419715e-06, + "loss": 0.4029, + "step": 9074 + }, + { + "epoch": 1.55, + "learning_rate": 9.969617139070203e-06, + "loss": 0.3952, + "step": 9075 + }, + { + "epoch": 1.55, + "learning_rate": 9.967775759750884e-06, + "loss": 0.4232, + "step": 9076 + }, + { + "epoch": 1.55, + "learning_rate": 9.965934381524195e-06, + "loss": 0.4332, + "step": 9077 + }, + { + "epoch": 1.55, + "learning_rate": 9.964093004452577e-06, + "loss": 0.4333, + "step": 9078 + }, + { + "epoch": 1.55, + "learning_rate": 9.96225162859846e-06, + "loss": 0.4417, + "step": 9079 + }, + { + "epoch": 1.55, + "learning_rate": 9.960410254024279e-06, + "loss": 0.4141, + "step": 9080 + }, + { + "epoch": 1.55, + "learning_rate": 9.958568880792473e-06, + "loss": 0.4068, + "step": 9081 + }, + { + "epoch": 1.55, + "learning_rate": 9.956727508965482e-06, + "loss": 0.4118, + "step": 9082 + }, + { + "epoch": 1.55, + "learning_rate": 9.954886138605729e-06, + "loss": 0.4442, + "step": 9083 + }, + { + "epoch": 1.55, + "learning_rate": 9.953044769775658e-06, + "loss": 0.4132, + "step": 9084 + }, + { + "epoch": 1.55, + "learning_rate": 9.951203402537704e-06, + "loss": 0.4107, + "step": 9085 + }, + { + "epoch": 1.55, + "learning_rate": 9.949362036954304e-06, + "loss": 0.3733, + "step": 9086 + }, + { + "epoch": 1.55, + "learning_rate": 9.947520673087886e-06, + "loss": 0.3801, + "step": 9087 + }, + { + "epoch": 1.55, + "learning_rate": 9.945679311000892e-06, + "loss": 0.4242, + "step": 9088 + }, + { + "epoch": 1.55, + "learning_rate": 9.943837950755761e-06, + "loss": 0.4555, + "step": 9089 + }, + { + "epoch": 1.55, + "learning_rate": 9.941996592414918e-06, + "loss": 0.411, + "step": 9090 + }, + { + "epoch": 1.55, + "learning_rate": 9.940155236040803e-06, + "loss": 0.4615, + "step": 9091 + }, + { + "epoch": 1.55, + "learning_rate": 9.93831388169585e-06, + "loss": 0.4707, + "step": 9092 + }, + { + "epoch": 1.55, + "learning_rate": 9.936472529442496e-06, + "loss": 0.4276, + "step": 9093 + }, + { + "epoch": 1.55, + "learning_rate": 9.934631179343177e-06, + "loss": 0.4273, + "step": 9094 + }, + { + "epoch": 1.55, + "learning_rate": 9.932789831460322e-06, + "loss": 0.4215, + "step": 9095 + }, + { + "epoch": 1.55, + "learning_rate": 9.93094848585637e-06, + "loss": 0.3992, + "step": 9096 + }, + { + "epoch": 1.55, + "learning_rate": 9.929107142593762e-06, + "loss": 0.4267, + "step": 9097 + }, + { + "epoch": 1.55, + "learning_rate": 9.92726580173492e-06, + "loss": 0.389, + "step": 9098 + }, + { + "epoch": 1.55, + "learning_rate": 9.925424463342285e-06, + "loss": 0.4304, + "step": 9099 + }, + { + "epoch": 1.55, + "learning_rate": 9.92358312747829e-06, + "loss": 0.3983, + "step": 9100 + }, + { + "epoch": 1.55, + "learning_rate": 9.921741794205372e-06, + "loss": 0.4015, + "step": 9101 + }, + { + "epoch": 1.55, + "learning_rate": 9.919900463585965e-06, + "loss": 0.4172, + "step": 9102 + }, + { + "epoch": 1.55, + "learning_rate": 9.918059135682498e-06, + "loss": 0.4498, + "step": 9103 + }, + { + "epoch": 1.55, + "learning_rate": 9.916217810557415e-06, + "loss": 0.4376, + "step": 9104 + }, + { + "epoch": 1.55, + "learning_rate": 9.914376488273139e-06, + "loss": 0.3747, + "step": 9105 + }, + { + "epoch": 1.55, + "learning_rate": 9.91253516889211e-06, + "loss": 0.3937, + "step": 9106 + }, + { + "epoch": 1.55, + "learning_rate": 9.910693852476758e-06, + "loss": 0.3922, + "step": 9107 + }, + { + "epoch": 1.55, + "learning_rate": 9.908852539089521e-06, + "loss": 0.4013, + "step": 9108 + }, + { + "epoch": 1.55, + "learning_rate": 9.907011228792832e-06, + "loss": 0.4401, + "step": 9109 + }, + { + "epoch": 1.55, + "learning_rate": 9.905169921649125e-06, + "loss": 0.4629, + "step": 9110 + }, + { + "epoch": 1.55, + "learning_rate": 9.903328617720829e-06, + "loss": 0.4244, + "step": 9111 + }, + { + "epoch": 1.55, + "learning_rate": 9.901487317070385e-06, + "loss": 0.4384, + "step": 9112 + }, + { + "epoch": 1.55, + "learning_rate": 9.899646019760217e-06, + "loss": 0.4223, + "step": 9113 + }, + { + "epoch": 1.55, + "learning_rate": 9.897804725852763e-06, + "loss": 0.4052, + "step": 9114 + }, + { + "epoch": 1.55, + "learning_rate": 9.895963435410455e-06, + "loss": 0.4246, + "step": 9115 + }, + { + "epoch": 1.55, + "learning_rate": 9.894122148495725e-06, + "loss": 0.4399, + "step": 9116 + }, + { + "epoch": 1.55, + "learning_rate": 9.89228086517101e-06, + "loss": 0.3984, + "step": 9117 + }, + { + "epoch": 1.56, + "learning_rate": 9.890439585498739e-06, + "loss": 0.442, + "step": 9118 + }, + { + "epoch": 1.56, + "learning_rate": 9.888598309541347e-06, + "loss": 0.4349, + "step": 9119 + }, + { + "epoch": 1.56, + "learning_rate": 9.886757037361264e-06, + "loss": 0.4153, + "step": 9120 + }, + { + "epoch": 1.56, + "learning_rate": 9.884915769020918e-06, + "loss": 0.4133, + "step": 9121 + }, + { + "epoch": 1.56, + "learning_rate": 9.883074504582749e-06, + "loss": 0.4504, + "step": 9122 + }, + { + "epoch": 1.56, + "learning_rate": 9.881233244109184e-06, + "loss": 0.3889, + "step": 9123 + }, + { + "epoch": 1.56, + "learning_rate": 9.879391987662657e-06, + "loss": 0.3986, + "step": 9124 + }, + { + "epoch": 1.56, + "learning_rate": 9.877550735305598e-06, + "loss": 0.4018, + "step": 9125 + }, + { + "epoch": 1.56, + "learning_rate": 9.875709487100444e-06, + "loss": 0.4002, + "step": 9126 + }, + { + "epoch": 1.56, + "learning_rate": 9.87386824310962e-06, + "loss": 0.4162, + "step": 9127 + }, + { + "epoch": 1.56, + "learning_rate": 9.87202700339556e-06, + "loss": 0.4489, + "step": 9128 + }, + { + "epoch": 1.56, + "learning_rate": 9.870185768020694e-06, + "loss": 0.4381, + "step": 9129 + }, + { + "epoch": 1.56, + "learning_rate": 9.868344537047453e-06, + "loss": 0.4091, + "step": 9130 + }, + { + "epoch": 1.56, + "learning_rate": 9.866503310538266e-06, + "loss": 0.3928, + "step": 9131 + }, + { + "epoch": 1.56, + "learning_rate": 9.864662088555568e-06, + "loss": 0.413, + "step": 9132 + }, + { + "epoch": 1.56, + "learning_rate": 9.862820871161788e-06, + "loss": 0.4267, + "step": 9133 + }, + { + "epoch": 1.56, + "learning_rate": 9.860979658419357e-06, + "loss": 0.4577, + "step": 9134 + }, + { + "epoch": 1.56, + "learning_rate": 9.859138450390702e-06, + "loss": 0.412, + "step": 9135 + }, + { + "epoch": 1.56, + "learning_rate": 9.857297247138257e-06, + "loss": 0.4245, + "step": 9136 + }, + { + "epoch": 1.56, + "learning_rate": 9.855456048724448e-06, + "loss": 0.4037, + "step": 9137 + }, + { + "epoch": 1.56, + "learning_rate": 9.853614855211706e-06, + "loss": 0.4026, + "step": 9138 + }, + { + "epoch": 1.56, + "learning_rate": 9.851773666662462e-06, + "loss": 0.4341, + "step": 9139 + }, + { + "epoch": 1.56, + "learning_rate": 9.849932483139144e-06, + "loss": 0.4551, + "step": 9140 + }, + { + "epoch": 1.56, + "learning_rate": 9.848091304704186e-06, + "loss": 0.4141, + "step": 9141 + }, + { + "epoch": 1.56, + "learning_rate": 9.84625013142001e-06, + "loss": 0.4178, + "step": 9142 + }, + { + "epoch": 1.56, + "learning_rate": 9.844408963349047e-06, + "loss": 0.434, + "step": 9143 + }, + { + "epoch": 1.56, + "learning_rate": 9.842567800553728e-06, + "loss": 0.4055, + "step": 9144 + }, + { + "epoch": 1.56, + "learning_rate": 9.840726643096479e-06, + "loss": 0.4064, + "step": 9145 + }, + { + "epoch": 1.56, + "learning_rate": 9.838885491039728e-06, + "loss": 0.3867, + "step": 9146 + }, + { + "epoch": 1.56, + "learning_rate": 9.837044344445905e-06, + "loss": 0.4074, + "step": 9147 + }, + { + "epoch": 1.56, + "learning_rate": 9.835203203377437e-06, + "loss": 0.4498, + "step": 9148 + }, + { + "epoch": 1.56, + "learning_rate": 9.833362067896757e-06, + "loss": 0.41, + "step": 9149 + }, + { + "epoch": 1.56, + "learning_rate": 9.831520938066284e-06, + "loss": 0.4287, + "step": 9150 + }, + { + "epoch": 1.56, + "learning_rate": 9.82967981394845e-06, + "loss": 0.4301, + "step": 9151 + }, + { + "epoch": 1.56, + "learning_rate": 9.827838695605679e-06, + "loss": 0.4028, + "step": 9152 + }, + { + "epoch": 1.56, + "learning_rate": 9.825997583100402e-06, + "loss": 0.4293, + "step": 9153 + }, + { + "epoch": 1.56, + "learning_rate": 9.824156476495043e-06, + "loss": 0.4146, + "step": 9154 + }, + { + "epoch": 1.56, + "learning_rate": 9.822315375852029e-06, + "loss": 0.4397, + "step": 9155 + }, + { + "epoch": 1.56, + "learning_rate": 9.82047428123379e-06, + "loss": 0.4385, + "step": 9156 + }, + { + "epoch": 1.56, + "learning_rate": 9.81863319270275e-06, + "loss": 0.407, + "step": 9157 + }, + { + "epoch": 1.56, + "learning_rate": 9.816792110321332e-06, + "loss": 0.4235, + "step": 9158 + }, + { + "epoch": 1.56, + "learning_rate": 9.814951034151965e-06, + "loss": 0.3776, + "step": 9159 + }, + { + "epoch": 1.56, + "learning_rate": 9.813109964257073e-06, + "loss": 0.4079, + "step": 9160 + }, + { + "epoch": 1.56, + "learning_rate": 9.81126890069908e-06, + "loss": 0.3837, + "step": 9161 + }, + { + "epoch": 1.56, + "learning_rate": 9.809427843540415e-06, + "loss": 0.4104, + "step": 9162 + }, + { + "epoch": 1.56, + "learning_rate": 9.807586792843502e-06, + "loss": 0.4204, + "step": 9163 + }, + { + "epoch": 1.56, + "learning_rate": 9.805745748670768e-06, + "loss": 0.4351, + "step": 9164 + }, + { + "epoch": 1.56, + "learning_rate": 9.80390471108463e-06, + "loss": 0.4086, + "step": 9165 + }, + { + "epoch": 1.56, + "learning_rate": 9.802063680147515e-06, + "loss": 0.427, + "step": 9166 + }, + { + "epoch": 1.56, + "learning_rate": 9.800222655921853e-06, + "loss": 0.4176, + "step": 9167 + }, + { + "epoch": 1.56, + "learning_rate": 9.79838163847006e-06, + "loss": 0.4431, + "step": 9168 + }, + { + "epoch": 1.56, + "learning_rate": 9.796540627854564e-06, + "loss": 0.4139, + "step": 9169 + }, + { + "epoch": 1.56, + "learning_rate": 9.794699624137786e-06, + "loss": 0.4321, + "step": 9170 + }, + { + "epoch": 1.56, + "learning_rate": 9.792858627382153e-06, + "loss": 0.419, + "step": 9171 + }, + { + "epoch": 1.56, + "learning_rate": 9.791017637650087e-06, + "loss": 0.3901, + "step": 9172 + }, + { + "epoch": 1.56, + "learning_rate": 9.789176655004007e-06, + "loss": 0.4203, + "step": 9173 + }, + { + "epoch": 1.56, + "learning_rate": 9.787335679506335e-06, + "loss": 0.4349, + "step": 9174 + }, + { + "epoch": 1.56, + "learning_rate": 9.785494711219498e-06, + "loss": 0.4148, + "step": 9175 + }, + { + "epoch": 1.56, + "learning_rate": 9.783653750205916e-06, + "loss": 0.4255, + "step": 9176 + }, + { + "epoch": 1.57, + "learning_rate": 9.781812796528008e-06, + "loss": 0.448, + "step": 9177 + }, + { + "epoch": 1.57, + "learning_rate": 9.779971850248198e-06, + "loss": 0.4207, + "step": 9178 + }, + { + "epoch": 1.57, + "learning_rate": 9.77813091142891e-06, + "loss": 0.4246, + "step": 9179 + }, + { + "epoch": 1.57, + "learning_rate": 9.776289980132559e-06, + "loss": 0.4364, + "step": 9180 + }, + { + "epoch": 1.57, + "learning_rate": 9.774449056421567e-06, + "loss": 0.424, + "step": 9181 + }, + { + "epoch": 1.57, + "learning_rate": 9.772608140358356e-06, + "loss": 0.4272, + "step": 9182 + }, + { + "epoch": 1.57, + "learning_rate": 9.77076723200535e-06, + "loss": 0.4106, + "step": 9183 + }, + { + "epoch": 1.57, + "learning_rate": 9.76892633142496e-06, + "loss": 0.4211, + "step": 9184 + }, + { + "epoch": 1.57, + "learning_rate": 9.767085438679611e-06, + "loss": 0.4084, + "step": 9185 + }, + { + "epoch": 1.57, + "learning_rate": 9.765244553831721e-06, + "loss": 0.3937, + "step": 9186 + }, + { + "epoch": 1.57, + "learning_rate": 9.763403676943716e-06, + "loss": 0.4067, + "step": 9187 + }, + { + "epoch": 1.57, + "learning_rate": 9.761562808078003e-06, + "loss": 0.3949, + "step": 9188 + }, + { + "epoch": 1.57, + "learning_rate": 9.759721947297006e-06, + "loss": 0.409, + "step": 9189 + }, + { + "epoch": 1.57, + "learning_rate": 9.757881094663147e-06, + "loss": 0.4265, + "step": 9190 + }, + { + "epoch": 1.57, + "learning_rate": 9.756040250238836e-06, + "loss": 0.4372, + "step": 9191 + }, + { + "epoch": 1.57, + "learning_rate": 9.754199414086498e-06, + "loss": 0.4103, + "step": 9192 + }, + { + "epoch": 1.57, + "learning_rate": 9.752358586268544e-06, + "loss": 0.4232, + "step": 9193 + }, + { + "epoch": 1.57, + "learning_rate": 9.750517766847402e-06, + "loss": 0.4387, + "step": 9194 + }, + { + "epoch": 1.57, + "learning_rate": 9.748676955885476e-06, + "loss": 0.3948, + "step": 9195 + }, + { + "epoch": 1.57, + "learning_rate": 9.746836153445188e-06, + "loss": 0.4408, + "step": 9196 + }, + { + "epoch": 1.57, + "learning_rate": 9.744995359588956e-06, + "loss": 0.4009, + "step": 9197 + }, + { + "epoch": 1.57, + "learning_rate": 9.743154574379194e-06, + "loss": 0.426, + "step": 9198 + }, + { + "epoch": 1.57, + "learning_rate": 9.741313797878317e-06, + "loss": 0.4402, + "step": 9199 + }, + { + "epoch": 1.57, + "learning_rate": 9.739473030148744e-06, + "loss": 0.4084, + "step": 9200 + }, + { + "epoch": 1.57, + "learning_rate": 9.737632271252885e-06, + "loss": 0.4433, + "step": 9201 + }, + { + "epoch": 1.57, + "learning_rate": 9.735791521253162e-06, + "loss": 0.4079, + "step": 9202 + }, + { + "epoch": 1.57, + "learning_rate": 9.733950780211982e-06, + "loss": 0.44, + "step": 9203 + }, + { + "epoch": 1.57, + "learning_rate": 9.732110048191762e-06, + "loss": 0.4141, + "step": 9204 + }, + { + "epoch": 1.57, + "learning_rate": 9.730269325254915e-06, + "loss": 0.4059, + "step": 9205 + }, + { + "epoch": 1.57, + "learning_rate": 9.728428611463859e-06, + "loss": 0.4323, + "step": 9206 + }, + { + "epoch": 1.57, + "learning_rate": 9.726587906881e-06, + "loss": 0.4252, + "step": 9207 + }, + { + "epoch": 1.57, + "learning_rate": 9.724747211568758e-06, + "loss": 0.4323, + "step": 9208 + }, + { + "epoch": 1.57, + "learning_rate": 9.722906525589545e-06, + "loss": 0.432, + "step": 9209 + }, + { + "epoch": 1.57, + "learning_rate": 9.721065849005767e-06, + "loss": 0.3927, + "step": 9210 + }, + { + "epoch": 1.57, + "learning_rate": 9.71922518187984e-06, + "loss": 0.409, + "step": 9211 + }, + { + "epoch": 1.57, + "learning_rate": 9.717384524274174e-06, + "loss": 0.4249, + "step": 9212 + }, + { + "epoch": 1.57, + "learning_rate": 9.715543876251184e-06, + "loss": 0.4258, + "step": 9213 + }, + { + "epoch": 1.57, + "learning_rate": 9.713703237873282e-06, + "loss": 0.4276, + "step": 9214 + }, + { + "epoch": 1.57, + "learning_rate": 9.711862609202872e-06, + "loss": 0.4349, + "step": 9215 + }, + { + "epoch": 1.57, + "learning_rate": 9.71002199030237e-06, + "loss": 0.3821, + "step": 9216 + }, + { + "epoch": 1.57, + "learning_rate": 9.708181381234187e-06, + "loss": 0.4145, + "step": 9217 + }, + { + "epoch": 1.57, + "learning_rate": 9.706340782060728e-06, + "loss": 0.4299, + "step": 9218 + }, + { + "epoch": 1.57, + "learning_rate": 9.704500192844401e-06, + "loss": 0.4146, + "step": 9219 + }, + { + "epoch": 1.57, + "learning_rate": 9.702659613647622e-06, + "loss": 0.403, + "step": 9220 + }, + { + "epoch": 1.57, + "learning_rate": 9.700819044532795e-06, + "loss": 0.4316, + "step": 9221 + }, + { + "epoch": 1.57, + "learning_rate": 9.69897848556233e-06, + "loss": 0.4191, + "step": 9222 + }, + { + "epoch": 1.57, + "learning_rate": 9.697137936798635e-06, + "loss": 0.437, + "step": 9223 + }, + { + "epoch": 1.57, + "learning_rate": 9.69529739830412e-06, + "loss": 0.4483, + "step": 9224 + }, + { + "epoch": 1.57, + "learning_rate": 9.693456870141186e-06, + "loss": 0.4226, + "step": 9225 + }, + { + "epoch": 1.57, + "learning_rate": 9.691616352372244e-06, + "loss": 0.4394, + "step": 9226 + }, + { + "epoch": 1.57, + "learning_rate": 9.689775845059701e-06, + "loss": 0.4662, + "step": 9227 + }, + { + "epoch": 1.57, + "learning_rate": 9.687935348265961e-06, + "loss": 0.4214, + "step": 9228 + }, + { + "epoch": 1.57, + "learning_rate": 9.686094862053432e-06, + "loss": 0.453, + "step": 9229 + }, + { + "epoch": 1.57, + "learning_rate": 9.684254386484522e-06, + "loss": 0.4091, + "step": 9230 + }, + { + "epoch": 1.57, + "learning_rate": 9.682413921621631e-06, + "loss": 0.4425, + "step": 9231 + }, + { + "epoch": 1.57, + "learning_rate": 9.68057346752717e-06, + "loss": 0.4222, + "step": 9232 + }, + { + "epoch": 1.57, + "learning_rate": 9.678733024263536e-06, + "loss": 0.4316, + "step": 9233 + }, + { + "epoch": 1.57, + "learning_rate": 9.676892591893136e-06, + "loss": 0.4542, + "step": 9234 + }, + { + "epoch": 1.57, + "learning_rate": 9.675052170478374e-06, + "loss": 0.4523, + "step": 9235 + }, + { + "epoch": 1.58, + "learning_rate": 9.673211760081655e-06, + "loss": 0.3879, + "step": 9236 + }, + { + "epoch": 1.58, + "learning_rate": 9.671371360765382e-06, + "loss": 0.4181, + "step": 9237 + }, + { + "epoch": 1.58, + "learning_rate": 9.669530972591956e-06, + "loss": 0.4412, + "step": 9238 + }, + { + "epoch": 1.58, + "learning_rate": 9.667690595623782e-06, + "loss": 0.4054, + "step": 9239 + }, + { + "epoch": 1.58, + "learning_rate": 9.665850229923258e-06, + "loss": 0.4168, + "step": 9240 + }, + { + "epoch": 1.58, + "learning_rate": 9.664009875552786e-06, + "loss": 0.4323, + "step": 9241 + }, + { + "epoch": 1.58, + "learning_rate": 9.662169532574767e-06, + "loss": 0.4245, + "step": 9242 + }, + { + "epoch": 1.58, + "learning_rate": 9.660329201051604e-06, + "loss": 0.4193, + "step": 9243 + }, + { + "epoch": 1.58, + "learning_rate": 9.658488881045695e-06, + "loss": 0.4039, + "step": 9244 + }, + { + "epoch": 1.58, + "learning_rate": 9.656648572619444e-06, + "loss": 0.4422, + "step": 9245 + }, + { + "epoch": 1.58, + "learning_rate": 9.654808275835246e-06, + "loss": 0.4093, + "step": 9246 + }, + { + "epoch": 1.58, + "learning_rate": 9.652967990755504e-06, + "loss": 0.4155, + "step": 9247 + }, + { + "epoch": 1.58, + "learning_rate": 9.651127717442614e-06, + "loss": 0.4147, + "step": 9248 + }, + { + "epoch": 1.58, + "learning_rate": 9.649287455958973e-06, + "loss": 0.4388, + "step": 9249 + }, + { + "epoch": 1.58, + "learning_rate": 9.647447206366981e-06, + "loss": 0.4079, + "step": 9250 + }, + { + "epoch": 1.58, + "learning_rate": 9.645606968729035e-06, + "loss": 0.4451, + "step": 9251 + }, + { + "epoch": 1.58, + "learning_rate": 9.643766743107533e-06, + "loss": 0.4351, + "step": 9252 + }, + { + "epoch": 1.58, + "learning_rate": 9.641926529564873e-06, + "loss": 0.4186, + "step": 9253 + }, + { + "epoch": 1.58, + "learning_rate": 9.640086328163452e-06, + "loss": 0.4559, + "step": 9254 + }, + { + "epoch": 1.58, + "learning_rate": 9.638246138965661e-06, + "loss": 0.4552, + "step": 9255 + }, + { + "epoch": 1.58, + "learning_rate": 9.636405962033896e-06, + "loss": 0.4205, + "step": 9256 + }, + { + "epoch": 1.58, + "learning_rate": 9.634565797430556e-06, + "loss": 0.4407, + "step": 9257 + }, + { + "epoch": 1.58, + "learning_rate": 9.63272564521803e-06, + "loss": 0.4315, + "step": 9258 + }, + { + "epoch": 1.58, + "learning_rate": 9.630885505458719e-06, + "loss": 0.4315, + "step": 9259 + }, + { + "epoch": 1.58, + "learning_rate": 9.629045378215014e-06, + "loss": 0.4582, + "step": 9260 + }, + { + "epoch": 1.58, + "learning_rate": 9.62720526354931e-06, + "loss": 0.4085, + "step": 9261 + }, + { + "epoch": 1.58, + "learning_rate": 9.625365161523998e-06, + "loss": 0.4248, + "step": 9262 + }, + { + "epoch": 1.58, + "learning_rate": 9.62352507220147e-06, + "loss": 0.4554, + "step": 9263 + }, + { + "epoch": 1.58, + "learning_rate": 9.621684995644118e-06, + "loss": 0.4127, + "step": 9264 + }, + { + "epoch": 1.58, + "learning_rate": 9.619844931914333e-06, + "loss": 0.4173, + "step": 9265 + }, + { + "epoch": 1.58, + "learning_rate": 9.61800488107451e-06, + "loss": 0.4328, + "step": 9266 + }, + { + "epoch": 1.58, + "learning_rate": 9.616164843187036e-06, + "loss": 0.4022, + "step": 9267 + }, + { + "epoch": 1.58, + "learning_rate": 9.614324818314303e-06, + "loss": 0.4382, + "step": 9268 + }, + { + "epoch": 1.58, + "learning_rate": 9.612484806518704e-06, + "loss": 0.4354, + "step": 9269 + }, + { + "epoch": 1.58, + "learning_rate": 9.610644807862625e-06, + "loss": 0.4278, + "step": 9270 + }, + { + "epoch": 1.58, + "learning_rate": 9.608804822408456e-06, + "loss": 0.4269, + "step": 9271 + }, + { + "epoch": 1.58, + "learning_rate": 9.60696485021858e-06, + "loss": 0.4374, + "step": 9272 + }, + { + "epoch": 1.58, + "learning_rate": 9.605124891355393e-06, + "loss": 0.4161, + "step": 9273 + }, + { + "epoch": 1.58, + "learning_rate": 9.60328494588128e-06, + "loss": 0.3887, + "step": 9274 + }, + { + "epoch": 1.58, + "learning_rate": 9.601445013858627e-06, + "loss": 0.4249, + "step": 9275 + }, + { + "epoch": 1.58, + "learning_rate": 9.59960509534982e-06, + "loss": 0.4275, + "step": 9276 + }, + { + "epoch": 1.58, + "learning_rate": 9.597765190417256e-06, + "loss": 0.3952, + "step": 9277 + }, + { + "epoch": 1.58, + "learning_rate": 9.595925299123305e-06, + "loss": 0.3966, + "step": 9278 + }, + { + "epoch": 1.58, + "learning_rate": 9.594085421530362e-06, + "loss": 0.4291, + "step": 9279 + }, + { + "epoch": 1.58, + "learning_rate": 9.592245557700805e-06, + "loss": 0.4073, + "step": 9280 + }, + { + "epoch": 1.58, + "learning_rate": 9.590405707697026e-06, + "loss": 0.4219, + "step": 9281 + }, + { + "epoch": 1.58, + "learning_rate": 9.588565871581405e-06, + "loss": 0.4261, + "step": 9282 + }, + { + "epoch": 1.58, + "learning_rate": 9.586726049416324e-06, + "loss": 0.4211, + "step": 9283 + }, + { + "epoch": 1.58, + "learning_rate": 9.584886241264174e-06, + "loss": 0.4091, + "step": 9284 + }, + { + "epoch": 1.58, + "learning_rate": 9.583046447187332e-06, + "loss": 0.4423, + "step": 9285 + }, + { + "epoch": 1.58, + "learning_rate": 9.581206667248175e-06, + "loss": 0.4005, + "step": 9286 + }, + { + "epoch": 1.58, + "learning_rate": 9.579366901509092e-06, + "loss": 0.4195, + "step": 9287 + }, + { + "epoch": 1.58, + "learning_rate": 9.577527150032462e-06, + "loss": 0.4634, + "step": 9288 + }, + { + "epoch": 1.58, + "learning_rate": 9.575687412880664e-06, + "loss": 0.4073, + "step": 9289 + }, + { + "epoch": 1.58, + "learning_rate": 9.57384769011608e-06, + "loss": 0.4274, + "step": 9290 + }, + { + "epoch": 1.58, + "learning_rate": 9.57200798180109e-06, + "loss": 0.4609, + "step": 9291 + }, + { + "epoch": 1.58, + "learning_rate": 9.570168287998076e-06, + "loss": 0.3927, + "step": 9292 + }, + { + "epoch": 1.58, + "learning_rate": 9.568328608769409e-06, + "loss": 0.433, + "step": 9293 + }, + { + "epoch": 1.59, + "learning_rate": 9.566488944177472e-06, + "loss": 0.4054, + "step": 9294 + }, + { + "epoch": 1.59, + "learning_rate": 9.564649294284644e-06, + "loss": 0.4685, + "step": 9295 + }, + { + "epoch": 1.59, + "learning_rate": 9.562809659153298e-06, + "loss": 0.4349, + "step": 9296 + }, + { + "epoch": 1.59, + "learning_rate": 9.560970038845814e-06, + "loss": 0.3931, + "step": 9297 + }, + { + "epoch": 1.59, + "learning_rate": 9.559130433424565e-06, + "loss": 0.4293, + "step": 9298 + }, + { + "epoch": 1.59, + "learning_rate": 9.557290842951932e-06, + "loss": 0.4056, + "step": 9299 + }, + { + "epoch": 1.59, + "learning_rate": 9.55545126749029e-06, + "loss": 0.4422, + "step": 9300 + }, + { + "epoch": 1.59, + "learning_rate": 9.553611707102008e-06, + "loss": 0.4503, + "step": 9301 + }, + { + "epoch": 1.59, + "learning_rate": 9.551772161849464e-06, + "loss": 0.4124, + "step": 9302 + }, + { + "epoch": 1.59, + "learning_rate": 9.549932631795027e-06, + "loss": 0.4197, + "step": 9303 + }, + { + "epoch": 1.59, + "learning_rate": 9.548093117001076e-06, + "loss": 0.4203, + "step": 9304 + }, + { + "epoch": 1.59, + "learning_rate": 9.54625361752998e-06, + "loss": 0.3981, + "step": 9305 + }, + { + "epoch": 1.59, + "learning_rate": 9.544414133444115e-06, + "loss": 0.426, + "step": 9306 + }, + { + "epoch": 1.59, + "learning_rate": 9.542574664805851e-06, + "loss": 0.4293, + "step": 9307 + }, + { + "epoch": 1.59, + "learning_rate": 9.540735211677556e-06, + "loss": 0.4079, + "step": 9308 + }, + { + "epoch": 1.59, + "learning_rate": 9.538895774121604e-06, + "loss": 0.4139, + "step": 9309 + }, + { + "epoch": 1.59, + "learning_rate": 9.537056352200364e-06, + "loss": 0.4016, + "step": 9310 + }, + { + "epoch": 1.59, + "learning_rate": 9.535216945976203e-06, + "loss": 0.4244, + "step": 9311 + }, + { + "epoch": 1.59, + "learning_rate": 9.533377555511494e-06, + "loss": 0.4376, + "step": 9312 + }, + { + "epoch": 1.59, + "learning_rate": 9.531538180868603e-06, + "loss": 0.4043, + "step": 9313 + }, + { + "epoch": 1.59, + "learning_rate": 9.529698822109898e-06, + "loss": 0.4182, + "step": 9314 + }, + { + "epoch": 1.59, + "learning_rate": 9.527859479297752e-06, + "loss": 0.4421, + "step": 9315 + }, + { + "epoch": 1.59, + "learning_rate": 9.526020152494521e-06, + "loss": 0.385, + "step": 9316 + }, + { + "epoch": 1.59, + "learning_rate": 9.524180841762577e-06, + "loss": 0.4204, + "step": 9317 + }, + { + "epoch": 1.59, + "learning_rate": 9.52234154716429e-06, + "loss": 0.4541, + "step": 9318 + }, + { + "epoch": 1.59, + "learning_rate": 9.520502268762016e-06, + "loss": 0.4574, + "step": 9319 + }, + { + "epoch": 1.59, + "learning_rate": 9.518663006618126e-06, + "loss": 0.4519, + "step": 9320 + }, + { + "epoch": 1.59, + "learning_rate": 9.516823760794983e-06, + "loss": 0.4414, + "step": 9321 + }, + { + "epoch": 1.59, + "learning_rate": 9.514984531354953e-06, + "loss": 0.4236, + "step": 9322 + }, + { + "epoch": 1.59, + "learning_rate": 9.513145318360393e-06, + "loss": 0.3996, + "step": 9323 + }, + { + "epoch": 1.59, + "learning_rate": 9.511306121873667e-06, + "loss": 0.4064, + "step": 9324 + }, + { + "epoch": 1.59, + "learning_rate": 9.50946694195714e-06, + "loss": 0.43, + "step": 9325 + }, + { + "epoch": 1.59, + "learning_rate": 9.507627778673171e-06, + "loss": 0.4196, + "step": 9326 + }, + { + "epoch": 1.59, + "learning_rate": 9.50578863208412e-06, + "loss": 0.4657, + "step": 9327 + }, + { + "epoch": 1.59, + "learning_rate": 9.503949502252348e-06, + "loss": 0.4019, + "step": 9328 + }, + { + "epoch": 1.59, + "learning_rate": 9.502110389240217e-06, + "loss": 0.4315, + "step": 9329 + }, + { + "epoch": 1.59, + "learning_rate": 9.500271293110087e-06, + "loss": 0.4342, + "step": 9330 + }, + { + "epoch": 1.59, + "learning_rate": 9.498432213924308e-06, + "loss": 0.4442, + "step": 9331 + }, + { + "epoch": 1.59, + "learning_rate": 9.496593151745243e-06, + "loss": 0.4373, + "step": 9332 + }, + { + "epoch": 1.59, + "learning_rate": 9.494754106635251e-06, + "loss": 0.4466, + "step": 9333 + }, + { + "epoch": 1.59, + "learning_rate": 9.492915078656689e-06, + "loss": 0.4114, + "step": 9334 + }, + { + "epoch": 1.59, + "learning_rate": 9.491076067871909e-06, + "loss": 0.4378, + "step": 9335 + }, + { + "epoch": 1.59, + "learning_rate": 9.489237074343269e-06, + "loss": 0.4202, + "step": 9336 + }, + { + "epoch": 1.59, + "learning_rate": 9.487398098133129e-06, + "loss": 0.4364, + "step": 9337 + }, + { + "epoch": 1.59, + "learning_rate": 9.485559139303832e-06, + "loss": 0.4477, + "step": 9338 + }, + { + "epoch": 1.59, + "learning_rate": 9.483720197917739e-06, + "loss": 0.4277, + "step": 9339 + }, + { + "epoch": 1.59, + "learning_rate": 9.481881274037203e-06, + "loss": 0.4095, + "step": 9340 + }, + { + "epoch": 1.59, + "learning_rate": 9.480042367724574e-06, + "loss": 0.4164, + "step": 9341 + }, + { + "epoch": 1.59, + "learning_rate": 9.478203479042209e-06, + "loss": 0.4193, + "step": 9342 + }, + { + "epoch": 1.59, + "learning_rate": 9.476364608052453e-06, + "loss": 0.3847, + "step": 9343 + }, + { + "epoch": 1.59, + "learning_rate": 9.474525754817662e-06, + "loss": 0.4661, + "step": 9344 + }, + { + "epoch": 1.59, + "learning_rate": 9.472686919400186e-06, + "loss": 0.4267, + "step": 9345 + }, + { + "epoch": 1.59, + "learning_rate": 9.470848101862372e-06, + "loss": 0.4342, + "step": 9346 + }, + { + "epoch": 1.59, + "learning_rate": 9.469009302266567e-06, + "loss": 0.3902, + "step": 9347 + }, + { + "epoch": 1.59, + "learning_rate": 9.467170520675121e-06, + "loss": 0.4174, + "step": 9348 + }, + { + "epoch": 1.59, + "learning_rate": 9.465331757150387e-06, + "loss": 0.4285, + "step": 9349 + }, + { + "epoch": 1.59, + "learning_rate": 9.463493011754706e-06, + "loss": 0.4155, + "step": 9350 + }, + { + "epoch": 1.59, + "learning_rate": 9.461654284550426e-06, + "loss": 0.4471, + "step": 9351 + }, + { + "epoch": 1.59, + "learning_rate": 9.459815575599897e-06, + "loss": 0.421, + "step": 9352 + }, + { + "epoch": 1.6, + "learning_rate": 9.457976884965459e-06, + "loss": 0.4161, + "step": 9353 + }, + { + "epoch": 1.6, + "learning_rate": 9.456138212709456e-06, + "loss": 0.4115, + "step": 9354 + }, + { + "epoch": 1.6, + "learning_rate": 9.454299558894235e-06, + "loss": 0.4584, + "step": 9355 + }, + { + "epoch": 1.6, + "learning_rate": 9.452460923582139e-06, + "loss": 0.4415, + "step": 9356 + }, + { + "epoch": 1.6, + "learning_rate": 9.450622306835516e-06, + "loss": 0.4119, + "step": 9357 + }, + { + "epoch": 1.6, + "learning_rate": 9.448783708716697e-06, + "loss": 0.407, + "step": 9358 + }, + { + "epoch": 1.6, + "learning_rate": 9.446945129288032e-06, + "loss": 0.3975, + "step": 9359 + }, + { + "epoch": 1.6, + "learning_rate": 9.44510656861186e-06, + "loss": 0.4171, + "step": 9360 + }, + { + "epoch": 1.6, + "learning_rate": 9.443268026750521e-06, + "loss": 0.4367, + "step": 9361 + }, + { + "epoch": 1.6, + "learning_rate": 9.441429503766352e-06, + "loss": 0.4108, + "step": 9362 + }, + { + "epoch": 1.6, + "learning_rate": 9.439590999721694e-06, + "loss": 0.4113, + "step": 9363 + }, + { + "epoch": 1.6, + "learning_rate": 9.437752514678888e-06, + "loss": 0.3949, + "step": 9364 + }, + { + "epoch": 1.6, + "learning_rate": 9.43591404870027e-06, + "loss": 0.3984, + "step": 9365 + }, + { + "epoch": 1.6, + "learning_rate": 9.434075601848175e-06, + "loss": 0.4283, + "step": 9366 + }, + { + "epoch": 1.6, + "learning_rate": 9.432237174184943e-06, + "loss": 0.4366, + "step": 9367 + }, + { + "epoch": 1.6, + "learning_rate": 9.430398765772907e-06, + "loss": 0.4054, + "step": 9368 + }, + { + "epoch": 1.6, + "learning_rate": 9.4285603766744e-06, + "loss": 0.4102, + "step": 9369 + }, + { + "epoch": 1.6, + "learning_rate": 9.426722006951761e-06, + "loss": 0.4224, + "step": 9370 + }, + { + "epoch": 1.6, + "learning_rate": 9.424883656667322e-06, + "loss": 0.4417, + "step": 9371 + }, + { + "epoch": 1.6, + "learning_rate": 9.423045325883415e-06, + "loss": 0.4266, + "step": 9372 + }, + { + "epoch": 1.6, + "learning_rate": 9.421207014662377e-06, + "loss": 0.4203, + "step": 9373 + }, + { + "epoch": 1.6, + "learning_rate": 9.419368723066534e-06, + "loss": 0.427, + "step": 9374 + }, + { + "epoch": 1.6, + "learning_rate": 9.417530451158225e-06, + "loss": 0.4056, + "step": 9375 + }, + { + "epoch": 1.6, + "learning_rate": 9.41569219899977e-06, + "loss": 0.3891, + "step": 9376 + }, + { + "epoch": 1.6, + "learning_rate": 9.413853966653503e-06, + "loss": 0.4121, + "step": 9377 + }, + { + "epoch": 1.6, + "learning_rate": 9.412015754181756e-06, + "loss": 0.4245, + "step": 9378 + }, + { + "epoch": 1.6, + "learning_rate": 9.410177561646855e-06, + "loss": 0.3914, + "step": 9379 + }, + { + "epoch": 1.6, + "learning_rate": 9.408339389111128e-06, + "loss": 0.4326, + "step": 9380 + }, + { + "epoch": 1.6, + "learning_rate": 9.406501236636904e-06, + "loss": 0.3943, + "step": 9381 + }, + { + "epoch": 1.6, + "learning_rate": 9.40466310428651e-06, + "loss": 0.4535, + "step": 9382 + }, + { + "epoch": 1.6, + "learning_rate": 9.402824992122268e-06, + "loss": 0.4124, + "step": 9383 + }, + { + "epoch": 1.6, + "learning_rate": 9.400986900206503e-06, + "loss": 0.3994, + "step": 9384 + }, + { + "epoch": 1.6, + "learning_rate": 9.399148828601542e-06, + "loss": 0.4236, + "step": 9385 + }, + { + "epoch": 1.6, + "learning_rate": 9.397310777369708e-06, + "loss": 0.4193, + "step": 9386 + }, + { + "epoch": 1.6, + "learning_rate": 9.395472746573324e-06, + "loss": 0.4374, + "step": 9387 + }, + { + "epoch": 1.6, + "learning_rate": 9.393634736274713e-06, + "loss": 0.4405, + "step": 9388 + }, + { + "epoch": 1.6, + "learning_rate": 9.391796746536194e-06, + "loss": 0.4079, + "step": 9389 + }, + { + "epoch": 1.6, + "learning_rate": 9.389958777420093e-06, + "loss": 0.4436, + "step": 9390 + }, + { + "epoch": 1.6, + "learning_rate": 9.388120828988727e-06, + "loss": 0.4071, + "step": 9391 + }, + { + "epoch": 1.6, + "learning_rate": 9.386282901304413e-06, + "loss": 0.3928, + "step": 9392 + }, + { + "epoch": 1.6, + "learning_rate": 9.384444994429472e-06, + "loss": 0.4346, + "step": 9393 + }, + { + "epoch": 1.6, + "learning_rate": 9.38260710842622e-06, + "loss": 0.4131, + "step": 9394 + }, + { + "epoch": 1.6, + "learning_rate": 9.38076924335698e-06, + "loss": 0.4267, + "step": 9395 + }, + { + "epoch": 1.6, + "learning_rate": 9.378931399284068e-06, + "loss": 0.4438, + "step": 9396 + }, + { + "epoch": 1.6, + "learning_rate": 9.377093576269797e-06, + "loss": 0.4521, + "step": 9397 + }, + { + "epoch": 1.6, + "learning_rate": 9.375255774376479e-06, + "loss": 0.4047, + "step": 9398 + }, + { + "epoch": 1.6, + "learning_rate": 9.373417993666436e-06, + "loss": 0.4038, + "step": 9399 + }, + { + "epoch": 1.6, + "learning_rate": 9.371580234201976e-06, + "loss": 0.42, + "step": 9400 + }, + { + "epoch": 1.6, + "learning_rate": 9.369742496045414e-06, + "loss": 0.4438, + "step": 9401 + }, + { + "epoch": 1.6, + "learning_rate": 9.367904779259063e-06, + "loss": 0.3813, + "step": 9402 + }, + { + "epoch": 1.6, + "learning_rate": 9.366067083905232e-06, + "loss": 0.4397, + "step": 9403 + }, + { + "epoch": 1.6, + "learning_rate": 9.36422941004624e-06, + "loss": 0.4275, + "step": 9404 + }, + { + "epoch": 1.6, + "learning_rate": 9.36239175774439e-06, + "loss": 0.4215, + "step": 9405 + }, + { + "epoch": 1.6, + "learning_rate": 9.360554127061992e-06, + "loss": 0.452, + "step": 9406 + }, + { + "epoch": 1.6, + "learning_rate": 9.358716518061356e-06, + "loss": 0.423, + "step": 9407 + }, + { + "epoch": 1.6, + "learning_rate": 9.356878930804785e-06, + "loss": 0.3798, + "step": 9408 + }, + { + "epoch": 1.6, + "learning_rate": 9.355041365354595e-06, + "loss": 0.4353, + "step": 9409 + }, + { + "epoch": 1.6, + "learning_rate": 9.353203821773089e-06, + "loss": 0.4088, + "step": 9410 + }, + { + "epoch": 1.61, + "learning_rate": 9.351366300122569e-06, + "loss": 0.452, + "step": 9411 + }, + { + "epoch": 1.61, + "learning_rate": 9.34952880046535e-06, + "loss": 0.4079, + "step": 9412 + }, + { + "epoch": 1.61, + "learning_rate": 9.347691322863724e-06, + "loss": 0.4256, + "step": 9413 + }, + { + "epoch": 1.61, + "learning_rate": 9.345853867380003e-06, + "loss": 0.4258, + "step": 9414 + }, + { + "epoch": 1.61, + "learning_rate": 9.344016434076485e-06, + "loss": 0.4268, + "step": 9415 + }, + { + "epoch": 1.61, + "learning_rate": 9.342179023015475e-06, + "loss": 0.4564, + "step": 9416 + }, + { + "epoch": 1.61, + "learning_rate": 9.340341634259274e-06, + "loss": 0.395, + "step": 9417 + }, + { + "epoch": 1.61, + "learning_rate": 9.33850426787018e-06, + "loss": 0.472, + "step": 9418 + }, + { + "epoch": 1.61, + "learning_rate": 9.336666923910495e-06, + "loss": 0.4135, + "step": 9419 + }, + { + "epoch": 1.61, + "learning_rate": 9.334829602442523e-06, + "loss": 0.4113, + "step": 9420 + }, + { + "epoch": 1.61, + "learning_rate": 9.332992303528551e-06, + "loss": 0.4639, + "step": 9421 + }, + { + "epoch": 1.61, + "learning_rate": 9.331155027230886e-06, + "loss": 0.4213, + "step": 9422 + }, + { + "epoch": 1.61, + "learning_rate": 9.329317773611818e-06, + "loss": 0.4105, + "step": 9423 + }, + { + "epoch": 1.61, + "learning_rate": 9.327480542733647e-06, + "loss": 0.4254, + "step": 9424 + }, + { + "epoch": 1.61, + "learning_rate": 9.325643334658667e-06, + "loss": 0.4324, + "step": 9425 + }, + { + "epoch": 1.61, + "learning_rate": 9.323806149449172e-06, + "loss": 0.4025, + "step": 9426 + }, + { + "epoch": 1.61, + "learning_rate": 9.321968987167463e-06, + "loss": 0.3955, + "step": 9427 + }, + { + "epoch": 1.61, + "learning_rate": 9.32013184787582e-06, + "loss": 0.4202, + "step": 9428 + }, + { + "epoch": 1.61, + "learning_rate": 9.318294731636542e-06, + "loss": 0.4047, + "step": 9429 + }, + { + "epoch": 1.61, + "learning_rate": 9.316457638511923e-06, + "loss": 0.4107, + "step": 9430 + }, + { + "epoch": 1.61, + "learning_rate": 9.314620568564247e-06, + "loss": 0.4092, + "step": 9431 + }, + { + "epoch": 1.61, + "learning_rate": 9.312783521855806e-06, + "loss": 0.4421, + "step": 9432 + }, + { + "epoch": 1.61, + "learning_rate": 9.310946498448891e-06, + "loss": 0.4194, + "step": 9433 + }, + { + "epoch": 1.61, + "learning_rate": 9.309109498405789e-06, + "loss": 0.4355, + "step": 9434 + }, + { + "epoch": 1.61, + "learning_rate": 9.307272521788792e-06, + "loss": 0.4166, + "step": 9435 + }, + { + "epoch": 1.61, + "learning_rate": 9.305435568660177e-06, + "loss": 0.4284, + "step": 9436 + }, + { + "epoch": 1.61, + "learning_rate": 9.303598639082235e-06, + "loss": 0.4271, + "step": 9437 + }, + { + "epoch": 1.61, + "learning_rate": 9.301761733117253e-06, + "loss": 0.4408, + "step": 9438 + }, + { + "epoch": 1.61, + "learning_rate": 9.29992485082751e-06, + "loss": 0.4381, + "step": 9439 + }, + { + "epoch": 1.61, + "learning_rate": 9.298087992275294e-06, + "loss": 0.424, + "step": 9440 + }, + { + "epoch": 1.61, + "learning_rate": 9.296251157522884e-06, + "loss": 0.4089, + "step": 9441 + }, + { + "epoch": 1.61, + "learning_rate": 9.294414346632569e-06, + "loss": 0.4098, + "step": 9442 + }, + { + "epoch": 1.61, + "learning_rate": 9.292577559666619e-06, + "loss": 0.4486, + "step": 9443 + }, + { + "epoch": 1.61, + "learning_rate": 9.290740796687322e-06, + "loss": 0.4366, + "step": 9444 + }, + { + "epoch": 1.61, + "learning_rate": 9.288904057756952e-06, + "loss": 0.394, + "step": 9445 + }, + { + "epoch": 1.61, + "learning_rate": 9.287067342937795e-06, + "loss": 0.4117, + "step": 9446 + }, + { + "epoch": 1.61, + "learning_rate": 9.28523065229212e-06, + "loss": 0.4415, + "step": 9447 + }, + { + "epoch": 1.61, + "learning_rate": 9.283393985882209e-06, + "loss": 0.4121, + "step": 9448 + }, + { + "epoch": 1.61, + "learning_rate": 9.281557343770338e-06, + "loss": 0.3984, + "step": 9449 + }, + { + "epoch": 1.61, + "learning_rate": 9.279720726018783e-06, + "loss": 0.4404, + "step": 9450 + }, + { + "epoch": 1.61, + "learning_rate": 9.277884132689816e-06, + "loss": 0.4407, + "step": 9451 + }, + { + "epoch": 1.61, + "learning_rate": 9.276047563845709e-06, + "loss": 0.4259, + "step": 9452 + }, + { + "epoch": 1.61, + "learning_rate": 9.27421101954874e-06, + "loss": 0.4252, + "step": 9453 + }, + { + "epoch": 1.61, + "learning_rate": 9.272374499861176e-06, + "loss": 0.4411, + "step": 9454 + }, + { + "epoch": 1.61, + "learning_rate": 9.27053800484529e-06, + "loss": 0.4085, + "step": 9455 + }, + { + "epoch": 1.61, + "learning_rate": 9.268701534563351e-06, + "loss": 0.4354, + "step": 9456 + }, + { + "epoch": 1.61, + "learning_rate": 9.26686508907763e-06, + "loss": 0.3915, + "step": 9457 + }, + { + "epoch": 1.61, + "learning_rate": 9.265028668450403e-06, + "loss": 0.4265, + "step": 9458 + }, + { + "epoch": 1.61, + "learning_rate": 9.263192272743922e-06, + "loss": 0.4351, + "step": 9459 + }, + { + "epoch": 1.61, + "learning_rate": 9.261355902020462e-06, + "loss": 0.4335, + "step": 9460 + }, + { + "epoch": 1.61, + "learning_rate": 9.259519556342292e-06, + "loss": 0.4132, + "step": 9461 + }, + { + "epoch": 1.61, + "learning_rate": 9.257683235771671e-06, + "loss": 0.4019, + "step": 9462 + }, + { + "epoch": 1.61, + "learning_rate": 9.255846940370867e-06, + "loss": 0.4039, + "step": 9463 + }, + { + "epoch": 1.61, + "learning_rate": 9.254010670202143e-06, + "loss": 0.4592, + "step": 9464 + }, + { + "epoch": 1.61, + "learning_rate": 9.252174425327767e-06, + "loss": 0.4174, + "step": 9465 + }, + { + "epoch": 1.61, + "learning_rate": 9.25033820580999e-06, + "loss": 0.4597, + "step": 9466 + }, + { + "epoch": 1.61, + "learning_rate": 9.248502011711076e-06, + "loss": 0.4024, + "step": 9467 + }, + { + "epoch": 1.61, + "learning_rate": 9.246665843093289e-06, + "loss": 0.3866, + "step": 9468 + }, + { + "epoch": 1.61, + "learning_rate": 9.244829700018888e-06, + "loss": 0.4089, + "step": 9469 + }, + { + "epoch": 1.62, + "learning_rate": 9.242993582550127e-06, + "loss": 0.4345, + "step": 9470 + }, + { + "epoch": 1.62, + "learning_rate": 9.241157490749268e-06, + "loss": 0.4276, + "step": 9471 + }, + { + "epoch": 1.62, + "learning_rate": 9.239321424678563e-06, + "loss": 0.4309, + "step": 9472 + }, + { + "epoch": 1.62, + "learning_rate": 9.237485384400277e-06, + "loss": 0.4007, + "step": 9473 + }, + { + "epoch": 1.62, + "learning_rate": 9.235649369976652e-06, + "loss": 0.3955, + "step": 9474 + }, + { + "epoch": 1.62, + "learning_rate": 9.23381338146995e-06, + "loss": 0.4068, + "step": 9475 + }, + { + "epoch": 1.62, + "learning_rate": 9.231977418942419e-06, + "loss": 0.4278, + "step": 9476 + }, + { + "epoch": 1.62, + "learning_rate": 9.230141482456318e-06, + "loss": 0.3882, + "step": 9477 + }, + { + "epoch": 1.62, + "learning_rate": 9.228305572073894e-06, + "loss": 0.4104, + "step": 9478 + }, + { + "epoch": 1.62, + "learning_rate": 9.226469687857397e-06, + "loss": 0.4001, + "step": 9479 + }, + { + "epoch": 1.62, + "learning_rate": 9.224633829869082e-06, + "loss": 0.4369, + "step": 9480 + }, + { + "epoch": 1.62, + "learning_rate": 9.222797998171188e-06, + "loss": 0.4105, + "step": 9481 + }, + { + "epoch": 1.62, + "learning_rate": 9.22096219282597e-06, + "loss": 0.426, + "step": 9482 + }, + { + "epoch": 1.62, + "learning_rate": 9.21912641389567e-06, + "loss": 0.4348, + "step": 9483 + }, + { + "epoch": 1.62, + "learning_rate": 9.217290661442538e-06, + "loss": 0.4229, + "step": 9484 + }, + { + "epoch": 1.62, + "learning_rate": 9.215454935528821e-06, + "loss": 0.4154, + "step": 9485 + }, + { + "epoch": 1.62, + "learning_rate": 9.213619236216757e-06, + "loss": 0.4537, + "step": 9486 + }, + { + "epoch": 1.62, + "learning_rate": 9.211783563568591e-06, + "loss": 0.435, + "step": 9487 + }, + { + "epoch": 1.62, + "learning_rate": 9.209947917646574e-06, + "loss": 0.4024, + "step": 9488 + }, + { + "epoch": 1.62, + "learning_rate": 9.208112298512932e-06, + "loss": 0.4299, + "step": 9489 + }, + { + "epoch": 1.62, + "learning_rate": 9.206276706229916e-06, + "loss": 0.4001, + "step": 9490 + }, + { + "epoch": 1.62, + "learning_rate": 9.204441140859763e-06, + "loss": 0.4099, + "step": 9491 + }, + { + "epoch": 1.62, + "learning_rate": 9.20260560246471e-06, + "loss": 0.4674, + "step": 9492 + }, + { + "epoch": 1.62, + "learning_rate": 9.200770091107002e-06, + "loss": 0.4578, + "step": 9493 + }, + { + "epoch": 1.62, + "learning_rate": 9.198934606848866e-06, + "loss": 0.434, + "step": 9494 + }, + { + "epoch": 1.62, + "learning_rate": 9.197099149752548e-06, + "loss": 0.4013, + "step": 9495 + }, + { + "epoch": 1.62, + "learning_rate": 9.195263719880275e-06, + "loss": 0.4161, + "step": 9496 + }, + { + "epoch": 1.62, + "learning_rate": 9.19342831729428e-06, + "loss": 0.3969, + "step": 9497 + }, + { + "epoch": 1.62, + "learning_rate": 9.191592942056803e-06, + "loss": 0.4423, + "step": 9498 + }, + { + "epoch": 1.62, + "learning_rate": 9.189757594230073e-06, + "loss": 0.4217, + "step": 9499 + }, + { + "epoch": 1.62, + "learning_rate": 9.187922273876322e-06, + "loss": 0.4026, + "step": 9500 + }, + { + "epoch": 1.62, + "learning_rate": 9.186086981057778e-06, + "loss": 0.4654, + "step": 9501 + }, + { + "epoch": 1.62, + "learning_rate": 9.184251715836673e-06, + "loss": 0.4425, + "step": 9502 + }, + { + "epoch": 1.62, + "learning_rate": 9.18241647827524e-06, + "loss": 0.41, + "step": 9503 + }, + { + "epoch": 1.62, + "learning_rate": 9.180581268435696e-06, + "loss": 0.4248, + "step": 9504 + }, + { + "epoch": 1.62, + "learning_rate": 9.178746086380274e-06, + "loss": 0.4006, + "step": 9505 + }, + { + "epoch": 1.62, + "learning_rate": 9.176910932171201e-06, + "loss": 0.4365, + "step": 9506 + }, + { + "epoch": 1.62, + "learning_rate": 9.175075805870696e-06, + "loss": 0.4174, + "step": 9507 + }, + { + "epoch": 1.62, + "learning_rate": 9.173240707540992e-06, + "loss": 0.4294, + "step": 9508 + }, + { + "epoch": 1.62, + "learning_rate": 9.171405637244301e-06, + "loss": 0.4284, + "step": 9509 + }, + { + "epoch": 1.62, + "learning_rate": 9.169570595042855e-06, + "loss": 0.4358, + "step": 9510 + }, + { + "epoch": 1.62, + "learning_rate": 9.16773558099887e-06, + "loss": 0.4451, + "step": 9511 + }, + { + "epoch": 1.62, + "learning_rate": 9.165900595174565e-06, + "loss": 0.413, + "step": 9512 + }, + { + "epoch": 1.62, + "learning_rate": 9.164065637632157e-06, + "loss": 0.4529, + "step": 9513 + }, + { + "epoch": 1.62, + "learning_rate": 9.162230708433872e-06, + "loss": 0.4306, + "step": 9514 + }, + { + "epoch": 1.62, + "learning_rate": 9.16039580764192e-06, + "loss": 0.4108, + "step": 9515 + }, + { + "epoch": 1.62, + "learning_rate": 9.158560935318523e-06, + "loss": 0.4127, + "step": 9516 + }, + { + "epoch": 1.62, + "learning_rate": 9.15672609152589e-06, + "loss": 0.4366, + "step": 9517 + }, + { + "epoch": 1.62, + "learning_rate": 9.154891276326244e-06, + "loss": 0.424, + "step": 9518 + }, + { + "epoch": 1.62, + "learning_rate": 9.153056489781789e-06, + "loss": 0.4149, + "step": 9519 + }, + { + "epoch": 1.62, + "learning_rate": 9.15122173195474e-06, + "loss": 0.3678, + "step": 9520 + }, + { + "epoch": 1.62, + "learning_rate": 9.149387002907309e-06, + "loss": 0.4303, + "step": 9521 + }, + { + "epoch": 1.62, + "learning_rate": 9.147552302701705e-06, + "loss": 0.4441, + "step": 9522 + }, + { + "epoch": 1.62, + "learning_rate": 9.145717631400142e-06, + "loss": 0.396, + "step": 9523 + }, + { + "epoch": 1.62, + "learning_rate": 9.143882989064826e-06, + "loss": 0.4218, + "step": 9524 + }, + { + "epoch": 1.62, + "learning_rate": 9.142048375757964e-06, + "loss": 0.4372, + "step": 9525 + }, + { + "epoch": 1.62, + "learning_rate": 9.140213791541762e-06, + "loss": 0.4096, + "step": 9526 + }, + { + "epoch": 1.62, + "learning_rate": 9.138379236478425e-06, + "loss": 0.4143, + "step": 9527 + }, + { + "epoch": 1.62, + "learning_rate": 9.136544710630158e-06, + "loss": 0.3994, + "step": 9528 + }, + { + "epoch": 1.63, + "learning_rate": 9.134710214059163e-06, + "loss": 0.4081, + "step": 9529 + }, + { + "epoch": 1.63, + "learning_rate": 9.132875746827645e-06, + "loss": 0.4315, + "step": 9530 + }, + { + "epoch": 1.63, + "learning_rate": 9.131041308997804e-06, + "loss": 0.4253, + "step": 9531 + }, + { + "epoch": 1.63, + "learning_rate": 9.129206900631846e-06, + "loss": 0.3986, + "step": 9532 + }, + { + "epoch": 1.63, + "learning_rate": 9.127372521791962e-06, + "loss": 0.4096, + "step": 9533 + }, + { + "epoch": 1.63, + "learning_rate": 9.125538172540357e-06, + "loss": 0.4319, + "step": 9534 + }, + { + "epoch": 1.63, + "learning_rate": 9.123703852939222e-06, + "loss": 0.4355, + "step": 9535 + }, + { + "epoch": 1.63, + "learning_rate": 9.121869563050757e-06, + "loss": 0.4458, + "step": 9536 + }, + { + "epoch": 1.63, + "learning_rate": 9.120035302937158e-06, + "loss": 0.4205, + "step": 9537 + }, + { + "epoch": 1.63, + "learning_rate": 9.118201072660619e-06, + "loss": 0.3999, + "step": 9538 + }, + { + "epoch": 1.63, + "learning_rate": 9.116366872283333e-06, + "loss": 0.4003, + "step": 9539 + }, + { + "epoch": 1.63, + "learning_rate": 9.114532701867499e-06, + "loss": 0.4305, + "step": 9540 + }, + { + "epoch": 1.63, + "learning_rate": 9.112698561475296e-06, + "loss": 0.4268, + "step": 9541 + }, + { + "epoch": 1.63, + "learning_rate": 9.110864451168922e-06, + "loss": 0.3981, + "step": 9542 + }, + { + "epoch": 1.63, + "learning_rate": 9.109030371010564e-06, + "loss": 0.4331, + "step": 9543 + }, + { + "epoch": 1.63, + "learning_rate": 9.107196321062411e-06, + "loss": 0.4029, + "step": 9544 + }, + { + "epoch": 1.63, + "learning_rate": 9.105362301386652e-06, + "loss": 0.4033, + "step": 9545 + }, + { + "epoch": 1.63, + "learning_rate": 9.10352831204547e-06, + "loss": 0.4129, + "step": 9546 + }, + { + "epoch": 1.63, + "learning_rate": 9.101694353101058e-06, + "loss": 0.4469, + "step": 9547 + }, + { + "epoch": 1.63, + "learning_rate": 9.099860424615592e-06, + "loss": 0.4105, + "step": 9548 + }, + { + "epoch": 1.63, + "learning_rate": 9.098026526651256e-06, + "loss": 0.4293, + "step": 9549 + }, + { + "epoch": 1.63, + "learning_rate": 9.096192659270237e-06, + "loss": 0.4248, + "step": 9550 + }, + { + "epoch": 1.63, + "learning_rate": 9.09435882253471e-06, + "loss": 0.4288, + "step": 9551 + }, + { + "epoch": 1.63, + "learning_rate": 9.092525016506858e-06, + "loss": 0.4431, + "step": 9552 + }, + { + "epoch": 1.63, + "learning_rate": 9.09069124124886e-06, + "loss": 0.4383, + "step": 9553 + }, + { + "epoch": 1.63, + "learning_rate": 9.088857496822895e-06, + "loss": 0.4069, + "step": 9554 + }, + { + "epoch": 1.63, + "learning_rate": 9.087023783291144e-06, + "loss": 0.4459, + "step": 9555 + }, + { + "epoch": 1.63, + "learning_rate": 9.085190100715775e-06, + "loss": 0.4268, + "step": 9556 + }, + { + "epoch": 1.63, + "learning_rate": 9.083356449158964e-06, + "loss": 0.4225, + "step": 9557 + }, + { + "epoch": 1.63, + "learning_rate": 9.081522828682891e-06, + "loss": 0.4195, + "step": 9558 + }, + { + "epoch": 1.63, + "learning_rate": 9.07968923934972e-06, + "loss": 0.4161, + "step": 9559 + }, + { + "epoch": 1.63, + "learning_rate": 9.07785568122163e-06, + "loss": 0.398, + "step": 9560 + }, + { + "epoch": 1.63, + "learning_rate": 9.076022154360787e-06, + "loss": 0.4174, + "step": 9561 + }, + { + "epoch": 1.63, + "learning_rate": 9.074188658829364e-06, + "loss": 0.4358, + "step": 9562 + }, + { + "epoch": 1.63, + "learning_rate": 9.072355194689532e-06, + "loss": 0.4343, + "step": 9563 + }, + { + "epoch": 1.63, + "learning_rate": 9.07052176200345e-06, + "loss": 0.4159, + "step": 9564 + }, + { + "epoch": 1.63, + "learning_rate": 9.068688360833292e-06, + "loss": 0.43, + "step": 9565 + }, + { + "epoch": 1.63, + "learning_rate": 9.066854991241218e-06, + "loss": 0.4452, + "step": 9566 + }, + { + "epoch": 1.63, + "learning_rate": 9.065021653289395e-06, + "loss": 0.4019, + "step": 9567 + }, + { + "epoch": 1.63, + "learning_rate": 9.063188347039985e-06, + "loss": 0.4138, + "step": 9568 + }, + { + "epoch": 1.63, + "learning_rate": 9.061355072555151e-06, + "loss": 0.4178, + "step": 9569 + }, + { + "epoch": 1.63, + "learning_rate": 9.05952182989706e-06, + "loss": 0.4235, + "step": 9570 + }, + { + "epoch": 1.63, + "learning_rate": 9.05768861912786e-06, + "loss": 0.4093, + "step": 9571 + }, + { + "epoch": 1.63, + "learning_rate": 9.055855440309716e-06, + "loss": 0.4644, + "step": 9572 + }, + { + "epoch": 1.63, + "learning_rate": 9.054022293504789e-06, + "loss": 0.4404, + "step": 9573 + }, + { + "epoch": 1.63, + "learning_rate": 9.052189178775229e-06, + "loss": 0.4331, + "step": 9574 + }, + { + "epoch": 1.63, + "learning_rate": 9.050356096183195e-06, + "loss": 0.4011, + "step": 9575 + }, + { + "epoch": 1.63, + "learning_rate": 9.048523045790843e-06, + "loss": 0.4174, + "step": 9576 + }, + { + "epoch": 1.63, + "learning_rate": 9.046690027660324e-06, + "loss": 0.4224, + "step": 9577 + }, + { + "epoch": 1.63, + "learning_rate": 9.044857041853795e-06, + "loss": 0.4188, + "step": 9578 + }, + { + "epoch": 1.63, + "learning_rate": 9.0430240884334e-06, + "loss": 0.4123, + "step": 9579 + }, + { + "epoch": 1.63, + "learning_rate": 9.041191167461291e-06, + "loss": 0.432, + "step": 9580 + }, + { + "epoch": 1.63, + "learning_rate": 9.039358278999624e-06, + "loss": 0.4187, + "step": 9581 + }, + { + "epoch": 1.63, + "learning_rate": 9.037525423110538e-06, + "loss": 0.4415, + "step": 9582 + }, + { + "epoch": 1.63, + "learning_rate": 9.035692599856182e-06, + "loss": 0.4069, + "step": 9583 + }, + { + "epoch": 1.63, + "learning_rate": 9.033859809298705e-06, + "loss": 0.3942, + "step": 9584 + }, + { + "epoch": 1.63, + "learning_rate": 9.032027051500255e-06, + "loss": 0.4284, + "step": 9585 + }, + { + "epoch": 1.63, + "learning_rate": 9.030194326522965e-06, + "loss": 0.3831, + "step": 9586 + }, + { + "epoch": 1.64, + "learning_rate": 9.028361634428983e-06, + "loss": 0.4329, + "step": 9587 + }, + { + "epoch": 1.64, + "learning_rate": 9.026528975280451e-06, + "loss": 0.4186, + "step": 9588 + }, + { + "epoch": 1.64, + "learning_rate": 9.02469634913951e-06, + "loss": 0.4335, + "step": 9589 + }, + { + "epoch": 1.64, + "learning_rate": 9.022863756068294e-06, + "loss": 0.4279, + "step": 9590 + }, + { + "epoch": 1.64, + "learning_rate": 9.021031196128946e-06, + "loss": 0.421, + "step": 9591 + }, + { + "epoch": 1.64, + "learning_rate": 9.0191986693836e-06, + "loss": 0.3957, + "step": 9592 + }, + { + "epoch": 1.64, + "learning_rate": 9.017366175894399e-06, + "loss": 0.4116, + "step": 9593 + }, + { + "epoch": 1.64, + "learning_rate": 9.015533715723466e-06, + "loss": 0.4531, + "step": 9594 + }, + { + "epoch": 1.64, + "learning_rate": 9.01370128893294e-06, + "loss": 0.4057, + "step": 9595 + }, + { + "epoch": 1.64, + "learning_rate": 9.011868895584952e-06, + "loss": 0.4194, + "step": 9596 + }, + { + "epoch": 1.64, + "learning_rate": 9.010036535741638e-06, + "loss": 0.428, + "step": 9597 + }, + { + "epoch": 1.64, + "learning_rate": 9.00820420946512e-06, + "loss": 0.3993, + "step": 9598 + }, + { + "epoch": 1.64, + "learning_rate": 9.006371916817533e-06, + "loss": 0.4279, + "step": 9599 + }, + { + "epoch": 1.64, + "learning_rate": 9.004539657861007e-06, + "loss": 0.4146, + "step": 9600 + }, + { + "epoch": 1.64, + "learning_rate": 9.00270743265766e-06, + "loss": 0.4145, + "step": 9601 + }, + { + "epoch": 1.64, + "learning_rate": 9.000875241269622e-06, + "loss": 0.3968, + "step": 9602 + }, + { + "epoch": 1.64, + "learning_rate": 8.999043083759016e-06, + "loss": 0.4446, + "step": 9603 + }, + { + "epoch": 1.64, + "learning_rate": 8.997210960187972e-06, + "loss": 0.4545, + "step": 9604 + }, + { + "epoch": 1.64, + "learning_rate": 8.995378870618602e-06, + "loss": 0.3991, + "step": 9605 + }, + { + "epoch": 1.64, + "learning_rate": 8.993546815113033e-06, + "loss": 0.3975, + "step": 9606 + }, + { + "epoch": 1.64, + "learning_rate": 8.991714793733381e-06, + "loss": 0.4406, + "step": 9607 + }, + { + "epoch": 1.64, + "learning_rate": 8.989882806541771e-06, + "loss": 0.4341, + "step": 9608 + }, + { + "epoch": 1.64, + "learning_rate": 8.988050853600313e-06, + "loss": 0.4253, + "step": 9609 + }, + { + "epoch": 1.64, + "learning_rate": 8.986218934971127e-06, + "loss": 0.4304, + "step": 9610 + }, + { + "epoch": 1.64, + "learning_rate": 8.984387050716326e-06, + "loss": 0.4576, + "step": 9611 + }, + { + "epoch": 1.64, + "learning_rate": 8.982555200898027e-06, + "loss": 0.4155, + "step": 9612 + }, + { + "epoch": 1.64, + "learning_rate": 8.980723385578339e-06, + "loss": 0.4329, + "step": 9613 + }, + { + "epoch": 1.64, + "learning_rate": 8.978891604819377e-06, + "loss": 0.3732, + "step": 9614 + }, + { + "epoch": 1.64, + "learning_rate": 8.977059858683252e-06, + "loss": 0.4376, + "step": 9615 + }, + { + "epoch": 1.64, + "learning_rate": 8.97522814723207e-06, + "loss": 0.3833, + "step": 9616 + }, + { + "epoch": 1.64, + "learning_rate": 8.973396470527936e-06, + "loss": 0.3976, + "step": 9617 + }, + { + "epoch": 1.64, + "learning_rate": 8.971564828632965e-06, + "loss": 0.4391, + "step": 9618 + }, + { + "epoch": 1.64, + "learning_rate": 8.969733221609256e-06, + "loss": 0.412, + "step": 9619 + }, + { + "epoch": 1.64, + "learning_rate": 8.967901649518918e-06, + "loss": 0.4192, + "step": 9620 + }, + { + "epoch": 1.64, + "learning_rate": 8.966070112424052e-06, + "loss": 0.423, + "step": 9621 + }, + { + "epoch": 1.64, + "learning_rate": 8.96423861038676e-06, + "loss": 0.442, + "step": 9622 + }, + { + "epoch": 1.64, + "learning_rate": 8.96240714346915e-06, + "loss": 0.3926, + "step": 9623 + }, + { + "epoch": 1.64, + "learning_rate": 8.960575711733309e-06, + "loss": 0.4025, + "step": 9624 + }, + { + "epoch": 1.64, + "learning_rate": 8.958744315241343e-06, + "loss": 0.4126, + "step": 9625 + }, + { + "epoch": 1.64, + "learning_rate": 8.956912954055347e-06, + "loss": 0.4206, + "step": 9626 + }, + { + "epoch": 1.64, + "learning_rate": 8.955081628237418e-06, + "loss": 0.4373, + "step": 9627 + }, + { + "epoch": 1.64, + "learning_rate": 8.953250337849655e-06, + "loss": 0.4304, + "step": 9628 + }, + { + "epoch": 1.64, + "learning_rate": 8.951419082954145e-06, + "loss": 0.4205, + "step": 9629 + }, + { + "epoch": 1.64, + "learning_rate": 8.94958786361299e-06, + "loss": 0.3938, + "step": 9630 + }, + { + "epoch": 1.64, + "learning_rate": 8.947756679888268e-06, + "loss": 0.4116, + "step": 9631 + }, + { + "epoch": 1.64, + "learning_rate": 8.945925531842078e-06, + "loss": 0.4348, + "step": 9632 + }, + { + "epoch": 1.64, + "learning_rate": 8.944094419536508e-06, + "loss": 0.4471, + "step": 9633 + }, + { + "epoch": 1.64, + "learning_rate": 8.942263343033644e-06, + "loss": 0.4462, + "step": 9634 + }, + { + "epoch": 1.64, + "learning_rate": 8.940432302395572e-06, + "loss": 0.3856, + "step": 9635 + }, + { + "epoch": 1.64, + "learning_rate": 8.938601297684383e-06, + "loss": 0.4082, + "step": 9636 + }, + { + "epoch": 1.64, + "learning_rate": 8.936770328962154e-06, + "loss": 0.4189, + "step": 9637 + }, + { + "epoch": 1.64, + "learning_rate": 8.934939396290974e-06, + "loss": 0.4455, + "step": 9638 + }, + { + "epoch": 1.64, + "learning_rate": 8.933108499732918e-06, + "loss": 0.4453, + "step": 9639 + }, + { + "epoch": 1.64, + "learning_rate": 8.93127763935007e-06, + "loss": 0.4195, + "step": 9640 + }, + { + "epoch": 1.64, + "learning_rate": 8.929446815204507e-06, + "loss": 0.439, + "step": 9641 + }, + { + "epoch": 1.64, + "learning_rate": 8.92761602735831e-06, + "loss": 0.4125, + "step": 9642 + }, + { + "epoch": 1.64, + "learning_rate": 8.925785275873555e-06, + "loss": 0.3855, + "step": 9643 + }, + { + "epoch": 1.64, + "learning_rate": 8.923954560812319e-06, + "loss": 0.4057, + "step": 9644 + }, + { + "epoch": 1.64, + "learning_rate": 8.922123882236675e-06, + "loss": 0.4479, + "step": 9645 + }, + { + "epoch": 1.65, + "learning_rate": 8.920293240208694e-06, + "loss": 0.4191, + "step": 9646 + }, + { + "epoch": 1.65, + "learning_rate": 8.918462634790449e-06, + "loss": 0.4056, + "step": 9647 + }, + { + "epoch": 1.65, + "learning_rate": 8.916632066044009e-06, + "loss": 0.4208, + "step": 9648 + }, + { + "epoch": 1.65, + "learning_rate": 8.914801534031445e-06, + "loss": 0.41, + "step": 9649 + }, + { + "epoch": 1.65, + "learning_rate": 8.912971038814826e-06, + "loss": 0.4422, + "step": 9650 + }, + { + "epoch": 1.65, + "learning_rate": 8.911140580456218e-06, + "loss": 0.4467, + "step": 9651 + }, + { + "epoch": 1.65, + "learning_rate": 8.909310159017685e-06, + "loss": 0.4638, + "step": 9652 + }, + { + "epoch": 1.65, + "learning_rate": 8.907479774561296e-06, + "loss": 0.4183, + "step": 9653 + }, + { + "epoch": 1.65, + "learning_rate": 8.90564942714911e-06, + "loss": 0.4346, + "step": 9654 + }, + { + "epoch": 1.65, + "learning_rate": 8.903819116843187e-06, + "loss": 0.4054, + "step": 9655 + }, + { + "epoch": 1.65, + "learning_rate": 8.90198884370559e-06, + "loss": 0.3785, + "step": 9656 + }, + { + "epoch": 1.65, + "learning_rate": 8.90015860779838e-06, + "loss": 0.413, + "step": 9657 + }, + { + "epoch": 1.65, + "learning_rate": 8.898328409183611e-06, + "loss": 0.4328, + "step": 9658 + }, + { + "epoch": 1.65, + "learning_rate": 8.896498247923345e-06, + "loss": 0.4332, + "step": 9659 + }, + { + "epoch": 1.65, + "learning_rate": 8.894668124079633e-06, + "loss": 0.4099, + "step": 9660 + }, + { + "epoch": 1.65, + "learning_rate": 8.892838037714533e-06, + "loss": 0.424, + "step": 9661 + }, + { + "epoch": 1.65, + "learning_rate": 8.891007988890094e-06, + "loss": 0.4209, + "step": 9662 + }, + { + "epoch": 1.65, + "learning_rate": 8.889177977668369e-06, + "loss": 0.4198, + "step": 9663 + }, + { + "epoch": 1.65, + "learning_rate": 8.88734800411141e-06, + "loss": 0.4293, + "step": 9664 + }, + { + "epoch": 1.65, + "learning_rate": 8.885518068281261e-06, + "loss": 0.4216, + "step": 9665 + }, + { + "epoch": 1.65, + "learning_rate": 8.883688170239977e-06, + "loss": 0.4389, + "step": 9666 + }, + { + "epoch": 1.65, + "learning_rate": 8.881858310049602e-06, + "loss": 0.4328, + "step": 9667 + }, + { + "epoch": 1.65, + "learning_rate": 8.880028487772181e-06, + "loss": 0.4146, + "step": 9668 + }, + { + "epoch": 1.65, + "learning_rate": 8.878198703469757e-06, + "loss": 0.4187, + "step": 9669 + }, + { + "epoch": 1.65, + "learning_rate": 8.876368957204372e-06, + "loss": 0.3857, + "step": 9670 + }, + { + "epoch": 1.65, + "learning_rate": 8.874539249038067e-06, + "loss": 0.4247, + "step": 9671 + }, + { + "epoch": 1.65, + "learning_rate": 8.872709579032885e-06, + "loss": 0.4119, + "step": 9672 + }, + { + "epoch": 1.65, + "learning_rate": 8.870879947250863e-06, + "loss": 0.4444, + "step": 9673 + }, + { + "epoch": 1.65, + "learning_rate": 8.86905035375404e-06, + "loss": 0.3982, + "step": 9674 + }, + { + "epoch": 1.65, + "learning_rate": 8.867220798604454e-06, + "loss": 0.3848, + "step": 9675 + }, + { + "epoch": 1.65, + "learning_rate": 8.865391281864135e-06, + "loss": 0.4477, + "step": 9676 + }, + { + "epoch": 1.65, + "learning_rate": 8.863561803595122e-06, + "loss": 0.4022, + "step": 9677 + }, + { + "epoch": 1.65, + "learning_rate": 8.86173236385944e-06, + "loss": 0.4588, + "step": 9678 + }, + { + "epoch": 1.65, + "learning_rate": 8.859902962719123e-06, + "loss": 0.4421, + "step": 9679 + }, + { + "epoch": 1.65, + "learning_rate": 8.858073600236203e-06, + "loss": 0.443, + "step": 9680 + }, + { + "epoch": 1.65, + "learning_rate": 8.856244276472708e-06, + "loss": 0.4343, + "step": 9681 + }, + { + "epoch": 1.65, + "learning_rate": 8.854414991490663e-06, + "loss": 0.4264, + "step": 9682 + }, + { + "epoch": 1.65, + "learning_rate": 8.852585745352101e-06, + "loss": 0.4163, + "step": 9683 + }, + { + "epoch": 1.65, + "learning_rate": 8.850756538119037e-06, + "loss": 0.4335, + "step": 9684 + }, + { + "epoch": 1.65, + "learning_rate": 8.848927369853499e-06, + "loss": 0.4286, + "step": 9685 + }, + { + "epoch": 1.65, + "learning_rate": 8.847098240617505e-06, + "loss": 0.413, + "step": 9686 + }, + { + "epoch": 1.65, + "learning_rate": 8.845269150473077e-06, + "loss": 0.4144, + "step": 9687 + }, + { + "epoch": 1.65, + "learning_rate": 8.843440099482236e-06, + "loss": 0.3902, + "step": 9688 + }, + { + "epoch": 1.65, + "learning_rate": 8.841611087707e-06, + "loss": 0.3983, + "step": 9689 + }, + { + "epoch": 1.65, + "learning_rate": 8.839782115209383e-06, + "loss": 0.4251, + "step": 9690 + }, + { + "epoch": 1.65, + "learning_rate": 8.837953182051408e-06, + "loss": 0.4685, + "step": 9691 + }, + { + "epoch": 1.65, + "learning_rate": 8.836124288295077e-06, + "loss": 0.4076, + "step": 9692 + }, + { + "epoch": 1.65, + "learning_rate": 8.83429543400241e-06, + "loss": 0.4066, + "step": 9693 + }, + { + "epoch": 1.65, + "learning_rate": 8.832466619235414e-06, + "loss": 0.4184, + "step": 9694 + }, + { + "epoch": 1.65, + "learning_rate": 8.830637844056102e-06, + "loss": 0.408, + "step": 9695 + }, + { + "epoch": 1.65, + "learning_rate": 8.828809108526481e-06, + "loss": 0.3944, + "step": 9696 + }, + { + "epoch": 1.65, + "learning_rate": 8.82698041270856e-06, + "loss": 0.4373, + "step": 9697 + }, + { + "epoch": 1.65, + "learning_rate": 8.825151756664345e-06, + "loss": 0.4019, + "step": 9698 + }, + { + "epoch": 1.65, + "learning_rate": 8.823323140455837e-06, + "loss": 0.4176, + "step": 9699 + }, + { + "epoch": 1.65, + "learning_rate": 8.82149456414504e-06, + "loss": 0.4561, + "step": 9700 + }, + { + "epoch": 1.65, + "learning_rate": 8.819666027793958e-06, + "loss": 0.4199, + "step": 9701 + }, + { + "epoch": 1.65, + "learning_rate": 8.81783753146459e-06, + "loss": 0.4335, + "step": 9702 + }, + { + "epoch": 1.65, + "learning_rate": 8.816009075218934e-06, + "loss": 0.4012, + "step": 9703 + }, + { + "epoch": 1.65, + "learning_rate": 8.814180659118987e-06, + "loss": 0.391, + "step": 9704 + }, + { + "epoch": 1.66, + "learning_rate": 8.812352283226751e-06, + "loss": 0.418, + "step": 9705 + }, + { + "epoch": 1.66, + "learning_rate": 8.81052394760422e-06, + "loss": 0.4203, + "step": 9706 + }, + { + "epoch": 1.66, + "learning_rate": 8.808695652313378e-06, + "loss": 0.4181, + "step": 9707 + }, + { + "epoch": 1.66, + "learning_rate": 8.806867397416226e-06, + "loss": 0.4127, + "step": 9708 + }, + { + "epoch": 1.66, + "learning_rate": 8.805039182974756e-06, + "loss": 0.4218, + "step": 9709 + }, + { + "epoch": 1.66, + "learning_rate": 8.80321100905095e-06, + "loss": 0.4328, + "step": 9710 + }, + { + "epoch": 1.66, + "learning_rate": 8.801382875706799e-06, + "loss": 0.4391, + "step": 9711 + }, + { + "epoch": 1.66, + "learning_rate": 8.799554783004292e-06, + "loss": 0.4146, + "step": 9712 + }, + { + "epoch": 1.66, + "learning_rate": 8.797726731005419e-06, + "loss": 0.4145, + "step": 9713 + }, + { + "epoch": 1.66, + "learning_rate": 8.795898719772154e-06, + "loss": 0.4227, + "step": 9714 + }, + { + "epoch": 1.66, + "learning_rate": 8.794070749366483e-06, + "loss": 0.4165, + "step": 9715 + }, + { + "epoch": 1.66, + "learning_rate": 8.79224281985039e-06, + "loss": 0.405, + "step": 9716 + }, + { + "epoch": 1.66, + "learning_rate": 8.79041493128585e-06, + "loss": 0.3748, + "step": 9717 + }, + { + "epoch": 1.66, + "learning_rate": 8.788587083734845e-06, + "loss": 0.4236, + "step": 9718 + }, + { + "epoch": 1.66, + "learning_rate": 8.786759277259352e-06, + "loss": 0.4228, + "step": 9719 + }, + { + "epoch": 1.66, + "learning_rate": 8.784931511921346e-06, + "loss": 0.3883, + "step": 9720 + }, + { + "epoch": 1.66, + "learning_rate": 8.783103787782803e-06, + "loss": 0.4025, + "step": 9721 + }, + { + "epoch": 1.66, + "learning_rate": 8.781276104905692e-06, + "loss": 0.4258, + "step": 9722 + }, + { + "epoch": 1.66, + "learning_rate": 8.779448463351987e-06, + "loss": 0.3904, + "step": 9723 + }, + { + "epoch": 1.66, + "learning_rate": 8.777620863183658e-06, + "loss": 0.4406, + "step": 9724 + }, + { + "epoch": 1.66, + "learning_rate": 8.775793304462672e-06, + "loss": 0.4086, + "step": 9725 + }, + { + "epoch": 1.66, + "learning_rate": 8.773965787250997e-06, + "loss": 0.4021, + "step": 9726 + }, + { + "epoch": 1.66, + "learning_rate": 8.772138311610598e-06, + "loss": 0.4356, + "step": 9727 + }, + { + "epoch": 1.66, + "learning_rate": 8.770310877603446e-06, + "loss": 0.4076, + "step": 9728 + }, + { + "epoch": 1.66, + "learning_rate": 8.768483485291494e-06, + "loss": 0.4253, + "step": 9729 + }, + { + "epoch": 1.66, + "learning_rate": 8.76665613473671e-06, + "loss": 0.4468, + "step": 9730 + }, + { + "epoch": 1.66, + "learning_rate": 8.76482882600105e-06, + "loss": 0.3901, + "step": 9731 + }, + { + "epoch": 1.66, + "learning_rate": 8.763001559146479e-06, + "loss": 0.4382, + "step": 9732 + }, + { + "epoch": 1.66, + "learning_rate": 8.761174334234948e-06, + "loss": 0.4105, + "step": 9733 + }, + { + "epoch": 1.66, + "learning_rate": 8.759347151328414e-06, + "loss": 0.412, + "step": 9734 + }, + { + "epoch": 1.66, + "learning_rate": 8.757520010488835e-06, + "loss": 0.4092, + "step": 9735 + }, + { + "epoch": 1.66, + "learning_rate": 8.755692911778165e-06, + "loss": 0.4607, + "step": 9736 + }, + { + "epoch": 1.66, + "learning_rate": 8.753865855258349e-06, + "loss": 0.4258, + "step": 9737 + }, + { + "epoch": 1.66, + "learning_rate": 8.75203884099134e-06, + "loss": 0.4152, + "step": 9738 + }, + { + "epoch": 1.66, + "learning_rate": 8.750211869039088e-06, + "loss": 0.434, + "step": 9739 + }, + { + "epoch": 1.66, + "learning_rate": 8.748384939463543e-06, + "loss": 0.4022, + "step": 9740 + }, + { + "epoch": 1.66, + "learning_rate": 8.746558052326643e-06, + "loss": 0.4034, + "step": 9741 + }, + { + "epoch": 1.66, + "learning_rate": 8.74473120769034e-06, + "loss": 0.4193, + "step": 9742 + }, + { + "epoch": 1.66, + "learning_rate": 8.742904405616579e-06, + "loss": 0.4322, + "step": 9743 + }, + { + "epoch": 1.66, + "learning_rate": 8.74107764616729e-06, + "loss": 0.4217, + "step": 9744 + }, + { + "epoch": 1.66, + "learning_rate": 8.73925092940442e-06, + "loss": 0.4027, + "step": 9745 + }, + { + "epoch": 1.66, + "learning_rate": 8.73742425538991e-06, + "loss": 0.4255, + "step": 9746 + }, + { + "epoch": 1.66, + "learning_rate": 8.735597624185694e-06, + "loss": 0.4358, + "step": 9747 + }, + { + "epoch": 1.66, + "learning_rate": 8.73377103585371e-06, + "loss": 0.4211, + "step": 9748 + }, + { + "epoch": 1.66, + "learning_rate": 8.731944490455892e-06, + "loss": 0.3912, + "step": 9749 + }, + { + "epoch": 1.66, + "learning_rate": 8.730117988054168e-06, + "loss": 0.3893, + "step": 9750 + }, + { + "epoch": 1.66, + "learning_rate": 8.728291528710479e-06, + "loss": 0.4264, + "step": 9751 + }, + { + "epoch": 1.66, + "learning_rate": 8.726465112486746e-06, + "loss": 0.4168, + "step": 9752 + }, + { + "epoch": 1.66, + "learning_rate": 8.724638739444901e-06, + "loss": 0.4533, + "step": 9753 + }, + { + "epoch": 1.66, + "learning_rate": 8.72281240964687e-06, + "loss": 0.4446, + "step": 9754 + }, + { + "epoch": 1.66, + "learning_rate": 8.720986123154582e-06, + "loss": 0.4397, + "step": 9755 + }, + { + "epoch": 1.66, + "learning_rate": 8.719159880029957e-06, + "loss": 0.4156, + "step": 9756 + }, + { + "epoch": 1.66, + "learning_rate": 8.717333680334916e-06, + "loss": 0.42, + "step": 9757 + }, + { + "epoch": 1.66, + "learning_rate": 8.71550752413139e-06, + "loss": 0.4282, + "step": 9758 + }, + { + "epoch": 1.66, + "learning_rate": 8.713681411481288e-06, + "loss": 0.3947, + "step": 9759 + }, + { + "epoch": 1.66, + "learning_rate": 8.71185534244653e-06, + "loss": 0.3971, + "step": 9760 + }, + { + "epoch": 1.66, + "learning_rate": 8.710029317089038e-06, + "loss": 0.4166, + "step": 9761 + }, + { + "epoch": 1.66, + "learning_rate": 8.708203335470723e-06, + "loss": 0.4366, + "step": 9762 + }, + { + "epoch": 1.67, + "learning_rate": 8.706377397653502e-06, + "loss": 0.4301, + "step": 9763 + }, + { + "epoch": 1.67, + "learning_rate": 8.704551503699283e-06, + "loss": 0.4547, + "step": 9764 + }, + { + "epoch": 1.67, + "learning_rate": 8.702725653669978e-06, + "loss": 0.4304, + "step": 9765 + }, + { + "epoch": 1.67, + "learning_rate": 8.700899847627502e-06, + "loss": 0.4034, + "step": 9766 + }, + { + "epoch": 1.67, + "learning_rate": 8.699074085633754e-06, + "loss": 0.4194, + "step": 9767 + }, + { + "epoch": 1.67, + "learning_rate": 8.697248367750645e-06, + "loss": 0.4431, + "step": 9768 + }, + { + "epoch": 1.67, + "learning_rate": 8.69542269404008e-06, + "loss": 0.4106, + "step": 9769 + }, + { + "epoch": 1.67, + "learning_rate": 8.69359706456396e-06, + "loss": 0.3845, + "step": 9770 + }, + { + "epoch": 1.67, + "learning_rate": 8.69177147938419e-06, + "loss": 0.3962, + "step": 9771 + }, + { + "epoch": 1.67, + "learning_rate": 8.689945938562668e-06, + "loss": 0.4219, + "step": 9772 + }, + { + "epoch": 1.67, + "learning_rate": 8.688120442161296e-06, + "loss": 0.4128, + "step": 9773 + }, + { + "epoch": 1.67, + "learning_rate": 8.686294990241965e-06, + "loss": 0.4335, + "step": 9774 + }, + { + "epoch": 1.67, + "learning_rate": 8.684469582866574e-06, + "loss": 0.4095, + "step": 9775 + }, + { + "epoch": 1.67, + "learning_rate": 8.682644220097021e-06, + "loss": 0.4109, + "step": 9776 + }, + { + "epoch": 1.67, + "learning_rate": 8.680818901995193e-06, + "loss": 0.4185, + "step": 9777 + }, + { + "epoch": 1.67, + "learning_rate": 8.678993628622985e-06, + "loss": 0.4013, + "step": 9778 + }, + { + "epoch": 1.67, + "learning_rate": 8.677168400042287e-06, + "loss": 0.4235, + "step": 9779 + }, + { + "epoch": 1.67, + "learning_rate": 8.675343216314983e-06, + "loss": 0.4044, + "step": 9780 + }, + { + "epoch": 1.67, + "learning_rate": 8.673518077502968e-06, + "loss": 0.3713, + "step": 9781 + }, + { + "epoch": 1.67, + "learning_rate": 8.671692983668118e-06, + "loss": 0.4429, + "step": 9782 + }, + { + "epoch": 1.67, + "learning_rate": 8.66986793487232e-06, + "loss": 0.3711, + "step": 9783 + }, + { + "epoch": 1.67, + "learning_rate": 8.668042931177457e-06, + "loss": 0.4328, + "step": 9784 + }, + { + "epoch": 1.67, + "learning_rate": 8.666217972645407e-06, + "loss": 0.3971, + "step": 9785 + }, + { + "epoch": 1.67, + "learning_rate": 8.664393059338053e-06, + "loss": 0.4174, + "step": 9786 + }, + { + "epoch": 1.67, + "learning_rate": 8.662568191317273e-06, + "loss": 0.4157, + "step": 9787 + }, + { + "epoch": 1.67, + "learning_rate": 8.66074336864494e-06, + "loss": 0.4528, + "step": 9788 + }, + { + "epoch": 1.67, + "learning_rate": 8.65891859138293e-06, + "loss": 0.4321, + "step": 9789 + }, + { + "epoch": 1.67, + "learning_rate": 8.657093859593112e-06, + "loss": 0.3957, + "step": 9790 + }, + { + "epoch": 1.67, + "learning_rate": 8.655269173337361e-06, + "loss": 0.4086, + "step": 9791 + }, + { + "epoch": 1.67, + "learning_rate": 8.653444532677548e-06, + "loss": 0.3934, + "step": 9792 + }, + { + "epoch": 1.67, + "learning_rate": 8.651619937675538e-06, + "loss": 0.3792, + "step": 9793 + }, + { + "epoch": 1.67, + "learning_rate": 8.6497953883932e-06, + "loss": 0.4061, + "step": 9794 + }, + { + "epoch": 1.67, + "learning_rate": 8.6479708848924e-06, + "loss": 0.4262, + "step": 9795 + }, + { + "epoch": 1.67, + "learning_rate": 8.646146427235e-06, + "loss": 0.424, + "step": 9796 + }, + { + "epoch": 1.67, + "learning_rate": 8.644322015482864e-06, + "loss": 0.4011, + "step": 9797 + }, + { + "epoch": 1.67, + "learning_rate": 8.642497649697848e-06, + "loss": 0.4203, + "step": 9798 + }, + { + "epoch": 1.67, + "learning_rate": 8.640673329941813e-06, + "loss": 0.4407, + "step": 9799 + }, + { + "epoch": 1.67, + "learning_rate": 8.638849056276618e-06, + "loss": 0.3932, + "step": 9800 + }, + { + "epoch": 1.67, + "learning_rate": 8.637024828764118e-06, + "loss": 0.4462, + "step": 9801 + }, + { + "epoch": 1.67, + "learning_rate": 8.635200647466169e-06, + "loss": 0.4057, + "step": 9802 + }, + { + "epoch": 1.67, + "learning_rate": 8.633376512444623e-06, + "loss": 0.4008, + "step": 9803 + }, + { + "epoch": 1.67, + "learning_rate": 8.631552423761328e-06, + "loss": 0.4081, + "step": 9804 + }, + { + "epoch": 1.67, + "learning_rate": 8.629728381478137e-06, + "loss": 0.3932, + "step": 9805 + }, + { + "epoch": 1.67, + "learning_rate": 8.627904385656896e-06, + "loss": 0.4307, + "step": 9806 + }, + { + "epoch": 1.67, + "learning_rate": 8.62608043635945e-06, + "loss": 0.4171, + "step": 9807 + }, + { + "epoch": 1.67, + "learning_rate": 8.624256533647648e-06, + "loss": 0.4277, + "step": 9808 + }, + { + "epoch": 1.67, + "learning_rate": 8.622432677583332e-06, + "loss": 0.4576, + "step": 9809 + }, + { + "epoch": 1.67, + "learning_rate": 8.620608868228344e-06, + "loss": 0.4119, + "step": 9810 + }, + { + "epoch": 1.67, + "learning_rate": 8.618785105644524e-06, + "loss": 0.4361, + "step": 9811 + }, + { + "epoch": 1.67, + "learning_rate": 8.616961389893706e-06, + "loss": 0.4144, + "step": 9812 + }, + { + "epoch": 1.67, + "learning_rate": 8.615137721037734e-06, + "loss": 0.4159, + "step": 9813 + }, + { + "epoch": 1.67, + "learning_rate": 8.613314099138438e-06, + "loss": 0.3854, + "step": 9814 + }, + { + "epoch": 1.67, + "learning_rate": 8.611490524257653e-06, + "loss": 0.4079, + "step": 9815 + }, + { + "epoch": 1.67, + "learning_rate": 8.609666996457211e-06, + "loss": 0.4001, + "step": 9816 + }, + { + "epoch": 1.67, + "learning_rate": 8.607843515798945e-06, + "loss": 0.3849, + "step": 9817 + }, + { + "epoch": 1.67, + "learning_rate": 8.606020082344686e-06, + "loss": 0.4036, + "step": 9818 + }, + { + "epoch": 1.67, + "learning_rate": 8.604196696156254e-06, + "loss": 0.4021, + "step": 9819 + }, + { + "epoch": 1.67, + "learning_rate": 8.60237335729548e-06, + "loss": 0.4125, + "step": 9820 + }, + { + "epoch": 1.67, + "learning_rate": 8.600550065824184e-06, + "loss": 0.4439, + "step": 9821 + }, + { + "epoch": 1.68, + "learning_rate": 8.598726821804194e-06, + "loss": 0.4184, + "step": 9822 + }, + { + "epoch": 1.68, + "learning_rate": 8.596903625297325e-06, + "loss": 0.4026, + "step": 9823 + }, + { + "epoch": 1.68, + "learning_rate": 8.5950804763654e-06, + "loss": 0.4231, + "step": 9824 + }, + { + "epoch": 1.68, + "learning_rate": 8.593257375070236e-06, + "loss": 0.3988, + "step": 9825 + }, + { + "epoch": 1.68, + "learning_rate": 8.591434321473656e-06, + "loss": 0.4235, + "step": 9826 + }, + { + "epoch": 1.68, + "learning_rate": 8.589611315637461e-06, + "loss": 0.4162, + "step": 9827 + }, + { + "epoch": 1.68, + "learning_rate": 8.587788357623473e-06, + "loss": 0.3956, + "step": 9828 + }, + { + "epoch": 1.68, + "learning_rate": 8.5859654474935e-06, + "loss": 0.3794, + "step": 9829 + }, + { + "epoch": 1.68, + "learning_rate": 8.584142585309351e-06, + "loss": 0.433, + "step": 9830 + }, + { + "epoch": 1.68, + "learning_rate": 8.582319771132835e-06, + "loss": 0.4287, + "step": 9831 + }, + { + "epoch": 1.68, + "learning_rate": 8.58049700502576e-06, + "loss": 0.4416, + "step": 9832 + }, + { + "epoch": 1.68, + "learning_rate": 8.578674287049934e-06, + "loss": 0.4227, + "step": 9833 + }, + { + "epoch": 1.68, + "learning_rate": 8.576851617267151e-06, + "loss": 0.447, + "step": 9834 + }, + { + "epoch": 1.68, + "learning_rate": 8.575028995739217e-06, + "loss": 0.4052, + "step": 9835 + }, + { + "epoch": 1.68, + "learning_rate": 8.573206422527933e-06, + "loss": 0.3886, + "step": 9836 + }, + { + "epoch": 1.68, + "learning_rate": 8.571383897695097e-06, + "loss": 0.4013, + "step": 9837 + }, + { + "epoch": 1.68, + "learning_rate": 8.5695614213025e-06, + "loss": 0.3954, + "step": 9838 + }, + { + "epoch": 1.68, + "learning_rate": 8.567738993411946e-06, + "loss": 0.4041, + "step": 9839 + }, + { + "epoch": 1.68, + "learning_rate": 8.565916614085222e-06, + "loss": 0.4694, + "step": 9840 + }, + { + "epoch": 1.68, + "learning_rate": 8.564094283384126e-06, + "loss": 0.3987, + "step": 9841 + }, + { + "epoch": 1.68, + "learning_rate": 8.56227200137044e-06, + "loss": 0.3933, + "step": 9842 + }, + { + "epoch": 1.68, + "learning_rate": 8.560449768105954e-06, + "loss": 0.4049, + "step": 9843 + }, + { + "epoch": 1.68, + "learning_rate": 8.558627583652459e-06, + "loss": 0.4385, + "step": 9844 + }, + { + "epoch": 1.68, + "learning_rate": 8.556805448071736e-06, + "loss": 0.4333, + "step": 9845 + }, + { + "epoch": 1.68, + "learning_rate": 8.55498336142557e-06, + "loss": 0.4297, + "step": 9846 + }, + { + "epoch": 1.68, + "learning_rate": 8.55316132377574e-06, + "loss": 0.4394, + "step": 9847 + }, + { + "epoch": 1.68, + "learning_rate": 8.55133933518403e-06, + "loss": 0.4169, + "step": 9848 + }, + { + "epoch": 1.68, + "learning_rate": 8.549517395712222e-06, + "loss": 0.4354, + "step": 9849 + }, + { + "epoch": 1.68, + "learning_rate": 8.547695505422082e-06, + "loss": 0.4553, + "step": 9850 + }, + { + "epoch": 1.68, + "learning_rate": 8.545873664375393e-06, + "loss": 0.4101, + "step": 9851 + }, + { + "epoch": 1.68, + "learning_rate": 8.544051872633925e-06, + "loss": 0.4319, + "step": 9852 + }, + { + "epoch": 1.68, + "learning_rate": 8.54223013025945e-06, + "loss": 0.3894, + "step": 9853 + }, + { + "epoch": 1.68, + "learning_rate": 8.540408437313738e-06, + "loss": 0.4401, + "step": 9854 + }, + { + "epoch": 1.68, + "learning_rate": 8.53858679385856e-06, + "loss": 0.4186, + "step": 9855 + }, + { + "epoch": 1.68, + "learning_rate": 8.536765199955683e-06, + "loss": 0.3906, + "step": 9856 + }, + { + "epoch": 1.68, + "learning_rate": 8.534943655666866e-06, + "loss": 0.4257, + "step": 9857 + }, + { + "epoch": 1.68, + "learning_rate": 8.533122161053875e-06, + "loss": 0.4562, + "step": 9858 + }, + { + "epoch": 1.68, + "learning_rate": 8.531300716178474e-06, + "loss": 0.4401, + "step": 9859 + }, + { + "epoch": 1.68, + "learning_rate": 8.529479321102422e-06, + "loss": 0.3843, + "step": 9860 + }, + { + "epoch": 1.68, + "learning_rate": 8.527657975887477e-06, + "loss": 0.4271, + "step": 9861 + }, + { + "epoch": 1.68, + "learning_rate": 8.525836680595394e-06, + "loss": 0.3673, + "step": 9862 + }, + { + "epoch": 1.68, + "learning_rate": 8.524015435287929e-06, + "loss": 0.4235, + "step": 9863 + }, + { + "epoch": 1.68, + "learning_rate": 8.52219424002684e-06, + "loss": 0.4329, + "step": 9864 + }, + { + "epoch": 1.68, + "learning_rate": 8.52037309487387e-06, + "loss": 0.4011, + "step": 9865 + }, + { + "epoch": 1.68, + "learning_rate": 8.518551999890772e-06, + "loss": 0.4406, + "step": 9866 + }, + { + "epoch": 1.68, + "learning_rate": 8.516730955139296e-06, + "loss": 0.4332, + "step": 9867 + }, + { + "epoch": 1.68, + "learning_rate": 8.514909960681186e-06, + "loss": 0.4487, + "step": 9868 + }, + { + "epoch": 1.68, + "learning_rate": 8.513089016578188e-06, + "loss": 0.4494, + "step": 9869 + }, + { + "epoch": 1.68, + "learning_rate": 8.511268122892043e-06, + "loss": 0.4396, + "step": 9870 + }, + { + "epoch": 1.68, + "learning_rate": 8.5094472796845e-06, + "loss": 0.4447, + "step": 9871 + }, + { + "epoch": 1.68, + "learning_rate": 8.507626487017286e-06, + "loss": 0.4278, + "step": 9872 + }, + { + "epoch": 1.68, + "learning_rate": 8.505805744952148e-06, + "loss": 0.4069, + "step": 9873 + }, + { + "epoch": 1.68, + "learning_rate": 8.503985053550818e-06, + "loss": 0.4338, + "step": 9874 + }, + { + "epoch": 1.68, + "learning_rate": 8.502164412875032e-06, + "loss": 0.4113, + "step": 9875 + }, + { + "epoch": 1.68, + "learning_rate": 8.500343822986522e-06, + "loss": 0.4126, + "step": 9876 + }, + { + "epoch": 1.68, + "learning_rate": 8.498523283947019e-06, + "loss": 0.4078, + "step": 9877 + }, + { + "epoch": 1.68, + "learning_rate": 8.496702795818254e-06, + "loss": 0.4301, + "step": 9878 + }, + { + "epoch": 1.68, + "learning_rate": 8.494882358661956e-06, + "loss": 0.4084, + "step": 9879 + }, + { + "epoch": 1.69, + "learning_rate": 8.493061972539842e-06, + "loss": 0.4154, + "step": 9880 + }, + { + "epoch": 1.69, + "learning_rate": 8.491241637513644e-06, + "loss": 0.3941, + "step": 9881 + }, + { + "epoch": 1.69, + "learning_rate": 8.489421353645081e-06, + "loss": 0.4032, + "step": 9882 + }, + { + "epoch": 1.69, + "learning_rate": 8.487601120995877e-06, + "loss": 0.4229, + "step": 9883 + }, + { + "epoch": 1.69, + "learning_rate": 8.485780939627748e-06, + "loss": 0.3894, + "step": 9884 + }, + { + "epoch": 1.69, + "learning_rate": 8.48396080960241e-06, + "loss": 0.4214, + "step": 9885 + }, + { + "epoch": 1.69, + "learning_rate": 8.482140730981584e-06, + "loss": 0.442, + "step": 9886 + }, + { + "epoch": 1.69, + "learning_rate": 8.480320703826975e-06, + "loss": 0.4304, + "step": 9887 + }, + { + "epoch": 1.69, + "learning_rate": 8.4785007282003e-06, + "loss": 0.4085, + "step": 9888 + }, + { + "epoch": 1.69, + "learning_rate": 8.476680804163268e-06, + "loss": 0.4143, + "step": 9889 + }, + { + "epoch": 1.69, + "learning_rate": 8.47486093177759e-06, + "loss": 0.4238, + "step": 9890 + }, + { + "epoch": 1.69, + "learning_rate": 8.473041111104972e-06, + "loss": 0.4219, + "step": 9891 + }, + { + "epoch": 1.69, + "learning_rate": 8.471221342207113e-06, + "loss": 0.4196, + "step": 9892 + }, + { + "epoch": 1.69, + "learning_rate": 8.469401625145723e-06, + "loss": 0.4066, + "step": 9893 + }, + { + "epoch": 1.69, + "learning_rate": 8.467581959982504e-06, + "loss": 0.4196, + "step": 9894 + }, + { + "epoch": 1.69, + "learning_rate": 8.465762346779147e-06, + "loss": 0.3826, + "step": 9895 + }, + { + "epoch": 1.69, + "learning_rate": 8.463942785597356e-06, + "loss": 0.427, + "step": 9896 + }, + { + "epoch": 1.69, + "learning_rate": 8.462123276498828e-06, + "loss": 0.4143, + "step": 9897 + }, + { + "epoch": 1.69, + "learning_rate": 8.460303819545253e-06, + "loss": 0.4037, + "step": 9898 + }, + { + "epoch": 1.69, + "learning_rate": 8.458484414798331e-06, + "loss": 0.4279, + "step": 9899 + }, + { + "epoch": 1.69, + "learning_rate": 8.456665062319743e-06, + "loss": 0.3994, + "step": 9900 + }, + { + "epoch": 1.69, + "learning_rate": 8.454845762171188e-06, + "loss": 0.4267, + "step": 9901 + }, + { + "epoch": 1.69, + "learning_rate": 8.453026514414345e-06, + "loss": 0.3883, + "step": 9902 + }, + { + "epoch": 1.69, + "learning_rate": 8.451207319110902e-06, + "loss": 0.4023, + "step": 9903 + }, + { + "epoch": 1.69, + "learning_rate": 8.449388176322542e-06, + "loss": 0.4175, + "step": 9904 + }, + { + "epoch": 1.69, + "learning_rate": 8.447569086110949e-06, + "loss": 0.4224, + "step": 9905 + }, + { + "epoch": 1.69, + "learning_rate": 8.445750048537803e-06, + "loss": 0.4147, + "step": 9906 + }, + { + "epoch": 1.69, + "learning_rate": 8.443931063664784e-06, + "loss": 0.3942, + "step": 9907 + }, + { + "epoch": 1.69, + "learning_rate": 8.44211213155356e-06, + "loss": 0.3901, + "step": 9908 + }, + { + "epoch": 1.69, + "learning_rate": 8.44029325226582e-06, + "loss": 0.436, + "step": 9909 + }, + { + "epoch": 1.69, + "learning_rate": 8.438474425863222e-06, + "loss": 0.4194, + "step": 9910 + }, + { + "epoch": 1.69, + "learning_rate": 8.436655652407445e-06, + "loss": 0.4312, + "step": 9911 + }, + { + "epoch": 1.69, + "learning_rate": 8.434836931960156e-06, + "loss": 0.4276, + "step": 9912 + }, + { + "epoch": 1.69, + "learning_rate": 8.433018264583024e-06, + "loss": 0.4218, + "step": 9913 + }, + { + "epoch": 1.69, + "learning_rate": 8.431199650337718e-06, + "loss": 0.3965, + "step": 9914 + }, + { + "epoch": 1.69, + "learning_rate": 8.429381089285894e-06, + "loss": 0.4467, + "step": 9915 + }, + { + "epoch": 1.69, + "learning_rate": 8.427562581489223e-06, + "loss": 0.4306, + "step": 9916 + }, + { + "epoch": 1.69, + "learning_rate": 8.42574412700936e-06, + "loss": 0.4121, + "step": 9917 + }, + { + "epoch": 1.69, + "learning_rate": 8.423925725907962e-06, + "loss": 0.4363, + "step": 9918 + }, + { + "epoch": 1.69, + "learning_rate": 8.422107378246687e-06, + "loss": 0.4017, + "step": 9919 + }, + { + "epoch": 1.69, + "learning_rate": 8.420289084087193e-06, + "loss": 0.4237, + "step": 9920 + }, + { + "epoch": 1.69, + "learning_rate": 8.418470843491132e-06, + "loss": 0.4383, + "step": 9921 + }, + { + "epoch": 1.69, + "learning_rate": 8.416652656520154e-06, + "loss": 0.4003, + "step": 9922 + }, + { + "epoch": 1.69, + "learning_rate": 8.414834523235908e-06, + "loss": 0.405, + "step": 9923 + }, + { + "epoch": 1.69, + "learning_rate": 8.413016443700043e-06, + "loss": 0.3932, + "step": 9924 + }, + { + "epoch": 1.69, + "learning_rate": 8.411198417974206e-06, + "loss": 0.4386, + "step": 9925 + }, + { + "epoch": 1.69, + "learning_rate": 8.409380446120037e-06, + "loss": 0.4006, + "step": 9926 + }, + { + "epoch": 1.69, + "learning_rate": 8.407562528199182e-06, + "loss": 0.4077, + "step": 9927 + }, + { + "epoch": 1.69, + "learning_rate": 8.405744664273278e-06, + "loss": 0.394, + "step": 9928 + }, + { + "epoch": 1.69, + "learning_rate": 8.403926854403967e-06, + "loss": 0.4385, + "step": 9929 + }, + { + "epoch": 1.69, + "learning_rate": 8.402109098652884e-06, + "loss": 0.4517, + "step": 9930 + }, + { + "epoch": 1.69, + "learning_rate": 8.400291397081667e-06, + "loss": 0.4389, + "step": 9931 + }, + { + "epoch": 1.69, + "learning_rate": 8.398473749751944e-06, + "loss": 0.4238, + "step": 9932 + }, + { + "epoch": 1.69, + "learning_rate": 8.396656156725344e-06, + "loss": 0.4231, + "step": 9933 + }, + { + "epoch": 1.69, + "learning_rate": 8.394838618063503e-06, + "loss": 0.4078, + "step": 9934 + }, + { + "epoch": 1.69, + "learning_rate": 8.393021133828046e-06, + "loss": 0.4338, + "step": 9935 + }, + { + "epoch": 1.69, + "learning_rate": 8.391203704080598e-06, + "loss": 0.4351, + "step": 9936 + }, + { + "epoch": 1.69, + "learning_rate": 8.389386328882783e-06, + "loss": 0.4068, + "step": 9937 + }, + { + "epoch": 1.69, + "learning_rate": 8.387569008296226e-06, + "loss": 0.4493, + "step": 9938 + }, + { + "epoch": 1.7, + "learning_rate": 8.385751742382545e-06, + "loss": 0.3895, + "step": 9939 + }, + { + "epoch": 1.7, + "learning_rate": 8.383934531203356e-06, + "loss": 0.4094, + "step": 9940 + }, + { + "epoch": 1.7, + "learning_rate": 8.382117374820275e-06, + "loss": 0.4364, + "step": 9941 + }, + { + "epoch": 1.7, + "learning_rate": 8.38030027329492e-06, + "loss": 0.4264, + "step": 9942 + }, + { + "epoch": 1.7, + "learning_rate": 8.378483226688901e-06, + "loss": 0.4376, + "step": 9943 + }, + { + "epoch": 1.7, + "learning_rate": 8.376666235063831e-06, + "loss": 0.4155, + "step": 9944 + }, + { + "epoch": 1.7, + "learning_rate": 8.374849298481316e-06, + "loss": 0.4091, + "step": 9945 + }, + { + "epoch": 1.7, + "learning_rate": 8.373032417002972e-06, + "loss": 0.4389, + "step": 9946 + }, + { + "epoch": 1.7, + "learning_rate": 8.37121559069039e-06, + "loss": 0.443, + "step": 9947 + }, + { + "epoch": 1.7, + "learning_rate": 8.369398819605186e-06, + "loss": 0.4493, + "step": 9948 + }, + { + "epoch": 1.7, + "learning_rate": 8.367582103808951e-06, + "loss": 0.4017, + "step": 9949 + }, + { + "epoch": 1.7, + "learning_rate": 8.36576544336329e-06, + "loss": 0.4215, + "step": 9950 + }, + { + "epoch": 1.7, + "learning_rate": 8.363948838329802e-06, + "loss": 0.4512, + "step": 9951 + }, + { + "epoch": 1.7, + "learning_rate": 8.36213228877008e-06, + "loss": 0.4252, + "step": 9952 + }, + { + "epoch": 1.7, + "learning_rate": 8.36031579474572e-06, + "loss": 0.4554, + "step": 9953 + }, + { + "epoch": 1.7, + "learning_rate": 8.358499356318314e-06, + "loss": 0.4444, + "step": 9954 + }, + { + "epoch": 1.7, + "learning_rate": 8.35668297354945e-06, + "loss": 0.4477, + "step": 9955 + }, + { + "epoch": 1.7, + "learning_rate": 8.354866646500718e-06, + "loss": 0.4222, + "step": 9956 + }, + { + "epoch": 1.7, + "learning_rate": 8.353050375233703e-06, + "loss": 0.4514, + "step": 9957 + }, + { + "epoch": 1.7, + "learning_rate": 8.351234159809991e-06, + "loss": 0.4321, + "step": 9958 + }, + { + "epoch": 1.7, + "learning_rate": 8.349418000291163e-06, + "loss": 0.4311, + "step": 9959 + }, + { + "epoch": 1.7, + "learning_rate": 8.347601896738801e-06, + "loss": 0.4329, + "step": 9960 + }, + { + "epoch": 1.7, + "learning_rate": 8.345785849214489e-06, + "loss": 0.4329, + "step": 9961 + }, + { + "epoch": 1.7, + "learning_rate": 8.343969857779793e-06, + "loss": 0.4123, + "step": 9962 + }, + { + "epoch": 1.7, + "learning_rate": 8.342153922496294e-06, + "loss": 0.3988, + "step": 9963 + }, + { + "epoch": 1.7, + "learning_rate": 8.340338043425566e-06, + "loss": 0.4323, + "step": 9964 + }, + { + "epoch": 1.7, + "learning_rate": 8.338522220629179e-06, + "loss": 0.4388, + "step": 9965 + }, + { + "epoch": 1.7, + "learning_rate": 8.336706454168701e-06, + "loss": 0.4155, + "step": 9966 + }, + { + "epoch": 1.7, + "learning_rate": 8.3348907441057e-06, + "loss": 0.4195, + "step": 9967 + }, + { + "epoch": 1.7, + "learning_rate": 8.333075090501742e-06, + "loss": 0.4201, + "step": 9968 + }, + { + "epoch": 1.7, + "learning_rate": 8.331259493418395e-06, + "loss": 0.4044, + "step": 9969 + }, + { + "epoch": 1.7, + "learning_rate": 8.329443952917213e-06, + "loss": 0.4164, + "step": 9970 + }, + { + "epoch": 1.7, + "learning_rate": 8.327628469059761e-06, + "loss": 0.4496, + "step": 9971 + }, + { + "epoch": 1.7, + "learning_rate": 8.325813041907592e-06, + "loss": 0.4527, + "step": 9972 + }, + { + "epoch": 1.7, + "learning_rate": 8.323997671522266e-06, + "loss": 0.4188, + "step": 9973 + }, + { + "epoch": 1.7, + "learning_rate": 8.322182357965335e-06, + "loss": 0.4178, + "step": 9974 + }, + { + "epoch": 1.7, + "learning_rate": 8.320367101298351e-06, + "loss": 0.4267, + "step": 9975 + }, + { + "epoch": 1.7, + "learning_rate": 8.31855190158287e-06, + "loss": 0.4344, + "step": 9976 + }, + { + "epoch": 1.7, + "learning_rate": 8.31673675888043e-06, + "loss": 0.4208, + "step": 9977 + }, + { + "epoch": 1.7, + "learning_rate": 8.314921673252583e-06, + "loss": 0.3959, + "step": 9978 + }, + { + "epoch": 1.7, + "learning_rate": 8.313106644760874e-06, + "loss": 0.4137, + "step": 9979 + }, + { + "epoch": 1.7, + "learning_rate": 8.311291673466843e-06, + "loss": 0.3943, + "step": 9980 + }, + { + "epoch": 1.7, + "learning_rate": 8.30947675943203e-06, + "loss": 0.4271, + "step": 9981 + }, + { + "epoch": 1.7, + "learning_rate": 8.307661902717976e-06, + "loss": 0.4489, + "step": 9982 + }, + { + "epoch": 1.7, + "learning_rate": 8.305847103386217e-06, + "loss": 0.4093, + "step": 9983 + }, + { + "epoch": 1.7, + "learning_rate": 8.30403236149829e-06, + "loss": 0.4406, + "step": 9984 + }, + { + "epoch": 1.7, + "learning_rate": 8.30221767711572e-06, + "loss": 0.4019, + "step": 9985 + }, + { + "epoch": 1.7, + "learning_rate": 8.300403050300043e-06, + "loss": 0.4216, + "step": 9986 + }, + { + "epoch": 1.7, + "learning_rate": 8.298588481112789e-06, + "loss": 0.4437, + "step": 9987 + }, + { + "epoch": 1.7, + "learning_rate": 8.296773969615482e-06, + "loss": 0.4282, + "step": 9988 + }, + { + "epoch": 1.7, + "learning_rate": 8.294959515869647e-06, + "loss": 0.4216, + "step": 9989 + }, + { + "epoch": 1.7, + "learning_rate": 8.293145119936808e-06, + "loss": 0.41, + "step": 9990 + }, + { + "epoch": 1.7, + "learning_rate": 8.291330781878489e-06, + "loss": 0.4102, + "step": 9991 + }, + { + "epoch": 1.7, + "learning_rate": 8.2895165017562e-06, + "loss": 0.3958, + "step": 9992 + }, + { + "epoch": 1.7, + "learning_rate": 8.287702279631467e-06, + "loss": 0.4175, + "step": 9993 + }, + { + "epoch": 1.7, + "learning_rate": 8.2858881155658e-06, + "loss": 0.4463, + "step": 9994 + }, + { + "epoch": 1.7, + "learning_rate": 8.284074009620715e-06, + "loss": 0.4201, + "step": 9995 + }, + { + "epoch": 1.7, + "learning_rate": 8.28225996185772e-06, + "loss": 0.4321, + "step": 9996 + }, + { + "epoch": 1.7, + "learning_rate": 8.280445972338325e-06, + "loss": 0.4352, + "step": 9997 + }, + { + "epoch": 1.71, + "learning_rate": 8.278632041124039e-06, + "loss": 0.3857, + "step": 9998 + }, + { + "epoch": 1.71, + "learning_rate": 8.27681816827637e-06, + "loss": 0.4599, + "step": 9999 + }, + { + "epoch": 1.71, + "learning_rate": 8.275004353856815e-06, + "loss": 0.4076, + "step": 10000 + }, + { + "epoch": 1.71, + "learning_rate": 8.273190597926875e-06, + "loss": 0.4527, + "step": 10001 + }, + { + "epoch": 1.71, + "learning_rate": 8.271376900548053e-06, + "loss": 0.3904, + "step": 10002 + }, + { + "epoch": 1.71, + "learning_rate": 8.269563261781846e-06, + "loss": 0.4577, + "step": 10003 + }, + { + "epoch": 1.71, + "learning_rate": 8.267749681689747e-06, + "loss": 0.4301, + "step": 10004 + }, + { + "epoch": 1.71, + "learning_rate": 8.26593616033325e-06, + "loss": 0.4121, + "step": 10005 + }, + { + "epoch": 1.71, + "learning_rate": 8.264122697773853e-06, + "loss": 0.4296, + "step": 10006 + }, + { + "epoch": 1.71, + "learning_rate": 8.262309294073033e-06, + "loss": 0.4015, + "step": 10007 + }, + { + "epoch": 1.71, + "learning_rate": 8.260495949292282e-06, + "loss": 0.4511, + "step": 10008 + }, + { + "epoch": 1.71, + "learning_rate": 8.25868266349309e-06, + "loss": 0.4331, + "step": 10009 + }, + { + "epoch": 1.71, + "learning_rate": 8.256869436736934e-06, + "loss": 0.4301, + "step": 10010 + }, + { + "epoch": 1.71, + "learning_rate": 8.255056269085301e-06, + "loss": 0.4677, + "step": 10011 + }, + { + "epoch": 1.71, + "learning_rate": 8.253243160599666e-06, + "loss": 0.4238, + "step": 10012 + }, + { + "epoch": 1.71, + "learning_rate": 8.251430111341508e-06, + "loss": 0.438, + "step": 10013 + }, + { + "epoch": 1.71, + "learning_rate": 8.249617121372304e-06, + "loss": 0.4288, + "step": 10014 + }, + { + "epoch": 1.71, + "learning_rate": 8.247804190753524e-06, + "loss": 0.4129, + "step": 10015 + }, + { + "epoch": 1.71, + "learning_rate": 8.245991319546638e-06, + "loss": 0.4685, + "step": 10016 + }, + { + "epoch": 1.71, + "learning_rate": 8.24417850781312e-06, + "loss": 0.4449, + "step": 10017 + }, + { + "epoch": 1.71, + "learning_rate": 8.242365755614435e-06, + "loss": 0.4252, + "step": 10018 + }, + { + "epoch": 1.71, + "learning_rate": 8.240553063012045e-06, + "loss": 0.4089, + "step": 10019 + }, + { + "epoch": 1.71, + "learning_rate": 8.238740430067419e-06, + "loss": 0.4188, + "step": 10020 + }, + { + "epoch": 1.71, + "learning_rate": 8.236927856842019e-06, + "loss": 0.4155, + "step": 10021 + }, + { + "epoch": 1.71, + "learning_rate": 8.235115343397295e-06, + "loss": 0.4126, + "step": 10022 + }, + { + "epoch": 1.71, + "learning_rate": 8.23330288979471e-06, + "loss": 0.3798, + "step": 10023 + }, + { + "epoch": 1.71, + "learning_rate": 8.23149049609572e-06, + "loss": 0.4688, + "step": 10024 + }, + { + "epoch": 1.71, + "learning_rate": 8.229678162361776e-06, + "loss": 0.4101, + "step": 10025 + }, + { + "epoch": 1.71, + "learning_rate": 8.22786588865433e-06, + "loss": 0.3752, + "step": 10026 + }, + { + "epoch": 1.71, + "learning_rate": 8.22605367503483e-06, + "loss": 0.4214, + "step": 10027 + }, + { + "epoch": 1.71, + "learning_rate": 8.224241521564724e-06, + "loss": 0.4225, + "step": 10028 + }, + { + "epoch": 1.71, + "learning_rate": 8.222429428305461e-06, + "loss": 0.4261, + "step": 10029 + }, + { + "epoch": 1.71, + "learning_rate": 8.220617395318474e-06, + "loss": 0.4158, + "step": 10030 + }, + { + "epoch": 1.71, + "learning_rate": 8.218805422665211e-06, + "loss": 0.3856, + "step": 10031 + }, + { + "epoch": 1.71, + "learning_rate": 8.216993510407106e-06, + "loss": 0.4169, + "step": 10032 + }, + { + "epoch": 1.71, + "learning_rate": 8.2151816586056e-06, + "loss": 0.4056, + "step": 10033 + }, + { + "epoch": 1.71, + "learning_rate": 8.21336986732213e-06, + "loss": 0.4212, + "step": 10034 + }, + { + "epoch": 1.71, + "learning_rate": 8.211558136618121e-06, + "loss": 0.4085, + "step": 10035 + }, + { + "epoch": 1.71, + "learning_rate": 8.20974646655501e-06, + "loss": 0.4145, + "step": 10036 + }, + { + "epoch": 1.71, + "learning_rate": 8.207934857194222e-06, + "loss": 0.4221, + "step": 10037 + }, + { + "epoch": 1.71, + "learning_rate": 8.206123308597183e-06, + "loss": 0.4409, + "step": 10038 + }, + { + "epoch": 1.71, + "learning_rate": 8.204311820825318e-06, + "loss": 0.4427, + "step": 10039 + }, + { + "epoch": 1.71, + "learning_rate": 8.20250039394005e-06, + "loss": 0.4246, + "step": 10040 + }, + { + "epoch": 1.71, + "learning_rate": 8.2006890280028e-06, + "loss": 0.4056, + "step": 10041 + }, + { + "epoch": 1.71, + "learning_rate": 8.198877723074989e-06, + "loss": 0.4251, + "step": 10042 + }, + { + "epoch": 1.71, + "learning_rate": 8.197066479218026e-06, + "loss": 0.4354, + "step": 10043 + }, + { + "epoch": 1.71, + "learning_rate": 8.195255296493332e-06, + "loss": 0.4296, + "step": 10044 + }, + { + "epoch": 1.71, + "learning_rate": 8.193444174962313e-06, + "loss": 0.427, + "step": 10045 + }, + { + "epoch": 1.71, + "learning_rate": 8.19163311468638e-06, + "loss": 0.4576, + "step": 10046 + }, + { + "epoch": 1.71, + "learning_rate": 8.189822115726943e-06, + "loss": 0.3824, + "step": 10047 + }, + { + "epoch": 1.71, + "learning_rate": 8.188011178145407e-06, + "loss": 0.452, + "step": 10048 + }, + { + "epoch": 1.71, + "learning_rate": 8.186200302003176e-06, + "loss": 0.4423, + "step": 10049 + }, + { + "epoch": 1.71, + "learning_rate": 8.184389487361654e-06, + "loss": 0.437, + "step": 10050 + }, + { + "epoch": 1.71, + "learning_rate": 8.182578734282235e-06, + "loss": 0.4427, + "step": 10051 + }, + { + "epoch": 1.71, + "learning_rate": 8.180768042826323e-06, + "loss": 0.4125, + "step": 10052 + }, + { + "epoch": 1.71, + "learning_rate": 8.178957413055307e-06, + "loss": 0.4078, + "step": 10053 + }, + { + "epoch": 1.71, + "learning_rate": 8.177146845030581e-06, + "loss": 0.4066, + "step": 10054 + }, + { + "epoch": 1.71, + "learning_rate": 8.175336338813538e-06, + "loss": 0.4169, + "step": 10055 + }, + { + "epoch": 1.72, + "learning_rate": 8.173525894465566e-06, + "loss": 0.4034, + "step": 10056 + }, + { + "epoch": 1.72, + "learning_rate": 8.171715512048055e-06, + "loss": 0.4115, + "step": 10057 + }, + { + "epoch": 1.72, + "learning_rate": 8.16990519162239e-06, + "loss": 0.3992, + "step": 10058 + }, + { + "epoch": 1.72, + "learning_rate": 8.16809493324995e-06, + "loss": 0.4155, + "step": 10059 + }, + { + "epoch": 1.72, + "learning_rate": 8.166284736992117e-06, + "loss": 0.4248, + "step": 10060 + }, + { + "epoch": 1.72, + "learning_rate": 8.164474602910268e-06, + "loss": 0.4241, + "step": 10061 + }, + { + "epoch": 1.72, + "learning_rate": 8.16266453106578e-06, + "loss": 0.4337, + "step": 10062 + }, + { + "epoch": 1.72, + "learning_rate": 8.16085452152003e-06, + "loss": 0.4348, + "step": 10063 + }, + { + "epoch": 1.72, + "learning_rate": 8.159044574334388e-06, + "loss": 0.4169, + "step": 10064 + }, + { + "epoch": 1.72, + "learning_rate": 8.157234689570226e-06, + "loss": 0.4301, + "step": 10065 + }, + { + "epoch": 1.72, + "learning_rate": 8.155424867288907e-06, + "loss": 0.4267, + "step": 10066 + }, + { + "epoch": 1.72, + "learning_rate": 8.153615107551804e-06, + "loss": 0.4083, + "step": 10067 + }, + { + "epoch": 1.72, + "learning_rate": 8.151805410420278e-06, + "loss": 0.4017, + "step": 10068 + }, + { + "epoch": 1.72, + "learning_rate": 8.149995775955686e-06, + "loss": 0.4304, + "step": 10069 + }, + { + "epoch": 1.72, + "learning_rate": 8.148186204219392e-06, + "loss": 0.4151, + "step": 10070 + }, + { + "epoch": 1.72, + "learning_rate": 8.146376695272753e-06, + "loss": 0.4318, + "step": 10071 + }, + { + "epoch": 1.72, + "learning_rate": 8.144567249177121e-06, + "loss": 0.4031, + "step": 10072 + }, + { + "epoch": 1.72, + "learning_rate": 8.142757865993858e-06, + "loss": 0.4601, + "step": 10073 + }, + { + "epoch": 1.72, + "learning_rate": 8.140948545784306e-06, + "loss": 0.4222, + "step": 10074 + }, + { + "epoch": 1.72, + "learning_rate": 8.139139288609817e-06, + "loss": 0.3915, + "step": 10075 + }, + { + "epoch": 1.72, + "learning_rate": 8.137330094531736e-06, + "loss": 0.4278, + "step": 10076 + }, + { + "epoch": 1.72, + "learning_rate": 8.13552096361141e-06, + "loss": 0.4109, + "step": 10077 + }, + { + "epoch": 1.72, + "learning_rate": 8.133711895910179e-06, + "loss": 0.4511, + "step": 10078 + }, + { + "epoch": 1.72, + "learning_rate": 8.131902891489383e-06, + "loss": 0.4164, + "step": 10079 + }, + { + "epoch": 1.72, + "learning_rate": 8.130093950410362e-06, + "loss": 0.3937, + "step": 10080 + }, + { + "epoch": 1.72, + "learning_rate": 8.128285072734454e-06, + "loss": 0.3771, + "step": 10081 + }, + { + "epoch": 1.72, + "learning_rate": 8.126476258522992e-06, + "loss": 0.4294, + "step": 10082 + }, + { + "epoch": 1.72, + "learning_rate": 8.124667507837304e-06, + "loss": 0.4043, + "step": 10083 + }, + { + "epoch": 1.72, + "learning_rate": 8.12285882073872e-06, + "loss": 0.4186, + "step": 10084 + }, + { + "epoch": 1.72, + "learning_rate": 8.121050197288569e-06, + "loss": 0.4452, + "step": 10085 + }, + { + "epoch": 1.72, + "learning_rate": 8.119241637548176e-06, + "loss": 0.4495, + "step": 10086 + }, + { + "epoch": 1.72, + "learning_rate": 8.117433141578865e-06, + "loss": 0.4251, + "step": 10087 + }, + { + "epoch": 1.72, + "learning_rate": 8.115624709441955e-06, + "loss": 0.4421, + "step": 10088 + }, + { + "epoch": 1.72, + "learning_rate": 8.11381634119877e-06, + "loss": 0.4144, + "step": 10089 + }, + { + "epoch": 1.72, + "learning_rate": 8.11200803691062e-06, + "loss": 0.389, + "step": 10090 + }, + { + "epoch": 1.72, + "learning_rate": 8.11019979663882e-06, + "loss": 0.4197, + "step": 10091 + }, + { + "epoch": 1.72, + "learning_rate": 8.108391620444684e-06, + "loss": 0.4303, + "step": 10092 + }, + { + "epoch": 1.72, + "learning_rate": 8.106583508389522e-06, + "loss": 0.4435, + "step": 10093 + }, + { + "epoch": 1.72, + "learning_rate": 8.104775460534641e-06, + "loss": 0.4402, + "step": 10094 + }, + { + "epoch": 1.72, + "learning_rate": 8.102967476941349e-06, + "loss": 0.4214, + "step": 10095 + }, + { + "epoch": 1.72, + "learning_rate": 8.101159557670945e-06, + "loss": 0.432, + "step": 10096 + }, + { + "epoch": 1.72, + "learning_rate": 8.099351702784739e-06, + "loss": 0.4174, + "step": 10097 + }, + { + "epoch": 1.72, + "learning_rate": 8.09754391234402e-06, + "loss": 0.4465, + "step": 10098 + }, + { + "epoch": 1.72, + "learning_rate": 8.095736186410091e-06, + "loss": 0.4394, + "step": 10099 + }, + { + "epoch": 1.72, + "learning_rate": 8.093928525044244e-06, + "loss": 0.4176, + "step": 10100 + }, + { + "epoch": 1.72, + "learning_rate": 8.092120928307772e-06, + "loss": 0.4267, + "step": 10101 + }, + { + "epoch": 1.72, + "learning_rate": 8.090313396261965e-06, + "loss": 0.4123, + "step": 10102 + }, + { + "epoch": 1.72, + "learning_rate": 8.088505928968114e-06, + "loss": 0.4289, + "step": 10103 + }, + { + "epoch": 1.72, + "learning_rate": 8.086698526487507e-06, + "loss": 0.4513, + "step": 10104 + }, + { + "epoch": 1.72, + "learning_rate": 8.084891188881418e-06, + "loss": 0.4263, + "step": 10105 + }, + { + "epoch": 1.72, + "learning_rate": 8.083083916211137e-06, + "loss": 0.4311, + "step": 10106 + }, + { + "epoch": 1.72, + "learning_rate": 8.081276708537942e-06, + "loss": 0.4292, + "step": 10107 + }, + { + "epoch": 1.72, + "learning_rate": 8.079469565923107e-06, + "loss": 0.4223, + "step": 10108 + }, + { + "epoch": 1.72, + "learning_rate": 8.077662488427908e-06, + "loss": 0.3874, + "step": 10109 + }, + { + "epoch": 1.72, + "learning_rate": 8.075855476113622e-06, + "loss": 0.4502, + "step": 10110 + }, + { + "epoch": 1.72, + "learning_rate": 8.074048529041514e-06, + "loss": 0.4163, + "step": 10111 + }, + { + "epoch": 1.72, + "learning_rate": 8.072241647272858e-06, + "loss": 0.4375, + "step": 10112 + }, + { + "epoch": 1.72, + "learning_rate": 8.070434830868915e-06, + "loss": 0.4191, + "step": 10113 + }, + { + "epoch": 1.72, + "learning_rate": 8.06862807989095e-06, + "loss": 0.4207, + "step": 10114 + }, + { + "epoch": 1.73, + "learning_rate": 8.066821394400229e-06, + "loss": 0.4659, + "step": 10115 + }, + { + "epoch": 1.73, + "learning_rate": 8.065014774458004e-06, + "loss": 0.407, + "step": 10116 + }, + { + "epoch": 1.73, + "learning_rate": 8.063208220125538e-06, + "loss": 0.441, + "step": 10117 + }, + { + "epoch": 1.73, + "learning_rate": 8.061401731464083e-06, + "loss": 0.4214, + "step": 10118 + }, + { + "epoch": 1.73, + "learning_rate": 8.059595308534898e-06, + "loss": 0.3692, + "step": 10119 + }, + { + "epoch": 1.73, + "learning_rate": 8.057788951399226e-06, + "loss": 0.3831, + "step": 10120 + }, + { + "epoch": 1.73, + "learning_rate": 8.055982660118315e-06, + "loss": 0.4484, + "step": 10121 + }, + { + "epoch": 1.73, + "learning_rate": 8.054176434753417e-06, + "loss": 0.4125, + "step": 10122 + }, + { + "epoch": 1.73, + "learning_rate": 8.052370275365775e-06, + "loss": 0.4367, + "step": 10123 + }, + { + "epoch": 1.73, + "learning_rate": 8.050564182016625e-06, + "loss": 0.4253, + "step": 10124 + }, + { + "epoch": 1.73, + "learning_rate": 8.04875815476721e-06, + "loss": 0.4189, + "step": 10125 + }, + { + "epoch": 1.73, + "learning_rate": 8.04695219367877e-06, + "loss": 0.4093, + "step": 10126 + }, + { + "epoch": 1.73, + "learning_rate": 8.045146298812539e-06, + "loss": 0.4267, + "step": 10127 + }, + { + "epoch": 1.73, + "learning_rate": 8.043340470229745e-06, + "loss": 0.4163, + "step": 10128 + }, + { + "epoch": 1.73, + "learning_rate": 8.041534707991622e-06, + "loss": 0.4016, + "step": 10129 + }, + { + "epoch": 1.73, + "learning_rate": 8.039729012159398e-06, + "loss": 0.3844, + "step": 10130 + }, + { + "epoch": 1.73, + "learning_rate": 8.037923382794297e-06, + "loss": 0.4285, + "step": 10131 + }, + { + "epoch": 1.73, + "learning_rate": 8.036117819957543e-06, + "loss": 0.3975, + "step": 10132 + }, + { + "epoch": 1.73, + "learning_rate": 8.034312323710361e-06, + "loss": 0.3953, + "step": 10133 + }, + { + "epoch": 1.73, + "learning_rate": 8.03250689411397e-06, + "loss": 0.4099, + "step": 10134 + }, + { + "epoch": 1.73, + "learning_rate": 8.030701531229583e-06, + "loss": 0.3862, + "step": 10135 + }, + { + "epoch": 1.73, + "learning_rate": 8.028896235118412e-06, + "loss": 0.404, + "step": 10136 + }, + { + "epoch": 1.73, + "learning_rate": 8.027091005841678e-06, + "loss": 0.4003, + "step": 10137 + }, + { + "epoch": 1.73, + "learning_rate": 8.025285843460586e-06, + "loss": 0.4084, + "step": 10138 + }, + { + "epoch": 1.73, + "learning_rate": 8.023480748036343e-06, + "loss": 0.429, + "step": 10139 + }, + { + "epoch": 1.73, + "learning_rate": 8.021675719630157e-06, + "loss": 0.4349, + "step": 10140 + }, + { + "epoch": 1.73, + "learning_rate": 8.01987075830323e-06, + "loss": 0.4115, + "step": 10141 + }, + { + "epoch": 1.73, + "learning_rate": 8.018065864116768e-06, + "loss": 0.4123, + "step": 10142 + }, + { + "epoch": 1.73, + "learning_rate": 8.01626103713196e-06, + "loss": 0.4248, + "step": 10143 + }, + { + "epoch": 1.73, + "learning_rate": 8.01445627741001e-06, + "loss": 0.4054, + "step": 10144 + }, + { + "epoch": 1.73, + "learning_rate": 8.012651585012107e-06, + "loss": 0.4065, + "step": 10145 + }, + { + "epoch": 1.73, + "learning_rate": 8.01084695999945e-06, + "loss": 0.3905, + "step": 10146 + }, + { + "epoch": 1.73, + "learning_rate": 8.009042402433221e-06, + "loss": 0.4154, + "step": 10147 + }, + { + "epoch": 1.73, + "learning_rate": 8.007237912374612e-06, + "loss": 0.4426, + "step": 10148 + }, + { + "epoch": 1.73, + "learning_rate": 8.00543348988481e-06, + "loss": 0.4061, + "step": 10149 + }, + { + "epoch": 1.73, + "learning_rate": 8.00362913502499e-06, + "loss": 0.4135, + "step": 10150 + }, + { + "epoch": 1.73, + "learning_rate": 8.001824847856337e-06, + "loss": 0.4084, + "step": 10151 + }, + { + "epoch": 1.73, + "learning_rate": 8.00002062844003e-06, + "loss": 0.3957, + "step": 10152 + }, + { + "epoch": 1.73, + "learning_rate": 7.998216476837245e-06, + "loss": 0.4266, + "step": 10153 + }, + { + "epoch": 1.73, + "learning_rate": 7.996412393109154e-06, + "loss": 0.4344, + "step": 10154 + }, + { + "epoch": 1.73, + "learning_rate": 7.994608377316928e-06, + "loss": 0.3918, + "step": 10155 + }, + { + "epoch": 1.73, + "learning_rate": 7.992804429521737e-06, + "loss": 0.414, + "step": 10156 + }, + { + "epoch": 1.73, + "learning_rate": 7.991000549784753e-06, + "loss": 0.42, + "step": 10157 + }, + { + "epoch": 1.73, + "learning_rate": 7.989196738167128e-06, + "loss": 0.4216, + "step": 10158 + }, + { + "epoch": 1.73, + "learning_rate": 7.987392994730032e-06, + "loss": 0.412, + "step": 10159 + }, + { + "epoch": 1.73, + "learning_rate": 7.985589319534623e-06, + "loss": 0.4575, + "step": 10160 + }, + { + "epoch": 1.73, + "learning_rate": 7.98378571264206e-06, + "loss": 0.4295, + "step": 10161 + }, + { + "epoch": 1.73, + "learning_rate": 7.981982174113499e-06, + "loss": 0.4251, + "step": 10162 + }, + { + "epoch": 1.73, + "learning_rate": 7.980178704010089e-06, + "loss": 0.3923, + "step": 10163 + }, + { + "epoch": 1.73, + "learning_rate": 7.978375302392986e-06, + "loss": 0.4494, + "step": 10164 + }, + { + "epoch": 1.73, + "learning_rate": 7.97657196932333e-06, + "loss": 0.4213, + "step": 10165 + }, + { + "epoch": 1.73, + "learning_rate": 7.974768704862273e-06, + "loss": 0.4095, + "step": 10166 + }, + { + "epoch": 1.73, + "learning_rate": 7.972965509070954e-06, + "loss": 0.4209, + "step": 10167 + }, + { + "epoch": 1.73, + "learning_rate": 7.97116238201052e-06, + "loss": 0.4417, + "step": 10168 + }, + { + "epoch": 1.73, + "learning_rate": 7.969359323742108e-06, + "loss": 0.4454, + "step": 10169 + }, + { + "epoch": 1.73, + "learning_rate": 7.96755633432685e-06, + "loss": 0.4378, + "step": 10170 + }, + { + "epoch": 1.73, + "learning_rate": 7.965753413825884e-06, + "loss": 0.4165, + "step": 10171 + }, + { + "epoch": 1.73, + "learning_rate": 7.963950562300344e-06, + "loss": 0.4299, + "step": 10172 + }, + { + "epoch": 1.73, + "learning_rate": 7.962147779811355e-06, + "loss": 0.4291, + "step": 10173 + }, + { + "epoch": 1.74, + "learning_rate": 7.960345066420045e-06, + "loss": 0.4073, + "step": 10174 + }, + { + "epoch": 1.74, + "learning_rate": 7.958542422187538e-06, + "loss": 0.4442, + "step": 10175 + }, + { + "epoch": 1.74, + "learning_rate": 7.95673984717496e-06, + "loss": 0.4554, + "step": 10176 + }, + { + "epoch": 1.74, + "learning_rate": 7.954937341443432e-06, + "loss": 0.409, + "step": 10177 + }, + { + "epoch": 1.74, + "learning_rate": 7.953134905054065e-06, + "loss": 0.469, + "step": 10178 + }, + { + "epoch": 1.74, + "learning_rate": 7.951332538067982e-06, + "loss": 0.4254, + "step": 10179 + }, + { + "epoch": 1.74, + "learning_rate": 7.94953024054629e-06, + "loss": 0.4065, + "step": 10180 + }, + { + "epoch": 1.74, + "learning_rate": 7.947728012550103e-06, + "loss": 0.3966, + "step": 10181 + }, + { + "epoch": 1.74, + "learning_rate": 7.945925854140527e-06, + "loss": 0.4273, + "step": 10182 + }, + { + "epoch": 1.74, + "learning_rate": 7.94412376537867e-06, + "loss": 0.4216, + "step": 10183 + }, + { + "epoch": 1.74, + "learning_rate": 7.942321746325635e-06, + "loss": 0.4177, + "step": 10184 + }, + { + "epoch": 1.74, + "learning_rate": 7.940519797042526e-06, + "loss": 0.4299, + "step": 10185 + }, + { + "epoch": 1.74, + "learning_rate": 7.938717917590437e-06, + "loss": 0.3872, + "step": 10186 + }, + { + "epoch": 1.74, + "learning_rate": 7.936916108030471e-06, + "loss": 0.4212, + "step": 10187 + }, + { + "epoch": 1.74, + "learning_rate": 7.935114368423714e-06, + "loss": 0.4109, + "step": 10188 + }, + { + "epoch": 1.74, + "learning_rate": 7.93331269883126e-06, + "loss": 0.3742, + "step": 10189 + }, + { + "epoch": 1.74, + "learning_rate": 7.931511099314202e-06, + "loss": 0.4183, + "step": 10190 + }, + { + "epoch": 1.74, + "learning_rate": 7.929709569933624e-06, + "loss": 0.4239, + "step": 10191 + }, + { + "epoch": 1.74, + "learning_rate": 7.927908110750613e-06, + "loss": 0.3942, + "step": 10192 + }, + { + "epoch": 1.74, + "learning_rate": 7.92610672182625e-06, + "loss": 0.403, + "step": 10193 + }, + { + "epoch": 1.74, + "learning_rate": 7.924305403221617e-06, + "loss": 0.4342, + "step": 10194 + }, + { + "epoch": 1.74, + "learning_rate": 7.922504154997788e-06, + "loss": 0.4243, + "step": 10195 + }, + { + "epoch": 1.74, + "learning_rate": 7.920702977215839e-06, + "loss": 0.4214, + "step": 10196 + }, + { + "epoch": 1.74, + "learning_rate": 7.918901869936844e-06, + "loss": 0.4273, + "step": 10197 + }, + { + "epoch": 1.74, + "learning_rate": 7.917100833221871e-06, + "loss": 0.4131, + "step": 10198 + }, + { + "epoch": 1.74, + "learning_rate": 7.91529986713199e-06, + "loss": 0.4033, + "step": 10199 + }, + { + "epoch": 1.74, + "learning_rate": 7.913498971728267e-06, + "loss": 0.394, + "step": 10200 + }, + { + "epoch": 1.74, + "learning_rate": 7.911698147071766e-06, + "loss": 0.4014, + "step": 10201 + }, + { + "epoch": 1.74, + "learning_rate": 7.909897393223545e-06, + "loss": 0.416, + "step": 10202 + }, + { + "epoch": 1.74, + "learning_rate": 7.908096710244665e-06, + "loss": 0.4197, + "step": 10203 + }, + { + "epoch": 1.74, + "learning_rate": 7.906296098196179e-06, + "loss": 0.408, + "step": 10204 + }, + { + "epoch": 1.74, + "learning_rate": 7.90449555713914e-06, + "loss": 0.4423, + "step": 10205 + }, + { + "epoch": 1.74, + "learning_rate": 7.902695087134602e-06, + "loss": 0.4166, + "step": 10206 + }, + { + "epoch": 1.74, + "learning_rate": 7.900894688243614e-06, + "loss": 0.4351, + "step": 10207 + }, + { + "epoch": 1.74, + "learning_rate": 7.89909436052722e-06, + "loss": 0.4425, + "step": 10208 + }, + { + "epoch": 1.74, + "learning_rate": 7.89729410404647e-06, + "loss": 0.4004, + "step": 10209 + }, + { + "epoch": 1.74, + "learning_rate": 7.895493918862395e-06, + "loss": 0.4119, + "step": 10210 + }, + { + "epoch": 1.74, + "learning_rate": 7.893693805036042e-06, + "loss": 0.3797, + "step": 10211 + }, + { + "epoch": 1.74, + "learning_rate": 7.891893762628444e-06, + "loss": 0.367, + "step": 10212 + }, + { + "epoch": 1.74, + "learning_rate": 7.890093791700637e-06, + "loss": 0.4006, + "step": 10213 + }, + { + "epoch": 1.74, + "learning_rate": 7.888293892313653e-06, + "loss": 0.4312, + "step": 10214 + }, + { + "epoch": 1.74, + "learning_rate": 7.886494064528518e-06, + "loss": 0.4323, + "step": 10215 + }, + { + "epoch": 1.74, + "learning_rate": 7.884694308406266e-06, + "loss": 0.4248, + "step": 10216 + }, + { + "epoch": 1.74, + "learning_rate": 7.882894624007918e-06, + "loss": 0.418, + "step": 10217 + }, + { + "epoch": 1.74, + "learning_rate": 7.88109501139449e-06, + "loss": 0.4062, + "step": 10218 + }, + { + "epoch": 1.74, + "learning_rate": 7.879295470627014e-06, + "loss": 0.4426, + "step": 10219 + }, + { + "epoch": 1.74, + "learning_rate": 7.877496001766493e-06, + "loss": 0.391, + "step": 10220 + }, + { + "epoch": 1.74, + "learning_rate": 7.875696604873953e-06, + "loss": 0.4335, + "step": 10221 + }, + { + "epoch": 1.74, + "learning_rate": 7.8738972800104e-06, + "loss": 0.4136, + "step": 10222 + }, + { + "epoch": 1.74, + "learning_rate": 7.872098027236847e-06, + "loss": 0.366, + "step": 10223 + }, + { + "epoch": 1.74, + "learning_rate": 7.870298846614306e-06, + "loss": 0.4203, + "step": 10224 + }, + { + "epoch": 1.74, + "learning_rate": 7.86849973820377e-06, + "loss": 0.4114, + "step": 10225 + }, + { + "epoch": 1.74, + "learning_rate": 7.86670070206625e-06, + "loss": 0.4232, + "step": 10226 + }, + { + "epoch": 1.74, + "learning_rate": 7.864901738262747e-06, + "loss": 0.4488, + "step": 10227 + }, + { + "epoch": 1.74, + "learning_rate": 7.863102846854254e-06, + "loss": 0.4081, + "step": 10228 + }, + { + "epoch": 1.74, + "learning_rate": 7.861304027901766e-06, + "loss": 0.4547, + "step": 10229 + }, + { + "epoch": 1.74, + "learning_rate": 7.859505281466282e-06, + "loss": 0.4324, + "step": 10230 + }, + { + "epoch": 1.74, + "learning_rate": 7.857706607608786e-06, + "loss": 0.4429, + "step": 10231 + }, + { + "epoch": 1.75, + "learning_rate": 7.855908006390273e-06, + "loss": 0.4344, + "step": 10232 + }, + { + "epoch": 1.75, + "learning_rate": 7.85410947787172e-06, + "loss": 0.4211, + "step": 10233 + }, + { + "epoch": 1.75, + "learning_rate": 7.852311022114114e-06, + "loss": 0.4288, + "step": 10234 + }, + { + "epoch": 1.75, + "learning_rate": 7.850512639178433e-06, + "loss": 0.4338, + "step": 10235 + }, + { + "epoch": 1.75, + "learning_rate": 7.848714329125658e-06, + "loss": 0.4453, + "step": 10236 + }, + { + "epoch": 1.75, + "learning_rate": 7.846916092016762e-06, + "loss": 0.437, + "step": 10237 + }, + { + "epoch": 1.75, + "learning_rate": 7.84511792791272e-06, + "loss": 0.4479, + "step": 10238 + }, + { + "epoch": 1.75, + "learning_rate": 7.843319836874502e-06, + "loss": 0.387, + "step": 10239 + }, + { + "epoch": 1.75, + "learning_rate": 7.841521818963082e-06, + "loss": 0.4191, + "step": 10240 + }, + { + "epoch": 1.75, + "learning_rate": 7.839723874239414e-06, + "loss": 0.4036, + "step": 10241 + }, + { + "epoch": 1.75, + "learning_rate": 7.837926002764468e-06, + "loss": 0.4253, + "step": 10242 + }, + { + "epoch": 1.75, + "learning_rate": 7.836128204599202e-06, + "loss": 0.4027, + "step": 10243 + }, + { + "epoch": 1.75, + "learning_rate": 7.834330479804575e-06, + "loss": 0.4051, + "step": 10244 + }, + { + "epoch": 1.75, + "learning_rate": 7.832532828441544e-06, + "loss": 0.4216, + "step": 10245 + }, + { + "epoch": 1.75, + "learning_rate": 7.83073525057106e-06, + "loss": 0.4339, + "step": 10246 + }, + { + "epoch": 1.75, + "learning_rate": 7.828937746254082e-06, + "loss": 0.4345, + "step": 10247 + }, + { + "epoch": 1.75, + "learning_rate": 7.827140315551544e-06, + "loss": 0.4121, + "step": 10248 + }, + { + "epoch": 1.75, + "learning_rate": 7.8253429585244e-06, + "loss": 0.4228, + "step": 10249 + }, + { + "epoch": 1.75, + "learning_rate": 7.823545675233595e-06, + "loss": 0.3814, + "step": 10250 + }, + { + "epoch": 1.75, + "learning_rate": 7.821748465740063e-06, + "loss": 0.4225, + "step": 10251 + }, + { + "epoch": 1.75, + "learning_rate": 7.819951330104747e-06, + "loss": 0.4285, + "step": 10252 + }, + { + "epoch": 1.75, + "learning_rate": 7.818154268388581e-06, + "loss": 0.4249, + "step": 10253 + }, + { + "epoch": 1.75, + "learning_rate": 7.816357280652498e-06, + "loss": 0.4071, + "step": 10254 + }, + { + "epoch": 1.75, + "learning_rate": 7.814560366957436e-06, + "loss": 0.4252, + "step": 10255 + }, + { + "epoch": 1.75, + "learning_rate": 7.81276352736431e-06, + "loss": 0.4258, + "step": 10256 + }, + { + "epoch": 1.75, + "learning_rate": 7.810966761934053e-06, + "loss": 0.4275, + "step": 10257 + }, + { + "epoch": 1.75, + "learning_rate": 7.80917007072759e-06, + "loss": 0.4209, + "step": 10258 + }, + { + "epoch": 1.75, + "learning_rate": 7.807373453805835e-06, + "loss": 0.4261, + "step": 10259 + }, + { + "epoch": 1.75, + "learning_rate": 7.805576911229711e-06, + "loss": 0.4251, + "step": 10260 + }, + { + "epoch": 1.75, + "learning_rate": 7.803780443060134e-06, + "loss": 0.4258, + "step": 10261 + }, + { + "epoch": 1.75, + "learning_rate": 7.80198404935802e-06, + "loss": 0.3983, + "step": 10262 + }, + { + "epoch": 1.75, + "learning_rate": 7.800187730184267e-06, + "loss": 0.4229, + "step": 10263 + }, + { + "epoch": 1.75, + "learning_rate": 7.798391485599794e-06, + "loss": 0.4063, + "step": 10264 + }, + { + "epoch": 1.75, + "learning_rate": 7.796595315665505e-06, + "loss": 0.3778, + "step": 10265 + }, + { + "epoch": 1.75, + "learning_rate": 7.794799220442302e-06, + "loss": 0.4164, + "step": 10266 + }, + { + "epoch": 1.75, + "learning_rate": 7.793003199991082e-06, + "loss": 0.4162, + "step": 10267 + }, + { + "epoch": 1.75, + "learning_rate": 7.791207254372747e-06, + "loss": 0.4145, + "step": 10268 + }, + { + "epoch": 1.75, + "learning_rate": 7.789411383648192e-06, + "loss": 0.4217, + "step": 10269 + }, + { + "epoch": 1.75, + "learning_rate": 7.78761558787831e-06, + "loss": 0.4717, + "step": 10270 + }, + { + "epoch": 1.75, + "learning_rate": 7.785819867123989e-06, + "loss": 0.4183, + "step": 10271 + }, + { + "epoch": 1.75, + "learning_rate": 7.784024221446116e-06, + "loss": 0.4354, + "step": 10272 + }, + { + "epoch": 1.75, + "learning_rate": 7.782228650905578e-06, + "loss": 0.3784, + "step": 10273 + }, + { + "epoch": 1.75, + "learning_rate": 7.78043315556326e-06, + "loss": 0.3866, + "step": 10274 + }, + { + "epoch": 1.75, + "learning_rate": 7.77863773548004e-06, + "loss": 0.4366, + "step": 10275 + }, + { + "epoch": 1.75, + "learning_rate": 7.776842390716791e-06, + "loss": 0.4309, + "step": 10276 + }, + { + "epoch": 1.75, + "learning_rate": 7.7750471213344e-06, + "loss": 0.4117, + "step": 10277 + }, + { + "epoch": 1.75, + "learning_rate": 7.773251927393724e-06, + "loss": 0.4029, + "step": 10278 + }, + { + "epoch": 1.75, + "learning_rate": 7.771456808955643e-06, + "loss": 0.4053, + "step": 10279 + }, + { + "epoch": 1.75, + "learning_rate": 7.76966176608102e-06, + "loss": 0.445, + "step": 10280 + }, + { + "epoch": 1.75, + "learning_rate": 7.767866798830724e-06, + "loss": 0.3883, + "step": 10281 + }, + { + "epoch": 1.75, + "learning_rate": 7.766071907265613e-06, + "loss": 0.4358, + "step": 10282 + }, + { + "epoch": 1.75, + "learning_rate": 7.764277091446548e-06, + "loss": 0.4206, + "step": 10283 + }, + { + "epoch": 1.75, + "learning_rate": 7.762482351434386e-06, + "loss": 0.4228, + "step": 10284 + }, + { + "epoch": 1.75, + "learning_rate": 7.760687687289985e-06, + "loss": 0.4059, + "step": 10285 + }, + { + "epoch": 1.75, + "learning_rate": 7.75889309907419e-06, + "loss": 0.4438, + "step": 10286 + }, + { + "epoch": 1.75, + "learning_rate": 7.757098586847853e-06, + "loss": 0.3966, + "step": 10287 + }, + { + "epoch": 1.75, + "learning_rate": 7.755304150671823e-06, + "loss": 0.4753, + "step": 10288 + }, + { + "epoch": 1.75, + "learning_rate": 7.753509790606944e-06, + "loss": 0.4018, + "step": 10289 + }, + { + "epoch": 1.75, + "learning_rate": 7.751715506714054e-06, + "loss": 0.3794, + "step": 10290 + }, + { + "epoch": 1.76, + "learning_rate": 7.749921299053994e-06, + "loss": 0.4485, + "step": 10291 + }, + { + "epoch": 1.76, + "learning_rate": 7.748127167687604e-06, + "loss": 0.4202, + "step": 10292 + }, + { + "epoch": 1.76, + "learning_rate": 7.746333112675712e-06, + "loss": 0.3888, + "step": 10293 + }, + { + "epoch": 1.76, + "learning_rate": 7.74453913407915e-06, + "loss": 0.4082, + "step": 10294 + }, + { + "epoch": 1.76, + "learning_rate": 7.742745231958749e-06, + "loss": 0.4159, + "step": 10295 + }, + { + "epoch": 1.76, + "learning_rate": 7.740951406375334e-06, + "loss": 0.397, + "step": 10296 + }, + { + "epoch": 1.76, + "learning_rate": 7.73915765738973e-06, + "loss": 0.432, + "step": 10297 + }, + { + "epoch": 1.76, + "learning_rate": 7.737363985062757e-06, + "loss": 0.4083, + "step": 10298 + }, + { + "epoch": 1.76, + "learning_rate": 7.73557038945523e-06, + "loss": 0.4424, + "step": 10299 + }, + { + "epoch": 1.76, + "learning_rate": 7.733776870627971e-06, + "loss": 0.4314, + "step": 10300 + }, + { + "epoch": 1.76, + "learning_rate": 7.731983428641787e-06, + "loss": 0.4284, + "step": 10301 + }, + { + "epoch": 1.76, + "learning_rate": 7.730190063557492e-06, + "loss": 0.4313, + "step": 10302 + }, + { + "epoch": 1.76, + "learning_rate": 7.728396775435891e-06, + "loss": 0.4143, + "step": 10303 + }, + { + "epoch": 1.76, + "learning_rate": 7.726603564337791e-06, + "loss": 0.4491, + "step": 10304 + }, + { + "epoch": 1.76, + "learning_rate": 7.724810430323996e-06, + "loss": 0.414, + "step": 10305 + }, + { + "epoch": 1.76, + "learning_rate": 7.723017373455305e-06, + "loss": 0.4344, + "step": 10306 + }, + { + "epoch": 1.76, + "learning_rate": 7.721224393792518e-06, + "loss": 0.4076, + "step": 10307 + }, + { + "epoch": 1.76, + "learning_rate": 7.719431491396423e-06, + "loss": 0.4364, + "step": 10308 + }, + { + "epoch": 1.76, + "learning_rate": 7.717638666327813e-06, + "loss": 0.4098, + "step": 10309 + }, + { + "epoch": 1.76, + "learning_rate": 7.715845918647484e-06, + "loss": 0.3899, + "step": 10310 + }, + { + "epoch": 1.76, + "learning_rate": 7.714053248416216e-06, + "loss": 0.4105, + "step": 10311 + }, + { + "epoch": 1.76, + "learning_rate": 7.712260655694799e-06, + "loss": 0.393, + "step": 10312 + }, + { + "epoch": 1.76, + "learning_rate": 7.710468140544012e-06, + "loss": 0.4511, + "step": 10313 + }, + { + "epoch": 1.76, + "learning_rate": 7.708675703024633e-06, + "loss": 0.4138, + "step": 10314 + }, + { + "epoch": 1.76, + "learning_rate": 7.706883343197446e-06, + "loss": 0.4296, + "step": 10315 + }, + { + "epoch": 1.76, + "learning_rate": 7.705091061123211e-06, + "loss": 0.4194, + "step": 10316 + }, + { + "epoch": 1.76, + "learning_rate": 7.703298856862708e-06, + "loss": 0.4368, + "step": 10317 + }, + { + "epoch": 1.76, + "learning_rate": 7.701506730476703e-06, + "loss": 0.4027, + "step": 10318 + }, + { + "epoch": 1.76, + "learning_rate": 7.699714682025962e-06, + "loss": 0.4249, + "step": 10319 + }, + { + "epoch": 1.76, + "learning_rate": 7.69792271157125e-06, + "loss": 0.4268, + "step": 10320 + }, + { + "epoch": 1.76, + "learning_rate": 7.696130819173328e-06, + "loss": 0.477, + "step": 10321 + }, + { + "epoch": 1.76, + "learning_rate": 7.694339004892954e-06, + "loss": 0.4332, + "step": 10322 + }, + { + "epoch": 1.76, + "learning_rate": 7.69254726879088e-06, + "loss": 0.3975, + "step": 10323 + }, + { + "epoch": 1.76, + "learning_rate": 7.690755610927858e-06, + "loss": 0.4207, + "step": 10324 + }, + { + "epoch": 1.76, + "learning_rate": 7.688964031364641e-06, + "loss": 0.4104, + "step": 10325 + }, + { + "epoch": 1.76, + "learning_rate": 7.687172530161974e-06, + "loss": 0.3879, + "step": 10326 + }, + { + "epoch": 1.76, + "learning_rate": 7.685381107380604e-06, + "loss": 0.4291, + "step": 10327 + }, + { + "epoch": 1.76, + "learning_rate": 7.683589763081273e-06, + "loss": 0.4329, + "step": 10328 + }, + { + "epoch": 1.76, + "learning_rate": 7.681798497324717e-06, + "loss": 0.4291, + "step": 10329 + }, + { + "epoch": 1.76, + "learning_rate": 7.680007310171678e-06, + "loss": 0.4151, + "step": 10330 + }, + { + "epoch": 1.76, + "learning_rate": 7.678216201682886e-06, + "loss": 0.4271, + "step": 10331 + }, + { + "epoch": 1.76, + "learning_rate": 7.676425171919074e-06, + "loss": 0.4013, + "step": 10332 + }, + { + "epoch": 1.76, + "learning_rate": 7.674634220940966e-06, + "loss": 0.4307, + "step": 10333 + }, + { + "epoch": 1.76, + "learning_rate": 7.672843348809294e-06, + "loss": 0.4048, + "step": 10334 + }, + { + "epoch": 1.76, + "learning_rate": 7.671052555584779e-06, + "loss": 0.4193, + "step": 10335 + }, + { + "epoch": 1.76, + "learning_rate": 7.669261841328143e-06, + "loss": 0.4417, + "step": 10336 + }, + { + "epoch": 1.76, + "learning_rate": 7.667471206100102e-06, + "loss": 0.4236, + "step": 10337 + }, + { + "epoch": 1.76, + "learning_rate": 7.665680649961371e-06, + "loss": 0.4022, + "step": 10338 + }, + { + "epoch": 1.76, + "learning_rate": 7.663890172972665e-06, + "loss": 0.4368, + "step": 10339 + }, + { + "epoch": 1.76, + "learning_rate": 7.662099775194692e-06, + "loss": 0.4388, + "step": 10340 + }, + { + "epoch": 1.76, + "learning_rate": 7.660309456688157e-06, + "loss": 0.4181, + "step": 10341 + }, + { + "epoch": 1.76, + "learning_rate": 7.65851921751377e-06, + "loss": 0.4106, + "step": 10342 + }, + { + "epoch": 1.76, + "learning_rate": 7.65672905773223e-06, + "loss": 0.4234, + "step": 10343 + }, + { + "epoch": 1.76, + "learning_rate": 7.654938977404234e-06, + "loss": 0.4092, + "step": 10344 + }, + { + "epoch": 1.76, + "learning_rate": 7.653148976590485e-06, + "loss": 0.4021, + "step": 10345 + }, + { + "epoch": 1.76, + "learning_rate": 7.65135905535167e-06, + "loss": 0.4125, + "step": 10346 + }, + { + "epoch": 1.76, + "learning_rate": 7.649569213748482e-06, + "loss": 0.4092, + "step": 10347 + }, + { + "epoch": 1.76, + "learning_rate": 7.647779451841609e-06, + "loss": 0.3696, + "step": 10348 + }, + { + "epoch": 1.76, + "learning_rate": 7.645989769691737e-06, + "loss": 0.4198, + "step": 10349 + }, + { + "epoch": 1.77, + "learning_rate": 7.64420016735955e-06, + "loss": 0.4078, + "step": 10350 + }, + { + "epoch": 1.77, + "learning_rate": 7.642410644905726e-06, + "loss": 0.4492, + "step": 10351 + }, + { + "epoch": 1.77, + "learning_rate": 7.64062120239095e-06, + "loss": 0.4234, + "step": 10352 + }, + { + "epoch": 1.77, + "learning_rate": 7.638831839875887e-06, + "loss": 0.381, + "step": 10353 + }, + { + "epoch": 1.77, + "learning_rate": 7.637042557421215e-06, + "loss": 0.4072, + "step": 10354 + }, + { + "epoch": 1.77, + "learning_rate": 7.635253355087598e-06, + "loss": 0.4222, + "step": 10355 + }, + { + "epoch": 1.77, + "learning_rate": 7.633464232935708e-06, + "loss": 0.4, + "step": 10356 + }, + { + "epoch": 1.77, + "learning_rate": 7.631675191026206e-06, + "loss": 0.4189, + "step": 10357 + }, + { + "epoch": 1.77, + "learning_rate": 7.629886229419754e-06, + "loss": 0.3852, + "step": 10358 + }, + { + "epoch": 1.77, + "learning_rate": 7.628097348177013e-06, + "loss": 0.4206, + "step": 10359 + }, + { + "epoch": 1.77, + "learning_rate": 7.626308547358639e-06, + "loss": 0.4193, + "step": 10360 + }, + { + "epoch": 1.77, + "learning_rate": 7.6245198270252785e-06, + "loss": 0.3805, + "step": 10361 + }, + { + "epoch": 1.77, + "learning_rate": 7.62273118723759e-06, + "loss": 0.4196, + "step": 10362 + }, + { + "epoch": 1.77, + "learning_rate": 7.6209426280562136e-06, + "loss": 0.388, + "step": 10363 + }, + { + "epoch": 1.77, + "learning_rate": 7.619154149541797e-06, + "loss": 0.3816, + "step": 10364 + }, + { + "epoch": 1.77, + "learning_rate": 7.617365751754985e-06, + "loss": 0.4377, + "step": 10365 + }, + { + "epoch": 1.77, + "learning_rate": 7.615577434756414e-06, + "loss": 0.4169, + "step": 10366 + }, + { + "epoch": 1.77, + "learning_rate": 7.613789198606726e-06, + "loss": 0.4227, + "step": 10367 + }, + { + "epoch": 1.77, + "learning_rate": 7.612001043366547e-06, + "loss": 0.4195, + "step": 10368 + }, + { + "epoch": 1.77, + "learning_rate": 7.610212969096511e-06, + "loss": 0.3998, + "step": 10369 + }, + { + "epoch": 1.77, + "learning_rate": 7.60842497585725e-06, + "loss": 0.4405, + "step": 10370 + }, + { + "epoch": 1.77, + "learning_rate": 7.6066370637093855e-06, + "loss": 0.4488, + "step": 10371 + }, + { + "epoch": 1.77, + "learning_rate": 7.60484923271354e-06, + "loss": 0.4069, + "step": 10372 + }, + { + "epoch": 1.77, + "learning_rate": 7.603061482930338e-06, + "loss": 0.4356, + "step": 10373 + }, + { + "epoch": 1.77, + "learning_rate": 7.601273814420393e-06, + "loss": 0.4133, + "step": 10374 + }, + { + "epoch": 1.77, + "learning_rate": 7.599486227244326e-06, + "loss": 0.391, + "step": 10375 + }, + { + "epoch": 1.77, + "learning_rate": 7.597698721462739e-06, + "loss": 0.4112, + "step": 10376 + }, + { + "epoch": 1.77, + "learning_rate": 7.595911297136247e-06, + "loss": 0.4206, + "step": 10377 + }, + { + "epoch": 1.77, + "learning_rate": 7.594123954325459e-06, + "loss": 0.3853, + "step": 10378 + }, + { + "epoch": 1.77, + "learning_rate": 7.5923366930909715e-06, + "loss": 0.4313, + "step": 10379 + }, + { + "epoch": 1.77, + "learning_rate": 7.590549513493389e-06, + "loss": 0.3873, + "step": 10380 + }, + { + "epoch": 1.77, + "learning_rate": 7.588762415593311e-06, + "loss": 0.397, + "step": 10381 + }, + { + "epoch": 1.77, + "learning_rate": 7.586975399451334e-06, + "loss": 0.4693, + "step": 10382 + }, + { + "epoch": 1.77, + "learning_rate": 7.585188465128046e-06, + "loss": 0.4274, + "step": 10383 + }, + { + "epoch": 1.77, + "learning_rate": 7.583401612684037e-06, + "loss": 0.435, + "step": 10384 + }, + { + "epoch": 1.77, + "learning_rate": 7.5816148421798994e-06, + "loss": 0.4157, + "step": 10385 + }, + { + "epoch": 1.77, + "learning_rate": 7.579828153676212e-06, + "loss": 0.4004, + "step": 10386 + }, + { + "epoch": 1.77, + "learning_rate": 7.578041547233557e-06, + "loss": 0.444, + "step": 10387 + }, + { + "epoch": 1.77, + "learning_rate": 7.576255022912514e-06, + "loss": 0.4137, + "step": 10388 + }, + { + "epoch": 1.77, + "learning_rate": 7.5744685807736604e-06, + "loss": 0.4096, + "step": 10389 + }, + { + "epoch": 1.77, + "learning_rate": 7.572682220877572e-06, + "loss": 0.3922, + "step": 10390 + }, + { + "epoch": 1.77, + "learning_rate": 7.570895943284811e-06, + "loss": 0.4187, + "step": 10391 + }, + { + "epoch": 1.77, + "learning_rate": 7.5691097480559485e-06, + "loss": 0.4284, + "step": 10392 + }, + { + "epoch": 1.77, + "learning_rate": 7.5673236352515525e-06, + "loss": 0.4004, + "step": 10393 + }, + { + "epoch": 1.77, + "learning_rate": 7.5655376049321795e-06, + "loss": 0.4313, + "step": 10394 + }, + { + "epoch": 1.77, + "learning_rate": 7.5637516571583915e-06, + "loss": 0.4074, + "step": 10395 + }, + { + "epoch": 1.77, + "learning_rate": 7.561965791990744e-06, + "loss": 0.4313, + "step": 10396 + }, + { + "epoch": 1.77, + "learning_rate": 7.560180009489796e-06, + "loss": 0.4344, + "step": 10397 + }, + { + "epoch": 1.77, + "learning_rate": 7.558394309716088e-06, + "loss": 0.4315, + "step": 10398 + }, + { + "epoch": 1.77, + "learning_rate": 7.556608692730175e-06, + "loss": 0.4317, + "step": 10399 + }, + { + "epoch": 1.77, + "learning_rate": 7.5548231585926e-06, + "loss": 0.4232, + "step": 10400 + }, + { + "epoch": 1.77, + "learning_rate": 7.553037707363907e-06, + "loss": 0.3902, + "step": 10401 + }, + { + "epoch": 1.77, + "learning_rate": 7.551252339104633e-06, + "loss": 0.425, + "step": 10402 + }, + { + "epoch": 1.77, + "learning_rate": 7.5494670538753155e-06, + "loss": 0.454, + "step": 10403 + }, + { + "epoch": 1.77, + "learning_rate": 7.547681851736489e-06, + "loss": 0.4073, + "step": 10404 + }, + { + "epoch": 1.77, + "learning_rate": 7.545896732748689e-06, + "loss": 0.4278, + "step": 10405 + }, + { + "epoch": 1.77, + "learning_rate": 7.544111696972434e-06, + "loss": 0.4414, + "step": 10406 + }, + { + "epoch": 1.77, + "learning_rate": 7.542326744468254e-06, + "loss": 0.4404, + "step": 10407 + }, + { + "epoch": 1.78, + "learning_rate": 7.540541875296672e-06, + "loss": 0.4189, + "step": 10408 + }, + { + "epoch": 1.78, + "learning_rate": 7.538757089518211e-06, + "loss": 0.4475, + "step": 10409 + }, + { + "epoch": 1.78, + "learning_rate": 7.536972387193382e-06, + "loss": 0.4024, + "step": 10410 + }, + { + "epoch": 1.78, + "learning_rate": 7.535187768382701e-06, + "loss": 0.4318, + "step": 10411 + }, + { + "epoch": 1.78, + "learning_rate": 7.533403233146686e-06, + "loss": 0.4246, + "step": 10412 + }, + { + "epoch": 1.78, + "learning_rate": 7.531618781545833e-06, + "loss": 0.4663, + "step": 10413 + }, + { + "epoch": 1.78, + "learning_rate": 7.529834413640656e-06, + "loss": 0.4373, + "step": 10414 + }, + { + "epoch": 1.78, + "learning_rate": 7.528050129491655e-06, + "loss": 0.4552, + "step": 10415 + }, + { + "epoch": 1.78, + "learning_rate": 7.526265929159331e-06, + "loss": 0.422, + "step": 10416 + }, + { + "epoch": 1.78, + "learning_rate": 7.524481812704183e-06, + "loss": 0.4291, + "step": 10417 + }, + { + "epoch": 1.78, + "learning_rate": 7.5226977801867004e-06, + "loss": 0.3946, + "step": 10418 + }, + { + "epoch": 1.78, + "learning_rate": 7.520913831667378e-06, + "loss": 0.4172, + "step": 10419 + }, + { + "epoch": 1.78, + "learning_rate": 7.519129967206708e-06, + "loss": 0.4359, + "step": 10420 + }, + { + "epoch": 1.78, + "learning_rate": 7.517346186865167e-06, + "loss": 0.4304, + "step": 10421 + }, + { + "epoch": 1.78, + "learning_rate": 7.515562490703242e-06, + "loss": 0.4259, + "step": 10422 + }, + { + "epoch": 1.78, + "learning_rate": 7.513778878781414e-06, + "loss": 0.4193, + "step": 10423 + }, + { + "epoch": 1.78, + "learning_rate": 7.5119953511601595e-06, + "loss": 0.4382, + "step": 10424 + }, + { + "epoch": 1.78, + "learning_rate": 7.5102119078999545e-06, + "loss": 0.4187, + "step": 10425 + }, + { + "epoch": 1.78, + "learning_rate": 7.5084285490612685e-06, + "loss": 0.4058, + "step": 10426 + }, + { + "epoch": 1.78, + "learning_rate": 7.506645274704573e-06, + "loss": 0.4461, + "step": 10427 + }, + { + "epoch": 1.78, + "learning_rate": 7.504862084890327e-06, + "loss": 0.4308, + "step": 10428 + }, + { + "epoch": 1.78, + "learning_rate": 7.503078979678997e-06, + "loss": 0.4177, + "step": 10429 + }, + { + "epoch": 1.78, + "learning_rate": 7.501295959131044e-06, + "loss": 0.4251, + "step": 10430 + }, + { + "epoch": 1.78, + "learning_rate": 7.499513023306924e-06, + "loss": 0.3841, + "step": 10431 + }, + { + "epoch": 1.78, + "learning_rate": 7.497730172267095e-06, + "loss": 0.4077, + "step": 10432 + }, + { + "epoch": 1.78, + "learning_rate": 7.495947406072002e-06, + "loss": 0.3906, + "step": 10433 + }, + { + "epoch": 1.78, + "learning_rate": 7.494164724782095e-06, + "loss": 0.4173, + "step": 10434 + }, + { + "epoch": 1.78, + "learning_rate": 7.4923821284578256e-06, + "loss": 0.4294, + "step": 10435 + }, + { + "epoch": 1.78, + "learning_rate": 7.490599617159628e-06, + "loss": 0.4197, + "step": 10436 + }, + { + "epoch": 1.78, + "learning_rate": 7.488817190947946e-06, + "loss": 0.4835, + "step": 10437 + }, + { + "epoch": 1.78, + "learning_rate": 7.487034849883217e-06, + "loss": 0.4177, + "step": 10438 + }, + { + "epoch": 1.78, + "learning_rate": 7.485252594025873e-06, + "loss": 0.4067, + "step": 10439 + }, + { + "epoch": 1.78, + "learning_rate": 7.483470423436349e-06, + "loss": 0.4082, + "step": 10440 + }, + { + "epoch": 1.78, + "learning_rate": 7.4816883381750696e-06, + "loss": 0.4236, + "step": 10441 + }, + { + "epoch": 1.78, + "learning_rate": 7.47990633830246e-06, + "loss": 0.4193, + "step": 10442 + }, + { + "epoch": 1.78, + "learning_rate": 7.47812442387895e-06, + "loss": 0.4037, + "step": 10443 + }, + { + "epoch": 1.78, + "learning_rate": 7.476342594964947e-06, + "loss": 0.4253, + "step": 10444 + }, + { + "epoch": 1.78, + "learning_rate": 7.474560851620873e-06, + "loss": 0.3927, + "step": 10445 + }, + { + "epoch": 1.78, + "learning_rate": 7.472779193907144e-06, + "loss": 0.4172, + "step": 10446 + }, + { + "epoch": 1.78, + "learning_rate": 7.470997621884171e-06, + "loss": 0.4218, + "step": 10447 + }, + { + "epoch": 1.78, + "learning_rate": 7.46921613561236e-06, + "loss": 0.4225, + "step": 10448 + }, + { + "epoch": 1.78, + "learning_rate": 7.467434735152117e-06, + "loss": 0.4178, + "step": 10449 + }, + { + "epoch": 1.78, + "learning_rate": 7.465653420563846e-06, + "loss": 0.4066, + "step": 10450 + }, + { + "epoch": 1.78, + "learning_rate": 7.463872191907939e-06, + "loss": 0.435, + "step": 10451 + }, + { + "epoch": 1.78, + "learning_rate": 7.4620910492447994e-06, + "loss": 0.4437, + "step": 10452 + }, + { + "epoch": 1.78, + "learning_rate": 7.460309992634817e-06, + "loss": 0.4028, + "step": 10453 + }, + { + "epoch": 1.78, + "learning_rate": 7.4585290221383834e-06, + "loss": 0.3959, + "step": 10454 + }, + { + "epoch": 1.78, + "learning_rate": 7.456748137815886e-06, + "loss": 0.4035, + "step": 10455 + }, + { + "epoch": 1.78, + "learning_rate": 7.454967339727713e-06, + "loss": 0.4353, + "step": 10456 + }, + { + "epoch": 1.78, + "learning_rate": 7.453186627934242e-06, + "loss": 0.4256, + "step": 10457 + }, + { + "epoch": 1.78, + "learning_rate": 7.451406002495855e-06, + "loss": 0.3897, + "step": 10458 + }, + { + "epoch": 1.78, + "learning_rate": 7.449625463472922e-06, + "loss": 0.3879, + "step": 10459 + }, + { + "epoch": 1.78, + "learning_rate": 7.447845010925819e-06, + "loss": 0.4046, + "step": 10460 + }, + { + "epoch": 1.78, + "learning_rate": 7.4460646449149165e-06, + "loss": 0.3953, + "step": 10461 + }, + { + "epoch": 1.78, + "learning_rate": 7.444284365500583e-06, + "loss": 0.4226, + "step": 10462 + }, + { + "epoch": 1.78, + "learning_rate": 7.4425041727431806e-06, + "loss": 0.392, + "step": 10463 + }, + { + "epoch": 1.78, + "learning_rate": 7.440724066703073e-06, + "loss": 0.4258, + "step": 10464 + }, + { + "epoch": 1.78, + "learning_rate": 7.4389440474406175e-06, + "loss": 0.4218, + "step": 10465 + }, + { + "epoch": 1.78, + "learning_rate": 7.4371641150161666e-06, + "loss": 0.404, + "step": 10466 + }, + { + "epoch": 1.79, + "learning_rate": 7.435384269490073e-06, + "loss": 0.3955, + "step": 10467 + }, + { + "epoch": 1.79, + "learning_rate": 7.43360451092269e-06, + "loss": 0.4229, + "step": 10468 + }, + { + "epoch": 1.79, + "learning_rate": 7.431824839374359e-06, + "loss": 0.3966, + "step": 10469 + }, + { + "epoch": 1.79, + "learning_rate": 7.430045254905427e-06, + "loss": 0.4434, + "step": 10470 + }, + { + "epoch": 1.79, + "learning_rate": 7.428265757576234e-06, + "loss": 0.4278, + "step": 10471 + }, + { + "epoch": 1.79, + "learning_rate": 7.4264863474471196e-06, + "loss": 0.4092, + "step": 10472 + }, + { + "epoch": 1.79, + "learning_rate": 7.424707024578417e-06, + "loss": 0.4186, + "step": 10473 + }, + { + "epoch": 1.79, + "learning_rate": 7.422927789030456e-06, + "loss": 0.4164, + "step": 10474 + }, + { + "epoch": 1.79, + "learning_rate": 7.421148640863565e-06, + "loss": 0.4348, + "step": 10475 + }, + { + "epoch": 1.79, + "learning_rate": 7.419369580138071e-06, + "loss": 0.4245, + "step": 10476 + }, + { + "epoch": 1.79, + "learning_rate": 7.4175906069142975e-06, + "loss": 0.4286, + "step": 10477 + }, + { + "epoch": 1.79, + "learning_rate": 7.415811721252564e-06, + "loss": 0.4108, + "step": 10478 + }, + { + "epoch": 1.79, + "learning_rate": 7.41403292321319e-06, + "loss": 0.4078, + "step": 10479 + }, + { + "epoch": 1.79, + "learning_rate": 7.4122542128564855e-06, + "loss": 0.4093, + "step": 10480 + }, + { + "epoch": 1.79, + "learning_rate": 7.4104755902427615e-06, + "loss": 0.4255, + "step": 10481 + }, + { + "epoch": 1.79, + "learning_rate": 7.408697055432329e-06, + "loss": 0.4237, + "step": 10482 + }, + { + "epoch": 1.79, + "learning_rate": 7.4069186084854896e-06, + "loss": 0.4305, + "step": 10483 + }, + { + "epoch": 1.79, + "learning_rate": 7.4051402494625455e-06, + "loss": 0.4048, + "step": 10484 + }, + { + "epoch": 1.79, + "learning_rate": 7.403361978423798e-06, + "loss": 0.4221, + "step": 10485 + }, + { + "epoch": 1.79, + "learning_rate": 7.401583795429541e-06, + "loss": 0.4221, + "step": 10486 + }, + { + "epoch": 1.79, + "learning_rate": 7.399805700540071e-06, + "loss": 0.3941, + "step": 10487 + }, + { + "epoch": 1.79, + "learning_rate": 7.398027693815677e-06, + "loss": 0.4282, + "step": 10488 + }, + { + "epoch": 1.79, + "learning_rate": 7.3962497753166415e-06, + "loss": 0.4341, + "step": 10489 + }, + { + "epoch": 1.79, + "learning_rate": 7.3944719451032545e-06, + "loss": 0.4283, + "step": 10490 + }, + { + "epoch": 1.79, + "learning_rate": 7.392694203235793e-06, + "loss": 0.4042, + "step": 10491 + }, + { + "epoch": 1.79, + "learning_rate": 7.390916549774536e-06, + "loss": 0.4105, + "step": 10492 + }, + { + "epoch": 1.79, + "learning_rate": 7.389138984779758e-06, + "loss": 0.378, + "step": 10493 + }, + { + "epoch": 1.79, + "learning_rate": 7.387361508311735e-06, + "loss": 0.3731, + "step": 10494 + }, + { + "epoch": 1.79, + "learning_rate": 7.385584120430736e-06, + "loss": 0.4191, + "step": 10495 + }, + { + "epoch": 1.79, + "learning_rate": 7.3838068211970194e-06, + "loss": 0.4255, + "step": 10496 + }, + { + "epoch": 1.79, + "learning_rate": 7.3820296106708554e-06, + "loss": 0.4079, + "step": 10497 + }, + { + "epoch": 1.79, + "learning_rate": 7.380252488912501e-06, + "loss": 0.4161, + "step": 10498 + }, + { + "epoch": 1.79, + "learning_rate": 7.3784754559822145e-06, + "loss": 0.4635, + "step": 10499 + }, + { + "epoch": 1.79, + "learning_rate": 7.3766985119402475e-06, + "loss": 0.4044, + "step": 10500 + }, + { + "epoch": 1.79, + "learning_rate": 7.374921656846856e-06, + "loss": 0.423, + "step": 10501 + }, + { + "epoch": 1.79, + "learning_rate": 7.373144890762284e-06, + "loss": 0.4171, + "step": 10502 + }, + { + "epoch": 1.79, + "learning_rate": 7.3713682137467816e-06, + "loss": 0.4084, + "step": 10503 + }, + { + "epoch": 1.79, + "learning_rate": 7.369591625860584e-06, + "loss": 0.4273, + "step": 10504 + }, + { + "epoch": 1.79, + "learning_rate": 7.367815127163933e-06, + "loss": 0.4325, + "step": 10505 + }, + { + "epoch": 1.79, + "learning_rate": 7.366038717717066e-06, + "loss": 0.3822, + "step": 10506 + }, + { + "epoch": 1.79, + "learning_rate": 7.364262397580211e-06, + "loss": 0.4207, + "step": 10507 + }, + { + "epoch": 1.79, + "learning_rate": 7.3624861668136025e-06, + "loss": 0.4138, + "step": 10508 + }, + { + "epoch": 1.79, + "learning_rate": 7.3607100254774665e-06, + "loss": 0.426, + "step": 10509 + }, + { + "epoch": 1.79, + "learning_rate": 7.358933973632032e-06, + "loss": 0.4574, + "step": 10510 + }, + { + "epoch": 1.79, + "learning_rate": 7.357158011337509e-06, + "loss": 0.4305, + "step": 10511 + }, + { + "epoch": 1.79, + "learning_rate": 7.355382138654121e-06, + "loss": 0.4242, + "step": 10512 + }, + { + "epoch": 1.79, + "learning_rate": 7.3536063556420845e-06, + "loss": 0.4138, + "step": 10513 + }, + { + "epoch": 1.79, + "learning_rate": 7.351830662361606e-06, + "loss": 0.3999, + "step": 10514 + }, + { + "epoch": 1.79, + "learning_rate": 7.350055058872898e-06, + "loss": 0.4285, + "step": 10515 + }, + { + "epoch": 1.79, + "learning_rate": 7.348279545236166e-06, + "loss": 0.4212, + "step": 10516 + }, + { + "epoch": 1.79, + "learning_rate": 7.34650412151161e-06, + "loss": 0.4216, + "step": 10517 + }, + { + "epoch": 1.79, + "learning_rate": 7.344728787759437e-06, + "loss": 0.4338, + "step": 10518 + }, + { + "epoch": 1.79, + "learning_rate": 7.3429535440398324e-06, + "loss": 0.4247, + "step": 10519 + }, + { + "epoch": 1.79, + "learning_rate": 7.341178390412995e-06, + "loss": 0.4449, + "step": 10520 + }, + { + "epoch": 1.79, + "learning_rate": 7.339403326939118e-06, + "loss": 0.4323, + "step": 10521 + }, + { + "epoch": 1.79, + "learning_rate": 7.337628353678383e-06, + "loss": 0.4018, + "step": 10522 + }, + { + "epoch": 1.79, + "learning_rate": 7.335853470690977e-06, + "loss": 0.4193, + "step": 10523 + }, + { + "epoch": 1.79, + "learning_rate": 7.334078678037082e-06, + "loss": 0.4089, + "step": 10524 + }, + { + "epoch": 1.8, + "learning_rate": 7.332303975776878e-06, + "loss": 0.3905, + "step": 10525 + }, + { + "epoch": 1.8, + "learning_rate": 7.330529363970533e-06, + "loss": 0.4161, + "step": 10526 + }, + { + "epoch": 1.8, + "learning_rate": 7.328754842678223e-06, + "loss": 0.4086, + "step": 10527 + }, + { + "epoch": 1.8, + "learning_rate": 7.326980411960118e-06, + "loss": 0.3958, + "step": 10528 + }, + { + "epoch": 1.8, + "learning_rate": 7.3252060718763855e-06, + "loss": 0.4307, + "step": 10529 + }, + { + "epoch": 1.8, + "learning_rate": 7.323431822487182e-06, + "loss": 0.4111, + "step": 10530 + }, + { + "epoch": 1.8, + "learning_rate": 7.321657663852671e-06, + "loss": 0.4122, + "step": 10531 + }, + { + "epoch": 1.8, + "learning_rate": 7.319883596033008e-06, + "loss": 0.3975, + "step": 10532 + }, + { + "epoch": 1.8, + "learning_rate": 7.318109619088354e-06, + "loss": 0.449, + "step": 10533 + }, + { + "epoch": 1.8, + "learning_rate": 7.316335733078846e-06, + "loss": 0.4204, + "step": 10534 + }, + { + "epoch": 1.8, + "learning_rate": 7.31456193806464e-06, + "loss": 0.4133, + "step": 10535 + }, + { + "epoch": 1.8, + "learning_rate": 7.3127882341058766e-06, + "loss": 0.46, + "step": 10536 + }, + { + "epoch": 1.8, + "learning_rate": 7.311014621262702e-06, + "loss": 0.4181, + "step": 10537 + }, + { + "epoch": 1.8, + "learning_rate": 7.30924109959525e-06, + "loss": 0.4427, + "step": 10538 + }, + { + "epoch": 1.8, + "learning_rate": 7.307467669163655e-06, + "loss": 0.4071, + "step": 10539 + }, + { + "epoch": 1.8, + "learning_rate": 7.305694330028054e-06, + "loss": 0.3965, + "step": 10540 + }, + { + "epoch": 1.8, + "learning_rate": 7.303921082248571e-06, + "loss": 0.3988, + "step": 10541 + }, + { + "epoch": 1.8, + "learning_rate": 7.3021479258853316e-06, + "loss": 0.4061, + "step": 10542 + }, + { + "epoch": 1.8, + "learning_rate": 7.300374860998459e-06, + "loss": 0.4229, + "step": 10543 + }, + { + "epoch": 1.8, + "learning_rate": 7.298601887648077e-06, + "loss": 0.4175, + "step": 10544 + }, + { + "epoch": 1.8, + "learning_rate": 7.296829005894296e-06, + "loss": 0.415, + "step": 10545 + }, + { + "epoch": 1.8, + "learning_rate": 7.2950562157972316e-06, + "loss": 0.4336, + "step": 10546 + }, + { + "epoch": 1.8, + "learning_rate": 7.293283517416995e-06, + "loss": 0.4334, + "step": 10547 + }, + { + "epoch": 1.8, + "learning_rate": 7.291510910813696e-06, + "loss": 0.4421, + "step": 10548 + }, + { + "epoch": 1.8, + "learning_rate": 7.289738396047431e-06, + "loss": 0.4083, + "step": 10549 + }, + { + "epoch": 1.8, + "learning_rate": 7.287965973178306e-06, + "loss": 0.4027, + "step": 10550 + }, + { + "epoch": 1.8, + "learning_rate": 7.286193642266417e-06, + "loss": 0.4373, + "step": 10551 + }, + { + "epoch": 1.8, + "learning_rate": 7.284421403371861e-06, + "loss": 0.3997, + "step": 10552 + }, + { + "epoch": 1.8, + "learning_rate": 7.282649256554726e-06, + "loss": 0.4186, + "step": 10553 + }, + { + "epoch": 1.8, + "learning_rate": 7.280877201875102e-06, + "loss": 0.4184, + "step": 10554 + }, + { + "epoch": 1.8, + "learning_rate": 7.279105239393079e-06, + "loss": 0.3951, + "step": 10555 + }, + { + "epoch": 1.8, + "learning_rate": 7.277333369168731e-06, + "loss": 0.4538, + "step": 10556 + }, + { + "epoch": 1.8, + "learning_rate": 7.275561591262139e-06, + "loss": 0.4119, + "step": 10557 + }, + { + "epoch": 1.8, + "learning_rate": 7.273789905733381e-06, + "loss": 0.4297, + "step": 10558 + }, + { + "epoch": 1.8, + "learning_rate": 7.272018312642531e-06, + "loss": 0.41, + "step": 10559 + }, + { + "epoch": 1.8, + "learning_rate": 7.2702468120496576e-06, + "loss": 0.4213, + "step": 10560 + }, + { + "epoch": 1.8, + "learning_rate": 7.268475404014823e-06, + "loss": 0.4173, + "step": 10561 + }, + { + "epoch": 1.8, + "learning_rate": 7.266704088598096e-06, + "loss": 0.3909, + "step": 10562 + }, + { + "epoch": 1.8, + "learning_rate": 7.2649328658595375e-06, + "loss": 0.4124, + "step": 10563 + }, + { + "epoch": 1.8, + "learning_rate": 7.263161735859199e-06, + "loss": 0.3957, + "step": 10564 + }, + { + "epoch": 1.8, + "learning_rate": 7.261390698657136e-06, + "loss": 0.4036, + "step": 10565 + }, + { + "epoch": 1.8, + "learning_rate": 7.259619754313401e-06, + "loss": 0.4354, + "step": 10566 + }, + { + "epoch": 1.8, + "learning_rate": 7.25784890288804e-06, + "loss": 0.3935, + "step": 10567 + }, + { + "epoch": 1.8, + "learning_rate": 7.256078144441104e-06, + "loss": 0.391, + "step": 10568 + }, + { + "epoch": 1.8, + "learning_rate": 7.2543074790326225e-06, + "loss": 0.39, + "step": 10569 + }, + { + "epoch": 1.8, + "learning_rate": 7.252536906722647e-06, + "loss": 0.4523, + "step": 10570 + }, + { + "epoch": 1.8, + "learning_rate": 7.2507664275712e-06, + "loss": 0.432, + "step": 10571 + }, + { + "epoch": 1.8, + "learning_rate": 7.248996041638319e-06, + "loss": 0.4133, + "step": 10572 + }, + { + "epoch": 1.8, + "learning_rate": 7.2472257489840325e-06, + "loss": 0.4032, + "step": 10573 + }, + { + "epoch": 1.8, + "learning_rate": 7.2454555496683665e-06, + "loss": 0.4019, + "step": 10574 + }, + { + "epoch": 1.8, + "learning_rate": 7.243685443751342e-06, + "loss": 0.4156, + "step": 10575 + }, + { + "epoch": 1.8, + "learning_rate": 7.241915431292982e-06, + "loss": 0.4372, + "step": 10576 + }, + { + "epoch": 1.8, + "learning_rate": 7.240145512353297e-06, + "loss": 0.4027, + "step": 10577 + }, + { + "epoch": 1.8, + "learning_rate": 7.238375686992307e-06, + "loss": 0.4185, + "step": 10578 + }, + { + "epoch": 1.8, + "learning_rate": 7.236605955270012e-06, + "loss": 0.4424, + "step": 10579 + }, + { + "epoch": 1.8, + "learning_rate": 7.234836317246423e-06, + "loss": 0.4409, + "step": 10580 + }, + { + "epoch": 1.8, + "learning_rate": 7.233066772981545e-06, + "loss": 0.4215, + "step": 10581 + }, + { + "epoch": 1.8, + "learning_rate": 7.231297322535378e-06, + "loss": 0.4377, + "step": 10582 + }, + { + "epoch": 1.8, + "learning_rate": 7.229527965967918e-06, + "loss": 0.4009, + "step": 10583 + }, + { + "epoch": 1.81, + "learning_rate": 7.227758703339157e-06, + "loss": 0.4072, + "step": 10584 + }, + { + "epoch": 1.81, + "learning_rate": 7.2259895347090905e-06, + "loss": 0.4054, + "step": 10585 + }, + { + "epoch": 1.81, + "learning_rate": 7.224220460137701e-06, + "loss": 0.4189, + "step": 10586 + }, + { + "epoch": 1.81, + "learning_rate": 7.222451479684973e-06, + "loss": 0.4278, + "step": 10587 + }, + { + "epoch": 1.81, + "learning_rate": 7.220682593410888e-06, + "loss": 0.3978, + "step": 10588 + }, + { + "epoch": 1.81, + "learning_rate": 7.218913801375425e-06, + "loss": 0.4414, + "step": 10589 + }, + { + "epoch": 1.81, + "learning_rate": 7.21714510363856e-06, + "loss": 0.4256, + "step": 10590 + }, + { + "epoch": 1.81, + "learning_rate": 7.215376500260263e-06, + "loss": 0.4444, + "step": 10591 + }, + { + "epoch": 1.81, + "learning_rate": 7.213607991300501e-06, + "loss": 0.4276, + "step": 10592 + }, + { + "epoch": 1.81, + "learning_rate": 7.2118395768192415e-06, + "loss": 0.4083, + "step": 10593 + }, + { + "epoch": 1.81, + "learning_rate": 7.210071256876445e-06, + "loss": 0.417, + "step": 10594 + }, + { + "epoch": 1.81, + "learning_rate": 7.208303031532067e-06, + "loss": 0.3979, + "step": 10595 + }, + { + "epoch": 1.81, + "learning_rate": 7.206534900846067e-06, + "loss": 0.4156, + "step": 10596 + }, + { + "epoch": 1.81, + "learning_rate": 7.204766864878396e-06, + "loss": 0.4022, + "step": 10597 + }, + { + "epoch": 1.81, + "learning_rate": 7.202998923689002e-06, + "loss": 0.4079, + "step": 10598 + }, + { + "epoch": 1.81, + "learning_rate": 7.2012310773378354e-06, + "loss": 0.4323, + "step": 10599 + }, + { + "epoch": 1.81, + "learning_rate": 7.1994633258848344e-06, + "loss": 0.3985, + "step": 10600 + }, + { + "epoch": 1.81, + "learning_rate": 7.197695669389939e-06, + "loss": 0.4293, + "step": 10601 + }, + { + "epoch": 1.81, + "learning_rate": 7.195928107913084e-06, + "loss": 0.4361, + "step": 10602 + }, + { + "epoch": 1.81, + "learning_rate": 7.194160641514203e-06, + "loss": 0.4173, + "step": 10603 + }, + { + "epoch": 1.81, + "learning_rate": 7.192393270253226e-06, + "loss": 0.454, + "step": 10604 + }, + { + "epoch": 1.81, + "learning_rate": 7.190625994190081e-06, + "loss": 0.4028, + "step": 10605 + }, + { + "epoch": 1.81, + "learning_rate": 7.18885881338469e-06, + "loss": 0.4112, + "step": 10606 + }, + { + "epoch": 1.81, + "learning_rate": 7.187091727896975e-06, + "loss": 0.4167, + "step": 10607 + }, + { + "epoch": 1.81, + "learning_rate": 7.1853247377868495e-06, + "loss": 0.3791, + "step": 10608 + }, + { + "epoch": 1.81, + "learning_rate": 7.183557843114231e-06, + "loss": 0.4121, + "step": 10609 + }, + { + "epoch": 1.81, + "learning_rate": 7.1817910439390235e-06, + "loss": 0.4321, + "step": 10610 + }, + { + "epoch": 1.81, + "learning_rate": 7.180024340321138e-06, + "loss": 0.3971, + "step": 10611 + }, + { + "epoch": 1.81, + "learning_rate": 7.178257732320477e-06, + "loss": 0.4332, + "step": 10612 + }, + { + "epoch": 1.81, + "learning_rate": 7.176491219996944e-06, + "loss": 0.4005, + "step": 10613 + }, + { + "epoch": 1.81, + "learning_rate": 7.174724803410434e-06, + "loss": 0.4164, + "step": 10614 + }, + { + "epoch": 1.81, + "learning_rate": 7.172958482620844e-06, + "loss": 0.4105, + "step": 10615 + }, + { + "epoch": 1.81, + "learning_rate": 7.17119225768806e-06, + "loss": 0.4065, + "step": 10616 + }, + { + "epoch": 1.81, + "learning_rate": 7.169426128671974e-06, + "loss": 0.4264, + "step": 10617 + }, + { + "epoch": 1.81, + "learning_rate": 7.167660095632466e-06, + "loss": 0.4216, + "step": 10618 + }, + { + "epoch": 1.81, + "learning_rate": 7.165894158629419e-06, + "loss": 0.419, + "step": 10619 + }, + { + "epoch": 1.81, + "learning_rate": 7.164128317722712e-06, + "loss": 0.4272, + "step": 10620 + }, + { + "epoch": 1.81, + "learning_rate": 7.162362572972217e-06, + "loss": 0.4558, + "step": 10621 + }, + { + "epoch": 1.81, + "learning_rate": 7.160596924437808e-06, + "loss": 0.4137, + "step": 10622 + }, + { + "epoch": 1.81, + "learning_rate": 7.1588313721793565e-06, + "loss": 0.3959, + "step": 10623 + }, + { + "epoch": 1.81, + "learning_rate": 7.157065916256719e-06, + "loss": 0.421, + "step": 10624 + }, + { + "epoch": 1.81, + "learning_rate": 7.155300556729763e-06, + "loss": 0.4242, + "step": 10625 + }, + { + "epoch": 1.81, + "learning_rate": 7.153535293658342e-06, + "loss": 0.3878, + "step": 10626 + }, + { + "epoch": 1.81, + "learning_rate": 7.1517701271023136e-06, + "loss": 0.4232, + "step": 10627 + }, + { + "epoch": 1.81, + "learning_rate": 7.15000505712153e-06, + "loss": 0.4126, + "step": 10628 + }, + { + "epoch": 1.81, + "learning_rate": 7.148240083775837e-06, + "loss": 0.4233, + "step": 10629 + }, + { + "epoch": 1.81, + "learning_rate": 7.146475207125086e-06, + "loss": 0.4385, + "step": 10630 + }, + { + "epoch": 1.81, + "learning_rate": 7.144710427229114e-06, + "loss": 0.3923, + "step": 10631 + }, + { + "epoch": 1.81, + "learning_rate": 7.142945744147759e-06, + "loss": 0.439, + "step": 10632 + }, + { + "epoch": 1.81, + "learning_rate": 7.141181157940859e-06, + "loss": 0.3968, + "step": 10633 + }, + { + "epoch": 1.81, + "learning_rate": 7.1394166686682435e-06, + "loss": 0.3912, + "step": 10634 + }, + { + "epoch": 1.81, + "learning_rate": 7.137652276389741e-06, + "loss": 0.403, + "step": 10635 + }, + { + "epoch": 1.81, + "learning_rate": 7.135887981165179e-06, + "loss": 0.4194, + "step": 10636 + }, + { + "epoch": 1.81, + "learning_rate": 7.134123783054378e-06, + "loss": 0.395, + "step": 10637 + }, + { + "epoch": 1.81, + "learning_rate": 7.132359682117164e-06, + "loss": 0.4075, + "step": 10638 + }, + { + "epoch": 1.81, + "learning_rate": 7.130595678413341e-06, + "loss": 0.4301, + "step": 10639 + }, + { + "epoch": 1.81, + "learning_rate": 7.1288317720027265e-06, + "loss": 0.4185, + "step": 10640 + }, + { + "epoch": 1.81, + "learning_rate": 7.127067962945131e-06, + "loss": 0.4185, + "step": 10641 + }, + { + "epoch": 1.81, + "learning_rate": 7.1253042513003575e-06, + "loss": 0.4138, + "step": 10642 + }, + { + "epoch": 1.82, + "learning_rate": 7.12354063712821e-06, + "loss": 0.4393, + "step": 10643 + }, + { + "epoch": 1.82, + "learning_rate": 7.121777120488487e-06, + "loss": 0.4048, + "step": 10644 + }, + { + "epoch": 1.82, + "learning_rate": 7.120013701440984e-06, + "loss": 0.4001, + "step": 10645 + }, + { + "epoch": 1.82, + "learning_rate": 7.118250380045499e-06, + "loss": 0.4308, + "step": 10646 + }, + { + "epoch": 1.82, + "learning_rate": 7.116487156361811e-06, + "loss": 0.3788, + "step": 10647 + }, + { + "epoch": 1.82, + "learning_rate": 7.1147240304497135e-06, + "loss": 0.43, + "step": 10648 + }, + { + "epoch": 1.82, + "learning_rate": 7.112961002368983e-06, + "loss": 0.4276, + "step": 10649 + }, + { + "epoch": 1.82, + "learning_rate": 7.111198072179404e-06, + "loss": 0.4257, + "step": 10650 + }, + { + "epoch": 1.82, + "learning_rate": 7.109435239940749e-06, + "loss": 0.3683, + "step": 10651 + }, + { + "epoch": 1.82, + "learning_rate": 7.107672505712793e-06, + "loss": 0.3943, + "step": 10652 + }, + { + "epoch": 1.82, + "learning_rate": 7.105909869555307e-06, + "loss": 0.4218, + "step": 10653 + }, + { + "epoch": 1.82, + "learning_rate": 7.104147331528051e-06, + "loss": 0.4139, + "step": 10654 + }, + { + "epoch": 1.82, + "learning_rate": 7.10238489169079e-06, + "loss": 0.431, + "step": 10655 + }, + { + "epoch": 1.82, + "learning_rate": 7.1006225501032865e-06, + "loss": 0.4261, + "step": 10656 + }, + { + "epoch": 1.82, + "learning_rate": 7.098860306825293e-06, + "loss": 0.4048, + "step": 10657 + }, + { + "epoch": 1.82, + "learning_rate": 7.097098161916561e-06, + "loss": 0.4098, + "step": 10658 + }, + { + "epoch": 1.82, + "learning_rate": 7.0953361154368415e-06, + "loss": 0.4049, + "step": 10659 + }, + { + "epoch": 1.82, + "learning_rate": 7.093574167445882e-06, + "loss": 0.3917, + "step": 10660 + }, + { + "epoch": 1.82, + "learning_rate": 7.091812318003426e-06, + "loss": 0.3927, + "step": 10661 + }, + { + "epoch": 1.82, + "learning_rate": 7.090050567169207e-06, + "loss": 0.4384, + "step": 10662 + }, + { + "epoch": 1.82, + "learning_rate": 7.088288915002965e-06, + "loss": 0.4097, + "step": 10663 + }, + { + "epoch": 1.82, + "learning_rate": 7.0865273615644326e-06, + "loss": 0.4142, + "step": 10664 + }, + { + "epoch": 1.82, + "learning_rate": 7.084765906913335e-06, + "loss": 0.4231, + "step": 10665 + }, + { + "epoch": 1.82, + "learning_rate": 7.083004551109405e-06, + "loss": 0.3892, + "step": 10666 + }, + { + "epoch": 1.82, + "learning_rate": 7.081243294212358e-06, + "loss": 0.4294, + "step": 10667 + }, + { + "epoch": 1.82, + "learning_rate": 7.079482136281921e-06, + "loss": 0.438, + "step": 10668 + }, + { + "epoch": 1.82, + "learning_rate": 7.077721077377801e-06, + "loss": 0.4536, + "step": 10669 + }, + { + "epoch": 1.82, + "learning_rate": 7.075960117559716e-06, + "loss": 0.4288, + "step": 10670 + }, + { + "epoch": 1.82, + "learning_rate": 7.074199256887373e-06, + "loss": 0.4264, + "step": 10671 + }, + { + "epoch": 1.82, + "learning_rate": 7.07243849542048e-06, + "loss": 0.4467, + "step": 10672 + }, + { + "epoch": 1.82, + "learning_rate": 7.0706778332187355e-06, + "loss": 0.4138, + "step": 10673 + }, + { + "epoch": 1.82, + "learning_rate": 7.06891727034184e-06, + "loss": 0.4309, + "step": 10674 + }, + { + "epoch": 1.82, + "learning_rate": 7.067156806849491e-06, + "loss": 0.4278, + "step": 10675 + }, + { + "epoch": 1.82, + "learning_rate": 7.065396442801382e-06, + "loss": 0.4265, + "step": 10676 + }, + { + "epoch": 1.82, + "learning_rate": 7.0636361782571975e-06, + "loss": 0.4383, + "step": 10677 + }, + { + "epoch": 1.82, + "learning_rate": 7.0618760132766226e-06, + "loss": 0.3932, + "step": 10678 + }, + { + "epoch": 1.82, + "learning_rate": 7.060115947919344e-06, + "loss": 0.3759, + "step": 10679 + }, + { + "epoch": 1.82, + "learning_rate": 7.058355982245038e-06, + "loss": 0.4433, + "step": 10680 + }, + { + "epoch": 1.82, + "learning_rate": 7.056596116313379e-06, + "loss": 0.4106, + "step": 10681 + }, + { + "epoch": 1.82, + "learning_rate": 7.054836350184039e-06, + "loss": 0.4512, + "step": 10682 + }, + { + "epoch": 1.82, + "learning_rate": 7.053076683916692e-06, + "loss": 0.4085, + "step": 10683 + }, + { + "epoch": 1.82, + "learning_rate": 7.051317117570995e-06, + "loss": 0.4362, + "step": 10684 + }, + { + "epoch": 1.82, + "learning_rate": 7.049557651206613e-06, + "loss": 0.4346, + "step": 10685 + }, + { + "epoch": 1.82, + "learning_rate": 7.047798284883205e-06, + "loss": 0.4484, + "step": 10686 + }, + { + "epoch": 1.82, + "learning_rate": 7.046039018660426e-06, + "loss": 0.4351, + "step": 10687 + }, + { + "epoch": 1.82, + "learning_rate": 7.0442798525979305e-06, + "loss": 0.3858, + "step": 10688 + }, + { + "epoch": 1.82, + "learning_rate": 7.042520786755362e-06, + "loss": 0.412, + "step": 10689 + }, + { + "epoch": 1.82, + "learning_rate": 7.040761821192365e-06, + "loss": 0.405, + "step": 10690 + }, + { + "epoch": 1.82, + "learning_rate": 7.03900295596859e-06, + "loss": 0.3948, + "step": 10691 + }, + { + "epoch": 1.82, + "learning_rate": 7.037244191143662e-06, + "loss": 0.4438, + "step": 10692 + }, + { + "epoch": 1.82, + "learning_rate": 7.035485526777224e-06, + "loss": 0.4212, + "step": 10693 + }, + { + "epoch": 1.82, + "learning_rate": 7.033726962928903e-06, + "loss": 0.4241, + "step": 10694 + }, + { + "epoch": 1.82, + "learning_rate": 7.031968499658332e-06, + "loss": 0.3997, + "step": 10695 + }, + { + "epoch": 1.82, + "learning_rate": 7.030210137025129e-06, + "loss": 0.3761, + "step": 10696 + }, + { + "epoch": 1.82, + "learning_rate": 7.0284518750889196e-06, + "loss": 0.3934, + "step": 10697 + }, + { + "epoch": 1.82, + "learning_rate": 7.0266937139093224e-06, + "loss": 0.408, + "step": 10698 + }, + { + "epoch": 1.82, + "learning_rate": 7.0249356535459465e-06, + "loss": 0.4459, + "step": 10699 + }, + { + "epoch": 1.82, + "learning_rate": 7.023177694058405e-06, + "loss": 0.381, + "step": 10700 + }, + { + "epoch": 1.83, + "learning_rate": 7.021419835506307e-06, + "loss": 0.4416, + "step": 10701 + }, + { + "epoch": 1.83, + "learning_rate": 7.019662077949253e-06, + "loss": 0.4011, + "step": 10702 + }, + { + "epoch": 1.83, + "learning_rate": 7.017904421446848e-06, + "loss": 0.4131, + "step": 10703 + }, + { + "epoch": 1.83, + "learning_rate": 7.016146866058685e-06, + "loss": 0.4091, + "step": 10704 + }, + { + "epoch": 1.83, + "learning_rate": 7.014389411844359e-06, + "loss": 0.4234, + "step": 10705 + }, + { + "epoch": 1.83, + "learning_rate": 7.012632058863464e-06, + "loss": 0.4426, + "step": 10706 + }, + { + "epoch": 1.83, + "learning_rate": 7.01087480717558e-06, + "loss": 0.4261, + "step": 10707 + }, + { + "epoch": 1.83, + "learning_rate": 7.009117656840292e-06, + "loss": 0.4086, + "step": 10708 + }, + { + "epoch": 1.83, + "learning_rate": 7.007360607917182e-06, + "loss": 0.4186, + "step": 10709 + }, + { + "epoch": 1.83, + "learning_rate": 7.005603660465825e-06, + "loss": 0.4189, + "step": 10710 + }, + { + "epoch": 1.83, + "learning_rate": 7.003846814545798e-06, + "loss": 0.4257, + "step": 10711 + }, + { + "epoch": 1.83, + "learning_rate": 7.002090070216665e-06, + "loss": 0.4332, + "step": 10712 + }, + { + "epoch": 1.83, + "learning_rate": 7.000333427538e-06, + "loss": 0.4265, + "step": 10713 + }, + { + "epoch": 1.83, + "learning_rate": 6.998576886569352e-06, + "loss": 0.4209, + "step": 10714 + }, + { + "epoch": 1.83, + "learning_rate": 6.996820447370291e-06, + "loss": 0.439, + "step": 10715 + }, + { + "epoch": 1.83, + "learning_rate": 6.995064110000372e-06, + "loss": 0.4257, + "step": 10716 + }, + { + "epoch": 1.83, + "learning_rate": 6.993307874519143e-06, + "loss": 0.4694, + "step": 10717 + }, + { + "epoch": 1.83, + "learning_rate": 6.9915517409861555e-06, + "loss": 0.3874, + "step": 10718 + }, + { + "epoch": 1.83, + "learning_rate": 6.989795709460957e-06, + "loss": 0.4042, + "step": 10719 + }, + { + "epoch": 1.83, + "learning_rate": 6.9880397800030855e-06, + "loss": 0.462, + "step": 10720 + }, + { + "epoch": 1.83, + "learning_rate": 6.986283952672085e-06, + "loss": 0.4007, + "step": 10721 + }, + { + "epoch": 1.83, + "learning_rate": 6.984528227527483e-06, + "loss": 0.4088, + "step": 10722 + }, + { + "epoch": 1.83, + "learning_rate": 6.982772604628815e-06, + "loss": 0.4093, + "step": 10723 + }, + { + "epoch": 1.83, + "learning_rate": 6.981017084035609e-06, + "loss": 0.4002, + "step": 10724 + }, + { + "epoch": 1.83, + "learning_rate": 6.979261665807389e-06, + "loss": 0.428, + "step": 10725 + }, + { + "epoch": 1.83, + "learning_rate": 6.977506350003675e-06, + "loss": 0.4466, + "step": 10726 + }, + { + "epoch": 1.83, + "learning_rate": 6.97575113668399e-06, + "loss": 0.4152, + "step": 10727 + }, + { + "epoch": 1.83, + "learning_rate": 6.973996025907846e-06, + "loss": 0.4158, + "step": 10728 + }, + { + "epoch": 1.83, + "learning_rate": 6.972241017734751e-06, + "loss": 0.378, + "step": 10729 + }, + { + "epoch": 1.83, + "learning_rate": 6.9704861122242105e-06, + "loss": 0.4101, + "step": 10730 + }, + { + "epoch": 1.83, + "learning_rate": 6.968731309435732e-06, + "loss": 0.3903, + "step": 10731 + }, + { + "epoch": 1.83, + "learning_rate": 6.966976609428817e-06, + "loss": 0.4031, + "step": 10732 + }, + { + "epoch": 1.83, + "learning_rate": 6.965222012262958e-06, + "loss": 0.4436, + "step": 10733 + }, + { + "epoch": 1.83, + "learning_rate": 6.963467517997652e-06, + "loss": 0.4194, + "step": 10734 + }, + { + "epoch": 1.83, + "learning_rate": 6.9617131266923895e-06, + "loss": 0.3938, + "step": 10735 + }, + { + "epoch": 1.83, + "learning_rate": 6.959958838406655e-06, + "loss": 0.4126, + "step": 10736 + }, + { + "epoch": 1.83, + "learning_rate": 6.9582046531999315e-06, + "loss": 0.4084, + "step": 10737 + }, + { + "epoch": 1.83, + "learning_rate": 6.956450571131696e-06, + "loss": 0.4248, + "step": 10738 + }, + { + "epoch": 1.83, + "learning_rate": 6.954696592261425e-06, + "loss": 0.4317, + "step": 10739 + }, + { + "epoch": 1.83, + "learning_rate": 6.952942716648594e-06, + "loss": 0.3722, + "step": 10740 + }, + { + "epoch": 1.83, + "learning_rate": 6.951188944352669e-06, + "loss": 0.4418, + "step": 10741 + }, + { + "epoch": 1.83, + "learning_rate": 6.949435275433121e-06, + "loss": 0.4175, + "step": 10742 + }, + { + "epoch": 1.83, + "learning_rate": 6.947681709949405e-06, + "loss": 0.4526, + "step": 10743 + }, + { + "epoch": 1.83, + "learning_rate": 6.94592824796098e-06, + "loss": 0.4286, + "step": 10744 + }, + { + "epoch": 1.83, + "learning_rate": 6.944174889527306e-06, + "loss": 0.4056, + "step": 10745 + }, + { + "epoch": 1.83, + "learning_rate": 6.9424216347078275e-06, + "loss": 0.4331, + "step": 10746 + }, + { + "epoch": 1.83, + "learning_rate": 6.940668483561997e-06, + "loss": 0.4101, + "step": 10747 + }, + { + "epoch": 1.83, + "learning_rate": 6.938915436149256e-06, + "loss": 0.4331, + "step": 10748 + }, + { + "epoch": 1.83, + "learning_rate": 6.937162492529047e-06, + "loss": 0.3894, + "step": 10749 + }, + { + "epoch": 1.83, + "learning_rate": 6.93540965276081e-06, + "loss": 0.4617, + "step": 10750 + }, + { + "epoch": 1.83, + "learning_rate": 6.933656916903975e-06, + "loss": 0.4376, + "step": 10751 + }, + { + "epoch": 1.83, + "learning_rate": 6.9319042850179705e-06, + "loss": 0.3945, + "step": 10752 + }, + { + "epoch": 1.83, + "learning_rate": 6.930151757162229e-06, + "loss": 0.4179, + "step": 10753 + }, + { + "epoch": 1.83, + "learning_rate": 6.928399333396167e-06, + "loss": 0.4128, + "step": 10754 + }, + { + "epoch": 1.83, + "learning_rate": 6.9266470137792085e-06, + "loss": 0.4229, + "step": 10755 + }, + { + "epoch": 1.83, + "learning_rate": 6.924894798370767e-06, + "loss": 0.4268, + "step": 10756 + }, + { + "epoch": 1.83, + "learning_rate": 6.923142687230258e-06, + "loss": 0.4237, + "step": 10757 + }, + { + "epoch": 1.83, + "learning_rate": 6.921390680417093e-06, + "loss": 0.4081, + "step": 10758 + }, + { + "epoch": 1.83, + "learning_rate": 6.919638777990668e-06, + "loss": 0.4086, + "step": 10759 + }, + { + "epoch": 1.84, + "learning_rate": 6.917886980010393e-06, + "loss": 0.4042, + "step": 10760 + }, + { + "epoch": 1.84, + "learning_rate": 6.916135286535664e-06, + "loss": 0.4313, + "step": 10761 + }, + { + "epoch": 1.84, + "learning_rate": 6.9143836976258725e-06, + "loss": 0.4595, + "step": 10762 + }, + { + "epoch": 1.84, + "learning_rate": 6.912632213340415e-06, + "loss": 0.4505, + "step": 10763 + }, + { + "epoch": 1.84, + "learning_rate": 6.9108808337386775e-06, + "loss": 0.4486, + "step": 10764 + }, + { + "epoch": 1.84, + "learning_rate": 6.909129558880045e-06, + "loss": 0.3792, + "step": 10765 + }, + { + "epoch": 1.84, + "learning_rate": 6.9073783888239e-06, + "loss": 0.3769, + "step": 10766 + }, + { + "epoch": 1.84, + "learning_rate": 6.905627323629614e-06, + "loss": 0.4308, + "step": 10767 + }, + { + "epoch": 1.84, + "learning_rate": 6.903876363356563e-06, + "loss": 0.4386, + "step": 10768 + }, + { + "epoch": 1.84, + "learning_rate": 6.902125508064117e-06, + "loss": 0.4369, + "step": 10769 + }, + { + "epoch": 1.84, + "learning_rate": 6.900374757811643e-06, + "loss": 0.4217, + "step": 10770 + }, + { + "epoch": 1.84, + "learning_rate": 6.898624112658503e-06, + "loss": 0.4006, + "step": 10771 + }, + { + "epoch": 1.84, + "learning_rate": 6.896873572664058e-06, + "loss": 0.4227, + "step": 10772 + }, + { + "epoch": 1.84, + "learning_rate": 6.8951231378876675e-06, + "loss": 0.3866, + "step": 10773 + }, + { + "epoch": 1.84, + "learning_rate": 6.893372808388674e-06, + "loss": 0.4268, + "step": 10774 + }, + { + "epoch": 1.84, + "learning_rate": 6.8916225842264305e-06, + "loss": 0.4033, + "step": 10775 + }, + { + "epoch": 1.84, + "learning_rate": 6.889872465460286e-06, + "loss": 0.4323, + "step": 10776 + }, + { + "epoch": 1.84, + "learning_rate": 6.888122452149576e-06, + "loss": 0.4277, + "step": 10777 + }, + { + "epoch": 1.84, + "learning_rate": 6.8863725443536415e-06, + "loss": 0.4035, + "step": 10778 + }, + { + "epoch": 1.84, + "learning_rate": 6.884622742131816e-06, + "loss": 0.3968, + "step": 10779 + }, + { + "epoch": 1.84, + "learning_rate": 6.88287304554343e-06, + "loss": 0.4604, + "step": 10780 + }, + { + "epoch": 1.84, + "learning_rate": 6.881123454647816e-06, + "loss": 0.4066, + "step": 10781 + }, + { + "epoch": 1.84, + "learning_rate": 6.879373969504288e-06, + "loss": 0.3785, + "step": 10782 + }, + { + "epoch": 1.84, + "learning_rate": 6.877624590172172e-06, + "loss": 0.4237, + "step": 10783 + }, + { + "epoch": 1.84, + "learning_rate": 6.875875316710785e-06, + "loss": 0.419, + "step": 10784 + }, + { + "epoch": 1.84, + "learning_rate": 6.874126149179436e-06, + "loss": 0.4513, + "step": 10785 + }, + { + "epoch": 1.84, + "learning_rate": 6.872377087637436e-06, + "loss": 0.4299, + "step": 10786 + }, + { + "epoch": 1.84, + "learning_rate": 6.87062813214409e-06, + "loss": 0.4063, + "step": 10787 + }, + { + "epoch": 1.84, + "learning_rate": 6.868879282758707e-06, + "loss": 0.4199, + "step": 10788 + }, + { + "epoch": 1.84, + "learning_rate": 6.867130539540574e-06, + "loss": 0.3916, + "step": 10789 + }, + { + "epoch": 1.84, + "learning_rate": 6.865381902548991e-06, + "loss": 0.3794, + "step": 10790 + }, + { + "epoch": 1.84, + "learning_rate": 6.86363337184325e-06, + "loss": 0.3979, + "step": 10791 + }, + { + "epoch": 1.84, + "learning_rate": 6.86188494748264e-06, + "loss": 0.4279, + "step": 10792 + }, + { + "epoch": 1.84, + "learning_rate": 6.860136629526441e-06, + "loss": 0.4067, + "step": 10793 + }, + { + "epoch": 1.84, + "learning_rate": 6.858388418033934e-06, + "loss": 0.4045, + "step": 10794 + }, + { + "epoch": 1.84, + "learning_rate": 6.856640313064399e-06, + "loss": 0.4351, + "step": 10795 + }, + { + "epoch": 1.84, + "learning_rate": 6.854892314677111e-06, + "loss": 0.4103, + "step": 10796 + }, + { + "epoch": 1.84, + "learning_rate": 6.853144422931331e-06, + "loss": 0.4374, + "step": 10797 + }, + { + "epoch": 1.84, + "learning_rate": 6.851396637886332e-06, + "loss": 0.3943, + "step": 10798 + }, + { + "epoch": 1.84, + "learning_rate": 6.849648959601374e-06, + "loss": 0.4156, + "step": 10799 + }, + { + "epoch": 1.84, + "learning_rate": 6.847901388135715e-06, + "loss": 0.3984, + "step": 10800 + }, + { + "epoch": 1.84, + "learning_rate": 6.846153923548611e-06, + "loss": 0.3873, + "step": 10801 + }, + { + "epoch": 1.84, + "learning_rate": 6.844406565899313e-06, + "loss": 0.4427, + "step": 10802 + }, + { + "epoch": 1.84, + "learning_rate": 6.842659315247076e-06, + "loss": 0.3965, + "step": 10803 + }, + { + "epoch": 1.84, + "learning_rate": 6.8409121716511305e-06, + "loss": 0.4302, + "step": 10804 + }, + { + "epoch": 1.84, + "learning_rate": 6.839165135170725e-06, + "loss": 0.4197, + "step": 10805 + }, + { + "epoch": 1.84, + "learning_rate": 6.837418205865097e-06, + "loss": 0.434, + "step": 10806 + }, + { + "epoch": 1.84, + "learning_rate": 6.83567138379348e-06, + "loss": 0.4356, + "step": 10807 + }, + { + "epoch": 1.84, + "learning_rate": 6.8339246690150996e-06, + "loss": 0.4162, + "step": 10808 + }, + { + "epoch": 1.84, + "learning_rate": 6.8321780615891855e-06, + "loss": 0.4582, + "step": 10809 + }, + { + "epoch": 1.84, + "learning_rate": 6.830431561574957e-06, + "loss": 0.4022, + "step": 10810 + }, + { + "epoch": 1.84, + "learning_rate": 6.828685169031641e-06, + "loss": 0.4235, + "step": 10811 + }, + { + "epoch": 1.84, + "learning_rate": 6.8269388840184414e-06, + "loss": 0.3932, + "step": 10812 + }, + { + "epoch": 1.84, + "learning_rate": 6.8251927065945755e-06, + "loss": 0.4151, + "step": 10813 + }, + { + "epoch": 1.84, + "learning_rate": 6.823446636819251e-06, + "loss": 0.4012, + "step": 10814 + }, + { + "epoch": 1.84, + "learning_rate": 6.821700674751672e-06, + "loss": 0.4433, + "step": 10815 + }, + { + "epoch": 1.84, + "learning_rate": 6.819954820451038e-06, + "loss": 0.4328, + "step": 10816 + }, + { + "epoch": 1.84, + "learning_rate": 6.818209073976545e-06, + "loss": 0.3825, + "step": 10817 + }, + { + "epoch": 1.84, + "learning_rate": 6.8164634353873925e-06, + "loss": 0.4403, + "step": 10818 + }, + { + "epoch": 1.85, + "learning_rate": 6.814717904742762e-06, + "loss": 0.4189, + "step": 10819 + }, + { + "epoch": 1.85, + "learning_rate": 6.812972482101843e-06, + "loss": 0.4436, + "step": 10820 + }, + { + "epoch": 1.85, + "learning_rate": 6.8112271675238154e-06, + "loss": 0.4388, + "step": 10821 + }, + { + "epoch": 1.85, + "learning_rate": 6.809481961067861e-06, + "loss": 0.3873, + "step": 10822 + }, + { + "epoch": 1.85, + "learning_rate": 6.807736862793155e-06, + "loss": 0.4302, + "step": 10823 + }, + { + "epoch": 1.85, + "learning_rate": 6.805991872758866e-06, + "loss": 0.413, + "step": 10824 + }, + { + "epoch": 1.85, + "learning_rate": 6.8042469910241615e-06, + "loss": 0.4361, + "step": 10825 + }, + { + "epoch": 1.85, + "learning_rate": 6.80250221764821e-06, + "loss": 0.4029, + "step": 10826 + }, + { + "epoch": 1.85, + "learning_rate": 6.800757552690166e-06, + "loss": 0.4751, + "step": 10827 + }, + { + "epoch": 1.85, + "learning_rate": 6.799012996209186e-06, + "loss": 0.3993, + "step": 10828 + }, + { + "epoch": 1.85, + "learning_rate": 6.797268548264428e-06, + "loss": 0.4056, + "step": 10829 + }, + { + "epoch": 1.85, + "learning_rate": 6.795524208915036e-06, + "loss": 0.4296, + "step": 10830 + }, + { + "epoch": 1.85, + "learning_rate": 6.7937799782201626e-06, + "loss": 0.4236, + "step": 10831 + }, + { + "epoch": 1.85, + "learning_rate": 6.792035856238941e-06, + "loss": 0.4313, + "step": 10832 + }, + { + "epoch": 1.85, + "learning_rate": 6.7902918430305145e-06, + "loss": 0.4052, + "step": 10833 + }, + { + "epoch": 1.85, + "learning_rate": 6.788547938654019e-06, + "loss": 0.4153, + "step": 10834 + }, + { + "epoch": 1.85, + "learning_rate": 6.786804143168579e-06, + "loss": 0.4022, + "step": 10835 + }, + { + "epoch": 1.85, + "learning_rate": 6.785060456633326e-06, + "loss": 0.4242, + "step": 10836 + }, + { + "epoch": 1.85, + "learning_rate": 6.78331687910738e-06, + "loss": 0.4649, + "step": 10837 + }, + { + "epoch": 1.85, + "learning_rate": 6.781573410649865e-06, + "loss": 0.3817, + "step": 10838 + }, + { + "epoch": 1.85, + "learning_rate": 6.779830051319898e-06, + "loss": 0.4315, + "step": 10839 + }, + { + "epoch": 1.85, + "learning_rate": 6.778086801176585e-06, + "loss": 0.3838, + "step": 10840 + }, + { + "epoch": 1.85, + "learning_rate": 6.7763436602790425e-06, + "loss": 0.4348, + "step": 10841 + }, + { + "epoch": 1.85, + "learning_rate": 6.7746006286863665e-06, + "loss": 0.4146, + "step": 10842 + }, + { + "epoch": 1.85, + "learning_rate": 6.772857706457663e-06, + "loss": 0.4632, + "step": 10843 + }, + { + "epoch": 1.85, + "learning_rate": 6.771114893652029e-06, + "loss": 0.4037, + "step": 10844 + }, + { + "epoch": 1.85, + "learning_rate": 6.769372190328558e-06, + "loss": 0.4377, + "step": 10845 + }, + { + "epoch": 1.85, + "learning_rate": 6.767629596546341e-06, + "loss": 0.4159, + "step": 10846 + }, + { + "epoch": 1.85, + "learning_rate": 6.7658871123644644e-06, + "loss": 0.3996, + "step": 10847 + }, + { + "epoch": 1.85, + "learning_rate": 6.764144737842008e-06, + "loss": 0.3831, + "step": 10848 + }, + { + "epoch": 1.85, + "learning_rate": 6.762402473038057e-06, + "loss": 0.4121, + "step": 10849 + }, + { + "epoch": 1.85, + "learning_rate": 6.760660318011678e-06, + "loss": 0.4155, + "step": 10850 + }, + { + "epoch": 1.85, + "learning_rate": 6.758918272821948e-06, + "loss": 0.4611, + "step": 10851 + }, + { + "epoch": 1.85, + "learning_rate": 6.757176337527932e-06, + "loss": 0.4069, + "step": 10852 + }, + { + "epoch": 1.85, + "learning_rate": 6.7554345121886965e-06, + "loss": 0.379, + "step": 10853 + }, + { + "epoch": 1.85, + "learning_rate": 6.753692796863302e-06, + "loss": 0.4096, + "step": 10854 + }, + { + "epoch": 1.85, + "learning_rate": 6.751951191610802e-06, + "loss": 0.3783, + "step": 10855 + }, + { + "epoch": 1.85, + "learning_rate": 6.750209696490252e-06, + "loss": 0.442, + "step": 10856 + }, + { + "epoch": 1.85, + "learning_rate": 6.748468311560701e-06, + "loss": 0.4406, + "step": 10857 + }, + { + "epoch": 1.85, + "learning_rate": 6.746727036881191e-06, + "loss": 0.429, + "step": 10858 + }, + { + "epoch": 1.85, + "learning_rate": 6.744985872510766e-06, + "loss": 0.4272, + "step": 10859 + }, + { + "epoch": 1.85, + "learning_rate": 6.743244818508465e-06, + "loss": 0.404, + "step": 10860 + }, + { + "epoch": 1.85, + "learning_rate": 6.741503874933319e-06, + "loss": 0.3892, + "step": 10861 + }, + { + "epoch": 1.85, + "learning_rate": 6.7397630418443634e-06, + "loss": 0.3929, + "step": 10862 + }, + { + "epoch": 1.85, + "learning_rate": 6.73802231930062e-06, + "loss": 0.3606, + "step": 10863 + }, + { + "epoch": 1.85, + "learning_rate": 6.736281707361117e-06, + "loss": 0.4039, + "step": 10864 + }, + { + "epoch": 1.85, + "learning_rate": 6.734541206084866e-06, + "loss": 0.4272, + "step": 10865 + }, + { + "epoch": 1.85, + "learning_rate": 6.732800815530887e-06, + "loss": 0.4231, + "step": 10866 + }, + { + "epoch": 1.85, + "learning_rate": 6.73106053575819e-06, + "loss": 0.4177, + "step": 10867 + }, + { + "epoch": 1.85, + "learning_rate": 6.729320366825785e-06, + "loss": 0.4141, + "step": 10868 + }, + { + "epoch": 1.85, + "learning_rate": 6.727580308792673e-06, + "loss": 0.4297, + "step": 10869 + }, + { + "epoch": 1.85, + "learning_rate": 6.725840361717859e-06, + "loss": 0.4135, + "step": 10870 + }, + { + "epoch": 1.85, + "learning_rate": 6.724100525660337e-06, + "loss": 0.4094, + "step": 10871 + }, + { + "epoch": 1.85, + "learning_rate": 6.7223608006791e-06, + "loss": 0.406, + "step": 10872 + }, + { + "epoch": 1.85, + "learning_rate": 6.720621186833134e-06, + "loss": 0.4002, + "step": 10873 + }, + { + "epoch": 1.85, + "learning_rate": 6.718881684181426e-06, + "loss": 0.3895, + "step": 10874 + }, + { + "epoch": 1.85, + "learning_rate": 6.71714229278296e-06, + "loss": 0.4096, + "step": 10875 + }, + { + "epoch": 1.85, + "learning_rate": 6.7154030126967105e-06, + "loss": 0.4187, + "step": 10876 + }, + { + "epoch": 1.86, + "learning_rate": 6.7136638439816544e-06, + "loss": 0.3793, + "step": 10877 + }, + { + "epoch": 1.86, + "learning_rate": 6.711924786696763e-06, + "loss": 0.4275, + "step": 10878 + }, + { + "epoch": 1.86, + "learning_rate": 6.710185840901e-06, + "loss": 0.4435, + "step": 10879 + }, + { + "epoch": 1.86, + "learning_rate": 6.708447006653328e-06, + "loss": 0.383, + "step": 10880 + }, + { + "epoch": 1.86, + "learning_rate": 6.706708284012704e-06, + "loss": 0.4517, + "step": 10881 + }, + { + "epoch": 1.86, + "learning_rate": 6.704969673038087e-06, + "loss": 0.4143, + "step": 10882 + }, + { + "epoch": 1.86, + "learning_rate": 6.703231173788425e-06, + "loss": 0.4085, + "step": 10883 + }, + { + "epoch": 1.86, + "learning_rate": 6.701492786322666e-06, + "loss": 0.3912, + "step": 10884 + }, + { + "epoch": 1.86, + "learning_rate": 6.699754510699756e-06, + "loss": 0.4125, + "step": 10885 + }, + { + "epoch": 1.86, + "learning_rate": 6.698016346978637e-06, + "loss": 0.3963, + "step": 10886 + }, + { + "epoch": 1.86, + "learning_rate": 6.696278295218239e-06, + "loss": 0.4009, + "step": 10887 + }, + { + "epoch": 1.86, + "learning_rate": 6.694540355477498e-06, + "loss": 0.4146, + "step": 10888 + }, + { + "epoch": 1.86, + "learning_rate": 6.69280252781534e-06, + "loss": 0.4162, + "step": 10889 + }, + { + "epoch": 1.86, + "learning_rate": 6.691064812290691e-06, + "loss": 0.4496, + "step": 10890 + }, + { + "epoch": 1.86, + "learning_rate": 6.689327208962472e-06, + "loss": 0.4007, + "step": 10891 + }, + { + "epoch": 1.86, + "learning_rate": 6.6875897178896e-06, + "loss": 0.4125, + "step": 10892 + }, + { + "epoch": 1.86, + "learning_rate": 6.685852339130992e-06, + "loss": 0.43, + "step": 10893 + }, + { + "epoch": 1.86, + "learning_rate": 6.684115072745553e-06, + "loss": 0.4359, + "step": 10894 + }, + { + "epoch": 1.86, + "learning_rate": 6.682377918792187e-06, + "loss": 0.4133, + "step": 10895 + }, + { + "epoch": 1.86, + "learning_rate": 6.6806408773298024e-06, + "loss": 0.4103, + "step": 10896 + }, + { + "epoch": 1.86, + "learning_rate": 6.67890394841729e-06, + "loss": 0.3939, + "step": 10897 + }, + { + "epoch": 1.86, + "learning_rate": 6.677167132113549e-06, + "loss": 0.4143, + "step": 10898 + }, + { + "epoch": 1.86, + "learning_rate": 6.675430428477468e-06, + "loss": 0.3957, + "step": 10899 + }, + { + "epoch": 1.86, + "learning_rate": 6.673693837567932e-06, + "loss": 0.3986, + "step": 10900 + }, + { + "epoch": 1.86, + "learning_rate": 6.6719573594438315e-06, + "loss": 0.4087, + "step": 10901 + }, + { + "epoch": 1.86, + "learning_rate": 6.670220994164035e-06, + "loss": 0.4222, + "step": 10902 + }, + { + "epoch": 1.86, + "learning_rate": 6.668484741787422e-06, + "loss": 0.4307, + "step": 10903 + }, + { + "epoch": 1.86, + "learning_rate": 6.666748602372868e-06, + "loss": 0.4054, + "step": 10904 + }, + { + "epoch": 1.86, + "learning_rate": 6.665012575979233e-06, + "loss": 0.4315, + "step": 10905 + }, + { + "epoch": 1.86, + "learning_rate": 6.663276662665385e-06, + "loss": 0.3917, + "step": 10906 + }, + { + "epoch": 1.86, + "learning_rate": 6.661540862490181e-06, + "loss": 0.4031, + "step": 10907 + }, + { + "epoch": 1.86, + "learning_rate": 6.659805175512481e-06, + "loss": 0.4138, + "step": 10908 + }, + { + "epoch": 1.86, + "learning_rate": 6.65806960179114e-06, + "loss": 0.4091, + "step": 10909 + }, + { + "epoch": 1.86, + "learning_rate": 6.6563341413849945e-06, + "loss": 0.3719, + "step": 10910 + }, + { + "epoch": 1.86, + "learning_rate": 6.6545987943529e-06, + "loss": 0.4051, + "step": 10911 + }, + { + "epoch": 1.86, + "learning_rate": 6.652863560753691e-06, + "loss": 0.4094, + "step": 10912 + }, + { + "epoch": 1.86, + "learning_rate": 6.651128440646206e-06, + "loss": 0.4153, + "step": 10913 + }, + { + "epoch": 1.86, + "learning_rate": 6.649393434089277e-06, + "loss": 0.399, + "step": 10914 + }, + { + "epoch": 1.86, + "learning_rate": 6.647658541141735e-06, + "loss": 0.4357, + "step": 10915 + }, + { + "epoch": 1.86, + "learning_rate": 6.6459237618624094e-06, + "loss": 0.4277, + "step": 10916 + }, + { + "epoch": 1.86, + "learning_rate": 6.644189096310111e-06, + "loss": 0.3994, + "step": 10917 + }, + { + "epoch": 1.86, + "learning_rate": 6.642454544543664e-06, + "loss": 0.433, + "step": 10918 + }, + { + "epoch": 1.86, + "learning_rate": 6.640720106621881e-06, + "loss": 0.4004, + "step": 10919 + }, + { + "epoch": 1.86, + "learning_rate": 6.638985782603571e-06, + "loss": 0.4119, + "step": 10920 + }, + { + "epoch": 1.86, + "learning_rate": 6.637251572547541e-06, + "loss": 0.4269, + "step": 10921 + }, + { + "epoch": 1.86, + "learning_rate": 6.635517476512592e-06, + "loss": 0.4222, + "step": 10922 + }, + { + "epoch": 1.86, + "learning_rate": 6.633783494557522e-06, + "loss": 0.4078, + "step": 10923 + }, + { + "epoch": 1.86, + "learning_rate": 6.632049626741131e-06, + "loss": 0.3761, + "step": 10924 + }, + { + "epoch": 1.86, + "learning_rate": 6.630315873122202e-06, + "loss": 0.3995, + "step": 10925 + }, + { + "epoch": 1.86, + "learning_rate": 6.6285822337595215e-06, + "loss": 0.3944, + "step": 10926 + }, + { + "epoch": 1.86, + "learning_rate": 6.626848708711879e-06, + "loss": 0.4126, + "step": 10927 + }, + { + "epoch": 1.86, + "learning_rate": 6.625115298038046e-06, + "loss": 0.4273, + "step": 10928 + }, + { + "epoch": 1.86, + "learning_rate": 6.623382001796801e-06, + "loss": 0.4189, + "step": 10929 + }, + { + "epoch": 1.86, + "learning_rate": 6.621648820046915e-06, + "loss": 0.4112, + "step": 10930 + }, + { + "epoch": 1.86, + "learning_rate": 6.619915752847158e-06, + "loss": 0.4024, + "step": 10931 + }, + { + "epoch": 1.86, + "learning_rate": 6.6181828002562875e-06, + "loss": 0.425, + "step": 10932 + }, + { + "epoch": 1.86, + "learning_rate": 6.616449962333065e-06, + "loss": 0.3826, + "step": 10933 + }, + { + "epoch": 1.86, + "learning_rate": 6.614717239136246e-06, + "loss": 0.3879, + "step": 10934 + }, + { + "epoch": 1.86, + "learning_rate": 6.612984630724584e-06, + "loss": 0.4206, + "step": 10935 + }, + { + "epoch": 1.87, + "learning_rate": 6.611252137156825e-06, + "loss": 0.4022, + "step": 10936 + }, + { + "epoch": 1.87, + "learning_rate": 6.6095197584917135e-06, + "loss": 0.4306, + "step": 10937 + }, + { + "epoch": 1.87, + "learning_rate": 6.607787494787988e-06, + "loss": 0.4228, + "step": 10938 + }, + { + "epoch": 1.87, + "learning_rate": 6.606055346104391e-06, + "loss": 0.4067, + "step": 10939 + }, + { + "epoch": 1.87, + "learning_rate": 6.6043233124996456e-06, + "loss": 0.4461, + "step": 10940 + }, + { + "epoch": 1.87, + "learning_rate": 6.602591394032483e-06, + "loss": 0.436, + "step": 10941 + }, + { + "epoch": 1.87, + "learning_rate": 6.600859590761629e-06, + "loss": 0.4234, + "step": 10942 + }, + { + "epoch": 1.87, + "learning_rate": 6.5991279027458056e-06, + "loss": 0.4221, + "step": 10943 + }, + { + "epoch": 1.87, + "learning_rate": 6.597396330043726e-06, + "loss": 0.4101, + "step": 10944 + }, + { + "epoch": 1.87, + "learning_rate": 6.595664872714103e-06, + "loss": 0.4035, + "step": 10945 + }, + { + "epoch": 1.87, + "learning_rate": 6.59393353081565e-06, + "loss": 0.4093, + "step": 10946 + }, + { + "epoch": 1.87, + "learning_rate": 6.592202304407066e-06, + "loss": 0.4356, + "step": 10947 + }, + { + "epoch": 1.87, + "learning_rate": 6.590471193547052e-06, + "loss": 0.4124, + "step": 10948 + }, + { + "epoch": 1.87, + "learning_rate": 6.58874019829431e-06, + "loss": 0.4173, + "step": 10949 + }, + { + "epoch": 1.87, + "learning_rate": 6.587009318707529e-06, + "loss": 0.4439, + "step": 10950 + }, + { + "epoch": 1.87, + "learning_rate": 6.5852785548454e-06, + "loss": 0.4069, + "step": 10951 + }, + { + "epoch": 1.87, + "learning_rate": 6.583547906766605e-06, + "loss": 0.4417, + "step": 10952 + }, + { + "epoch": 1.87, + "learning_rate": 6.58181737452983e-06, + "loss": 0.3926, + "step": 10953 + }, + { + "epoch": 1.87, + "learning_rate": 6.580086958193752e-06, + "loss": 0.4063, + "step": 10954 + }, + { + "epoch": 1.87, + "learning_rate": 6.57835665781704e-06, + "loss": 0.3981, + "step": 10955 + }, + { + "epoch": 1.87, + "learning_rate": 6.576626473458365e-06, + "loss": 0.4378, + "step": 10956 + }, + { + "epoch": 1.87, + "learning_rate": 6.5748964051763945e-06, + "loss": 0.4086, + "step": 10957 + }, + { + "epoch": 1.87, + "learning_rate": 6.573166453029791e-06, + "loss": 0.3969, + "step": 10958 + }, + { + "epoch": 1.87, + "learning_rate": 6.571436617077208e-06, + "loss": 0.3933, + "step": 10959 + }, + { + "epoch": 1.87, + "learning_rate": 6.569706897377301e-06, + "loss": 0.4248, + "step": 10960 + }, + { + "epoch": 1.87, + "learning_rate": 6.567977293988726e-06, + "loss": 0.4359, + "step": 10961 + }, + { + "epoch": 1.87, + "learning_rate": 6.566247806970119e-06, + "loss": 0.4335, + "step": 10962 + }, + { + "epoch": 1.87, + "learning_rate": 6.5645184363801255e-06, + "loss": 0.3976, + "step": 10963 + }, + { + "epoch": 1.87, + "learning_rate": 6.562789182277383e-06, + "loss": 0.4276, + "step": 10964 + }, + { + "epoch": 1.87, + "learning_rate": 6.561060044720528e-06, + "loss": 0.3792, + "step": 10965 + }, + { + "epoch": 1.87, + "learning_rate": 6.55933102376819e-06, + "loss": 0.4473, + "step": 10966 + }, + { + "epoch": 1.87, + "learning_rate": 6.557602119478993e-06, + "loss": 0.4047, + "step": 10967 + }, + { + "epoch": 1.87, + "learning_rate": 6.55587333191156e-06, + "loss": 0.438, + "step": 10968 + }, + { + "epoch": 1.87, + "learning_rate": 6.554144661124515e-06, + "loss": 0.4092, + "step": 10969 + }, + { + "epoch": 1.87, + "learning_rate": 6.552416107176462e-06, + "loss": 0.3996, + "step": 10970 + }, + { + "epoch": 1.87, + "learning_rate": 6.550687670126015e-06, + "loss": 0.3752, + "step": 10971 + }, + { + "epoch": 1.87, + "learning_rate": 6.548959350031783e-06, + "loss": 0.4114, + "step": 10972 + }, + { + "epoch": 1.87, + "learning_rate": 6.547231146952366e-06, + "loss": 0.4205, + "step": 10973 + }, + { + "epoch": 1.87, + "learning_rate": 6.545503060946365e-06, + "loss": 0.3953, + "step": 10974 + }, + { + "epoch": 1.87, + "learning_rate": 6.543775092072371e-06, + "loss": 0.4184, + "step": 10975 + }, + { + "epoch": 1.87, + "learning_rate": 6.54204724038898e-06, + "loss": 0.3982, + "step": 10976 + }, + { + "epoch": 1.87, + "learning_rate": 6.5403195059547686e-06, + "loss": 0.3936, + "step": 10977 + }, + { + "epoch": 1.87, + "learning_rate": 6.538591888828328e-06, + "loss": 0.4388, + "step": 10978 + }, + { + "epoch": 1.87, + "learning_rate": 6.536864389068231e-06, + "loss": 0.4296, + "step": 10979 + }, + { + "epoch": 1.87, + "learning_rate": 6.535137006733056e-06, + "loss": 0.4528, + "step": 10980 + }, + { + "epoch": 1.87, + "learning_rate": 6.533409741881374e-06, + "loss": 0.4308, + "step": 10981 + }, + { + "epoch": 1.87, + "learning_rate": 6.531682594571751e-06, + "loss": 0.4232, + "step": 10982 + }, + { + "epoch": 1.87, + "learning_rate": 6.529955564862747e-06, + "loss": 0.4188, + "step": 10983 + }, + { + "epoch": 1.87, + "learning_rate": 6.528228652812924e-06, + "loss": 0.4262, + "step": 10984 + }, + { + "epoch": 1.87, + "learning_rate": 6.526501858480833e-06, + "loss": 0.393, + "step": 10985 + }, + { + "epoch": 1.87, + "learning_rate": 6.524775181925025e-06, + "loss": 0.413, + "step": 10986 + }, + { + "epoch": 1.87, + "learning_rate": 6.523048623204049e-06, + "loss": 0.451, + "step": 10987 + }, + { + "epoch": 1.87, + "learning_rate": 6.521322182376446e-06, + "loss": 0.4528, + "step": 10988 + }, + { + "epoch": 1.87, + "learning_rate": 6.519595859500755e-06, + "loss": 0.4081, + "step": 10989 + }, + { + "epoch": 1.87, + "learning_rate": 6.5178696546355146e-06, + "loss": 0.4094, + "step": 10990 + }, + { + "epoch": 1.87, + "learning_rate": 6.516143567839251e-06, + "loss": 0.453, + "step": 10991 + }, + { + "epoch": 1.87, + "learning_rate": 6.514417599170491e-06, + "loss": 0.4437, + "step": 10992 + }, + { + "epoch": 1.87, + "learning_rate": 6.5126917486877565e-06, + "loss": 0.3814, + "step": 10993 + }, + { + "epoch": 1.87, + "learning_rate": 6.510966016449566e-06, + "loss": 0.4349, + "step": 10994 + }, + { + "epoch": 1.88, + "learning_rate": 6.509240402514436e-06, + "loss": 0.4556, + "step": 10995 + }, + { + "epoch": 1.88, + "learning_rate": 6.507514906940876e-06, + "loss": 0.399, + "step": 10996 + }, + { + "epoch": 1.88, + "learning_rate": 6.505789529787395e-06, + "loss": 0.4154, + "step": 10997 + }, + { + "epoch": 1.88, + "learning_rate": 6.5040642711124925e-06, + "loss": 0.4006, + "step": 10998 + }, + { + "epoch": 1.88, + "learning_rate": 6.50233913097467e-06, + "loss": 0.4118, + "step": 10999 + }, + { + "epoch": 1.88, + "learning_rate": 6.500614109432419e-06, + "loss": 0.3885, + "step": 11000 + }, + { + "epoch": 1.88, + "learning_rate": 6.49888920654423e-06, + "loss": 0.4544, + "step": 11001 + }, + { + "epoch": 1.88, + "learning_rate": 6.497164422368592e-06, + "loss": 0.4115, + "step": 11002 + }, + { + "epoch": 1.88, + "learning_rate": 6.495439756963984e-06, + "loss": 0.4262, + "step": 11003 + }, + { + "epoch": 1.88, + "learning_rate": 6.4937152103888865e-06, + "loss": 0.4033, + "step": 11004 + }, + { + "epoch": 1.88, + "learning_rate": 6.491990782701777e-06, + "loss": 0.4012, + "step": 11005 + }, + { + "epoch": 1.88, + "learning_rate": 6.490266473961122e-06, + "loss": 0.4167, + "step": 11006 + }, + { + "epoch": 1.88, + "learning_rate": 6.488542284225387e-06, + "loss": 0.403, + "step": 11007 + }, + { + "epoch": 1.88, + "learning_rate": 6.486818213553039e-06, + "loss": 0.4114, + "step": 11008 + }, + { + "epoch": 1.88, + "learning_rate": 6.485094262002529e-06, + "loss": 0.4299, + "step": 11009 + }, + { + "epoch": 1.88, + "learning_rate": 6.483370429632317e-06, + "loss": 0.393, + "step": 11010 + }, + { + "epoch": 1.88, + "learning_rate": 6.481646716500849e-06, + "loss": 0.4142, + "step": 11011 + }, + { + "epoch": 1.88, + "learning_rate": 6.479923122666576e-06, + "loss": 0.4092, + "step": 11012 + }, + { + "epoch": 1.88, + "learning_rate": 6.478199648187939e-06, + "loss": 0.4261, + "step": 11013 + }, + { + "epoch": 1.88, + "learning_rate": 6.476476293123375e-06, + "loss": 0.4016, + "step": 11014 + }, + { + "epoch": 1.88, + "learning_rate": 6.474753057531318e-06, + "loss": 0.4177, + "step": 11015 + }, + { + "epoch": 1.88, + "learning_rate": 6.4730299414701945e-06, + "loss": 0.4476, + "step": 11016 + }, + { + "epoch": 1.88, + "learning_rate": 6.471306944998434e-06, + "loss": 0.4206, + "step": 11017 + }, + { + "epoch": 1.88, + "learning_rate": 6.469584068174459e-06, + "loss": 0.4177, + "step": 11018 + }, + { + "epoch": 1.88, + "learning_rate": 6.467861311056685e-06, + "loss": 0.4242, + "step": 11019 + }, + { + "epoch": 1.88, + "learning_rate": 6.4661386737035284e-06, + "loss": 0.4424, + "step": 11020 + }, + { + "epoch": 1.88, + "learning_rate": 6.464416156173396e-06, + "loss": 0.4237, + "step": 11021 + }, + { + "epoch": 1.88, + "learning_rate": 6.462693758524698e-06, + "loss": 0.4132, + "step": 11022 + }, + { + "epoch": 1.88, + "learning_rate": 6.460971480815832e-06, + "loss": 0.4359, + "step": 11023 + }, + { + "epoch": 1.88, + "learning_rate": 6.459249323105193e-06, + "loss": 0.4205, + "step": 11024 + }, + { + "epoch": 1.88, + "learning_rate": 6.457527285451179e-06, + "loss": 0.4368, + "step": 11025 + }, + { + "epoch": 1.88, + "learning_rate": 6.455805367912177e-06, + "loss": 0.4234, + "step": 11026 + }, + { + "epoch": 1.88, + "learning_rate": 6.4540835705465735e-06, + "loss": 0.3995, + "step": 11027 + }, + { + "epoch": 1.88, + "learning_rate": 6.452361893412748e-06, + "loss": 0.4101, + "step": 11028 + }, + { + "epoch": 1.88, + "learning_rate": 6.450640336569085e-06, + "loss": 0.4189, + "step": 11029 + }, + { + "epoch": 1.88, + "learning_rate": 6.448918900073946e-06, + "loss": 0.4209, + "step": 11030 + }, + { + "epoch": 1.88, + "learning_rate": 6.447197583985707e-06, + "loss": 0.4004, + "step": 11031 + }, + { + "epoch": 1.88, + "learning_rate": 6.44547638836273e-06, + "loss": 0.4045, + "step": 11032 + }, + { + "epoch": 1.88, + "learning_rate": 6.4437553132633765e-06, + "loss": 0.3932, + "step": 11033 + }, + { + "epoch": 1.88, + "learning_rate": 6.442034358746003e-06, + "loss": 0.4664, + "step": 11034 + }, + { + "epoch": 1.88, + "learning_rate": 6.440313524868964e-06, + "loss": 0.3752, + "step": 11035 + }, + { + "epoch": 1.88, + "learning_rate": 6.438592811690607e-06, + "loss": 0.4073, + "step": 11036 + }, + { + "epoch": 1.88, + "learning_rate": 6.436872219269279e-06, + "loss": 0.4408, + "step": 11037 + }, + { + "epoch": 1.88, + "learning_rate": 6.4351517476633126e-06, + "loss": 0.4215, + "step": 11038 + }, + { + "epoch": 1.88, + "learning_rate": 6.433431396931054e-06, + "loss": 0.4057, + "step": 11039 + }, + { + "epoch": 1.88, + "learning_rate": 6.431711167130825e-06, + "loss": 0.4293, + "step": 11040 + }, + { + "epoch": 1.88, + "learning_rate": 6.42999105832096e-06, + "loss": 0.378, + "step": 11041 + }, + { + "epoch": 1.88, + "learning_rate": 6.4282710705597814e-06, + "loss": 0.3815, + "step": 11042 + }, + { + "epoch": 1.88, + "learning_rate": 6.42655120390561e-06, + "loss": 0.436, + "step": 11043 + }, + { + "epoch": 1.88, + "learning_rate": 6.424831458416765e-06, + "loss": 0.4148, + "step": 11044 + }, + { + "epoch": 1.88, + "learning_rate": 6.423111834151549e-06, + "loss": 0.4214, + "step": 11045 + }, + { + "epoch": 1.88, + "learning_rate": 6.421392331168274e-06, + "loss": 0.406, + "step": 11046 + }, + { + "epoch": 1.88, + "learning_rate": 6.419672949525247e-06, + "loss": 0.4232, + "step": 11047 + }, + { + "epoch": 1.88, + "learning_rate": 6.417953689280763e-06, + "loss": 0.4355, + "step": 11048 + }, + { + "epoch": 1.88, + "learning_rate": 6.4162345504931145e-06, + "loss": 0.4237, + "step": 11049 + }, + { + "epoch": 1.88, + "learning_rate": 6.414515533220599e-06, + "loss": 0.4286, + "step": 11050 + }, + { + "epoch": 1.88, + "learning_rate": 6.412796637521499e-06, + "loss": 0.4443, + "step": 11051 + }, + { + "epoch": 1.88, + "learning_rate": 6.411077863454105e-06, + "loss": 0.3995, + "step": 11052 + }, + { + "epoch": 1.89, + "learning_rate": 6.409359211076683e-06, + "loss": 0.3798, + "step": 11053 + }, + { + "epoch": 1.89, + "learning_rate": 6.407640680447516e-06, + "loss": 0.4206, + "step": 11054 + }, + { + "epoch": 1.89, + "learning_rate": 6.405922271624874e-06, + "loss": 0.4214, + "step": 11055 + }, + { + "epoch": 1.89, + "learning_rate": 6.404203984667019e-06, + "loss": 0.3998, + "step": 11056 + }, + { + "epoch": 1.89, + "learning_rate": 6.402485819632216e-06, + "loss": 0.4326, + "step": 11057 + }, + { + "epoch": 1.89, + "learning_rate": 6.400767776578722e-06, + "loss": 0.4044, + "step": 11058 + }, + { + "epoch": 1.89, + "learning_rate": 6.399049855564798e-06, + "loss": 0.3952, + "step": 11059 + }, + { + "epoch": 1.89, + "learning_rate": 6.3973320566486815e-06, + "loss": 0.3784, + "step": 11060 + }, + { + "epoch": 1.89, + "learning_rate": 6.3956143798886265e-06, + "loss": 0.4198, + "step": 11061 + }, + { + "epoch": 1.89, + "learning_rate": 6.393896825342873e-06, + "loss": 0.4207, + "step": 11062 + }, + { + "epoch": 1.89, + "learning_rate": 6.392179393069654e-06, + "loss": 0.4273, + "step": 11063 + }, + { + "epoch": 1.89, + "learning_rate": 6.390462083127207e-06, + "loss": 0.4075, + "step": 11064 + }, + { + "epoch": 1.89, + "learning_rate": 6.38874489557376e-06, + "loss": 0.4275, + "step": 11065 + }, + { + "epoch": 1.89, + "learning_rate": 6.387027830467538e-06, + "loss": 0.4409, + "step": 11066 + }, + { + "epoch": 1.89, + "learning_rate": 6.385310887866766e-06, + "loss": 0.4059, + "step": 11067 + }, + { + "epoch": 1.89, + "learning_rate": 6.383594067829652e-06, + "loss": 0.4421, + "step": 11068 + }, + { + "epoch": 1.89, + "learning_rate": 6.381877370414411e-06, + "loss": 0.3833, + "step": 11069 + }, + { + "epoch": 1.89, + "learning_rate": 6.3801607956792574e-06, + "loss": 0.4175, + "step": 11070 + }, + { + "epoch": 1.89, + "learning_rate": 6.378444343682387e-06, + "loss": 0.3986, + "step": 11071 + }, + { + "epoch": 1.89, + "learning_rate": 6.3767280144820046e-06, + "loss": 0.4465, + "step": 11072 + }, + { + "epoch": 1.89, + "learning_rate": 6.375011808136304e-06, + "loss": 0.413, + "step": 11073 + }, + { + "epoch": 1.89, + "learning_rate": 6.373295724703481e-06, + "loss": 0.4465, + "step": 11074 + }, + { + "epoch": 1.89, + "learning_rate": 6.371579764241718e-06, + "loss": 0.427, + "step": 11075 + }, + { + "epoch": 1.89, + "learning_rate": 6.369863926809198e-06, + "loss": 0.4246, + "step": 11076 + }, + { + "epoch": 1.89, + "learning_rate": 6.368148212464103e-06, + "loss": 0.4423, + "step": 11077 + }, + { + "epoch": 1.89, + "learning_rate": 6.3664326212646065e-06, + "loss": 0.3988, + "step": 11078 + }, + { + "epoch": 1.89, + "learning_rate": 6.36471715326888e-06, + "loss": 0.4167, + "step": 11079 + }, + { + "epoch": 1.89, + "learning_rate": 6.36300180853509e-06, + "loss": 0.4545, + "step": 11080 + }, + { + "epoch": 1.89, + "learning_rate": 6.361286587121398e-06, + "loss": 0.4057, + "step": 11081 + }, + { + "epoch": 1.89, + "learning_rate": 6.3595714890859665e-06, + "loss": 0.4101, + "step": 11082 + }, + { + "epoch": 1.89, + "learning_rate": 6.357856514486942e-06, + "loss": 0.4327, + "step": 11083 + }, + { + "epoch": 1.89, + "learning_rate": 6.356141663382478e-06, + "loss": 0.4428, + "step": 11084 + }, + { + "epoch": 1.89, + "learning_rate": 6.35442693583072e-06, + "loss": 0.4423, + "step": 11085 + }, + { + "epoch": 1.89, + "learning_rate": 6.3527123318898125e-06, + "loss": 0.4254, + "step": 11086 + }, + { + "epoch": 1.89, + "learning_rate": 6.350997851617888e-06, + "loss": 0.4115, + "step": 11087 + }, + { + "epoch": 1.89, + "learning_rate": 6.349283495073081e-06, + "loss": 0.4018, + "step": 11088 + }, + { + "epoch": 1.89, + "learning_rate": 6.3475692623135255e-06, + "loss": 0.3877, + "step": 11089 + }, + { + "epoch": 1.89, + "learning_rate": 6.345855153397338e-06, + "loss": 0.3907, + "step": 11090 + }, + { + "epoch": 1.89, + "learning_rate": 6.344141168382641e-06, + "loss": 0.4133, + "step": 11091 + }, + { + "epoch": 1.89, + "learning_rate": 6.342427307327554e-06, + "loss": 0.4243, + "step": 11092 + }, + { + "epoch": 1.89, + "learning_rate": 6.340713570290187e-06, + "loss": 0.4014, + "step": 11093 + }, + { + "epoch": 1.89, + "learning_rate": 6.338999957328651e-06, + "loss": 0.4046, + "step": 11094 + }, + { + "epoch": 1.89, + "learning_rate": 6.337286468501044e-06, + "loss": 0.4572, + "step": 11095 + }, + { + "epoch": 1.89, + "learning_rate": 6.335573103865469e-06, + "loss": 0.451, + "step": 11096 + }, + { + "epoch": 1.89, + "learning_rate": 6.333859863480024e-06, + "loss": 0.429, + "step": 11097 + }, + { + "epoch": 1.89, + "learning_rate": 6.332146747402793e-06, + "loss": 0.4369, + "step": 11098 + }, + { + "epoch": 1.89, + "learning_rate": 6.330433755691867e-06, + "loss": 0.4244, + "step": 11099 + }, + { + "epoch": 1.89, + "learning_rate": 6.328720888405329e-06, + "loss": 0.4189, + "step": 11100 + }, + { + "epoch": 1.89, + "learning_rate": 6.3270081456012544e-06, + "loss": 0.4065, + "step": 11101 + }, + { + "epoch": 1.89, + "learning_rate": 6.325295527337723e-06, + "loss": 0.4251, + "step": 11102 + }, + { + "epoch": 1.89, + "learning_rate": 6.323583033672799e-06, + "loss": 0.3929, + "step": 11103 + }, + { + "epoch": 1.89, + "learning_rate": 6.321870664664555e-06, + "loss": 0.4423, + "step": 11104 + }, + { + "epoch": 1.89, + "learning_rate": 6.320158420371043e-06, + "loss": 0.4451, + "step": 11105 + }, + { + "epoch": 1.89, + "learning_rate": 6.318446300850325e-06, + "loss": 0.4248, + "step": 11106 + }, + { + "epoch": 1.89, + "learning_rate": 6.316734306160455e-06, + "loss": 0.4187, + "step": 11107 + }, + { + "epoch": 1.89, + "learning_rate": 6.315022436359479e-06, + "loss": 0.3882, + "step": 11108 + }, + { + "epoch": 1.89, + "learning_rate": 6.3133106915054475e-06, + "loss": 0.4291, + "step": 11109 + }, + { + "epoch": 1.89, + "learning_rate": 6.311599071656393e-06, + "loss": 0.4368, + "step": 11110 + }, + { + "epoch": 1.89, + "learning_rate": 6.309887576870357e-06, + "loss": 0.4111, + "step": 11111 + }, + { + "epoch": 1.9, + "learning_rate": 6.308176207205373e-06, + "loss": 0.4273, + "step": 11112 + }, + { + "epoch": 1.9, + "learning_rate": 6.306464962719461e-06, + "loss": 0.4063, + "step": 11113 + }, + { + "epoch": 1.9, + "learning_rate": 6.30475384347065e-06, + "loss": 0.3545, + "step": 11114 + }, + { + "epoch": 1.9, + "learning_rate": 6.303042849516956e-06, + "loss": 0.4421, + "step": 11115 + }, + { + "epoch": 1.9, + "learning_rate": 6.301331980916396e-06, + "loss": 0.4287, + "step": 11116 + }, + { + "epoch": 1.9, + "learning_rate": 6.299621237726983e-06, + "loss": 0.4155, + "step": 11117 + }, + { + "epoch": 1.9, + "learning_rate": 6.297910620006719e-06, + "loss": 0.4297, + "step": 11118 + }, + { + "epoch": 1.9, + "learning_rate": 6.296200127813608e-06, + "loss": 0.3831, + "step": 11119 + }, + { + "epoch": 1.9, + "learning_rate": 6.294489761205649e-06, + "loss": 0.4042, + "step": 11120 + }, + { + "epoch": 1.9, + "learning_rate": 6.292779520240833e-06, + "loss": 0.3951, + "step": 11121 + }, + { + "epoch": 1.9, + "learning_rate": 6.291069404977148e-06, + "loss": 0.4138, + "step": 11122 + }, + { + "epoch": 1.9, + "learning_rate": 6.289359415472583e-06, + "loss": 0.4534, + "step": 11123 + }, + { + "epoch": 1.9, + "learning_rate": 6.287649551785118e-06, + "loss": 0.4357, + "step": 11124 + }, + { + "epoch": 1.9, + "learning_rate": 6.285939813972729e-06, + "loss": 0.449, + "step": 11125 + }, + { + "epoch": 1.9, + "learning_rate": 6.284230202093386e-06, + "loss": 0.4186, + "step": 11126 + }, + { + "epoch": 1.9, + "learning_rate": 6.282520716205065e-06, + "loss": 0.4233, + "step": 11127 + }, + { + "epoch": 1.9, + "learning_rate": 6.280811356365719e-06, + "loss": 0.4126, + "step": 11128 + }, + { + "epoch": 1.9, + "learning_rate": 6.2791021226333134e-06, + "loss": 0.4063, + "step": 11129 + }, + { + "epoch": 1.9, + "learning_rate": 6.277393015065802e-06, + "loss": 0.4185, + "step": 11130 + }, + { + "epoch": 1.9, + "learning_rate": 6.275684033721134e-06, + "loss": 0.4328, + "step": 11131 + }, + { + "epoch": 1.9, + "learning_rate": 6.27397517865726e-06, + "loss": 0.4243, + "step": 11132 + }, + { + "epoch": 1.9, + "learning_rate": 6.2722664499321214e-06, + "loss": 0.4441, + "step": 11133 + }, + { + "epoch": 1.9, + "learning_rate": 6.270557847603656e-06, + "loss": 0.427, + "step": 11134 + }, + { + "epoch": 1.9, + "learning_rate": 6.268849371729797e-06, + "loss": 0.3961, + "step": 11135 + }, + { + "epoch": 1.9, + "learning_rate": 6.26714102236847e-06, + "loss": 0.3974, + "step": 11136 + }, + { + "epoch": 1.9, + "learning_rate": 6.265432799577606e-06, + "loss": 0.4099, + "step": 11137 + }, + { + "epoch": 1.9, + "learning_rate": 6.263724703415122e-06, + "loss": 0.4439, + "step": 11138 + }, + { + "epoch": 1.9, + "learning_rate": 6.262016733938937e-06, + "loss": 0.3924, + "step": 11139 + }, + { + "epoch": 1.9, + "learning_rate": 6.260308891206963e-06, + "loss": 0.4407, + "step": 11140 + }, + { + "epoch": 1.9, + "learning_rate": 6.25860117527711e-06, + "loss": 0.4111, + "step": 11141 + }, + { + "epoch": 1.9, + "learning_rate": 6.256893586207278e-06, + "loss": 0.4121, + "step": 11142 + }, + { + "epoch": 1.9, + "learning_rate": 6.255186124055368e-06, + "loss": 0.3892, + "step": 11143 + }, + { + "epoch": 1.9, + "learning_rate": 6.253478788879274e-06, + "loss": 0.4179, + "step": 11144 + }, + { + "epoch": 1.9, + "learning_rate": 6.251771580736887e-06, + "loss": 0.4163, + "step": 11145 + }, + { + "epoch": 1.9, + "learning_rate": 6.250064499686095e-06, + "loss": 0.4135, + "step": 11146 + }, + { + "epoch": 1.9, + "learning_rate": 6.2483575457847775e-06, + "loss": 0.3905, + "step": 11147 + }, + { + "epoch": 1.9, + "learning_rate": 6.246650719090818e-06, + "loss": 0.4143, + "step": 11148 + }, + { + "epoch": 1.9, + "learning_rate": 6.244944019662086e-06, + "loss": 0.4291, + "step": 11149 + }, + { + "epoch": 1.9, + "learning_rate": 6.24323744755645e-06, + "loss": 0.3961, + "step": 11150 + }, + { + "epoch": 1.9, + "learning_rate": 6.241531002831775e-06, + "loss": 0.4012, + "step": 11151 + }, + { + "epoch": 1.9, + "learning_rate": 6.239824685545923e-06, + "loss": 0.4216, + "step": 11152 + }, + { + "epoch": 1.9, + "learning_rate": 6.238118495756749e-06, + "loss": 0.4467, + "step": 11153 + }, + { + "epoch": 1.9, + "learning_rate": 6.236412433522107e-06, + "loss": 0.4108, + "step": 11154 + }, + { + "epoch": 1.9, + "learning_rate": 6.234706498899843e-06, + "loss": 0.4137, + "step": 11155 + }, + { + "epoch": 1.9, + "learning_rate": 6.2330006919478014e-06, + "loss": 0.3998, + "step": 11156 + }, + { + "epoch": 1.9, + "learning_rate": 6.231295012723823e-06, + "loss": 0.388, + "step": 11157 + }, + { + "epoch": 1.9, + "learning_rate": 6.229589461285737e-06, + "loss": 0.4185, + "step": 11158 + }, + { + "epoch": 1.9, + "learning_rate": 6.227884037691378e-06, + "loss": 0.4018, + "step": 11159 + }, + { + "epoch": 1.9, + "learning_rate": 6.22617874199857e-06, + "loss": 0.3816, + "step": 11160 + }, + { + "epoch": 1.9, + "learning_rate": 6.224473574265133e-06, + "loss": 0.4182, + "step": 11161 + }, + { + "epoch": 1.9, + "learning_rate": 6.222768534548889e-06, + "loss": 0.4237, + "step": 11162 + }, + { + "epoch": 1.9, + "learning_rate": 6.221063622907648e-06, + "loss": 0.3904, + "step": 11163 + }, + { + "epoch": 1.9, + "learning_rate": 6.219358839399223e-06, + "loss": 0.3735, + "step": 11164 + }, + { + "epoch": 1.9, + "learning_rate": 6.217654184081412e-06, + "loss": 0.452, + "step": 11165 + }, + { + "epoch": 1.9, + "learning_rate": 6.215949657012018e-06, + "loss": 0.4207, + "step": 11166 + }, + { + "epoch": 1.9, + "learning_rate": 6.214245258248835e-06, + "loss": 0.4408, + "step": 11167 + }, + { + "epoch": 1.9, + "learning_rate": 6.212540987849655e-06, + "loss": 0.4347, + "step": 11168 + }, + { + "epoch": 1.9, + "learning_rate": 6.210836845872266e-06, + "loss": 0.3813, + "step": 11169 + }, + { + "epoch": 1.91, + "learning_rate": 6.209132832374448e-06, + "loss": 0.4193, + "step": 11170 + }, + { + "epoch": 1.91, + "learning_rate": 6.207428947413982e-06, + "loss": 0.422, + "step": 11171 + }, + { + "epoch": 1.91, + "learning_rate": 6.205725191048645e-06, + "loss": 0.3957, + "step": 11172 + }, + { + "epoch": 1.91, + "learning_rate": 6.204021563336198e-06, + "loss": 0.445, + "step": 11173 + }, + { + "epoch": 1.91, + "learning_rate": 6.202318064334411e-06, + "loss": 0.4176, + "step": 11174 + }, + { + "epoch": 1.91, + "learning_rate": 6.200614694101045e-06, + "loss": 0.4213, + "step": 11175 + }, + { + "epoch": 1.91, + "learning_rate": 6.1989114526938535e-06, + "loss": 0.4041, + "step": 11176 + }, + { + "epoch": 1.91, + "learning_rate": 6.197208340170591e-06, + "loss": 0.4175, + "step": 11177 + }, + { + "epoch": 1.91, + "learning_rate": 6.195505356589004e-06, + "loss": 0.4285, + "step": 11178 + }, + { + "epoch": 1.91, + "learning_rate": 6.1938025020068405e-06, + "loss": 0.3998, + "step": 11179 + }, + { + "epoch": 1.91, + "learning_rate": 6.19209977648183e-06, + "loss": 0.386, + "step": 11180 + }, + { + "epoch": 1.91, + "learning_rate": 6.190397180071714e-06, + "loss": 0.4139, + "step": 11181 + }, + { + "epoch": 1.91, + "learning_rate": 6.1886947128342224e-06, + "loss": 0.3906, + "step": 11182 + }, + { + "epoch": 1.91, + "learning_rate": 6.186992374827077e-06, + "loss": 0.3949, + "step": 11183 + }, + { + "epoch": 1.91, + "learning_rate": 6.185290166108e-06, + "loss": 0.399, + "step": 11184 + }, + { + "epoch": 1.91, + "learning_rate": 6.183588086734711e-06, + "loss": 0.4219, + "step": 11185 + }, + { + "epoch": 1.91, + "learning_rate": 6.181886136764922e-06, + "loss": 0.398, + "step": 11186 + }, + { + "epoch": 1.91, + "learning_rate": 6.180184316256344e-06, + "loss": 0.4318, + "step": 11187 + }, + { + "epoch": 1.91, + "learning_rate": 6.178482625266674e-06, + "loss": 0.4341, + "step": 11188 + }, + { + "epoch": 1.91, + "learning_rate": 6.176781063853614e-06, + "loss": 0.4196, + "step": 11189 + }, + { + "epoch": 1.91, + "learning_rate": 6.175079632074863e-06, + "loss": 0.4129, + "step": 11190 + }, + { + "epoch": 1.91, + "learning_rate": 6.173378329988107e-06, + "loss": 0.4311, + "step": 11191 + }, + { + "epoch": 1.91, + "learning_rate": 6.171677157651033e-06, + "loss": 0.3896, + "step": 11192 + }, + { + "epoch": 1.91, + "learning_rate": 6.169976115121323e-06, + "loss": 0.4374, + "step": 11193 + }, + { + "epoch": 1.91, + "learning_rate": 6.1682752024566595e-06, + "loss": 0.4178, + "step": 11194 + }, + { + "epoch": 1.91, + "learning_rate": 6.166574419714708e-06, + "loss": 0.4038, + "step": 11195 + }, + { + "epoch": 1.91, + "learning_rate": 6.164873766953139e-06, + "loss": 0.4288, + "step": 11196 + }, + { + "epoch": 1.91, + "learning_rate": 6.163173244229618e-06, + "loss": 0.4082, + "step": 11197 + }, + { + "epoch": 1.91, + "learning_rate": 6.161472851601808e-06, + "loss": 0.4517, + "step": 11198 + }, + { + "epoch": 1.91, + "learning_rate": 6.159772589127358e-06, + "loss": 0.4326, + "step": 11199 + }, + { + "epoch": 1.91, + "learning_rate": 6.1580724568639215e-06, + "loss": 0.3994, + "step": 11200 + }, + { + "epoch": 1.91, + "learning_rate": 6.1563724548691476e-06, + "loss": 0.4368, + "step": 11201 + }, + { + "epoch": 1.91, + "learning_rate": 6.154672583200681e-06, + "loss": 0.3785, + "step": 11202 + }, + { + "epoch": 1.91, + "learning_rate": 6.1529728419161486e-06, + "loss": 0.4121, + "step": 11203 + }, + { + "epoch": 1.91, + "learning_rate": 6.151273231073191e-06, + "loss": 0.4015, + "step": 11204 + }, + { + "epoch": 1.91, + "learning_rate": 6.149573750729436e-06, + "loss": 0.4129, + "step": 11205 + }, + { + "epoch": 1.91, + "learning_rate": 6.147874400942511e-06, + "loss": 0.4305, + "step": 11206 + }, + { + "epoch": 1.91, + "learning_rate": 6.146175181770031e-06, + "loss": 0.4013, + "step": 11207 + }, + { + "epoch": 1.91, + "learning_rate": 6.144476093269615e-06, + "loss": 0.418, + "step": 11208 + }, + { + "epoch": 1.91, + "learning_rate": 6.142777135498877e-06, + "loss": 0.4121, + "step": 11209 + }, + { + "epoch": 1.91, + "learning_rate": 6.141078308515414e-06, + "loss": 0.418, + "step": 11210 + }, + { + "epoch": 1.91, + "learning_rate": 6.139379612376834e-06, + "loss": 0.4067, + "step": 11211 + }, + { + "epoch": 1.91, + "learning_rate": 6.137681047140738e-06, + "loss": 0.3827, + "step": 11212 + }, + { + "epoch": 1.91, + "learning_rate": 6.135982612864716e-06, + "loss": 0.4088, + "step": 11213 + }, + { + "epoch": 1.91, + "learning_rate": 6.134284309606357e-06, + "loss": 0.441, + "step": 11214 + }, + { + "epoch": 1.91, + "learning_rate": 6.132586137423244e-06, + "loss": 0.3807, + "step": 11215 + }, + { + "epoch": 1.91, + "learning_rate": 6.130888096372961e-06, + "loss": 0.3969, + "step": 11216 + }, + { + "epoch": 1.91, + "learning_rate": 6.1291901865130854e-06, + "loss": 0.418, + "step": 11217 + }, + { + "epoch": 1.91, + "learning_rate": 6.127492407901181e-06, + "loss": 0.4339, + "step": 11218 + }, + { + "epoch": 1.91, + "learning_rate": 6.125794760594819e-06, + "loss": 0.4142, + "step": 11219 + }, + { + "epoch": 1.91, + "learning_rate": 6.124097244651559e-06, + "loss": 0.4393, + "step": 11220 + }, + { + "epoch": 1.91, + "learning_rate": 6.122399860128965e-06, + "loss": 0.4187, + "step": 11221 + }, + { + "epoch": 1.91, + "learning_rate": 6.120702607084583e-06, + "loss": 0.4392, + "step": 11222 + }, + { + "epoch": 1.91, + "learning_rate": 6.119005485575966e-06, + "loss": 0.4286, + "step": 11223 + }, + { + "epoch": 1.91, + "learning_rate": 6.1173084956606585e-06, + "loss": 0.4221, + "step": 11224 + }, + { + "epoch": 1.91, + "learning_rate": 6.115611637396204e-06, + "loss": 0.4203, + "step": 11225 + }, + { + "epoch": 1.91, + "learning_rate": 6.113914910840129e-06, + "loss": 0.3958, + "step": 11226 + }, + { + "epoch": 1.91, + "learning_rate": 6.1122183160499695e-06, + "loss": 0.4319, + "step": 11227 + }, + { + "epoch": 1.91, + "learning_rate": 6.110521853083251e-06, + "loss": 0.4106, + "step": 11228 + }, + { + "epoch": 1.92, + "learning_rate": 6.108825521997501e-06, + "loss": 0.41, + "step": 11229 + }, + { + "epoch": 1.92, + "learning_rate": 6.1071293228502294e-06, + "loss": 0.3912, + "step": 11230 + }, + { + "epoch": 1.92, + "learning_rate": 6.105433255698954e-06, + "loss": 0.4153, + "step": 11231 + }, + { + "epoch": 1.92, + "learning_rate": 6.103737320601186e-06, + "loss": 0.4525, + "step": 11232 + }, + { + "epoch": 1.92, + "learning_rate": 6.102041517614423e-06, + "loss": 0.368, + "step": 11233 + }, + { + "epoch": 1.92, + "learning_rate": 6.100345846796167e-06, + "loss": 0.4249, + "step": 11234 + }, + { + "epoch": 1.92, + "learning_rate": 6.098650308203915e-06, + "loss": 0.419, + "step": 11235 + }, + { + "epoch": 1.92, + "learning_rate": 6.0969549018951554e-06, + "loss": 0.3867, + "step": 11236 + }, + { + "epoch": 1.92, + "learning_rate": 6.095259627927379e-06, + "loss": 0.4352, + "step": 11237 + }, + { + "epoch": 1.92, + "learning_rate": 6.093564486358063e-06, + "loss": 0.443, + "step": 11238 + }, + { + "epoch": 1.92, + "learning_rate": 6.091869477244688e-06, + "loss": 0.3962, + "step": 11239 + }, + { + "epoch": 1.92, + "learning_rate": 6.090174600644728e-06, + "loss": 0.3847, + "step": 11240 + }, + { + "epoch": 1.92, + "learning_rate": 6.088479856615646e-06, + "loss": 0.4599, + "step": 11241 + }, + { + "epoch": 1.92, + "learning_rate": 6.086785245214908e-06, + "loss": 0.383, + "step": 11242 + }, + { + "epoch": 1.92, + "learning_rate": 6.085090766499972e-06, + "loss": 0.4272, + "step": 11243 + }, + { + "epoch": 1.92, + "learning_rate": 6.083396420528298e-06, + "loss": 0.4164, + "step": 11244 + }, + { + "epoch": 1.92, + "learning_rate": 6.081702207357334e-06, + "loss": 0.4296, + "step": 11245 + }, + { + "epoch": 1.92, + "learning_rate": 6.080008127044523e-06, + "loss": 0.4201, + "step": 11246 + }, + { + "epoch": 1.92, + "learning_rate": 6.078314179647313e-06, + "loss": 0.4381, + "step": 11247 + }, + { + "epoch": 1.92, + "learning_rate": 6.076620365223133e-06, + "loss": 0.4279, + "step": 11248 + }, + { + "epoch": 1.92, + "learning_rate": 6.074926683829418e-06, + "loss": 0.465, + "step": 11249 + }, + { + "epoch": 1.92, + "learning_rate": 6.073233135523596e-06, + "loss": 0.4257, + "step": 11250 + }, + { + "epoch": 1.92, + "learning_rate": 6.071539720363091e-06, + "loss": 0.4342, + "step": 11251 + }, + { + "epoch": 1.92, + "learning_rate": 6.069846438405322e-06, + "loss": 0.4014, + "step": 11252 + }, + { + "epoch": 1.92, + "learning_rate": 6.068153289707707e-06, + "loss": 0.4233, + "step": 11253 + }, + { + "epoch": 1.92, + "learning_rate": 6.0664602743276475e-06, + "loss": 0.412, + "step": 11254 + }, + { + "epoch": 1.92, + "learning_rate": 6.064767392322559e-06, + "loss": 0.4209, + "step": 11255 + }, + { + "epoch": 1.92, + "learning_rate": 6.063074643749831e-06, + "loss": 0.3945, + "step": 11256 + }, + { + "epoch": 1.92, + "learning_rate": 6.0613820286668645e-06, + "loss": 0.4088, + "step": 11257 + }, + { + "epoch": 1.92, + "learning_rate": 6.059689547131054e-06, + "loss": 0.3782, + "step": 11258 + }, + { + "epoch": 1.92, + "learning_rate": 6.057997199199781e-06, + "loss": 0.4426, + "step": 11259 + }, + { + "epoch": 1.92, + "learning_rate": 6.056304984930437e-06, + "loss": 0.4228, + "step": 11260 + }, + { + "epoch": 1.92, + "learning_rate": 6.0546129043803905e-06, + "loss": 0.4078, + "step": 11261 + }, + { + "epoch": 1.92, + "learning_rate": 6.052920957607022e-06, + "loss": 0.4153, + "step": 11262 + }, + { + "epoch": 1.92, + "learning_rate": 6.051229144667696e-06, + "loss": 0.3932, + "step": 11263 + }, + { + "epoch": 1.92, + "learning_rate": 6.049537465619779e-06, + "loss": 0.4058, + "step": 11264 + }, + { + "epoch": 1.92, + "learning_rate": 6.047845920520629e-06, + "loss": 0.4274, + "step": 11265 + }, + { + "epoch": 1.92, + "learning_rate": 6.046154509427604e-06, + "loss": 0.4198, + "step": 11266 + }, + { + "epoch": 1.92, + "learning_rate": 6.044463232398055e-06, + "loss": 0.4342, + "step": 11267 + }, + { + "epoch": 1.92, + "learning_rate": 6.042772089489327e-06, + "loss": 0.4253, + "step": 11268 + }, + { + "epoch": 1.92, + "learning_rate": 6.041081080758761e-06, + "loss": 0.3871, + "step": 11269 + }, + { + "epoch": 1.92, + "learning_rate": 6.039390206263697e-06, + "loss": 0.3843, + "step": 11270 + }, + { + "epoch": 1.92, + "learning_rate": 6.037699466061466e-06, + "loss": 0.4036, + "step": 11271 + }, + { + "epoch": 1.92, + "learning_rate": 6.036008860209393e-06, + "loss": 0.4045, + "step": 11272 + }, + { + "epoch": 1.92, + "learning_rate": 6.034318388764806e-06, + "loss": 0.4358, + "step": 11273 + }, + { + "epoch": 1.92, + "learning_rate": 6.032628051785022e-06, + "loss": 0.4065, + "step": 11274 + }, + { + "epoch": 1.92, + "learning_rate": 6.030937849327356e-06, + "loss": 0.469, + "step": 11275 + }, + { + "epoch": 1.92, + "learning_rate": 6.02924778144912e-06, + "loss": 0.3899, + "step": 11276 + }, + { + "epoch": 1.92, + "learning_rate": 6.027557848207617e-06, + "loss": 0.3821, + "step": 11277 + }, + { + "epoch": 1.92, + "learning_rate": 6.0258680496601485e-06, + "loss": 0.4309, + "step": 11278 + }, + { + "epoch": 1.92, + "learning_rate": 6.024178385864008e-06, + "loss": 0.4075, + "step": 11279 + }, + { + "epoch": 1.92, + "learning_rate": 6.022488856876489e-06, + "loss": 0.4244, + "step": 11280 + }, + { + "epoch": 1.92, + "learning_rate": 6.020799462754878e-06, + "loss": 0.4437, + "step": 11281 + }, + { + "epoch": 1.92, + "learning_rate": 6.0191102035564595e-06, + "loss": 0.4014, + "step": 11282 + }, + { + "epoch": 1.92, + "learning_rate": 6.01742107933851e-06, + "loss": 0.4046, + "step": 11283 + }, + { + "epoch": 1.92, + "learning_rate": 6.015732090158304e-06, + "loss": 0.4331, + "step": 11284 + }, + { + "epoch": 1.92, + "learning_rate": 6.014043236073111e-06, + "loss": 0.3964, + "step": 11285 + }, + { + "epoch": 1.92, + "learning_rate": 6.012354517140191e-06, + "loss": 0.4004, + "step": 11286 + }, + { + "epoch": 1.92, + "learning_rate": 6.010665933416806e-06, + "loss": 0.4239, + "step": 11287 + }, + { + "epoch": 1.93, + "learning_rate": 6.00897748496021e-06, + "loss": 0.4074, + "step": 11288 + }, + { + "epoch": 1.93, + "learning_rate": 6.007289171827655e-06, + "loss": 0.4374, + "step": 11289 + }, + { + "epoch": 1.93, + "learning_rate": 6.005600994076386e-06, + "loss": 0.4091, + "step": 11290 + }, + { + "epoch": 1.93, + "learning_rate": 6.003912951763644e-06, + "loss": 0.399, + "step": 11291 + }, + { + "epoch": 1.93, + "learning_rate": 6.00222504494667e-06, + "loss": 0.4022, + "step": 11292 + }, + { + "epoch": 1.93, + "learning_rate": 6.000537273682689e-06, + "loss": 0.444, + "step": 11293 + }, + { + "epoch": 1.93, + "learning_rate": 5.998849638028932e-06, + "loss": 0.394, + "step": 11294 + }, + { + "epoch": 1.93, + "learning_rate": 5.997162138042621e-06, + "loss": 0.4228, + "step": 11295 + }, + { + "epoch": 1.93, + "learning_rate": 5.995474773780974e-06, + "loss": 0.3949, + "step": 11296 + }, + { + "epoch": 1.93, + "learning_rate": 5.993787545301204e-06, + "loss": 0.4169, + "step": 11297 + }, + { + "epoch": 1.93, + "learning_rate": 5.992100452660523e-06, + "loss": 0.4227, + "step": 11298 + }, + { + "epoch": 1.93, + "learning_rate": 5.9904134959161345e-06, + "loss": 0.4199, + "step": 11299 + }, + { + "epoch": 1.93, + "learning_rate": 5.98872667512524e-06, + "loss": 0.3897, + "step": 11300 + }, + { + "epoch": 1.93, + "learning_rate": 5.987039990345028e-06, + "loss": 0.4308, + "step": 11301 + }, + { + "epoch": 1.93, + "learning_rate": 5.985353441632697e-06, + "loss": 0.4235, + "step": 11302 + }, + { + "epoch": 1.93, + "learning_rate": 5.983667029045426e-06, + "loss": 0.4268, + "step": 11303 + }, + { + "epoch": 1.93, + "learning_rate": 5.9819807526404e-06, + "loss": 0.3766, + "step": 11304 + }, + { + "epoch": 1.93, + "learning_rate": 5.980294612474796e-06, + "loss": 0.386, + "step": 11305 + }, + { + "epoch": 1.93, + "learning_rate": 5.978608608605786e-06, + "loss": 0.3897, + "step": 11306 + }, + { + "epoch": 1.93, + "learning_rate": 5.976922741090541e-06, + "loss": 0.4434, + "step": 11307 + }, + { + "epoch": 1.93, + "learning_rate": 5.975237009986216e-06, + "loss": 0.4167, + "step": 11308 + }, + { + "epoch": 1.93, + "learning_rate": 5.973551415349973e-06, + "loss": 0.4472, + "step": 11309 + }, + { + "epoch": 1.93, + "learning_rate": 5.97186595723897e-06, + "loss": 0.4028, + "step": 11310 + }, + { + "epoch": 1.93, + "learning_rate": 5.9701806357103474e-06, + "loss": 0.4318, + "step": 11311 + }, + { + "epoch": 1.93, + "learning_rate": 5.968495450821254e-06, + "loss": 0.3901, + "step": 11312 + }, + { + "epoch": 1.93, + "learning_rate": 5.966810402628831e-06, + "loss": 0.4524, + "step": 11313 + }, + { + "epoch": 1.93, + "learning_rate": 5.965125491190213e-06, + "loss": 0.4044, + "step": 11314 + }, + { + "epoch": 1.93, + "learning_rate": 5.963440716562533e-06, + "loss": 0.3927, + "step": 11315 + }, + { + "epoch": 1.93, + "learning_rate": 5.96175607880291e-06, + "loss": 0.429, + "step": 11316 + }, + { + "epoch": 1.93, + "learning_rate": 5.9600715779684674e-06, + "loss": 0.4438, + "step": 11317 + }, + { + "epoch": 1.93, + "learning_rate": 5.9583872141163256e-06, + "loss": 0.3943, + "step": 11318 + }, + { + "epoch": 1.93, + "learning_rate": 5.956702987303593e-06, + "loss": 0.4191, + "step": 11319 + }, + { + "epoch": 1.93, + "learning_rate": 5.9550188975873775e-06, + "loss": 0.4299, + "step": 11320 + }, + { + "epoch": 1.93, + "learning_rate": 5.953334945024783e-06, + "loss": 0.4191, + "step": 11321 + }, + { + "epoch": 1.93, + "learning_rate": 5.951651129672909e-06, + "loss": 0.42, + "step": 11322 + }, + { + "epoch": 1.93, + "learning_rate": 5.9499674515888425e-06, + "loss": 0.4116, + "step": 11323 + }, + { + "epoch": 1.93, + "learning_rate": 5.948283910829678e-06, + "loss": 0.4211, + "step": 11324 + }, + { + "epoch": 1.93, + "learning_rate": 5.946600507452498e-06, + "loss": 0.3888, + "step": 11325 + }, + { + "epoch": 1.93, + "learning_rate": 5.944917241514381e-06, + "loss": 0.43, + "step": 11326 + }, + { + "epoch": 1.93, + "learning_rate": 5.943234113072401e-06, + "loss": 0.421, + "step": 11327 + }, + { + "epoch": 1.93, + "learning_rate": 5.941551122183629e-06, + "loss": 0.4263, + "step": 11328 + }, + { + "epoch": 1.93, + "learning_rate": 5.939868268905129e-06, + "loss": 0.4306, + "step": 11329 + }, + { + "epoch": 1.93, + "learning_rate": 5.938185553293969e-06, + "loss": 0.405, + "step": 11330 + }, + { + "epoch": 1.93, + "learning_rate": 5.936502975407194e-06, + "loss": 0.4279, + "step": 11331 + }, + { + "epoch": 1.93, + "learning_rate": 5.934820535301861e-06, + "loss": 0.3951, + "step": 11332 + }, + { + "epoch": 1.93, + "learning_rate": 5.933138233035021e-06, + "loss": 0.4351, + "step": 11333 + }, + { + "epoch": 1.93, + "learning_rate": 5.9314560686637056e-06, + "loss": 0.3861, + "step": 11334 + }, + { + "epoch": 1.93, + "learning_rate": 5.92977404224496e-06, + "loss": 0.3972, + "step": 11335 + }, + { + "epoch": 1.93, + "learning_rate": 5.928092153835814e-06, + "loss": 0.4205, + "step": 11336 + }, + { + "epoch": 1.93, + "learning_rate": 5.926410403493299e-06, + "loss": 0.416, + "step": 11337 + }, + { + "epoch": 1.93, + "learning_rate": 5.924728791274432e-06, + "loss": 0.4232, + "step": 11338 + }, + { + "epoch": 1.93, + "learning_rate": 5.923047317236237e-06, + "loss": 0.41, + "step": 11339 + }, + { + "epoch": 1.93, + "learning_rate": 5.921365981435724e-06, + "loss": 0.429, + "step": 11340 + }, + { + "epoch": 1.93, + "learning_rate": 5.9196847839299065e-06, + "loss": 0.4372, + "step": 11341 + }, + { + "epoch": 1.93, + "learning_rate": 5.918003724775785e-06, + "loss": 0.4314, + "step": 11342 + }, + { + "epoch": 1.93, + "learning_rate": 5.916322804030361e-06, + "loss": 0.3823, + "step": 11343 + }, + { + "epoch": 1.93, + "learning_rate": 5.914642021750632e-06, + "loss": 0.3874, + "step": 11344 + }, + { + "epoch": 1.93, + "learning_rate": 5.9129613779935865e-06, + "loss": 0.4103, + "step": 11345 + }, + { + "epoch": 1.94, + "learning_rate": 5.91128087281621e-06, + "loss": 0.3558, + "step": 11346 + }, + { + "epoch": 1.94, + "learning_rate": 5.9096005062754814e-06, + "loss": 0.4077, + "step": 11347 + }, + { + "epoch": 1.94, + "learning_rate": 5.907920278428379e-06, + "loss": 0.432, + "step": 11348 + }, + { + "epoch": 1.94, + "learning_rate": 5.906240189331878e-06, + "loss": 0.3997, + "step": 11349 + }, + { + "epoch": 1.94, + "learning_rate": 5.90456023904294e-06, + "loss": 0.4506, + "step": 11350 + }, + { + "epoch": 1.94, + "learning_rate": 5.902880427618529e-06, + "loss": 0.3997, + "step": 11351 + }, + { + "epoch": 1.94, + "learning_rate": 5.901200755115608e-06, + "loss": 0.4536, + "step": 11352 + }, + { + "epoch": 1.94, + "learning_rate": 5.899521221591119e-06, + "loss": 0.4403, + "step": 11353 + }, + { + "epoch": 1.94, + "learning_rate": 5.897841827102017e-06, + "loss": 0.4155, + "step": 11354 + }, + { + "epoch": 1.94, + "learning_rate": 5.896162571705244e-06, + "loss": 0.4363, + "step": 11355 + }, + { + "epoch": 1.94, + "learning_rate": 5.89448345545774e-06, + "loss": 0.4036, + "step": 11356 + }, + { + "epoch": 1.94, + "learning_rate": 5.89280447841644e-06, + "loss": 0.3746, + "step": 11357 + }, + { + "epoch": 1.94, + "learning_rate": 5.891125640638269e-06, + "loss": 0.4298, + "step": 11358 + }, + { + "epoch": 1.94, + "learning_rate": 5.889446942180152e-06, + "loss": 0.4141, + "step": 11359 + }, + { + "epoch": 1.94, + "learning_rate": 5.887768383099017e-06, + "loss": 0.4052, + "step": 11360 + }, + { + "epoch": 1.94, + "learning_rate": 5.886089963451769e-06, + "loss": 0.4236, + "step": 11361 + }, + { + "epoch": 1.94, + "learning_rate": 5.8844116832953215e-06, + "loss": 0.4145, + "step": 11362 + }, + { + "epoch": 1.94, + "learning_rate": 5.88273354268658e-06, + "loss": 0.404, + "step": 11363 + }, + { + "epoch": 1.94, + "learning_rate": 5.8810555416824475e-06, + "loss": 0.4212, + "step": 11364 + }, + { + "epoch": 1.94, + "learning_rate": 5.879377680339818e-06, + "loss": 0.4079, + "step": 11365 + }, + { + "epoch": 1.94, + "learning_rate": 5.877699958715583e-06, + "loss": 0.3846, + "step": 11366 + }, + { + "epoch": 1.94, + "learning_rate": 5.876022376866636e-06, + "loss": 0.3935, + "step": 11367 + }, + { + "epoch": 1.94, + "learning_rate": 5.874344934849847e-06, + "loss": 0.4248, + "step": 11368 + }, + { + "epoch": 1.94, + "learning_rate": 5.8726676327221e-06, + "loss": 0.4089, + "step": 11369 + }, + { + "epoch": 1.94, + "learning_rate": 5.870990470540265e-06, + "loss": 0.4374, + "step": 11370 + }, + { + "epoch": 1.94, + "learning_rate": 5.869313448361213e-06, + "loss": 0.3823, + "step": 11371 + }, + { + "epoch": 1.94, + "learning_rate": 5.867636566241807e-06, + "loss": 0.4421, + "step": 11372 + }, + { + "epoch": 1.94, + "learning_rate": 5.865959824238901e-06, + "loss": 0.4382, + "step": 11373 + }, + { + "epoch": 1.94, + "learning_rate": 5.86428322240935e-06, + "loss": 0.4156, + "step": 11374 + }, + { + "epoch": 1.94, + "learning_rate": 5.86260676081001e-06, + "loss": 0.4086, + "step": 11375 + }, + { + "epoch": 1.94, + "learning_rate": 5.860930439497713e-06, + "loss": 0.4297, + "step": 11376 + }, + { + "epoch": 1.94, + "learning_rate": 5.859254258529303e-06, + "loss": 0.4249, + "step": 11377 + }, + { + "epoch": 1.94, + "learning_rate": 5.857578217961617e-06, + "loss": 0.4143, + "step": 11378 + }, + { + "epoch": 1.94, + "learning_rate": 5.855902317851483e-06, + "loss": 0.3907, + "step": 11379 + }, + { + "epoch": 1.94, + "learning_rate": 5.8542265582557275e-06, + "loss": 0.4063, + "step": 11380 + }, + { + "epoch": 1.94, + "learning_rate": 5.852550939231168e-06, + "loss": 0.4102, + "step": 11381 + }, + { + "epoch": 1.94, + "learning_rate": 5.8508754608346244e-06, + "loss": 0.3926, + "step": 11382 + }, + { + "epoch": 1.94, + "learning_rate": 5.849200123122898e-06, + "loss": 0.4091, + "step": 11383 + }, + { + "epoch": 1.94, + "learning_rate": 5.847524926152803e-06, + "loss": 0.4358, + "step": 11384 + }, + { + "epoch": 1.94, + "learning_rate": 5.845849869981137e-06, + "loss": 0.4171, + "step": 11385 + }, + { + "epoch": 1.94, + "learning_rate": 5.8441749546646975e-06, + "loss": 0.4198, + "step": 11386 + }, + { + "epoch": 1.94, + "learning_rate": 5.842500180260277e-06, + "loss": 0.4099, + "step": 11387 + }, + { + "epoch": 1.94, + "learning_rate": 5.840825546824662e-06, + "loss": 0.3953, + "step": 11388 + }, + { + "epoch": 1.94, + "learning_rate": 5.8391510544146316e-06, + "loss": 0.419, + "step": 11389 + }, + { + "epoch": 1.94, + "learning_rate": 5.837476703086969e-06, + "loss": 0.3957, + "step": 11390 + }, + { + "epoch": 1.94, + "learning_rate": 5.835802492898439e-06, + "loss": 0.4209, + "step": 11391 + }, + { + "epoch": 1.94, + "learning_rate": 5.834128423905814e-06, + "loss": 0.4179, + "step": 11392 + }, + { + "epoch": 1.94, + "learning_rate": 5.832454496165855e-06, + "loss": 0.4145, + "step": 11393 + }, + { + "epoch": 1.94, + "learning_rate": 5.83078070973532e-06, + "loss": 0.4086, + "step": 11394 + }, + { + "epoch": 1.94, + "learning_rate": 5.8291070646709645e-06, + "loss": 0.3998, + "step": 11395 + }, + { + "epoch": 1.94, + "learning_rate": 5.827433561029534e-06, + "loss": 0.4073, + "step": 11396 + }, + { + "epoch": 1.94, + "learning_rate": 5.8257601988677785e-06, + "loss": 0.419, + "step": 11397 + }, + { + "epoch": 1.94, + "learning_rate": 5.824086978242429e-06, + "loss": 0.4078, + "step": 11398 + }, + { + "epoch": 1.94, + "learning_rate": 5.822413899210222e-06, + "loss": 0.4347, + "step": 11399 + }, + { + "epoch": 1.94, + "learning_rate": 5.8207409618278865e-06, + "loss": 0.4359, + "step": 11400 + }, + { + "epoch": 1.94, + "learning_rate": 5.819068166152153e-06, + "loss": 0.4106, + "step": 11401 + }, + { + "epoch": 1.94, + "learning_rate": 5.817395512239732e-06, + "loss": 0.4002, + "step": 11402 + }, + { + "epoch": 1.94, + "learning_rate": 5.815723000147342e-06, + "loss": 0.4136, + "step": 11403 + }, + { + "epoch": 1.94, + "learning_rate": 5.814050629931693e-06, + "loss": 0.4415, + "step": 11404 + }, + { + "epoch": 1.95, + "learning_rate": 5.812378401649494e-06, + "loss": 0.4179, + "step": 11405 + }, + { + "epoch": 1.95, + "learning_rate": 5.8107063153574405e-06, + "loss": 0.4341, + "step": 11406 + }, + { + "epoch": 1.95, + "learning_rate": 5.8090343711122285e-06, + "loss": 0.3986, + "step": 11407 + }, + { + "epoch": 1.95, + "learning_rate": 5.807362568970552e-06, + "loss": 0.4128, + "step": 11408 + }, + { + "epoch": 1.95, + "learning_rate": 5.805690908989092e-06, + "loss": 0.4261, + "step": 11409 + }, + { + "epoch": 1.95, + "learning_rate": 5.804019391224534e-06, + "loss": 0.4412, + "step": 11410 + }, + { + "epoch": 1.95, + "learning_rate": 5.802348015733553e-06, + "loss": 0.4083, + "step": 11411 + }, + { + "epoch": 1.95, + "learning_rate": 5.8006767825728205e-06, + "loss": 0.431, + "step": 11412 + }, + { + "epoch": 1.95, + "learning_rate": 5.799005691799009e-06, + "loss": 0.4156, + "step": 11413 + }, + { + "epoch": 1.95, + "learning_rate": 5.797334743468767e-06, + "loss": 0.4183, + "step": 11414 + }, + { + "epoch": 1.95, + "learning_rate": 5.795663937638762e-06, + "loss": 0.4053, + "step": 11415 + }, + { + "epoch": 1.95, + "learning_rate": 5.793993274365644e-06, + "loss": 0.3916, + "step": 11416 + }, + { + "epoch": 1.95, + "learning_rate": 5.7923227537060615e-06, + "loss": 0.4117, + "step": 11417 + }, + { + "epoch": 1.95, + "learning_rate": 5.790652375716653e-06, + "loss": 0.4345, + "step": 11418 + }, + { + "epoch": 1.95, + "learning_rate": 5.788982140454056e-06, + "loss": 0.4407, + "step": 11419 + }, + { + "epoch": 1.95, + "learning_rate": 5.787312047974911e-06, + "loss": 0.4086, + "step": 11420 + }, + { + "epoch": 1.95, + "learning_rate": 5.785642098335838e-06, + "loss": 0.392, + "step": 11421 + }, + { + "epoch": 1.95, + "learning_rate": 5.783972291593461e-06, + "loss": 0.3876, + "step": 11422 + }, + { + "epoch": 1.95, + "learning_rate": 5.782302627804402e-06, + "loss": 0.4101, + "step": 11423 + }, + { + "epoch": 1.95, + "learning_rate": 5.7806331070252705e-06, + "loss": 0.3894, + "step": 11424 + }, + { + "epoch": 1.95, + "learning_rate": 5.778963729312679e-06, + "loss": 0.3997, + "step": 11425 + }, + { + "epoch": 1.95, + "learning_rate": 5.77729449472323e-06, + "loss": 0.4539, + "step": 11426 + }, + { + "epoch": 1.95, + "learning_rate": 5.775625403313522e-06, + "loss": 0.4083, + "step": 11427 + }, + { + "epoch": 1.95, + "learning_rate": 5.773956455140153e-06, + "loss": 0.4292, + "step": 11428 + }, + { + "epoch": 1.95, + "learning_rate": 5.7722876502597025e-06, + "loss": 0.4065, + "step": 11429 + }, + { + "epoch": 1.95, + "learning_rate": 5.770618988728762e-06, + "loss": 0.428, + "step": 11430 + }, + { + "epoch": 1.95, + "learning_rate": 5.768950470603911e-06, + "loss": 0.4222, + "step": 11431 + }, + { + "epoch": 1.95, + "learning_rate": 5.767282095941725e-06, + "loss": 0.3977, + "step": 11432 + }, + { + "epoch": 1.95, + "learning_rate": 5.765613864798768e-06, + "loss": 0.4076, + "step": 11433 + }, + { + "epoch": 1.95, + "learning_rate": 5.763945777231609e-06, + "loss": 0.4194, + "step": 11434 + }, + { + "epoch": 1.95, + "learning_rate": 5.762277833296811e-06, + "loss": 0.435, + "step": 11435 + }, + { + "epoch": 1.95, + "learning_rate": 5.760610033050922e-06, + "loss": 0.4227, + "step": 11436 + }, + { + "epoch": 1.95, + "learning_rate": 5.758942376550496e-06, + "loss": 0.4187, + "step": 11437 + }, + { + "epoch": 1.95, + "learning_rate": 5.757274863852078e-06, + "loss": 0.4084, + "step": 11438 + }, + { + "epoch": 1.95, + "learning_rate": 5.7556074950122085e-06, + "loss": 0.4074, + "step": 11439 + }, + { + "epoch": 1.95, + "learning_rate": 5.753940270087424e-06, + "loss": 0.4092, + "step": 11440 + }, + { + "epoch": 1.95, + "learning_rate": 5.752273189134256e-06, + "loss": 0.4131, + "step": 11441 + }, + { + "epoch": 1.95, + "learning_rate": 5.750606252209228e-06, + "loss": 0.3819, + "step": 11442 + }, + { + "epoch": 1.95, + "learning_rate": 5.748939459368865e-06, + "loss": 0.3941, + "step": 11443 + }, + { + "epoch": 1.95, + "learning_rate": 5.747272810669678e-06, + "loss": 0.4404, + "step": 11444 + }, + { + "epoch": 1.95, + "learning_rate": 5.745606306168181e-06, + "loss": 0.427, + "step": 11445 + }, + { + "epoch": 1.95, + "learning_rate": 5.743939945920879e-06, + "loss": 0.4184, + "step": 11446 + }, + { + "epoch": 1.95, + "learning_rate": 5.742273729984274e-06, + "loss": 0.431, + "step": 11447 + }, + { + "epoch": 1.95, + "learning_rate": 5.7406076584148685e-06, + "loss": 0.385, + "step": 11448 + }, + { + "epoch": 1.95, + "learning_rate": 5.738941731269144e-06, + "loss": 0.3797, + "step": 11449 + }, + { + "epoch": 1.95, + "learning_rate": 5.737275948603595e-06, + "loss": 0.4104, + "step": 11450 + }, + { + "epoch": 1.95, + "learning_rate": 5.735610310474698e-06, + "loss": 0.4199, + "step": 11451 + }, + { + "epoch": 1.95, + "learning_rate": 5.733944816938931e-06, + "loss": 0.4042, + "step": 11452 + }, + { + "epoch": 1.95, + "learning_rate": 5.732279468052768e-06, + "loss": 0.3936, + "step": 11453 + }, + { + "epoch": 1.95, + "learning_rate": 5.730614263872676e-06, + "loss": 0.4393, + "step": 11454 + }, + { + "epoch": 1.95, + "learning_rate": 5.7289492044551155e-06, + "loss": 0.399, + "step": 11455 + }, + { + "epoch": 1.95, + "learning_rate": 5.727284289856546e-06, + "loss": 0.4014, + "step": 11456 + }, + { + "epoch": 1.95, + "learning_rate": 5.725619520133418e-06, + "loss": 0.4248, + "step": 11457 + }, + { + "epoch": 1.95, + "learning_rate": 5.723954895342185e-06, + "loss": 0.4214, + "step": 11458 + }, + { + "epoch": 1.95, + "learning_rate": 5.722290415539282e-06, + "loss": 0.4095, + "step": 11459 + }, + { + "epoch": 1.95, + "learning_rate": 5.720626080781148e-06, + "loss": 0.4059, + "step": 11460 + }, + { + "epoch": 1.95, + "learning_rate": 5.718961891124217e-06, + "loss": 0.3784, + "step": 11461 + }, + { + "epoch": 1.95, + "learning_rate": 5.7172978466249175e-06, + "loss": 0.4127, + "step": 11462 + }, + { + "epoch": 1.95, + "learning_rate": 5.715633947339676e-06, + "loss": 0.4159, + "step": 11463 + }, + { + "epoch": 1.96, + "learning_rate": 5.713970193324903e-06, + "loss": 0.4583, + "step": 11464 + }, + { + "epoch": 1.96, + "learning_rate": 5.712306584637014e-06, + "loss": 0.3899, + "step": 11465 + }, + { + "epoch": 1.96, + "learning_rate": 5.710643121332423e-06, + "loss": 0.4262, + "step": 11466 + }, + { + "epoch": 1.96, + "learning_rate": 5.7089798034675245e-06, + "loss": 0.4444, + "step": 11467 + }, + { + "epoch": 1.96, + "learning_rate": 5.70731663109872e-06, + "loss": 0.3853, + "step": 11468 + }, + { + "epoch": 1.96, + "learning_rate": 5.705653604282406e-06, + "loss": 0.4069, + "step": 11469 + }, + { + "epoch": 1.96, + "learning_rate": 5.703990723074966e-06, + "loss": 0.4043, + "step": 11470 + }, + { + "epoch": 1.96, + "learning_rate": 5.7023279875327875e-06, + "loss": 0.4267, + "step": 11471 + }, + { + "epoch": 1.96, + "learning_rate": 5.700665397712248e-06, + "loss": 0.3959, + "step": 11472 + }, + { + "epoch": 1.96, + "learning_rate": 5.6990029536697235e-06, + "loss": 0.4139, + "step": 11473 + }, + { + "epoch": 1.96, + "learning_rate": 5.697340655461577e-06, + "loss": 0.4253, + "step": 11474 + }, + { + "epoch": 1.96, + "learning_rate": 5.695678503144175e-06, + "loss": 0.4174, + "step": 11475 + }, + { + "epoch": 1.96, + "learning_rate": 5.694016496773876e-06, + "loss": 0.3874, + "step": 11476 + }, + { + "epoch": 1.96, + "learning_rate": 5.692354636407036e-06, + "loss": 0.3998, + "step": 11477 + }, + { + "epoch": 1.96, + "learning_rate": 5.690692922100001e-06, + "loss": 0.4115, + "step": 11478 + }, + { + "epoch": 1.96, + "learning_rate": 5.68903135390912e-06, + "loss": 0.396, + "step": 11479 + }, + { + "epoch": 1.96, + "learning_rate": 5.687369931890722e-06, + "loss": 0.4256, + "step": 11480 + }, + { + "epoch": 1.96, + "learning_rate": 5.685708656101154e-06, + "loss": 0.4111, + "step": 11481 + }, + { + "epoch": 1.96, + "learning_rate": 5.684047526596735e-06, + "loss": 0.3959, + "step": 11482 + }, + { + "epoch": 1.96, + "learning_rate": 5.68238654343379e-06, + "loss": 0.3902, + "step": 11483 + }, + { + "epoch": 1.96, + "learning_rate": 5.680725706668641e-06, + "loss": 0.4189, + "step": 11484 + }, + { + "epoch": 1.96, + "learning_rate": 5.679065016357603e-06, + "loss": 0.4455, + "step": 11485 + }, + { + "epoch": 1.96, + "learning_rate": 5.677404472556982e-06, + "loss": 0.3905, + "step": 11486 + }, + { + "epoch": 1.96, + "learning_rate": 5.675744075323084e-06, + "loss": 0.3715, + "step": 11487 + }, + { + "epoch": 1.96, + "learning_rate": 5.6740838247122135e-06, + "loss": 0.4329, + "step": 11488 + }, + { + "epoch": 1.96, + "learning_rate": 5.672423720780656e-06, + "loss": 0.4324, + "step": 11489 + }, + { + "epoch": 1.96, + "learning_rate": 5.670763763584703e-06, + "loss": 0.3988, + "step": 11490 + }, + { + "epoch": 1.96, + "learning_rate": 5.6691039531806415e-06, + "loss": 0.4607, + "step": 11491 + }, + { + "epoch": 1.96, + "learning_rate": 5.667444289624748e-06, + "loss": 0.4231, + "step": 11492 + }, + { + "epoch": 1.96, + "learning_rate": 5.6657847729732996e-06, + "loss": 0.4271, + "step": 11493 + }, + { + "epoch": 1.96, + "learning_rate": 5.6641254032825656e-06, + "loss": 0.395, + "step": 11494 + }, + { + "epoch": 1.96, + "learning_rate": 5.662466180608812e-06, + "loss": 0.4188, + "step": 11495 + }, + { + "epoch": 1.96, + "learning_rate": 5.660807105008292e-06, + "loss": 0.4078, + "step": 11496 + }, + { + "epoch": 1.96, + "learning_rate": 5.659148176537268e-06, + "loss": 0.3953, + "step": 11497 + }, + { + "epoch": 1.96, + "learning_rate": 5.657489395251982e-06, + "loss": 0.3945, + "step": 11498 + }, + { + "epoch": 1.96, + "learning_rate": 5.655830761208681e-06, + "loss": 0.4135, + "step": 11499 + }, + { + "epoch": 1.96, + "learning_rate": 5.6541722744636066e-06, + "loss": 0.4172, + "step": 11500 + }, + { + "epoch": 1.96, + "learning_rate": 5.6525139350729904e-06, + "loss": 0.4058, + "step": 11501 + }, + { + "epoch": 1.96, + "learning_rate": 5.650855743093064e-06, + "loss": 0.42, + "step": 11502 + }, + { + "epoch": 1.96, + "learning_rate": 5.649197698580056e-06, + "loss": 0.3932, + "step": 11503 + }, + { + "epoch": 1.96, + "learning_rate": 5.647539801590176e-06, + "loss": 0.4126, + "step": 11504 + }, + { + "epoch": 1.96, + "learning_rate": 5.645882052179644e-06, + "loss": 0.434, + "step": 11505 + }, + { + "epoch": 1.96, + "learning_rate": 5.64422445040467e-06, + "loss": 0.4133, + "step": 11506 + }, + { + "epoch": 1.96, + "learning_rate": 5.642566996321457e-06, + "loss": 0.4232, + "step": 11507 + }, + { + "epoch": 1.96, + "learning_rate": 5.640909689986206e-06, + "loss": 0.4128, + "step": 11508 + }, + { + "epoch": 1.96, + "learning_rate": 5.63925253145511e-06, + "loss": 0.4272, + "step": 11509 + }, + { + "epoch": 1.96, + "learning_rate": 5.6375955207843625e-06, + "loss": 0.432, + "step": 11510 + }, + { + "epoch": 1.96, + "learning_rate": 5.6359386580301425e-06, + "loss": 0.4351, + "step": 11511 + }, + { + "epoch": 1.96, + "learning_rate": 5.634281943248629e-06, + "loss": 0.4454, + "step": 11512 + }, + { + "epoch": 1.96, + "learning_rate": 5.632625376496006e-06, + "loss": 0.4362, + "step": 11513 + }, + { + "epoch": 1.96, + "learning_rate": 5.630968957828429e-06, + "loss": 0.4124, + "step": 11514 + }, + { + "epoch": 1.96, + "learning_rate": 5.629312687302071e-06, + "loss": 0.379, + "step": 11515 + }, + { + "epoch": 1.96, + "learning_rate": 5.627656564973088e-06, + "loss": 0.4005, + "step": 11516 + }, + { + "epoch": 1.96, + "learning_rate": 5.626000590897637e-06, + "loss": 0.3759, + "step": 11517 + }, + { + "epoch": 1.96, + "learning_rate": 5.6243447651318705e-06, + "loss": 0.3984, + "step": 11518 + }, + { + "epoch": 1.96, + "learning_rate": 5.622689087731923e-06, + "loss": 0.4373, + "step": 11519 + }, + { + "epoch": 1.96, + "learning_rate": 5.621033558753939e-06, + "loss": 0.3954, + "step": 11520 + }, + { + "epoch": 1.96, + "learning_rate": 5.619378178254053e-06, + "loss": 0.4063, + "step": 11521 + }, + { + "epoch": 1.97, + "learning_rate": 5.617722946288396e-06, + "loss": 0.4016, + "step": 11522 + }, + { + "epoch": 1.97, + "learning_rate": 5.616067862913088e-06, + "loss": 0.4211, + "step": 11523 + }, + { + "epoch": 1.97, + "learning_rate": 5.614412928184252e-06, + "loss": 0.4241, + "step": 11524 + }, + { + "epoch": 1.97, + "learning_rate": 5.612758142158004e-06, + "loss": 0.3904, + "step": 11525 + }, + { + "epoch": 1.97, + "learning_rate": 5.611103504890444e-06, + "loss": 0.4272, + "step": 11526 + }, + { + "epoch": 1.97, + "learning_rate": 5.609449016437682e-06, + "loss": 0.3875, + "step": 11527 + }, + { + "epoch": 1.97, + "learning_rate": 5.60779467685582e-06, + "loss": 0.4275, + "step": 11528 + }, + { + "epoch": 1.97, + "learning_rate": 5.606140486200944e-06, + "loss": 0.4083, + "step": 11529 + }, + { + "epoch": 1.97, + "learning_rate": 5.604486444529147e-06, + "loss": 0.4227, + "step": 11530 + }, + { + "epoch": 1.97, + "learning_rate": 5.602832551896513e-06, + "loss": 0.4228, + "step": 11531 + }, + { + "epoch": 1.97, + "learning_rate": 5.60117880835912e-06, + "loss": 0.3909, + "step": 11532 + }, + { + "epoch": 1.97, + "learning_rate": 5.599525213973046e-06, + "loss": 0.4037, + "step": 11533 + }, + { + "epoch": 1.97, + "learning_rate": 5.597871768794352e-06, + "loss": 0.4409, + "step": 11534 + }, + { + "epoch": 1.97, + "learning_rate": 5.596218472879105e-06, + "loss": 0.423, + "step": 11535 + }, + { + "epoch": 1.97, + "learning_rate": 5.594565326283363e-06, + "loss": 0.4089, + "step": 11536 + }, + { + "epoch": 1.97, + "learning_rate": 5.5929123290631815e-06, + "loss": 0.4353, + "step": 11537 + }, + { + "epoch": 1.97, + "learning_rate": 5.591259481274606e-06, + "loss": 0.4389, + "step": 11538 + }, + { + "epoch": 1.97, + "learning_rate": 5.589606782973683e-06, + "loss": 0.4164, + "step": 11539 + }, + { + "epoch": 1.97, + "learning_rate": 5.587954234216453e-06, + "loss": 0.3722, + "step": 11540 + }, + { + "epoch": 1.97, + "learning_rate": 5.586301835058939e-06, + "loss": 0.4072, + "step": 11541 + }, + { + "epoch": 1.97, + "learning_rate": 5.584649585557177e-06, + "loss": 0.4334, + "step": 11542 + }, + { + "epoch": 1.97, + "learning_rate": 5.582997485767187e-06, + "loss": 0.4106, + "step": 11543 + }, + { + "epoch": 1.97, + "learning_rate": 5.581345535744993e-06, + "loss": 0.407, + "step": 11544 + }, + { + "epoch": 1.97, + "learning_rate": 5.5796937355466e-06, + "loss": 0.4605, + "step": 11545 + }, + { + "epoch": 1.97, + "learning_rate": 5.578042085228017e-06, + "loss": 0.4003, + "step": 11546 + }, + { + "epoch": 1.97, + "learning_rate": 5.576390584845249e-06, + "loss": 0.3757, + "step": 11547 + }, + { + "epoch": 1.97, + "learning_rate": 5.5747392344542985e-06, + "loss": 0.4067, + "step": 11548 + }, + { + "epoch": 1.97, + "learning_rate": 5.573088034111147e-06, + "loss": 0.3801, + "step": 11549 + }, + { + "epoch": 1.97, + "learning_rate": 5.571436983871787e-06, + "loss": 0.4017, + "step": 11550 + }, + { + "epoch": 1.97, + "learning_rate": 5.569786083792202e-06, + "loss": 0.3917, + "step": 11551 + }, + { + "epoch": 1.97, + "learning_rate": 5.5681353339283696e-06, + "loss": 0.3766, + "step": 11552 + }, + { + "epoch": 1.97, + "learning_rate": 5.566484734336259e-06, + "loss": 0.3859, + "step": 11553 + }, + { + "epoch": 1.97, + "learning_rate": 5.564834285071841e-06, + "loss": 0.4259, + "step": 11554 + }, + { + "epoch": 1.97, + "learning_rate": 5.563183986191079e-06, + "loss": 0.4065, + "step": 11555 + }, + { + "epoch": 1.97, + "learning_rate": 5.561533837749924e-06, + "loss": 0.3969, + "step": 11556 + }, + { + "epoch": 1.97, + "learning_rate": 5.55988383980433e-06, + "loss": 0.429, + "step": 11557 + }, + { + "epoch": 1.97, + "learning_rate": 5.558233992410243e-06, + "loss": 0.3996, + "step": 11558 + }, + { + "epoch": 1.97, + "learning_rate": 5.556584295623606e-06, + "loss": 0.4474, + "step": 11559 + }, + { + "epoch": 1.97, + "learning_rate": 5.554934749500359e-06, + "loss": 0.4402, + "step": 11560 + }, + { + "epoch": 1.97, + "learning_rate": 5.553285354096426e-06, + "loss": 0.4345, + "step": 11561 + }, + { + "epoch": 1.97, + "learning_rate": 5.5516361094677375e-06, + "loss": 0.4127, + "step": 11562 + }, + { + "epoch": 1.97, + "learning_rate": 5.549987015670215e-06, + "loss": 0.4122, + "step": 11563 + }, + { + "epoch": 1.97, + "learning_rate": 5.5483380727597715e-06, + "loss": 0.4332, + "step": 11564 + }, + { + "epoch": 1.97, + "learning_rate": 5.546689280792319e-06, + "loss": 0.4059, + "step": 11565 + }, + { + "epoch": 1.97, + "learning_rate": 5.545040639823764e-06, + "loss": 0.4022, + "step": 11566 + }, + { + "epoch": 1.97, + "learning_rate": 5.5433921499100085e-06, + "loss": 0.397, + "step": 11567 + }, + { + "epoch": 1.97, + "learning_rate": 5.541743811106945e-06, + "loss": 0.4413, + "step": 11568 + }, + { + "epoch": 1.97, + "learning_rate": 5.540095623470467e-06, + "loss": 0.3927, + "step": 11569 + }, + { + "epoch": 1.97, + "learning_rate": 5.538447587056462e-06, + "loss": 0.4109, + "step": 11570 + }, + { + "epoch": 1.97, + "learning_rate": 5.536799701920801e-06, + "loss": 0.3899, + "step": 11571 + }, + { + "epoch": 1.97, + "learning_rate": 5.535151968119366e-06, + "loss": 0.4047, + "step": 11572 + }, + { + "epoch": 1.97, + "learning_rate": 5.533504385708024e-06, + "loss": 0.3909, + "step": 11573 + }, + { + "epoch": 1.97, + "learning_rate": 5.531856954742643e-06, + "loss": 0.369, + "step": 11574 + }, + { + "epoch": 1.97, + "learning_rate": 5.5302096752790834e-06, + "loss": 0.4163, + "step": 11575 + }, + { + "epoch": 1.97, + "learning_rate": 5.528562547373193e-06, + "loss": 0.4004, + "step": 11576 + }, + { + "epoch": 1.97, + "learning_rate": 5.526915571080825e-06, + "loss": 0.4253, + "step": 11577 + }, + { + "epoch": 1.97, + "learning_rate": 5.525268746457829e-06, + "loss": 0.4044, + "step": 11578 + }, + { + "epoch": 1.97, + "learning_rate": 5.523622073560034e-06, + "loss": 0.4175, + "step": 11579 + }, + { + "epoch": 1.97, + "learning_rate": 5.5219755524432795e-06, + "loss": 0.4173, + "step": 11580 + }, + { + "epoch": 1.98, + "learning_rate": 5.5203291831633935e-06, + "loss": 0.3994, + "step": 11581 + }, + { + "epoch": 1.98, + "learning_rate": 5.518682965776198e-06, + "loss": 0.4249, + "step": 11582 + }, + { + "epoch": 1.98, + "learning_rate": 5.517036900337516e-06, + "loss": 0.4576, + "step": 11583 + }, + { + "epoch": 1.98, + "learning_rate": 5.515390986903157e-06, + "loss": 0.4213, + "step": 11584 + }, + { + "epoch": 1.98, + "learning_rate": 5.513745225528933e-06, + "loss": 0.395, + "step": 11585 + }, + { + "epoch": 1.98, + "learning_rate": 5.512099616270641e-06, + "loss": 0.4385, + "step": 11586 + }, + { + "epoch": 1.98, + "learning_rate": 5.510454159184082e-06, + "loss": 0.3855, + "step": 11587 + }, + { + "epoch": 1.98, + "learning_rate": 5.5088088543250496e-06, + "loss": 0.4235, + "step": 11588 + }, + { + "epoch": 1.98, + "learning_rate": 5.507163701749329e-06, + "loss": 0.3995, + "step": 11589 + }, + { + "epoch": 1.98, + "learning_rate": 5.505518701512706e-06, + "loss": 0.4153, + "step": 11590 + }, + { + "epoch": 1.98, + "learning_rate": 5.503873853670958e-06, + "loss": 0.4229, + "step": 11591 + }, + { + "epoch": 1.98, + "learning_rate": 5.50222915827985e-06, + "loss": 0.4224, + "step": 11592 + }, + { + "epoch": 1.98, + "learning_rate": 5.5005846153951605e-06, + "loss": 0.3718, + "step": 11593 + }, + { + "epoch": 1.98, + "learning_rate": 5.498940225072639e-06, + "loss": 0.4074, + "step": 11594 + }, + { + "epoch": 1.98, + "learning_rate": 5.4972959873680475e-06, + "loss": 0.4242, + "step": 11595 + }, + { + "epoch": 1.98, + "learning_rate": 5.495651902337138e-06, + "loss": 0.4117, + "step": 11596 + }, + { + "epoch": 1.98, + "learning_rate": 5.494007970035655e-06, + "loss": 0.4289, + "step": 11597 + }, + { + "epoch": 1.98, + "learning_rate": 5.4923641905193405e-06, + "loss": 0.4235, + "step": 11598 + }, + { + "epoch": 1.98, + "learning_rate": 5.490720563843931e-06, + "loss": 0.448, + "step": 11599 + }, + { + "epoch": 1.98, + "learning_rate": 5.48907709006516e-06, + "loss": 0.4179, + "step": 11600 + }, + { + "epoch": 1.98, + "learning_rate": 5.487433769238746e-06, + "loss": 0.4265, + "step": 11601 + }, + { + "epoch": 1.98, + "learning_rate": 5.4857906014204106e-06, + "loss": 0.4023, + "step": 11602 + }, + { + "epoch": 1.98, + "learning_rate": 5.484147586665873e-06, + "loss": 0.4266, + "step": 11603 + }, + { + "epoch": 1.98, + "learning_rate": 5.48250472503084e-06, + "loss": 0.4518, + "step": 11604 + }, + { + "epoch": 1.98, + "learning_rate": 5.480862016571018e-06, + "loss": 0.4043, + "step": 11605 + }, + { + "epoch": 1.98, + "learning_rate": 5.479219461342106e-06, + "loss": 0.4287, + "step": 11606 + }, + { + "epoch": 1.98, + "learning_rate": 5.477577059399801e-06, + "loss": 0.394, + "step": 11607 + }, + { + "epoch": 1.98, + "learning_rate": 5.475934810799786e-06, + "loss": 0.4022, + "step": 11608 + }, + { + "epoch": 1.98, + "learning_rate": 5.474292715597751e-06, + "loss": 0.4274, + "step": 11609 + }, + { + "epoch": 1.98, + "learning_rate": 5.472650773849369e-06, + "loss": 0.4293, + "step": 11610 + }, + { + "epoch": 1.98, + "learning_rate": 5.471008985610316e-06, + "loss": 0.4, + "step": 11611 + }, + { + "epoch": 1.98, + "learning_rate": 5.469367350936261e-06, + "loss": 0.4204, + "step": 11612 + }, + { + "epoch": 1.98, + "learning_rate": 5.467725869882865e-06, + "loss": 0.4257, + "step": 11613 + }, + { + "epoch": 1.98, + "learning_rate": 5.46608454250579e-06, + "loss": 0.4299, + "step": 11614 + }, + { + "epoch": 1.98, + "learning_rate": 5.464443368860685e-06, + "loss": 0.4085, + "step": 11615 + }, + { + "epoch": 1.98, + "learning_rate": 5.462802349003202e-06, + "loss": 0.4025, + "step": 11616 + }, + { + "epoch": 1.98, + "learning_rate": 5.461161482988977e-06, + "loss": 0.4016, + "step": 11617 + }, + { + "epoch": 1.98, + "learning_rate": 5.459520770873649e-06, + "loss": 0.4362, + "step": 11618 + }, + { + "epoch": 1.98, + "learning_rate": 5.457880212712852e-06, + "loss": 0.3634, + "step": 11619 + }, + { + "epoch": 1.98, + "learning_rate": 5.45623980856221e-06, + "loss": 0.4163, + "step": 11620 + }, + { + "epoch": 1.98, + "learning_rate": 5.454599558477346e-06, + "loss": 0.4178, + "step": 11621 + }, + { + "epoch": 1.98, + "learning_rate": 5.45295946251388e-06, + "loss": 0.403, + "step": 11622 + }, + { + "epoch": 1.98, + "learning_rate": 5.4513195207274136e-06, + "loss": 0.4172, + "step": 11623 + }, + { + "epoch": 1.98, + "learning_rate": 5.449679733173559e-06, + "loss": 0.4043, + "step": 11624 + }, + { + "epoch": 1.98, + "learning_rate": 5.4480400999079194e-06, + "loss": 0.4044, + "step": 11625 + }, + { + "epoch": 1.98, + "learning_rate": 5.446400620986082e-06, + "loss": 0.4229, + "step": 11626 + }, + { + "epoch": 1.98, + "learning_rate": 5.444761296463641e-06, + "loss": 0.4253, + "step": 11627 + }, + { + "epoch": 1.98, + "learning_rate": 5.44312212639618e-06, + "loss": 0.4135, + "step": 11628 + }, + { + "epoch": 1.98, + "learning_rate": 5.441483110839279e-06, + "loss": 0.4444, + "step": 11629 + }, + { + "epoch": 1.98, + "learning_rate": 5.4398442498485134e-06, + "loss": 0.3826, + "step": 11630 + }, + { + "epoch": 1.98, + "learning_rate": 5.4382055434794556e-06, + "loss": 0.3894, + "step": 11631 + }, + { + "epoch": 1.98, + "learning_rate": 5.436566991787662e-06, + "loss": 0.4504, + "step": 11632 + }, + { + "epoch": 1.98, + "learning_rate": 5.434928594828693e-06, + "loss": 0.4466, + "step": 11633 + }, + { + "epoch": 1.98, + "learning_rate": 5.433290352658104e-06, + "loss": 0.4076, + "step": 11634 + }, + { + "epoch": 1.98, + "learning_rate": 5.431652265331442e-06, + "loss": 0.3971, + "step": 11635 + }, + { + "epoch": 1.98, + "learning_rate": 5.430014332904251e-06, + "loss": 0.3783, + "step": 11636 + }, + { + "epoch": 1.98, + "learning_rate": 5.428376555432067e-06, + "loss": 0.4039, + "step": 11637 + }, + { + "epoch": 1.98, + "learning_rate": 5.426738932970428e-06, + "loss": 0.4418, + "step": 11638 + }, + { + "epoch": 1.98, + "learning_rate": 5.425101465574851e-06, + "loss": 0.39, + "step": 11639 + }, + { + "epoch": 1.99, + "learning_rate": 5.423464153300868e-06, + "loss": 0.3906, + "step": 11640 + }, + { + "epoch": 1.99, + "learning_rate": 5.421826996203986e-06, + "loss": 0.4031, + "step": 11641 + }, + { + "epoch": 1.99, + "learning_rate": 5.4201899943397205e-06, + "loss": 0.4278, + "step": 11642 + }, + { + "epoch": 1.99, + "learning_rate": 5.418553147763578e-06, + "loss": 0.4299, + "step": 11643 + }, + { + "epoch": 1.99, + "learning_rate": 5.4169164565310585e-06, + "loss": 0.4144, + "step": 11644 + }, + { + "epoch": 1.99, + "learning_rate": 5.415279920697658e-06, + "loss": 0.4028, + "step": 11645 + }, + { + "epoch": 1.99, + "learning_rate": 5.413643540318872e-06, + "loss": 0.3695, + "step": 11646 + }, + { + "epoch": 1.99, + "learning_rate": 5.412007315450174e-06, + "loss": 0.4459, + "step": 11647 + }, + { + "epoch": 1.99, + "learning_rate": 5.410371246147052e-06, + "loss": 0.4095, + "step": 11648 + }, + { + "epoch": 1.99, + "learning_rate": 5.4087353324649764e-06, + "loss": 0.4014, + "step": 11649 + }, + { + "epoch": 1.99, + "learning_rate": 5.407099574459418e-06, + "loss": 0.3987, + "step": 11650 + }, + { + "epoch": 1.99, + "learning_rate": 5.405463972185842e-06, + "loss": 0.4326, + "step": 11651 + }, + { + "epoch": 1.99, + "learning_rate": 5.403828525699703e-06, + "loss": 0.4115, + "step": 11652 + }, + { + "epoch": 1.99, + "learning_rate": 5.402193235056463e-06, + "loss": 0.4326, + "step": 11653 + }, + { + "epoch": 1.99, + "learning_rate": 5.40055810031156e-06, + "loss": 0.3995, + "step": 11654 + }, + { + "epoch": 1.99, + "learning_rate": 5.39892312152044e-06, + "loss": 0.3854, + "step": 11655 + }, + { + "epoch": 1.99, + "learning_rate": 5.3972882987385455e-06, + "loss": 0.3983, + "step": 11656 + }, + { + "epoch": 1.99, + "learning_rate": 5.395653632021298e-06, + "loss": 0.4076, + "step": 11657 + }, + { + "epoch": 1.99, + "learning_rate": 5.394019121424132e-06, + "loss": 0.4353, + "step": 11658 + }, + { + "epoch": 1.99, + "learning_rate": 5.392384767002468e-06, + "loss": 0.4206, + "step": 11659 + }, + { + "epoch": 1.99, + "learning_rate": 5.39075056881172e-06, + "loss": 0.4294, + "step": 11660 + }, + { + "epoch": 1.99, + "learning_rate": 5.389116526907305e-06, + "loss": 0.4294, + "step": 11661 + }, + { + "epoch": 1.99, + "learning_rate": 5.387482641344621e-06, + "loss": 0.4098, + "step": 11662 + }, + { + "epoch": 1.99, + "learning_rate": 5.38584891217907e-06, + "loss": 0.4138, + "step": 11663 + }, + { + "epoch": 1.99, + "learning_rate": 5.38421533946605e-06, + "loss": 0.406, + "step": 11664 + }, + { + "epoch": 1.99, + "learning_rate": 5.38258192326095e-06, + "loss": 0.4045, + "step": 11665 + }, + { + "epoch": 1.99, + "learning_rate": 5.380948663619152e-06, + "loss": 0.4071, + "step": 11666 + }, + { + "epoch": 1.99, + "learning_rate": 5.379315560596038e-06, + "loss": 0.4013, + "step": 11667 + }, + { + "epoch": 1.99, + "learning_rate": 5.377682614246984e-06, + "loss": 0.4451, + "step": 11668 + }, + { + "epoch": 1.99, + "learning_rate": 5.376049824627352e-06, + "loss": 0.3873, + "step": 11669 + }, + { + "epoch": 1.99, + "learning_rate": 5.374417191792507e-06, + "loss": 0.3976, + "step": 11670 + }, + { + "epoch": 1.99, + "learning_rate": 5.372784715797809e-06, + "loss": 0.4311, + "step": 11671 + }, + { + "epoch": 1.99, + "learning_rate": 5.371152396698613e-06, + "loss": 0.4546, + "step": 11672 + }, + { + "epoch": 1.99, + "learning_rate": 5.369520234550259e-06, + "loss": 0.3963, + "step": 11673 + }, + { + "epoch": 1.99, + "learning_rate": 5.367888229408094e-06, + "loss": 0.4403, + "step": 11674 + }, + { + "epoch": 1.99, + "learning_rate": 5.366256381327453e-06, + "loss": 0.4239, + "step": 11675 + }, + { + "epoch": 1.99, + "learning_rate": 5.364624690363671e-06, + "loss": 0.4465, + "step": 11676 + }, + { + "epoch": 1.99, + "learning_rate": 5.362993156572067e-06, + "loss": 0.4593, + "step": 11677 + }, + { + "epoch": 1.99, + "learning_rate": 5.361361780007965e-06, + "loss": 0.3844, + "step": 11678 + }, + { + "epoch": 1.99, + "learning_rate": 5.359730560726681e-06, + "loss": 0.4462, + "step": 11679 + }, + { + "epoch": 1.99, + "learning_rate": 5.358099498783524e-06, + "loss": 0.3917, + "step": 11680 + }, + { + "epoch": 1.99, + "learning_rate": 5.356468594233799e-06, + "loss": 0.4449, + "step": 11681 + }, + { + "epoch": 1.99, + "learning_rate": 5.354837847132805e-06, + "loss": 0.4317, + "step": 11682 + }, + { + "epoch": 1.99, + "learning_rate": 5.353207257535842e-06, + "loss": 0.4249, + "step": 11683 + }, + { + "epoch": 1.99, + "learning_rate": 5.351576825498186e-06, + "loss": 0.4361, + "step": 11684 + }, + { + "epoch": 1.99, + "learning_rate": 5.3499465510751295e-06, + "loss": 0.3805, + "step": 11685 + }, + { + "epoch": 1.99, + "learning_rate": 5.3483164343219464e-06, + "loss": 0.4116, + "step": 11686 + }, + { + "epoch": 1.99, + "learning_rate": 5.3466864752939155e-06, + "loss": 0.3926, + "step": 11687 + }, + { + "epoch": 1.99, + "learning_rate": 5.345056674046294e-06, + "loss": 0.4409, + "step": 11688 + }, + { + "epoch": 1.99, + "learning_rate": 5.343427030634349e-06, + "loss": 0.4312, + "step": 11689 + }, + { + "epoch": 1.99, + "learning_rate": 5.341797545113338e-06, + "loss": 0.395, + "step": 11690 + }, + { + "epoch": 1.99, + "learning_rate": 5.340168217538516e-06, + "loss": 0.4243, + "step": 11691 + }, + { + "epoch": 1.99, + "learning_rate": 5.338539047965119e-06, + "loss": 0.3915, + "step": 11692 + }, + { + "epoch": 1.99, + "learning_rate": 5.336910036448393e-06, + "loss": 0.4006, + "step": 11693 + }, + { + "epoch": 1.99, + "learning_rate": 5.33528118304357e-06, + "loss": 0.4211, + "step": 11694 + }, + { + "epoch": 1.99, + "learning_rate": 5.3336524878058845e-06, + "loss": 0.4368, + "step": 11695 + }, + { + "epoch": 1.99, + "learning_rate": 5.332023950790558e-06, + "loss": 0.4327, + "step": 11696 + }, + { + "epoch": 1.99, + "learning_rate": 5.33039557205281e-06, + "loss": 0.3671, + "step": 11697 + }, + { + "epoch": 2.0, + "learning_rate": 5.328767351647858e-06, + "loss": 0.4027, + "step": 11698 + }, + { + "epoch": 2.0, + "learning_rate": 5.327139289630903e-06, + "loss": 0.4183, + "step": 11699 + }, + { + "epoch": 2.0, + "learning_rate": 5.32551138605715e-06, + "loss": 0.4161, + "step": 11700 + }, + { + "epoch": 2.0, + "learning_rate": 5.3238836409817975e-06, + "loss": 0.4899, + "step": 11701 + }, + { + "epoch": 2.0, + "learning_rate": 5.322256054460039e-06, + "loss": 0.3956, + "step": 11702 + }, + { + "epoch": 2.0, + "learning_rate": 5.3206286265470645e-06, + "loss": 0.4259, + "step": 11703 + }, + { + "epoch": 2.0, + "learning_rate": 5.319001357298045e-06, + "loss": 0.4488, + "step": 11704 + }, + { + "epoch": 2.0, + "learning_rate": 5.317374246768163e-06, + "loss": 0.4231, + "step": 11705 + }, + { + "epoch": 2.0, + "learning_rate": 5.3157472950125945e-06, + "loss": 0.4042, + "step": 11706 + }, + { + "epoch": 2.0, + "learning_rate": 5.314120502086493e-06, + "loss": 0.4366, + "step": 11707 + }, + { + "epoch": 2.0, + "learning_rate": 5.3124938680450255e-06, + "loss": 0.3752, + "step": 11708 + }, + { + "epoch": 2.0, + "learning_rate": 5.310867392943344e-06, + "loss": 0.4296, + "step": 11709 + }, + { + "epoch": 2.0, + "learning_rate": 5.3092410768365995e-06, + "loss": 0.3951, + "step": 11710 + }, + { + "epoch": 2.0, + "learning_rate": 5.307614919779935e-06, + "loss": 0.414, + "step": 11711 + }, + { + "epoch": 2.0, + "learning_rate": 5.305988921828488e-06, + "loss": 0.4387, + "step": 11712 + }, + { + "epoch": 2.0, + "learning_rate": 5.304363083037396e-06, + "loss": 0.4176, + "step": 11713 + }, + { + "epoch": 2.0, + "learning_rate": 5.302737403461778e-06, + "loss": 0.4043, + "step": 11714 + }, + { + "epoch": 2.0, + "learning_rate": 5.301111883156762e-06, + "loss": 0.4295, + "step": 11715 + }, + { + "epoch": 2.0, + "learning_rate": 5.299486522177463e-06, + "loss": 0.4018, + "step": 11716 + }, + { + "epoch": 2.0, + "learning_rate": 5.297861320578993e-06, + "loss": 0.3916, + "step": 11717 + }, + { + "epoch": 2.0, + "learning_rate": 5.296236278416458e-06, + "loss": 0.4183, + "step": 11718 + }, + { + "epoch": 2.0, + "learning_rate": 5.29461139574496e-06, + "loss": 0.4095, + "step": 11719 + }, + { + "epoch": 2.0, + "learning_rate": 5.292986672619589e-06, + "loss": 0.4139, + "step": 11720 + }, + { + "epoch": 2.0, + "learning_rate": 5.291362109095442e-06, + "loss": 0.4252, + "step": 11721 + }, + { + "epoch": 2.0, + "learning_rate": 5.2897377052275954e-06, + "loss": 0.4032, + "step": 11722 + }, + { + "epoch": 2.0, + "learning_rate": 5.288113461071132e-06, + "loss": 0.4033, + "step": 11723 + }, + { + "epoch": 2.0, + "learning_rate": 5.2864893766811235e-06, + "loss": 0.4416, + "step": 11724 + }, + { + "epoch": 2.0, + "learning_rate": 5.28486545211264e-06, + "loss": 0.4342, + "step": 11725 + }, + { + "epoch": 2.0, + "learning_rate": 5.283241687420744e-06, + "loss": 0.428, + "step": 11726 + }, + { + "epoch": 2.0, + "learning_rate": 5.2816180826604914e-06, + "loss": 0.385, + "step": 11727 + }, + { + "epoch": 2.0, + "learning_rate": 5.2799946378869386e-06, + "loss": 0.3414, + "step": 11728 + }, + { + "epoch": 2.0, + "learning_rate": 5.278371353155124e-06, + "loss": 0.3749, + "step": 11729 + }, + { + "epoch": 2.0, + "learning_rate": 5.2767482285200925e-06, + "loss": 0.3457, + "step": 11730 + }, + { + "epoch": 2.0, + "learning_rate": 5.27512526403688e-06, + "loss": 0.2938, + "step": 11731 + }, + { + "epoch": 2.0, + "learning_rate": 5.2735024597605154e-06, + "loss": 0.3423, + "step": 11732 + }, + { + "epoch": 2.0, + "learning_rate": 5.271879815746024e-06, + "loss": 0.3152, + "step": 11733 + }, + { + "epoch": 2.0, + "learning_rate": 5.270257332048428e-06, + "loss": 0.3433, + "step": 11734 + }, + { + "epoch": 2.0, + "learning_rate": 5.268635008722736e-06, + "loss": 0.3285, + "step": 11735 + }, + { + "epoch": 2.0, + "learning_rate": 5.267012845823957e-06, + "loss": 0.3547, + "step": 11736 + }, + { + "epoch": 2.0, + "learning_rate": 5.265390843407099e-06, + "loss": 0.3114, + "step": 11737 + }, + { + "epoch": 2.0, + "learning_rate": 5.263769001527153e-06, + "loss": 0.3449, + "step": 11738 + }, + { + "epoch": 2.0, + "learning_rate": 5.262147320239113e-06, + "loss": 0.3249, + "step": 11739 + }, + { + "epoch": 2.0, + "learning_rate": 5.2605257995979646e-06, + "loss": 0.3204, + "step": 11740 + }, + { + "epoch": 2.0, + "learning_rate": 5.258904439658692e-06, + "loss": 0.3288, + "step": 11741 + }, + { + "epoch": 2.0, + "learning_rate": 5.257283240476268e-06, + "loss": 0.3346, + "step": 11742 + }, + { + "epoch": 2.0, + "learning_rate": 5.255662202105669e-06, + "loss": 0.357, + "step": 11743 + }, + { + "epoch": 2.0, + "learning_rate": 5.25404132460185e-06, + "loss": 0.3293, + "step": 11744 + }, + { + "epoch": 2.0, + "learning_rate": 5.252420608019775e-06, + "loss": 0.3105, + "step": 11745 + }, + { + "epoch": 2.0, + "learning_rate": 5.2508000524143985e-06, + "loss": 0.3268, + "step": 11746 + }, + { + "epoch": 2.0, + "learning_rate": 5.249179657840668e-06, + "loss": 0.3237, + "step": 11747 + }, + { + "epoch": 2.0, + "learning_rate": 5.247559424353527e-06, + "loss": 0.3145, + "step": 11748 + }, + { + "epoch": 2.0, + "learning_rate": 5.245939352007912e-06, + "loss": 0.342, + "step": 11749 + }, + { + "epoch": 2.0, + "learning_rate": 5.2443194408587586e-06, + "loss": 0.3273, + "step": 11750 + }, + { + "epoch": 2.0, + "learning_rate": 5.242699690960986e-06, + "loss": 0.3158, + "step": 11751 + }, + { + "epoch": 2.0, + "learning_rate": 5.241080102369525e-06, + "loss": 0.3173, + "step": 11752 + }, + { + "epoch": 2.0, + "learning_rate": 5.23946067513928e-06, + "loss": 0.3212, + "step": 11753 + }, + { + "epoch": 2.0, + "learning_rate": 5.237841409325168e-06, + "loss": 0.333, + "step": 11754 + }, + { + "epoch": 2.0, + "learning_rate": 5.236222304982093e-06, + "loss": 0.326, + "step": 11755 + }, + { + "epoch": 2.0, + "learning_rate": 5.234603362164952e-06, + "loss": 0.3237, + "step": 11756 + }, + { + "epoch": 2.01, + "learning_rate": 5.23298458092864e-06, + "loss": 0.3276, + "step": 11757 + }, + { + "epoch": 2.01, + "learning_rate": 5.23136596132805e-06, + "loss": 0.3263, + "step": 11758 + }, + { + "epoch": 2.01, + "learning_rate": 5.229747503418056e-06, + "loss": 0.3186, + "step": 11759 + }, + { + "epoch": 2.01, + "learning_rate": 5.228129207253539e-06, + "loss": 0.3303, + "step": 11760 + }, + { + "epoch": 2.01, + "learning_rate": 5.226511072889371e-06, + "loss": 0.3215, + "step": 11761 + }, + { + "epoch": 2.01, + "learning_rate": 5.2248931003804174e-06, + "loss": 0.3175, + "step": 11762 + }, + { + "epoch": 2.01, + "learning_rate": 5.223275289781541e-06, + "loss": 0.3173, + "step": 11763 + }, + { + "epoch": 2.01, + "learning_rate": 5.221657641147595e-06, + "loss": 0.3379, + "step": 11764 + }, + { + "epoch": 2.01, + "learning_rate": 5.2200401545334315e-06, + "loss": 0.2984, + "step": 11765 + }, + { + "epoch": 2.01, + "learning_rate": 5.218422829993897e-06, + "loss": 0.3451, + "step": 11766 + }, + { + "epoch": 2.01, + "learning_rate": 5.216805667583822e-06, + "loss": 0.3222, + "step": 11767 + }, + { + "epoch": 2.01, + "learning_rate": 5.215188667358049e-06, + "loss": 0.31, + "step": 11768 + }, + { + "epoch": 2.01, + "learning_rate": 5.2135718293713966e-06, + "loss": 0.3066, + "step": 11769 + }, + { + "epoch": 2.01, + "learning_rate": 5.2119551536786924e-06, + "loss": 0.3337, + "step": 11770 + }, + { + "epoch": 2.01, + "learning_rate": 5.210338640334754e-06, + "loss": 0.3382, + "step": 11771 + }, + { + "epoch": 2.01, + "learning_rate": 5.2087222893943875e-06, + "loss": 0.3385, + "step": 11772 + }, + { + "epoch": 2.01, + "learning_rate": 5.207106100912409e-06, + "loss": 0.3631, + "step": 11773 + }, + { + "epoch": 2.01, + "learning_rate": 5.205490074943609e-06, + "loss": 0.2935, + "step": 11774 + }, + { + "epoch": 2.01, + "learning_rate": 5.2038742115427855e-06, + "loss": 0.3264, + "step": 11775 + }, + { + "epoch": 2.01, + "learning_rate": 5.202258510764726e-06, + "loss": 0.3222, + "step": 11776 + }, + { + "epoch": 2.01, + "learning_rate": 5.200642972664218e-06, + "loss": 0.3016, + "step": 11777 + }, + { + "epoch": 2.01, + "learning_rate": 5.199027597296036e-06, + "loss": 0.3155, + "step": 11778 + }, + { + "epoch": 2.01, + "learning_rate": 5.197412384714956e-06, + "loss": 0.3195, + "step": 11779 + }, + { + "epoch": 2.01, + "learning_rate": 5.195797334975743e-06, + "loss": 0.3075, + "step": 11780 + }, + { + "epoch": 2.01, + "learning_rate": 5.194182448133163e-06, + "loss": 0.319, + "step": 11781 + }, + { + "epoch": 2.01, + "learning_rate": 5.192567724241966e-06, + "loss": 0.3433, + "step": 11782 + }, + { + "epoch": 2.01, + "learning_rate": 5.190953163356904e-06, + "loss": 0.3126, + "step": 11783 + }, + { + "epoch": 2.01, + "learning_rate": 5.1893387655327265e-06, + "loss": 0.3459, + "step": 11784 + }, + { + "epoch": 2.01, + "learning_rate": 5.1877245308241674e-06, + "loss": 0.3144, + "step": 11785 + }, + { + "epoch": 2.01, + "learning_rate": 5.186110459285963e-06, + "loss": 0.3298, + "step": 11786 + }, + { + "epoch": 2.01, + "learning_rate": 5.18449655097284e-06, + "loss": 0.3255, + "step": 11787 + }, + { + "epoch": 2.01, + "learning_rate": 5.182882805939528e-06, + "loss": 0.3046, + "step": 11788 + }, + { + "epoch": 2.01, + "learning_rate": 5.181269224240737e-06, + "loss": 0.3208, + "step": 11789 + }, + { + "epoch": 2.01, + "learning_rate": 5.179655805931181e-06, + "loss": 0.3213, + "step": 11790 + }, + { + "epoch": 2.01, + "learning_rate": 5.178042551065566e-06, + "loss": 0.3019, + "step": 11791 + }, + { + "epoch": 2.01, + "learning_rate": 5.176429459698595e-06, + "loss": 0.3172, + "step": 11792 + }, + { + "epoch": 2.01, + "learning_rate": 5.174816531884962e-06, + "loss": 0.2913, + "step": 11793 + }, + { + "epoch": 2.01, + "learning_rate": 5.173203767679356e-06, + "loss": 0.3145, + "step": 11794 + }, + { + "epoch": 2.01, + "learning_rate": 5.1715911671364625e-06, + "loss": 0.3306, + "step": 11795 + }, + { + "epoch": 2.01, + "learning_rate": 5.169978730310963e-06, + "loss": 0.34, + "step": 11796 + }, + { + "epoch": 2.01, + "learning_rate": 5.1683664572575235e-06, + "loss": 0.3287, + "step": 11797 + }, + { + "epoch": 2.01, + "learning_rate": 5.166754348030816e-06, + "loss": 0.3256, + "step": 11798 + }, + { + "epoch": 2.01, + "learning_rate": 5.165142402685503e-06, + "loss": 0.3027, + "step": 11799 + }, + { + "epoch": 2.01, + "learning_rate": 5.163530621276238e-06, + "loss": 0.296, + "step": 11800 + }, + { + "epoch": 2.01, + "learning_rate": 5.1619190038576735e-06, + "loss": 0.3352, + "step": 11801 + }, + { + "epoch": 2.01, + "learning_rate": 5.160307550484454e-06, + "loss": 0.3375, + "step": 11802 + }, + { + "epoch": 2.01, + "learning_rate": 5.158696261211223e-06, + "loss": 0.2901, + "step": 11803 + }, + { + "epoch": 2.01, + "learning_rate": 5.157085136092609e-06, + "loss": 0.3531, + "step": 11804 + }, + { + "epoch": 2.01, + "learning_rate": 5.155474175183241e-06, + "loss": 0.3439, + "step": 11805 + }, + { + "epoch": 2.01, + "learning_rate": 5.153863378537747e-06, + "loss": 0.3109, + "step": 11806 + }, + { + "epoch": 2.01, + "learning_rate": 5.152252746210739e-06, + "loss": 0.3122, + "step": 11807 + }, + { + "epoch": 2.01, + "learning_rate": 5.1506422782568345e-06, + "loss": 0.3246, + "step": 11808 + }, + { + "epoch": 2.01, + "learning_rate": 5.149031974730634e-06, + "loss": 0.2933, + "step": 11809 + }, + { + "epoch": 2.01, + "learning_rate": 5.1474218356867436e-06, + "loss": 0.3267, + "step": 11810 + }, + { + "epoch": 2.01, + "learning_rate": 5.145811861179758e-06, + "loss": 0.2998, + "step": 11811 + }, + { + "epoch": 2.01, + "learning_rate": 5.144202051264261e-06, + "loss": 0.3263, + "step": 11812 + }, + { + "epoch": 2.01, + "learning_rate": 5.142592405994841e-06, + "loss": 0.3228, + "step": 11813 + }, + { + "epoch": 2.01, + "learning_rate": 5.140982925426075e-06, + "loss": 0.3289, + "step": 11814 + }, + { + "epoch": 2.02, + "learning_rate": 5.139373609612541e-06, + "loss": 0.299, + "step": 11815 + }, + { + "epoch": 2.02, + "learning_rate": 5.137764458608798e-06, + "loss": 0.3061, + "step": 11816 + }, + { + "epoch": 2.02, + "learning_rate": 5.136155472469411e-06, + "loss": 0.3086, + "step": 11817 + }, + { + "epoch": 2.02, + "learning_rate": 5.1345466512489375e-06, + "loss": 0.3261, + "step": 11818 + }, + { + "epoch": 2.02, + "learning_rate": 5.13293799500193e-06, + "loss": 0.3365, + "step": 11819 + }, + { + "epoch": 2.02, + "learning_rate": 5.131329503782927e-06, + "loss": 0.3285, + "step": 11820 + }, + { + "epoch": 2.02, + "learning_rate": 5.129721177646472e-06, + "loss": 0.3308, + "step": 11821 + }, + { + "epoch": 2.02, + "learning_rate": 5.128113016647096e-06, + "loss": 0.3309, + "step": 11822 + }, + { + "epoch": 2.02, + "learning_rate": 5.12650502083933e-06, + "loss": 0.3213, + "step": 11823 + }, + { + "epoch": 2.02, + "learning_rate": 5.1248971902776955e-06, + "loss": 0.3097, + "step": 11824 + }, + { + "epoch": 2.02, + "learning_rate": 5.12328952501671e-06, + "loss": 0.3196, + "step": 11825 + }, + { + "epoch": 2.02, + "learning_rate": 5.1216820251108876e-06, + "loss": 0.3091, + "step": 11826 + }, + { + "epoch": 2.02, + "learning_rate": 5.1200746906147255e-06, + "loss": 0.3145, + "step": 11827 + }, + { + "epoch": 2.02, + "learning_rate": 5.11846752158273e-06, + "loss": 0.3025, + "step": 11828 + }, + { + "epoch": 2.02, + "learning_rate": 5.116860518069395e-06, + "loss": 0.3295, + "step": 11829 + }, + { + "epoch": 2.02, + "learning_rate": 5.115253680129208e-06, + "loss": 0.3021, + "step": 11830 + }, + { + "epoch": 2.02, + "learning_rate": 5.113647007816655e-06, + "loss": 0.3253, + "step": 11831 + }, + { + "epoch": 2.02, + "learning_rate": 5.112040501186211e-06, + "loss": 0.3432, + "step": 11832 + }, + { + "epoch": 2.02, + "learning_rate": 5.1104341602923454e-06, + "loss": 0.3131, + "step": 11833 + }, + { + "epoch": 2.02, + "learning_rate": 5.108827985189535e-06, + "loss": 0.3272, + "step": 11834 + }, + { + "epoch": 2.02, + "learning_rate": 5.1072219759322264e-06, + "loss": 0.3197, + "step": 11835 + }, + { + "epoch": 2.02, + "learning_rate": 5.105616132574883e-06, + "loss": 0.3285, + "step": 11836 + }, + { + "epoch": 2.02, + "learning_rate": 5.104010455171953e-06, + "loss": 0.3014, + "step": 11837 + }, + { + "epoch": 2.02, + "learning_rate": 5.10240494377788e-06, + "loss": 0.301, + "step": 11838 + }, + { + "epoch": 2.02, + "learning_rate": 5.100799598447101e-06, + "loss": 0.3039, + "step": 11839 + }, + { + "epoch": 2.02, + "learning_rate": 5.0991944192340526e-06, + "loss": 0.3004, + "step": 11840 + }, + { + "epoch": 2.02, + "learning_rate": 5.097589406193162e-06, + "loss": 0.2986, + "step": 11841 + }, + { + "epoch": 2.02, + "learning_rate": 5.0959845593788435e-06, + "loss": 0.3198, + "step": 11842 + }, + { + "epoch": 2.02, + "learning_rate": 5.094379878845519e-06, + "loss": 0.3253, + "step": 11843 + }, + { + "epoch": 2.02, + "learning_rate": 5.092775364647595e-06, + "loss": 0.3626, + "step": 11844 + }, + { + "epoch": 2.02, + "learning_rate": 5.091171016839478e-06, + "loss": 0.3322, + "step": 11845 + }, + { + "epoch": 2.02, + "learning_rate": 5.0895668354755705e-06, + "loss": 0.2951, + "step": 11846 + }, + { + "epoch": 2.02, + "learning_rate": 5.087962820610257e-06, + "loss": 0.3147, + "step": 11847 + }, + { + "epoch": 2.02, + "learning_rate": 5.086358972297931e-06, + "loss": 0.2901, + "step": 11848 + }, + { + "epoch": 2.02, + "learning_rate": 5.084755290592975e-06, + "loss": 0.3086, + "step": 11849 + }, + { + "epoch": 2.02, + "learning_rate": 5.083151775549762e-06, + "loss": 0.325, + "step": 11850 + }, + { + "epoch": 2.02, + "learning_rate": 5.081548427222661e-06, + "loss": 0.3213, + "step": 11851 + }, + { + "epoch": 2.02, + "learning_rate": 5.079945245666041e-06, + "loss": 0.3275, + "step": 11852 + }, + { + "epoch": 2.02, + "learning_rate": 5.078342230934259e-06, + "loss": 0.3162, + "step": 11853 + }, + { + "epoch": 2.02, + "learning_rate": 5.07673938308167e-06, + "loss": 0.3168, + "step": 11854 + }, + { + "epoch": 2.02, + "learning_rate": 5.075136702162622e-06, + "loss": 0.3096, + "step": 11855 + }, + { + "epoch": 2.02, + "learning_rate": 5.073534188231462e-06, + "loss": 0.2982, + "step": 11856 + }, + { + "epoch": 2.02, + "learning_rate": 5.071931841342514e-06, + "loss": 0.3189, + "step": 11857 + }, + { + "epoch": 2.02, + "learning_rate": 5.070329661550119e-06, + "loss": 0.315, + "step": 11858 + }, + { + "epoch": 2.02, + "learning_rate": 5.068727648908598e-06, + "loss": 0.3146, + "step": 11859 + }, + { + "epoch": 2.02, + "learning_rate": 5.067125803472274e-06, + "loss": 0.3215, + "step": 11860 + }, + { + "epoch": 2.02, + "learning_rate": 5.065524125295458e-06, + "loss": 0.311, + "step": 11861 + }, + { + "epoch": 2.02, + "learning_rate": 5.063922614432464e-06, + "loss": 0.3218, + "step": 11862 + }, + { + "epoch": 2.02, + "learning_rate": 5.062321270937584e-06, + "loss": 0.3258, + "step": 11863 + }, + { + "epoch": 2.02, + "learning_rate": 5.060720094865127e-06, + "loss": 0.3113, + "step": 11864 + }, + { + "epoch": 2.02, + "learning_rate": 5.059119086269374e-06, + "loss": 0.2955, + "step": 11865 + }, + { + "epoch": 2.02, + "learning_rate": 5.057518245204615e-06, + "loss": 0.3289, + "step": 11866 + }, + { + "epoch": 2.02, + "learning_rate": 5.0559175717251284e-06, + "loss": 0.2942, + "step": 11867 + }, + { + "epoch": 2.02, + "learning_rate": 5.0543170658851905e-06, + "loss": 0.3395, + "step": 11868 + }, + { + "epoch": 2.02, + "learning_rate": 5.05271672773907e-06, + "loss": 0.3438, + "step": 11869 + }, + { + "epoch": 2.02, + "learning_rate": 5.051116557341027e-06, + "loss": 0.3057, + "step": 11870 + }, + { + "epoch": 2.02, + "learning_rate": 5.0495165547453265e-06, + "loss": 0.3467, + "step": 11871 + }, + { + "epoch": 2.02, + "learning_rate": 5.047916720006207e-06, + "loss": 0.3096, + "step": 11872 + }, + { + "epoch": 2.02, + "learning_rate": 5.0463170531779246e-06, + "loss": 0.3328, + "step": 11873 + }, + { + "epoch": 2.03, + "learning_rate": 5.0447175543147135e-06, + "loss": 0.3267, + "step": 11874 + }, + { + "epoch": 2.03, + "learning_rate": 5.043118223470811e-06, + "loss": 0.2949, + "step": 11875 + }, + { + "epoch": 2.03, + "learning_rate": 5.041519060700446e-06, + "loss": 0.3146, + "step": 11876 + }, + { + "epoch": 2.03, + "learning_rate": 5.039920066057844e-06, + "loss": 0.3181, + "step": 11877 + }, + { + "epoch": 2.03, + "learning_rate": 5.038321239597217e-06, + "loss": 0.3188, + "step": 11878 + }, + { + "epoch": 2.03, + "learning_rate": 5.0367225813727776e-06, + "loss": 0.3072, + "step": 11879 + }, + { + "epoch": 2.03, + "learning_rate": 5.035124091438735e-06, + "loss": 0.3272, + "step": 11880 + }, + { + "epoch": 2.03, + "learning_rate": 5.033525769849284e-06, + "loss": 0.3139, + "step": 11881 + }, + { + "epoch": 2.03, + "learning_rate": 5.031927616658623e-06, + "loss": 0.308, + "step": 11882 + }, + { + "epoch": 2.03, + "learning_rate": 5.03032963192094e-06, + "loss": 0.3185, + "step": 11883 + }, + { + "epoch": 2.03, + "learning_rate": 5.028731815690417e-06, + "loss": 0.344, + "step": 11884 + }, + { + "epoch": 2.03, + "learning_rate": 5.027134168021233e-06, + "loss": 0.2996, + "step": 11885 + }, + { + "epoch": 2.03, + "learning_rate": 5.025536688967564e-06, + "loss": 0.3208, + "step": 11886 + }, + { + "epoch": 2.03, + "learning_rate": 5.023939378583566e-06, + "loss": 0.336, + "step": 11887 + }, + { + "epoch": 2.03, + "learning_rate": 5.022342236923403e-06, + "loss": 0.3158, + "step": 11888 + }, + { + "epoch": 2.03, + "learning_rate": 5.020745264041231e-06, + "loss": 0.3269, + "step": 11889 + }, + { + "epoch": 2.03, + "learning_rate": 5.0191484599911984e-06, + "loss": 0.3318, + "step": 11890 + }, + { + "epoch": 2.03, + "learning_rate": 5.017551824827449e-06, + "loss": 0.311, + "step": 11891 + }, + { + "epoch": 2.03, + "learning_rate": 5.015955358604119e-06, + "loss": 0.3009, + "step": 11892 + }, + { + "epoch": 2.03, + "learning_rate": 5.014359061375343e-06, + "loss": 0.2921, + "step": 11893 + }, + { + "epoch": 2.03, + "learning_rate": 5.012762933195241e-06, + "loss": 0.3279, + "step": 11894 + }, + { + "epoch": 2.03, + "learning_rate": 5.011166974117938e-06, + "loss": 0.3165, + "step": 11895 + }, + { + "epoch": 2.03, + "learning_rate": 5.009571184197545e-06, + "loss": 0.3196, + "step": 11896 + }, + { + "epoch": 2.03, + "learning_rate": 5.00797556348817e-06, + "loss": 0.3102, + "step": 11897 + }, + { + "epoch": 2.03, + "learning_rate": 5.0063801120439186e-06, + "loss": 0.3512, + "step": 11898 + }, + { + "epoch": 2.03, + "learning_rate": 5.0047848299188874e-06, + "loss": 0.3022, + "step": 11899 + }, + { + "epoch": 2.03, + "learning_rate": 5.0031897171671675e-06, + "loss": 0.3279, + "step": 11900 + }, + { + "epoch": 2.03, + "learning_rate": 5.001594773842849e-06, + "loss": 0.2959, + "step": 11901 + }, + { + "epoch": 2.03, + "learning_rate": 5.000000000000003e-06, + "loss": 0.2968, + "step": 11902 + }, + { + "epoch": 2.03, + "learning_rate": 4.998405395692708e-06, + "loss": 0.328, + "step": 11903 + }, + { + "epoch": 2.03, + "learning_rate": 4.9968109609750324e-06, + "loss": 0.3064, + "step": 11904 + }, + { + "epoch": 2.03, + "learning_rate": 4.99521669590104e-06, + "loss": 0.3164, + "step": 11905 + }, + { + "epoch": 2.03, + "learning_rate": 4.993622600524786e-06, + "loss": 0.3268, + "step": 11906 + }, + { + "epoch": 2.03, + "learning_rate": 4.9920286749003235e-06, + "loss": 0.3048, + "step": 11907 + }, + { + "epoch": 2.03, + "learning_rate": 4.990434919081696e-06, + "loss": 0.3224, + "step": 11908 + }, + { + "epoch": 2.03, + "learning_rate": 4.988841333122947e-06, + "loss": 0.3332, + "step": 11909 + }, + { + "epoch": 2.03, + "learning_rate": 4.9872479170781044e-06, + "loss": 0.3408, + "step": 11910 + }, + { + "epoch": 2.03, + "learning_rate": 4.985654671001204e-06, + "loss": 0.2974, + "step": 11911 + }, + { + "epoch": 2.03, + "learning_rate": 4.9840615949462574e-06, + "loss": 0.3164, + "step": 11912 + }, + { + "epoch": 2.03, + "learning_rate": 4.982468688967289e-06, + "loss": 0.3303, + "step": 11913 + }, + { + "epoch": 2.03, + "learning_rate": 4.980875953118307e-06, + "loss": 0.3065, + "step": 11914 + }, + { + "epoch": 2.03, + "learning_rate": 4.9792833874533175e-06, + "loss": 0.3228, + "step": 11915 + }, + { + "epoch": 2.03, + "learning_rate": 4.977690992026324e-06, + "loss": 0.3145, + "step": 11916 + }, + { + "epoch": 2.03, + "learning_rate": 4.97609876689131e-06, + "loss": 0.306, + "step": 11917 + }, + { + "epoch": 2.03, + "learning_rate": 4.974506712102272e-06, + "loss": 0.3289, + "step": 11918 + }, + { + "epoch": 2.03, + "learning_rate": 4.9729148277131865e-06, + "loss": 0.3235, + "step": 11919 + }, + { + "epoch": 2.03, + "learning_rate": 4.971323113778032e-06, + "loss": 0.3172, + "step": 11920 + }, + { + "epoch": 2.03, + "learning_rate": 4.969731570350781e-06, + "loss": 0.3212, + "step": 11921 + }, + { + "epoch": 2.03, + "learning_rate": 4.968140197485395e-06, + "loss": 0.3028, + "step": 11922 + }, + { + "epoch": 2.03, + "learning_rate": 4.966548995235835e-06, + "loss": 0.3247, + "step": 11923 + }, + { + "epoch": 2.03, + "learning_rate": 4.964957963656055e-06, + "loss": 0.3498, + "step": 11924 + }, + { + "epoch": 2.03, + "learning_rate": 4.963367102799997e-06, + "loss": 0.3119, + "step": 11925 + }, + { + "epoch": 2.03, + "learning_rate": 4.961776412721607e-06, + "loss": 0.3191, + "step": 11926 + }, + { + "epoch": 2.03, + "learning_rate": 4.960185893474823e-06, + "loss": 0.325, + "step": 11927 + }, + { + "epoch": 2.03, + "learning_rate": 4.958595545113567e-06, + "loss": 0.3384, + "step": 11928 + }, + { + "epoch": 2.03, + "learning_rate": 4.957005367691768e-06, + "loss": 0.3124, + "step": 11929 + }, + { + "epoch": 2.03, + "learning_rate": 4.955415361263345e-06, + "loss": 0.3133, + "step": 11930 + }, + { + "epoch": 2.03, + "learning_rate": 4.953825525882212e-06, + "loss": 0.3074, + "step": 11931 + }, + { + "epoch": 2.03, + "learning_rate": 4.952235861602269e-06, + "loss": 0.3284, + "step": 11932 + }, + { + "epoch": 2.04, + "learning_rate": 4.950646368477422e-06, + "loss": 0.3578, + "step": 11933 + }, + { + "epoch": 2.04, + "learning_rate": 4.949057046561565e-06, + "loss": 0.312, + "step": 11934 + }, + { + "epoch": 2.04, + "learning_rate": 4.9474678959085874e-06, + "loss": 0.3322, + "step": 11935 + }, + { + "epoch": 2.04, + "learning_rate": 4.945878916572373e-06, + "loss": 0.3405, + "step": 11936 + }, + { + "epoch": 2.04, + "learning_rate": 4.944290108606799e-06, + "loss": 0.3206, + "step": 11937 + }, + { + "epoch": 2.04, + "learning_rate": 4.942701472065738e-06, + "loss": 0.3284, + "step": 11938 + }, + { + "epoch": 2.04, + "learning_rate": 4.941113007003059e-06, + "loss": 0.3191, + "step": 11939 + }, + { + "epoch": 2.04, + "learning_rate": 4.939524713472616e-06, + "loss": 0.3268, + "step": 11940 + }, + { + "epoch": 2.04, + "learning_rate": 4.937936591528265e-06, + "loss": 0.2966, + "step": 11941 + }, + { + "epoch": 2.04, + "learning_rate": 4.93634864122386e-06, + "loss": 0.3107, + "step": 11942 + }, + { + "epoch": 2.04, + "learning_rate": 4.934760862613237e-06, + "loss": 0.3212, + "step": 11943 + }, + { + "epoch": 2.04, + "learning_rate": 4.933173255750235e-06, + "loss": 0.3289, + "step": 11944 + }, + { + "epoch": 2.04, + "learning_rate": 4.931585820688687e-06, + "loss": 0.3074, + "step": 11945 + }, + { + "epoch": 2.04, + "learning_rate": 4.929998557482421e-06, + "loss": 0.3069, + "step": 11946 + }, + { + "epoch": 2.04, + "learning_rate": 4.928411466185248e-06, + "loss": 0.3092, + "step": 11947 + }, + { + "epoch": 2.04, + "learning_rate": 4.926824546850987e-06, + "loss": 0.3201, + "step": 11948 + }, + { + "epoch": 2.04, + "learning_rate": 4.925237799533445e-06, + "loss": 0.3006, + "step": 11949 + }, + { + "epoch": 2.04, + "learning_rate": 4.923651224286424e-06, + "loss": 0.3292, + "step": 11950 + }, + { + "epoch": 2.04, + "learning_rate": 4.922064821163721e-06, + "loss": 0.3369, + "step": 11951 + }, + { + "epoch": 2.04, + "learning_rate": 4.920478590219127e-06, + "loss": 0.3422, + "step": 11952 + }, + { + "epoch": 2.04, + "learning_rate": 4.918892531506424e-06, + "loss": 0.3352, + "step": 11953 + }, + { + "epoch": 2.04, + "learning_rate": 4.917306645079396e-06, + "loss": 0.3242, + "step": 11954 + }, + { + "epoch": 2.04, + "learning_rate": 4.915720930991808e-06, + "loss": 0.3482, + "step": 11955 + }, + { + "epoch": 2.04, + "learning_rate": 4.914135389297432e-06, + "loss": 0.3298, + "step": 11956 + }, + { + "epoch": 2.04, + "learning_rate": 4.912550020050027e-06, + "loss": 0.3181, + "step": 11957 + }, + { + "epoch": 2.04, + "learning_rate": 4.910964823303354e-06, + "loss": 0.3314, + "step": 11958 + }, + { + "epoch": 2.04, + "learning_rate": 4.909379799111153e-06, + "loss": 0.3243, + "step": 11959 + }, + { + "epoch": 2.04, + "learning_rate": 4.907794947527172e-06, + "loss": 0.3229, + "step": 11960 + }, + { + "epoch": 2.04, + "learning_rate": 4.9062102686051535e-06, + "loss": 0.3045, + "step": 11961 + }, + { + "epoch": 2.04, + "learning_rate": 4.904625762398822e-06, + "loss": 0.3471, + "step": 11962 + }, + { + "epoch": 2.04, + "learning_rate": 4.903041428961905e-06, + "loss": 0.3193, + "step": 11963 + }, + { + "epoch": 2.04, + "learning_rate": 4.901457268348125e-06, + "loss": 0.3169, + "step": 11964 + }, + { + "epoch": 2.04, + "learning_rate": 4.899873280611195e-06, + "loss": 0.3207, + "step": 11965 + }, + { + "epoch": 2.04, + "learning_rate": 4.898289465804824e-06, + "loss": 0.3172, + "step": 11966 + }, + { + "epoch": 2.04, + "learning_rate": 4.896705823982715e-06, + "loss": 0.32, + "step": 11967 + }, + { + "epoch": 2.04, + "learning_rate": 4.895122355198563e-06, + "loss": 0.3093, + "step": 11968 + }, + { + "epoch": 2.04, + "learning_rate": 4.893539059506065e-06, + "loss": 0.3079, + "step": 11969 + }, + { + "epoch": 2.04, + "learning_rate": 4.8919559369588965e-06, + "loss": 0.3082, + "step": 11970 + }, + { + "epoch": 2.04, + "learning_rate": 4.8903729876107406e-06, + "loss": 0.3168, + "step": 11971 + }, + { + "epoch": 2.04, + "learning_rate": 4.888790211515273e-06, + "loss": 0.348, + "step": 11972 + }, + { + "epoch": 2.04, + "learning_rate": 4.887207608726158e-06, + "loss": 0.3178, + "step": 11973 + }, + { + "epoch": 2.04, + "learning_rate": 4.885625179297062e-06, + "loss": 0.3096, + "step": 11974 + }, + { + "epoch": 2.04, + "learning_rate": 4.8840429232816335e-06, + "loss": 0.2971, + "step": 11975 + }, + { + "epoch": 2.04, + "learning_rate": 4.8824608407335285e-06, + "loss": 0.3014, + "step": 11976 + }, + { + "epoch": 2.04, + "learning_rate": 4.880878931706384e-06, + "loss": 0.3238, + "step": 11977 + }, + { + "epoch": 2.04, + "learning_rate": 4.879297196253843e-06, + "loss": 0.3327, + "step": 11978 + }, + { + "epoch": 2.04, + "learning_rate": 4.877715634429535e-06, + "loss": 0.318, + "step": 11979 + }, + { + "epoch": 2.04, + "learning_rate": 4.876134246287089e-06, + "loss": 0.3143, + "step": 11980 + }, + { + "epoch": 2.04, + "learning_rate": 4.874553031880123e-06, + "loss": 0.3356, + "step": 11981 + }, + { + "epoch": 2.04, + "learning_rate": 4.872971991262253e-06, + "loss": 0.299, + "step": 11982 + }, + { + "epoch": 2.04, + "learning_rate": 4.871391124487087e-06, + "loss": 0.3186, + "step": 11983 + }, + { + "epoch": 2.04, + "learning_rate": 4.86981043160823e-06, + "loss": 0.3048, + "step": 11984 + }, + { + "epoch": 2.04, + "learning_rate": 4.868229912679274e-06, + "loss": 0.3229, + "step": 11985 + }, + { + "epoch": 2.04, + "learning_rate": 4.866649567753812e-06, + "loss": 0.3259, + "step": 11986 + }, + { + "epoch": 2.04, + "learning_rate": 4.865069396885428e-06, + "loss": 0.2976, + "step": 11987 + }, + { + "epoch": 2.04, + "learning_rate": 4.8634894001277035e-06, + "loss": 0.3199, + "step": 11988 + }, + { + "epoch": 2.04, + "learning_rate": 4.861909577534212e-06, + "loss": 0.3307, + "step": 11989 + }, + { + "epoch": 2.04, + "learning_rate": 4.860329929158516e-06, + "loss": 0.3077, + "step": 11990 + }, + { + "epoch": 2.05, + "learning_rate": 4.85875045505418e-06, + "loss": 0.311, + "step": 11991 + }, + { + "epoch": 2.05, + "learning_rate": 4.857171155274763e-06, + "loss": 0.3082, + "step": 11992 + }, + { + "epoch": 2.05, + "learning_rate": 4.8555920298738055e-06, + "loss": 0.3172, + "step": 11993 + }, + { + "epoch": 2.05, + "learning_rate": 4.854013078904856e-06, + "loss": 0.3266, + "step": 11994 + }, + { + "epoch": 2.05, + "learning_rate": 4.852434302421455e-06, + "loss": 0.2837, + "step": 11995 + }, + { + "epoch": 2.05, + "learning_rate": 4.85085570047713e-06, + "loss": 0.3313, + "step": 11996 + }, + { + "epoch": 2.05, + "learning_rate": 4.849277273125408e-06, + "loss": 0.3213, + "step": 11997 + }, + { + "epoch": 2.05, + "learning_rate": 4.8476990204198095e-06, + "loss": 0.3253, + "step": 11998 + }, + { + "epoch": 2.05, + "learning_rate": 4.846120942413851e-06, + "loss": 0.2983, + "step": 11999 + }, + { + "epoch": 2.05, + "learning_rate": 4.844543039161036e-06, + "loss": 0.3574, + "step": 12000 + }, + { + "epoch": 2.05, + "learning_rate": 4.842965310714867e-06, + "loss": 0.3253, + "step": 12001 + }, + { + "epoch": 2.05, + "learning_rate": 4.8413877571288425e-06, + "loss": 0.3068, + "step": 12002 + }, + { + "epoch": 2.05, + "learning_rate": 4.8398103784564525e-06, + "loss": 0.3176, + "step": 12003 + }, + { + "epoch": 2.05, + "learning_rate": 4.838233174751179e-06, + "loss": 0.3013, + "step": 12004 + }, + { + "epoch": 2.05, + "learning_rate": 4.836656146066507e-06, + "loss": 0.3037, + "step": 12005 + }, + { + "epoch": 2.05, + "learning_rate": 4.835079292455901e-06, + "loss": 0.3279, + "step": 12006 + }, + { + "epoch": 2.05, + "learning_rate": 4.833502613972834e-06, + "loss": 0.3122, + "step": 12007 + }, + { + "epoch": 2.05, + "learning_rate": 4.831926110670761e-06, + "loss": 0.3246, + "step": 12008 + }, + { + "epoch": 2.05, + "learning_rate": 4.830349782603136e-06, + "loss": 0.3186, + "step": 12009 + }, + { + "epoch": 2.05, + "learning_rate": 4.828773629823414e-06, + "loss": 0.3179, + "step": 12010 + }, + { + "epoch": 2.05, + "learning_rate": 4.827197652385033e-06, + "loss": 0.3158, + "step": 12011 + }, + { + "epoch": 2.05, + "learning_rate": 4.825621850341431e-06, + "loss": 0.3066, + "step": 12012 + }, + { + "epoch": 2.05, + "learning_rate": 4.824046223746041e-06, + "loss": 0.3158, + "step": 12013 + }, + { + "epoch": 2.05, + "learning_rate": 4.822470772652289e-06, + "loss": 0.3105, + "step": 12014 + }, + { + "epoch": 2.05, + "learning_rate": 4.820895497113587e-06, + "loss": 0.3176, + "step": 12015 + }, + { + "epoch": 2.05, + "learning_rate": 4.819320397183352e-06, + "loss": 0.3268, + "step": 12016 + }, + { + "epoch": 2.05, + "learning_rate": 4.817745472914993e-06, + "loss": 0.2974, + "step": 12017 + }, + { + "epoch": 2.05, + "learning_rate": 4.816170724361909e-06, + "loss": 0.316, + "step": 12018 + }, + { + "epoch": 2.05, + "learning_rate": 4.814596151577495e-06, + "loss": 0.3046, + "step": 12019 + }, + { + "epoch": 2.05, + "learning_rate": 4.813021754615142e-06, + "loss": 0.335, + "step": 12020 + }, + { + "epoch": 2.05, + "learning_rate": 4.811447533528235e-06, + "loss": 0.3226, + "step": 12021 + }, + { + "epoch": 2.05, + "learning_rate": 4.809873488370145e-06, + "loss": 0.2921, + "step": 12022 + }, + { + "epoch": 2.05, + "learning_rate": 4.808299619194251e-06, + "loss": 0.3187, + "step": 12023 + }, + { + "epoch": 2.05, + "learning_rate": 4.806725926053909e-06, + "loss": 0.3042, + "step": 12024 + }, + { + "epoch": 2.05, + "learning_rate": 4.805152409002485e-06, + "loss": 0.3036, + "step": 12025 + }, + { + "epoch": 2.05, + "learning_rate": 4.80357906809333e-06, + "loss": 0.301, + "step": 12026 + }, + { + "epoch": 2.05, + "learning_rate": 4.8020059033797925e-06, + "loss": 0.3314, + "step": 12027 + }, + { + "epoch": 2.05, + "learning_rate": 4.8004329149152144e-06, + "loss": 0.3248, + "step": 12028 + }, + { + "epoch": 2.05, + "learning_rate": 4.798860102752934e-06, + "loss": 0.3169, + "step": 12029 + }, + { + "epoch": 2.05, + "learning_rate": 4.797287466946273e-06, + "loss": 0.2914, + "step": 12030 + }, + { + "epoch": 2.05, + "learning_rate": 4.79571500754856e-06, + "loss": 0.3054, + "step": 12031 + }, + { + "epoch": 2.05, + "learning_rate": 4.794142724613112e-06, + "loss": 0.3151, + "step": 12032 + }, + { + "epoch": 2.05, + "learning_rate": 4.792570618193241e-06, + "loss": 0.3232, + "step": 12033 + }, + { + "epoch": 2.05, + "learning_rate": 4.790998688342252e-06, + "loss": 0.3293, + "step": 12034 + }, + { + "epoch": 2.05, + "learning_rate": 4.789426935113443e-06, + "loss": 0.3241, + "step": 12035 + }, + { + "epoch": 2.05, + "learning_rate": 4.787855358560115e-06, + "loss": 0.3069, + "step": 12036 + }, + { + "epoch": 2.05, + "learning_rate": 4.7862839587355455e-06, + "loss": 0.3082, + "step": 12037 + }, + { + "epoch": 2.05, + "learning_rate": 4.7847127356930214e-06, + "loss": 0.3474, + "step": 12038 + }, + { + "epoch": 2.05, + "learning_rate": 4.783141689485821e-06, + "loss": 0.3149, + "step": 12039 + }, + { + "epoch": 2.05, + "learning_rate": 4.781570820167205e-06, + "loss": 0.3027, + "step": 12040 + }, + { + "epoch": 2.05, + "learning_rate": 4.780000127790444e-06, + "loss": 0.3244, + "step": 12041 + }, + { + "epoch": 2.05, + "learning_rate": 4.7784296124087935e-06, + "loss": 0.3342, + "step": 12042 + }, + { + "epoch": 2.05, + "learning_rate": 4.776859274075506e-06, + "loss": 0.3225, + "step": 12043 + }, + { + "epoch": 2.05, + "learning_rate": 4.7752891128438305e-06, + "loss": 0.3319, + "step": 12044 + }, + { + "epoch": 2.05, + "learning_rate": 4.773719128766999e-06, + "loss": 0.3344, + "step": 12045 + }, + { + "epoch": 2.05, + "learning_rate": 4.77214932189825e-06, + "loss": 0.3358, + "step": 12046 + }, + { + "epoch": 2.05, + "learning_rate": 4.77057969229081e-06, + "loss": 0.3387, + "step": 12047 + }, + { + "epoch": 2.05, + "learning_rate": 4.7690102399979e-06, + "loss": 0.3231, + "step": 12048 + }, + { + "epoch": 2.05, + "learning_rate": 4.767440965072737e-06, + "loss": 0.3325, + "step": 12049 + }, + { + "epoch": 2.06, + "learning_rate": 4.76587186756853e-06, + "loss": 0.3316, + "step": 12050 + }, + { + "epoch": 2.06, + "learning_rate": 4.764302947538482e-06, + "loss": 0.3356, + "step": 12051 + }, + { + "epoch": 2.06, + "learning_rate": 4.762734205035796e-06, + "loss": 0.3324, + "step": 12052 + }, + { + "epoch": 2.06, + "learning_rate": 4.761165640113654e-06, + "loss": 0.3029, + "step": 12053 + }, + { + "epoch": 2.06, + "learning_rate": 4.75959725282525e-06, + "loss": 0.3248, + "step": 12054 + }, + { + "epoch": 2.06, + "learning_rate": 4.758029043223756e-06, + "loss": 0.3021, + "step": 12055 + }, + { + "epoch": 2.06, + "learning_rate": 4.756461011362349e-06, + "loss": 0.2953, + "step": 12056 + }, + { + "epoch": 2.06, + "learning_rate": 4.754893157294197e-06, + "loss": 0.3158, + "step": 12057 + }, + { + "epoch": 2.06, + "learning_rate": 4.753325481072461e-06, + "loss": 0.3067, + "step": 12058 + }, + { + "epoch": 2.06, + "learning_rate": 4.7517579827502995e-06, + "loss": 0.3308, + "step": 12059 + }, + { + "epoch": 2.06, + "learning_rate": 4.750190662380855e-06, + "loss": 0.3396, + "step": 12060 + }, + { + "epoch": 2.06, + "learning_rate": 4.748623520017274e-06, + "loss": 0.3096, + "step": 12061 + }, + { + "epoch": 2.06, + "learning_rate": 4.747056555712694e-06, + "loss": 0.3065, + "step": 12062 + }, + { + "epoch": 2.06, + "learning_rate": 4.745489769520247e-06, + "loss": 0.3271, + "step": 12063 + }, + { + "epoch": 2.06, + "learning_rate": 4.743923161493058e-06, + "loss": 0.3194, + "step": 12064 + }, + { + "epoch": 2.06, + "learning_rate": 4.7423567316842455e-06, + "loss": 0.2972, + "step": 12065 + }, + { + "epoch": 2.06, + "learning_rate": 4.740790480146923e-06, + "loss": 0.3275, + "step": 12066 + }, + { + "epoch": 2.06, + "learning_rate": 4.739224406934201e-06, + "loss": 0.2996, + "step": 12067 + }, + { + "epoch": 2.06, + "learning_rate": 4.737658512099174e-06, + "loss": 0.3132, + "step": 12068 + }, + { + "epoch": 2.06, + "learning_rate": 4.7360927956949385e-06, + "loss": 0.3181, + "step": 12069 + }, + { + "epoch": 2.06, + "learning_rate": 4.734527257774589e-06, + "loss": 0.3259, + "step": 12070 + }, + { + "epoch": 2.06, + "learning_rate": 4.7329618983912e-06, + "loss": 0.3094, + "step": 12071 + }, + { + "epoch": 2.06, + "learning_rate": 4.731396717597854e-06, + "loss": 0.3063, + "step": 12072 + }, + { + "epoch": 2.06, + "learning_rate": 4.72983171544762e-06, + "loss": 0.32, + "step": 12073 + }, + { + "epoch": 2.06, + "learning_rate": 4.728266891993566e-06, + "loss": 0.3045, + "step": 12074 + }, + { + "epoch": 2.06, + "learning_rate": 4.7267022472887436e-06, + "loss": 0.3314, + "step": 12075 + }, + { + "epoch": 2.06, + "learning_rate": 4.725137781386211e-06, + "loss": 0.3105, + "step": 12076 + }, + { + "epoch": 2.06, + "learning_rate": 4.723573494339011e-06, + "loss": 0.3083, + "step": 12077 + }, + { + "epoch": 2.06, + "learning_rate": 4.722009386200188e-06, + "loss": 0.3192, + "step": 12078 + }, + { + "epoch": 2.06, + "learning_rate": 4.720445457022773e-06, + "loss": 0.2972, + "step": 12079 + }, + { + "epoch": 2.06, + "learning_rate": 4.718881706859797e-06, + "loss": 0.3412, + "step": 12080 + }, + { + "epoch": 2.06, + "learning_rate": 4.71731813576428e-06, + "loss": 0.3298, + "step": 12081 + }, + { + "epoch": 2.06, + "learning_rate": 4.715754743789245e-06, + "loss": 0.3256, + "step": 12082 + }, + { + "epoch": 2.06, + "learning_rate": 4.714191530987692e-06, + "loss": 0.3298, + "step": 12083 + }, + { + "epoch": 2.06, + "learning_rate": 4.7126284974126295e-06, + "loss": 0.3076, + "step": 12084 + }, + { + "epoch": 2.06, + "learning_rate": 4.7110656431170545e-06, + "loss": 0.3284, + "step": 12085 + }, + { + "epoch": 2.06, + "learning_rate": 4.709502968153964e-06, + "loss": 0.3098, + "step": 12086 + }, + { + "epoch": 2.06, + "learning_rate": 4.707940472576335e-06, + "loss": 0.2952, + "step": 12087 + }, + { + "epoch": 2.06, + "learning_rate": 4.706378156437152e-06, + "loss": 0.3082, + "step": 12088 + }, + { + "epoch": 2.06, + "learning_rate": 4.704816019789393e-06, + "loss": 0.3147, + "step": 12089 + }, + { + "epoch": 2.06, + "learning_rate": 4.703254062686017e-06, + "loss": 0.3081, + "step": 12090 + }, + { + "epoch": 2.06, + "learning_rate": 4.7016922851799895e-06, + "loss": 0.3132, + "step": 12091 + }, + { + "epoch": 2.06, + "learning_rate": 4.7001306873242655e-06, + "loss": 0.3345, + "step": 12092 + }, + { + "epoch": 2.06, + "learning_rate": 4.698569269171794e-06, + "loss": 0.3154, + "step": 12093 + }, + { + "epoch": 2.06, + "learning_rate": 4.6970080307755194e-06, + "loss": 0.3105, + "step": 12094 + }, + { + "epoch": 2.06, + "learning_rate": 4.695446972188378e-06, + "loss": 0.284, + "step": 12095 + }, + { + "epoch": 2.06, + "learning_rate": 4.693886093463301e-06, + "loss": 0.3293, + "step": 12096 + }, + { + "epoch": 2.06, + "learning_rate": 4.692325394653217e-06, + "loss": 0.321, + "step": 12097 + }, + { + "epoch": 2.06, + "learning_rate": 4.690764875811037e-06, + "loss": 0.3145, + "step": 12098 + }, + { + "epoch": 2.06, + "learning_rate": 4.689204536989677e-06, + "loss": 0.3255, + "step": 12099 + }, + { + "epoch": 2.06, + "learning_rate": 4.687644378242044e-06, + "loss": 0.3219, + "step": 12100 + }, + { + "epoch": 2.06, + "learning_rate": 4.6860843996210435e-06, + "loss": 0.3172, + "step": 12101 + }, + { + "epoch": 2.06, + "learning_rate": 4.684524601179561e-06, + "loss": 0.3073, + "step": 12102 + }, + { + "epoch": 2.06, + "learning_rate": 4.682964982970489e-06, + "loss": 0.3168, + "step": 12103 + }, + { + "epoch": 2.06, + "learning_rate": 4.681405545046713e-06, + "loss": 0.3273, + "step": 12104 + }, + { + "epoch": 2.06, + "learning_rate": 4.679846287461101e-06, + "loss": 0.3205, + "step": 12105 + }, + { + "epoch": 2.06, + "learning_rate": 4.678287210266529e-06, + "loss": 0.3344, + "step": 12106 + }, + { + "epoch": 2.06, + "learning_rate": 4.6767283135158585e-06, + "loss": 0.3005, + "step": 12107 + }, + { + "epoch": 2.06, + "learning_rate": 4.675169597261947e-06, + "loss": 0.3356, + "step": 12108 + }, + { + "epoch": 2.07, + "learning_rate": 4.673611061557648e-06, + "loss": 0.3127, + "step": 12109 + }, + { + "epoch": 2.07, + "learning_rate": 4.672052706455805e-06, + "loss": 0.3273, + "step": 12110 + }, + { + "epoch": 2.07, + "learning_rate": 4.6704945320092586e-06, + "loss": 0.3107, + "step": 12111 + }, + { + "epoch": 2.07, + "learning_rate": 4.668936538270845e-06, + "loss": 0.3117, + "step": 12112 + }, + { + "epoch": 2.07, + "learning_rate": 4.667378725293385e-06, + "loss": 0.2948, + "step": 12113 + }, + { + "epoch": 2.07, + "learning_rate": 4.665821093129702e-06, + "loss": 0.3008, + "step": 12114 + }, + { + "epoch": 2.07, + "learning_rate": 4.6642636418326115e-06, + "loss": 0.3311, + "step": 12115 + }, + { + "epoch": 2.07, + "learning_rate": 4.662706371454922e-06, + "loss": 0.3221, + "step": 12116 + }, + { + "epoch": 2.07, + "learning_rate": 4.661149282049439e-06, + "loss": 0.3156, + "step": 12117 + }, + { + "epoch": 2.07, + "learning_rate": 4.659592373668952e-06, + "loss": 0.3433, + "step": 12118 + }, + { + "epoch": 2.07, + "learning_rate": 4.658035646366259e-06, + "loss": 0.3268, + "step": 12119 + }, + { + "epoch": 2.07, + "learning_rate": 4.656479100194137e-06, + "loss": 0.321, + "step": 12120 + }, + { + "epoch": 2.07, + "learning_rate": 4.6549227352053675e-06, + "loss": 0.3449, + "step": 12121 + }, + { + "epoch": 2.07, + "learning_rate": 4.65336655145272e-06, + "loss": 0.3053, + "step": 12122 + }, + { + "epoch": 2.07, + "learning_rate": 4.651810548988964e-06, + "loss": 0.3036, + "step": 12123 + }, + { + "epoch": 2.07, + "learning_rate": 4.650254727866858e-06, + "loss": 0.3054, + "step": 12124 + }, + { + "epoch": 2.07, + "learning_rate": 4.648699088139152e-06, + "loss": 0.3162, + "step": 12125 + }, + { + "epoch": 2.07, + "learning_rate": 4.647143629858598e-06, + "loss": 0.3085, + "step": 12126 + }, + { + "epoch": 2.07, + "learning_rate": 4.6455883530779385e-06, + "loss": 0.3436, + "step": 12127 + }, + { + "epoch": 2.07, + "learning_rate": 4.644033257849902e-06, + "loss": 0.3283, + "step": 12128 + }, + { + "epoch": 2.07, + "learning_rate": 4.6424783442272185e-06, + "loss": 0.3067, + "step": 12129 + }, + { + "epoch": 2.07, + "learning_rate": 4.640923612262614e-06, + "loss": 0.3266, + "step": 12130 + }, + { + "epoch": 2.07, + "learning_rate": 4.639369062008801e-06, + "loss": 0.3221, + "step": 12131 + }, + { + "epoch": 2.07, + "learning_rate": 4.637814693518494e-06, + "loss": 0.3389, + "step": 12132 + }, + { + "epoch": 2.07, + "learning_rate": 4.636260506844399e-06, + "loss": 0.3079, + "step": 12133 + }, + { + "epoch": 2.07, + "learning_rate": 4.634706502039206e-06, + "loss": 0.3112, + "step": 12134 + }, + { + "epoch": 2.07, + "learning_rate": 4.633152679155615e-06, + "loss": 0.2991, + "step": 12135 + }, + { + "epoch": 2.07, + "learning_rate": 4.6315990382463036e-06, + "loss": 0.337, + "step": 12136 + }, + { + "epoch": 2.07, + "learning_rate": 4.6300455793639565e-06, + "loss": 0.3247, + "step": 12137 + }, + { + "epoch": 2.07, + "learning_rate": 4.628492302561245e-06, + "loss": 0.313, + "step": 12138 + }, + { + "epoch": 2.07, + "learning_rate": 4.626939207890837e-06, + "loss": 0.314, + "step": 12139 + }, + { + "epoch": 2.07, + "learning_rate": 4.625386295405394e-06, + "loss": 0.3167, + "step": 12140 + }, + { + "epoch": 2.07, + "learning_rate": 4.62383356515757e-06, + "loss": 0.308, + "step": 12141 + }, + { + "epoch": 2.07, + "learning_rate": 4.622281017200019e-06, + "loss": 0.3, + "step": 12142 + }, + { + "epoch": 2.07, + "learning_rate": 4.620728651585373e-06, + "loss": 0.3031, + "step": 12143 + }, + { + "epoch": 2.07, + "learning_rate": 4.619176468366274e-06, + "loss": 0.3345, + "step": 12144 + }, + { + "epoch": 2.07, + "learning_rate": 4.617624467595352e-06, + "loss": 0.3238, + "step": 12145 + }, + { + "epoch": 2.07, + "learning_rate": 4.616072649325231e-06, + "loss": 0.3329, + "step": 12146 + }, + { + "epoch": 2.07, + "learning_rate": 4.6145210136085274e-06, + "loss": 0.3212, + "step": 12147 + }, + { + "epoch": 2.07, + "learning_rate": 4.6129695604978566e-06, + "loss": 0.2961, + "step": 12148 + }, + { + "epoch": 2.07, + "learning_rate": 4.611418290045818e-06, + "loss": 0.3292, + "step": 12149 + }, + { + "epoch": 2.07, + "learning_rate": 4.609867202305013e-06, + "loss": 0.3162, + "step": 12150 + }, + { + "epoch": 2.07, + "learning_rate": 4.6083162973280395e-06, + "loss": 0.3389, + "step": 12151 + }, + { + "epoch": 2.07, + "learning_rate": 4.606765575167474e-06, + "loss": 0.3393, + "step": 12152 + }, + { + "epoch": 2.07, + "learning_rate": 4.605215035875905e-06, + "loss": 0.3253, + "step": 12153 + }, + { + "epoch": 2.07, + "learning_rate": 4.603664679505904e-06, + "loss": 0.2974, + "step": 12154 + }, + { + "epoch": 2.07, + "learning_rate": 4.602114506110038e-06, + "loss": 0.3063, + "step": 12155 + }, + { + "epoch": 2.07, + "learning_rate": 4.6005645157408715e-06, + "loss": 0.3208, + "step": 12156 + }, + { + "epoch": 2.07, + "learning_rate": 4.599014708450963e-06, + "loss": 0.3285, + "step": 12157 + }, + { + "epoch": 2.07, + "learning_rate": 4.5974650842928544e-06, + "loss": 0.3167, + "step": 12158 + }, + { + "epoch": 2.07, + "learning_rate": 4.595915643319093e-06, + "loss": 0.2929, + "step": 12159 + }, + { + "epoch": 2.07, + "learning_rate": 4.594366385582215e-06, + "loss": 0.3289, + "step": 12160 + }, + { + "epoch": 2.07, + "learning_rate": 4.592817311134753e-06, + "loss": 0.3297, + "step": 12161 + }, + { + "epoch": 2.07, + "learning_rate": 4.591268420029229e-06, + "loss": 0.3087, + "step": 12162 + }, + { + "epoch": 2.07, + "learning_rate": 4.589719712318164e-06, + "loss": 0.308, + "step": 12163 + }, + { + "epoch": 2.07, + "learning_rate": 4.5881711880540725e-06, + "loss": 0.3302, + "step": 12164 + }, + { + "epoch": 2.07, + "learning_rate": 4.586622847289454e-06, + "loss": 0.3181, + "step": 12165 + }, + { + "epoch": 2.07, + "learning_rate": 4.585074690076814e-06, + "loss": 0.3189, + "step": 12166 + }, + { + "epoch": 2.08, + "learning_rate": 4.583526716468639e-06, + "loss": 0.3264, + "step": 12167 + }, + { + "epoch": 2.08, + "learning_rate": 4.581978926517423e-06, + "loss": 0.2938, + "step": 12168 + }, + { + "epoch": 2.08, + "learning_rate": 4.5804313202756435e-06, + "loss": 0.3206, + "step": 12169 + }, + { + "epoch": 2.08, + "learning_rate": 4.578883897795776e-06, + "loss": 0.3109, + "step": 12170 + }, + { + "epoch": 2.08, + "learning_rate": 4.577336659130292e-06, + "loss": 0.3171, + "step": 12171 + }, + { + "epoch": 2.08, + "learning_rate": 4.575789604331653e-06, + "loss": 0.3094, + "step": 12172 + }, + { + "epoch": 2.08, + "learning_rate": 4.574242733452311e-06, + "loss": 0.3348, + "step": 12173 + }, + { + "epoch": 2.08, + "learning_rate": 4.572696046544719e-06, + "loss": 0.297, + "step": 12174 + }, + { + "epoch": 2.08, + "learning_rate": 4.571149543661319e-06, + "loss": 0.3355, + "step": 12175 + }, + { + "epoch": 2.08, + "learning_rate": 4.569603224854552e-06, + "loss": 0.2988, + "step": 12176 + }, + { + "epoch": 2.08, + "learning_rate": 4.5680570901768464e-06, + "loss": 0.3138, + "step": 12177 + }, + { + "epoch": 2.08, + "learning_rate": 4.5665111396806275e-06, + "loss": 0.3141, + "step": 12178 + }, + { + "epoch": 2.08, + "learning_rate": 4.564965373418318e-06, + "loss": 0.3209, + "step": 12179 + }, + { + "epoch": 2.08, + "learning_rate": 4.563419791442324e-06, + "loss": 0.3323, + "step": 12180 + }, + { + "epoch": 2.08, + "learning_rate": 4.561874393805053e-06, + "loss": 0.2968, + "step": 12181 + }, + { + "epoch": 2.08, + "learning_rate": 4.560329180558911e-06, + "loss": 0.3158, + "step": 12182 + }, + { + "epoch": 2.08, + "learning_rate": 4.558784151756284e-06, + "loss": 0.3223, + "step": 12183 + }, + { + "epoch": 2.08, + "learning_rate": 4.557239307449562e-06, + "loss": 0.3125, + "step": 12184 + }, + { + "epoch": 2.08, + "learning_rate": 4.5556946476911255e-06, + "loss": 0.3131, + "step": 12185 + }, + { + "epoch": 2.08, + "learning_rate": 4.554150172533352e-06, + "loss": 0.3286, + "step": 12186 + }, + { + "epoch": 2.08, + "learning_rate": 4.552605882028612e-06, + "loss": 0.3144, + "step": 12187 + }, + { + "epoch": 2.08, + "learning_rate": 4.551061776229262e-06, + "loss": 0.3054, + "step": 12188 + }, + { + "epoch": 2.08, + "learning_rate": 4.5495178551876605e-06, + "loss": 0.3072, + "step": 12189 + }, + { + "epoch": 2.08, + "learning_rate": 4.547974118956158e-06, + "loss": 0.3172, + "step": 12190 + }, + { + "epoch": 2.08, + "learning_rate": 4.546430567587098e-06, + "loss": 0.3144, + "step": 12191 + }, + { + "epoch": 2.08, + "learning_rate": 4.544887201132818e-06, + "loss": 0.304, + "step": 12192 + }, + { + "epoch": 2.08, + "learning_rate": 4.543344019645649e-06, + "loss": 0.3148, + "step": 12193 + }, + { + "epoch": 2.08, + "learning_rate": 4.5418010231779196e-06, + "loss": 0.3154, + "step": 12194 + }, + { + "epoch": 2.08, + "learning_rate": 4.540258211781941e-06, + "loss": 0.3265, + "step": 12195 + }, + { + "epoch": 2.08, + "learning_rate": 4.5387155855100286e-06, + "loss": 0.3257, + "step": 12196 + }, + { + "epoch": 2.08, + "learning_rate": 4.537173144414489e-06, + "loss": 0.2919, + "step": 12197 + }, + { + "epoch": 2.08, + "learning_rate": 4.535630888547626e-06, + "loss": 0.3284, + "step": 12198 + }, + { + "epoch": 2.08, + "learning_rate": 4.5340888179617245e-06, + "loss": 0.3196, + "step": 12199 + }, + { + "epoch": 2.08, + "learning_rate": 4.5325469327090765e-06, + "loss": 0.314, + "step": 12200 + }, + { + "epoch": 2.08, + "learning_rate": 4.531005232841963e-06, + "loss": 0.32, + "step": 12201 + }, + { + "epoch": 2.08, + "learning_rate": 4.529463718412661e-06, + "loss": 0.3396, + "step": 12202 + }, + { + "epoch": 2.08, + "learning_rate": 4.527922389473431e-06, + "loss": 0.2833, + "step": 12203 + }, + { + "epoch": 2.08, + "learning_rate": 4.526381246076541e-06, + "loss": 0.3217, + "step": 12204 + }, + { + "epoch": 2.08, + "learning_rate": 4.524840288274245e-06, + "loss": 0.337, + "step": 12205 + }, + { + "epoch": 2.08, + "learning_rate": 4.523299516118793e-06, + "loss": 0.3214, + "step": 12206 + }, + { + "epoch": 2.08, + "learning_rate": 4.521758929662429e-06, + "loss": 0.3278, + "step": 12207 + }, + { + "epoch": 2.08, + "learning_rate": 4.520218528957388e-06, + "loss": 0.3021, + "step": 12208 + }, + { + "epoch": 2.08, + "learning_rate": 4.518678314055901e-06, + "loss": 0.3103, + "step": 12209 + }, + { + "epoch": 2.08, + "learning_rate": 4.517138285010196e-06, + "loss": 0.3401, + "step": 12210 + }, + { + "epoch": 2.08, + "learning_rate": 4.515598441872485e-06, + "loss": 0.3047, + "step": 12211 + }, + { + "epoch": 2.08, + "learning_rate": 4.514058784694981e-06, + "loss": 0.3315, + "step": 12212 + }, + { + "epoch": 2.08, + "learning_rate": 4.512519313529896e-06, + "loss": 0.3109, + "step": 12213 + }, + { + "epoch": 2.08, + "learning_rate": 4.510980028429419e-06, + "loss": 0.3304, + "step": 12214 + }, + { + "epoch": 2.08, + "learning_rate": 4.509440929445746e-06, + "loss": 0.3326, + "step": 12215 + }, + { + "epoch": 2.08, + "learning_rate": 4.507902016631067e-06, + "loss": 0.3551, + "step": 12216 + }, + { + "epoch": 2.08, + "learning_rate": 4.506363290037562e-06, + "loss": 0.311, + "step": 12217 + }, + { + "epoch": 2.08, + "learning_rate": 4.504824749717399e-06, + "loss": 0.315, + "step": 12218 + }, + { + "epoch": 2.08, + "learning_rate": 4.503286395722749e-06, + "loss": 0.3017, + "step": 12219 + }, + { + "epoch": 2.08, + "learning_rate": 4.501748228105774e-06, + "loss": 0.2984, + "step": 12220 + }, + { + "epoch": 2.08, + "learning_rate": 4.500210246918626e-06, + "loss": 0.3132, + "step": 12221 + }, + { + "epoch": 2.08, + "learning_rate": 4.498672452213458e-06, + "loss": 0.3127, + "step": 12222 + }, + { + "epoch": 2.08, + "learning_rate": 4.497134844042407e-06, + "loss": 0.307, + "step": 12223 + }, + { + "epoch": 2.08, + "learning_rate": 4.495597422457613e-06, + "loss": 0.3133, + "step": 12224 + }, + { + "epoch": 2.08, + "learning_rate": 4.494060187511207e-06, + "loss": 0.3064, + "step": 12225 + }, + { + "epoch": 2.09, + "learning_rate": 4.492523139255306e-06, + "loss": 0.3148, + "step": 12226 + }, + { + "epoch": 2.09, + "learning_rate": 4.49098627774203e-06, + "loss": 0.2929, + "step": 12227 + }, + { + "epoch": 2.09, + "learning_rate": 4.489449603023489e-06, + "loss": 0.3245, + "step": 12228 + }, + { + "epoch": 2.09, + "learning_rate": 4.487913115151792e-06, + "loss": 0.3049, + "step": 12229 + }, + { + "epoch": 2.09, + "learning_rate": 4.486376814179029e-06, + "loss": 0.328, + "step": 12230 + }, + { + "epoch": 2.09, + "learning_rate": 4.4848407001572945e-06, + "loss": 0.3186, + "step": 12231 + }, + { + "epoch": 2.09, + "learning_rate": 4.483304773138678e-06, + "loss": 0.3282, + "step": 12232 + }, + { + "epoch": 2.09, + "learning_rate": 4.481769033175252e-06, + "loss": 0.3013, + "step": 12233 + }, + { + "epoch": 2.09, + "learning_rate": 4.480233480319092e-06, + "loss": 0.3002, + "step": 12234 + }, + { + "epoch": 2.09, + "learning_rate": 4.478698114622262e-06, + "loss": 0.3098, + "step": 12235 + }, + { + "epoch": 2.09, + "learning_rate": 4.477162936136825e-06, + "loss": 0.3118, + "step": 12236 + }, + { + "epoch": 2.09, + "learning_rate": 4.475627944914833e-06, + "loss": 0.3279, + "step": 12237 + }, + { + "epoch": 2.09, + "learning_rate": 4.474093141008334e-06, + "loss": 0.339, + "step": 12238 + }, + { + "epoch": 2.09, + "learning_rate": 4.472558524469366e-06, + "loss": 0.3264, + "step": 12239 + }, + { + "epoch": 2.09, + "learning_rate": 4.471024095349971e-06, + "loss": 0.3121, + "step": 12240 + }, + { + "epoch": 2.09, + "learning_rate": 4.469489853702167e-06, + "loss": 0.3527, + "step": 12241 + }, + { + "epoch": 2.09, + "learning_rate": 4.46795579957798e-06, + "loss": 0.3179, + "step": 12242 + }, + { + "epoch": 2.09, + "learning_rate": 4.466421933029426e-06, + "loss": 0.3127, + "step": 12243 + }, + { + "epoch": 2.09, + "learning_rate": 4.464888254108517e-06, + "loss": 0.3097, + "step": 12244 + }, + { + "epoch": 2.09, + "learning_rate": 4.463354762867248e-06, + "loss": 0.3026, + "step": 12245 + }, + { + "epoch": 2.09, + "learning_rate": 4.46182145935762e-06, + "loss": 0.3315, + "step": 12246 + }, + { + "epoch": 2.09, + "learning_rate": 4.460288343631626e-06, + "loss": 0.3264, + "step": 12247 + }, + { + "epoch": 2.09, + "learning_rate": 4.458755415741241e-06, + "loss": 0.3272, + "step": 12248 + }, + { + "epoch": 2.09, + "learning_rate": 4.457222675738449e-06, + "loss": 0.3057, + "step": 12249 + }, + { + "epoch": 2.09, + "learning_rate": 4.455690123675217e-06, + "loss": 0.3242, + "step": 12250 + }, + { + "epoch": 2.09, + "learning_rate": 4.454157759603512e-06, + "loss": 0.3065, + "step": 12251 + }, + { + "epoch": 2.09, + "learning_rate": 4.452625583575291e-06, + "loss": 0.347, + "step": 12252 + }, + { + "epoch": 2.09, + "learning_rate": 4.451093595642506e-06, + "loss": 0.31, + "step": 12253 + }, + { + "epoch": 2.09, + "learning_rate": 4.449561795857102e-06, + "loss": 0.3161, + "step": 12254 + }, + { + "epoch": 2.09, + "learning_rate": 4.4480301842710216e-06, + "loss": 0.3158, + "step": 12255 + }, + { + "epoch": 2.09, + "learning_rate": 4.44649876093619e-06, + "loss": 0.3091, + "step": 12256 + }, + { + "epoch": 2.09, + "learning_rate": 4.444967525904538e-06, + "loss": 0.2848, + "step": 12257 + }, + { + "epoch": 2.09, + "learning_rate": 4.4434364792279835e-06, + "loss": 0.3268, + "step": 12258 + }, + { + "epoch": 2.09, + "learning_rate": 4.441905620958441e-06, + "loss": 0.3195, + "step": 12259 + }, + { + "epoch": 2.09, + "learning_rate": 4.440374951147821e-06, + "loss": 0.3584, + "step": 12260 + }, + { + "epoch": 2.09, + "learning_rate": 4.438844469848017e-06, + "loss": 0.3502, + "step": 12261 + }, + { + "epoch": 2.09, + "learning_rate": 4.43731417711093e-06, + "loss": 0.3364, + "step": 12262 + }, + { + "epoch": 2.09, + "learning_rate": 4.435784072988441e-06, + "loss": 0.3168, + "step": 12263 + }, + { + "epoch": 2.09, + "learning_rate": 4.434254157532435e-06, + "loss": 0.3258, + "step": 12264 + }, + { + "epoch": 2.09, + "learning_rate": 4.432724430794786e-06, + "loss": 0.3154, + "step": 12265 + }, + { + "epoch": 2.09, + "learning_rate": 4.431194892827364e-06, + "loss": 0.3179, + "step": 12266 + }, + { + "epoch": 2.09, + "learning_rate": 4.42966554368203e-06, + "loss": 0.3369, + "step": 12267 + }, + { + "epoch": 2.09, + "learning_rate": 4.428136383410642e-06, + "loss": 0.3205, + "step": 12268 + }, + { + "epoch": 2.09, + "learning_rate": 4.426607412065047e-06, + "loss": 0.3128, + "step": 12269 + }, + { + "epoch": 2.09, + "learning_rate": 4.425078629697092e-06, + "loss": 0.3214, + "step": 12270 + }, + { + "epoch": 2.09, + "learning_rate": 4.423550036358607e-06, + "loss": 0.325, + "step": 12271 + }, + { + "epoch": 2.09, + "learning_rate": 4.422021632101426e-06, + "loss": 0.3216, + "step": 12272 + }, + { + "epoch": 2.09, + "learning_rate": 4.420493416977373e-06, + "loss": 0.324, + "step": 12273 + }, + { + "epoch": 2.09, + "learning_rate": 4.418965391038264e-06, + "loss": 0.3269, + "step": 12274 + }, + { + "epoch": 2.09, + "learning_rate": 4.41743755433591e-06, + "loss": 0.3043, + "step": 12275 + }, + { + "epoch": 2.09, + "learning_rate": 4.415909906922121e-06, + "loss": 0.3286, + "step": 12276 + }, + { + "epoch": 2.09, + "learning_rate": 4.414382448848687e-06, + "loss": 0.2991, + "step": 12277 + }, + { + "epoch": 2.09, + "learning_rate": 4.412855180167406e-06, + "loss": 0.2923, + "step": 12278 + }, + { + "epoch": 2.09, + "learning_rate": 4.411328100930056e-06, + "loss": 0.303, + "step": 12279 + }, + { + "epoch": 2.09, + "learning_rate": 4.409801211188422e-06, + "loss": 0.3591, + "step": 12280 + }, + { + "epoch": 2.09, + "learning_rate": 4.4082745109942725e-06, + "loss": 0.3366, + "step": 12281 + }, + { + "epoch": 2.09, + "learning_rate": 4.406748000399377e-06, + "loss": 0.306, + "step": 12282 + }, + { + "epoch": 2.09, + "learning_rate": 4.405221679455492e-06, + "loss": 0.3196, + "step": 12283 + }, + { + "epoch": 2.09, + "learning_rate": 4.403695548214374e-06, + "loss": 0.313, + "step": 12284 + }, + { + "epoch": 2.1, + "learning_rate": 4.402169606727771e-06, + "loss": 0.3342, + "step": 12285 + }, + { + "epoch": 2.1, + "learning_rate": 4.400643855047417e-06, + "loss": 0.3061, + "step": 12286 + }, + { + "epoch": 2.1, + "learning_rate": 4.399118293225048e-06, + "loss": 0.3012, + "step": 12287 + }, + { + "epoch": 2.1, + "learning_rate": 4.3975929213123935e-06, + "loss": 0.3182, + "step": 12288 + }, + { + "epoch": 2.1, + "learning_rate": 4.396067739361172e-06, + "loss": 0.3121, + "step": 12289 + }, + { + "epoch": 2.1, + "learning_rate": 4.394542747423102e-06, + "loss": 0.3229, + "step": 12290 + }, + { + "epoch": 2.1, + "learning_rate": 4.3930179455498916e-06, + "loss": 0.3433, + "step": 12291 + }, + { + "epoch": 2.1, + "learning_rate": 4.391493333793235e-06, + "loss": 0.3272, + "step": 12292 + }, + { + "epoch": 2.1, + "learning_rate": 4.389968912204833e-06, + "loss": 0.3102, + "step": 12293 + }, + { + "epoch": 2.1, + "learning_rate": 4.388444680836377e-06, + "loss": 0.3372, + "step": 12294 + }, + { + "epoch": 2.1, + "learning_rate": 4.386920639739541e-06, + "loss": 0.3041, + "step": 12295 + }, + { + "epoch": 2.1, + "learning_rate": 4.385396788966007e-06, + "loss": 0.3276, + "step": 12296 + }, + { + "epoch": 2.1, + "learning_rate": 4.383873128567443e-06, + "loss": 0.3209, + "step": 12297 + }, + { + "epoch": 2.1, + "learning_rate": 4.3823496585955115e-06, + "loss": 0.3197, + "step": 12298 + }, + { + "epoch": 2.1, + "learning_rate": 4.38082637910187e-06, + "loss": 0.3026, + "step": 12299 + }, + { + "epoch": 2.1, + "learning_rate": 4.379303290138172e-06, + "loss": 0.3266, + "step": 12300 + }, + { + "epoch": 2.1, + "learning_rate": 4.377780391756053e-06, + "loss": 0.3223, + "step": 12301 + }, + { + "epoch": 2.1, + "learning_rate": 4.3762576840071525e-06, + "loss": 0.2988, + "step": 12302 + }, + { + "epoch": 2.1, + "learning_rate": 4.374735166943104e-06, + "loss": 0.3072, + "step": 12303 + }, + { + "epoch": 2.1, + "learning_rate": 4.373212840615529e-06, + "loss": 0.2965, + "step": 12304 + }, + { + "epoch": 2.1, + "learning_rate": 4.371690705076048e-06, + "loss": 0.305, + "step": 12305 + }, + { + "epoch": 2.1, + "learning_rate": 4.37016876037627e-06, + "loss": 0.3162, + "step": 12306 + }, + { + "epoch": 2.1, + "learning_rate": 4.368647006567805e-06, + "loss": 0.3278, + "step": 12307 + }, + { + "epoch": 2.1, + "learning_rate": 4.367125443702241e-06, + "loss": 0.3322, + "step": 12308 + }, + { + "epoch": 2.1, + "learning_rate": 4.365604071831181e-06, + "loss": 0.3141, + "step": 12309 + }, + { + "epoch": 2.1, + "learning_rate": 4.3640828910062004e-06, + "loss": 0.3126, + "step": 12310 + }, + { + "epoch": 2.1, + "learning_rate": 4.362561901278883e-06, + "loss": 0.3279, + "step": 12311 + }, + { + "epoch": 2.1, + "learning_rate": 4.3610411027008e-06, + "loss": 0.3232, + "step": 12312 + }, + { + "epoch": 2.1, + "learning_rate": 4.359520495323519e-06, + "loss": 0.3223, + "step": 12313 + }, + { + "epoch": 2.1, + "learning_rate": 4.3580000791985975e-06, + "loss": 0.3083, + "step": 12314 + }, + { + "epoch": 2.1, + "learning_rate": 4.356479854377593e-06, + "loss": 0.3264, + "step": 12315 + }, + { + "epoch": 2.1, + "learning_rate": 4.354959820912045e-06, + "loss": 0.3023, + "step": 12316 + }, + { + "epoch": 2.1, + "learning_rate": 4.353439978853496e-06, + "loss": 0.331, + "step": 12317 + }, + { + "epoch": 2.1, + "learning_rate": 4.35192032825348e-06, + "loss": 0.3358, + "step": 12318 + }, + { + "epoch": 2.1, + "learning_rate": 4.350400869163526e-06, + "loss": 0.3295, + "step": 12319 + }, + { + "epoch": 2.1, + "learning_rate": 4.348881601635151e-06, + "loss": 0.3199, + "step": 12320 + }, + { + "epoch": 2.1, + "learning_rate": 4.34736252571987e-06, + "loss": 0.3237, + "step": 12321 + }, + { + "epoch": 2.1, + "learning_rate": 4.3458436414691955e-06, + "loss": 0.3208, + "step": 12322 + }, + { + "epoch": 2.1, + "learning_rate": 4.34432494893462e-06, + "loss": 0.3452, + "step": 12323 + }, + { + "epoch": 2.1, + "learning_rate": 4.342806448167641e-06, + "loss": 0.31, + "step": 12324 + }, + { + "epoch": 2.1, + "learning_rate": 4.341288139219752e-06, + "loss": 0.293, + "step": 12325 + }, + { + "epoch": 2.1, + "learning_rate": 4.339770022142426e-06, + "loss": 0.3037, + "step": 12326 + }, + { + "epoch": 2.1, + "learning_rate": 4.338252096987142e-06, + "loss": 0.3246, + "step": 12327 + }, + { + "epoch": 2.1, + "learning_rate": 4.336734363805368e-06, + "loss": 0.3005, + "step": 12328 + }, + { + "epoch": 2.1, + "learning_rate": 4.335216822648567e-06, + "loss": 0.3231, + "step": 12329 + }, + { + "epoch": 2.1, + "learning_rate": 4.333699473568196e-06, + "loss": 0.3374, + "step": 12330 + }, + { + "epoch": 2.1, + "learning_rate": 4.3321823166156985e-06, + "loss": 0.2896, + "step": 12331 + }, + { + "epoch": 2.1, + "learning_rate": 4.33066535184252e-06, + "loss": 0.3124, + "step": 12332 + }, + { + "epoch": 2.1, + "learning_rate": 4.329148579300098e-06, + "loss": 0.3019, + "step": 12333 + }, + { + "epoch": 2.1, + "learning_rate": 4.3276319990398576e-06, + "loss": 0.3314, + "step": 12334 + }, + { + "epoch": 2.1, + "learning_rate": 4.326115611113225e-06, + "loss": 0.3253, + "step": 12335 + }, + { + "epoch": 2.1, + "learning_rate": 4.324599415571616e-06, + "loss": 0.333, + "step": 12336 + }, + { + "epoch": 2.1, + "learning_rate": 4.3230834124664455e-06, + "loss": 0.3106, + "step": 12337 + }, + { + "epoch": 2.1, + "learning_rate": 4.321567601849106e-06, + "loss": 0.3115, + "step": 12338 + }, + { + "epoch": 2.1, + "learning_rate": 4.320051983771001e-06, + "loss": 0.3259, + "step": 12339 + }, + { + "epoch": 2.1, + "learning_rate": 4.318536558283518e-06, + "loss": 0.3215, + "step": 12340 + }, + { + "epoch": 2.1, + "learning_rate": 4.317021325438048e-06, + "loss": 0.3559, + "step": 12341 + }, + { + "epoch": 2.1, + "learning_rate": 4.315506285285956e-06, + "loss": 0.3143, + "step": 12342 + }, + { + "epoch": 2.11, + "learning_rate": 4.31399143787862e-06, + "loss": 0.3404, + "step": 12343 + }, + { + "epoch": 2.11, + "learning_rate": 4.312476783267405e-06, + "loss": 0.3179, + "step": 12344 + }, + { + "epoch": 2.11, + "learning_rate": 4.3109623215036686e-06, + "loss": 0.3035, + "step": 12345 + }, + { + "epoch": 2.11, + "learning_rate": 4.309448052638757e-06, + "loss": 0.3344, + "step": 12346 + }, + { + "epoch": 2.11, + "learning_rate": 4.307933976724017e-06, + "loss": 0.3082, + "step": 12347 + }, + { + "epoch": 2.11, + "learning_rate": 4.306420093810787e-06, + "loss": 0.3302, + "step": 12348 + }, + { + "epoch": 2.11, + "learning_rate": 4.3049064039503994e-06, + "loss": 0.3256, + "step": 12349 + }, + { + "epoch": 2.11, + "learning_rate": 4.303392907194177e-06, + "loss": 0.3248, + "step": 12350 + }, + { + "epoch": 2.11, + "learning_rate": 4.301879603593441e-06, + "loss": 0.3064, + "step": 12351 + }, + { + "epoch": 2.11, + "learning_rate": 4.300366493199504e-06, + "loss": 0.3273, + "step": 12352 + }, + { + "epoch": 2.11, + "learning_rate": 4.2988535760636645e-06, + "loss": 0.3044, + "step": 12353 + }, + { + "epoch": 2.11, + "learning_rate": 4.297340852237225e-06, + "loss": 0.3132, + "step": 12354 + }, + { + "epoch": 2.11, + "learning_rate": 4.2958283217714795e-06, + "loss": 0.3279, + "step": 12355 + }, + { + "epoch": 2.11, + "learning_rate": 4.294315984717714e-06, + "loss": 0.3132, + "step": 12356 + }, + { + "epoch": 2.11, + "learning_rate": 4.292803841127202e-06, + "loss": 0.3003, + "step": 12357 + }, + { + "epoch": 2.11, + "learning_rate": 4.29129189105122e-06, + "loss": 0.336, + "step": 12358 + }, + { + "epoch": 2.11, + "learning_rate": 4.289780134541033e-06, + "loss": 0.3159, + "step": 12359 + }, + { + "epoch": 2.11, + "learning_rate": 4.288268571647904e-06, + "loss": 0.3227, + "step": 12360 + }, + { + "epoch": 2.11, + "learning_rate": 4.286757202423078e-06, + "loss": 0.3189, + "step": 12361 + }, + { + "epoch": 2.11, + "learning_rate": 4.285246026917805e-06, + "loss": 0.3154, + "step": 12362 + }, + { + "epoch": 2.11, + "learning_rate": 4.283735045183326e-06, + "loss": 0.3029, + "step": 12363 + }, + { + "epoch": 2.11, + "learning_rate": 4.282224257270873e-06, + "loss": 0.3309, + "step": 12364 + }, + { + "epoch": 2.11, + "learning_rate": 4.2807136632316725e-06, + "loss": 0.3245, + "step": 12365 + }, + { + "epoch": 2.11, + "learning_rate": 4.279203263116943e-06, + "loss": 0.3009, + "step": 12366 + }, + { + "epoch": 2.11, + "learning_rate": 4.277693056977904e-06, + "loss": 0.3256, + "step": 12367 + }, + { + "epoch": 2.11, + "learning_rate": 4.276183044865754e-06, + "loss": 0.3, + "step": 12368 + }, + { + "epoch": 2.11, + "learning_rate": 4.274673226831695e-06, + "loss": 0.3368, + "step": 12369 + }, + { + "epoch": 2.11, + "learning_rate": 4.273163602926923e-06, + "loss": 0.3043, + "step": 12370 + }, + { + "epoch": 2.11, + "learning_rate": 4.271654173202624e-06, + "loss": 0.3236, + "step": 12371 + }, + { + "epoch": 2.11, + "learning_rate": 4.270144937709981e-06, + "loss": 0.3314, + "step": 12372 + }, + { + "epoch": 2.11, + "learning_rate": 4.268635896500163e-06, + "loss": 0.3335, + "step": 12373 + }, + { + "epoch": 2.11, + "learning_rate": 4.267127049624339e-06, + "loss": 0.3161, + "step": 12374 + }, + { + "epoch": 2.11, + "learning_rate": 4.265618397133674e-06, + "loss": 0.312, + "step": 12375 + }, + { + "epoch": 2.11, + "learning_rate": 4.264109939079314e-06, + "loss": 0.331, + "step": 12376 + }, + { + "epoch": 2.11, + "learning_rate": 4.26260167551241e-06, + "loss": 0.317, + "step": 12377 + }, + { + "epoch": 2.11, + "learning_rate": 4.261093606484104e-06, + "loss": 0.3273, + "step": 12378 + }, + { + "epoch": 2.11, + "learning_rate": 4.25958573204553e-06, + "loss": 0.3037, + "step": 12379 + }, + { + "epoch": 2.11, + "learning_rate": 4.258078052247816e-06, + "loss": 0.3022, + "step": 12380 + }, + { + "epoch": 2.11, + "learning_rate": 4.256570567142082e-06, + "loss": 0.3084, + "step": 12381 + }, + { + "epoch": 2.11, + "learning_rate": 4.255063276779447e-06, + "loss": 0.3221, + "step": 12382 + }, + { + "epoch": 2.11, + "learning_rate": 4.253556181211009e-06, + "loss": 0.3058, + "step": 12383 + }, + { + "epoch": 2.11, + "learning_rate": 4.252049280487877e-06, + "loss": 0.3027, + "step": 12384 + }, + { + "epoch": 2.11, + "learning_rate": 4.250542574661143e-06, + "loss": 0.3258, + "step": 12385 + }, + { + "epoch": 2.11, + "learning_rate": 4.2490360637818965e-06, + "loss": 0.3281, + "step": 12386 + }, + { + "epoch": 2.11, + "learning_rate": 4.247529747901217e-06, + "loss": 0.3182, + "step": 12387 + }, + { + "epoch": 2.11, + "learning_rate": 4.246023627070185e-06, + "loss": 0.3, + "step": 12388 + }, + { + "epoch": 2.11, + "learning_rate": 4.24451770133986e-06, + "loss": 0.3047, + "step": 12389 + }, + { + "epoch": 2.11, + "learning_rate": 4.243011970761311e-06, + "loss": 0.3053, + "step": 12390 + }, + { + "epoch": 2.11, + "learning_rate": 4.241506435385587e-06, + "loss": 0.3194, + "step": 12391 + }, + { + "epoch": 2.11, + "learning_rate": 4.2400010952637384e-06, + "loss": 0.3206, + "step": 12392 + }, + { + "epoch": 2.11, + "learning_rate": 4.238495950446808e-06, + "loss": 0.3419, + "step": 12393 + }, + { + "epoch": 2.11, + "learning_rate": 4.236991000985831e-06, + "loss": 0.3234, + "step": 12394 + }, + { + "epoch": 2.11, + "learning_rate": 4.235486246931835e-06, + "loss": 0.3122, + "step": 12395 + }, + { + "epoch": 2.11, + "learning_rate": 4.233981688335842e-06, + "loss": 0.313, + "step": 12396 + }, + { + "epoch": 2.11, + "learning_rate": 4.232477325248867e-06, + "loss": 0.33, + "step": 12397 + }, + { + "epoch": 2.11, + "learning_rate": 4.230973157721923e-06, + "loss": 0.3045, + "step": 12398 + }, + { + "epoch": 2.11, + "learning_rate": 4.2294691858060055e-06, + "loss": 0.3451, + "step": 12399 + }, + { + "epoch": 2.11, + "learning_rate": 4.227965409552112e-06, + "loss": 0.3298, + "step": 12400 + }, + { + "epoch": 2.11, + "learning_rate": 4.226461829011231e-06, + "loss": 0.3087, + "step": 12401 + }, + { + "epoch": 2.12, + "learning_rate": 4.224958444234346e-06, + "loss": 0.3246, + "step": 12402 + }, + { + "epoch": 2.12, + "learning_rate": 4.223455255272436e-06, + "loss": 0.3122, + "step": 12403 + }, + { + "epoch": 2.12, + "learning_rate": 4.221952262176461e-06, + "loss": 0.3469, + "step": 12404 + }, + { + "epoch": 2.12, + "learning_rate": 4.220449464997387e-06, + "loss": 0.3154, + "step": 12405 + }, + { + "epoch": 2.12, + "learning_rate": 4.218946863786174e-06, + "loss": 0.2874, + "step": 12406 + }, + { + "epoch": 2.12, + "learning_rate": 4.217444458593764e-06, + "loss": 0.3057, + "step": 12407 + }, + { + "epoch": 2.12, + "learning_rate": 4.215942249471101e-06, + "loss": 0.3057, + "step": 12408 + }, + { + "epoch": 2.12, + "learning_rate": 4.214440236469123e-06, + "loss": 0.3069, + "step": 12409 + }, + { + "epoch": 2.12, + "learning_rate": 4.2129384196387565e-06, + "loss": 0.3172, + "step": 12410 + }, + { + "epoch": 2.12, + "learning_rate": 4.211436799030925e-06, + "loss": 0.3336, + "step": 12411 + }, + { + "epoch": 2.12, + "learning_rate": 4.209935374696546e-06, + "loss": 0.33, + "step": 12412 + }, + { + "epoch": 2.12, + "learning_rate": 4.208434146686529e-06, + "loss": 0.3189, + "step": 12413 + }, + { + "epoch": 2.12, + "learning_rate": 4.206933115051769e-06, + "loss": 0.3073, + "step": 12414 + }, + { + "epoch": 2.12, + "learning_rate": 4.205432279843168e-06, + "loss": 0.3341, + "step": 12415 + }, + { + "epoch": 2.12, + "learning_rate": 4.203931641111612e-06, + "loss": 0.2923, + "step": 12416 + }, + { + "epoch": 2.12, + "learning_rate": 4.202431198907987e-06, + "loss": 0.3283, + "step": 12417 + }, + { + "epoch": 2.12, + "learning_rate": 4.200930953283165e-06, + "loss": 0.3205, + "step": 12418 + }, + { + "epoch": 2.12, + "learning_rate": 4.19943090428802e-06, + "loss": 0.2808, + "step": 12419 + }, + { + "epoch": 2.12, + "learning_rate": 4.197931051973407e-06, + "loss": 0.3344, + "step": 12420 + }, + { + "epoch": 2.12, + "learning_rate": 4.196431396390189e-06, + "loss": 0.3172, + "step": 12421 + }, + { + "epoch": 2.12, + "learning_rate": 4.194931937589208e-06, + "loss": 0.3143, + "step": 12422 + }, + { + "epoch": 2.12, + "learning_rate": 4.193432675621311e-06, + "loss": 0.3116, + "step": 12423 + }, + { + "epoch": 2.12, + "learning_rate": 4.191933610537331e-06, + "loss": 0.3037, + "step": 12424 + }, + { + "epoch": 2.12, + "learning_rate": 4.190434742388098e-06, + "loss": 0.312, + "step": 12425 + }, + { + "epoch": 2.12, + "learning_rate": 4.188936071224436e-06, + "loss": 0.3298, + "step": 12426 + }, + { + "epoch": 2.12, + "learning_rate": 4.187437597097158e-06, + "loss": 0.3038, + "step": 12427 + }, + { + "epoch": 2.12, + "learning_rate": 4.185939320057078e-06, + "loss": 0.3442, + "step": 12428 + }, + { + "epoch": 2.12, + "learning_rate": 4.184441240154991e-06, + "loss": 0.3113, + "step": 12429 + }, + { + "epoch": 2.12, + "learning_rate": 4.182943357441695e-06, + "loss": 0.3138, + "step": 12430 + }, + { + "epoch": 2.12, + "learning_rate": 4.181445671967982e-06, + "loss": 0.3252, + "step": 12431 + }, + { + "epoch": 2.12, + "learning_rate": 4.17994818378463e-06, + "loss": 0.3221, + "step": 12432 + }, + { + "epoch": 2.12, + "learning_rate": 4.178450892942417e-06, + "loss": 0.3226, + "step": 12433 + }, + { + "epoch": 2.12, + "learning_rate": 4.1769537994921115e-06, + "loss": 0.3369, + "step": 12434 + }, + { + "epoch": 2.12, + "learning_rate": 4.175456903484479e-06, + "loss": 0.2953, + "step": 12435 + }, + { + "epoch": 2.12, + "learning_rate": 4.173960204970269e-06, + "loss": 0.297, + "step": 12436 + }, + { + "epoch": 2.12, + "learning_rate": 4.1724637040002345e-06, + "loss": 0.3468, + "step": 12437 + }, + { + "epoch": 2.12, + "learning_rate": 4.170967400625112e-06, + "loss": 0.3318, + "step": 12438 + }, + { + "epoch": 2.12, + "learning_rate": 4.1694712948956415e-06, + "loss": 0.3226, + "step": 12439 + }, + { + "epoch": 2.12, + "learning_rate": 4.16797538686255e-06, + "loss": 0.3037, + "step": 12440 + }, + { + "epoch": 2.12, + "learning_rate": 4.16647967657656e-06, + "loss": 0.3133, + "step": 12441 + }, + { + "epoch": 2.12, + "learning_rate": 4.164984164088387e-06, + "loss": 0.3149, + "step": 12442 + }, + { + "epoch": 2.12, + "learning_rate": 4.163488849448742e-06, + "loss": 0.3216, + "step": 12443 + }, + { + "epoch": 2.12, + "learning_rate": 4.16199373270832e-06, + "loss": 0.313, + "step": 12444 + }, + { + "epoch": 2.12, + "learning_rate": 4.160498813917821e-06, + "loss": 0.3316, + "step": 12445 + }, + { + "epoch": 2.12, + "learning_rate": 4.159004093127933e-06, + "loss": 0.3182, + "step": 12446 + }, + { + "epoch": 2.12, + "learning_rate": 4.157509570389336e-06, + "loss": 0.3208, + "step": 12447 + }, + { + "epoch": 2.12, + "learning_rate": 4.1560152457527055e-06, + "loss": 0.3003, + "step": 12448 + }, + { + "epoch": 2.12, + "learning_rate": 4.154521119268712e-06, + "loss": 0.3092, + "step": 12449 + }, + { + "epoch": 2.12, + "learning_rate": 4.153027190988018e-06, + "loss": 0.3245, + "step": 12450 + }, + { + "epoch": 2.12, + "learning_rate": 4.151533460961272e-06, + "loss": 0.323, + "step": 12451 + }, + { + "epoch": 2.12, + "learning_rate": 4.150039929239125e-06, + "loss": 0.3107, + "step": 12452 + }, + { + "epoch": 2.12, + "learning_rate": 4.148546595872224e-06, + "loss": 0.3146, + "step": 12453 + }, + { + "epoch": 2.12, + "learning_rate": 4.147053460911195e-06, + "loss": 0.3121, + "step": 12454 + }, + { + "epoch": 2.12, + "learning_rate": 4.145560524406669e-06, + "loss": 0.3181, + "step": 12455 + }, + { + "epoch": 2.12, + "learning_rate": 4.144067786409267e-06, + "loss": 0.3273, + "step": 12456 + }, + { + "epoch": 2.12, + "learning_rate": 4.142575246969605e-06, + "loss": 0.3077, + "step": 12457 + }, + { + "epoch": 2.12, + "learning_rate": 4.141082906138293e-06, + "loss": 0.3021, + "step": 12458 + }, + { + "epoch": 2.12, + "learning_rate": 4.139590763965925e-06, + "loss": 0.3282, + "step": 12459 + }, + { + "epoch": 2.13, + "learning_rate": 4.138098820503099e-06, + "loss": 0.3061, + "step": 12460 + }, + { + "epoch": 2.13, + "learning_rate": 4.136607075800403e-06, + "loss": 0.3123, + "step": 12461 + }, + { + "epoch": 2.13, + "learning_rate": 4.135115529908417e-06, + "loss": 0.3306, + "step": 12462 + }, + { + "epoch": 2.13, + "learning_rate": 4.1336241828777146e-06, + "loss": 0.325, + "step": 12463 + }, + { + "epoch": 2.13, + "learning_rate": 4.1321330347588636e-06, + "loss": 0.2968, + "step": 12464 + }, + { + "epoch": 2.13, + "learning_rate": 4.13064208560243e-06, + "loss": 0.3116, + "step": 12465 + }, + { + "epoch": 2.13, + "learning_rate": 4.1291513354589576e-06, + "loss": 0.3031, + "step": 12466 + }, + { + "epoch": 2.13, + "learning_rate": 4.127660784378998e-06, + "loss": 0.3128, + "step": 12467 + }, + { + "epoch": 2.13, + "learning_rate": 4.1261704324130945e-06, + "loss": 0.3154, + "step": 12468 + }, + { + "epoch": 2.13, + "learning_rate": 4.1246802796117744e-06, + "loss": 0.3177, + "step": 12469 + }, + { + "epoch": 2.13, + "learning_rate": 4.123190326025568e-06, + "loss": 0.3083, + "step": 12470 + }, + { + "epoch": 2.13, + "learning_rate": 4.121700571704995e-06, + "loss": 0.3069, + "step": 12471 + }, + { + "epoch": 2.13, + "learning_rate": 4.120211016700568e-06, + "loss": 0.3264, + "step": 12472 + }, + { + "epoch": 2.13, + "learning_rate": 4.118721661062799e-06, + "loss": 0.34, + "step": 12473 + }, + { + "epoch": 2.13, + "learning_rate": 4.117232504842179e-06, + "loss": 0.3316, + "step": 12474 + }, + { + "epoch": 2.13, + "learning_rate": 4.115743548089205e-06, + "loss": 0.3436, + "step": 12475 + }, + { + "epoch": 2.13, + "learning_rate": 4.114254790854363e-06, + "loss": 0.3219, + "step": 12476 + }, + { + "epoch": 2.13, + "learning_rate": 4.112766233188132e-06, + "loss": 0.3231, + "step": 12477 + }, + { + "epoch": 2.13, + "learning_rate": 4.111277875140986e-06, + "loss": 0.32, + "step": 12478 + }, + { + "epoch": 2.13, + "learning_rate": 4.109789716763391e-06, + "loss": 0.3264, + "step": 12479 + }, + { + "epoch": 2.13, + "learning_rate": 4.108301758105808e-06, + "loss": 0.3133, + "step": 12480 + }, + { + "epoch": 2.13, + "learning_rate": 4.106813999218684e-06, + "loss": 0.3027, + "step": 12481 + }, + { + "epoch": 2.13, + "learning_rate": 4.1053264401524664e-06, + "loss": 0.3191, + "step": 12482 + }, + { + "epoch": 2.13, + "learning_rate": 4.103839080957596e-06, + "loss": 0.3041, + "step": 12483 + }, + { + "epoch": 2.13, + "learning_rate": 4.102351921684506e-06, + "loss": 0.3086, + "step": 12484 + }, + { + "epoch": 2.13, + "learning_rate": 4.1008649623836176e-06, + "loss": 0.3207, + "step": 12485 + }, + { + "epoch": 2.13, + "learning_rate": 4.09937820310535e-06, + "loss": 0.2897, + "step": 12486 + }, + { + "epoch": 2.13, + "learning_rate": 4.097891643900116e-06, + "loss": 0.3399, + "step": 12487 + }, + { + "epoch": 2.13, + "learning_rate": 4.096405284818325e-06, + "loss": 0.3059, + "step": 12488 + }, + { + "epoch": 2.13, + "learning_rate": 4.0949191259103675e-06, + "loss": 0.3138, + "step": 12489 + }, + { + "epoch": 2.13, + "learning_rate": 4.0934331672266366e-06, + "loss": 0.2781, + "step": 12490 + }, + { + "epoch": 2.13, + "learning_rate": 4.0919474088175195e-06, + "loss": 0.3009, + "step": 12491 + }, + { + "epoch": 2.13, + "learning_rate": 4.090461850733393e-06, + "loss": 0.3182, + "step": 12492 + }, + { + "epoch": 2.13, + "learning_rate": 4.088976493024628e-06, + "loss": 0.3386, + "step": 12493 + }, + { + "epoch": 2.13, + "learning_rate": 4.087491335741588e-06, + "loss": 0.3021, + "step": 12494 + }, + { + "epoch": 2.13, + "learning_rate": 4.086006378934635e-06, + "loss": 0.3342, + "step": 12495 + }, + { + "epoch": 2.13, + "learning_rate": 4.084521622654112e-06, + "loss": 0.3358, + "step": 12496 + }, + { + "epoch": 2.13, + "learning_rate": 4.083037066950367e-06, + "loss": 0.3315, + "step": 12497 + }, + { + "epoch": 2.13, + "learning_rate": 4.081552711873735e-06, + "loss": 0.3107, + "step": 12498 + }, + { + "epoch": 2.13, + "learning_rate": 4.080068557474549e-06, + "loss": 0.3304, + "step": 12499 + }, + { + "epoch": 2.13, + "learning_rate": 4.078584603803134e-06, + "loss": 0.3105, + "step": 12500 + }, + { + "epoch": 2.13, + "learning_rate": 4.077100850909801e-06, + "loss": 0.3472, + "step": 12501 + }, + { + "epoch": 2.13, + "learning_rate": 4.0756172988448614e-06, + "loss": 0.3108, + "step": 12502 + }, + { + "epoch": 2.13, + "learning_rate": 4.0741339476586224e-06, + "loss": 0.3532, + "step": 12503 + }, + { + "epoch": 2.13, + "learning_rate": 4.072650797401374e-06, + "loss": 0.2976, + "step": 12504 + }, + { + "epoch": 2.13, + "learning_rate": 4.071167848123409e-06, + "loss": 0.2982, + "step": 12505 + }, + { + "epoch": 2.13, + "learning_rate": 4.069685099875008e-06, + "loss": 0.2918, + "step": 12506 + }, + { + "epoch": 2.13, + "learning_rate": 4.0682025527064486e-06, + "loss": 0.3136, + "step": 12507 + }, + { + "epoch": 2.13, + "learning_rate": 4.066720206667999e-06, + "loss": 0.3164, + "step": 12508 + }, + { + "epoch": 2.13, + "learning_rate": 4.065238061809922e-06, + "loss": 0.3128, + "step": 12509 + }, + { + "epoch": 2.13, + "learning_rate": 4.063756118182475e-06, + "loss": 0.2998, + "step": 12510 + }, + { + "epoch": 2.13, + "learning_rate": 4.0622743758359e-06, + "loss": 0.2966, + "step": 12511 + }, + { + "epoch": 2.13, + "learning_rate": 4.060792834820443e-06, + "loss": 0.3112, + "step": 12512 + }, + { + "epoch": 2.13, + "learning_rate": 4.059311495186338e-06, + "loss": 0.3176, + "step": 12513 + }, + { + "epoch": 2.13, + "learning_rate": 4.057830356983813e-06, + "loss": 0.3233, + "step": 12514 + }, + { + "epoch": 2.13, + "learning_rate": 4.056349420263092e-06, + "loss": 0.3095, + "step": 12515 + }, + { + "epoch": 2.13, + "learning_rate": 4.0548686850743845e-06, + "loss": 0.3125, + "step": 12516 + }, + { + "epoch": 2.13, + "learning_rate": 4.053388151467898e-06, + "loss": 0.2925, + "step": 12517 + }, + { + "epoch": 2.13, + "learning_rate": 4.05190781949384e-06, + "loss": 0.2989, + "step": 12518 + }, + { + "epoch": 2.14, + "learning_rate": 4.0504276892023945e-06, + "loss": 0.3283, + "step": 12519 + }, + { + "epoch": 2.14, + "learning_rate": 4.048947760643754e-06, + "loss": 0.3047, + "step": 12520 + }, + { + "epoch": 2.14, + "learning_rate": 4.047468033868097e-06, + "loss": 0.303, + "step": 12521 + }, + { + "epoch": 2.14, + "learning_rate": 4.045988508925599e-06, + "loss": 0.3286, + "step": 12522 + }, + { + "epoch": 2.14, + "learning_rate": 4.044509185866423e-06, + "loss": 0.3087, + "step": 12523 + }, + { + "epoch": 2.14, + "learning_rate": 4.043030064740732e-06, + "loss": 0.3204, + "step": 12524 + }, + { + "epoch": 2.14, + "learning_rate": 4.041551145598681e-06, + "loss": 0.3168, + "step": 12525 + }, + { + "epoch": 2.14, + "learning_rate": 4.040072428490408e-06, + "loss": 0.3217, + "step": 12526 + }, + { + "epoch": 2.14, + "learning_rate": 4.038593913466055e-06, + "loss": 0.3396, + "step": 12527 + }, + { + "epoch": 2.14, + "learning_rate": 4.037115600575758e-06, + "loss": 0.3178, + "step": 12528 + }, + { + "epoch": 2.14, + "learning_rate": 4.035637489869637e-06, + "loss": 0.3313, + "step": 12529 + }, + { + "epoch": 2.14, + "learning_rate": 4.034159581397815e-06, + "loss": 0.3197, + "step": 12530 + }, + { + "epoch": 2.14, + "learning_rate": 4.032681875210405e-06, + "loss": 0.3034, + "step": 12531 + }, + { + "epoch": 2.14, + "learning_rate": 4.031204371357503e-06, + "loss": 0.2939, + "step": 12532 + }, + { + "epoch": 2.14, + "learning_rate": 4.029727069889217e-06, + "loss": 0.3379, + "step": 12533 + }, + { + "epoch": 2.14, + "learning_rate": 4.028249970855631e-06, + "loss": 0.3253, + "step": 12534 + }, + { + "epoch": 2.14, + "learning_rate": 4.026773074306831e-06, + "loss": 0.3078, + "step": 12535 + }, + { + "epoch": 2.14, + "learning_rate": 4.025296380292895e-06, + "loss": 0.3111, + "step": 12536 + }, + { + "epoch": 2.14, + "learning_rate": 4.023819888863893e-06, + "loss": 0.3026, + "step": 12537 + }, + { + "epoch": 2.14, + "learning_rate": 4.022343600069888e-06, + "loss": 0.3201, + "step": 12538 + }, + { + "epoch": 2.14, + "learning_rate": 4.020867513960939e-06, + "loss": 0.3013, + "step": 12539 + }, + { + "epoch": 2.14, + "learning_rate": 4.019391630587096e-06, + "loss": 0.301, + "step": 12540 + }, + { + "epoch": 2.14, + "learning_rate": 4.017915949998398e-06, + "loss": 0.3122, + "step": 12541 + }, + { + "epoch": 2.14, + "learning_rate": 4.016440472244884e-06, + "loss": 0.3114, + "step": 12542 + }, + { + "epoch": 2.14, + "learning_rate": 4.014965197376581e-06, + "loss": 0.3193, + "step": 12543 + }, + { + "epoch": 2.14, + "learning_rate": 4.0134901254435135e-06, + "loss": 0.3262, + "step": 12544 + }, + { + "epoch": 2.14, + "learning_rate": 4.012015256495696e-06, + "loss": 0.3191, + "step": 12545 + }, + { + "epoch": 2.14, + "learning_rate": 4.010540590583137e-06, + "loss": 0.3127, + "step": 12546 + }, + { + "epoch": 2.14, + "learning_rate": 4.009066127755843e-06, + "loss": 0.3249, + "step": 12547 + }, + { + "epoch": 2.14, + "learning_rate": 4.0075918680638e-06, + "loss": 0.3217, + "step": 12548 + }, + { + "epoch": 2.14, + "learning_rate": 4.006117811557005e-06, + "loss": 0.3321, + "step": 12549 + }, + { + "epoch": 2.14, + "learning_rate": 4.004643958285428e-06, + "loss": 0.3071, + "step": 12550 + }, + { + "epoch": 2.14, + "learning_rate": 4.00317030829905e-06, + "loss": 0.3165, + "step": 12551 + }, + { + "epoch": 2.14, + "learning_rate": 4.001696861647839e-06, + "loss": 0.3298, + "step": 12552 + }, + { + "epoch": 2.14, + "learning_rate": 4.000223618381752e-06, + "loss": 0.3096, + "step": 12553 + }, + { + "epoch": 2.14, + "learning_rate": 3.998750578550745e-06, + "loss": 0.3054, + "step": 12554 + }, + { + "epoch": 2.14, + "learning_rate": 3.9972777422047674e-06, + "loss": 0.3394, + "step": 12555 + }, + { + "epoch": 2.14, + "learning_rate": 3.9958051093937515e-06, + "loss": 0.3188, + "step": 12556 + }, + { + "epoch": 2.14, + "learning_rate": 3.994332680167633e-06, + "loss": 0.32, + "step": 12557 + }, + { + "epoch": 2.14, + "learning_rate": 3.992860454576339e-06, + "loss": 0.3075, + "step": 12558 + }, + { + "epoch": 2.14, + "learning_rate": 3.991388432669787e-06, + "loss": 0.3143, + "step": 12559 + }, + { + "epoch": 2.14, + "learning_rate": 3.989916614497891e-06, + "loss": 0.3065, + "step": 12560 + }, + { + "epoch": 2.14, + "learning_rate": 3.9884450001105534e-06, + "loss": 0.3385, + "step": 12561 + }, + { + "epoch": 2.14, + "learning_rate": 3.986973589557679e-06, + "loss": 0.3146, + "step": 12562 + }, + { + "epoch": 2.14, + "learning_rate": 3.985502382889149e-06, + "loss": 0.2847, + "step": 12563 + }, + { + "epoch": 2.14, + "learning_rate": 3.984031380154854e-06, + "loss": 0.3001, + "step": 12564 + }, + { + "epoch": 2.14, + "learning_rate": 3.982560581404673e-06, + "loss": 0.2986, + "step": 12565 + }, + { + "epoch": 2.14, + "learning_rate": 3.98108998668847e-06, + "loss": 0.3527, + "step": 12566 + }, + { + "epoch": 2.14, + "learning_rate": 3.979619596056112e-06, + "loss": 0.2984, + "step": 12567 + }, + { + "epoch": 2.14, + "learning_rate": 3.978149409557457e-06, + "loss": 0.3071, + "step": 12568 + }, + { + "epoch": 2.14, + "learning_rate": 3.9766794272423535e-06, + "loss": 0.3133, + "step": 12569 + }, + { + "epoch": 2.14, + "learning_rate": 3.975209649160648e-06, + "loss": 0.311, + "step": 12570 + }, + { + "epoch": 2.14, + "learning_rate": 3.973740075362169e-06, + "loss": 0.3414, + "step": 12571 + }, + { + "epoch": 2.14, + "learning_rate": 3.97227070589675e-06, + "loss": 0.2969, + "step": 12572 + }, + { + "epoch": 2.14, + "learning_rate": 3.970801540814213e-06, + "loss": 0.3386, + "step": 12573 + }, + { + "epoch": 2.14, + "learning_rate": 3.969332580164374e-06, + "loss": 0.3162, + "step": 12574 + }, + { + "epoch": 2.14, + "learning_rate": 3.967863823997038e-06, + "loss": 0.3067, + "step": 12575 + }, + { + "epoch": 2.14, + "learning_rate": 3.966395272362008e-06, + "loss": 0.3363, + "step": 12576 + }, + { + "epoch": 2.14, + "learning_rate": 3.964926925309079e-06, + "loss": 0.2922, + "step": 12577 + }, + { + "epoch": 2.15, + "learning_rate": 3.963458782888043e-06, + "loss": 0.3082, + "step": 12578 + }, + { + "epoch": 2.15, + "learning_rate": 3.961990845148671e-06, + "loss": 0.3115, + "step": 12579 + }, + { + "epoch": 2.15, + "learning_rate": 3.960523112140745e-06, + "loss": 0.3041, + "step": 12580 + }, + { + "epoch": 2.15, + "learning_rate": 3.959055583914023e-06, + "loss": 0.3067, + "step": 12581 + }, + { + "epoch": 2.15, + "learning_rate": 3.95758826051827e-06, + "loss": 0.3103, + "step": 12582 + }, + { + "epoch": 2.15, + "learning_rate": 3.956121142003237e-06, + "loss": 0.3331, + "step": 12583 + }, + { + "epoch": 2.15, + "learning_rate": 3.954654228418673e-06, + "loss": 0.3011, + "step": 12584 + }, + { + "epoch": 2.15, + "learning_rate": 3.953187519814315e-06, + "loss": 0.2952, + "step": 12585 + }, + { + "epoch": 2.15, + "learning_rate": 3.951721016239893e-06, + "loss": 0.3272, + "step": 12586 + }, + { + "epoch": 2.15, + "learning_rate": 3.950254717745132e-06, + "loss": 0.3315, + "step": 12587 + }, + { + "epoch": 2.15, + "learning_rate": 3.948788624379752e-06, + "loss": 0.3081, + "step": 12588 + }, + { + "epoch": 2.15, + "learning_rate": 3.947322736193463e-06, + "loss": 0.3058, + "step": 12589 + }, + { + "epoch": 2.15, + "learning_rate": 3.9458570532359696e-06, + "loss": 0.302, + "step": 12590 + }, + { + "epoch": 2.15, + "learning_rate": 3.9443915755569685e-06, + "loss": 0.3099, + "step": 12591 + }, + { + "epoch": 2.15, + "learning_rate": 3.942926303206149e-06, + "loss": 0.2891, + "step": 12592 + }, + { + "epoch": 2.15, + "learning_rate": 3.941461236233198e-06, + "loss": 0.323, + "step": 12593 + }, + { + "epoch": 2.15, + "learning_rate": 3.939996374687787e-06, + "loss": 0.3349, + "step": 12594 + }, + { + "epoch": 2.15, + "learning_rate": 3.938531718619586e-06, + "loss": 0.3238, + "step": 12595 + }, + { + "epoch": 2.15, + "learning_rate": 3.937067268078262e-06, + "loss": 0.3229, + "step": 12596 + }, + { + "epoch": 2.15, + "learning_rate": 3.935603023113462e-06, + "loss": 0.3342, + "step": 12597 + }, + { + "epoch": 2.15, + "learning_rate": 3.93413898377484e-06, + "loss": 0.2985, + "step": 12598 + }, + { + "epoch": 2.15, + "learning_rate": 3.932675150112036e-06, + "loss": 0.3085, + "step": 12599 + }, + { + "epoch": 2.15, + "learning_rate": 3.931211522174683e-06, + "loss": 0.3173, + "step": 12600 + }, + { + "epoch": 2.15, + "learning_rate": 3.929748100012416e-06, + "loss": 0.3267, + "step": 12601 + }, + { + "epoch": 2.15, + "learning_rate": 3.928284883674844e-06, + "loss": 0.3027, + "step": 12602 + }, + { + "epoch": 2.15, + "learning_rate": 3.926821873211586e-06, + "loss": 0.2873, + "step": 12603 + }, + { + "epoch": 2.15, + "learning_rate": 3.9253590686722484e-06, + "loss": 0.3197, + "step": 12604 + }, + { + "epoch": 2.15, + "learning_rate": 3.9238964701064305e-06, + "loss": 0.2965, + "step": 12605 + }, + { + "epoch": 2.15, + "learning_rate": 3.922434077563726e-06, + "loss": 0.3207, + "step": 12606 + }, + { + "epoch": 2.15, + "learning_rate": 3.9209718910937174e-06, + "loss": 0.2938, + "step": 12607 + }, + { + "epoch": 2.15, + "learning_rate": 3.91950991074599e-06, + "loss": 0.2959, + "step": 12608 + }, + { + "epoch": 2.15, + "learning_rate": 3.918048136570105e-06, + "loss": 0.3386, + "step": 12609 + }, + { + "epoch": 2.15, + "learning_rate": 3.916586568615633e-06, + "loss": 0.3225, + "step": 12610 + }, + { + "epoch": 2.15, + "learning_rate": 3.915125206932132e-06, + "loss": 0.3217, + "step": 12611 + }, + { + "epoch": 2.15, + "learning_rate": 3.913664051569154e-06, + "loss": 0.3008, + "step": 12612 + }, + { + "epoch": 2.15, + "learning_rate": 3.9122031025762365e-06, + "loss": 0.3143, + "step": 12613 + }, + { + "epoch": 2.15, + "learning_rate": 3.910742360002919e-06, + "loss": 0.2934, + "step": 12614 + }, + { + "epoch": 2.15, + "learning_rate": 3.909281823898733e-06, + "loss": 0.3057, + "step": 12615 + }, + { + "epoch": 2.15, + "learning_rate": 3.907821494313202e-06, + "loss": 0.3461, + "step": 12616 + }, + { + "epoch": 2.15, + "learning_rate": 3.906361371295836e-06, + "loss": 0.3165, + "step": 12617 + }, + { + "epoch": 2.15, + "learning_rate": 3.9049014548961465e-06, + "loss": 0.3245, + "step": 12618 + }, + { + "epoch": 2.15, + "learning_rate": 3.903441745163636e-06, + "loss": 0.3161, + "step": 12619 + }, + { + "epoch": 2.15, + "learning_rate": 3.901982242147797e-06, + "loss": 0.3318, + "step": 12620 + }, + { + "epoch": 2.15, + "learning_rate": 3.900522945898118e-06, + "loss": 0.3136, + "step": 12621 + }, + { + "epoch": 2.15, + "learning_rate": 3.8990638564640816e-06, + "loss": 0.3303, + "step": 12622 + }, + { + "epoch": 2.15, + "learning_rate": 3.897604973895161e-06, + "loss": 0.3167, + "step": 12623 + }, + { + "epoch": 2.15, + "learning_rate": 3.896146298240821e-06, + "loss": 0.3009, + "step": 12624 + }, + { + "epoch": 2.15, + "learning_rate": 3.894687829550519e-06, + "loss": 0.319, + "step": 12625 + }, + { + "epoch": 2.15, + "learning_rate": 3.89322956787371e-06, + "loss": 0.3223, + "step": 12626 + }, + { + "epoch": 2.15, + "learning_rate": 3.891771513259842e-06, + "loss": 0.3123, + "step": 12627 + }, + { + "epoch": 2.15, + "learning_rate": 3.890313665758348e-06, + "loss": 0.3095, + "step": 12628 + }, + { + "epoch": 2.15, + "learning_rate": 3.888856025418662e-06, + "loss": 0.3158, + "step": 12629 + }, + { + "epoch": 2.15, + "learning_rate": 3.887398592290209e-06, + "loss": 0.2807, + "step": 12630 + }, + { + "epoch": 2.15, + "learning_rate": 3.885941366422408e-06, + "loss": 0.3252, + "step": 12631 + }, + { + "epoch": 2.15, + "learning_rate": 3.8844843478646655e-06, + "loss": 0.2987, + "step": 12632 + }, + { + "epoch": 2.15, + "learning_rate": 3.883027536666385e-06, + "loss": 0.3416, + "step": 12633 + }, + { + "epoch": 2.15, + "learning_rate": 3.881570932876963e-06, + "loss": 0.3185, + "step": 12634 + }, + { + "epoch": 2.15, + "learning_rate": 3.880114536545793e-06, + "loss": 0.3072, + "step": 12635 + }, + { + "epoch": 2.16, + "learning_rate": 3.878658347722252e-06, + "loss": 0.3076, + "step": 12636 + }, + { + "epoch": 2.16, + "learning_rate": 3.877202366455717e-06, + "loss": 0.2949, + "step": 12637 + }, + { + "epoch": 2.16, + "learning_rate": 3.875746592795559e-06, + "loss": 0.3121, + "step": 12638 + }, + { + "epoch": 2.16, + "learning_rate": 3.874291026791135e-06, + "loss": 0.2959, + "step": 12639 + }, + { + "epoch": 2.16, + "learning_rate": 3.8728356684918e-06, + "loss": 0.3137, + "step": 12640 + }, + { + "epoch": 2.16, + "learning_rate": 3.871380517946901e-06, + "loss": 0.3225, + "step": 12641 + }, + { + "epoch": 2.16, + "learning_rate": 3.869925575205778e-06, + "loss": 0.2965, + "step": 12642 + }, + { + "epoch": 2.16, + "learning_rate": 3.868470840317768e-06, + "loss": 0.322, + "step": 12643 + }, + { + "epoch": 2.16, + "learning_rate": 3.86701631333219e-06, + "loss": 0.3153, + "step": 12644 + }, + { + "epoch": 2.16, + "learning_rate": 3.8655619942983655e-06, + "loss": 0.3448, + "step": 12645 + }, + { + "epoch": 2.16, + "learning_rate": 3.864107883265611e-06, + "loss": 0.3231, + "step": 12646 + }, + { + "epoch": 2.16, + "learning_rate": 3.862653980283222e-06, + "loss": 0.315, + "step": 12647 + }, + { + "epoch": 2.16, + "learning_rate": 3.861200285400501e-06, + "loss": 0.3235, + "step": 12648 + }, + { + "epoch": 2.16, + "learning_rate": 3.8597467986667405e-06, + "loss": 0.3088, + "step": 12649 + }, + { + "epoch": 2.16, + "learning_rate": 3.858293520131221e-06, + "loss": 0.309, + "step": 12650 + }, + { + "epoch": 2.16, + "learning_rate": 3.85684044984322e-06, + "loss": 0.3278, + "step": 12651 + }, + { + "epoch": 2.16, + "learning_rate": 3.855387587852007e-06, + "loss": 0.3128, + "step": 12652 + }, + { + "epoch": 2.16, + "learning_rate": 3.853934934206849e-06, + "loss": 0.3194, + "step": 12653 + }, + { + "epoch": 2.16, + "learning_rate": 3.852482488956992e-06, + "loss": 0.3003, + "step": 12654 + }, + { + "epoch": 2.16, + "learning_rate": 3.851030252151689e-06, + "loss": 0.3296, + "step": 12655 + }, + { + "epoch": 2.16, + "learning_rate": 3.849578223840183e-06, + "loss": 0.3267, + "step": 12656 + }, + { + "epoch": 2.16, + "learning_rate": 3.848126404071704e-06, + "loss": 0.3211, + "step": 12657 + }, + { + "epoch": 2.16, + "learning_rate": 3.846674792895485e-06, + "loss": 0.318, + "step": 12658 + }, + { + "epoch": 2.16, + "learning_rate": 3.845223390360739e-06, + "loss": 0.3005, + "step": 12659 + }, + { + "epoch": 2.16, + "learning_rate": 3.8437721965166815e-06, + "loss": 0.313, + "step": 12660 + }, + { + "epoch": 2.16, + "learning_rate": 3.842321211412523e-06, + "loss": 0.3107, + "step": 12661 + }, + { + "epoch": 2.16, + "learning_rate": 3.840870435097455e-06, + "loss": 0.3167, + "step": 12662 + }, + { + "epoch": 2.16, + "learning_rate": 3.83941986762067e-06, + "loss": 0.3018, + "step": 12663 + }, + { + "epoch": 2.16, + "learning_rate": 3.837969509031356e-06, + "loss": 0.3223, + "step": 12664 + }, + { + "epoch": 2.16, + "learning_rate": 3.83651935937869e-06, + "loss": 0.3105, + "step": 12665 + }, + { + "epoch": 2.16, + "learning_rate": 3.8350694187118395e-06, + "loss": 0.3188, + "step": 12666 + }, + { + "epoch": 2.16, + "learning_rate": 3.833619687079972e-06, + "loss": 0.3119, + "step": 12667 + }, + { + "epoch": 2.16, + "learning_rate": 3.832170164532244e-06, + "loss": 0.3068, + "step": 12668 + }, + { + "epoch": 2.16, + "learning_rate": 3.8307208511177985e-06, + "loss": 0.3279, + "step": 12669 + }, + { + "epoch": 2.16, + "learning_rate": 3.829271746885781e-06, + "loss": 0.333, + "step": 12670 + }, + { + "epoch": 2.16, + "learning_rate": 3.8278228518853265e-06, + "loss": 0.2989, + "step": 12671 + }, + { + "epoch": 2.16, + "learning_rate": 3.826374166165564e-06, + "loss": 0.295, + "step": 12672 + }, + { + "epoch": 2.16, + "learning_rate": 3.824925689775613e-06, + "loss": 0.3265, + "step": 12673 + }, + { + "epoch": 2.16, + "learning_rate": 3.82347742276459e-06, + "loss": 0.332, + "step": 12674 + }, + { + "epoch": 2.16, + "learning_rate": 3.822029365181595e-06, + "loss": 0.3154, + "step": 12675 + }, + { + "epoch": 2.16, + "learning_rate": 3.820581517075737e-06, + "loss": 0.3061, + "step": 12676 + }, + { + "epoch": 2.16, + "learning_rate": 3.819133878496098e-06, + "loss": 0.3231, + "step": 12677 + }, + { + "epoch": 2.16, + "learning_rate": 3.817686449491766e-06, + "loss": 0.3175, + "step": 12678 + }, + { + "epoch": 2.16, + "learning_rate": 3.816239230111824e-06, + "loss": 0.3267, + "step": 12679 + }, + { + "epoch": 2.16, + "learning_rate": 3.814792220405339e-06, + "loss": 0.327, + "step": 12680 + }, + { + "epoch": 2.16, + "learning_rate": 3.813345420421376e-06, + "loss": 0.3063, + "step": 12681 + }, + { + "epoch": 2.16, + "learning_rate": 3.811898830208991e-06, + "loss": 0.3302, + "step": 12682 + }, + { + "epoch": 2.16, + "learning_rate": 3.810452449817239e-06, + "loss": 0.3014, + "step": 12683 + }, + { + "epoch": 2.16, + "learning_rate": 3.8090062792951544e-06, + "loss": 0.3179, + "step": 12684 + }, + { + "epoch": 2.16, + "learning_rate": 3.8075603186917765e-06, + "loss": 0.3137, + "step": 12685 + }, + { + "epoch": 2.16, + "learning_rate": 3.806114568056133e-06, + "loss": 0.3115, + "step": 12686 + }, + { + "epoch": 2.16, + "learning_rate": 3.8046690274372466e-06, + "loss": 0.3298, + "step": 12687 + }, + { + "epoch": 2.16, + "learning_rate": 3.8032236968841294e-06, + "loss": 0.3029, + "step": 12688 + }, + { + "epoch": 2.16, + "learning_rate": 3.8017785764457904e-06, + "loss": 0.3101, + "step": 12689 + }, + { + "epoch": 2.16, + "learning_rate": 3.800333666171231e-06, + "loss": 0.3107, + "step": 12690 + }, + { + "epoch": 2.16, + "learning_rate": 3.7988889661094385e-06, + "loss": 0.3263, + "step": 12691 + }, + { + "epoch": 2.16, + "learning_rate": 3.7974444763094044e-06, + "loss": 0.3071, + "step": 12692 + }, + { + "epoch": 2.16, + "learning_rate": 3.796000196820101e-06, + "loss": 0.292, + "step": 12693 + }, + { + "epoch": 2.16, + "learning_rate": 3.7945561276905028e-06, + "loss": 0.3294, + "step": 12694 + }, + { + "epoch": 2.17, + "learning_rate": 3.7931122689695742e-06, + "loss": 0.3087, + "step": 12695 + }, + { + "epoch": 2.17, + "learning_rate": 3.7916686207062723e-06, + "loss": 0.3118, + "step": 12696 + }, + { + "epoch": 2.17, + "learning_rate": 3.790225182949546e-06, + "loss": 0.3232, + "step": 12697 + }, + { + "epoch": 2.17, + "learning_rate": 3.788781955748343e-06, + "loss": 0.3016, + "step": 12698 + }, + { + "epoch": 2.17, + "learning_rate": 3.7873389391515914e-06, + "loss": 0.3218, + "step": 12699 + }, + { + "epoch": 2.17, + "learning_rate": 3.7858961332082235e-06, + "loss": 0.3264, + "step": 12700 + }, + { + "epoch": 2.17, + "learning_rate": 3.784453537967161e-06, + "loss": 0.3188, + "step": 12701 + }, + { + "epoch": 2.17, + "learning_rate": 3.7830111534773163e-06, + "loss": 0.3173, + "step": 12702 + }, + { + "epoch": 2.17, + "learning_rate": 3.781568979787599e-06, + "loss": 0.3359, + "step": 12703 + }, + { + "epoch": 2.17, + "learning_rate": 3.7801270169469064e-06, + "loss": 0.3278, + "step": 12704 + }, + { + "epoch": 2.17, + "learning_rate": 3.778685265004137e-06, + "loss": 0.3253, + "step": 12705 + }, + { + "epoch": 2.17, + "learning_rate": 3.777243724008168e-06, + "loss": 0.3173, + "step": 12706 + }, + { + "epoch": 2.17, + "learning_rate": 3.7758023940078824e-06, + "loss": 0.2955, + "step": 12707 + }, + { + "epoch": 2.17, + "learning_rate": 3.7743612750521553e-06, + "loss": 0.3374, + "step": 12708 + }, + { + "epoch": 2.17, + "learning_rate": 3.7729203671898427e-06, + "loss": 0.3127, + "step": 12709 + }, + { + "epoch": 2.17, + "learning_rate": 3.7714796704698055e-06, + "loss": 0.2963, + "step": 12710 + }, + { + "epoch": 2.17, + "learning_rate": 3.7700391849408937e-06, + "loss": 0.333, + "step": 12711 + }, + { + "epoch": 2.17, + "learning_rate": 3.7685989106519505e-06, + "loss": 0.3264, + "step": 12712 + }, + { + "epoch": 2.17, + "learning_rate": 3.767158847651814e-06, + "loss": 0.3041, + "step": 12713 + }, + { + "epoch": 2.17, + "learning_rate": 3.765718995989306e-06, + "loss": 0.3165, + "step": 12714 + }, + { + "epoch": 2.17, + "learning_rate": 3.7642793557132516e-06, + "loss": 0.3103, + "step": 12715 + }, + { + "epoch": 2.17, + "learning_rate": 3.7628399268724647e-06, + "loss": 0.2988, + "step": 12716 + }, + { + "epoch": 2.17, + "learning_rate": 3.761400709515751e-06, + "loss": 0.3215, + "step": 12717 + }, + { + "epoch": 2.17, + "learning_rate": 3.759961703691912e-06, + "loss": 0.3032, + "step": 12718 + }, + { + "epoch": 2.17, + "learning_rate": 3.758522909449739e-06, + "loss": 0.3101, + "step": 12719 + }, + { + "epoch": 2.17, + "learning_rate": 3.7570843268380185e-06, + "loss": 0.2912, + "step": 12720 + }, + { + "epoch": 2.17, + "learning_rate": 3.7556459559055303e-06, + "loss": 0.3192, + "step": 12721 + }, + { + "epoch": 2.17, + "learning_rate": 3.7542077967010404e-06, + "loss": 0.3257, + "step": 12722 + }, + { + "epoch": 2.17, + "learning_rate": 3.7527698492733177e-06, + "loss": 0.3139, + "step": 12723 + }, + { + "epoch": 2.17, + "learning_rate": 3.7513321136711147e-06, + "loss": 0.3122, + "step": 12724 + }, + { + "epoch": 2.17, + "learning_rate": 3.7498945899431806e-06, + "loss": 0.3166, + "step": 12725 + }, + { + "epoch": 2.17, + "learning_rate": 3.748457278138261e-06, + "loss": 0.3032, + "step": 12726 + }, + { + "epoch": 2.17, + "learning_rate": 3.7470201783050886e-06, + "loss": 0.3212, + "step": 12727 + }, + { + "epoch": 2.17, + "learning_rate": 3.7455832904923963e-06, + "loss": 0.3189, + "step": 12728 + }, + { + "epoch": 2.17, + "learning_rate": 3.744146614748897e-06, + "loss": 0.3268, + "step": 12729 + }, + { + "epoch": 2.17, + "learning_rate": 3.7427101511233088e-06, + "loss": 0.3088, + "step": 12730 + }, + { + "epoch": 2.17, + "learning_rate": 3.741273899664337e-06, + "loss": 0.3147, + "step": 12731 + }, + { + "epoch": 2.17, + "learning_rate": 3.739837860420681e-06, + "loss": 0.301, + "step": 12732 + }, + { + "epoch": 2.17, + "learning_rate": 3.738402033441032e-06, + "loss": 0.3105, + "step": 12733 + }, + { + "epoch": 2.17, + "learning_rate": 3.7369664187740762e-06, + "loss": 0.3328, + "step": 12734 + }, + { + "epoch": 2.17, + "learning_rate": 3.735531016468491e-06, + "loss": 0.3137, + "step": 12735 + }, + { + "epoch": 2.17, + "learning_rate": 3.7340958265729486e-06, + "loss": 0.3198, + "step": 12736 + }, + { + "epoch": 2.17, + "learning_rate": 3.7326608491361072e-06, + "loss": 0.3051, + "step": 12737 + }, + { + "epoch": 2.17, + "learning_rate": 3.731226084206625e-06, + "loss": 0.2913, + "step": 12738 + }, + { + "epoch": 2.17, + "learning_rate": 3.7297915318331536e-06, + "loss": 0.3037, + "step": 12739 + }, + { + "epoch": 2.17, + "learning_rate": 3.728357192064329e-06, + "loss": 0.3006, + "step": 12740 + }, + { + "epoch": 2.17, + "learning_rate": 3.7269230649487888e-06, + "loss": 0.3169, + "step": 12741 + }, + { + "epoch": 2.17, + "learning_rate": 3.7254891505351596e-06, + "loss": 0.2923, + "step": 12742 + }, + { + "epoch": 2.17, + "learning_rate": 3.7240554488720647e-06, + "loss": 0.293, + "step": 12743 + }, + { + "epoch": 2.17, + "learning_rate": 3.7226219600081105e-06, + "loss": 0.3149, + "step": 12744 + }, + { + "epoch": 2.17, + "learning_rate": 3.7211886839919054e-06, + "loss": 0.3333, + "step": 12745 + }, + { + "epoch": 2.17, + "learning_rate": 3.7197556208720474e-06, + "loss": 0.3118, + "step": 12746 + }, + { + "epoch": 2.17, + "learning_rate": 3.7183227706971283e-06, + "loss": 0.3232, + "step": 12747 + }, + { + "epoch": 2.17, + "learning_rate": 3.7168901335157313e-06, + "loss": 0.3224, + "step": 12748 + }, + { + "epoch": 2.17, + "learning_rate": 3.7154577093764334e-06, + "loss": 0.3346, + "step": 12749 + }, + { + "epoch": 2.17, + "learning_rate": 3.7140254983278033e-06, + "loss": 0.2864, + "step": 12750 + }, + { + "epoch": 2.17, + "learning_rate": 3.7125935004184067e-06, + "loss": 0.301, + "step": 12751 + }, + { + "epoch": 2.17, + "learning_rate": 3.711161715696793e-06, + "loss": 0.3245, + "step": 12752 + }, + { + "epoch": 2.17, + "learning_rate": 3.709730144211511e-06, + "loss": 0.3189, + "step": 12753 + }, + { + "epoch": 2.18, + "learning_rate": 3.708298786011103e-06, + "loss": 0.2911, + "step": 12754 + }, + { + "epoch": 2.18, + "learning_rate": 3.706867641144105e-06, + "loss": 0.3105, + "step": 12755 + }, + { + "epoch": 2.18, + "learning_rate": 3.705436709659037e-06, + "loss": 0.3149, + "step": 12756 + }, + { + "epoch": 2.18, + "learning_rate": 3.7040059916044193e-06, + "loss": 0.292, + "step": 12757 + }, + { + "epoch": 2.18, + "learning_rate": 3.702575487028768e-06, + "loss": 0.3161, + "step": 12758 + }, + { + "epoch": 2.18, + "learning_rate": 3.701145195980581e-06, + "loss": 0.3065, + "step": 12759 + }, + { + "epoch": 2.18, + "learning_rate": 3.699715118508358e-06, + "loss": 0.3043, + "step": 12760 + }, + { + "epoch": 2.18, + "learning_rate": 3.6982852546605886e-06, + "loss": 0.3148, + "step": 12761 + }, + { + "epoch": 2.18, + "learning_rate": 3.696855604485756e-06, + "loss": 0.3084, + "step": 12762 + }, + { + "epoch": 2.18, + "learning_rate": 3.6954261680323358e-06, + "loss": 0.3187, + "step": 12763 + }, + { + "epoch": 2.18, + "learning_rate": 3.6939969453487946e-06, + "loss": 0.3183, + "step": 12764 + }, + { + "epoch": 2.18, + "learning_rate": 3.6925679364835954e-06, + "loss": 0.3181, + "step": 12765 + }, + { + "epoch": 2.18, + "learning_rate": 3.691139141485193e-06, + "loss": 0.2943, + "step": 12766 + }, + { + "epoch": 2.18, + "learning_rate": 3.689710560402028e-06, + "loss": 0.3391, + "step": 12767 + }, + { + "epoch": 2.18, + "learning_rate": 3.688282193282542e-06, + "loss": 0.3116, + "step": 12768 + }, + { + "epoch": 2.18, + "learning_rate": 3.6868540401751685e-06, + "loss": 0.3123, + "step": 12769 + }, + { + "epoch": 2.18, + "learning_rate": 3.685426101128333e-06, + "loss": 0.3172, + "step": 12770 + }, + { + "epoch": 2.18, + "learning_rate": 3.6839983761904484e-06, + "loss": 0.3201, + "step": 12771 + }, + { + "epoch": 2.18, + "learning_rate": 3.682570865409927e-06, + "loss": 0.3081, + "step": 12772 + }, + { + "epoch": 2.18, + "learning_rate": 3.6811435688351748e-06, + "loss": 0.2878, + "step": 12773 + }, + { + "epoch": 2.18, + "learning_rate": 3.679716486514582e-06, + "loss": 0.2993, + "step": 12774 + }, + { + "epoch": 2.18, + "learning_rate": 3.6782896184965377e-06, + "loss": 0.3215, + "step": 12775 + }, + { + "epoch": 2.18, + "learning_rate": 3.676862964829424e-06, + "loss": 0.3035, + "step": 12776 + }, + { + "epoch": 2.18, + "learning_rate": 3.6754365255616153e-06, + "loss": 0.3022, + "step": 12777 + }, + { + "epoch": 2.18, + "learning_rate": 3.6740103007414774e-06, + "loss": 0.3357, + "step": 12778 + }, + { + "epoch": 2.18, + "learning_rate": 3.67258429041737e-06, + "loss": 0.3047, + "step": 12779 + }, + { + "epoch": 2.18, + "learning_rate": 3.6711584946376444e-06, + "loss": 0.3045, + "step": 12780 + }, + { + "epoch": 2.18, + "learning_rate": 3.6697329134506487e-06, + "loss": 0.3308, + "step": 12781 + }, + { + "epoch": 2.18, + "learning_rate": 3.6683075469047135e-06, + "loss": 0.3613, + "step": 12782 + }, + { + "epoch": 2.18, + "learning_rate": 3.6668823950481726e-06, + "loss": 0.3242, + "step": 12783 + }, + { + "epoch": 2.18, + "learning_rate": 3.6654574579293476e-06, + "loss": 0.308, + "step": 12784 + }, + { + "epoch": 2.18, + "learning_rate": 3.6640327355965554e-06, + "loss": 0.3054, + "step": 12785 + }, + { + "epoch": 2.18, + "learning_rate": 3.662608228098107e-06, + "loss": 0.3056, + "step": 12786 + }, + { + "epoch": 2.18, + "learning_rate": 3.6611839354822964e-06, + "loss": 0.3128, + "step": 12787 + }, + { + "epoch": 2.18, + "learning_rate": 3.6597598577974214e-06, + "loss": 0.2976, + "step": 12788 + }, + { + "epoch": 2.18, + "learning_rate": 3.658335995091771e-06, + "loss": 0.3081, + "step": 12789 + }, + { + "epoch": 2.18, + "learning_rate": 3.656912347413618e-06, + "loss": 0.338, + "step": 12790 + }, + { + "epoch": 2.18, + "learning_rate": 3.655488914811237e-06, + "loss": 0.3153, + "step": 12791 + }, + { + "epoch": 2.18, + "learning_rate": 3.6540656973328927e-06, + "loss": 0.2965, + "step": 12792 + }, + { + "epoch": 2.18, + "learning_rate": 3.6526426950268424e-06, + "loss": 0.2958, + "step": 12793 + }, + { + "epoch": 2.18, + "learning_rate": 3.651219907941336e-06, + "loss": 0.3148, + "step": 12794 + }, + { + "epoch": 2.18, + "learning_rate": 3.6497973361246153e-06, + "loss": 0.3284, + "step": 12795 + }, + { + "epoch": 2.18, + "learning_rate": 3.648374979624921e-06, + "loss": 0.3254, + "step": 12796 + }, + { + "epoch": 2.18, + "learning_rate": 3.646952838490473e-06, + "loss": 0.3118, + "step": 12797 + }, + { + "epoch": 2.18, + "learning_rate": 3.645530912769494e-06, + "loss": 0.327, + "step": 12798 + }, + { + "epoch": 2.18, + "learning_rate": 3.6441092025102e-06, + "loss": 0.3241, + "step": 12799 + }, + { + "epoch": 2.18, + "learning_rate": 3.642687707760796e-06, + "loss": 0.3205, + "step": 12800 + }, + { + "epoch": 2.18, + "learning_rate": 3.6412664285694797e-06, + "loss": 0.3117, + "step": 12801 + }, + { + "epoch": 2.18, + "learning_rate": 3.639845364984448e-06, + "loss": 0.3131, + "step": 12802 + }, + { + "epoch": 2.18, + "learning_rate": 3.638424517053877e-06, + "loss": 0.3256, + "step": 12803 + }, + { + "epoch": 2.18, + "learning_rate": 3.6370038848259505e-06, + "loss": 0.3123, + "step": 12804 + }, + { + "epoch": 2.18, + "learning_rate": 3.635583468348831e-06, + "loss": 0.2972, + "step": 12805 + }, + { + "epoch": 2.18, + "learning_rate": 3.634163267670685e-06, + "loss": 0.3149, + "step": 12806 + }, + { + "epoch": 2.18, + "learning_rate": 3.632743282839666e-06, + "loss": 0.3133, + "step": 12807 + }, + { + "epoch": 2.18, + "learning_rate": 3.6313235139039226e-06, + "loss": 0.3124, + "step": 12808 + }, + { + "epoch": 2.18, + "learning_rate": 3.629903960911596e-06, + "loss": 0.3115, + "step": 12809 + }, + { + "epoch": 2.18, + "learning_rate": 3.628484623910816e-06, + "loss": 0.3149, + "step": 12810 + }, + { + "epoch": 2.18, + "learning_rate": 3.6270655029497158e-06, + "loss": 0.2993, + "step": 12811 + }, + { + "epoch": 2.19, + "learning_rate": 3.625646598076403e-06, + "loss": 0.313, + "step": 12812 + }, + { + "epoch": 2.19, + "learning_rate": 3.624227909338995e-06, + "loss": 0.3064, + "step": 12813 + }, + { + "epoch": 2.19, + "learning_rate": 3.6228094367855936e-06, + "loss": 0.3287, + "step": 12814 + }, + { + "epoch": 2.19, + "learning_rate": 3.6213911804642955e-06, + "loss": 0.3373, + "step": 12815 + }, + { + "epoch": 2.19, + "learning_rate": 3.6199731404231896e-06, + "loss": 0.2919, + "step": 12816 + }, + { + "epoch": 2.19, + "learning_rate": 3.6185553167103617e-06, + "loss": 0.2994, + "step": 12817 + }, + { + "epoch": 2.19, + "learning_rate": 3.617137709373878e-06, + "loss": 0.33, + "step": 12818 + }, + { + "epoch": 2.19, + "learning_rate": 3.6157203184618106e-06, + "loss": 0.3178, + "step": 12819 + }, + { + "epoch": 2.19, + "learning_rate": 3.614303144022221e-06, + "loss": 0.3279, + "step": 12820 + }, + { + "epoch": 2.19, + "learning_rate": 3.612886186103155e-06, + "loss": 0.3218, + "step": 12821 + }, + { + "epoch": 2.19, + "learning_rate": 3.611469444752661e-06, + "loss": 0.3124, + "step": 12822 + }, + { + "epoch": 2.19, + "learning_rate": 3.6100529200187773e-06, + "loss": 0.339, + "step": 12823 + }, + { + "epoch": 2.19, + "learning_rate": 3.608636611949534e-06, + "loss": 0.3047, + "step": 12824 + }, + { + "epoch": 2.19, + "learning_rate": 3.6072205205929534e-06, + "loss": 0.2943, + "step": 12825 + }, + { + "epoch": 2.19, + "learning_rate": 3.605804645997054e-06, + "loss": 0.3298, + "step": 12826 + }, + { + "epoch": 2.19, + "learning_rate": 3.604388988209838e-06, + "loss": 0.3051, + "step": 12827 + }, + { + "epoch": 2.19, + "learning_rate": 3.6029735472793103e-06, + "loss": 0.3052, + "step": 12828 + }, + { + "epoch": 2.19, + "learning_rate": 3.6015583232534633e-06, + "loss": 0.3404, + "step": 12829 + }, + { + "epoch": 2.19, + "learning_rate": 3.6001433161802834e-06, + "loss": 0.3272, + "step": 12830 + }, + { + "epoch": 2.19, + "learning_rate": 3.598728526107751e-06, + "loss": 0.3141, + "step": 12831 + }, + { + "epoch": 2.19, + "learning_rate": 3.597313953083834e-06, + "loss": 0.3077, + "step": 12832 + }, + { + "epoch": 2.19, + "learning_rate": 3.5958995971565036e-06, + "loss": 0.3066, + "step": 12833 + }, + { + "epoch": 2.19, + "learning_rate": 3.5944854583737076e-06, + "loss": 0.3209, + "step": 12834 + }, + { + "epoch": 2.19, + "learning_rate": 3.593071536783402e-06, + "loss": 0.3239, + "step": 12835 + }, + { + "epoch": 2.19, + "learning_rate": 3.591657832433524e-06, + "loss": 0.3219, + "step": 12836 + }, + { + "epoch": 2.19, + "learning_rate": 3.5902443453720094e-06, + "loss": 0.3245, + "step": 12837 + }, + { + "epoch": 2.19, + "learning_rate": 3.588831075646786e-06, + "loss": 0.3105, + "step": 12838 + }, + { + "epoch": 2.19, + "learning_rate": 3.5874180233057755e-06, + "loss": 0.3231, + "step": 12839 + }, + { + "epoch": 2.19, + "learning_rate": 3.5860051883968873e-06, + "loss": 0.312, + "step": 12840 + }, + { + "epoch": 2.19, + "learning_rate": 3.5845925709680328e-06, + "loss": 0.3132, + "step": 12841 + }, + { + "epoch": 2.19, + "learning_rate": 3.583180171067101e-06, + "loss": 0.3151, + "step": 12842 + }, + { + "epoch": 2.19, + "learning_rate": 3.581767988741988e-06, + "loss": 0.307, + "step": 12843 + }, + { + "epoch": 2.19, + "learning_rate": 3.580356024040573e-06, + "loss": 0.3185, + "step": 12844 + }, + { + "epoch": 2.19, + "learning_rate": 3.5789442770107363e-06, + "loss": 0.3232, + "step": 12845 + }, + { + "epoch": 2.19, + "learning_rate": 3.577532747700342e-06, + "loss": 0.3137, + "step": 12846 + }, + { + "epoch": 2.19, + "learning_rate": 3.576121436157254e-06, + "loss": 0.2968, + "step": 12847 + }, + { + "epoch": 2.19, + "learning_rate": 3.574710342429324e-06, + "loss": 0.3183, + "step": 12848 + }, + { + "epoch": 2.19, + "learning_rate": 3.5732994665644027e-06, + "loss": 0.3324, + "step": 12849 + }, + { + "epoch": 2.19, + "learning_rate": 3.571888808610321e-06, + "loss": 0.3429, + "step": 12850 + }, + { + "epoch": 2.19, + "learning_rate": 3.570478368614917e-06, + "loss": 0.3178, + "step": 12851 + }, + { + "epoch": 2.19, + "learning_rate": 3.569068146626009e-06, + "loss": 0.3058, + "step": 12852 + }, + { + "epoch": 2.19, + "learning_rate": 3.5676581426914158e-06, + "loss": 0.334, + "step": 12853 + }, + { + "epoch": 2.19, + "learning_rate": 3.566248356858948e-06, + "loss": 0.3031, + "step": 12854 + }, + { + "epoch": 2.19, + "learning_rate": 3.5648387891764058e-06, + "loss": 0.3079, + "step": 12855 + }, + { + "epoch": 2.19, + "learning_rate": 3.563429439691587e-06, + "loss": 0.311, + "step": 12856 + }, + { + "epoch": 2.19, + "learning_rate": 3.562020308452273e-06, + "loss": 0.3106, + "step": 12857 + }, + { + "epoch": 2.19, + "learning_rate": 3.560611395506246e-06, + "loss": 0.331, + "step": 12858 + }, + { + "epoch": 2.19, + "learning_rate": 3.5592027009012784e-06, + "loss": 0.3311, + "step": 12859 + }, + { + "epoch": 2.19, + "learning_rate": 3.5577942246851347e-06, + "loss": 0.3322, + "step": 12860 + }, + { + "epoch": 2.19, + "learning_rate": 3.5563859669055723e-06, + "loss": 0.3167, + "step": 12861 + }, + { + "epoch": 2.19, + "learning_rate": 3.5549779276103414e-06, + "loss": 0.3007, + "step": 12862 + }, + { + "epoch": 2.19, + "learning_rate": 3.553570106847184e-06, + "loss": 0.3057, + "step": 12863 + }, + { + "epoch": 2.19, + "learning_rate": 3.5521625046638388e-06, + "loss": 0.336, + "step": 12864 + }, + { + "epoch": 2.19, + "learning_rate": 3.550755121108028e-06, + "loss": 0.3084, + "step": 12865 + }, + { + "epoch": 2.19, + "learning_rate": 3.5493479562274736e-06, + "loss": 0.3062, + "step": 12866 + }, + { + "epoch": 2.19, + "learning_rate": 3.547941010069892e-06, + "loss": 0.3281, + "step": 12867 + }, + { + "epoch": 2.19, + "learning_rate": 3.546534282682983e-06, + "loss": 0.3102, + "step": 12868 + }, + { + "epoch": 2.19, + "learning_rate": 3.5451277741144476e-06, + "loss": 0.3359, + "step": 12869 + }, + { + "epoch": 2.19, + "learning_rate": 3.543721484411976e-06, + "loss": 0.3149, + "step": 12870 + }, + { + "epoch": 2.2, + "learning_rate": 3.5423154136232564e-06, + "loss": 0.2993, + "step": 12871 + }, + { + "epoch": 2.2, + "learning_rate": 3.5409095617959544e-06, + "loss": 0.3107, + "step": 12872 + }, + { + "epoch": 2.2, + "learning_rate": 3.5395039289777454e-06, + "loss": 0.3379, + "step": 12873 + }, + { + "epoch": 2.2, + "learning_rate": 3.5380985152162885e-06, + "loss": 0.3249, + "step": 12874 + }, + { + "epoch": 2.2, + "learning_rate": 3.536693320559238e-06, + "loss": 0.343, + "step": 12875 + }, + { + "epoch": 2.2, + "learning_rate": 3.5352883450542387e-06, + "loss": 0.3414, + "step": 12876 + }, + { + "epoch": 2.2, + "learning_rate": 3.5338835887489308e-06, + "loss": 0.3211, + "step": 12877 + }, + { + "epoch": 2.2, + "learning_rate": 3.5324790516909447e-06, + "loss": 0.3056, + "step": 12878 + }, + { + "epoch": 2.2, + "learning_rate": 3.5310747339279073e-06, + "loss": 0.3204, + "step": 12879 + }, + { + "epoch": 2.2, + "learning_rate": 3.5296706355074284e-06, + "loss": 0.3133, + "step": 12880 + }, + { + "epoch": 2.2, + "learning_rate": 3.5282667564771212e-06, + "loss": 0.2973, + "step": 12881 + }, + { + "epoch": 2.2, + "learning_rate": 3.5268630968845896e-06, + "loss": 0.3132, + "step": 12882 + }, + { + "epoch": 2.2, + "learning_rate": 3.5254596567774214e-06, + "loss": 0.3344, + "step": 12883 + }, + { + "epoch": 2.2, + "learning_rate": 3.5240564362032072e-06, + "loss": 0.307, + "step": 12884 + }, + { + "epoch": 2.2, + "learning_rate": 3.522653435209524e-06, + "loss": 0.3094, + "step": 12885 + }, + { + "epoch": 2.2, + "learning_rate": 3.5212506538439486e-06, + "loss": 0.3149, + "step": 12886 + }, + { + "epoch": 2.2, + "learning_rate": 3.5198480921540377e-06, + "loss": 0.3384, + "step": 12887 + }, + { + "epoch": 2.2, + "learning_rate": 3.5184457501873525e-06, + "loss": 0.315, + "step": 12888 + }, + { + "epoch": 2.2, + "learning_rate": 3.517043627991441e-06, + "loss": 0.3312, + "step": 12889 + }, + { + "epoch": 2.2, + "learning_rate": 3.5156417256138464e-06, + "loss": 0.3052, + "step": 12890 + }, + { + "epoch": 2.2, + "learning_rate": 3.514240043102103e-06, + "loss": 0.3, + "step": 12891 + }, + { + "epoch": 2.2, + "learning_rate": 3.5128385805037367e-06, + "loss": 0.3177, + "step": 12892 + }, + { + "epoch": 2.2, + "learning_rate": 3.511437337866268e-06, + "loss": 0.3097, + "step": 12893 + }, + { + "epoch": 2.2, + "learning_rate": 3.510036315237213e-06, + "loss": 0.33, + "step": 12894 + }, + { + "epoch": 2.2, + "learning_rate": 3.508635512664067e-06, + "loss": 0.3102, + "step": 12895 + }, + { + "epoch": 2.2, + "learning_rate": 3.507234930194333e-06, + "loss": 0.2885, + "step": 12896 + }, + { + "epoch": 2.2, + "learning_rate": 3.5058345678755014e-06, + "loss": 0.3057, + "step": 12897 + }, + { + "epoch": 2.2, + "learning_rate": 3.5044344257550546e-06, + "loss": 0.322, + "step": 12898 + }, + { + "epoch": 2.2, + "learning_rate": 3.5030345038804625e-06, + "loss": 0.3227, + "step": 12899 + }, + { + "epoch": 2.2, + "learning_rate": 3.5016348022991965e-06, + "loss": 0.2938, + "step": 12900 + }, + { + "epoch": 2.2, + "learning_rate": 3.500235321058718e-06, + "loss": 0.2999, + "step": 12901 + }, + { + "epoch": 2.2, + "learning_rate": 3.4988360602064742e-06, + "loss": 0.3182, + "step": 12902 + }, + { + "epoch": 2.2, + "learning_rate": 3.497437019789913e-06, + "loss": 0.3334, + "step": 12903 + }, + { + "epoch": 2.2, + "learning_rate": 3.4960381998564717e-06, + "loss": 0.2826, + "step": 12904 + }, + { + "epoch": 2.2, + "learning_rate": 3.49463960045358e-06, + "loss": 0.3169, + "step": 12905 + }, + { + "epoch": 2.2, + "learning_rate": 3.493241221628659e-06, + "loss": 0.3226, + "step": 12906 + }, + { + "epoch": 2.2, + "learning_rate": 3.4918430634291266e-06, + "loss": 0.3257, + "step": 12907 + }, + { + "epoch": 2.2, + "learning_rate": 3.4904451259023884e-06, + "loss": 0.3371, + "step": 12908 + }, + { + "epoch": 2.2, + "learning_rate": 3.4890474090958493e-06, + "loss": 0.3306, + "step": 12909 + }, + { + "epoch": 2.2, + "learning_rate": 3.487649913056893e-06, + "loss": 0.2932, + "step": 12910 + }, + { + "epoch": 2.2, + "learning_rate": 3.48625263783291e-06, + "loss": 0.3258, + "step": 12911 + }, + { + "epoch": 2.2, + "learning_rate": 3.4848555834712762e-06, + "loss": 0.3169, + "step": 12912 + }, + { + "epoch": 2.2, + "learning_rate": 3.4834587500193617e-06, + "loss": 0.3089, + "step": 12913 + }, + { + "epoch": 2.2, + "learning_rate": 3.4820621375245343e-06, + "loss": 0.3128, + "step": 12914 + }, + { + "epoch": 2.2, + "learning_rate": 3.4806657460341397e-06, + "loss": 0.2809, + "step": 12915 + }, + { + "epoch": 2.2, + "learning_rate": 3.479269575595535e-06, + "loss": 0.2949, + "step": 12916 + }, + { + "epoch": 2.2, + "learning_rate": 3.477873626256052e-06, + "loss": 0.3532, + "step": 12917 + }, + { + "epoch": 2.2, + "learning_rate": 3.4764778980630266e-06, + "loss": 0.3102, + "step": 12918 + }, + { + "epoch": 2.2, + "learning_rate": 3.4750823910637843e-06, + "loss": 0.3142, + "step": 12919 + }, + { + "epoch": 2.2, + "learning_rate": 3.473687105305643e-06, + "loss": 0.3095, + "step": 12920 + }, + { + "epoch": 2.2, + "learning_rate": 3.472292040835912e-06, + "loss": 0.3145, + "step": 12921 + }, + { + "epoch": 2.2, + "learning_rate": 3.4708971977018947e-06, + "loss": 0.3268, + "step": 12922 + }, + { + "epoch": 2.2, + "learning_rate": 3.469502575950886e-06, + "loss": 0.323, + "step": 12923 + }, + { + "epoch": 2.2, + "learning_rate": 3.468108175630176e-06, + "loss": 0.2959, + "step": 12924 + }, + { + "epoch": 2.2, + "learning_rate": 3.466713996787039e-06, + "loss": 0.3055, + "step": 12925 + }, + { + "epoch": 2.2, + "learning_rate": 3.4653200394687515e-06, + "loss": 0.3353, + "step": 12926 + }, + { + "epoch": 2.2, + "learning_rate": 3.463926303722577e-06, + "loss": 0.3041, + "step": 12927 + }, + { + "epoch": 2.2, + "learning_rate": 3.462532789595774e-06, + "loss": 0.3004, + "step": 12928 + }, + { + "epoch": 2.2, + "learning_rate": 3.461139497135595e-06, + "loss": 0.3145, + "step": 12929 + }, + { + "epoch": 2.21, + "learning_rate": 3.459746426389278e-06, + "loss": 0.319, + "step": 12930 + }, + { + "epoch": 2.21, + "learning_rate": 3.4583535774040587e-06, + "loss": 0.3357, + "step": 12931 + }, + { + "epoch": 2.21, + "learning_rate": 3.4569609502271697e-06, + "loss": 0.3221, + "step": 12932 + }, + { + "epoch": 2.21, + "learning_rate": 3.455568544905824e-06, + "loss": 0.319, + "step": 12933 + }, + { + "epoch": 2.21, + "learning_rate": 3.454176361487236e-06, + "loss": 0.3254, + "step": 12934 + }, + { + "epoch": 2.21, + "learning_rate": 3.4527844000186127e-06, + "loss": 0.2919, + "step": 12935 + }, + { + "epoch": 2.21, + "learning_rate": 3.4513926605471504e-06, + "loss": 0.3175, + "step": 12936 + }, + { + "epoch": 2.21, + "learning_rate": 3.4500011431200386e-06, + "loss": 0.3226, + "step": 12937 + }, + { + "epoch": 2.21, + "learning_rate": 3.44860984778446e-06, + "loss": 0.3209, + "step": 12938 + }, + { + "epoch": 2.21, + "learning_rate": 3.447218774587593e-06, + "loss": 0.3427, + "step": 12939 + }, + { + "epoch": 2.21, + "learning_rate": 3.445827923576598e-06, + "loss": 0.2854, + "step": 12940 + }, + { + "epoch": 2.21, + "learning_rate": 3.444437294798638e-06, + "loss": 0.3024, + "step": 12941 + }, + { + "epoch": 2.21, + "learning_rate": 3.4430468883008663e-06, + "loss": 0.3037, + "step": 12942 + }, + { + "epoch": 2.21, + "learning_rate": 3.4416567041304262e-06, + "loss": 0.2812, + "step": 12943 + }, + { + "epoch": 2.21, + "learning_rate": 3.4402667423344537e-06, + "loss": 0.3101, + "step": 12944 + }, + { + "epoch": 2.21, + "learning_rate": 3.4388770029600848e-06, + "loss": 0.3022, + "step": 12945 + }, + { + "epoch": 2.21, + "learning_rate": 3.4374874860544327e-06, + "loss": 0.3315, + "step": 12946 + }, + { + "epoch": 2.21, + "learning_rate": 3.4360981916646197e-06, + "loss": 0.3419, + "step": 12947 + }, + { + "epoch": 2.21, + "learning_rate": 3.434709119837745e-06, + "loss": 0.3187, + "step": 12948 + }, + { + "epoch": 2.21, + "learning_rate": 3.4333202706209123e-06, + "loss": 0.3149, + "step": 12949 + }, + { + "epoch": 2.21, + "learning_rate": 3.4319316440612127e-06, + "loss": 0.3195, + "step": 12950 + }, + { + "epoch": 2.21, + "learning_rate": 3.4305432402057305e-06, + "loss": 0.3017, + "step": 12951 + }, + { + "epoch": 2.21, + "learning_rate": 3.4291550591015434e-06, + "loss": 0.2964, + "step": 12952 + }, + { + "epoch": 2.21, + "learning_rate": 3.4277671007957202e-06, + "loss": 0.313, + "step": 12953 + }, + { + "epoch": 2.21, + "learning_rate": 3.4263793653353263e-06, + "loss": 0.3242, + "step": 12954 + }, + { + "epoch": 2.21, + "learning_rate": 3.4249918527674075e-06, + "loss": 0.3077, + "step": 12955 + }, + { + "epoch": 2.21, + "learning_rate": 3.4236045631390145e-06, + "loss": 0.3005, + "step": 12956 + }, + { + "epoch": 2.21, + "learning_rate": 3.4222174964971876e-06, + "loss": 0.3254, + "step": 12957 + }, + { + "epoch": 2.21, + "learning_rate": 3.4208306528889566e-06, + "loss": 0.317, + "step": 12958 + }, + { + "epoch": 2.21, + "learning_rate": 3.4194440323613454e-06, + "loss": 0.3086, + "step": 12959 + }, + { + "epoch": 2.21, + "learning_rate": 3.4180576349613716e-06, + "loss": 0.333, + "step": 12960 + }, + { + "epoch": 2.21, + "learning_rate": 3.4166714607360463e-06, + "loss": 0.3214, + "step": 12961 + }, + { + "epoch": 2.21, + "learning_rate": 3.4152855097323646e-06, + "loss": 0.3024, + "step": 12962 + }, + { + "epoch": 2.21, + "learning_rate": 3.4138997819973254e-06, + "loss": 0.3246, + "step": 12963 + }, + { + "epoch": 2.21, + "learning_rate": 3.4125142775779098e-06, + "loss": 0.3115, + "step": 12964 + }, + { + "epoch": 2.21, + "learning_rate": 3.411128996521098e-06, + "loss": 0.35, + "step": 12965 + }, + { + "epoch": 2.21, + "learning_rate": 3.409743938873862e-06, + "loss": 0.327, + "step": 12966 + }, + { + "epoch": 2.21, + "learning_rate": 3.4083591046831653e-06, + "loss": 0.3263, + "step": 12967 + }, + { + "epoch": 2.21, + "learning_rate": 3.406974493995963e-06, + "loss": 0.314, + "step": 12968 + }, + { + "epoch": 2.21, + "learning_rate": 3.4055901068592056e-06, + "loss": 0.3364, + "step": 12969 + }, + { + "epoch": 2.21, + "learning_rate": 3.4042059433198282e-06, + "loss": 0.2959, + "step": 12970 + }, + { + "epoch": 2.21, + "learning_rate": 3.402822003424768e-06, + "loss": 0.323, + "step": 12971 + }, + { + "epoch": 2.21, + "learning_rate": 3.4014382872209484e-06, + "loss": 0.3318, + "step": 12972 + }, + { + "epoch": 2.21, + "learning_rate": 3.4000547947552887e-06, + "loss": 0.3286, + "step": 12973 + }, + { + "epoch": 2.21, + "learning_rate": 3.3986715260746994e-06, + "loss": 0.32, + "step": 12974 + }, + { + "epoch": 2.21, + "learning_rate": 3.397288481226081e-06, + "loss": 0.3075, + "step": 12975 + }, + { + "epoch": 2.21, + "learning_rate": 3.3959056602563335e-06, + "loss": 0.3151, + "step": 12976 + }, + { + "epoch": 2.21, + "learning_rate": 3.3945230632123382e-06, + "loss": 0.3116, + "step": 12977 + }, + { + "epoch": 2.21, + "learning_rate": 3.393140690140977e-06, + "loss": 0.3188, + "step": 12978 + }, + { + "epoch": 2.21, + "learning_rate": 3.3917585410891262e-06, + "loss": 0.3267, + "step": 12979 + }, + { + "epoch": 2.21, + "learning_rate": 3.3903766161036434e-06, + "loss": 0.3015, + "step": 12980 + }, + { + "epoch": 2.21, + "learning_rate": 3.3889949152313894e-06, + "loss": 0.3179, + "step": 12981 + }, + { + "epoch": 2.21, + "learning_rate": 3.387613438519214e-06, + "loss": 0.3257, + "step": 12982 + }, + { + "epoch": 2.21, + "learning_rate": 3.3862321860139578e-06, + "loss": 0.3303, + "step": 12983 + }, + { + "epoch": 2.21, + "learning_rate": 3.384851157762461e-06, + "loss": 0.3236, + "step": 12984 + }, + { + "epoch": 2.21, + "learning_rate": 3.38347035381154e-06, + "loss": 0.294, + "step": 12985 + }, + { + "epoch": 2.21, + "learning_rate": 3.3820897742080204e-06, + "loss": 0.3238, + "step": 12986 + }, + { + "epoch": 2.21, + "learning_rate": 3.380709418998711e-06, + "loss": 0.3058, + "step": 12987 + }, + { + "epoch": 2.22, + "learning_rate": 3.3793292882304183e-06, + "loss": 0.3135, + "step": 12988 + }, + { + "epoch": 2.22, + "learning_rate": 3.3779493819499366e-06, + "loss": 0.2876, + "step": 12989 + }, + { + "epoch": 2.22, + "learning_rate": 3.3765697002040555e-06, + "loss": 0.3165, + "step": 12990 + }, + { + "epoch": 2.22, + "learning_rate": 3.3751902430395558e-06, + "loss": 0.2815, + "step": 12991 + }, + { + "epoch": 2.22, + "learning_rate": 3.373811010503214e-06, + "loss": 0.3371, + "step": 12992 + }, + { + "epoch": 2.22, + "learning_rate": 3.3724320026417888e-06, + "loss": 0.3061, + "step": 12993 + }, + { + "epoch": 2.22, + "learning_rate": 3.371053219502046e-06, + "loss": 0.295, + "step": 12994 + }, + { + "epoch": 2.22, + "learning_rate": 3.3696746611307298e-06, + "loss": 0.3175, + "step": 12995 + }, + { + "epoch": 2.22, + "learning_rate": 3.368296327574585e-06, + "loss": 0.3246, + "step": 12996 + }, + { + "epoch": 2.22, + "learning_rate": 3.3669182188803486e-06, + "loss": 0.3312, + "step": 12997 + }, + { + "epoch": 2.22, + "learning_rate": 3.365540335094746e-06, + "loss": 0.3049, + "step": 12998 + }, + { + "epoch": 2.22, + "learning_rate": 3.364162676264503e-06, + "loss": 0.3137, + "step": 12999 + }, + { + "epoch": 2.22, + "learning_rate": 3.362785242436324e-06, + "loss": 0.3347, + "step": 13000 + }, + { + "epoch": 2.22, + "learning_rate": 3.3614080336569168e-06, + "loss": 0.2972, + "step": 13001 + }, + { + "epoch": 2.22, + "learning_rate": 3.3600310499729794e-06, + "loss": 0.3398, + "step": 13002 + }, + { + "epoch": 2.22, + "learning_rate": 3.3586542914312016e-06, + "loss": 0.3146, + "step": 13003 + }, + { + "epoch": 2.22, + "learning_rate": 3.3572777580782646e-06, + "loss": 0.319, + "step": 13004 + }, + { + "epoch": 2.22, + "learning_rate": 3.3559014499608432e-06, + "loss": 0.3021, + "step": 13005 + }, + { + "epoch": 2.22, + "learning_rate": 3.354525367125604e-06, + "loss": 0.3316, + "step": 13006 + }, + { + "epoch": 2.22, + "learning_rate": 3.353149509619209e-06, + "loss": 0.3182, + "step": 13007 + }, + { + "epoch": 2.22, + "learning_rate": 3.3517738774883025e-06, + "loss": 0.313, + "step": 13008 + }, + { + "epoch": 2.22, + "learning_rate": 3.3503984707795324e-06, + "loss": 0.3266, + "step": 13009 + }, + { + "epoch": 2.22, + "learning_rate": 3.349023289539537e-06, + "loss": 0.3109, + "step": 13010 + }, + { + "epoch": 2.22, + "learning_rate": 3.347648333814939e-06, + "loss": 0.3252, + "step": 13011 + }, + { + "epoch": 2.22, + "learning_rate": 3.3462736036523637e-06, + "loss": 0.3154, + "step": 13012 + }, + { + "epoch": 2.22, + "learning_rate": 3.3448990990984207e-06, + "loss": 0.3314, + "step": 13013 + }, + { + "epoch": 2.22, + "learning_rate": 3.3435248201997216e-06, + "loss": 0.304, + "step": 13014 + }, + { + "epoch": 2.22, + "learning_rate": 3.3421507670028577e-06, + "loss": 0.3164, + "step": 13015 + }, + { + "epoch": 2.22, + "learning_rate": 3.340776939554421e-06, + "loss": 0.3006, + "step": 13016 + }, + { + "epoch": 2.22, + "learning_rate": 3.3394033379009948e-06, + "loss": 0.3238, + "step": 13017 + }, + { + "epoch": 2.22, + "learning_rate": 3.338029962089154e-06, + "loss": 0.3299, + "step": 13018 + }, + { + "epoch": 2.22, + "learning_rate": 3.336656812165465e-06, + "loss": 0.3257, + "step": 13019 + }, + { + "epoch": 2.22, + "learning_rate": 3.335283888176489e-06, + "loss": 0.3211, + "step": 13020 + }, + { + "epoch": 2.22, + "learning_rate": 3.333911190168777e-06, + "loss": 0.3432, + "step": 13021 + }, + { + "epoch": 2.22, + "learning_rate": 3.3325387181888756e-06, + "loss": 0.3214, + "step": 13022 + }, + { + "epoch": 2.22, + "learning_rate": 3.331166472283316e-06, + "loss": 0.3325, + "step": 13023 + }, + { + "epoch": 2.22, + "learning_rate": 3.3297944524986293e-06, + "loss": 0.3101, + "step": 13024 + }, + { + "epoch": 2.22, + "learning_rate": 3.3284226588813386e-06, + "loss": 0.3434, + "step": 13025 + }, + { + "epoch": 2.22, + "learning_rate": 3.3270510914779587e-06, + "loss": 0.3337, + "step": 13026 + }, + { + "epoch": 2.22, + "learning_rate": 3.325679750334989e-06, + "loss": 0.2896, + "step": 13027 + }, + { + "epoch": 2.22, + "learning_rate": 3.324308635498933e-06, + "loss": 0.3057, + "step": 13028 + }, + { + "epoch": 2.22, + "learning_rate": 3.3229377470162827e-06, + "loss": 0.2849, + "step": 13029 + }, + { + "epoch": 2.22, + "learning_rate": 3.3215670849335156e-06, + "loss": 0.3163, + "step": 13030 + }, + { + "epoch": 2.22, + "learning_rate": 3.3201966492971095e-06, + "loss": 0.3166, + "step": 13031 + }, + { + "epoch": 2.22, + "learning_rate": 3.318826440153532e-06, + "loss": 0.3129, + "step": 13032 + }, + { + "epoch": 2.22, + "learning_rate": 3.317456457549242e-06, + "loss": 0.3194, + "step": 13033 + }, + { + "epoch": 2.22, + "learning_rate": 3.316086701530693e-06, + "loss": 0.3, + "step": 13034 + }, + { + "epoch": 2.22, + "learning_rate": 3.3147171721443296e-06, + "loss": 0.3036, + "step": 13035 + }, + { + "epoch": 2.22, + "learning_rate": 3.3133478694365875e-06, + "loss": 0.3246, + "step": 13036 + }, + { + "epoch": 2.22, + "learning_rate": 3.3119787934538994e-06, + "loss": 0.2977, + "step": 13037 + }, + { + "epoch": 2.22, + "learning_rate": 3.310609944242681e-06, + "loss": 0.2925, + "step": 13038 + }, + { + "epoch": 2.22, + "learning_rate": 3.309241321849348e-06, + "loss": 0.3242, + "step": 13039 + }, + { + "epoch": 2.22, + "learning_rate": 3.3078729263203067e-06, + "loss": 0.3474, + "step": 13040 + }, + { + "epoch": 2.22, + "learning_rate": 3.3065047577019593e-06, + "loss": 0.3008, + "step": 13041 + }, + { + "epoch": 2.22, + "learning_rate": 3.3051368160406907e-06, + "loss": 0.325, + "step": 13042 + }, + { + "epoch": 2.22, + "learning_rate": 3.3037691013828852e-06, + "loss": 0.3197, + "step": 13043 + }, + { + "epoch": 2.22, + "learning_rate": 3.3024016137749227e-06, + "loss": 0.3153, + "step": 13044 + }, + { + "epoch": 2.22, + "learning_rate": 3.3010343532631628e-06, + "loss": 0.3258, + "step": 13045 + }, + { + "epoch": 2.22, + "learning_rate": 3.29966731989397e-06, + "loss": 0.3375, + "step": 13046 + }, + { + "epoch": 2.23, + "learning_rate": 3.298300513713697e-06, + "loss": 0.3244, + "step": 13047 + }, + { + "epoch": 2.23, + "learning_rate": 3.2969339347686856e-06, + "loss": 0.3407, + "step": 13048 + }, + { + "epoch": 2.23, + "learning_rate": 3.2955675831052757e-06, + "loss": 0.2929, + "step": 13049 + }, + { + "epoch": 2.23, + "learning_rate": 3.2942014587697934e-06, + "loss": 0.3112, + "step": 13050 + }, + { + "epoch": 2.23, + "learning_rate": 3.2928355618085626e-06, + "loss": 0.311, + "step": 13051 + }, + { + "epoch": 2.23, + "learning_rate": 3.291469892267899e-06, + "loss": 0.3182, + "step": 13052 + }, + { + "epoch": 2.23, + "learning_rate": 3.2901044501941013e-06, + "loss": 0.3274, + "step": 13053 + }, + { + "epoch": 2.23, + "learning_rate": 3.2887392356334723e-06, + "loss": 0.3054, + "step": 13054 + }, + { + "epoch": 2.23, + "learning_rate": 3.2873742486323015e-06, + "loss": 0.3317, + "step": 13055 + }, + { + "epoch": 2.23, + "learning_rate": 3.2860094892368723e-06, + "loss": 0.3162, + "step": 13056 + }, + { + "epoch": 2.23, + "learning_rate": 3.2846449574934625e-06, + "loss": 0.3305, + "step": 13057 + }, + { + "epoch": 2.23, + "learning_rate": 3.283280653448333e-06, + "loss": 0.3087, + "step": 13058 + }, + { + "epoch": 2.23, + "learning_rate": 3.2819165771477503e-06, + "loss": 0.2989, + "step": 13059 + }, + { + "epoch": 2.23, + "learning_rate": 3.280552728637959e-06, + "loss": 0.3184, + "step": 13060 + }, + { + "epoch": 2.23, + "learning_rate": 3.2791891079652083e-06, + "loss": 0.2961, + "step": 13061 + }, + { + "epoch": 2.23, + "learning_rate": 3.277825715175732e-06, + "loss": 0.2907, + "step": 13062 + }, + { + "epoch": 2.23, + "learning_rate": 3.276462550315761e-06, + "loss": 0.3177, + "step": 13063 + }, + { + "epoch": 2.23, + "learning_rate": 3.275099613431515e-06, + "loss": 0.324, + "step": 13064 + }, + { + "epoch": 2.23, + "learning_rate": 3.273736904569208e-06, + "loss": 0.3187, + "step": 13065 + }, + { + "epoch": 2.23, + "learning_rate": 3.2723744237750456e-06, + "loss": 0.3305, + "step": 13066 + }, + { + "epoch": 2.23, + "learning_rate": 3.271012171095228e-06, + "loss": 0.3006, + "step": 13067 + }, + { + "epoch": 2.23, + "learning_rate": 3.26965014657594e-06, + "loss": 0.3379, + "step": 13068 + }, + { + "epoch": 2.23, + "learning_rate": 3.2682883502633654e-06, + "loss": 0.3222, + "step": 13069 + }, + { + "epoch": 2.23, + "learning_rate": 3.266926782203681e-06, + "loss": 0.3381, + "step": 13070 + }, + { + "epoch": 2.23, + "learning_rate": 3.2655654424430515e-06, + "loss": 0.3018, + "step": 13071 + }, + { + "epoch": 2.23, + "learning_rate": 3.2642043310276405e-06, + "loss": 0.3339, + "step": 13072 + }, + { + "epoch": 2.23, + "learning_rate": 3.262843448003592e-06, + "loss": 0.317, + "step": 13073 + }, + { + "epoch": 2.23, + "learning_rate": 3.2614827934170534e-06, + "loss": 0.3231, + "step": 13074 + }, + { + "epoch": 2.23, + "learning_rate": 3.260122367314165e-06, + "loss": 0.302, + "step": 13075 + }, + { + "epoch": 2.23, + "learning_rate": 3.2587621697410453e-06, + "loss": 0.3113, + "step": 13076 + }, + { + "epoch": 2.23, + "learning_rate": 3.257402200743821e-06, + "loss": 0.3142, + "step": 13077 + }, + { + "epoch": 2.23, + "learning_rate": 3.2560424603686034e-06, + "loss": 0.328, + "step": 13078 + }, + { + "epoch": 2.23, + "learning_rate": 3.2546829486614983e-06, + "loss": 0.2954, + "step": 13079 + }, + { + "epoch": 2.23, + "learning_rate": 3.253323665668601e-06, + "loss": 0.3343, + "step": 13080 + }, + { + "epoch": 2.23, + "learning_rate": 3.2519646114360026e-06, + "loss": 0.3187, + "step": 13081 + }, + { + "epoch": 2.23, + "learning_rate": 3.2506057860097873e-06, + "loss": 0.3215, + "step": 13082 + }, + { + "epoch": 2.23, + "learning_rate": 3.249247189436021e-06, + "loss": 0.3081, + "step": 13083 + }, + { + "epoch": 2.23, + "learning_rate": 3.247888821760776e-06, + "loss": 0.295, + "step": 13084 + }, + { + "epoch": 2.23, + "learning_rate": 3.246530683030108e-06, + "loss": 0.3211, + "step": 13085 + }, + { + "epoch": 2.23, + "learning_rate": 3.2451727732900695e-06, + "loss": 0.3292, + "step": 13086 + }, + { + "epoch": 2.23, + "learning_rate": 3.243815092586702e-06, + "loss": 0.3336, + "step": 13087 + }, + { + "epoch": 2.23, + "learning_rate": 3.2424576409660436e-06, + "loss": 0.2973, + "step": 13088 + }, + { + "epoch": 2.23, + "learning_rate": 3.2411004184741168e-06, + "loss": 0.3321, + "step": 13089 + }, + { + "epoch": 2.23, + "learning_rate": 3.239743425156945e-06, + "loss": 0.3228, + "step": 13090 + }, + { + "epoch": 2.23, + "learning_rate": 3.238386661060536e-06, + "loss": 0.3038, + "step": 13091 + }, + { + "epoch": 2.23, + "learning_rate": 3.2370301262308956e-06, + "loss": 0.3113, + "step": 13092 + }, + { + "epoch": 2.23, + "learning_rate": 3.2356738207140205e-06, + "loss": 0.3268, + "step": 13093 + }, + { + "epoch": 2.23, + "learning_rate": 3.234317744555898e-06, + "loss": 0.3102, + "step": 13094 + }, + { + "epoch": 2.23, + "learning_rate": 3.2329618978025103e-06, + "loss": 0.3006, + "step": 13095 + }, + { + "epoch": 2.23, + "learning_rate": 3.2316062804998295e-06, + "loss": 0.2856, + "step": 13096 + }, + { + "epoch": 2.23, + "learning_rate": 3.2302508926938237e-06, + "loss": 0.323, + "step": 13097 + }, + { + "epoch": 2.23, + "learning_rate": 3.2288957344304428e-06, + "loss": 0.3074, + "step": 13098 + }, + { + "epoch": 2.23, + "learning_rate": 3.2275408057556424e-06, + "loss": 0.3375, + "step": 13099 + }, + { + "epoch": 2.23, + "learning_rate": 3.226186106715362e-06, + "loss": 0.3116, + "step": 13100 + }, + { + "epoch": 2.23, + "learning_rate": 3.2248316373555356e-06, + "loss": 0.3269, + "step": 13101 + }, + { + "epoch": 2.23, + "learning_rate": 3.2234773977220903e-06, + "loss": 0.3358, + "step": 13102 + }, + { + "epoch": 2.23, + "learning_rate": 3.2221233878609428e-06, + "loss": 0.3247, + "step": 13103 + }, + { + "epoch": 2.23, + "learning_rate": 3.220769607818008e-06, + "loss": 0.3237, + "step": 13104 + }, + { + "epoch": 2.24, + "learning_rate": 3.2194160576391844e-06, + "loss": 0.3022, + "step": 13105 + }, + { + "epoch": 2.24, + "learning_rate": 3.218062737370369e-06, + "loss": 0.3065, + "step": 13106 + }, + { + "epoch": 2.24, + "learning_rate": 3.2167096470574454e-06, + "loss": 0.3003, + "step": 13107 + }, + { + "epoch": 2.24, + "learning_rate": 3.2153567867462952e-06, + "loss": 0.305, + "step": 13108 + }, + { + "epoch": 2.24, + "learning_rate": 3.2140041564827917e-06, + "loss": 0.2994, + "step": 13109 + }, + { + "epoch": 2.24, + "learning_rate": 3.212651756312796e-06, + "loss": 0.2848, + "step": 13110 + }, + { + "epoch": 2.24, + "learning_rate": 3.211299586282166e-06, + "loss": 0.3262, + "step": 13111 + }, + { + "epoch": 2.24, + "learning_rate": 3.209947646436752e-06, + "loss": 0.3205, + "step": 13112 + }, + { + "epoch": 2.24, + "learning_rate": 3.2085959368223883e-06, + "loss": 0.305, + "step": 13113 + }, + { + "epoch": 2.24, + "learning_rate": 3.2072444574849117e-06, + "loss": 0.3246, + "step": 13114 + }, + { + "epoch": 2.24, + "learning_rate": 3.2058932084701457e-06, + "loss": 0.3387, + "step": 13115 + }, + { + "epoch": 2.24, + "learning_rate": 3.204542189823908e-06, + "loss": 0.3288, + "step": 13116 + }, + { + "epoch": 2.24, + "learning_rate": 3.2031914015920063e-06, + "loss": 0.3242, + "step": 13117 + }, + { + "epoch": 2.24, + "learning_rate": 3.2018408438202443e-06, + "loss": 0.3227, + "step": 13118 + }, + { + "epoch": 2.24, + "learning_rate": 3.2004905165544165e-06, + "loss": 0.3262, + "step": 13119 + }, + { + "epoch": 2.24, + "learning_rate": 3.199140419840303e-06, + "loss": 0.3297, + "step": 13120 + }, + { + "epoch": 2.24, + "learning_rate": 3.197790553723685e-06, + "loss": 0.3155, + "step": 13121 + }, + { + "epoch": 2.24, + "learning_rate": 3.1964409182503376e-06, + "loss": 0.3171, + "step": 13122 + }, + { + "epoch": 2.24, + "learning_rate": 3.195091513466013e-06, + "loss": 0.3071, + "step": 13123 + }, + { + "epoch": 2.24, + "learning_rate": 3.19374233941647e-06, + "loss": 0.311, + "step": 13124 + }, + { + "epoch": 2.24, + "learning_rate": 3.1923933961474573e-06, + "loss": 0.3208, + "step": 13125 + }, + { + "epoch": 2.24, + "learning_rate": 3.1910446837047115e-06, + "loss": 0.3238, + "step": 13126 + }, + { + "epoch": 2.24, + "learning_rate": 3.1896962021339673e-06, + "loss": 0.335, + "step": 13127 + }, + { + "epoch": 2.24, + "learning_rate": 3.1883479514809424e-06, + "loss": 0.3209, + "step": 13128 + }, + { + "epoch": 2.24, + "learning_rate": 3.1869999317913537e-06, + "loss": 0.3199, + "step": 13129 + }, + { + "epoch": 2.24, + "learning_rate": 3.1856521431109088e-06, + "loss": 0.3173, + "step": 13130 + }, + { + "epoch": 2.24, + "learning_rate": 3.184304585485308e-06, + "loss": 0.2996, + "step": 13131 + }, + { + "epoch": 2.24, + "learning_rate": 3.182957258960244e-06, + "loss": 0.3098, + "step": 13132 + }, + { + "epoch": 2.24, + "learning_rate": 3.181610163581399e-06, + "loss": 0.3043, + "step": 13133 + }, + { + "epoch": 2.24, + "learning_rate": 3.1802632993944525e-06, + "loss": 0.3096, + "step": 13134 + }, + { + "epoch": 2.24, + "learning_rate": 3.178916666445068e-06, + "loss": 0.3088, + "step": 13135 + }, + { + "epoch": 2.24, + "learning_rate": 3.1775702647789074e-06, + "loss": 0.3079, + "step": 13136 + }, + { + "epoch": 2.24, + "learning_rate": 3.1762240944416266e-06, + "loss": 0.2939, + "step": 13137 + }, + { + "epoch": 2.24, + "learning_rate": 3.1748781554788654e-06, + "loss": 0.3096, + "step": 13138 + }, + { + "epoch": 2.24, + "learning_rate": 3.1735324479362626e-06, + "loss": 0.3352, + "step": 13139 + }, + { + "epoch": 2.24, + "learning_rate": 3.1721869718594476e-06, + "loss": 0.3036, + "step": 13140 + }, + { + "epoch": 2.24, + "learning_rate": 3.1708417272940416e-06, + "loss": 0.3268, + "step": 13141 + }, + { + "epoch": 2.24, + "learning_rate": 3.1694967142856615e-06, + "loss": 0.311, + "step": 13142 + }, + { + "epoch": 2.24, + "learning_rate": 3.168151932879906e-06, + "loss": 0.2998, + "step": 13143 + }, + { + "epoch": 2.24, + "learning_rate": 3.1668073831223756e-06, + "loss": 0.3253, + "step": 13144 + }, + { + "epoch": 2.24, + "learning_rate": 3.1654630650586606e-06, + "loss": 0.3268, + "step": 13145 + }, + { + "epoch": 2.24, + "learning_rate": 3.1641189787343428e-06, + "loss": 0.3232, + "step": 13146 + }, + { + "epoch": 2.24, + "learning_rate": 3.1627751241949967e-06, + "loss": 0.3072, + "step": 13147 + }, + { + "epoch": 2.24, + "learning_rate": 3.1614315014861886e-06, + "loss": 0.3146, + "step": 13148 + }, + { + "epoch": 2.24, + "learning_rate": 3.1600881106534786e-06, + "loss": 0.3048, + "step": 13149 + }, + { + "epoch": 2.24, + "learning_rate": 3.1587449517424117e-06, + "loss": 0.3303, + "step": 13150 + }, + { + "epoch": 2.24, + "learning_rate": 3.157402024798535e-06, + "loss": 0.3212, + "step": 13151 + }, + { + "epoch": 2.24, + "learning_rate": 3.1560593298673815e-06, + "loss": 0.2951, + "step": 13152 + }, + { + "epoch": 2.24, + "learning_rate": 3.1547168669944828e-06, + "loss": 0.3372, + "step": 13153 + }, + { + "epoch": 2.24, + "learning_rate": 3.1533746362253494e-06, + "loss": 0.2907, + "step": 13154 + }, + { + "epoch": 2.24, + "learning_rate": 3.152032637605498e-06, + "loss": 0.3112, + "step": 13155 + }, + { + "epoch": 2.24, + "learning_rate": 3.1506908711804297e-06, + "loss": 0.3082, + "step": 13156 + }, + { + "epoch": 2.24, + "learning_rate": 3.149349336995645e-06, + "loss": 0.3035, + "step": 13157 + }, + { + "epoch": 2.24, + "learning_rate": 3.1480080350966247e-06, + "loss": 0.3029, + "step": 13158 + }, + { + "epoch": 2.24, + "learning_rate": 3.1466669655288507e-06, + "loss": 0.3156, + "step": 13159 + }, + { + "epoch": 2.24, + "learning_rate": 3.1453261283377956e-06, + "loss": 0.295, + "step": 13160 + }, + { + "epoch": 2.24, + "learning_rate": 3.1439855235689232e-06, + "loss": 0.3391, + "step": 13161 + }, + { + "epoch": 2.24, + "learning_rate": 3.1426451512676893e-06, + "loss": 0.3229, + "step": 13162 + }, + { + "epoch": 2.24, + "learning_rate": 3.1413050114795417e-06, + "loss": 0.3256, + "step": 13163 + }, + { + "epoch": 2.25, + "learning_rate": 3.1399651042499247e-06, + "loss": 0.3174, + "step": 13164 + }, + { + "epoch": 2.25, + "learning_rate": 3.1386254296242634e-06, + "loss": 0.324, + "step": 13165 + }, + { + "epoch": 2.25, + "learning_rate": 3.1372859876479867e-06, + "loss": 0.2982, + "step": 13166 + }, + { + "epoch": 2.25, + "learning_rate": 3.135946778366511e-06, + "loss": 0.3066, + "step": 13167 + }, + { + "epoch": 2.25, + "learning_rate": 3.1346078018252433e-06, + "loss": 0.3239, + "step": 13168 + }, + { + "epoch": 2.25, + "learning_rate": 3.13326905806959e-06, + "loss": 0.3393, + "step": 13169 + }, + { + "epoch": 2.25, + "learning_rate": 3.1319305471449366e-06, + "loss": 0.3328, + "step": 13170 + }, + { + "epoch": 2.25, + "learning_rate": 3.1305922690966705e-06, + "loss": 0.3068, + "step": 13171 + }, + { + "epoch": 2.25, + "learning_rate": 3.129254223970173e-06, + "loss": 0.3228, + "step": 13172 + }, + { + "epoch": 2.25, + "learning_rate": 3.1279164118108075e-06, + "loss": 0.3178, + "step": 13173 + }, + { + "epoch": 2.25, + "learning_rate": 3.1265788326639368e-06, + "loss": 0.3023, + "step": 13174 + }, + { + "epoch": 2.25, + "learning_rate": 3.125241486574915e-06, + "loss": 0.3175, + "step": 13175 + }, + { + "epoch": 2.25, + "learning_rate": 3.1239043735890885e-06, + "loss": 0.3045, + "step": 13176 + }, + { + "epoch": 2.25, + "learning_rate": 3.1225674937517948e-06, + "loss": 0.3148, + "step": 13177 + }, + { + "epoch": 2.25, + "learning_rate": 3.121230847108363e-06, + "loss": 0.3284, + "step": 13178 + }, + { + "epoch": 2.25, + "learning_rate": 3.1198944337041148e-06, + "loss": 0.3288, + "step": 13179 + }, + { + "epoch": 2.25, + "learning_rate": 3.118558253584368e-06, + "loss": 0.3048, + "step": 13180 + }, + { + "epoch": 2.25, + "learning_rate": 3.117222306794422e-06, + "loss": 0.3103, + "step": 13181 + }, + { + "epoch": 2.25, + "learning_rate": 3.115886593379578e-06, + "loss": 0.3179, + "step": 13182 + }, + { + "epoch": 2.25, + "learning_rate": 3.1145511133851258e-06, + "loss": 0.3135, + "step": 13183 + }, + { + "epoch": 2.25, + "learning_rate": 3.1132158668563506e-06, + "loss": 0.2995, + "step": 13184 + }, + { + "epoch": 2.25, + "learning_rate": 3.1118808538385225e-06, + "loss": 0.3083, + "step": 13185 + }, + { + "epoch": 2.25, + "learning_rate": 3.110546074376908e-06, + "loss": 0.3146, + "step": 13186 + }, + { + "epoch": 2.25, + "learning_rate": 3.109211528516771e-06, + "loss": 0.3389, + "step": 13187 + }, + { + "epoch": 2.25, + "learning_rate": 3.1078772163033554e-06, + "loss": 0.2995, + "step": 13188 + }, + { + "epoch": 2.25, + "learning_rate": 3.106543137781908e-06, + "loss": 0.3143, + "step": 13189 + }, + { + "epoch": 2.25, + "learning_rate": 3.1052092929976606e-06, + "loss": 0.2979, + "step": 13190 + }, + { + "epoch": 2.25, + "learning_rate": 3.1038756819958416e-06, + "loss": 0.3106, + "step": 13191 + }, + { + "epoch": 2.25, + "learning_rate": 3.1025423048216707e-06, + "loss": 0.301, + "step": 13192 + }, + { + "epoch": 2.25, + "learning_rate": 3.1012091615203587e-06, + "loss": 0.3394, + "step": 13193 + }, + { + "epoch": 2.25, + "learning_rate": 3.099876252137107e-06, + "loss": 0.3247, + "step": 13194 + }, + { + "epoch": 2.25, + "learning_rate": 3.0985435767171156e-06, + "loss": 0.3198, + "step": 13195 + }, + { + "epoch": 2.25, + "learning_rate": 3.0972111353055655e-06, + "loss": 0.3371, + "step": 13196 + }, + { + "epoch": 2.25, + "learning_rate": 3.0958789279476374e-06, + "loss": 0.3142, + "step": 13197 + }, + { + "epoch": 2.25, + "learning_rate": 3.0945469546885044e-06, + "loss": 0.308, + "step": 13198 + }, + { + "epoch": 2.25, + "learning_rate": 3.0932152155733288e-06, + "loss": 0.3131, + "step": 13199 + }, + { + "epoch": 2.25, + "learning_rate": 3.0918837106472687e-06, + "loss": 0.3276, + "step": 13200 + }, + { + "epoch": 2.25, + "learning_rate": 3.0905524399554663e-06, + "loss": 0.3026, + "step": 13201 + }, + { + "epoch": 2.25, + "learning_rate": 3.089221403543068e-06, + "loss": 0.3293, + "step": 13202 + }, + { + "epoch": 2.25, + "learning_rate": 3.087890601455197e-06, + "loss": 0.3019, + "step": 13203 + }, + { + "epoch": 2.25, + "learning_rate": 3.0865600337369818e-06, + "loss": 0.3025, + "step": 13204 + }, + { + "epoch": 2.25, + "learning_rate": 3.0852297004335376e-06, + "loss": 0.2985, + "step": 13205 + }, + { + "epoch": 2.25, + "learning_rate": 3.0838996015899724e-06, + "loss": 0.2929, + "step": 13206 + }, + { + "epoch": 2.25, + "learning_rate": 3.082569737251385e-06, + "loss": 0.3124, + "step": 13207 + }, + { + "epoch": 2.25, + "learning_rate": 3.0812401074628694e-06, + "loss": 0.335, + "step": 13208 + }, + { + "epoch": 2.25, + "learning_rate": 3.0799107122695084e-06, + "loss": 0.3198, + "step": 13209 + }, + { + "epoch": 2.25, + "learning_rate": 3.0785815517163795e-06, + "loss": 0.3315, + "step": 13210 + }, + { + "epoch": 2.25, + "learning_rate": 3.077252625848546e-06, + "loss": 0.3197, + "step": 13211 + }, + { + "epoch": 2.25, + "learning_rate": 3.0759239347110713e-06, + "loss": 0.3251, + "step": 13212 + }, + { + "epoch": 2.25, + "learning_rate": 3.0745954783490073e-06, + "loss": 0.3097, + "step": 13213 + }, + { + "epoch": 2.25, + "learning_rate": 3.0732672568073974e-06, + "loss": 0.328, + "step": 13214 + }, + { + "epoch": 2.25, + "learning_rate": 3.071939270131278e-06, + "loss": 0.3309, + "step": 13215 + }, + { + "epoch": 2.25, + "learning_rate": 3.0706115183656806e-06, + "loss": 0.3103, + "step": 13216 + }, + { + "epoch": 2.25, + "learning_rate": 3.069284001555619e-06, + "loss": 0.335, + "step": 13217 + }, + { + "epoch": 2.25, + "learning_rate": 3.0679567197461135e-06, + "loss": 0.3106, + "step": 13218 + }, + { + "epoch": 2.25, + "learning_rate": 3.066629672982159e-06, + "loss": 0.3225, + "step": 13219 + }, + { + "epoch": 2.25, + "learning_rate": 3.0653028613087564e-06, + "loss": 0.2922, + "step": 13220 + }, + { + "epoch": 2.25, + "learning_rate": 3.0639762847708954e-06, + "loss": 0.3233, + "step": 13221 + }, + { + "epoch": 2.25, + "learning_rate": 3.062649943413554e-06, + "loss": 0.2971, + "step": 13222 + }, + { + "epoch": 2.26, + "learning_rate": 3.0613238372817056e-06, + "loss": 0.3033, + "step": 13223 + }, + { + "epoch": 2.26, + "learning_rate": 3.059997966420315e-06, + "loss": 0.286, + "step": 13224 + }, + { + "epoch": 2.26, + "learning_rate": 3.0586723308743415e-06, + "loss": 0.3198, + "step": 13225 + }, + { + "epoch": 2.26, + "learning_rate": 3.057346930688728e-06, + "loss": 0.3154, + "step": 13226 + }, + { + "epoch": 2.26, + "learning_rate": 3.0560217659084167e-06, + "loss": 0.3142, + "step": 13227 + }, + { + "epoch": 2.26, + "learning_rate": 3.0546968365783415e-06, + "loss": 0.3165, + "step": 13228 + }, + { + "epoch": 2.26, + "learning_rate": 3.053372142743426e-06, + "loss": 0.3093, + "step": 13229 + }, + { + "epoch": 2.26, + "learning_rate": 3.052047684448587e-06, + "loss": 0.3183, + "step": 13230 + }, + { + "epoch": 2.26, + "learning_rate": 3.050723461738736e-06, + "loss": 0.3149, + "step": 13231 + }, + { + "epoch": 2.26, + "learning_rate": 3.0493994746587676e-06, + "loss": 0.3177, + "step": 13232 + }, + { + "epoch": 2.26, + "learning_rate": 3.0480757232535773e-06, + "loss": 0.3073, + "step": 13233 + }, + { + "epoch": 2.26, + "learning_rate": 3.0467522075680535e-06, + "loss": 0.3155, + "step": 13234 + }, + { + "epoch": 2.26, + "learning_rate": 3.045428927647065e-06, + "loss": 0.3086, + "step": 13235 + }, + { + "epoch": 2.26, + "learning_rate": 3.0441058835354854e-06, + "loss": 0.2989, + "step": 13236 + }, + { + "epoch": 2.26, + "learning_rate": 3.042783075278174e-06, + "loss": 0.3149, + "step": 13237 + }, + { + "epoch": 2.26, + "learning_rate": 3.0414605029199838e-06, + "loss": 0.2997, + "step": 13238 + }, + { + "epoch": 2.26, + "learning_rate": 3.0401381665057596e-06, + "loss": 0.3341, + "step": 13239 + }, + { + "epoch": 2.26, + "learning_rate": 3.038816066080341e-06, + "loss": 0.2839, + "step": 13240 + }, + { + "epoch": 2.26, + "learning_rate": 3.03749420168855e-06, + "loss": 0.3219, + "step": 13241 + }, + { + "epoch": 2.26, + "learning_rate": 3.0361725733752112e-06, + "loss": 0.3112, + "step": 13242 + }, + { + "epoch": 2.26, + "learning_rate": 3.0348511811851353e-06, + "loss": 0.2829, + "step": 13243 + }, + { + "epoch": 2.26, + "learning_rate": 3.0335300251631295e-06, + "loss": 0.3154, + "step": 13244 + }, + { + "epoch": 2.26, + "learning_rate": 3.0322091053539893e-06, + "loss": 0.32, + "step": 13245 + }, + { + "epoch": 2.26, + "learning_rate": 3.0308884218025013e-06, + "loss": 0.3072, + "step": 13246 + }, + { + "epoch": 2.26, + "learning_rate": 3.0295679745534524e-06, + "loss": 0.3411, + "step": 13247 + }, + { + "epoch": 2.26, + "learning_rate": 3.0282477636516074e-06, + "loss": 0.2924, + "step": 13248 + }, + { + "epoch": 2.26, + "learning_rate": 3.026927789141737e-06, + "loss": 0.3053, + "step": 13249 + }, + { + "epoch": 2.26, + "learning_rate": 3.0256080510685914e-06, + "loss": 0.2933, + "step": 13250 + }, + { + "epoch": 2.26, + "learning_rate": 3.0242885494769226e-06, + "loss": 0.3208, + "step": 13251 + }, + { + "epoch": 2.26, + "learning_rate": 3.022969284411471e-06, + "loss": 0.3246, + "step": 13252 + }, + { + "epoch": 2.26, + "learning_rate": 3.021650255916969e-06, + "loss": 0.3239, + "step": 13253 + }, + { + "epoch": 2.26, + "learning_rate": 3.020331464038141e-06, + "loss": 0.2977, + "step": 13254 + }, + { + "epoch": 2.26, + "learning_rate": 3.019012908819706e-06, + "loss": 0.3257, + "step": 13255 + }, + { + "epoch": 2.26, + "learning_rate": 3.017694590306367e-06, + "loss": 0.3113, + "step": 13256 + }, + { + "epoch": 2.26, + "learning_rate": 3.0163765085428278e-06, + "loss": 0.3075, + "step": 13257 + }, + { + "epoch": 2.26, + "learning_rate": 3.01505866357378e-06, + "loss": 0.2868, + "step": 13258 + }, + { + "epoch": 2.26, + "learning_rate": 3.013741055443907e-06, + "loss": 0.3119, + "step": 13259 + }, + { + "epoch": 2.26, + "learning_rate": 3.012423684197888e-06, + "loss": 0.3087, + "step": 13260 + }, + { + "epoch": 2.26, + "learning_rate": 3.011106549880388e-06, + "loss": 0.3062, + "step": 13261 + }, + { + "epoch": 2.26, + "learning_rate": 3.0097896525360725e-06, + "loss": 0.3093, + "step": 13262 + }, + { + "epoch": 2.26, + "learning_rate": 3.008472992209587e-06, + "loss": 0.3083, + "step": 13263 + }, + { + "epoch": 2.26, + "learning_rate": 3.0071565689455774e-06, + "loss": 0.3229, + "step": 13264 + }, + { + "epoch": 2.26, + "learning_rate": 3.005840382788685e-06, + "loss": 0.3254, + "step": 13265 + }, + { + "epoch": 2.26, + "learning_rate": 3.00452443378353e-06, + "loss": 0.3132, + "step": 13266 + }, + { + "epoch": 2.26, + "learning_rate": 3.0032087219747364e-06, + "loss": 0.3053, + "step": 13267 + }, + { + "epoch": 2.26, + "learning_rate": 3.0018932474069152e-06, + "loss": 0.3455, + "step": 13268 + }, + { + "epoch": 2.26, + "learning_rate": 3.0005780101246716e-06, + "loss": 0.3318, + "step": 13269 + }, + { + "epoch": 2.26, + "learning_rate": 2.999263010172605e-06, + "loss": 0.3032, + "step": 13270 + }, + { + "epoch": 2.26, + "learning_rate": 2.9979482475952947e-06, + "loss": 0.3197, + "step": 13271 + }, + { + "epoch": 2.26, + "learning_rate": 2.9966337224373265e-06, + "loss": 0.3233, + "step": 13272 + }, + { + "epoch": 2.26, + "learning_rate": 2.99531943474327e-06, + "loss": 0.3009, + "step": 13273 + }, + { + "epoch": 2.26, + "learning_rate": 2.99400538455769e-06, + "loss": 0.334, + "step": 13274 + }, + { + "epoch": 2.26, + "learning_rate": 2.9926915719251406e-06, + "loss": 0.3313, + "step": 13275 + }, + { + "epoch": 2.26, + "learning_rate": 2.991377996890171e-06, + "loss": 0.3125, + "step": 13276 + }, + { + "epoch": 2.26, + "learning_rate": 2.9900646594973237e-06, + "loss": 0.2962, + "step": 13277 + }, + { + "epoch": 2.26, + "learning_rate": 2.9887515597911243e-06, + "loss": 0.3021, + "step": 13278 + }, + { + "epoch": 2.26, + "learning_rate": 2.9874386978160972e-06, + "loss": 0.3002, + "step": 13279 + }, + { + "epoch": 2.26, + "learning_rate": 2.986126073616761e-06, + "loss": 0.2953, + "step": 13280 + }, + { + "epoch": 2.27, + "learning_rate": 2.984813687237622e-06, + "loss": 0.3136, + "step": 13281 + }, + { + "epoch": 2.27, + "learning_rate": 2.983501538723177e-06, + "loss": 0.3063, + "step": 13282 + }, + { + "epoch": 2.27, + "learning_rate": 2.9821896281179185e-06, + "loss": 0.2974, + "step": 13283 + }, + { + "epoch": 2.27, + "learning_rate": 2.9808779554663305e-06, + "loss": 0.3218, + "step": 13284 + }, + { + "epoch": 2.27, + "learning_rate": 2.97956652081289e-06, + "loss": 0.3143, + "step": 13285 + }, + { + "epoch": 2.27, + "learning_rate": 2.9782553242020585e-06, + "loss": 0.3106, + "step": 13286 + }, + { + "epoch": 2.27, + "learning_rate": 2.976944365678297e-06, + "loss": 0.3285, + "step": 13287 + }, + { + "epoch": 2.27, + "learning_rate": 2.9756336452860578e-06, + "loss": 0.3162, + "step": 13288 + }, + { + "epoch": 2.27, + "learning_rate": 2.9743231630697823e-06, + "loss": 0.2872, + "step": 13289 + }, + { + "epoch": 2.27, + "learning_rate": 2.9730129190739067e-06, + "loss": 0.3034, + "step": 13290 + }, + { + "epoch": 2.27, + "learning_rate": 2.9717029133428566e-06, + "loss": 0.3145, + "step": 13291 + }, + { + "epoch": 2.27, + "learning_rate": 2.9703931459210534e-06, + "loss": 0.3083, + "step": 13292 + }, + { + "epoch": 2.27, + "learning_rate": 2.9690836168529016e-06, + "loss": 0.3175, + "step": 13293 + }, + { + "epoch": 2.27, + "learning_rate": 2.9677743261828063e-06, + "loss": 0.3314, + "step": 13294 + }, + { + "epoch": 2.27, + "learning_rate": 2.9664652739551627e-06, + "loss": 0.3142, + "step": 13295 + }, + { + "epoch": 2.27, + "learning_rate": 2.965156460214359e-06, + "loss": 0.3438, + "step": 13296 + }, + { + "epoch": 2.27, + "learning_rate": 2.963847885004767e-06, + "loss": 0.2792, + "step": 13297 + }, + { + "epoch": 2.27, + "learning_rate": 2.962539548370762e-06, + "loss": 0.2948, + "step": 13298 + }, + { + "epoch": 2.27, + "learning_rate": 2.961231450356703e-06, + "loss": 0.3066, + "step": 13299 + }, + { + "epoch": 2.27, + "learning_rate": 2.959923591006949e-06, + "loss": 0.3265, + "step": 13300 + }, + { + "epoch": 2.27, + "learning_rate": 2.958615970365839e-06, + "loss": 0.3255, + "step": 13301 + }, + { + "epoch": 2.27, + "learning_rate": 2.957308588477713e-06, + "loss": 0.3238, + "step": 13302 + }, + { + "epoch": 2.27, + "learning_rate": 2.9560014453869003e-06, + "loss": 0.349, + "step": 13303 + }, + { + "epoch": 2.27, + "learning_rate": 2.954694541137725e-06, + "loss": 0.3393, + "step": 13304 + }, + { + "epoch": 2.27, + "learning_rate": 2.953387875774497e-06, + "loss": 0.2998, + "step": 13305 + }, + { + "epoch": 2.27, + "learning_rate": 2.952081449341523e-06, + "loss": 0.295, + "step": 13306 + }, + { + "epoch": 2.27, + "learning_rate": 2.9507752618831044e-06, + "loss": 0.3187, + "step": 13307 + }, + { + "epoch": 2.27, + "learning_rate": 2.949469313443523e-06, + "loss": 0.3257, + "step": 13308 + }, + { + "epoch": 2.27, + "learning_rate": 2.9481636040670614e-06, + "loss": 0.3138, + "step": 13309 + }, + { + "epoch": 2.27, + "learning_rate": 2.946858133797995e-06, + "loss": 0.3323, + "step": 13310 + }, + { + "epoch": 2.27, + "learning_rate": 2.945552902680586e-06, + "loss": 0.3437, + "step": 13311 + }, + { + "epoch": 2.27, + "learning_rate": 2.944247910759097e-06, + "loss": 0.3202, + "step": 13312 + }, + { + "epoch": 2.27, + "learning_rate": 2.942943158077769e-06, + "loss": 0.3033, + "step": 13313 + }, + { + "epoch": 2.27, + "learning_rate": 2.941638644680844e-06, + "loss": 0.3157, + "step": 13314 + }, + { + "epoch": 2.27, + "learning_rate": 2.94033437061256e-06, + "loss": 0.3364, + "step": 13315 + }, + { + "epoch": 2.27, + "learning_rate": 2.9390303359171323e-06, + "loss": 0.2987, + "step": 13316 + }, + { + "epoch": 2.27, + "learning_rate": 2.9377265406387823e-06, + "loss": 0.3114, + "step": 13317 + }, + { + "epoch": 2.27, + "learning_rate": 2.9364229848217163e-06, + "loss": 0.2983, + "step": 13318 + }, + { + "epoch": 2.27, + "learning_rate": 2.9351196685101356e-06, + "loss": 0.334, + "step": 13319 + }, + { + "epoch": 2.27, + "learning_rate": 2.9338165917482307e-06, + "loss": 0.2796, + "step": 13320 + }, + { + "epoch": 2.27, + "learning_rate": 2.932513754580185e-06, + "loss": 0.2936, + "step": 13321 + }, + { + "epoch": 2.27, + "learning_rate": 2.9312111570501788e-06, + "loss": 0.2981, + "step": 13322 + }, + { + "epoch": 2.27, + "learning_rate": 2.929908799202371e-06, + "loss": 0.3259, + "step": 13323 + }, + { + "epoch": 2.27, + "learning_rate": 2.9286066810809253e-06, + "loss": 0.3099, + "step": 13324 + }, + { + "epoch": 2.27, + "learning_rate": 2.9273048027299922e-06, + "loss": 0.3194, + "step": 13325 + }, + { + "epoch": 2.27, + "learning_rate": 2.926003164193715e-06, + "loss": 0.2984, + "step": 13326 + }, + { + "epoch": 2.27, + "learning_rate": 2.924701765516229e-06, + "loss": 0.3016, + "step": 13327 + }, + { + "epoch": 2.27, + "learning_rate": 2.923400606741662e-06, + "loss": 0.3199, + "step": 13328 + }, + { + "epoch": 2.27, + "learning_rate": 2.9220996879141283e-06, + "loss": 0.3277, + "step": 13329 + }, + { + "epoch": 2.27, + "learning_rate": 2.920799009077744e-06, + "loss": 0.318, + "step": 13330 + }, + { + "epoch": 2.27, + "learning_rate": 2.9194985702766044e-06, + "loss": 0.3179, + "step": 13331 + }, + { + "epoch": 2.27, + "learning_rate": 2.918198371554808e-06, + "loss": 0.3262, + "step": 13332 + }, + { + "epoch": 2.27, + "learning_rate": 2.9168984129564405e-06, + "loss": 0.3026, + "step": 13333 + }, + { + "epoch": 2.27, + "learning_rate": 2.915598694525578e-06, + "loss": 0.2956, + "step": 13334 + }, + { + "epoch": 2.27, + "learning_rate": 2.914299216306292e-06, + "loss": 0.3108, + "step": 13335 + }, + { + "epoch": 2.27, + "learning_rate": 2.912999978342643e-06, + "loss": 0.3334, + "step": 13336 + }, + { + "epoch": 2.27, + "learning_rate": 2.91170098067869e-06, + "loss": 0.3367, + "step": 13337 + }, + { + "epoch": 2.27, + "learning_rate": 2.9104022233584684e-06, + "loss": 0.3164, + "step": 13338 + }, + { + "epoch": 2.27, + "learning_rate": 2.9091037064260206e-06, + "loss": 0.333, + "step": 13339 + }, + { + "epoch": 2.28, + "learning_rate": 2.9078054299253755e-06, + "loss": 0.3131, + "step": 13340 + }, + { + "epoch": 2.28, + "learning_rate": 2.906507393900553e-06, + "loss": 0.2968, + "step": 13341 + }, + { + "epoch": 2.28, + "learning_rate": 2.9052095983955665e-06, + "loss": 0.2918, + "step": 13342 + }, + { + "epoch": 2.28, + "learning_rate": 2.903912043454423e-06, + "loss": 0.3205, + "step": 13343 + }, + { + "epoch": 2.28, + "learning_rate": 2.9026147291211125e-06, + "loss": 0.3301, + "step": 13344 + }, + { + "epoch": 2.28, + "learning_rate": 2.901317655439628e-06, + "loss": 0.3389, + "step": 13345 + }, + { + "epoch": 2.28, + "learning_rate": 2.9000208224539505e-06, + "loss": 0.3135, + "step": 13346 + }, + { + "epoch": 2.28, + "learning_rate": 2.8987242302080477e-06, + "loss": 0.3258, + "step": 13347 + }, + { + "epoch": 2.28, + "learning_rate": 2.897427878745884e-06, + "loss": 0.3175, + "step": 13348 + }, + { + "epoch": 2.28, + "learning_rate": 2.8961317681114163e-06, + "loss": 0.3259, + "step": 13349 + }, + { + "epoch": 2.28, + "learning_rate": 2.8948358983485924e-06, + "loss": 0.3274, + "step": 13350 + }, + { + "epoch": 2.28, + "learning_rate": 2.8935402695013514e-06, + "loss": 0.3071, + "step": 13351 + }, + { + "epoch": 2.28, + "learning_rate": 2.8922448816136263e-06, + "loss": 0.2964, + "step": 13352 + }, + { + "epoch": 2.28, + "learning_rate": 2.8909497347293334e-06, + "loss": 0.3132, + "step": 13353 + }, + { + "epoch": 2.28, + "learning_rate": 2.889654828892393e-06, + "loss": 0.2986, + "step": 13354 + }, + { + "epoch": 2.28, + "learning_rate": 2.8883601641467096e-06, + "loss": 0.3179, + "step": 13355 + }, + { + "epoch": 2.28, + "learning_rate": 2.8870657405361825e-06, + "loss": 0.3095, + "step": 13356 + }, + { + "epoch": 2.28, + "learning_rate": 2.885771558104701e-06, + "loss": 0.3059, + "step": 13357 + }, + { + "epoch": 2.28, + "learning_rate": 2.8844776168961475e-06, + "loss": 0.3107, + "step": 13358 + }, + { + "epoch": 2.28, + "learning_rate": 2.8831839169543998e-06, + "loss": 0.3007, + "step": 13359 + }, + { + "epoch": 2.28, + "learning_rate": 2.8818904583233152e-06, + "loss": 0.3288, + "step": 13360 + }, + { + "epoch": 2.28, + "learning_rate": 2.880597241046759e-06, + "loss": 0.3277, + "step": 13361 + }, + { + "epoch": 2.28, + "learning_rate": 2.879304265168574e-06, + "loss": 0.3025, + "step": 13362 + }, + { + "epoch": 2.28, + "learning_rate": 2.878011530732604e-06, + "loss": 0.3004, + "step": 13363 + }, + { + "epoch": 2.28, + "learning_rate": 2.8767190377826824e-06, + "loss": 0.3414, + "step": 13364 + }, + { + "epoch": 2.28, + "learning_rate": 2.875426786362634e-06, + "loss": 0.3197, + "step": 13365 + }, + { + "epoch": 2.28, + "learning_rate": 2.8741347765162743e-06, + "loss": 0.3165, + "step": 13366 + }, + { + "epoch": 2.28, + "learning_rate": 2.872843008287416e-06, + "loss": 0.3129, + "step": 13367 + }, + { + "epoch": 2.28, + "learning_rate": 2.8715514817198508e-06, + "loss": 0.2955, + "step": 13368 + }, + { + "epoch": 2.28, + "learning_rate": 2.8702601968573772e-06, + "loss": 0.3216, + "step": 13369 + }, + { + "epoch": 2.28, + "learning_rate": 2.868969153743776e-06, + "loss": 0.3216, + "step": 13370 + }, + { + "epoch": 2.28, + "learning_rate": 2.8676783524228248e-06, + "loss": 0.3205, + "step": 13371 + }, + { + "epoch": 2.28, + "learning_rate": 2.866387792938289e-06, + "loss": 0.2943, + "step": 13372 + }, + { + "epoch": 2.28, + "learning_rate": 2.865097475333929e-06, + "loss": 0.3322, + "step": 13373 + }, + { + "epoch": 2.28, + "learning_rate": 2.8638073996534955e-06, + "loss": 0.3003, + "step": 13374 + }, + { + "epoch": 2.28, + "learning_rate": 2.862517565940733e-06, + "loss": 0.3358, + "step": 13375 + }, + { + "epoch": 2.28, + "learning_rate": 2.8612279742393735e-06, + "loss": 0.2964, + "step": 13376 + }, + { + "epoch": 2.28, + "learning_rate": 2.859938624593145e-06, + "loss": 0.3056, + "step": 13377 + }, + { + "epoch": 2.28, + "learning_rate": 2.858649517045763e-06, + "loss": 0.3162, + "step": 13378 + }, + { + "epoch": 2.28, + "learning_rate": 2.857360651640938e-06, + "loss": 0.3231, + "step": 13379 + }, + { + "epoch": 2.28, + "learning_rate": 2.8560720284223732e-06, + "loss": 0.3182, + "step": 13380 + }, + { + "epoch": 2.28, + "learning_rate": 2.854783647433762e-06, + "loss": 0.3101, + "step": 13381 + }, + { + "epoch": 2.28, + "learning_rate": 2.853495508718789e-06, + "loss": 0.321, + "step": 13382 + }, + { + "epoch": 2.28, + "learning_rate": 2.8522076123211352e-06, + "loss": 0.3106, + "step": 13383 + }, + { + "epoch": 2.28, + "learning_rate": 2.8509199582844615e-06, + "loss": 0.33, + "step": 13384 + }, + { + "epoch": 2.28, + "learning_rate": 2.8496325466524342e-06, + "loss": 0.3334, + "step": 13385 + }, + { + "epoch": 2.28, + "learning_rate": 2.8483453774687032e-06, + "loss": 0.3077, + "step": 13386 + }, + { + "epoch": 2.28, + "learning_rate": 2.847058450776915e-06, + "loss": 0.3061, + "step": 13387 + }, + { + "epoch": 2.28, + "learning_rate": 2.845771766620703e-06, + "loss": 0.3129, + "step": 13388 + }, + { + "epoch": 2.28, + "learning_rate": 2.8444853250436976e-06, + "loss": 0.3252, + "step": 13389 + }, + { + "epoch": 2.28, + "learning_rate": 2.8431991260895197e-06, + "loss": 0.3196, + "step": 13390 + }, + { + "epoch": 2.28, + "learning_rate": 2.8419131698017756e-06, + "loss": 0.3288, + "step": 13391 + }, + { + "epoch": 2.28, + "learning_rate": 2.84062745622407e-06, + "loss": 0.3138, + "step": 13392 + }, + { + "epoch": 2.28, + "learning_rate": 2.8393419854000014e-06, + "loss": 0.3109, + "step": 13393 + }, + { + "epoch": 2.28, + "learning_rate": 2.838056757373151e-06, + "loss": 0.3005, + "step": 13394 + }, + { + "epoch": 2.28, + "learning_rate": 2.836771772187099e-06, + "loss": 0.2904, + "step": 13395 + }, + { + "epoch": 2.28, + "learning_rate": 2.8354870298854163e-06, + "loss": 0.3069, + "step": 13396 + }, + { + "epoch": 2.28, + "learning_rate": 2.834202530511665e-06, + "loss": 0.3158, + "step": 13397 + }, + { + "epoch": 2.28, + "learning_rate": 2.8329182741094007e-06, + "loss": 0.331, + "step": 13398 + }, + { + "epoch": 2.29, + "learning_rate": 2.831634260722165e-06, + "loss": 0.3171, + "step": 13399 + }, + { + "epoch": 2.29, + "learning_rate": 2.8303504903934953e-06, + "loss": 0.3316, + "step": 13400 + }, + { + "epoch": 2.29, + "learning_rate": 2.829066963166922e-06, + "loss": 0.3046, + "step": 13401 + }, + { + "epoch": 2.29, + "learning_rate": 2.827783679085966e-06, + "loss": 0.336, + "step": 13402 + }, + { + "epoch": 2.29, + "learning_rate": 2.8265006381941395e-06, + "loss": 0.3095, + "step": 13403 + }, + { + "epoch": 2.29, + "learning_rate": 2.825217840534946e-06, + "loss": 0.3263, + "step": 13404 + }, + { + "epoch": 2.29, + "learning_rate": 2.8239352861518866e-06, + "loss": 0.293, + "step": 13405 + }, + { + "epoch": 2.29, + "learning_rate": 2.8226529750884403e-06, + "loss": 0.3164, + "step": 13406 + }, + { + "epoch": 2.29, + "learning_rate": 2.82137090738809e-06, + "loss": 0.3247, + "step": 13407 + }, + { + "epoch": 2.29, + "learning_rate": 2.8200890830943127e-06, + "loss": 0.3173, + "step": 13408 + }, + { + "epoch": 2.29, + "learning_rate": 2.8188075022505624e-06, + "loss": 0.305, + "step": 13409 + }, + { + "epoch": 2.29, + "learning_rate": 2.817526164900297e-06, + "loss": 0.3231, + "step": 13410 + }, + { + "epoch": 2.29, + "learning_rate": 2.816245071086965e-06, + "loss": 0.3214, + "step": 13411 + }, + { + "epoch": 2.29, + "learning_rate": 2.8149642208540027e-06, + "loss": 0.309, + "step": 13412 + }, + { + "epoch": 2.29, + "learning_rate": 2.8136836142448433e-06, + "loss": 0.3022, + "step": 13413 + }, + { + "epoch": 2.29, + "learning_rate": 2.8124032513029032e-06, + "loss": 0.3169, + "step": 13414 + }, + { + "epoch": 2.29, + "learning_rate": 2.811123132071598e-06, + "loss": 0.3113, + "step": 13415 + }, + { + "epoch": 2.29, + "learning_rate": 2.8098432565943333e-06, + "loss": 0.3335, + "step": 13416 + }, + { + "epoch": 2.29, + "learning_rate": 2.8085636249145066e-06, + "loss": 0.3289, + "step": 13417 + }, + { + "epoch": 2.29, + "learning_rate": 2.807284237075505e-06, + "loss": 0.3339, + "step": 13418 + }, + { + "epoch": 2.29, + "learning_rate": 2.8060050931207107e-06, + "loss": 0.314, + "step": 13419 + }, + { + "epoch": 2.29, + "learning_rate": 2.8047261930934964e-06, + "loss": 0.3004, + "step": 13420 + }, + { + "epoch": 2.29, + "learning_rate": 2.8034475370372217e-06, + "loss": 0.3206, + "step": 13421 + }, + { + "epoch": 2.29, + "learning_rate": 2.8021691249952453e-06, + "loss": 0.2933, + "step": 13422 + }, + { + "epoch": 2.29, + "learning_rate": 2.8008909570109135e-06, + "loss": 0.3166, + "step": 13423 + }, + { + "epoch": 2.29, + "learning_rate": 2.7996130331275682e-06, + "loss": 0.3556, + "step": 13424 + }, + { + "epoch": 2.29, + "learning_rate": 2.798335353388535e-06, + "loss": 0.3131, + "step": 13425 + }, + { + "epoch": 2.29, + "learning_rate": 2.797057917837139e-06, + "loss": 0.2884, + "step": 13426 + }, + { + "epoch": 2.29, + "learning_rate": 2.7957807265166936e-06, + "loss": 0.3124, + "step": 13427 + }, + { + "epoch": 2.29, + "learning_rate": 2.794503779470509e-06, + "loss": 0.2961, + "step": 13428 + }, + { + "epoch": 2.29, + "learning_rate": 2.7932270767418757e-06, + "loss": 0.3194, + "step": 13429 + }, + { + "epoch": 2.29, + "learning_rate": 2.7919506183740865e-06, + "loss": 0.3174, + "step": 13430 + }, + { + "epoch": 2.29, + "learning_rate": 2.790674404410422e-06, + "loss": 0.2994, + "step": 13431 + }, + { + "epoch": 2.29, + "learning_rate": 2.789398434894155e-06, + "loss": 0.3209, + "step": 13432 + }, + { + "epoch": 2.29, + "learning_rate": 2.7881227098685503e-06, + "loss": 0.3141, + "step": 13433 + }, + { + "epoch": 2.29, + "learning_rate": 2.7868472293768643e-06, + "loss": 0.3284, + "step": 13434 + }, + { + "epoch": 2.29, + "learning_rate": 2.785571993462347e-06, + "loss": 0.3251, + "step": 13435 + }, + { + "epoch": 2.29, + "learning_rate": 2.7842970021682327e-06, + "loss": 0.3032, + "step": 13436 + }, + { + "epoch": 2.29, + "learning_rate": 2.783022255537755e-06, + "loss": 0.3221, + "step": 13437 + }, + { + "epoch": 2.29, + "learning_rate": 2.781747753614138e-06, + "loss": 0.3297, + "step": 13438 + }, + { + "epoch": 2.29, + "learning_rate": 2.7804734964405976e-06, + "loss": 0.3169, + "step": 13439 + }, + { + "epoch": 2.29, + "learning_rate": 2.7791994840603353e-06, + "loss": 0.3123, + "step": 13440 + }, + { + "epoch": 2.29, + "learning_rate": 2.777925716516553e-06, + "loss": 0.3304, + "step": 13441 + }, + { + "epoch": 2.29, + "learning_rate": 2.7766521938524393e-06, + "loss": 0.2978, + "step": 13442 + }, + { + "epoch": 2.29, + "learning_rate": 2.775378916111179e-06, + "loss": 0.3128, + "step": 13443 + }, + { + "epoch": 2.29, + "learning_rate": 2.774105883335938e-06, + "loss": 0.3015, + "step": 13444 + }, + { + "epoch": 2.29, + "learning_rate": 2.772833095569887e-06, + "loss": 0.2906, + "step": 13445 + }, + { + "epoch": 2.29, + "learning_rate": 2.7715605528561795e-06, + "loss": 0.3214, + "step": 13446 + }, + { + "epoch": 2.29, + "learning_rate": 2.770288255237966e-06, + "loss": 0.3094, + "step": 13447 + }, + { + "epoch": 2.29, + "learning_rate": 2.7690162027583856e-06, + "loss": 0.3099, + "step": 13448 + }, + { + "epoch": 2.29, + "learning_rate": 2.76774439546057e-06, + "loss": 0.323, + "step": 13449 + }, + { + "epoch": 2.29, + "learning_rate": 2.7664728333876443e-06, + "loss": 0.3261, + "step": 13450 + }, + { + "epoch": 2.29, + "learning_rate": 2.7652015165827208e-06, + "loss": 0.3315, + "step": 13451 + }, + { + "epoch": 2.29, + "learning_rate": 2.7639304450889047e-06, + "loss": 0.2971, + "step": 13452 + }, + { + "epoch": 2.29, + "learning_rate": 2.7626596189492983e-06, + "loss": 0.3168, + "step": 13453 + }, + { + "epoch": 2.29, + "learning_rate": 2.7613890382069896e-06, + "loss": 0.3097, + "step": 13454 + }, + { + "epoch": 2.29, + "learning_rate": 2.7601187029050635e-06, + "loss": 0.3239, + "step": 13455 + }, + { + "epoch": 2.29, + "learning_rate": 2.7588486130865877e-06, + "loss": 0.3328, + "step": 13456 + }, + { + "epoch": 2.3, + "learning_rate": 2.7575787687946297e-06, + "loss": 0.2944, + "step": 13457 + }, + { + "epoch": 2.3, + "learning_rate": 2.7563091700722513e-06, + "loss": 0.331, + "step": 13458 + }, + { + "epoch": 2.3, + "learning_rate": 2.7550398169624927e-06, + "loss": 0.2798, + "step": 13459 + }, + { + "epoch": 2.3, + "learning_rate": 2.7537707095083965e-06, + "loss": 0.325, + "step": 13460 + }, + { + "epoch": 2.3, + "learning_rate": 2.752501847752996e-06, + "loss": 0.3491, + "step": 13461 + }, + { + "epoch": 2.3, + "learning_rate": 2.7512332317393153e-06, + "loss": 0.3272, + "step": 13462 + }, + { + "epoch": 2.3, + "learning_rate": 2.749964861510368e-06, + "loss": 0.3111, + "step": 13463 + }, + { + "epoch": 2.3, + "learning_rate": 2.748696737109162e-06, + "loss": 0.3113, + "step": 13464 + }, + { + "epoch": 2.3, + "learning_rate": 2.7474288585786967e-06, + "loss": 0.3149, + "step": 13465 + }, + { + "epoch": 2.3, + "learning_rate": 2.7461612259619587e-06, + "loss": 0.3458, + "step": 13466 + }, + { + "epoch": 2.3, + "learning_rate": 2.7448938393019308e-06, + "loss": 0.3365, + "step": 13467 + }, + { + "epoch": 2.3, + "learning_rate": 2.7436266986415884e-06, + "loss": 0.3037, + "step": 13468 + }, + { + "epoch": 2.3, + "learning_rate": 2.7423598040238953e-06, + "loss": 0.3227, + "step": 13469 + }, + { + "epoch": 2.3, + "learning_rate": 2.7410931554918074e-06, + "loss": 0.2892, + "step": 13470 + }, + { + "epoch": 2.3, + "learning_rate": 2.7398267530882773e-06, + "loss": 0.3313, + "step": 13471 + }, + { + "epoch": 2.3, + "learning_rate": 2.738560596856239e-06, + "loss": 0.3174, + "step": 13472 + }, + { + "epoch": 2.3, + "learning_rate": 2.737294686838631e-06, + "loss": 0.3067, + "step": 13473 + }, + { + "epoch": 2.3, + "learning_rate": 2.736029023078368e-06, + "loss": 0.3265, + "step": 13474 + }, + { + "epoch": 2.3, + "learning_rate": 2.73476360561837e-06, + "loss": 0.2848, + "step": 13475 + }, + { + "epoch": 2.3, + "learning_rate": 2.733498434501545e-06, + "loss": 0.3416, + "step": 13476 + }, + { + "epoch": 2.3, + "learning_rate": 2.7322335097707875e-06, + "loss": 0.3019, + "step": 13477 + }, + { + "epoch": 2.3, + "learning_rate": 2.730968831468991e-06, + "loss": 0.3279, + "step": 13478 + }, + { + "epoch": 2.3, + "learning_rate": 2.729704399639035e-06, + "loss": 0.3125, + "step": 13479 + }, + { + "epoch": 2.3, + "learning_rate": 2.7284402143237953e-06, + "loss": 0.3567, + "step": 13480 + }, + { + "epoch": 2.3, + "learning_rate": 2.7271762755661335e-06, + "loss": 0.3459, + "step": 13481 + }, + { + "epoch": 2.3, + "learning_rate": 2.7259125834089073e-06, + "loss": 0.3324, + "step": 13482 + }, + { + "epoch": 2.3, + "learning_rate": 2.724649137894965e-06, + "loss": 0.3273, + "step": 13483 + }, + { + "epoch": 2.3, + "learning_rate": 2.7233859390671447e-06, + "loss": 0.298, + "step": 13484 + }, + { + "epoch": 2.3, + "learning_rate": 2.722122986968281e-06, + "loss": 0.2984, + "step": 13485 + }, + { + "epoch": 2.3, + "learning_rate": 2.7208602816411987e-06, + "loss": 0.3239, + "step": 13486 + }, + { + "epoch": 2.3, + "learning_rate": 2.719597823128706e-06, + "loss": 0.328, + "step": 13487 + }, + { + "epoch": 2.3, + "learning_rate": 2.7183356114736113e-06, + "loss": 0.325, + "step": 13488 + }, + { + "epoch": 2.3, + "learning_rate": 2.7170736467187186e-06, + "loss": 0.2742, + "step": 13489 + }, + { + "epoch": 2.3, + "learning_rate": 2.7158119289068085e-06, + "loss": 0.309, + "step": 13490 + }, + { + "epoch": 2.3, + "learning_rate": 2.714550458080667e-06, + "loss": 0.3272, + "step": 13491 + }, + { + "epoch": 2.3, + "learning_rate": 2.7132892342830653e-06, + "loss": 0.3024, + "step": 13492 + }, + { + "epoch": 2.3, + "learning_rate": 2.712028257556769e-06, + "loss": 0.3332, + "step": 13493 + }, + { + "epoch": 2.3, + "learning_rate": 2.7107675279445345e-06, + "loss": 0.326, + "step": 13494 + }, + { + "epoch": 2.3, + "learning_rate": 2.7095070454891117e-06, + "loss": 0.2949, + "step": 13495 + }, + { + "epoch": 2.3, + "learning_rate": 2.708246810233234e-06, + "loss": 0.3184, + "step": 13496 + }, + { + "epoch": 2.3, + "learning_rate": 2.706986822219636e-06, + "loss": 0.3062, + "step": 13497 + }, + { + "epoch": 2.3, + "learning_rate": 2.7057270814910386e-06, + "loss": 0.3009, + "step": 13498 + }, + { + "epoch": 2.3, + "learning_rate": 2.7044675880901585e-06, + "loss": 0.295, + "step": 13499 + }, + { + "epoch": 2.3, + "learning_rate": 2.7032083420597e-06, + "loss": 0.3179, + "step": 13500 + }, + { + "epoch": 2.3, + "learning_rate": 2.7019493434423594e-06, + "loss": 0.3107, + "step": 13501 + }, + { + "epoch": 2.3, + "learning_rate": 2.7006905922808312e-06, + "loss": 0.2812, + "step": 13502 + }, + { + "epoch": 2.3, + "learning_rate": 2.6994320886177884e-06, + "loss": 0.3203, + "step": 13503 + }, + { + "epoch": 2.3, + "learning_rate": 2.6981738324959096e-06, + "loss": 0.2962, + "step": 13504 + }, + { + "epoch": 2.3, + "learning_rate": 2.696915823957852e-06, + "loss": 0.329, + "step": 13505 + }, + { + "epoch": 2.3, + "learning_rate": 2.6956580630462746e-06, + "loss": 0.3095, + "step": 13506 + }, + { + "epoch": 2.3, + "learning_rate": 2.694400549803825e-06, + "loss": 0.3136, + "step": 13507 + }, + { + "epoch": 2.3, + "learning_rate": 2.693143284273142e-06, + "loss": 0.3136, + "step": 13508 + }, + { + "epoch": 2.3, + "learning_rate": 2.691886266496854e-06, + "loss": 0.2923, + "step": 13509 + }, + { + "epoch": 2.3, + "learning_rate": 2.690629496517587e-06, + "loss": 0.2986, + "step": 13510 + }, + { + "epoch": 2.3, + "learning_rate": 2.6893729743779495e-06, + "loss": 0.3215, + "step": 13511 + }, + { + "epoch": 2.3, + "learning_rate": 2.6881167001205478e-06, + "loss": 0.3119, + "step": 13512 + }, + { + "epoch": 2.3, + "learning_rate": 2.6868606737879787e-06, + "loss": 0.3319, + "step": 13513 + }, + { + "epoch": 2.3, + "learning_rate": 2.685604895422831e-06, + "loss": 0.3103, + "step": 13514 + }, + { + "epoch": 2.3, + "learning_rate": 2.6843493650676844e-06, + "loss": 0.3159, + "step": 13515 + }, + { + "epoch": 2.31, + "learning_rate": 2.683094082765111e-06, + "loss": 0.305, + "step": 13516 + }, + { + "epoch": 2.31, + "learning_rate": 2.681839048557673e-06, + "loss": 0.3099, + "step": 13517 + }, + { + "epoch": 2.31, + "learning_rate": 2.6805842624879275e-06, + "loss": 0.3257, + "step": 13518 + }, + { + "epoch": 2.31, + "learning_rate": 2.6793297245984162e-06, + "loss": 0.3106, + "step": 13519 + }, + { + "epoch": 2.31, + "learning_rate": 2.6780754349316806e-06, + "loss": 0.305, + "step": 13520 + }, + { + "epoch": 2.31, + "learning_rate": 2.676821393530247e-06, + "loss": 0.3208, + "step": 13521 + }, + { + "epoch": 2.31, + "learning_rate": 2.675567600436636e-06, + "loss": 0.3359, + "step": 13522 + }, + { + "epoch": 2.31, + "learning_rate": 2.674314055693363e-06, + "loss": 0.3247, + "step": 13523 + }, + { + "epoch": 2.31, + "learning_rate": 2.67306075934293e-06, + "loss": 0.3156, + "step": 13524 + }, + { + "epoch": 2.31, + "learning_rate": 2.6718077114278373e-06, + "loss": 0.3016, + "step": 13525 + }, + { + "epoch": 2.31, + "learning_rate": 2.670554911990565e-06, + "loss": 0.3425, + "step": 13526 + }, + { + "epoch": 2.31, + "learning_rate": 2.6693023610735957e-06, + "loss": 0.3289, + "step": 13527 + }, + { + "epoch": 2.31, + "learning_rate": 2.6680500587193993e-06, + "loss": 0.3027, + "step": 13528 + }, + { + "epoch": 2.31, + "learning_rate": 2.6667980049704367e-06, + "loss": 0.2945, + "step": 13529 + }, + { + "epoch": 2.31, + "learning_rate": 2.6655461998691635e-06, + "loss": 0.3106, + "step": 13530 + }, + { + "epoch": 2.31, + "learning_rate": 2.664294643458023e-06, + "loss": 0.3078, + "step": 13531 + }, + { + "epoch": 2.31, + "learning_rate": 2.663043335779454e-06, + "loss": 0.2976, + "step": 13532 + }, + { + "epoch": 2.31, + "learning_rate": 2.6617922768758854e-06, + "loss": 0.3048, + "step": 13533 + }, + { + "epoch": 2.31, + "learning_rate": 2.660541466789732e-06, + "loss": 0.3014, + "step": 13534 + }, + { + "epoch": 2.31, + "learning_rate": 2.6592909055634087e-06, + "loss": 0.3249, + "step": 13535 + }, + { + "epoch": 2.31, + "learning_rate": 2.6580405932393207e-06, + "loss": 0.3091, + "step": 13536 + }, + { + "epoch": 2.31, + "learning_rate": 2.6567905298598574e-06, + "loss": 0.3006, + "step": 13537 + }, + { + "epoch": 2.31, + "learning_rate": 2.655540715467406e-06, + "loss": 0.2955, + "step": 13538 + }, + { + "epoch": 2.31, + "learning_rate": 2.6542911501043456e-06, + "loss": 0.3235, + "step": 13539 + }, + { + "epoch": 2.31, + "learning_rate": 2.653041833813047e-06, + "loss": 0.3388, + "step": 13540 + }, + { + "epoch": 2.31, + "learning_rate": 2.6517927666358668e-06, + "loss": 0.3282, + "step": 13541 + }, + { + "epoch": 2.31, + "learning_rate": 2.6505439486151594e-06, + "loss": 0.3111, + "step": 13542 + }, + { + "epoch": 2.31, + "learning_rate": 2.6492953797932684e-06, + "loss": 0.3184, + "step": 13543 + }, + { + "epoch": 2.31, + "learning_rate": 2.64804706021253e-06, + "loss": 0.2965, + "step": 13544 + }, + { + "epoch": 2.31, + "learning_rate": 2.646798989915269e-06, + "loss": 0.3176, + "step": 13545 + }, + { + "epoch": 2.31, + "learning_rate": 2.6455511689438052e-06, + "loss": 0.3155, + "step": 13546 + }, + { + "epoch": 2.31, + "learning_rate": 2.6443035973404497e-06, + "loss": 0.3227, + "step": 13547 + }, + { + "epoch": 2.31, + "learning_rate": 2.6430562751475053e-06, + "loss": 0.311, + "step": 13548 + }, + { + "epoch": 2.31, + "learning_rate": 2.6418092024072594e-06, + "loss": 0.3328, + "step": 13549 + }, + { + "epoch": 2.31, + "learning_rate": 2.6405623791620007e-06, + "loss": 0.3322, + "step": 13550 + }, + { + "epoch": 2.31, + "learning_rate": 2.639315805454008e-06, + "loss": 0.3016, + "step": 13551 + }, + { + "epoch": 2.31, + "learning_rate": 2.6380694813255426e-06, + "loss": 0.3104, + "step": 13552 + }, + { + "epoch": 2.31, + "learning_rate": 2.636823406818866e-06, + "loss": 0.2981, + "step": 13553 + }, + { + "epoch": 2.31, + "learning_rate": 2.635577581976231e-06, + "loss": 0.3324, + "step": 13554 + }, + { + "epoch": 2.31, + "learning_rate": 2.6343320068398804e-06, + "loss": 0.3241, + "step": 13555 + }, + { + "epoch": 2.31, + "learning_rate": 2.633086681452044e-06, + "loss": 0.3166, + "step": 13556 + }, + { + "epoch": 2.31, + "learning_rate": 2.63184160585495e-06, + "loss": 0.3081, + "step": 13557 + }, + { + "epoch": 2.31, + "learning_rate": 2.630596780090815e-06, + "loss": 0.3401, + "step": 13558 + }, + { + "epoch": 2.31, + "learning_rate": 2.629352204201846e-06, + "loss": 0.3158, + "step": 13559 + }, + { + "epoch": 2.31, + "learning_rate": 2.628107878230246e-06, + "loss": 0.316, + "step": 13560 + }, + { + "epoch": 2.31, + "learning_rate": 2.6268638022182037e-06, + "loss": 0.2911, + "step": 13561 + }, + { + "epoch": 2.31, + "learning_rate": 2.6256199762079038e-06, + "loss": 0.3109, + "step": 13562 + }, + { + "epoch": 2.31, + "learning_rate": 2.6243764002415227e-06, + "loss": 0.3041, + "step": 13563 + }, + { + "epoch": 2.31, + "learning_rate": 2.6231330743612217e-06, + "loss": 0.2961, + "step": 13564 + }, + { + "epoch": 2.31, + "learning_rate": 2.6218899986091604e-06, + "loss": 0.2902, + "step": 13565 + }, + { + "epoch": 2.31, + "learning_rate": 2.6206471730274886e-06, + "loss": 0.3157, + "step": 13566 + }, + { + "epoch": 2.31, + "learning_rate": 2.6194045976583495e-06, + "loss": 0.2985, + "step": 13567 + }, + { + "epoch": 2.31, + "learning_rate": 2.6181622725438694e-06, + "loss": 0.3082, + "step": 13568 + }, + { + "epoch": 2.31, + "learning_rate": 2.616920197726174e-06, + "loss": 0.3068, + "step": 13569 + }, + { + "epoch": 2.31, + "learning_rate": 2.6156783732473802e-06, + "loss": 0.3097, + "step": 13570 + }, + { + "epoch": 2.31, + "learning_rate": 2.6144367991495967e-06, + "loss": 0.3141, + "step": 13571 + }, + { + "epoch": 2.31, + "learning_rate": 2.6131954754749157e-06, + "loss": 0.3064, + "step": 13572 + }, + { + "epoch": 2.31, + "learning_rate": 2.6119544022654298e-06, + "loss": 0.3162, + "step": 13573 + }, + { + "epoch": 2.31, + "learning_rate": 2.610713579563221e-06, + "loss": 0.3152, + "step": 13574 + }, + { + "epoch": 2.32, + "learning_rate": 2.60947300741036e-06, + "loss": 0.3183, + "step": 13575 + }, + { + "epoch": 2.32, + "learning_rate": 2.608232685848914e-06, + "loss": 0.322, + "step": 13576 + }, + { + "epoch": 2.32, + "learning_rate": 2.606992614920937e-06, + "loss": 0.2954, + "step": 13577 + }, + { + "epoch": 2.32, + "learning_rate": 2.605752794668479e-06, + "loss": 0.3093, + "step": 13578 + }, + { + "epoch": 2.32, + "learning_rate": 2.6045132251335727e-06, + "loss": 0.3084, + "step": 13579 + }, + { + "epoch": 2.32, + "learning_rate": 2.6032739063582523e-06, + "loss": 0.299, + "step": 13580 + }, + { + "epoch": 2.32, + "learning_rate": 2.6020348383845383e-06, + "loss": 0.3356, + "step": 13581 + }, + { + "epoch": 2.32, + "learning_rate": 2.6007960212544457e-06, + "loss": 0.3235, + "step": 13582 + }, + { + "epoch": 2.32, + "learning_rate": 2.59955745500998e-06, + "loss": 0.3145, + "step": 13583 + }, + { + "epoch": 2.32, + "learning_rate": 2.598319139693133e-06, + "loss": 0.325, + "step": 13584 + }, + { + "epoch": 2.32, + "learning_rate": 2.5970810753458954e-06, + "loss": 0.3163, + "step": 13585 + }, + { + "epoch": 2.32, + "learning_rate": 2.595843262010248e-06, + "loss": 0.3069, + "step": 13586 + }, + { + "epoch": 2.32, + "learning_rate": 2.5946056997281567e-06, + "loss": 0.3202, + "step": 13587 + }, + { + "epoch": 2.32, + "learning_rate": 2.5933683885415862e-06, + "loss": 0.3237, + "step": 13588 + }, + { + "epoch": 2.32, + "learning_rate": 2.59213132849249e-06, + "loss": 0.3163, + "step": 13589 + }, + { + "epoch": 2.32, + "learning_rate": 2.590894519622813e-06, + "loss": 0.305, + "step": 13590 + }, + { + "epoch": 2.32, + "learning_rate": 2.5896579619744934e-06, + "loss": 0.3373, + "step": 13591 + }, + { + "epoch": 2.32, + "learning_rate": 2.588421655589457e-06, + "loss": 0.3072, + "step": 13592 + }, + { + "epoch": 2.32, + "learning_rate": 2.5871856005096286e-06, + "loss": 0.3391, + "step": 13593 + }, + { + "epoch": 2.32, + "learning_rate": 2.585949796776912e-06, + "loss": 0.3016, + "step": 13594 + }, + { + "epoch": 2.32, + "learning_rate": 2.584714244433212e-06, + "loss": 0.3213, + "step": 13595 + }, + { + "epoch": 2.32, + "learning_rate": 2.5834789435204245e-06, + "loss": 0.3084, + "step": 13596 + }, + { + "epoch": 2.32, + "learning_rate": 2.582243894080434e-06, + "loss": 0.3171, + "step": 13597 + }, + { + "epoch": 2.32, + "learning_rate": 2.5810090961551204e-06, + "loss": 0.313, + "step": 13598 + }, + { + "epoch": 2.32, + "learning_rate": 2.5797745497863457e-06, + "loss": 0.3111, + "step": 13599 + }, + { + "epoch": 2.32, + "learning_rate": 2.5785402550159733e-06, + "loss": 0.3111, + "step": 13600 + }, + { + "epoch": 2.32, + "learning_rate": 2.5773062118858585e-06, + "loss": 0.299, + "step": 13601 + }, + { + "epoch": 2.32, + "learning_rate": 2.576072420437836e-06, + "loss": 0.3185, + "step": 13602 + }, + { + "epoch": 2.32, + "learning_rate": 2.574838880713746e-06, + "loss": 0.325, + "step": 13603 + }, + { + "epoch": 2.32, + "learning_rate": 2.5736055927554117e-06, + "loss": 0.3257, + "step": 13604 + }, + { + "epoch": 2.32, + "learning_rate": 2.5723725566046508e-06, + "loss": 0.301, + "step": 13605 + }, + { + "epoch": 2.32, + "learning_rate": 2.5711397723032726e-06, + "loss": 0.3253, + "step": 13606 + }, + { + "epoch": 2.32, + "learning_rate": 2.569907239893077e-06, + "loss": 0.3247, + "step": 13607 + }, + { + "epoch": 2.32, + "learning_rate": 2.568674959415859e-06, + "loss": 0.3176, + "step": 13608 + }, + { + "epoch": 2.32, + "learning_rate": 2.5674429309133953e-06, + "loss": 0.278, + "step": 13609 + }, + { + "epoch": 2.32, + "learning_rate": 2.5662111544274638e-06, + "loss": 0.3104, + "step": 13610 + }, + { + "epoch": 2.32, + "learning_rate": 2.5649796299998298e-06, + "loss": 0.3093, + "step": 13611 + }, + { + "epoch": 2.32, + "learning_rate": 2.563748357672251e-06, + "loss": 0.2912, + "step": 13612 + }, + { + "epoch": 2.32, + "learning_rate": 2.5625173374864766e-06, + "loss": 0.3089, + "step": 13613 + }, + { + "epoch": 2.32, + "learning_rate": 2.5612865694842494e-06, + "loss": 0.2842, + "step": 13614 + }, + { + "epoch": 2.32, + "learning_rate": 2.5600560537072962e-06, + "loss": 0.3074, + "step": 13615 + }, + { + "epoch": 2.32, + "learning_rate": 2.5588257901973444e-06, + "loss": 0.3126, + "step": 13616 + }, + { + "epoch": 2.32, + "learning_rate": 2.557595778996105e-06, + "loss": 0.3067, + "step": 13617 + }, + { + "epoch": 2.32, + "learning_rate": 2.5563660201452845e-06, + "loss": 0.3073, + "step": 13618 + }, + { + "epoch": 2.32, + "learning_rate": 2.5551365136865834e-06, + "loss": 0.3046, + "step": 13619 + }, + { + "epoch": 2.32, + "learning_rate": 2.5539072596616886e-06, + "loss": 0.3137, + "step": 13620 + }, + { + "epoch": 2.32, + "learning_rate": 2.5526782581122813e-06, + "loss": 0.3249, + "step": 13621 + }, + { + "epoch": 2.32, + "learning_rate": 2.551449509080033e-06, + "loss": 0.3255, + "step": 13622 + }, + { + "epoch": 2.32, + "learning_rate": 2.550221012606611e-06, + "loss": 0.3309, + "step": 13623 + }, + { + "epoch": 2.32, + "learning_rate": 2.5489927687336623e-06, + "loss": 0.3214, + "step": 13624 + }, + { + "epoch": 2.32, + "learning_rate": 2.547764777502838e-06, + "loss": 0.3267, + "step": 13625 + }, + { + "epoch": 2.32, + "learning_rate": 2.546537038955774e-06, + "loss": 0.3188, + "step": 13626 + }, + { + "epoch": 2.32, + "learning_rate": 2.545309553134101e-06, + "loss": 0.3498, + "step": 13627 + }, + { + "epoch": 2.32, + "learning_rate": 2.5440823200794383e-06, + "loss": 0.3184, + "step": 13628 + }, + { + "epoch": 2.32, + "learning_rate": 2.542855339833399e-06, + "loss": 0.3345, + "step": 13629 + }, + { + "epoch": 2.32, + "learning_rate": 2.5416286124375878e-06, + "loss": 0.3131, + "step": 13630 + }, + { + "epoch": 2.32, + "learning_rate": 2.5404021379335954e-06, + "loss": 0.2846, + "step": 13631 + }, + { + "epoch": 2.32, + "learning_rate": 2.5391759163630126e-06, + "loss": 0.3107, + "step": 13632 + }, + { + "epoch": 2.33, + "learning_rate": 2.537949947767411e-06, + "loss": 0.3035, + "step": 13633 + }, + { + "epoch": 2.33, + "learning_rate": 2.536724232188363e-06, + "loss": 0.305, + "step": 13634 + }, + { + "epoch": 2.33, + "learning_rate": 2.5354987696674304e-06, + "loss": 0.3194, + "step": 13635 + }, + { + "epoch": 2.33, + "learning_rate": 2.5342735602461633e-06, + "loss": 0.3216, + "step": 13636 + }, + { + "epoch": 2.33, + "learning_rate": 2.533048603966105e-06, + "loss": 0.3157, + "step": 13637 + }, + { + "epoch": 2.33, + "learning_rate": 2.531823900868795e-06, + "loss": 0.3029, + "step": 13638 + }, + { + "epoch": 2.33, + "learning_rate": 2.530599450995751e-06, + "loss": 0.3032, + "step": 13639 + }, + { + "epoch": 2.33, + "learning_rate": 2.529375254388495e-06, + "loss": 0.3385, + "step": 13640 + }, + { + "epoch": 2.33, + "learning_rate": 2.528151311088537e-06, + "loss": 0.3076, + "step": 13641 + }, + { + "epoch": 2.33, + "learning_rate": 2.526927621137375e-06, + "loss": 0.3251, + "step": 13642 + }, + { + "epoch": 2.33, + "learning_rate": 2.525704184576502e-06, + "loss": 0.3142, + "step": 13643 + }, + { + "epoch": 2.33, + "learning_rate": 2.524481001447402e-06, + "loss": 0.3092, + "step": 13644 + }, + { + "epoch": 2.33, + "learning_rate": 2.523258071791551e-06, + "loss": 0.298, + "step": 13645 + }, + { + "epoch": 2.33, + "learning_rate": 2.5220353956504094e-06, + "loss": 0.3026, + "step": 13646 + }, + { + "epoch": 2.33, + "learning_rate": 2.5208129730654384e-06, + "loss": 0.3102, + "step": 13647 + }, + { + "epoch": 2.33, + "learning_rate": 2.51959080407809e-06, + "loss": 0.3181, + "step": 13648 + }, + { + "epoch": 2.33, + "learning_rate": 2.5183688887297973e-06, + "loss": 0.3244, + "step": 13649 + }, + { + "epoch": 2.33, + "learning_rate": 2.5171472270619955e-06, + "loss": 0.3194, + "step": 13650 + }, + { + "epoch": 2.33, + "learning_rate": 2.5159258191161075e-06, + "loss": 0.3174, + "step": 13651 + }, + { + "epoch": 2.33, + "learning_rate": 2.514704664933547e-06, + "loss": 0.3158, + "step": 13652 + }, + { + "epoch": 2.33, + "learning_rate": 2.5134837645557253e-06, + "loss": 0.2913, + "step": 13653 + }, + { + "epoch": 2.33, + "learning_rate": 2.512263118024031e-06, + "loss": 0.3404, + "step": 13654 + }, + { + "epoch": 2.33, + "learning_rate": 2.5110427253798562e-06, + "loss": 0.3065, + "step": 13655 + }, + { + "epoch": 2.33, + "learning_rate": 2.509822586664581e-06, + "loss": 0.3212, + "step": 13656 + }, + { + "epoch": 2.33, + "learning_rate": 2.508602701919578e-06, + "loss": 0.3223, + "step": 13657 + }, + { + "epoch": 2.33, + "learning_rate": 2.507383071186208e-06, + "loss": 0.3035, + "step": 13658 + }, + { + "epoch": 2.33, + "learning_rate": 2.506163694505828e-06, + "loss": 0.346, + "step": 13659 + }, + { + "epoch": 2.33, + "learning_rate": 2.5049445719197795e-06, + "loss": 0.2906, + "step": 13660 + }, + { + "epoch": 2.33, + "learning_rate": 2.503725703469406e-06, + "loss": 0.3174, + "step": 13661 + }, + { + "epoch": 2.33, + "learning_rate": 2.502507089196028e-06, + "loss": 0.327, + "step": 13662 + }, + { + "epoch": 2.33, + "learning_rate": 2.5012887291409715e-06, + "loss": 0.2913, + "step": 13663 + }, + { + "epoch": 2.33, + "learning_rate": 2.500070623345542e-06, + "loss": 0.3309, + "step": 13664 + }, + { + "epoch": 2.33, + "learning_rate": 2.4988527718510447e-06, + "loss": 0.3069, + "step": 13665 + }, + { + "epoch": 2.33, + "learning_rate": 2.4976351746987725e-06, + "loss": 0.3039, + "step": 13666 + }, + { + "epoch": 2.33, + "learning_rate": 2.4964178319300125e-06, + "loss": 0.305, + "step": 13667 + }, + { + "epoch": 2.33, + "learning_rate": 2.4952007435860427e-06, + "loss": 0.3151, + "step": 13668 + }, + { + "epoch": 2.33, + "learning_rate": 2.493983909708125e-06, + "loss": 0.2913, + "step": 13669 + }, + { + "epoch": 2.33, + "learning_rate": 2.492767330337523e-06, + "loss": 0.3094, + "step": 13670 + }, + { + "epoch": 2.33, + "learning_rate": 2.491551005515487e-06, + "loss": 0.3065, + "step": 13671 + }, + { + "epoch": 2.33, + "learning_rate": 2.4903349352832583e-06, + "loss": 0.3167, + "step": 13672 + }, + { + "epoch": 2.33, + "learning_rate": 2.489119119682072e-06, + "loss": 0.3145, + "step": 13673 + }, + { + "epoch": 2.33, + "learning_rate": 2.48790355875315e-06, + "loss": 0.3165, + "step": 13674 + }, + { + "epoch": 2.33, + "learning_rate": 2.4866882525377113e-06, + "loss": 0.2968, + "step": 13675 + }, + { + "epoch": 2.33, + "learning_rate": 2.4854732010769646e-06, + "loss": 0.3301, + "step": 13676 + }, + { + "epoch": 2.33, + "learning_rate": 2.484258404412103e-06, + "loss": 0.29, + "step": 13677 + }, + { + "epoch": 2.33, + "learning_rate": 2.4830438625843213e-06, + "loss": 0.3237, + "step": 13678 + }, + { + "epoch": 2.33, + "learning_rate": 2.481829575634802e-06, + "loss": 0.3085, + "step": 13679 + }, + { + "epoch": 2.33, + "learning_rate": 2.480615543604714e-06, + "loss": 0.3261, + "step": 13680 + }, + { + "epoch": 2.33, + "learning_rate": 2.4794017665352234e-06, + "loss": 0.3327, + "step": 13681 + }, + { + "epoch": 2.33, + "learning_rate": 2.4781882444674855e-06, + "loss": 0.327, + "step": 13682 + }, + { + "epoch": 2.33, + "learning_rate": 2.476974977442651e-06, + "loss": 0.3159, + "step": 13683 + }, + { + "epoch": 2.33, + "learning_rate": 2.4757619655018527e-06, + "loss": 0.306, + "step": 13684 + }, + { + "epoch": 2.33, + "learning_rate": 2.4745492086862233e-06, + "loss": 0.2949, + "step": 13685 + }, + { + "epoch": 2.33, + "learning_rate": 2.473336707036883e-06, + "loss": 0.3048, + "step": 13686 + }, + { + "epoch": 2.33, + "learning_rate": 2.4721244605949447e-06, + "loss": 0.3425, + "step": 13687 + }, + { + "epoch": 2.33, + "learning_rate": 2.470912469401512e-06, + "loss": 0.3144, + "step": 13688 + }, + { + "epoch": 2.33, + "learning_rate": 2.4697007334976796e-06, + "loss": 0.3136, + "step": 13689 + }, + { + "epoch": 2.33, + "learning_rate": 2.4684892529245353e-06, + "loss": 0.344, + "step": 13690 + }, + { + "epoch": 2.33, + "learning_rate": 2.46727802772316e-06, + "loss": 0.3311, + "step": 13691 + }, + { + "epoch": 2.34, + "learning_rate": 2.4660670579346147e-06, + "loss": 0.3258, + "step": 13692 + }, + { + "epoch": 2.34, + "learning_rate": 2.464856343599964e-06, + "loss": 0.3251, + "step": 13693 + }, + { + "epoch": 2.34, + "learning_rate": 2.4636458847602608e-06, + "loss": 0.3069, + "step": 13694 + }, + { + "epoch": 2.34, + "learning_rate": 2.4624356814565496e-06, + "loss": 0.2904, + "step": 13695 + }, + { + "epoch": 2.34, + "learning_rate": 2.4612257337298607e-06, + "loss": 0.3292, + "step": 13696 + }, + { + "epoch": 2.34, + "learning_rate": 2.4600160416212214e-06, + "loss": 0.3447, + "step": 13697 + }, + { + "epoch": 2.34, + "learning_rate": 2.458806605171652e-06, + "loss": 0.3119, + "step": 13698 + }, + { + "epoch": 2.34, + "learning_rate": 2.4575974244221557e-06, + "loss": 0.2961, + "step": 13699 + }, + { + "epoch": 2.34, + "learning_rate": 2.4563884994137343e-06, + "loss": 0.3121, + "step": 13700 + }, + { + "epoch": 2.34, + "learning_rate": 2.4551798301873807e-06, + "loss": 0.315, + "step": 13701 + }, + { + "epoch": 2.34, + "learning_rate": 2.4539714167840757e-06, + "loss": 0.3073, + "step": 13702 + }, + { + "epoch": 2.34, + "learning_rate": 2.4527632592447937e-06, + "loss": 0.29, + "step": 13703 + }, + { + "epoch": 2.34, + "learning_rate": 2.4515553576105e-06, + "loss": 0.3373, + "step": 13704 + }, + { + "epoch": 2.34, + "learning_rate": 2.450347711922151e-06, + "loss": 0.346, + "step": 13705 + }, + { + "epoch": 2.34, + "learning_rate": 2.4491403222206965e-06, + "loss": 0.342, + "step": 13706 + }, + { + "epoch": 2.34, + "learning_rate": 2.4479331885470715e-06, + "loss": 0.3089, + "step": 13707 + }, + { + "epoch": 2.34, + "learning_rate": 2.446726310942208e-06, + "loss": 0.318, + "step": 13708 + }, + { + "epoch": 2.34, + "learning_rate": 2.445519689447029e-06, + "loss": 0.3206, + "step": 13709 + }, + { + "epoch": 2.34, + "learning_rate": 2.4443133241024487e-06, + "loss": 0.3053, + "step": 13710 + }, + { + "epoch": 2.34, + "learning_rate": 2.443107214949366e-06, + "loss": 0.3304, + "step": 13711 + }, + { + "epoch": 2.34, + "learning_rate": 2.44190136202868e-06, + "loss": 0.3017, + "step": 13712 + }, + { + "epoch": 2.34, + "learning_rate": 2.4406957653812825e-06, + "loss": 0.3224, + "step": 13713 + }, + { + "epoch": 2.34, + "learning_rate": 2.4394904250480424e-06, + "loss": 0.3181, + "step": 13714 + }, + { + "epoch": 2.34, + "learning_rate": 2.4382853410698337e-06, + "loss": 0.3075, + "step": 13715 + }, + { + "epoch": 2.34, + "learning_rate": 2.4370805134875186e-06, + "loss": 0.3243, + "step": 13716 + }, + { + "epoch": 2.34, + "learning_rate": 2.4358759423419476e-06, + "loss": 0.3018, + "step": 13717 + }, + { + "epoch": 2.34, + "learning_rate": 2.4346716276739645e-06, + "loss": 0.311, + "step": 13718 + }, + { + "epoch": 2.34, + "learning_rate": 2.433467569524406e-06, + "loss": 0.324, + "step": 13719 + }, + { + "epoch": 2.34, + "learning_rate": 2.4322637679340956e-06, + "loss": 0.3117, + "step": 13720 + }, + { + "epoch": 2.34, + "learning_rate": 2.431060222943855e-06, + "loss": 0.3206, + "step": 13721 + }, + { + "epoch": 2.34, + "learning_rate": 2.4298569345944868e-06, + "loss": 0.3168, + "step": 13722 + }, + { + "epoch": 2.34, + "learning_rate": 2.428653902926793e-06, + "loss": 0.3014, + "step": 13723 + }, + { + "epoch": 2.34, + "learning_rate": 2.427451127981567e-06, + "loss": 0.3043, + "step": 13724 + }, + { + "epoch": 2.34, + "learning_rate": 2.426248609799591e-06, + "loss": 0.3088, + "step": 13725 + }, + { + "epoch": 2.34, + "learning_rate": 2.4250463484216404e-06, + "loss": 0.3407, + "step": 13726 + }, + { + "epoch": 2.34, + "learning_rate": 2.4238443438884753e-06, + "loss": 0.2884, + "step": 13727 + }, + { + "epoch": 2.34, + "learning_rate": 2.4226425962408583e-06, + "loss": 0.3227, + "step": 13728 + }, + { + "epoch": 2.34, + "learning_rate": 2.4214411055195308e-06, + "loss": 0.2844, + "step": 13729 + }, + { + "epoch": 2.34, + "learning_rate": 2.4202398717652363e-06, + "loss": 0.3447, + "step": 13730 + }, + { + "epoch": 2.34, + "learning_rate": 2.4190388950187027e-06, + "loss": 0.3272, + "step": 13731 + }, + { + "epoch": 2.34, + "learning_rate": 2.4178381753206537e-06, + "loss": 0.3175, + "step": 13732 + }, + { + "epoch": 2.34, + "learning_rate": 2.416637712711801e-06, + "loss": 0.2985, + "step": 13733 + }, + { + "epoch": 2.34, + "learning_rate": 2.4154375072328506e-06, + "loss": 0.311, + "step": 13734 + }, + { + "epoch": 2.34, + "learning_rate": 2.414237558924496e-06, + "loss": 0.3182, + "step": 13735 + }, + { + "epoch": 2.34, + "learning_rate": 2.413037867827427e-06, + "loss": 0.335, + "step": 13736 + }, + { + "epoch": 2.34, + "learning_rate": 2.4118384339823163e-06, + "loss": 0.3111, + "step": 13737 + }, + { + "epoch": 2.34, + "learning_rate": 2.4106392574298377e-06, + "loss": 0.3175, + "step": 13738 + }, + { + "epoch": 2.34, + "learning_rate": 2.4094403382106492e-06, + "loss": 0.3241, + "step": 13739 + }, + { + "epoch": 2.34, + "learning_rate": 2.408241676365405e-06, + "loss": 0.3003, + "step": 13740 + }, + { + "epoch": 2.34, + "learning_rate": 2.4070432719347457e-06, + "loss": 0.3226, + "step": 13741 + }, + { + "epoch": 2.34, + "learning_rate": 2.405845124959312e-06, + "loss": 0.338, + "step": 13742 + }, + { + "epoch": 2.34, + "learning_rate": 2.4046472354797202e-06, + "loss": 0.3126, + "step": 13743 + }, + { + "epoch": 2.34, + "learning_rate": 2.403449603536595e-06, + "loss": 0.3103, + "step": 13744 + }, + { + "epoch": 2.34, + "learning_rate": 2.4022522291705396e-06, + "loss": 0.29, + "step": 13745 + }, + { + "epoch": 2.34, + "learning_rate": 2.401055112422155e-06, + "loss": 0.3047, + "step": 13746 + }, + { + "epoch": 2.34, + "learning_rate": 2.3998582533320316e-06, + "loss": 0.3143, + "step": 13747 + }, + { + "epoch": 2.34, + "learning_rate": 2.3986616519407537e-06, + "loss": 0.2986, + "step": 13748 + }, + { + "epoch": 2.34, + "learning_rate": 2.397465308288891e-06, + "loss": 0.2945, + "step": 13749 + }, + { + "epoch": 2.35, + "learning_rate": 2.3962692224170115e-06, + "loss": 0.3107, + "step": 13750 + }, + { + "epoch": 2.35, + "learning_rate": 2.395073394365672e-06, + "loss": 0.3316, + "step": 13751 + }, + { + "epoch": 2.35, + "learning_rate": 2.3938778241754155e-06, + "loss": 0.2785, + "step": 13752 + }, + { + "epoch": 2.35, + "learning_rate": 2.3926825118867814e-06, + "loss": 0.2886, + "step": 13753 + }, + { + "epoch": 2.35, + "learning_rate": 2.3914874575403003e-06, + "loss": 0.2934, + "step": 13754 + }, + { + "epoch": 2.35, + "learning_rate": 2.390292661176492e-06, + "loss": 0.3287, + "step": 13755 + }, + { + "epoch": 2.35, + "learning_rate": 2.38909812283587e-06, + "loss": 0.3429, + "step": 13756 + }, + { + "epoch": 2.35, + "learning_rate": 2.3879038425589395e-06, + "loss": 0.3039, + "step": 13757 + }, + { + "epoch": 2.35, + "learning_rate": 2.3867098203861894e-06, + "loss": 0.3146, + "step": 13758 + }, + { + "epoch": 2.35, + "learning_rate": 2.385516056358109e-06, + "loss": 0.326, + "step": 13759 + }, + { + "epoch": 2.35, + "learning_rate": 2.3843225505151778e-06, + "loss": 0.3108, + "step": 13760 + }, + { + "epoch": 2.35, + "learning_rate": 2.3831293028978586e-06, + "loss": 0.2899, + "step": 13761 + }, + { + "epoch": 2.35, + "learning_rate": 2.3819363135466144e-06, + "loss": 0.2951, + "step": 13762 + }, + { + "epoch": 2.35, + "learning_rate": 2.3807435825018944e-06, + "loss": 0.3246, + "step": 13763 + }, + { + "epoch": 2.35, + "learning_rate": 2.3795511098041422e-06, + "loss": 0.3224, + "step": 13764 + }, + { + "epoch": 2.35, + "learning_rate": 2.3783588954937898e-06, + "loss": 0.3275, + "step": 13765 + }, + { + "epoch": 2.35, + "learning_rate": 2.3771669396112663e-06, + "loss": 0.2976, + "step": 13766 + }, + { + "epoch": 2.35, + "learning_rate": 2.3759752421969804e-06, + "loss": 0.332, + "step": 13767 + }, + { + "epoch": 2.35, + "learning_rate": 2.374783803291343e-06, + "loss": 0.3228, + "step": 13768 + }, + { + "epoch": 2.35, + "learning_rate": 2.3735926229347506e-06, + "loss": 0.3188, + "step": 13769 + }, + { + "epoch": 2.35, + "learning_rate": 2.372401701167595e-06, + "loss": 0.3014, + "step": 13770 + }, + { + "epoch": 2.35, + "learning_rate": 2.3712110380302555e-06, + "loss": 0.3305, + "step": 13771 + }, + { + "epoch": 2.35, + "learning_rate": 2.3700206335631036e-06, + "loss": 0.3211, + "step": 13772 + }, + { + "epoch": 2.35, + "learning_rate": 2.3688304878065073e-06, + "loss": 0.315, + "step": 13773 + }, + { + "epoch": 2.35, + "learning_rate": 2.3676406008008124e-06, + "loss": 0.3263, + "step": 13774 + }, + { + "epoch": 2.35, + "learning_rate": 2.3664509725863727e-06, + "loss": 0.2806, + "step": 13775 + }, + { + "epoch": 2.35, + "learning_rate": 2.3652616032035193e-06, + "loss": 0.3143, + "step": 13776 + }, + { + "epoch": 2.35, + "learning_rate": 2.364072492692581e-06, + "loss": 0.3376, + "step": 13777 + }, + { + "epoch": 2.35, + "learning_rate": 2.3628836410938793e-06, + "loss": 0.2968, + "step": 13778 + }, + { + "epoch": 2.35, + "learning_rate": 2.361695048447723e-06, + "loss": 0.3331, + "step": 13779 + }, + { + "epoch": 2.35, + "learning_rate": 2.360506714794416e-06, + "loss": 0.3189, + "step": 13780 + }, + { + "epoch": 2.35, + "learning_rate": 2.359318640174252e-06, + "loss": 0.3112, + "step": 13781 + }, + { + "epoch": 2.35, + "learning_rate": 2.3581308246275103e-06, + "loss": 0.323, + "step": 13782 + }, + { + "epoch": 2.35, + "learning_rate": 2.356943268194469e-06, + "loss": 0.2909, + "step": 13783 + }, + { + "epoch": 2.35, + "learning_rate": 2.3557559709153954e-06, + "loss": 0.3289, + "step": 13784 + }, + { + "epoch": 2.35, + "learning_rate": 2.3545689328305465e-06, + "loss": 0.3241, + "step": 13785 + }, + { + "epoch": 2.35, + "learning_rate": 2.3533821539801714e-06, + "loss": 0.312, + "step": 13786 + }, + { + "epoch": 2.35, + "learning_rate": 2.3521956344045105e-06, + "loss": 0.3088, + "step": 13787 + }, + { + "epoch": 2.35, + "learning_rate": 2.3510093741437956e-06, + "loss": 0.3231, + "step": 13788 + }, + { + "epoch": 2.35, + "learning_rate": 2.3498233732382512e-06, + "loss": 0.3088, + "step": 13789 + }, + { + "epoch": 2.35, + "learning_rate": 2.3486376317280867e-06, + "loss": 0.3371, + "step": 13790 + }, + { + "epoch": 2.35, + "learning_rate": 2.347452149653513e-06, + "loss": 0.3272, + "step": 13791 + }, + { + "epoch": 2.35, + "learning_rate": 2.346266927054719e-06, + "loss": 0.2871, + "step": 13792 + }, + { + "epoch": 2.35, + "learning_rate": 2.3450819639718967e-06, + "loss": 0.3095, + "step": 13793 + }, + { + "epoch": 2.35, + "learning_rate": 2.3438972604452237e-06, + "loss": 0.3023, + "step": 13794 + }, + { + "epoch": 2.35, + "learning_rate": 2.3427128165148706e-06, + "loss": 0.3304, + "step": 13795 + }, + { + "epoch": 2.35, + "learning_rate": 2.3415286322210007e-06, + "loss": 0.3167, + "step": 13796 + }, + { + "epoch": 2.35, + "learning_rate": 2.340344707603761e-06, + "loss": 0.3213, + "step": 13797 + }, + { + "epoch": 2.35, + "learning_rate": 2.3391610427032973e-06, + "loss": 0.3359, + "step": 13798 + }, + { + "epoch": 2.35, + "learning_rate": 2.337977637559745e-06, + "loss": 0.3113, + "step": 13799 + }, + { + "epoch": 2.35, + "learning_rate": 2.3367944922132293e-06, + "loss": 0.3313, + "step": 13800 + }, + { + "epoch": 2.35, + "learning_rate": 2.335611606703867e-06, + "loss": 0.3432, + "step": 13801 + }, + { + "epoch": 2.35, + "learning_rate": 2.334428981071768e-06, + "loss": 0.317, + "step": 13802 + }, + { + "epoch": 2.35, + "learning_rate": 2.33324661535703e-06, + "loss": 0.3146, + "step": 13803 + }, + { + "epoch": 2.35, + "learning_rate": 2.3320645095997476e-06, + "loss": 0.3365, + "step": 13804 + }, + { + "epoch": 2.35, + "learning_rate": 2.3308826638399953e-06, + "loss": 0.3235, + "step": 13805 + }, + { + "epoch": 2.35, + "learning_rate": 2.329701078117851e-06, + "loss": 0.3169, + "step": 13806 + }, + { + "epoch": 2.35, + "learning_rate": 2.3285197524733795e-06, + "loss": 0.3237, + "step": 13807 + }, + { + "epoch": 2.35, + "learning_rate": 2.327338686946632e-06, + "loss": 0.3042, + "step": 13808 + }, + { + "epoch": 2.36, + "learning_rate": 2.3261578815776577e-06, + "loss": 0.3125, + "step": 13809 + }, + { + "epoch": 2.36, + "learning_rate": 2.324977336406494e-06, + "loss": 0.3164, + "step": 13810 + }, + { + "epoch": 2.36, + "learning_rate": 2.323797051473172e-06, + "loss": 0.3056, + "step": 13811 + }, + { + "epoch": 2.36, + "learning_rate": 2.3226170268177084e-06, + "loss": 0.3117, + "step": 13812 + }, + { + "epoch": 2.36, + "learning_rate": 2.321437262480114e-06, + "loss": 0.3171, + "step": 13813 + }, + { + "epoch": 2.36, + "learning_rate": 2.320257758500394e-06, + "loss": 0.3004, + "step": 13814 + }, + { + "epoch": 2.36, + "learning_rate": 2.319078514918539e-06, + "loss": 0.2992, + "step": 13815 + }, + { + "epoch": 2.36, + "learning_rate": 2.3178995317745366e-06, + "loss": 0.332, + "step": 13816 + }, + { + "epoch": 2.36, + "learning_rate": 2.3167208091083615e-06, + "loss": 0.2942, + "step": 13817 + }, + { + "epoch": 2.36, + "learning_rate": 2.315542346959981e-06, + "loss": 0.2985, + "step": 13818 + }, + { + "epoch": 2.36, + "learning_rate": 2.3143641453693564e-06, + "loss": 0.3264, + "step": 13819 + }, + { + "epoch": 2.36, + "learning_rate": 2.313186204376431e-06, + "loss": 0.3104, + "step": 13820 + }, + { + "epoch": 2.36, + "learning_rate": 2.3120085240211475e-06, + "loss": 0.32, + "step": 13821 + }, + { + "epoch": 2.36, + "learning_rate": 2.3108311043434428e-06, + "loss": 0.3005, + "step": 13822 + }, + { + "epoch": 2.36, + "learning_rate": 2.3096539453832314e-06, + "loss": 0.3082, + "step": 13823 + }, + { + "epoch": 2.36, + "learning_rate": 2.308477047180432e-06, + "loss": 0.298, + "step": 13824 + }, + { + "epoch": 2.36, + "learning_rate": 2.3073004097749483e-06, + "loss": 0.302, + "step": 13825 + }, + { + "epoch": 2.36, + "learning_rate": 2.3061240332066816e-06, + "loss": 0.3427, + "step": 13826 + }, + { + "epoch": 2.36, + "learning_rate": 2.304947917515512e-06, + "loss": 0.2897, + "step": 13827 + }, + { + "epoch": 2.36, + "learning_rate": 2.303772062741322e-06, + "loss": 0.2814, + "step": 13828 + }, + { + "epoch": 2.36, + "learning_rate": 2.302596468923981e-06, + "loss": 0.3155, + "step": 13829 + }, + { + "epoch": 2.36, + "learning_rate": 2.30142113610335e-06, + "loss": 0.3069, + "step": 13830 + }, + { + "epoch": 2.36, + "learning_rate": 2.3002460643192813e-06, + "loss": 0.2925, + "step": 13831 + }, + { + "epoch": 2.36, + "learning_rate": 2.299071253611618e-06, + "loss": 0.3089, + "step": 13832 + }, + { + "epoch": 2.36, + "learning_rate": 2.297896704020195e-06, + "loss": 0.3068, + "step": 13833 + }, + { + "epoch": 2.36, + "learning_rate": 2.29672241558484e-06, + "loss": 0.3151, + "step": 13834 + }, + { + "epoch": 2.36, + "learning_rate": 2.2955483883453634e-06, + "loss": 0.3088, + "step": 13835 + }, + { + "epoch": 2.36, + "learning_rate": 2.294374622341579e-06, + "loss": 0.3019, + "step": 13836 + }, + { + "epoch": 2.36, + "learning_rate": 2.293201117613282e-06, + "loss": 0.3135, + "step": 13837 + }, + { + "epoch": 2.36, + "learning_rate": 2.2920278742002677e-06, + "loss": 0.3023, + "step": 13838 + }, + { + "epoch": 2.36, + "learning_rate": 2.290854892142311e-06, + "loss": 0.3375, + "step": 13839 + }, + { + "epoch": 2.36, + "learning_rate": 2.2896821714791874e-06, + "loss": 0.3142, + "step": 13840 + }, + { + "epoch": 2.36, + "learning_rate": 2.2885097122506627e-06, + "loss": 0.3294, + "step": 13841 + }, + { + "epoch": 2.36, + "learning_rate": 2.287337514496487e-06, + "loss": 0.3193, + "step": 13842 + }, + { + "epoch": 2.36, + "learning_rate": 2.2861655782564084e-06, + "loss": 0.3264, + "step": 13843 + }, + { + "epoch": 2.36, + "learning_rate": 2.284993903570164e-06, + "loss": 0.2935, + "step": 13844 + }, + { + "epoch": 2.36, + "learning_rate": 2.283822490477482e-06, + "loss": 0.3062, + "step": 13845 + }, + { + "epoch": 2.36, + "learning_rate": 2.2826513390180803e-06, + "loss": 0.3319, + "step": 13846 + }, + { + "epoch": 2.36, + "learning_rate": 2.281480449231671e-06, + "loss": 0.3223, + "step": 13847 + }, + { + "epoch": 2.36, + "learning_rate": 2.2803098211579555e-06, + "loss": 0.3287, + "step": 13848 + }, + { + "epoch": 2.36, + "learning_rate": 2.2791394548366285e-06, + "loss": 0.3409, + "step": 13849 + }, + { + "epoch": 2.36, + "learning_rate": 2.2779693503073675e-06, + "loss": 0.3026, + "step": 13850 + }, + { + "epoch": 2.36, + "learning_rate": 2.2767995076098514e-06, + "loss": 0.2995, + "step": 13851 + }, + { + "epoch": 2.36, + "learning_rate": 2.2756299267837467e-06, + "loss": 0.296, + "step": 13852 + }, + { + "epoch": 2.36, + "learning_rate": 2.2744606078687114e-06, + "loss": 0.3075, + "step": 13853 + }, + { + "epoch": 2.36, + "learning_rate": 2.2732915509043897e-06, + "loss": 0.3246, + "step": 13854 + }, + { + "epoch": 2.36, + "learning_rate": 2.272122755930423e-06, + "loss": 0.3076, + "step": 13855 + }, + { + "epoch": 2.36, + "learning_rate": 2.2709542229864432e-06, + "loss": 0.3142, + "step": 13856 + }, + { + "epoch": 2.36, + "learning_rate": 2.269785952112069e-06, + "loss": 0.3184, + "step": 13857 + }, + { + "epoch": 2.36, + "learning_rate": 2.2686179433469148e-06, + "loss": 0.321, + "step": 13858 + }, + { + "epoch": 2.36, + "learning_rate": 2.2674501967305838e-06, + "loss": 0.3227, + "step": 13859 + }, + { + "epoch": 2.36, + "learning_rate": 2.266282712302672e-06, + "loss": 0.3112, + "step": 13860 + }, + { + "epoch": 2.36, + "learning_rate": 2.2651154901027648e-06, + "loss": 0.3128, + "step": 13861 + }, + { + "epoch": 2.36, + "learning_rate": 2.263948530170439e-06, + "loss": 0.3105, + "step": 13862 + }, + { + "epoch": 2.36, + "learning_rate": 2.2627818325452634e-06, + "loss": 0.3154, + "step": 13863 + }, + { + "epoch": 2.36, + "learning_rate": 2.2616153972667996e-06, + "loss": 0.293, + "step": 13864 + }, + { + "epoch": 2.36, + "learning_rate": 2.2604492243745934e-06, + "loss": 0.2938, + "step": 13865 + }, + { + "epoch": 2.36, + "learning_rate": 2.2592833139081883e-06, + "loss": 0.302, + "step": 13866 + }, + { + "epoch": 2.36, + "learning_rate": 2.258117665907117e-06, + "loss": 0.303, + "step": 13867 + }, + { + "epoch": 2.37, + "learning_rate": 2.2569522804109036e-06, + "loss": 0.3093, + "step": 13868 + }, + { + "epoch": 2.37, + "learning_rate": 2.255787157459065e-06, + "loss": 0.3242, + "step": 13869 + }, + { + "epoch": 2.37, + "learning_rate": 2.2546222970911025e-06, + "loss": 0.288, + "step": 13870 + }, + { + "epoch": 2.37, + "learning_rate": 2.2534576993465183e-06, + "loss": 0.3125, + "step": 13871 + }, + { + "epoch": 2.37, + "learning_rate": 2.252293364264796e-06, + "loss": 0.316, + "step": 13872 + }, + { + "epoch": 2.37, + "learning_rate": 2.2511292918854154e-06, + "loss": 0.323, + "step": 13873 + }, + { + "epoch": 2.37, + "learning_rate": 2.249965482247849e-06, + "loss": 0.3334, + "step": 13874 + }, + { + "epoch": 2.37, + "learning_rate": 2.248801935391557e-06, + "loss": 0.2954, + "step": 13875 + }, + { + "epoch": 2.37, + "learning_rate": 2.247638651355991e-06, + "loss": 0.3178, + "step": 13876 + }, + { + "epoch": 2.37, + "learning_rate": 2.246475630180598e-06, + "loss": 0.3051, + "step": 13877 + }, + { + "epoch": 2.37, + "learning_rate": 2.2453128719048088e-06, + "loss": 0.3257, + "step": 13878 + }, + { + "epoch": 2.37, + "learning_rate": 2.244150376568055e-06, + "loss": 0.3061, + "step": 13879 + }, + { + "epoch": 2.37, + "learning_rate": 2.242988144209746e-06, + "loss": 0.319, + "step": 13880 + }, + { + "epoch": 2.37, + "learning_rate": 2.2418261748692927e-06, + "loss": 0.3022, + "step": 13881 + }, + { + "epoch": 2.37, + "learning_rate": 2.2406644685860946e-06, + "loss": 0.3183, + "step": 13882 + }, + { + "epoch": 2.37, + "learning_rate": 2.239503025399542e-06, + "loss": 0.3382, + "step": 13883 + }, + { + "epoch": 2.37, + "learning_rate": 2.2383418453490156e-06, + "loss": 0.3072, + "step": 13884 + }, + { + "epoch": 2.37, + "learning_rate": 2.237180928473891e-06, + "loss": 0.3276, + "step": 13885 + }, + { + "epoch": 2.37, + "learning_rate": 2.236020274813525e-06, + "loss": 0.3221, + "step": 13886 + }, + { + "epoch": 2.37, + "learning_rate": 2.234859884407279e-06, + "loss": 0.3038, + "step": 13887 + }, + { + "epoch": 2.37, + "learning_rate": 2.233699757294492e-06, + "loss": 0.3175, + "step": 13888 + }, + { + "epoch": 2.37, + "learning_rate": 2.232539893514504e-06, + "loss": 0.3115, + "step": 13889 + }, + { + "epoch": 2.37, + "learning_rate": 2.231380293106641e-06, + "loss": 0.3105, + "step": 13890 + }, + { + "epoch": 2.37, + "learning_rate": 2.2302209561102228e-06, + "loss": 0.3136, + "step": 13891 + }, + { + "epoch": 2.37, + "learning_rate": 2.2290618825645593e-06, + "loss": 0.3236, + "step": 13892 + }, + { + "epoch": 2.37, + "learning_rate": 2.227903072508951e-06, + "loss": 0.3052, + "step": 13893 + }, + { + "epoch": 2.37, + "learning_rate": 2.2267445259826936e-06, + "loss": 0.2937, + "step": 13894 + }, + { + "epoch": 2.37, + "learning_rate": 2.225586243025063e-06, + "loss": 0.2982, + "step": 13895 + }, + { + "epoch": 2.37, + "learning_rate": 2.2244282236753366e-06, + "loss": 0.2945, + "step": 13896 + }, + { + "epoch": 2.37, + "learning_rate": 2.22327046797278e-06, + "loss": 0.3008, + "step": 13897 + }, + { + "epoch": 2.37, + "learning_rate": 2.2221129759566484e-06, + "loss": 0.312, + "step": 13898 + }, + { + "epoch": 2.37, + "learning_rate": 2.2209557476661902e-06, + "loss": 0.3025, + "step": 13899 + }, + { + "epoch": 2.37, + "learning_rate": 2.2197987831406454e-06, + "loss": 0.2972, + "step": 13900 + }, + { + "epoch": 2.37, + "learning_rate": 2.218642082419238e-06, + "loss": 0.3065, + "step": 13901 + }, + { + "epoch": 2.37, + "learning_rate": 2.217485645541192e-06, + "loss": 0.2876, + "step": 13902 + }, + { + "epoch": 2.37, + "learning_rate": 2.2163294725457196e-06, + "loss": 0.3139, + "step": 13903 + }, + { + "epoch": 2.37, + "learning_rate": 2.2151735634720204e-06, + "loss": 0.2907, + "step": 13904 + }, + { + "epoch": 2.37, + "learning_rate": 2.2140179183592892e-06, + "loss": 0.3108, + "step": 13905 + }, + { + "epoch": 2.37, + "learning_rate": 2.2128625372467093e-06, + "loss": 0.3177, + "step": 13906 + }, + { + "epoch": 2.37, + "learning_rate": 2.211707420173459e-06, + "loss": 0.3087, + "step": 13907 + }, + { + "epoch": 2.37, + "learning_rate": 2.210552567178704e-06, + "loss": 0.3068, + "step": 13908 + }, + { + "epoch": 2.37, + "learning_rate": 2.209397978301604e-06, + "loss": 0.3207, + "step": 13909 + }, + { + "epoch": 2.37, + "learning_rate": 2.2082436535813023e-06, + "loss": 0.3181, + "step": 13910 + }, + { + "epoch": 2.37, + "learning_rate": 2.2070895930569438e-06, + "loss": 0.312, + "step": 13911 + }, + { + "epoch": 2.37, + "learning_rate": 2.2059357967676555e-06, + "loss": 0.3046, + "step": 13912 + }, + { + "epoch": 2.37, + "learning_rate": 2.2047822647525618e-06, + "loss": 0.3193, + "step": 13913 + }, + { + "epoch": 2.37, + "learning_rate": 2.2036289970507762e-06, + "loss": 0.3058, + "step": 13914 + }, + { + "epoch": 2.37, + "learning_rate": 2.2024759937014006e-06, + "loss": 0.3106, + "step": 13915 + }, + { + "epoch": 2.37, + "learning_rate": 2.201323254743534e-06, + "loss": 0.3108, + "step": 13916 + }, + { + "epoch": 2.37, + "learning_rate": 2.2001707802162564e-06, + "loss": 0.329, + "step": 13917 + }, + { + "epoch": 2.37, + "learning_rate": 2.199018570158652e-06, + "loss": 0.3169, + "step": 13918 + }, + { + "epoch": 2.37, + "learning_rate": 2.1978666246097814e-06, + "loss": 0.2767, + "step": 13919 + }, + { + "epoch": 2.37, + "learning_rate": 2.1967149436087064e-06, + "loss": 0.3103, + "step": 13920 + }, + { + "epoch": 2.37, + "learning_rate": 2.1955635271944785e-06, + "loss": 0.2898, + "step": 13921 + }, + { + "epoch": 2.37, + "learning_rate": 2.19441237540614e-06, + "loss": 0.2842, + "step": 13922 + }, + { + "epoch": 2.37, + "learning_rate": 2.1932614882827196e-06, + "loss": 0.3015, + "step": 13923 + }, + { + "epoch": 2.37, + "learning_rate": 2.192110865863246e-06, + "loss": 0.305, + "step": 13924 + }, + { + "epoch": 2.37, + "learning_rate": 2.190960508186727e-06, + "loss": 0.328, + "step": 13925 + }, + { + "epoch": 2.38, + "learning_rate": 2.189810415292171e-06, + "loss": 0.3119, + "step": 13926 + }, + { + "epoch": 2.38, + "learning_rate": 2.188660587218574e-06, + "loss": 0.3083, + "step": 13927 + }, + { + "epoch": 2.38, + "learning_rate": 2.1875110240049234e-06, + "loss": 0.313, + "step": 13928 + }, + { + "epoch": 2.38, + "learning_rate": 2.186361725690198e-06, + "loss": 0.303, + "step": 13929 + }, + { + "epoch": 2.38, + "learning_rate": 2.1852126923133676e-06, + "loss": 0.311, + "step": 13930 + }, + { + "epoch": 2.38, + "learning_rate": 2.1840639239133942e-06, + "loss": 0.3204, + "step": 13931 + }, + { + "epoch": 2.38, + "learning_rate": 2.1829154205292236e-06, + "loss": 0.2891, + "step": 13932 + }, + { + "epoch": 2.38, + "learning_rate": 2.181767182199802e-06, + "loss": 0.3131, + "step": 13933 + }, + { + "epoch": 2.38, + "learning_rate": 2.1806192089640655e-06, + "loss": 0.3094, + "step": 13934 + }, + { + "epoch": 2.38, + "learning_rate": 2.1794715008609323e-06, + "loss": 0.309, + "step": 13935 + }, + { + "epoch": 2.38, + "learning_rate": 2.178324057929321e-06, + "loss": 0.3211, + "step": 13936 + }, + { + "epoch": 2.38, + "learning_rate": 2.1771768802081385e-06, + "loss": 0.3275, + "step": 13937 + }, + { + "epoch": 2.38, + "learning_rate": 2.1760299677362817e-06, + "loss": 0.3306, + "step": 13938 + }, + { + "epoch": 2.38, + "learning_rate": 2.174883320552641e-06, + "loss": 0.3021, + "step": 13939 + }, + { + "epoch": 2.38, + "learning_rate": 2.1737369386960925e-06, + "loss": 0.3294, + "step": 13940 + }, + { + "epoch": 2.38, + "learning_rate": 2.172590822205508e-06, + "loss": 0.3174, + "step": 13941 + }, + { + "epoch": 2.38, + "learning_rate": 2.1714449711197495e-06, + "loss": 0.3034, + "step": 13942 + }, + { + "epoch": 2.38, + "learning_rate": 2.17029938547767e-06, + "loss": 0.3141, + "step": 13943 + }, + { + "epoch": 2.38, + "learning_rate": 2.169154065318112e-06, + "loss": 0.3051, + "step": 13944 + }, + { + "epoch": 2.38, + "learning_rate": 2.16800901067991e-06, + "loss": 0.2908, + "step": 13945 + }, + { + "epoch": 2.38, + "learning_rate": 2.166864221601892e-06, + "loss": 0.322, + "step": 13946 + }, + { + "epoch": 2.38, + "learning_rate": 2.16571969812287e-06, + "loss": 0.3008, + "step": 13947 + }, + { + "epoch": 2.38, + "learning_rate": 2.1645754402816544e-06, + "loss": 0.3005, + "step": 13948 + }, + { + "epoch": 2.38, + "learning_rate": 2.1634314481170426e-06, + "loss": 0.3004, + "step": 13949 + }, + { + "epoch": 2.38, + "learning_rate": 2.1622877216678273e-06, + "loss": 0.3157, + "step": 13950 + }, + { + "epoch": 2.38, + "learning_rate": 2.161144260972784e-06, + "loss": 0.3111, + "step": 13951 + }, + { + "epoch": 2.38, + "learning_rate": 2.160001066070686e-06, + "loss": 0.2957, + "step": 13952 + }, + { + "epoch": 2.38, + "learning_rate": 2.158858137000296e-06, + "loss": 0.3156, + "step": 13953 + }, + { + "epoch": 2.38, + "learning_rate": 2.15771547380037e-06, + "loss": 0.2951, + "step": 13954 + }, + { + "epoch": 2.38, + "learning_rate": 2.1565730765096483e-06, + "loss": 0.2979, + "step": 13955 + }, + { + "epoch": 2.38, + "learning_rate": 2.1554309451668664e-06, + "loss": 0.2987, + "step": 13956 + }, + { + "epoch": 2.38, + "learning_rate": 2.154289079810753e-06, + "loss": 0.3336, + "step": 13957 + }, + { + "epoch": 2.38, + "learning_rate": 2.1531474804800255e-06, + "loss": 0.3287, + "step": 13958 + }, + { + "epoch": 2.38, + "learning_rate": 2.1520061472133903e-06, + "loss": 0.3175, + "step": 13959 + }, + { + "epoch": 2.38, + "learning_rate": 2.150865080049548e-06, + "loss": 0.3337, + "step": 13960 + }, + { + "epoch": 2.38, + "learning_rate": 2.149724279027191e-06, + "loss": 0.2842, + "step": 13961 + }, + { + "epoch": 2.38, + "learning_rate": 2.1485837441849966e-06, + "loss": 0.293, + "step": 13962 + }, + { + "epoch": 2.38, + "learning_rate": 2.147443475561638e-06, + "loss": 0.3209, + "step": 13963 + }, + { + "epoch": 2.38, + "learning_rate": 2.146303473195779e-06, + "loss": 0.3302, + "step": 13964 + }, + { + "epoch": 2.38, + "learning_rate": 2.145163737126077e-06, + "loss": 0.3008, + "step": 13965 + }, + { + "epoch": 2.38, + "learning_rate": 2.1440242673911706e-06, + "loss": 0.3049, + "step": 13966 + }, + { + "epoch": 2.38, + "learning_rate": 2.1428850640297007e-06, + "loss": 0.3019, + "step": 13967 + }, + { + "epoch": 2.38, + "learning_rate": 2.141746127080292e-06, + "loss": 0.3188, + "step": 13968 + }, + { + "epoch": 2.38, + "learning_rate": 2.140607456581566e-06, + "loss": 0.3107, + "step": 13969 + }, + { + "epoch": 2.38, + "learning_rate": 2.1394690525721275e-06, + "loss": 0.3225, + "step": 13970 + }, + { + "epoch": 2.38, + "learning_rate": 2.1383309150905783e-06, + "loss": 0.319, + "step": 13971 + }, + { + "epoch": 2.38, + "learning_rate": 2.1371930441755084e-06, + "loss": 0.3075, + "step": 13972 + }, + { + "epoch": 2.38, + "learning_rate": 2.136055439865502e-06, + "loss": 0.3182, + "step": 13973 + }, + { + "epoch": 2.38, + "learning_rate": 2.1349181021991306e-06, + "loss": 0.3358, + "step": 13974 + }, + { + "epoch": 2.38, + "learning_rate": 2.1337810312149576e-06, + "loss": 0.3043, + "step": 13975 + }, + { + "epoch": 2.38, + "learning_rate": 2.132644226951538e-06, + "loss": 0.3149, + "step": 13976 + }, + { + "epoch": 2.38, + "learning_rate": 2.1315076894474207e-06, + "loss": 0.3294, + "step": 13977 + }, + { + "epoch": 2.38, + "learning_rate": 2.130371418741137e-06, + "loss": 0.323, + "step": 13978 + }, + { + "epoch": 2.38, + "learning_rate": 2.129235414871218e-06, + "loss": 0.2854, + "step": 13979 + }, + { + "epoch": 2.38, + "learning_rate": 2.128099677876181e-06, + "loss": 0.3222, + "step": 13980 + }, + { + "epoch": 2.38, + "learning_rate": 2.1269642077945385e-06, + "loss": 0.3117, + "step": 13981 + }, + { + "epoch": 2.38, + "learning_rate": 2.1258290046647856e-06, + "loss": 0.3036, + "step": 13982 + }, + { + "epoch": 2.38, + "learning_rate": 2.124694068525417e-06, + "loss": 0.3137, + "step": 13983 + }, + { + "epoch": 2.38, + "learning_rate": 2.1235593994149162e-06, + "loss": 0.3075, + "step": 13984 + }, + { + "epoch": 2.39, + "learning_rate": 2.1224249973717536e-06, + "loss": 0.3061, + "step": 13985 + }, + { + "epoch": 2.39, + "learning_rate": 2.1212908624343954e-06, + "loss": 0.3233, + "step": 13986 + }, + { + "epoch": 2.39, + "learning_rate": 2.120156994641296e-06, + "loss": 0.3064, + "step": 13987 + }, + { + "epoch": 2.39, + "learning_rate": 2.1190233940309014e-06, + "loss": 0.2858, + "step": 13988 + }, + { + "epoch": 2.39, + "learning_rate": 2.1178900606416495e-06, + "loss": 0.3368, + "step": 13989 + }, + { + "epoch": 2.39, + "learning_rate": 2.1167569945119694e-06, + "loss": 0.3171, + "step": 13990 + }, + { + "epoch": 2.39, + "learning_rate": 2.115624195680278e-06, + "loss": 0.3419, + "step": 13991 + }, + { + "epoch": 2.39, + "learning_rate": 2.1144916641849887e-06, + "loss": 0.3472, + "step": 13992 + }, + { + "epoch": 2.39, + "learning_rate": 2.1133594000644974e-06, + "loss": 0.3063, + "step": 13993 + }, + { + "epoch": 2.39, + "learning_rate": 2.112227403357198e-06, + "loss": 0.2998, + "step": 13994 + }, + { + "epoch": 2.39, + "learning_rate": 2.1110956741014743e-06, + "loss": 0.3298, + "step": 13995 + }, + { + "epoch": 2.39, + "learning_rate": 2.1099642123356977e-06, + "loss": 0.3068, + "step": 13996 + }, + { + "epoch": 2.39, + "learning_rate": 2.1088330180982386e-06, + "loss": 0.3319, + "step": 13997 + }, + { + "epoch": 2.39, + "learning_rate": 2.107702091427445e-06, + "loss": 0.3048, + "step": 13998 + }, + { + "epoch": 2.39, + "learning_rate": 2.106571432361669e-06, + "loss": 0.3002, + "step": 13999 + }, + { + "epoch": 2.39, + "learning_rate": 2.1054410409392435e-06, + "loss": 0.3024, + "step": 14000 + }, + { + "epoch": 2.39, + "learning_rate": 2.104310917198499e-06, + "loss": 0.2807, + "step": 14001 + }, + { + "epoch": 2.39, + "learning_rate": 2.1031810611777535e-06, + "loss": 0.3017, + "step": 14002 + }, + { + "epoch": 2.39, + "learning_rate": 2.10205147291532e-06, + "loss": 0.3078, + "step": 14003 + }, + { + "epoch": 2.39, + "learning_rate": 2.100922152449496e-06, + "loss": 0.3326, + "step": 14004 + }, + { + "epoch": 2.39, + "learning_rate": 2.0997930998185754e-06, + "loss": 0.3273, + "step": 14005 + }, + { + "epoch": 2.39, + "learning_rate": 2.098664315060842e-06, + "loss": 0.3327, + "step": 14006 + }, + { + "epoch": 2.39, + "learning_rate": 2.0975357982145715e-06, + "loss": 0.3113, + "step": 14007 + }, + { + "epoch": 2.39, + "learning_rate": 2.0964075493180225e-06, + "loss": 0.2972, + "step": 14008 + }, + { + "epoch": 2.39, + "learning_rate": 2.095279568409455e-06, + "loss": 0.3055, + "step": 14009 + }, + { + "epoch": 2.39, + "learning_rate": 2.0941518555271134e-06, + "loss": 0.3102, + "step": 14010 + }, + { + "epoch": 2.39, + "learning_rate": 2.0930244107092367e-06, + "loss": 0.3066, + "step": 14011 + }, + { + "epoch": 2.39, + "learning_rate": 2.0918972339940557e-06, + "loss": 0.306, + "step": 14012 + }, + { + "epoch": 2.39, + "learning_rate": 2.0907703254197842e-06, + "loss": 0.313, + "step": 14013 + }, + { + "epoch": 2.39, + "learning_rate": 2.089643685024635e-06, + "loss": 0.2991, + "step": 14014 + }, + { + "epoch": 2.39, + "learning_rate": 2.088517312846813e-06, + "loss": 0.2892, + "step": 14015 + }, + { + "epoch": 2.39, + "learning_rate": 2.087391208924503e-06, + "loss": 0.3187, + "step": 14016 + }, + { + "epoch": 2.39, + "learning_rate": 2.0862653732958914e-06, + "loss": 0.3102, + "step": 14017 + }, + { + "epoch": 2.39, + "learning_rate": 2.0851398059991525e-06, + "loss": 0.3245, + "step": 14018 + }, + { + "epoch": 2.39, + "learning_rate": 2.0840145070724506e-06, + "loss": 0.3053, + "step": 14019 + }, + { + "epoch": 2.39, + "learning_rate": 2.082889476553942e-06, + "loss": 0.3318, + "step": 14020 + }, + { + "epoch": 2.39, + "learning_rate": 2.0817647144817723e-06, + "loss": 0.3298, + "step": 14021 + }, + { + "epoch": 2.39, + "learning_rate": 2.0806402208940812e-06, + "loss": 0.2948, + "step": 14022 + }, + { + "epoch": 2.39, + "learning_rate": 2.0795159958289935e-06, + "loss": 0.3197, + "step": 14023 + }, + { + "epoch": 2.39, + "learning_rate": 2.078392039324629e-06, + "loss": 0.3248, + "step": 14024 + }, + { + "epoch": 2.39, + "learning_rate": 2.0772683514191005e-06, + "loss": 0.3038, + "step": 14025 + }, + { + "epoch": 2.39, + "learning_rate": 2.0761449321505066e-06, + "loss": 0.2963, + "step": 14026 + }, + { + "epoch": 2.39, + "learning_rate": 2.0750217815569396e-06, + "loss": 0.3209, + "step": 14027 + }, + { + "epoch": 2.39, + "learning_rate": 2.073898899676485e-06, + "loss": 0.3171, + "step": 14028 + }, + { + "epoch": 2.39, + "learning_rate": 2.072776286547212e-06, + "loss": 0.2948, + "step": 14029 + }, + { + "epoch": 2.39, + "learning_rate": 2.07165394220719e-06, + "loss": 0.3264, + "step": 14030 + }, + { + "epoch": 2.39, + "learning_rate": 2.0705318666944697e-06, + "loss": 0.3321, + "step": 14031 + }, + { + "epoch": 2.39, + "learning_rate": 2.069410060047099e-06, + "loss": 0.3046, + "step": 14032 + }, + { + "epoch": 2.39, + "learning_rate": 2.0682885223031156e-06, + "loss": 0.3066, + "step": 14033 + }, + { + "epoch": 2.39, + "learning_rate": 2.0671672535005473e-06, + "loss": 0.2989, + "step": 14034 + }, + { + "epoch": 2.39, + "learning_rate": 2.066046253677414e-06, + "loss": 0.294, + "step": 14035 + }, + { + "epoch": 2.39, + "learning_rate": 2.0649255228717248e-06, + "loss": 0.3125, + "step": 14036 + }, + { + "epoch": 2.39, + "learning_rate": 2.0638050611214832e-06, + "loss": 0.3163, + "step": 14037 + }, + { + "epoch": 2.39, + "learning_rate": 2.062684868464675e-06, + "loss": 0.3111, + "step": 14038 + }, + { + "epoch": 2.39, + "learning_rate": 2.0615649449392873e-06, + "loss": 0.3026, + "step": 14039 + }, + { + "epoch": 2.39, + "learning_rate": 2.0604452905832906e-06, + "loss": 0.3117, + "step": 14040 + }, + { + "epoch": 2.39, + "learning_rate": 2.0593259054346513e-06, + "loss": 0.3185, + "step": 14041 + }, + { + "epoch": 2.39, + "learning_rate": 2.058206789531324e-06, + "loss": 0.314, + "step": 14042 + }, + { + "epoch": 2.39, + "learning_rate": 2.0570879429112557e-06, + "loss": 0.3447, + "step": 14043 + }, + { + "epoch": 2.4, + "learning_rate": 2.0559693656123824e-06, + "loss": 0.3447, + "step": 14044 + }, + { + "epoch": 2.4, + "learning_rate": 2.0548510576726312e-06, + "loss": 0.3115, + "step": 14045 + }, + { + "epoch": 2.4, + "learning_rate": 2.053733019129922e-06, + "loss": 0.3214, + "step": 14046 + }, + { + "epoch": 2.4, + "learning_rate": 2.0526152500221606e-06, + "loss": 0.3252, + "step": 14047 + }, + { + "epoch": 2.4, + "learning_rate": 2.0514977503872515e-06, + "loss": 0.3132, + "step": 14048 + }, + { + "epoch": 2.4, + "learning_rate": 2.0503805202630833e-06, + "loss": 0.2995, + "step": 14049 + }, + { + "epoch": 2.4, + "learning_rate": 2.049263559687539e-06, + "loss": 0.3231, + "step": 14050 + }, + { + "epoch": 2.4, + "learning_rate": 2.048146868698492e-06, + "loss": 0.291, + "step": 14051 + }, + { + "epoch": 2.4, + "learning_rate": 2.0470304473338086e-06, + "loss": 0.3184, + "step": 14052 + }, + { + "epoch": 2.4, + "learning_rate": 2.0459142956313373e-06, + "loss": 0.2838, + "step": 14053 + }, + { + "epoch": 2.4, + "learning_rate": 2.044798413628928e-06, + "loss": 0.3041, + "step": 14054 + }, + { + "epoch": 2.4, + "learning_rate": 2.0436828013644137e-06, + "loss": 0.2851, + "step": 14055 + }, + { + "epoch": 2.4, + "learning_rate": 2.0425674588756263e-06, + "loss": 0.3046, + "step": 14056 + }, + { + "epoch": 2.4, + "learning_rate": 2.041452386200379e-06, + "loss": 0.3245, + "step": 14057 + }, + { + "epoch": 2.4, + "learning_rate": 2.040337583376484e-06, + "loss": 0.3118, + "step": 14058 + }, + { + "epoch": 2.4, + "learning_rate": 2.039223050441743e-06, + "loss": 0.3091, + "step": 14059 + }, + { + "epoch": 2.4, + "learning_rate": 2.03810878743394e-06, + "loss": 0.3139, + "step": 14060 + }, + { + "epoch": 2.4, + "learning_rate": 2.036994794390861e-06, + "loss": 0.3172, + "step": 14061 + }, + { + "epoch": 2.4, + "learning_rate": 2.03588107135028e-06, + "loss": 0.3195, + "step": 14062 + }, + { + "epoch": 2.4, + "learning_rate": 2.034767618349953e-06, + "loss": 0.3256, + "step": 14063 + }, + { + "epoch": 2.4, + "learning_rate": 2.03365443542764e-06, + "loss": 0.3208, + "step": 14064 + }, + { + "epoch": 2.4, + "learning_rate": 2.0325415226210845e-06, + "loss": 0.3231, + "step": 14065 + }, + { + "epoch": 2.4, + "learning_rate": 2.0314288799680214e-06, + "loss": 0.3191, + "step": 14066 + }, + { + "epoch": 2.4, + "learning_rate": 2.0303165075061802e-06, + "loss": 0.315, + "step": 14067 + }, + { + "epoch": 2.4, + "learning_rate": 2.029204405273273e-06, + "loss": 0.3022, + "step": 14068 + }, + { + "epoch": 2.4, + "learning_rate": 2.028092573307011e-06, + "loss": 0.298, + "step": 14069 + }, + { + "epoch": 2.4, + "learning_rate": 2.0269810116450927e-06, + "loss": 0.3388, + "step": 14070 + }, + { + "epoch": 2.4, + "learning_rate": 2.0258697203252075e-06, + "loss": 0.2984, + "step": 14071 + }, + { + "epoch": 2.4, + "learning_rate": 2.0247586993850378e-06, + "loss": 0.3046, + "step": 14072 + }, + { + "epoch": 2.4, + "learning_rate": 2.023647948862254e-06, + "loss": 0.3117, + "step": 14073 + }, + { + "epoch": 2.4, + "learning_rate": 2.022537468794521e-06, + "loss": 0.3017, + "step": 14074 + }, + { + "epoch": 2.4, + "learning_rate": 2.0214272592194863e-06, + "loss": 0.3218, + "step": 14075 + }, + { + "epoch": 2.4, + "learning_rate": 2.020317320174797e-06, + "loss": 0.2995, + "step": 14076 + }, + { + "epoch": 2.4, + "learning_rate": 2.019207651698092e-06, + "loss": 0.2929, + "step": 14077 + }, + { + "epoch": 2.4, + "learning_rate": 2.018098253826989e-06, + "loss": 0.3086, + "step": 14078 + }, + { + "epoch": 2.4, + "learning_rate": 2.016989126599108e-06, + "loss": 0.295, + "step": 14079 + }, + { + "epoch": 2.4, + "learning_rate": 2.0158802700520576e-06, + "loss": 0.305, + "step": 14080 + }, + { + "epoch": 2.4, + "learning_rate": 2.0147716842234345e-06, + "loss": 0.3176, + "step": 14081 + }, + { + "epoch": 2.4, + "learning_rate": 2.013663369150831e-06, + "loss": 0.3511, + "step": 14082 + }, + { + "epoch": 2.4, + "learning_rate": 2.012555324871822e-06, + "loss": 0.3037, + "step": 14083 + }, + { + "epoch": 2.4, + "learning_rate": 2.0114475514239795e-06, + "loss": 0.3374, + "step": 14084 + }, + { + "epoch": 2.4, + "learning_rate": 2.0103400488448654e-06, + "loss": 0.3041, + "step": 14085 + }, + { + "epoch": 2.4, + "learning_rate": 2.009232817172032e-06, + "loss": 0.2771, + "step": 14086 + }, + { + "epoch": 2.4, + "learning_rate": 2.008125856443023e-06, + "loss": 0.3107, + "step": 14087 + }, + { + "epoch": 2.4, + "learning_rate": 2.0070191666953707e-06, + "loss": 0.3128, + "step": 14088 + }, + { + "epoch": 2.4, + "learning_rate": 2.0059127479666043e-06, + "loss": 0.3202, + "step": 14089 + }, + { + "epoch": 2.4, + "learning_rate": 2.0048066002942322e-06, + "loss": 0.3441, + "step": 14090 + }, + { + "epoch": 2.4, + "learning_rate": 2.0037007237157633e-06, + "loss": 0.3188, + "step": 14091 + }, + { + "epoch": 2.4, + "learning_rate": 2.0025951182686966e-06, + "loss": 0.3223, + "step": 14092 + }, + { + "epoch": 2.4, + "learning_rate": 2.0014897839905213e-06, + "loss": 0.3156, + "step": 14093 + }, + { + "epoch": 2.4, + "learning_rate": 2.000384720918711e-06, + "loss": 0.2974, + "step": 14094 + }, + { + "epoch": 2.4, + "learning_rate": 1.9992799290907384e-06, + "loss": 0.3326, + "step": 14095 + }, + { + "epoch": 2.4, + "learning_rate": 1.998175408544062e-06, + "loss": 0.2946, + "step": 14096 + }, + { + "epoch": 2.4, + "learning_rate": 1.9970711593161373e-06, + "loss": 0.2999, + "step": 14097 + }, + { + "epoch": 2.4, + "learning_rate": 1.9959671814444004e-06, + "loss": 0.3253, + "step": 14098 + }, + { + "epoch": 2.4, + "learning_rate": 1.9948634749662867e-06, + "loss": 0.3149, + "step": 14099 + }, + { + "epoch": 2.4, + "learning_rate": 1.993760039919219e-06, + "loss": 0.3188, + "step": 14100 + }, + { + "epoch": 2.4, + "learning_rate": 1.992656876340613e-06, + "loss": 0.301, + "step": 14101 + }, + { + "epoch": 2.41, + "learning_rate": 1.991553984267872e-06, + "loss": 0.3139, + "step": 14102 + }, + { + "epoch": 2.41, + "learning_rate": 1.990451363738394e-06, + "loss": 0.3016, + "step": 14103 + }, + { + "epoch": 2.41, + "learning_rate": 1.9893490147895667e-06, + "loss": 0.2955, + "step": 14104 + }, + { + "epoch": 2.41, + "learning_rate": 1.988246937458762e-06, + "loss": 0.3163, + "step": 14105 + }, + { + "epoch": 2.41, + "learning_rate": 1.9871451317833534e-06, + "loss": 0.3108, + "step": 14106 + }, + { + "epoch": 2.41, + "learning_rate": 1.9860435978006966e-06, + "loss": 0.2951, + "step": 14107 + }, + { + "epoch": 2.41, + "learning_rate": 1.9849423355481436e-06, + "loss": 0.3116, + "step": 14108 + }, + { + "epoch": 2.41, + "learning_rate": 1.9838413450630365e-06, + "loss": 0.3376, + "step": 14109 + }, + { + "epoch": 2.41, + "learning_rate": 1.982740626382702e-06, + "loss": 0.3242, + "step": 14110 + }, + { + "epoch": 2.41, + "learning_rate": 1.9816401795444664e-06, + "loss": 0.3225, + "step": 14111 + }, + { + "epoch": 2.41, + "learning_rate": 1.9805400045856417e-06, + "loss": 0.298, + "step": 14112 + }, + { + "epoch": 2.41, + "learning_rate": 1.9794401015435293e-06, + "loss": 0.3158, + "step": 14113 + }, + { + "epoch": 2.41, + "learning_rate": 1.9783404704554253e-06, + "loss": 0.333, + "step": 14114 + }, + { + "epoch": 2.41, + "learning_rate": 1.9772411113586156e-06, + "loss": 0.3022, + "step": 14115 + }, + { + "epoch": 2.41, + "learning_rate": 1.976142024290376e-06, + "loss": 0.2939, + "step": 14116 + }, + { + "epoch": 2.41, + "learning_rate": 1.9750432092879735e-06, + "loss": 0.3138, + "step": 14117 + }, + { + "epoch": 2.41, + "learning_rate": 1.973944666388665e-06, + "loss": 0.3115, + "step": 14118 + }, + { + "epoch": 2.41, + "learning_rate": 1.972846395629702e-06, + "loss": 0.3193, + "step": 14119 + }, + { + "epoch": 2.41, + "learning_rate": 1.971748397048319e-06, + "loss": 0.3078, + "step": 14120 + }, + { + "epoch": 2.41, + "learning_rate": 1.9706506706817476e-06, + "loss": 0.319, + "step": 14121 + }, + { + "epoch": 2.41, + "learning_rate": 1.969553216567209e-06, + "loss": 0.3026, + "step": 14122 + }, + { + "epoch": 2.41, + "learning_rate": 1.9684560347419156e-06, + "loss": 0.3043, + "step": 14123 + }, + { + "epoch": 2.41, + "learning_rate": 1.967359125243071e-06, + "loss": 0.339, + "step": 14124 + }, + { + "epoch": 2.41, + "learning_rate": 1.966262488107863e-06, + "loss": 0.315, + "step": 14125 + }, + { + "epoch": 2.41, + "learning_rate": 1.9651661233734787e-06, + "loss": 0.3302, + "step": 14126 + }, + { + "epoch": 2.41, + "learning_rate": 1.9640700310770954e-06, + "loss": 0.3031, + "step": 14127 + }, + { + "epoch": 2.41, + "learning_rate": 1.9629742112558724e-06, + "loss": 0.3113, + "step": 14128 + }, + { + "epoch": 2.41, + "learning_rate": 1.9618786639469676e-06, + "loss": 0.3199, + "step": 14129 + }, + { + "epoch": 2.41, + "learning_rate": 1.9607833891875307e-06, + "loss": 0.2983, + "step": 14130 + }, + { + "epoch": 2.41, + "learning_rate": 1.9596883870146977e-06, + "loss": 0.2976, + "step": 14131 + }, + { + "epoch": 2.41, + "learning_rate": 1.958593657465596e-06, + "loss": 0.3244, + "step": 14132 + }, + { + "epoch": 2.41, + "learning_rate": 1.9574992005773465e-06, + "loss": 0.3412, + "step": 14133 + }, + { + "epoch": 2.41, + "learning_rate": 1.9564050163870598e-06, + "loss": 0.3092, + "step": 14134 + }, + { + "epoch": 2.41, + "learning_rate": 1.955311104931833e-06, + "loss": 0.3167, + "step": 14135 + }, + { + "epoch": 2.41, + "learning_rate": 1.954217466248759e-06, + "loss": 0.32, + "step": 14136 + }, + { + "epoch": 2.41, + "learning_rate": 1.9531241003749202e-06, + "loss": 0.3155, + "step": 14137 + }, + { + "epoch": 2.41, + "learning_rate": 1.952031007347389e-06, + "loss": 0.2957, + "step": 14138 + }, + { + "epoch": 2.41, + "learning_rate": 1.9509381872032307e-06, + "loss": 0.3154, + "step": 14139 + }, + { + "epoch": 2.41, + "learning_rate": 1.949845639979501e-06, + "loss": 0.3182, + "step": 14140 + }, + { + "epoch": 2.41, + "learning_rate": 1.9487533657132394e-06, + "loss": 0.3178, + "step": 14141 + }, + { + "epoch": 2.41, + "learning_rate": 1.947661364441489e-06, + "loss": 0.3375, + "step": 14142 + }, + { + "epoch": 2.41, + "learning_rate": 1.9465696362012687e-06, + "loss": 0.3271, + "step": 14143 + }, + { + "epoch": 2.41, + "learning_rate": 1.9454781810295998e-06, + "loss": 0.2973, + "step": 14144 + }, + { + "epoch": 2.41, + "learning_rate": 1.94438699896349e-06, + "loss": 0.3044, + "step": 14145 + }, + { + "epoch": 2.41, + "learning_rate": 1.9432960900399377e-06, + "loss": 0.324, + "step": 14146 + }, + { + "epoch": 2.41, + "learning_rate": 1.9422054542959333e-06, + "loss": 0.3087, + "step": 14147 + }, + { + "epoch": 2.41, + "learning_rate": 1.9411150917684572e-06, + "loss": 0.3265, + "step": 14148 + }, + { + "epoch": 2.41, + "learning_rate": 1.940025002494482e-06, + "loss": 0.3076, + "step": 14149 + }, + { + "epoch": 2.41, + "learning_rate": 1.9389351865109664e-06, + "loss": 0.3155, + "step": 14150 + }, + { + "epoch": 2.41, + "learning_rate": 1.9378456438548633e-06, + "loss": 0.3344, + "step": 14151 + }, + { + "epoch": 2.41, + "learning_rate": 1.9367563745631157e-06, + "loss": 0.3033, + "step": 14152 + }, + { + "epoch": 2.41, + "learning_rate": 1.9356673786726598e-06, + "loss": 0.3177, + "step": 14153 + }, + { + "epoch": 2.41, + "learning_rate": 1.9345786562204185e-06, + "loss": 0.3227, + "step": 14154 + }, + { + "epoch": 2.41, + "learning_rate": 1.933490207243308e-06, + "loss": 0.3239, + "step": 14155 + }, + { + "epoch": 2.41, + "learning_rate": 1.9324020317782367e-06, + "loss": 0.3236, + "step": 14156 + }, + { + "epoch": 2.41, + "learning_rate": 1.9313141298620965e-06, + "loss": 0.3165, + "step": 14157 + }, + { + "epoch": 2.41, + "learning_rate": 1.93022650153178e-06, + "loss": 0.3142, + "step": 14158 + }, + { + "epoch": 2.41, + "learning_rate": 1.9291391468241603e-06, + "loss": 0.3081, + "step": 14159 + }, + { + "epoch": 2.41, + "learning_rate": 1.9280520657761103e-06, + "loss": 0.298, + "step": 14160 + }, + { + "epoch": 2.42, + "learning_rate": 1.926965258424488e-06, + "loss": 0.3127, + "step": 14161 + }, + { + "epoch": 2.42, + "learning_rate": 1.925878724806145e-06, + "loss": 0.3214, + "step": 14162 + }, + { + "epoch": 2.42, + "learning_rate": 1.924792464957922e-06, + "loss": 0.3172, + "step": 14163 + }, + { + "epoch": 2.42, + "learning_rate": 1.9237064789166505e-06, + "loss": 0.3192, + "step": 14164 + }, + { + "epoch": 2.42, + "learning_rate": 1.9226207667191566e-06, + "loss": 0.2938, + "step": 14165 + }, + { + "epoch": 2.42, + "learning_rate": 1.9215353284022487e-06, + "loss": 0.3225, + "step": 14166 + }, + { + "epoch": 2.42, + "learning_rate": 1.920450164002732e-06, + "loss": 0.312, + "step": 14167 + }, + { + "epoch": 2.42, + "learning_rate": 1.919365273557403e-06, + "loss": 0.3118, + "step": 14168 + }, + { + "epoch": 2.42, + "learning_rate": 1.9182806571030466e-06, + "loss": 0.3071, + "step": 14169 + }, + { + "epoch": 2.42, + "learning_rate": 1.9171963146764384e-06, + "loss": 0.3362, + "step": 14170 + }, + { + "epoch": 2.42, + "learning_rate": 1.916112246314348e-06, + "loss": 0.2899, + "step": 14171 + }, + { + "epoch": 2.42, + "learning_rate": 1.915028452053529e-06, + "loss": 0.3277, + "step": 14172 + }, + { + "epoch": 2.42, + "learning_rate": 1.9139449319307313e-06, + "loss": 0.3212, + "step": 14173 + }, + { + "epoch": 2.42, + "learning_rate": 1.9128616859826965e-06, + "loss": 0.326, + "step": 14174 + }, + { + "epoch": 2.42, + "learning_rate": 1.9117787142461496e-06, + "loss": 0.297, + "step": 14175 + }, + { + "epoch": 2.42, + "learning_rate": 1.910696016757814e-06, + "loss": 0.3019, + "step": 14176 + }, + { + "epoch": 2.42, + "learning_rate": 1.909613593554399e-06, + "loss": 0.3232, + "step": 14177 + }, + { + "epoch": 2.42, + "learning_rate": 1.9085314446726087e-06, + "loss": 0.3063, + "step": 14178 + }, + { + "epoch": 2.42, + "learning_rate": 1.9074495701491347e-06, + "loss": 0.3059, + "step": 14179 + }, + { + "epoch": 2.42, + "learning_rate": 1.9063679700206627e-06, + "loss": 0.3095, + "step": 14180 + }, + { + "epoch": 2.42, + "learning_rate": 1.9052866443238605e-06, + "loss": 0.3361, + "step": 14181 + }, + { + "epoch": 2.42, + "learning_rate": 1.9042055930953973e-06, + "loss": 0.3213, + "step": 14182 + }, + { + "epoch": 2.42, + "learning_rate": 1.9031248163719284e-06, + "loss": 0.3126, + "step": 14183 + }, + { + "epoch": 2.42, + "learning_rate": 1.9020443141900979e-06, + "loss": 0.2988, + "step": 14184 + }, + { + "epoch": 2.42, + "learning_rate": 1.900964086586543e-06, + "loss": 0.2941, + "step": 14185 + }, + { + "epoch": 2.42, + "learning_rate": 1.8998841335978923e-06, + "loss": 0.3244, + "step": 14186 + }, + { + "epoch": 2.42, + "learning_rate": 1.8988044552607666e-06, + "loss": 0.3292, + "step": 14187 + }, + { + "epoch": 2.42, + "learning_rate": 1.8977250516117674e-06, + "loss": 0.3171, + "step": 14188 + }, + { + "epoch": 2.42, + "learning_rate": 1.8966459226875012e-06, + "loss": 0.3076, + "step": 14189 + }, + { + "epoch": 2.42, + "learning_rate": 1.895567068524552e-06, + "loss": 0.2856, + "step": 14190 + }, + { + "epoch": 2.42, + "learning_rate": 1.8944884891595038e-06, + "loss": 0.3059, + "step": 14191 + }, + { + "epoch": 2.42, + "learning_rate": 1.8934101846289277e-06, + "loss": 0.3014, + "step": 14192 + }, + { + "epoch": 2.42, + "learning_rate": 1.8923321549693863e-06, + "loss": 0.3299, + "step": 14193 + }, + { + "epoch": 2.42, + "learning_rate": 1.8912544002174327e-06, + "loss": 0.3145, + "step": 14194 + }, + { + "epoch": 2.42, + "learning_rate": 1.890176920409612e-06, + "loss": 0.3169, + "step": 14195 + }, + { + "epoch": 2.42, + "learning_rate": 1.8890997155824541e-06, + "loss": 0.3152, + "step": 14196 + }, + { + "epoch": 2.42, + "learning_rate": 1.8880227857724864e-06, + "loss": 0.2831, + "step": 14197 + }, + { + "epoch": 2.42, + "learning_rate": 1.8869461310162241e-06, + "loss": 0.3237, + "step": 14198 + }, + { + "epoch": 2.42, + "learning_rate": 1.8858697513501745e-06, + "loss": 0.3241, + "step": 14199 + }, + { + "epoch": 2.42, + "learning_rate": 1.884793646810833e-06, + "loss": 0.3277, + "step": 14200 + }, + { + "epoch": 2.42, + "learning_rate": 1.8837178174346882e-06, + "loss": 0.3136, + "step": 14201 + }, + { + "epoch": 2.42, + "learning_rate": 1.8826422632582198e-06, + "loss": 0.3076, + "step": 14202 + }, + { + "epoch": 2.42, + "learning_rate": 1.881566984317893e-06, + "loss": 0.3132, + "step": 14203 + }, + { + "epoch": 2.42, + "learning_rate": 1.8804919806501697e-06, + "loss": 0.3042, + "step": 14204 + }, + { + "epoch": 2.42, + "learning_rate": 1.8794172522915022e-06, + "loss": 0.3057, + "step": 14205 + }, + { + "epoch": 2.42, + "learning_rate": 1.8783427992783266e-06, + "loss": 0.2921, + "step": 14206 + }, + { + "epoch": 2.42, + "learning_rate": 1.877268621647077e-06, + "loss": 0.3196, + "step": 14207 + }, + { + "epoch": 2.42, + "learning_rate": 1.8761947194341757e-06, + "loss": 0.3116, + "step": 14208 + }, + { + "epoch": 2.42, + "learning_rate": 1.8751210926760355e-06, + "loss": 0.2661, + "step": 14209 + }, + { + "epoch": 2.42, + "learning_rate": 1.874047741409062e-06, + "loss": 0.3036, + "step": 14210 + }, + { + "epoch": 2.42, + "learning_rate": 1.8729746656696456e-06, + "loss": 0.3217, + "step": 14211 + }, + { + "epoch": 2.42, + "learning_rate": 1.8719018654941733e-06, + "loss": 0.2995, + "step": 14212 + }, + { + "epoch": 2.42, + "learning_rate": 1.8708293409190204e-06, + "loss": 0.2946, + "step": 14213 + }, + { + "epoch": 2.42, + "learning_rate": 1.869757091980553e-06, + "loss": 0.3178, + "step": 14214 + }, + { + "epoch": 2.42, + "learning_rate": 1.868685118715128e-06, + "loss": 0.3291, + "step": 14215 + }, + { + "epoch": 2.42, + "learning_rate": 1.8676134211590925e-06, + "loss": 0.3374, + "step": 14216 + }, + { + "epoch": 2.42, + "learning_rate": 1.8665419993487887e-06, + "loss": 0.3337, + "step": 14217 + }, + { + "epoch": 2.42, + "learning_rate": 1.865470853320539e-06, + "loss": 0.3303, + "step": 14218 + }, + { + "epoch": 2.43, + "learning_rate": 1.8643999831106663e-06, + "loss": 0.2781, + "step": 14219 + }, + { + "epoch": 2.43, + "learning_rate": 1.8633293887554826e-06, + "loss": 0.311, + "step": 14220 + }, + { + "epoch": 2.43, + "learning_rate": 1.8622590702912835e-06, + "loss": 0.299, + "step": 14221 + }, + { + "epoch": 2.43, + "learning_rate": 1.861189027754363e-06, + "loss": 0.3414, + "step": 14222 + }, + { + "epoch": 2.43, + "learning_rate": 1.8601192611810026e-06, + "loss": 0.319, + "step": 14223 + }, + { + "epoch": 2.43, + "learning_rate": 1.8590497706074772e-06, + "loss": 0.3116, + "step": 14224 + }, + { + "epoch": 2.43, + "learning_rate": 1.8579805560700503e-06, + "loss": 0.312, + "step": 14225 + }, + { + "epoch": 2.43, + "learning_rate": 1.8569116176049717e-06, + "loss": 0.2923, + "step": 14226 + }, + { + "epoch": 2.43, + "learning_rate": 1.8558429552484891e-06, + "loss": 0.3328, + "step": 14227 + }, + { + "epoch": 2.43, + "learning_rate": 1.8547745690368368e-06, + "loss": 0.3147, + "step": 14228 + }, + { + "epoch": 2.43, + "learning_rate": 1.8537064590062415e-06, + "loss": 0.2957, + "step": 14229 + }, + { + "epoch": 2.43, + "learning_rate": 1.8526386251929184e-06, + "loss": 0.325, + "step": 14230 + }, + { + "epoch": 2.43, + "learning_rate": 1.8515710676330766e-06, + "loss": 0.3064, + "step": 14231 + }, + { + "epoch": 2.43, + "learning_rate": 1.850503786362915e-06, + "loss": 0.3144, + "step": 14232 + }, + { + "epoch": 2.43, + "learning_rate": 1.8494367814186176e-06, + "loss": 0.3164, + "step": 14233 + }, + { + "epoch": 2.43, + "learning_rate": 1.8483700528363645e-06, + "loss": 0.3368, + "step": 14234 + }, + { + "epoch": 2.43, + "learning_rate": 1.8473036006523283e-06, + "loss": 0.3047, + "step": 14235 + }, + { + "epoch": 2.43, + "learning_rate": 1.8462374249026693e-06, + "loss": 0.3096, + "step": 14236 + }, + { + "epoch": 2.43, + "learning_rate": 1.8451715256235347e-06, + "loss": 0.3047, + "step": 14237 + }, + { + "epoch": 2.43, + "learning_rate": 1.8441059028510677e-06, + "loss": 0.318, + "step": 14238 + }, + { + "epoch": 2.43, + "learning_rate": 1.8430405566214004e-06, + "loss": 0.2942, + "step": 14239 + }, + { + "epoch": 2.43, + "learning_rate": 1.8419754869706596e-06, + "loss": 0.3157, + "step": 14240 + }, + { + "epoch": 2.43, + "learning_rate": 1.840910693934953e-06, + "loss": 0.3301, + "step": 14241 + }, + { + "epoch": 2.43, + "learning_rate": 1.8398461775503862e-06, + "loss": 0.3083, + "step": 14242 + }, + { + "epoch": 2.43, + "learning_rate": 1.8387819378530546e-06, + "loss": 0.3346, + "step": 14243 + }, + { + "epoch": 2.43, + "learning_rate": 1.8377179748790442e-06, + "loss": 0.3099, + "step": 14244 + }, + { + "epoch": 2.43, + "learning_rate": 1.8366542886644289e-06, + "loss": 0.3215, + "step": 14245 + }, + { + "epoch": 2.43, + "learning_rate": 1.835590879245277e-06, + "loss": 0.3036, + "step": 14246 + }, + { + "epoch": 2.43, + "learning_rate": 1.8345277466576483e-06, + "loss": 0.3059, + "step": 14247 + }, + { + "epoch": 2.43, + "learning_rate": 1.8334648909375852e-06, + "loss": 0.3045, + "step": 14248 + }, + { + "epoch": 2.43, + "learning_rate": 1.8324023121211276e-06, + "loss": 0.3087, + "step": 14249 + }, + { + "epoch": 2.43, + "learning_rate": 1.8313400102443047e-06, + "loss": 0.3018, + "step": 14250 + }, + { + "epoch": 2.43, + "learning_rate": 1.8302779853431374e-06, + "loss": 0.3074, + "step": 14251 + }, + { + "epoch": 2.43, + "learning_rate": 1.829216237453637e-06, + "loss": 0.3089, + "step": 14252 + }, + { + "epoch": 2.43, + "learning_rate": 1.8281547666118004e-06, + "loss": 0.3023, + "step": 14253 + }, + { + "epoch": 2.43, + "learning_rate": 1.827093572853621e-06, + "loss": 0.3106, + "step": 14254 + }, + { + "epoch": 2.43, + "learning_rate": 1.826032656215082e-06, + "loss": 0.2902, + "step": 14255 + }, + { + "epoch": 2.43, + "learning_rate": 1.824972016732154e-06, + "loss": 0.3151, + "step": 14256 + }, + { + "epoch": 2.43, + "learning_rate": 1.8239116544407997e-06, + "loss": 0.3198, + "step": 14257 + }, + { + "epoch": 2.43, + "learning_rate": 1.8228515693769755e-06, + "loss": 0.3214, + "step": 14258 + }, + { + "epoch": 2.43, + "learning_rate": 1.8217917615766233e-06, + "loss": 0.2888, + "step": 14259 + }, + { + "epoch": 2.43, + "learning_rate": 1.8207322310756804e-06, + "loss": 0.3073, + "step": 14260 + }, + { + "epoch": 2.43, + "learning_rate": 1.8196729779100709e-06, + "loss": 0.3022, + "step": 14261 + }, + { + "epoch": 2.43, + "learning_rate": 1.818614002115714e-06, + "loss": 0.3171, + "step": 14262 + }, + { + "epoch": 2.43, + "learning_rate": 1.817555303728511e-06, + "loss": 0.3071, + "step": 14263 + }, + { + "epoch": 2.43, + "learning_rate": 1.8164968827843631e-06, + "loss": 0.3167, + "step": 14264 + }, + { + "epoch": 2.43, + "learning_rate": 1.8154387393191574e-06, + "loss": 0.3192, + "step": 14265 + }, + { + "epoch": 2.43, + "learning_rate": 1.8143808733687718e-06, + "loss": 0.3021, + "step": 14266 + }, + { + "epoch": 2.43, + "learning_rate": 1.8133232849690796e-06, + "loss": 0.3124, + "step": 14267 + }, + { + "epoch": 2.43, + "learning_rate": 1.8122659741559346e-06, + "loss": 0.3138, + "step": 14268 + }, + { + "epoch": 2.43, + "learning_rate": 1.81120894096519e-06, + "loss": 0.3342, + "step": 14269 + }, + { + "epoch": 2.43, + "learning_rate": 1.8101521854326887e-06, + "loss": 0.3116, + "step": 14270 + }, + { + "epoch": 2.43, + "learning_rate": 1.8090957075942573e-06, + "loss": 0.3211, + "step": 14271 + }, + { + "epoch": 2.43, + "learning_rate": 1.8080395074857215e-06, + "loss": 0.3228, + "step": 14272 + }, + { + "epoch": 2.43, + "learning_rate": 1.806983585142893e-06, + "loss": 0.3037, + "step": 14273 + }, + { + "epoch": 2.43, + "learning_rate": 1.8059279406015739e-06, + "loss": 0.326, + "step": 14274 + }, + { + "epoch": 2.43, + "learning_rate": 1.8048725738975614e-06, + "loss": 0.336, + "step": 14275 + }, + { + "epoch": 2.43, + "learning_rate": 1.8038174850666367e-06, + "loss": 0.3005, + "step": 14276 + }, + { + "epoch": 2.43, + "learning_rate": 1.8027626741445792e-06, + "loss": 0.3066, + "step": 14277 + }, + { + "epoch": 2.44, + "learning_rate": 1.8017081411671478e-06, + "loss": 0.3215, + "step": 14278 + }, + { + "epoch": 2.44, + "learning_rate": 1.8006538861701028e-06, + "loss": 0.3089, + "step": 14279 + }, + { + "epoch": 2.44, + "learning_rate": 1.7995999091891902e-06, + "loss": 0.3221, + "step": 14280 + }, + { + "epoch": 2.44, + "learning_rate": 1.798546210260148e-06, + "loss": 0.3002, + "step": 14281 + }, + { + "epoch": 2.44, + "learning_rate": 1.7974927894187034e-06, + "loss": 0.3123, + "step": 14282 + }, + { + "epoch": 2.44, + "learning_rate": 1.7964396467005775e-06, + "loss": 0.3042, + "step": 14283 + }, + { + "epoch": 2.44, + "learning_rate": 1.7953867821414739e-06, + "loss": 0.3296, + "step": 14284 + }, + { + "epoch": 2.44, + "learning_rate": 1.7943341957770976e-06, + "loss": 0.3163, + "step": 14285 + }, + { + "epoch": 2.44, + "learning_rate": 1.7932818876431346e-06, + "loss": 0.3269, + "step": 14286 + }, + { + "epoch": 2.44, + "learning_rate": 1.7922298577752661e-06, + "loss": 0.3164, + "step": 14287 + }, + { + "epoch": 2.44, + "learning_rate": 1.791178106209166e-06, + "loss": 0.298, + "step": 14288 + }, + { + "epoch": 2.44, + "learning_rate": 1.7901266329804934e-06, + "loss": 0.3221, + "step": 14289 + }, + { + "epoch": 2.44, + "learning_rate": 1.789075438124903e-06, + "loss": 0.3141, + "step": 14290 + }, + { + "epoch": 2.44, + "learning_rate": 1.7880245216780367e-06, + "loss": 0.3005, + "step": 14291 + }, + { + "epoch": 2.44, + "learning_rate": 1.7869738836755302e-06, + "loss": 0.3203, + "step": 14292 + }, + { + "epoch": 2.44, + "learning_rate": 1.7859235241530037e-06, + "loss": 0.3239, + "step": 14293 + }, + { + "epoch": 2.44, + "learning_rate": 1.7848734431460735e-06, + "loss": 0.3099, + "step": 14294 + }, + { + "epoch": 2.44, + "learning_rate": 1.7838236406903454e-06, + "loss": 0.2934, + "step": 14295 + }, + { + "epoch": 2.44, + "learning_rate": 1.782774116821414e-06, + "loss": 0.3355, + "step": 14296 + }, + { + "epoch": 2.44, + "learning_rate": 1.781724871574867e-06, + "loss": 0.2943, + "step": 14297 + }, + { + "epoch": 2.44, + "learning_rate": 1.780675904986281e-06, + "loss": 0.2864, + "step": 14298 + }, + { + "epoch": 2.44, + "learning_rate": 1.7796272170912255e-06, + "loss": 0.3296, + "step": 14299 + }, + { + "epoch": 2.44, + "learning_rate": 1.7785788079252532e-06, + "loss": 0.3274, + "step": 14300 + }, + { + "epoch": 2.44, + "learning_rate": 1.7775306775239188e-06, + "loss": 0.3195, + "step": 14301 + }, + { + "epoch": 2.44, + "learning_rate": 1.776482825922755e-06, + "loss": 0.3235, + "step": 14302 + }, + { + "epoch": 2.44, + "learning_rate": 1.775435253157295e-06, + "loss": 0.3197, + "step": 14303 + }, + { + "epoch": 2.44, + "learning_rate": 1.7743879592630575e-06, + "loss": 0.32, + "step": 14304 + }, + { + "epoch": 2.44, + "learning_rate": 1.7733409442755556e-06, + "loss": 0.3234, + "step": 14305 + }, + { + "epoch": 2.44, + "learning_rate": 1.7722942082302885e-06, + "loss": 0.3068, + "step": 14306 + }, + { + "epoch": 2.44, + "learning_rate": 1.7712477511627512e-06, + "loss": 0.28, + "step": 14307 + }, + { + "epoch": 2.44, + "learning_rate": 1.770201573108421e-06, + "loss": 0.3314, + "step": 14308 + }, + { + "epoch": 2.44, + "learning_rate": 1.7691556741027738e-06, + "loss": 0.3102, + "step": 14309 + }, + { + "epoch": 2.44, + "learning_rate": 1.7681100541812713e-06, + "loss": 0.301, + "step": 14310 + }, + { + "epoch": 2.44, + "learning_rate": 1.7670647133793695e-06, + "loss": 0.3075, + "step": 14311 + }, + { + "epoch": 2.44, + "learning_rate": 1.7660196517325123e-06, + "loss": 0.3163, + "step": 14312 + }, + { + "epoch": 2.44, + "learning_rate": 1.7649748692761336e-06, + "loss": 0.305, + "step": 14313 + }, + { + "epoch": 2.44, + "learning_rate": 1.7639303660456631e-06, + "loss": 0.2979, + "step": 14314 + }, + { + "epoch": 2.44, + "learning_rate": 1.7628861420765108e-06, + "loss": 0.3138, + "step": 14315 + }, + { + "epoch": 2.44, + "learning_rate": 1.7618421974040856e-06, + "loss": 0.3113, + "step": 14316 + }, + { + "epoch": 2.44, + "learning_rate": 1.7607985320637876e-06, + "loss": 0.321, + "step": 14317 + }, + { + "epoch": 2.44, + "learning_rate": 1.7597551460910001e-06, + "loss": 0.3165, + "step": 14318 + }, + { + "epoch": 2.44, + "learning_rate": 1.7587120395211033e-06, + "loss": 0.2944, + "step": 14319 + }, + { + "epoch": 2.44, + "learning_rate": 1.7576692123894656e-06, + "loss": 0.3088, + "step": 14320 + }, + { + "epoch": 2.44, + "learning_rate": 1.7566266647314466e-06, + "loss": 0.3144, + "step": 14321 + }, + { + "epoch": 2.44, + "learning_rate": 1.7555843965823992e-06, + "loss": 0.2984, + "step": 14322 + }, + { + "epoch": 2.44, + "learning_rate": 1.7545424079776574e-06, + "loss": 0.3131, + "step": 14323 + }, + { + "epoch": 2.44, + "learning_rate": 1.7535006989525548e-06, + "loss": 0.3183, + "step": 14324 + }, + { + "epoch": 2.44, + "learning_rate": 1.7524592695424135e-06, + "loss": 0.3132, + "step": 14325 + }, + { + "epoch": 2.44, + "learning_rate": 1.7514181197825441e-06, + "loss": 0.3065, + "step": 14326 + }, + { + "epoch": 2.44, + "learning_rate": 1.7503772497082506e-06, + "loss": 0.3234, + "step": 14327 + }, + { + "epoch": 2.44, + "learning_rate": 1.7493366593548255e-06, + "loss": 0.2971, + "step": 14328 + }, + { + "epoch": 2.44, + "learning_rate": 1.748296348757551e-06, + "loss": 0.3181, + "step": 14329 + }, + { + "epoch": 2.44, + "learning_rate": 1.7472563179517044e-06, + "loss": 0.33, + "step": 14330 + }, + { + "epoch": 2.44, + "learning_rate": 1.746216566972545e-06, + "loss": 0.3223, + "step": 14331 + }, + { + "epoch": 2.44, + "learning_rate": 1.7451770958553337e-06, + "loss": 0.3288, + "step": 14332 + }, + { + "epoch": 2.44, + "learning_rate": 1.744137904635309e-06, + "loss": 0.3208, + "step": 14333 + }, + { + "epoch": 2.44, + "learning_rate": 1.7430989933477116e-06, + "loss": 0.3114, + "step": 14334 + }, + { + "epoch": 2.44, + "learning_rate": 1.7420603620277653e-06, + "loss": 0.3434, + "step": 14335 + }, + { + "epoch": 2.44, + "learning_rate": 1.74102201071069e-06, + "loss": 0.3261, + "step": 14336 + }, + { + "epoch": 2.45, + "learning_rate": 1.739983939431694e-06, + "loss": 0.3, + "step": 14337 + }, + { + "epoch": 2.45, + "learning_rate": 1.7389461482259707e-06, + "loss": 0.297, + "step": 14338 + }, + { + "epoch": 2.45, + "learning_rate": 1.7379086371287103e-06, + "loss": 0.2899, + "step": 14339 + }, + { + "epoch": 2.45, + "learning_rate": 1.7368714061750924e-06, + "loss": 0.3227, + "step": 14340 + }, + { + "epoch": 2.45, + "learning_rate": 1.7358344554002882e-06, + "loss": 0.3258, + "step": 14341 + }, + { + "epoch": 2.45, + "learning_rate": 1.7347977848394549e-06, + "loss": 0.3169, + "step": 14342 + }, + { + "epoch": 2.45, + "learning_rate": 1.7337613945277443e-06, + "loss": 0.2761, + "step": 14343 + }, + { + "epoch": 2.45, + "learning_rate": 1.7327252845002972e-06, + "loss": 0.3116, + "step": 14344 + }, + { + "epoch": 2.45, + "learning_rate": 1.731689454792248e-06, + "loss": 0.309, + "step": 14345 + }, + { + "epoch": 2.45, + "learning_rate": 1.730653905438714e-06, + "loss": 0.3192, + "step": 14346 + }, + { + "epoch": 2.45, + "learning_rate": 1.7296186364748092e-06, + "loss": 0.3209, + "step": 14347 + }, + { + "epoch": 2.45, + "learning_rate": 1.7285836479356389e-06, + "loss": 0.3078, + "step": 14348 + }, + { + "epoch": 2.45, + "learning_rate": 1.7275489398562929e-06, + "loss": 0.3228, + "step": 14349 + }, + { + "epoch": 2.45, + "learning_rate": 1.7265145122718563e-06, + "loss": 0.3422, + "step": 14350 + }, + { + "epoch": 2.45, + "learning_rate": 1.7254803652174046e-06, + "loss": 0.3198, + "step": 14351 + }, + { + "epoch": 2.45, + "learning_rate": 1.7244464987280041e-06, + "loss": 0.3266, + "step": 14352 + }, + { + "epoch": 2.45, + "learning_rate": 1.7234129128387067e-06, + "loss": 0.3094, + "step": 14353 + }, + { + "epoch": 2.45, + "learning_rate": 1.72237960758456e-06, + "loss": 0.3129, + "step": 14354 + }, + { + "epoch": 2.45, + "learning_rate": 1.7213465830006004e-06, + "loss": 0.3183, + "step": 14355 + }, + { + "epoch": 2.45, + "learning_rate": 1.7203138391218544e-06, + "loss": 0.2973, + "step": 14356 + }, + { + "epoch": 2.45, + "learning_rate": 1.7192813759833405e-06, + "loss": 0.2649, + "step": 14357 + }, + { + "epoch": 2.45, + "learning_rate": 1.7182491936200651e-06, + "loss": 0.3011, + "step": 14358 + }, + { + "epoch": 2.45, + "learning_rate": 1.7172172920670284e-06, + "loss": 0.3179, + "step": 14359 + }, + { + "epoch": 2.45, + "learning_rate": 1.7161856713592195e-06, + "loss": 0.3242, + "step": 14360 + }, + { + "epoch": 2.45, + "learning_rate": 1.7151543315316133e-06, + "loss": 0.3059, + "step": 14361 + }, + { + "epoch": 2.45, + "learning_rate": 1.7141232726191836e-06, + "loss": 0.3004, + "step": 14362 + }, + { + "epoch": 2.45, + "learning_rate": 1.7130924946568883e-06, + "loss": 0.2995, + "step": 14363 + }, + { + "epoch": 2.45, + "learning_rate": 1.7120619976796825e-06, + "loss": 0.2979, + "step": 14364 + }, + { + "epoch": 2.45, + "learning_rate": 1.711031781722502e-06, + "loss": 0.3153, + "step": 14365 + }, + { + "epoch": 2.45, + "learning_rate": 1.7100018468202805e-06, + "loss": 0.3281, + "step": 14366 + }, + { + "epoch": 2.45, + "learning_rate": 1.7089721930079396e-06, + "loss": 0.3285, + "step": 14367 + }, + { + "epoch": 2.45, + "learning_rate": 1.7079428203203952e-06, + "loss": 0.3379, + "step": 14368 + }, + { + "epoch": 2.45, + "learning_rate": 1.7069137287925464e-06, + "loss": 0.3083, + "step": 14369 + }, + { + "epoch": 2.45, + "learning_rate": 1.7058849184592862e-06, + "loss": 0.3163, + "step": 14370 + }, + { + "epoch": 2.45, + "learning_rate": 1.7048563893555015e-06, + "loss": 0.2815, + "step": 14371 + }, + { + "epoch": 2.45, + "learning_rate": 1.7038281415160652e-06, + "loss": 0.2905, + "step": 14372 + }, + { + "epoch": 2.45, + "learning_rate": 1.7028001749758428e-06, + "loss": 0.3014, + "step": 14373 + }, + { + "epoch": 2.45, + "learning_rate": 1.7017724897696897e-06, + "loss": 0.3176, + "step": 14374 + }, + { + "epoch": 2.45, + "learning_rate": 1.7007450859324537e-06, + "loss": 0.2841, + "step": 14375 + }, + { + "epoch": 2.45, + "learning_rate": 1.6997179634989668e-06, + "loss": 0.3226, + "step": 14376 + }, + { + "epoch": 2.45, + "learning_rate": 1.698691122504057e-06, + "loss": 0.3224, + "step": 14377 + }, + { + "epoch": 2.45, + "learning_rate": 1.6976645629825439e-06, + "loss": 0.3141, + "step": 14378 + }, + { + "epoch": 2.45, + "learning_rate": 1.6966382849692354e-06, + "loss": 0.317, + "step": 14379 + }, + { + "epoch": 2.45, + "learning_rate": 1.6956122884989247e-06, + "loss": 0.33, + "step": 14380 + }, + { + "epoch": 2.45, + "learning_rate": 1.6945865736064048e-06, + "loss": 0.3249, + "step": 14381 + }, + { + "epoch": 2.45, + "learning_rate": 1.6935611403264529e-06, + "loss": 0.3065, + "step": 14382 + }, + { + "epoch": 2.45, + "learning_rate": 1.6925359886938409e-06, + "loss": 0.3306, + "step": 14383 + }, + { + "epoch": 2.45, + "learning_rate": 1.6915111187433253e-06, + "loss": 0.2756, + "step": 14384 + }, + { + "epoch": 2.45, + "learning_rate": 1.6904865305096585e-06, + "loss": 0.3298, + "step": 14385 + }, + { + "epoch": 2.45, + "learning_rate": 1.6894622240275805e-06, + "loss": 0.3012, + "step": 14386 + }, + { + "epoch": 2.45, + "learning_rate": 1.6884381993318222e-06, + "loss": 0.3153, + "step": 14387 + }, + { + "epoch": 2.45, + "learning_rate": 1.6874144564571071e-06, + "loss": 0.3156, + "step": 14388 + }, + { + "epoch": 2.45, + "learning_rate": 1.6863909954381463e-06, + "loss": 0.3061, + "step": 14389 + }, + { + "epoch": 2.45, + "learning_rate": 1.6853678163096443e-06, + "loss": 0.3043, + "step": 14390 + }, + { + "epoch": 2.45, + "learning_rate": 1.68434491910629e-06, + "loss": 0.3307, + "step": 14391 + }, + { + "epoch": 2.45, + "learning_rate": 1.68332230386277e-06, + "loss": 0.2808, + "step": 14392 + }, + { + "epoch": 2.45, + "learning_rate": 1.6822999706137565e-06, + "loss": 0.337, + "step": 14393 + }, + { + "epoch": 2.45, + "learning_rate": 1.6812779193939156e-06, + "loss": 0.3109, + "step": 14394 + }, + { + "epoch": 2.46, + "learning_rate": 1.6802561502379034e-06, + "loss": 0.3047, + "step": 14395 + }, + { + "epoch": 2.46, + "learning_rate": 1.6792346631803602e-06, + "loss": 0.3234, + "step": 14396 + }, + { + "epoch": 2.46, + "learning_rate": 1.678213458255925e-06, + "loss": 0.331, + "step": 14397 + }, + { + "epoch": 2.46, + "learning_rate": 1.6771925354992257e-06, + "loss": 0.3066, + "step": 14398 + }, + { + "epoch": 2.46, + "learning_rate": 1.6761718949448746e-06, + "loss": 0.3025, + "step": 14399 + }, + { + "epoch": 2.46, + "learning_rate": 1.6751515366274795e-06, + "loss": 0.2991, + "step": 14400 + }, + { + "epoch": 2.46, + "learning_rate": 1.6741314605816406e-06, + "loss": 0.3236, + "step": 14401 + }, + { + "epoch": 2.46, + "learning_rate": 1.6731116668419422e-06, + "loss": 0.2848, + "step": 14402 + }, + { + "epoch": 2.46, + "learning_rate": 1.672092155442966e-06, + "loss": 0.3035, + "step": 14403 + }, + { + "epoch": 2.46, + "learning_rate": 1.6710729264192782e-06, + "loss": 0.2926, + "step": 14404 + }, + { + "epoch": 2.46, + "learning_rate": 1.6700539798054417e-06, + "loss": 0.2805, + "step": 14405 + }, + { + "epoch": 2.46, + "learning_rate": 1.6690353156360005e-06, + "loss": 0.2925, + "step": 14406 + }, + { + "epoch": 2.46, + "learning_rate": 1.6680169339454976e-06, + "loss": 0.3175, + "step": 14407 + }, + { + "epoch": 2.46, + "learning_rate": 1.666998834768463e-06, + "loss": 0.3119, + "step": 14408 + }, + { + "epoch": 2.46, + "learning_rate": 1.6659810181394165e-06, + "loss": 0.2943, + "step": 14409 + }, + { + "epoch": 2.46, + "learning_rate": 1.664963484092872e-06, + "loss": 0.322, + "step": 14410 + }, + { + "epoch": 2.46, + "learning_rate": 1.6639462326633304e-06, + "loss": 0.2936, + "step": 14411 + }, + { + "epoch": 2.46, + "learning_rate": 1.662929263885281e-06, + "loss": 0.3199, + "step": 14412 + }, + { + "epoch": 2.46, + "learning_rate": 1.6619125777932098e-06, + "loss": 0.325, + "step": 14413 + }, + { + "epoch": 2.46, + "learning_rate": 1.6608961744215869e-06, + "loss": 0.307, + "step": 14414 + }, + { + "epoch": 2.46, + "learning_rate": 1.6598800538048765e-06, + "loss": 0.296, + "step": 14415 + }, + { + "epoch": 2.46, + "learning_rate": 1.6588642159775315e-06, + "loss": 0.3253, + "step": 14416 + }, + { + "epoch": 2.46, + "learning_rate": 1.6578486609739974e-06, + "loss": 0.3184, + "step": 14417 + }, + { + "epoch": 2.46, + "learning_rate": 1.6568333888287092e-06, + "loss": 0.3003, + "step": 14418 + }, + { + "epoch": 2.46, + "learning_rate": 1.6558183995760902e-06, + "loss": 0.3166, + "step": 14419 + }, + { + "epoch": 2.46, + "learning_rate": 1.6548036932505585e-06, + "loss": 0.3119, + "step": 14420 + }, + { + "epoch": 2.46, + "learning_rate": 1.6537892698865166e-06, + "loss": 0.3012, + "step": 14421 + }, + { + "epoch": 2.46, + "learning_rate": 1.6527751295183615e-06, + "loss": 0.3024, + "step": 14422 + }, + { + "epoch": 2.46, + "learning_rate": 1.65176127218048e-06, + "loss": 0.2922, + "step": 14423 + }, + { + "epoch": 2.46, + "learning_rate": 1.650747697907249e-06, + "loss": 0.3103, + "step": 14424 + }, + { + "epoch": 2.46, + "learning_rate": 1.6497344067330369e-06, + "loss": 0.3021, + "step": 14425 + }, + { + "epoch": 2.46, + "learning_rate": 1.6487213986922035e-06, + "loss": 0.3087, + "step": 14426 + }, + { + "epoch": 2.46, + "learning_rate": 1.6477086738190905e-06, + "loss": 0.3201, + "step": 14427 + }, + { + "epoch": 2.46, + "learning_rate": 1.6466962321480418e-06, + "loss": 0.3227, + "step": 14428 + }, + { + "epoch": 2.46, + "learning_rate": 1.645684073713386e-06, + "loss": 0.2995, + "step": 14429 + }, + { + "epoch": 2.46, + "learning_rate": 1.6446721985494396e-06, + "loss": 0.2776, + "step": 14430 + }, + { + "epoch": 2.46, + "learning_rate": 1.6436606066905136e-06, + "loss": 0.3221, + "step": 14431 + }, + { + "epoch": 2.46, + "learning_rate": 1.6426492981709086e-06, + "loss": 0.3233, + "step": 14432 + }, + { + "epoch": 2.46, + "learning_rate": 1.6416382730249158e-06, + "loss": 0.3049, + "step": 14433 + }, + { + "epoch": 2.46, + "learning_rate": 1.6406275312868147e-06, + "loss": 0.3006, + "step": 14434 + }, + { + "epoch": 2.46, + "learning_rate": 1.63961707299088e-06, + "loss": 0.3025, + "step": 14435 + }, + { + "epoch": 2.46, + "learning_rate": 1.6386068981713689e-06, + "loss": 0.3254, + "step": 14436 + }, + { + "epoch": 2.46, + "learning_rate": 1.6375970068625346e-06, + "loss": 0.3065, + "step": 14437 + }, + { + "epoch": 2.46, + "learning_rate": 1.6365873990986203e-06, + "loss": 0.3302, + "step": 14438 + }, + { + "epoch": 2.46, + "learning_rate": 1.6355780749138595e-06, + "loss": 0.3, + "step": 14439 + }, + { + "epoch": 2.46, + "learning_rate": 1.6345690343424758e-06, + "loss": 0.315, + "step": 14440 + }, + { + "epoch": 2.46, + "learning_rate": 1.6335602774186809e-06, + "loss": 0.3133, + "step": 14441 + }, + { + "epoch": 2.46, + "learning_rate": 1.632551804176683e-06, + "loss": 0.3063, + "step": 14442 + }, + { + "epoch": 2.46, + "learning_rate": 1.6315436146506702e-06, + "loss": 0.3078, + "step": 14443 + }, + { + "epoch": 2.46, + "learning_rate": 1.630535708874833e-06, + "loss": 0.3081, + "step": 14444 + }, + { + "epoch": 2.46, + "learning_rate": 1.629528086883343e-06, + "loss": 0.3285, + "step": 14445 + }, + { + "epoch": 2.46, + "learning_rate": 1.6285207487103661e-06, + "loss": 0.305, + "step": 14446 + }, + { + "epoch": 2.46, + "learning_rate": 1.6275136943900582e-06, + "loss": 0.3101, + "step": 14447 + }, + { + "epoch": 2.46, + "learning_rate": 1.6265069239565679e-06, + "loss": 0.3062, + "step": 14448 + }, + { + "epoch": 2.46, + "learning_rate": 1.6255004374440296e-06, + "loss": 0.3139, + "step": 14449 + }, + { + "epoch": 2.46, + "learning_rate": 1.6244942348865734e-06, + "loss": 0.3151, + "step": 14450 + }, + { + "epoch": 2.46, + "learning_rate": 1.6234883163183124e-06, + "loss": 0.3215, + "step": 14451 + }, + { + "epoch": 2.46, + "learning_rate": 1.6224826817733564e-06, + "loss": 0.3023, + "step": 14452 + }, + { + "epoch": 2.46, + "learning_rate": 1.6214773312858034e-06, + "loss": 0.3101, + "step": 14453 + }, + { + "epoch": 2.47, + "learning_rate": 1.620472264889743e-06, + "loss": 0.2924, + "step": 14454 + }, + { + "epoch": 2.47, + "learning_rate": 1.6194674826192526e-06, + "loss": 0.321, + "step": 14455 + }, + { + "epoch": 2.47, + "learning_rate": 1.6184629845084021e-06, + "loss": 0.2938, + "step": 14456 + }, + { + "epoch": 2.47, + "learning_rate": 1.6174587705912503e-06, + "loss": 0.2915, + "step": 14457 + }, + { + "epoch": 2.47, + "learning_rate": 1.616454840901852e-06, + "loss": 0.33, + "step": 14458 + }, + { + "epoch": 2.47, + "learning_rate": 1.6154511954742403e-06, + "loss": 0.2929, + "step": 14459 + }, + { + "epoch": 2.47, + "learning_rate": 1.6144478343424519e-06, + "loss": 0.3016, + "step": 14460 + }, + { + "epoch": 2.47, + "learning_rate": 1.613444757540502e-06, + "loss": 0.301, + "step": 14461 + }, + { + "epoch": 2.47, + "learning_rate": 1.612441965102406e-06, + "loss": 0.3237, + "step": 14462 + }, + { + "epoch": 2.47, + "learning_rate": 1.611439457062165e-06, + "loss": 0.2939, + "step": 14463 + }, + { + "epoch": 2.47, + "learning_rate": 1.610437233453771e-06, + "loss": 0.3151, + "step": 14464 + }, + { + "epoch": 2.47, + "learning_rate": 1.6094352943112079e-06, + "loss": 0.3041, + "step": 14465 + }, + { + "epoch": 2.47, + "learning_rate": 1.6084336396684463e-06, + "loss": 0.2993, + "step": 14466 + }, + { + "epoch": 2.47, + "learning_rate": 1.607432269559449e-06, + "loss": 0.3112, + "step": 14467 + }, + { + "epoch": 2.47, + "learning_rate": 1.6064311840181723e-06, + "loss": 0.3308, + "step": 14468 + }, + { + "epoch": 2.47, + "learning_rate": 1.6054303830785578e-06, + "loss": 0.332, + "step": 14469 + }, + { + "epoch": 2.47, + "learning_rate": 1.6044298667745407e-06, + "loss": 0.3332, + "step": 14470 + }, + { + "epoch": 2.47, + "learning_rate": 1.6034296351400459e-06, + "loss": 0.3137, + "step": 14471 + }, + { + "epoch": 2.47, + "learning_rate": 1.6024296882089874e-06, + "loss": 0.3022, + "step": 14472 + }, + { + "epoch": 2.47, + "learning_rate": 1.6014300260152738e-06, + "loss": 0.3049, + "step": 14473 + }, + { + "epoch": 2.47, + "learning_rate": 1.6004306485927968e-06, + "loss": 0.3098, + "step": 14474 + }, + { + "epoch": 2.47, + "learning_rate": 1.5994315559754426e-06, + "loss": 0.3175, + "step": 14475 + }, + { + "epoch": 2.47, + "learning_rate": 1.5984327481970919e-06, + "loss": 0.2934, + "step": 14476 + }, + { + "epoch": 2.47, + "learning_rate": 1.5974342252916054e-06, + "loss": 0.2995, + "step": 14477 + }, + { + "epoch": 2.47, + "learning_rate": 1.5964359872928424e-06, + "loss": 0.3099, + "step": 14478 + }, + { + "epoch": 2.47, + "learning_rate": 1.595438034234651e-06, + "loss": 0.3165, + "step": 14479 + }, + { + "epoch": 2.47, + "learning_rate": 1.5944403661508712e-06, + "loss": 0.3188, + "step": 14480 + }, + { + "epoch": 2.47, + "learning_rate": 1.5934429830753262e-06, + "loss": 0.2925, + "step": 14481 + }, + { + "epoch": 2.47, + "learning_rate": 1.5924458850418368e-06, + "loss": 0.2973, + "step": 14482 + }, + { + "epoch": 2.47, + "learning_rate": 1.5914490720842112e-06, + "loss": 0.3087, + "step": 14483 + }, + { + "epoch": 2.47, + "learning_rate": 1.5904525442362484e-06, + "loss": 0.2975, + "step": 14484 + }, + { + "epoch": 2.47, + "learning_rate": 1.5894563015317389e-06, + "loss": 0.334, + "step": 14485 + }, + { + "epoch": 2.47, + "learning_rate": 1.5884603440044622e-06, + "loss": 0.3241, + "step": 14486 + }, + { + "epoch": 2.47, + "learning_rate": 1.587464671688187e-06, + "loss": 0.3122, + "step": 14487 + }, + { + "epoch": 2.47, + "learning_rate": 1.5864692846166774e-06, + "loss": 0.3322, + "step": 14488 + }, + { + "epoch": 2.47, + "learning_rate": 1.5854741828236786e-06, + "loss": 0.322, + "step": 14489 + }, + { + "epoch": 2.47, + "learning_rate": 1.5844793663429348e-06, + "loss": 0.3326, + "step": 14490 + }, + { + "epoch": 2.47, + "learning_rate": 1.5834848352081788e-06, + "loss": 0.3056, + "step": 14491 + }, + { + "epoch": 2.47, + "learning_rate": 1.5824905894531272e-06, + "loss": 0.2861, + "step": 14492 + }, + { + "epoch": 2.47, + "learning_rate": 1.5814966291114964e-06, + "loss": 0.2976, + "step": 14493 + }, + { + "epoch": 2.47, + "learning_rate": 1.580502954216987e-06, + "loss": 0.3312, + "step": 14494 + }, + { + "epoch": 2.47, + "learning_rate": 1.5795095648032943e-06, + "loss": 0.3357, + "step": 14495 + }, + { + "epoch": 2.47, + "learning_rate": 1.5785164609040959e-06, + "loss": 0.3307, + "step": 14496 + }, + { + "epoch": 2.47, + "learning_rate": 1.5775236425530692e-06, + "loss": 0.2989, + "step": 14497 + }, + { + "epoch": 2.47, + "learning_rate": 1.5765311097838764e-06, + "loss": 0.3182, + "step": 14498 + }, + { + "epoch": 2.47, + "learning_rate": 1.5755388626301725e-06, + "loss": 0.3198, + "step": 14499 + }, + { + "epoch": 2.47, + "learning_rate": 1.5745469011255999e-06, + "loss": 0.3272, + "step": 14500 + }, + { + "epoch": 2.47, + "learning_rate": 1.5735552253037955e-06, + "loss": 0.2777, + "step": 14501 + }, + { + "epoch": 2.47, + "learning_rate": 1.5725638351983819e-06, + "loss": 0.2882, + "step": 14502 + }, + { + "epoch": 2.47, + "learning_rate": 1.5715727308429785e-06, + "loss": 0.2978, + "step": 14503 + }, + { + "epoch": 2.47, + "learning_rate": 1.570581912271185e-06, + "loss": 0.3412, + "step": 14504 + }, + { + "epoch": 2.47, + "learning_rate": 1.5695913795165996e-06, + "loss": 0.3011, + "step": 14505 + }, + { + "epoch": 2.47, + "learning_rate": 1.5686011326128093e-06, + "loss": 0.3024, + "step": 14506 + }, + { + "epoch": 2.47, + "learning_rate": 1.5676111715933917e-06, + "loss": 0.3146, + "step": 14507 + }, + { + "epoch": 2.47, + "learning_rate": 1.566621496491909e-06, + "loss": 0.2986, + "step": 14508 + }, + { + "epoch": 2.47, + "learning_rate": 1.5656321073419211e-06, + "loss": 0.3189, + "step": 14509 + }, + { + "epoch": 2.47, + "learning_rate": 1.5646430041769777e-06, + "loss": 0.3305, + "step": 14510 + }, + { + "epoch": 2.47, + "learning_rate": 1.563654187030611e-06, + "loss": 0.2951, + "step": 14511 + }, + { + "epoch": 2.47, + "learning_rate": 1.562665655936353e-06, + "loss": 0.2944, + "step": 14512 + }, + { + "epoch": 2.48, + "learning_rate": 1.56167741092772e-06, + "loss": 0.3219, + "step": 14513 + }, + { + "epoch": 2.48, + "learning_rate": 1.5606894520382209e-06, + "loss": 0.2932, + "step": 14514 + }, + { + "epoch": 2.48, + "learning_rate": 1.5597017793013547e-06, + "loss": 0.3162, + "step": 14515 + }, + { + "epoch": 2.48, + "learning_rate": 1.558714392750611e-06, + "loss": 0.335, + "step": 14516 + }, + { + "epoch": 2.48, + "learning_rate": 1.5577272924194685e-06, + "loss": 0.3047, + "step": 14517 + }, + { + "epoch": 2.48, + "learning_rate": 1.5567404783414008e-06, + "loss": 0.2977, + "step": 14518 + }, + { + "epoch": 2.48, + "learning_rate": 1.555753950549861e-06, + "loss": 0.3029, + "step": 14519 + }, + { + "epoch": 2.48, + "learning_rate": 1.5547677090783031e-06, + "loss": 0.3279, + "step": 14520 + }, + { + "epoch": 2.48, + "learning_rate": 1.5537817539601684e-06, + "loss": 0.2969, + "step": 14521 + }, + { + "epoch": 2.48, + "learning_rate": 1.552796085228886e-06, + "loss": 0.3157, + "step": 14522 + }, + { + "epoch": 2.48, + "learning_rate": 1.5518107029178798e-06, + "loss": 0.3045, + "step": 14523 + }, + { + "epoch": 2.48, + "learning_rate": 1.5508256070605576e-06, + "loss": 0.3134, + "step": 14524 + }, + { + "epoch": 2.48, + "learning_rate": 1.5498407976903262e-06, + "loss": 0.3141, + "step": 14525 + }, + { + "epoch": 2.48, + "learning_rate": 1.548856274840571e-06, + "loss": 0.334, + "step": 14526 + }, + { + "epoch": 2.48, + "learning_rate": 1.5478720385446778e-06, + "loss": 0.3292, + "step": 14527 + }, + { + "epoch": 2.48, + "learning_rate": 1.5468880888360194e-06, + "loss": 0.3156, + "step": 14528 + }, + { + "epoch": 2.48, + "learning_rate": 1.5459044257479582e-06, + "loss": 0.3075, + "step": 14529 + }, + { + "epoch": 2.48, + "learning_rate": 1.5449210493138467e-06, + "loss": 0.3374, + "step": 14530 + }, + { + "epoch": 2.48, + "learning_rate": 1.54393795956703e-06, + "loss": 0.3145, + "step": 14531 + }, + { + "epoch": 2.48, + "learning_rate": 1.5429551565408418e-06, + "loss": 0.3209, + "step": 14532 + }, + { + "epoch": 2.48, + "learning_rate": 1.5419726402686076e-06, + "loss": 0.2975, + "step": 14533 + }, + { + "epoch": 2.48, + "learning_rate": 1.540990410783636e-06, + "loss": 0.2854, + "step": 14534 + }, + { + "epoch": 2.48, + "learning_rate": 1.5400084681192363e-06, + "loss": 0.3153, + "step": 14535 + }, + { + "epoch": 2.48, + "learning_rate": 1.5390268123087004e-06, + "loss": 0.3015, + "step": 14536 + }, + { + "epoch": 2.48, + "learning_rate": 1.5380454433853165e-06, + "loss": 0.316, + "step": 14537 + }, + { + "epoch": 2.48, + "learning_rate": 1.5370643613823611e-06, + "loss": 0.3043, + "step": 14538 + }, + { + "epoch": 2.48, + "learning_rate": 1.5360835663330941e-06, + "loss": 0.3106, + "step": 14539 + }, + { + "epoch": 2.48, + "learning_rate": 1.5351030582707748e-06, + "loss": 0.3206, + "step": 14540 + }, + { + "epoch": 2.48, + "learning_rate": 1.534122837228652e-06, + "loss": 0.3091, + "step": 14541 + }, + { + "epoch": 2.48, + "learning_rate": 1.5331429032399558e-06, + "loss": 0.3217, + "step": 14542 + }, + { + "epoch": 2.48, + "learning_rate": 1.5321632563379185e-06, + "loss": 0.3187, + "step": 14543 + }, + { + "epoch": 2.48, + "learning_rate": 1.5311838965557534e-06, + "loss": 0.2988, + "step": 14544 + }, + { + "epoch": 2.48, + "learning_rate": 1.530204823926671e-06, + "loss": 0.3081, + "step": 14545 + }, + { + "epoch": 2.48, + "learning_rate": 1.5292260384838664e-06, + "loss": 0.3049, + "step": 14546 + }, + { + "epoch": 2.48, + "learning_rate": 1.528247540260528e-06, + "loss": 0.3235, + "step": 14547 + }, + { + "epoch": 2.48, + "learning_rate": 1.5272693292898367e-06, + "loss": 0.311, + "step": 14548 + }, + { + "epoch": 2.48, + "learning_rate": 1.526291405604956e-06, + "loss": 0.3077, + "step": 14549 + }, + { + "epoch": 2.48, + "learning_rate": 1.5253137692390474e-06, + "loss": 0.3194, + "step": 14550 + }, + { + "epoch": 2.48, + "learning_rate": 1.5243364202252586e-06, + "loss": 0.351, + "step": 14551 + }, + { + "epoch": 2.48, + "learning_rate": 1.5233593585967288e-06, + "loss": 0.3279, + "step": 14552 + }, + { + "epoch": 2.48, + "learning_rate": 1.5223825843865881e-06, + "loss": 0.3267, + "step": 14553 + }, + { + "epoch": 2.48, + "learning_rate": 1.5214060976279576e-06, + "loss": 0.3003, + "step": 14554 + }, + { + "epoch": 2.48, + "learning_rate": 1.5204298983539434e-06, + "loss": 0.3182, + "step": 14555 + }, + { + "epoch": 2.48, + "learning_rate": 1.5194539865976488e-06, + "loss": 0.3053, + "step": 14556 + }, + { + "epoch": 2.48, + "learning_rate": 1.5184783623921606e-06, + "loss": 0.3247, + "step": 14557 + }, + { + "epoch": 2.48, + "learning_rate": 1.5175030257705625e-06, + "loss": 0.3121, + "step": 14558 + }, + { + "epoch": 2.48, + "learning_rate": 1.5165279767659235e-06, + "loss": 0.3121, + "step": 14559 + }, + { + "epoch": 2.48, + "learning_rate": 1.5155532154113062e-06, + "loss": 0.3161, + "step": 14560 + }, + { + "epoch": 2.48, + "learning_rate": 1.5145787417397606e-06, + "loss": 0.3316, + "step": 14561 + }, + { + "epoch": 2.48, + "learning_rate": 1.5136045557843293e-06, + "loss": 0.2977, + "step": 14562 + }, + { + "epoch": 2.48, + "learning_rate": 1.5126306575780458e-06, + "loss": 0.3038, + "step": 14563 + }, + { + "epoch": 2.48, + "learning_rate": 1.5116570471539294e-06, + "loss": 0.2977, + "step": 14564 + }, + { + "epoch": 2.48, + "learning_rate": 1.5106837245449912e-06, + "loss": 0.3263, + "step": 14565 + }, + { + "epoch": 2.48, + "learning_rate": 1.5097106897842374e-06, + "loss": 0.3349, + "step": 14566 + }, + { + "epoch": 2.48, + "learning_rate": 1.5087379429046577e-06, + "loss": 0.339, + "step": 14567 + }, + { + "epoch": 2.48, + "learning_rate": 1.5077654839392376e-06, + "loss": 0.292, + "step": 14568 + }, + { + "epoch": 2.48, + "learning_rate": 1.5067933129209489e-06, + "loss": 0.3066, + "step": 14569 + }, + { + "epoch": 2.48, + "learning_rate": 1.5058214298827589e-06, + "loss": 0.3099, + "step": 14570 + }, + { + "epoch": 2.49, + "learning_rate": 1.5048498348576146e-06, + "loss": 0.3431, + "step": 14571 + }, + { + "epoch": 2.49, + "learning_rate": 1.5038785278784662e-06, + "loss": 0.311, + "step": 14572 + }, + { + "epoch": 2.49, + "learning_rate": 1.502907508978243e-06, + "loss": 0.284, + "step": 14573 + }, + { + "epoch": 2.49, + "learning_rate": 1.5019367781898709e-06, + "loss": 0.3063, + "step": 14574 + }, + { + "epoch": 2.49, + "learning_rate": 1.5009663355462656e-06, + "loss": 0.3054, + "step": 14575 + }, + { + "epoch": 2.49, + "learning_rate": 1.499996181080332e-06, + "loss": 0.3096, + "step": 14576 + }, + { + "epoch": 2.49, + "learning_rate": 1.4990263148249651e-06, + "loss": 0.3143, + "step": 14577 + }, + { + "epoch": 2.49, + "learning_rate": 1.4980567368130515e-06, + "loss": 0.3077, + "step": 14578 + }, + { + "epoch": 2.49, + "learning_rate": 1.4970874470774632e-06, + "loss": 0.3101, + "step": 14579 + }, + { + "epoch": 2.49, + "learning_rate": 1.4961184456510692e-06, + "loss": 0.2965, + "step": 14580 + }, + { + "epoch": 2.49, + "learning_rate": 1.495149732566723e-06, + "loss": 0.3058, + "step": 14581 + }, + { + "epoch": 2.49, + "learning_rate": 1.494181307857273e-06, + "loss": 0.3171, + "step": 14582 + }, + { + "epoch": 2.49, + "learning_rate": 1.4932131715555553e-06, + "loss": 0.3225, + "step": 14583 + }, + { + "epoch": 2.49, + "learning_rate": 1.4922453236943968e-06, + "loss": 0.3105, + "step": 14584 + }, + { + "epoch": 2.49, + "learning_rate": 1.4912777643066156e-06, + "loss": 0.3016, + "step": 14585 + }, + { + "epoch": 2.49, + "learning_rate": 1.4903104934250144e-06, + "loss": 0.3079, + "step": 14586 + }, + { + "epoch": 2.49, + "learning_rate": 1.489343511082394e-06, + "loss": 0.3061, + "step": 14587 + }, + { + "epoch": 2.49, + "learning_rate": 1.4883768173115443e-06, + "loss": 0.2986, + "step": 14588 + }, + { + "epoch": 2.49, + "learning_rate": 1.4874104121452372e-06, + "loss": 0.2961, + "step": 14589 + }, + { + "epoch": 2.49, + "learning_rate": 1.4864442956162429e-06, + "loss": 0.3041, + "step": 14590 + }, + { + "epoch": 2.49, + "learning_rate": 1.4854784677573209e-06, + "loss": 0.3188, + "step": 14591 + }, + { + "epoch": 2.49, + "learning_rate": 1.4845129286012194e-06, + "loss": 0.2977, + "step": 14592 + }, + { + "epoch": 2.49, + "learning_rate": 1.483547678180679e-06, + "loss": 0.326, + "step": 14593 + }, + { + "epoch": 2.49, + "learning_rate": 1.4825827165284247e-06, + "loss": 0.2997, + "step": 14594 + }, + { + "epoch": 2.49, + "learning_rate": 1.4816180436771754e-06, + "loss": 0.327, + "step": 14595 + }, + { + "epoch": 2.49, + "learning_rate": 1.4806536596596432e-06, + "loss": 0.3152, + "step": 14596 + }, + { + "epoch": 2.49, + "learning_rate": 1.4796895645085262e-06, + "loss": 0.3188, + "step": 14597 + }, + { + "epoch": 2.49, + "learning_rate": 1.478725758256515e-06, + "loss": 0.3117, + "step": 14598 + }, + { + "epoch": 2.49, + "learning_rate": 1.4777622409362879e-06, + "loss": 0.3285, + "step": 14599 + }, + { + "epoch": 2.49, + "learning_rate": 1.4767990125805166e-06, + "loss": 0.3193, + "step": 14600 + }, + { + "epoch": 2.49, + "learning_rate": 1.4758360732218636e-06, + "loss": 0.3263, + "step": 14601 + }, + { + "epoch": 2.49, + "learning_rate": 1.4748734228929728e-06, + "loss": 0.3084, + "step": 14602 + }, + { + "epoch": 2.49, + "learning_rate": 1.4739110616264918e-06, + "loss": 0.3005, + "step": 14603 + }, + { + "epoch": 2.49, + "learning_rate": 1.4729489894550465e-06, + "loss": 0.3057, + "step": 14604 + }, + { + "epoch": 2.49, + "learning_rate": 1.4719872064112584e-06, + "loss": 0.3154, + "step": 14605 + }, + { + "epoch": 2.49, + "learning_rate": 1.4710257125277416e-06, + "loss": 0.3412, + "step": 14606 + }, + { + "epoch": 2.49, + "learning_rate": 1.4700645078370956e-06, + "loss": 0.2976, + "step": 14607 + }, + { + "epoch": 2.49, + "learning_rate": 1.4691035923719143e-06, + "loss": 0.316, + "step": 14608 + }, + { + "epoch": 2.49, + "learning_rate": 1.468142966164776e-06, + "loss": 0.3036, + "step": 14609 + }, + { + "epoch": 2.49, + "learning_rate": 1.4671826292482549e-06, + "loss": 0.3064, + "step": 14610 + }, + { + "epoch": 2.49, + "learning_rate": 1.4662225816549137e-06, + "loss": 0.2968, + "step": 14611 + }, + { + "epoch": 2.49, + "learning_rate": 1.4652628234173027e-06, + "loss": 0.3072, + "step": 14612 + }, + { + "epoch": 2.49, + "learning_rate": 1.4643033545679676e-06, + "loss": 0.2875, + "step": 14613 + }, + { + "epoch": 2.49, + "learning_rate": 1.4633441751394384e-06, + "loss": 0.296, + "step": 14614 + }, + { + "epoch": 2.49, + "learning_rate": 1.4623852851642395e-06, + "loss": 0.3171, + "step": 14615 + }, + { + "epoch": 2.49, + "learning_rate": 1.461426684674886e-06, + "loss": 0.3179, + "step": 14616 + }, + { + "epoch": 2.49, + "learning_rate": 1.4604683737038772e-06, + "loss": 0.2724, + "step": 14617 + }, + { + "epoch": 2.49, + "learning_rate": 1.4595103522837084e-06, + "loss": 0.304, + "step": 14618 + }, + { + "epoch": 2.49, + "learning_rate": 1.4585526204468658e-06, + "loss": 0.3164, + "step": 14619 + }, + { + "epoch": 2.49, + "learning_rate": 1.457595178225819e-06, + "loss": 0.3441, + "step": 14620 + }, + { + "epoch": 2.49, + "learning_rate": 1.4566380256530344e-06, + "loss": 0.3333, + "step": 14621 + }, + { + "epoch": 2.49, + "learning_rate": 1.4556811627609657e-06, + "loss": 0.3131, + "step": 14622 + }, + { + "epoch": 2.49, + "learning_rate": 1.4547245895820605e-06, + "loss": 0.3137, + "step": 14623 + }, + { + "epoch": 2.49, + "learning_rate": 1.4537683061487485e-06, + "loss": 0.3293, + "step": 14624 + }, + { + "epoch": 2.49, + "learning_rate": 1.4528123124934556e-06, + "loss": 0.3122, + "step": 14625 + }, + { + "epoch": 2.49, + "learning_rate": 1.4518566086485985e-06, + "loss": 0.3229, + "step": 14626 + }, + { + "epoch": 2.49, + "learning_rate": 1.4509011946465812e-06, + "loss": 0.3071, + "step": 14627 + }, + { + "epoch": 2.49, + "learning_rate": 1.4499460705198e-06, + "loss": 0.3091, + "step": 14628 + }, + { + "epoch": 2.49, + "learning_rate": 1.4489912363006397e-06, + "loss": 0.3173, + "step": 14629 + }, + { + "epoch": 2.5, + "learning_rate": 1.4480366920214762e-06, + "loss": 0.3376, + "step": 14630 + }, + { + "epoch": 2.5, + "learning_rate": 1.4470824377146775e-06, + "loss": 0.2958, + "step": 14631 + }, + { + "epoch": 2.5, + "learning_rate": 1.4461284734125957e-06, + "loss": 0.3229, + "step": 14632 + }, + { + "epoch": 2.5, + "learning_rate": 1.4451747991475785e-06, + "loss": 0.3081, + "step": 14633 + }, + { + "epoch": 2.5, + "learning_rate": 1.444221414951964e-06, + "loss": 0.2962, + "step": 14634 + }, + { + "epoch": 2.5, + "learning_rate": 1.4432683208580756e-06, + "loss": 0.3221, + "step": 14635 + }, + { + "epoch": 2.5, + "learning_rate": 1.4423155168982317e-06, + "loss": 0.3171, + "step": 14636 + }, + { + "epoch": 2.5, + "learning_rate": 1.4413630031047376e-06, + "loss": 0.3208, + "step": 14637 + }, + { + "epoch": 2.5, + "learning_rate": 1.4404107795098954e-06, + "loss": 0.314, + "step": 14638 + }, + { + "epoch": 2.5, + "learning_rate": 1.4394588461459856e-06, + "loss": 0.2875, + "step": 14639 + }, + { + "epoch": 2.5, + "learning_rate": 1.4385072030452884e-06, + "loss": 0.3379, + "step": 14640 + }, + { + "epoch": 2.5, + "learning_rate": 1.437555850240071e-06, + "loss": 0.3053, + "step": 14641 + }, + { + "epoch": 2.5, + "learning_rate": 1.436604787762591e-06, + "loss": 0.32, + "step": 14642 + }, + { + "epoch": 2.5, + "learning_rate": 1.4356540156450971e-06, + "loss": 0.2879, + "step": 14643 + }, + { + "epoch": 2.5, + "learning_rate": 1.4347035339198268e-06, + "loss": 0.2929, + "step": 14644 + }, + { + "epoch": 2.5, + "learning_rate": 1.4337533426190075e-06, + "loss": 0.3147, + "step": 14645 + }, + { + "epoch": 2.5, + "learning_rate": 1.4328034417748604e-06, + "loss": 0.3315, + "step": 14646 + }, + { + "epoch": 2.5, + "learning_rate": 1.4318538314195895e-06, + "loss": 0.3095, + "step": 14647 + }, + { + "epoch": 2.5, + "learning_rate": 1.4309045115853947e-06, + "loss": 0.3154, + "step": 14648 + }, + { + "epoch": 2.5, + "learning_rate": 1.4299554823044649e-06, + "loss": 0.3113, + "step": 14649 + }, + { + "epoch": 2.5, + "learning_rate": 1.4290067436089815e-06, + "loss": 0.3105, + "step": 14650 + }, + { + "epoch": 2.5, + "learning_rate": 1.4280582955311106e-06, + "loss": 0.3034, + "step": 14651 + }, + { + "epoch": 2.5, + "learning_rate": 1.4271101381030106e-06, + "loss": 0.2893, + "step": 14652 + }, + { + "epoch": 2.5, + "learning_rate": 1.4261622713568347e-06, + "loss": 0.2984, + "step": 14653 + }, + { + "epoch": 2.5, + "learning_rate": 1.425214695324718e-06, + "loss": 0.3203, + "step": 14654 + }, + { + "epoch": 2.5, + "learning_rate": 1.424267410038791e-06, + "loss": 0.2967, + "step": 14655 + }, + { + "epoch": 2.5, + "learning_rate": 1.4233204155311752e-06, + "loss": 0.3066, + "step": 14656 + }, + { + "epoch": 2.5, + "learning_rate": 1.4223737118339786e-06, + "loss": 0.3103, + "step": 14657 + }, + { + "epoch": 2.5, + "learning_rate": 1.4214272989793033e-06, + "loss": 0.3028, + "step": 14658 + }, + { + "epoch": 2.5, + "learning_rate": 1.4204811769992377e-06, + "loss": 0.2799, + "step": 14659 + }, + { + "epoch": 2.5, + "learning_rate": 1.419535345925862e-06, + "loss": 0.3262, + "step": 14660 + }, + { + "epoch": 2.5, + "learning_rate": 1.4185898057912495e-06, + "loss": 0.3215, + "step": 14661 + }, + { + "epoch": 2.5, + "learning_rate": 1.4176445566274566e-06, + "loss": 0.3288, + "step": 14662 + }, + { + "epoch": 2.5, + "learning_rate": 1.4166995984665366e-06, + "loss": 0.3176, + "step": 14663 + }, + { + "epoch": 2.5, + "learning_rate": 1.4157549313405284e-06, + "loss": 0.3008, + "step": 14664 + }, + { + "epoch": 2.5, + "learning_rate": 1.414810555281464e-06, + "loss": 0.3451, + "step": 14665 + }, + { + "epoch": 2.5, + "learning_rate": 1.4138664703213677e-06, + "loss": 0.3234, + "step": 14666 + }, + { + "epoch": 2.5, + "learning_rate": 1.4129226764922442e-06, + "loss": 0.31, + "step": 14667 + }, + { + "epoch": 2.5, + "learning_rate": 1.4119791738261012e-06, + "loss": 0.3033, + "step": 14668 + }, + { + "epoch": 2.5, + "learning_rate": 1.411035962354924e-06, + "loss": 0.3095, + "step": 14669 + }, + { + "epoch": 2.5, + "learning_rate": 1.4100930421106983e-06, + "loss": 0.3332, + "step": 14670 + }, + { + "epoch": 2.5, + "learning_rate": 1.4091504131253941e-06, + "loss": 0.3201, + "step": 14671 + }, + { + "epoch": 2.5, + "learning_rate": 1.4082080754309734e-06, + "loss": 0.3026, + "step": 14672 + }, + { + "epoch": 2.5, + "learning_rate": 1.40726602905939e-06, + "loss": 0.3148, + "step": 14673 + }, + { + "epoch": 2.5, + "learning_rate": 1.406324274042583e-06, + "loss": 0.2971, + "step": 14674 + }, + { + "epoch": 2.5, + "learning_rate": 1.4053828104124867e-06, + "loss": 0.316, + "step": 14675 + }, + { + "epoch": 2.5, + "learning_rate": 1.4044416382010252e-06, + "loss": 0.3293, + "step": 14676 + }, + { + "epoch": 2.5, + "learning_rate": 1.4035007574401072e-06, + "loss": 0.3151, + "step": 14677 + }, + { + "epoch": 2.5, + "learning_rate": 1.4025601681616351e-06, + "loss": 0.287, + "step": 14678 + }, + { + "epoch": 2.5, + "learning_rate": 1.4016198703975049e-06, + "loss": 0.3562, + "step": 14679 + }, + { + "epoch": 2.5, + "learning_rate": 1.400679864179596e-06, + "loss": 0.2978, + "step": 14680 + }, + { + "epoch": 2.5, + "learning_rate": 1.3997401495397855e-06, + "loss": 0.3093, + "step": 14681 + }, + { + "epoch": 2.5, + "learning_rate": 1.3988007265099324e-06, + "loss": 0.3165, + "step": 14682 + }, + { + "epoch": 2.5, + "learning_rate": 1.3978615951218899e-06, + "loss": 0.2948, + "step": 14683 + }, + { + "epoch": 2.5, + "learning_rate": 1.396922755407506e-06, + "loss": 0.3019, + "step": 14684 + }, + { + "epoch": 2.5, + "learning_rate": 1.3959842073986085e-06, + "loss": 0.2813, + "step": 14685 + }, + { + "epoch": 2.5, + "learning_rate": 1.395045951127022e-06, + "loss": 0.2741, + "step": 14686 + }, + { + "epoch": 2.5, + "learning_rate": 1.394107986624561e-06, + "loss": 0.313, + "step": 14687 + }, + { + "epoch": 2.5, + "learning_rate": 1.39317031392303e-06, + "loss": 0.3111, + "step": 14688 + }, + { + "epoch": 2.51, + "learning_rate": 1.3922329330542217e-06, + "loss": 0.3204, + "step": 14689 + }, + { + "epoch": 2.51, + "learning_rate": 1.3912958440499202e-06, + "loss": 0.2867, + "step": 14690 + }, + { + "epoch": 2.51, + "learning_rate": 1.3903590469419014e-06, + "loss": 0.3151, + "step": 14691 + }, + { + "epoch": 2.51, + "learning_rate": 1.3894225417619255e-06, + "loss": 0.3043, + "step": 14692 + }, + { + "epoch": 2.51, + "learning_rate": 1.3884863285417482e-06, + "loss": 0.3071, + "step": 14693 + }, + { + "epoch": 2.51, + "learning_rate": 1.387550407313115e-06, + "loss": 0.3015, + "step": 14694 + }, + { + "epoch": 2.51, + "learning_rate": 1.3866147781077587e-06, + "loss": 0.2865, + "step": 14695 + }, + { + "epoch": 2.51, + "learning_rate": 1.3856794409574059e-06, + "loss": 0.3215, + "step": 14696 + }, + { + "epoch": 2.51, + "learning_rate": 1.3847443958937712e-06, + "loss": 0.3276, + "step": 14697 + }, + { + "epoch": 2.51, + "learning_rate": 1.383809642948556e-06, + "loss": 0.2996, + "step": 14698 + }, + { + "epoch": 2.51, + "learning_rate": 1.3828751821534592e-06, + "loss": 0.318, + "step": 14699 + }, + { + "epoch": 2.51, + "learning_rate": 1.3819410135401612e-06, + "loss": 0.3098, + "step": 14700 + }, + { + "epoch": 2.51, + "learning_rate": 1.381007137140339e-06, + "loss": 0.3457, + "step": 14701 + }, + { + "epoch": 2.51, + "learning_rate": 1.3800735529856591e-06, + "loss": 0.3201, + "step": 14702 + }, + { + "epoch": 2.51, + "learning_rate": 1.3791402611077742e-06, + "loss": 0.3111, + "step": 14703 + }, + { + "epoch": 2.51, + "learning_rate": 1.3782072615383313e-06, + "loss": 0.3129, + "step": 14704 + }, + { + "epoch": 2.51, + "learning_rate": 1.377274554308965e-06, + "loss": 0.316, + "step": 14705 + }, + { + "epoch": 2.51, + "learning_rate": 1.3763421394513033e-06, + "loss": 0.3212, + "step": 14706 + }, + { + "epoch": 2.51, + "learning_rate": 1.3754100169969575e-06, + "loss": 0.3479, + "step": 14707 + }, + { + "epoch": 2.51, + "learning_rate": 1.3744781869775347e-06, + "loss": 0.3204, + "step": 14708 + }, + { + "epoch": 2.51, + "learning_rate": 1.3735466494246307e-06, + "loss": 0.3086, + "step": 14709 + }, + { + "epoch": 2.51, + "learning_rate": 1.3726154043698315e-06, + "loss": 0.304, + "step": 14710 + }, + { + "epoch": 2.51, + "learning_rate": 1.3716844518447125e-06, + "loss": 0.3112, + "step": 14711 + }, + { + "epoch": 2.51, + "learning_rate": 1.37075379188084e-06, + "loss": 0.2856, + "step": 14712 + }, + { + "epoch": 2.51, + "learning_rate": 1.369823424509773e-06, + "loss": 0.3174, + "step": 14713 + }, + { + "epoch": 2.51, + "learning_rate": 1.3688933497630507e-06, + "loss": 0.2958, + "step": 14714 + }, + { + "epoch": 2.51, + "learning_rate": 1.367963567672217e-06, + "loss": 0.3412, + "step": 14715 + }, + { + "epoch": 2.51, + "learning_rate": 1.3670340782687908e-06, + "loss": 0.2967, + "step": 14716 + }, + { + "epoch": 2.51, + "learning_rate": 1.3661048815842926e-06, + "loss": 0.2881, + "step": 14717 + }, + { + "epoch": 2.51, + "learning_rate": 1.365175977650227e-06, + "loss": 0.3393, + "step": 14718 + }, + { + "epoch": 2.51, + "learning_rate": 1.3642473664980927e-06, + "loss": 0.3226, + "step": 14719 + }, + { + "epoch": 2.51, + "learning_rate": 1.363319048159374e-06, + "loss": 0.3105, + "step": 14720 + }, + { + "epoch": 2.51, + "learning_rate": 1.3623910226655501e-06, + "loss": 0.3087, + "step": 14721 + }, + { + "epoch": 2.51, + "learning_rate": 1.361463290048085e-06, + "loss": 0.3022, + "step": 14722 + }, + { + "epoch": 2.51, + "learning_rate": 1.3605358503384358e-06, + "loss": 0.3029, + "step": 14723 + }, + { + "epoch": 2.51, + "learning_rate": 1.3596087035680495e-06, + "loss": 0.3346, + "step": 14724 + }, + { + "epoch": 2.51, + "learning_rate": 1.358681849768364e-06, + "loss": 0.3252, + "step": 14725 + }, + { + "epoch": 2.51, + "learning_rate": 1.3577552889708056e-06, + "loss": 0.3034, + "step": 14726 + }, + { + "epoch": 2.51, + "learning_rate": 1.3568290212067902e-06, + "loss": 0.3186, + "step": 14727 + }, + { + "epoch": 2.51, + "learning_rate": 1.3559030465077294e-06, + "loss": 0.3277, + "step": 14728 + }, + { + "epoch": 2.51, + "learning_rate": 1.3549773649050147e-06, + "loss": 0.313, + "step": 14729 + }, + { + "epoch": 2.51, + "learning_rate": 1.3540519764300352e-06, + "loss": 0.2817, + "step": 14730 + }, + { + "epoch": 2.51, + "learning_rate": 1.3531268811141707e-06, + "loss": 0.3302, + "step": 14731 + }, + { + "epoch": 2.51, + "learning_rate": 1.3522020789887836e-06, + "loss": 0.3297, + "step": 14732 + }, + { + "epoch": 2.51, + "learning_rate": 1.3512775700852342e-06, + "loss": 0.3093, + "step": 14733 + }, + { + "epoch": 2.51, + "learning_rate": 1.350353354434869e-06, + "loss": 0.3046, + "step": 14734 + }, + { + "epoch": 2.51, + "learning_rate": 1.349429432069027e-06, + "loss": 0.3134, + "step": 14735 + }, + { + "epoch": 2.51, + "learning_rate": 1.3485058030190367e-06, + "loss": 0.309, + "step": 14736 + }, + { + "epoch": 2.51, + "learning_rate": 1.3475824673162119e-06, + "loss": 0.3225, + "step": 14737 + }, + { + "epoch": 2.51, + "learning_rate": 1.346659424991862e-06, + "loss": 0.281, + "step": 14738 + }, + { + "epoch": 2.51, + "learning_rate": 1.345736676077285e-06, + "loss": 0.2932, + "step": 14739 + }, + { + "epoch": 2.51, + "learning_rate": 1.3448142206037684e-06, + "loss": 0.2978, + "step": 14740 + }, + { + "epoch": 2.51, + "learning_rate": 1.3438920586025906e-06, + "loss": 0.3, + "step": 14741 + }, + { + "epoch": 2.51, + "learning_rate": 1.3429701901050185e-06, + "loss": 0.3292, + "step": 14742 + }, + { + "epoch": 2.51, + "learning_rate": 1.3420486151423128e-06, + "loss": 0.2943, + "step": 14743 + }, + { + "epoch": 2.51, + "learning_rate": 1.3411273337457165e-06, + "loss": 0.3175, + "step": 14744 + }, + { + "epoch": 2.51, + "learning_rate": 1.340206345946471e-06, + "loss": 0.3152, + "step": 14745 + }, + { + "epoch": 2.51, + "learning_rate": 1.3392856517758057e-06, + "loss": 0.3134, + "step": 14746 + }, + { + "epoch": 2.52, + "learning_rate": 1.3383652512649337e-06, + "loss": 0.3295, + "step": 14747 + }, + { + "epoch": 2.52, + "learning_rate": 1.3374451444450664e-06, + "loss": 0.3211, + "step": 14748 + }, + { + "epoch": 2.52, + "learning_rate": 1.3365253313474014e-06, + "loss": 0.3367, + "step": 14749 + }, + { + "epoch": 2.52, + "learning_rate": 1.3356058120031267e-06, + "loss": 0.2832, + "step": 14750 + }, + { + "epoch": 2.52, + "learning_rate": 1.3346865864434234e-06, + "loss": 0.2891, + "step": 14751 + }, + { + "epoch": 2.52, + "learning_rate": 1.3337676546994548e-06, + "loss": 0.3136, + "step": 14752 + }, + { + "epoch": 2.52, + "learning_rate": 1.3328490168023821e-06, + "loss": 0.3281, + "step": 14753 + }, + { + "epoch": 2.52, + "learning_rate": 1.3319306727833526e-06, + "loss": 0.3257, + "step": 14754 + }, + { + "epoch": 2.52, + "learning_rate": 1.3310126226735054e-06, + "loss": 0.3073, + "step": 14755 + }, + { + "epoch": 2.52, + "learning_rate": 1.3300948665039691e-06, + "loss": 0.2995, + "step": 14756 + }, + { + "epoch": 2.52, + "learning_rate": 1.3291774043058624e-06, + "loss": 0.2892, + "step": 14757 + }, + { + "epoch": 2.52, + "learning_rate": 1.3282602361102926e-06, + "loss": 0.2924, + "step": 14758 + }, + { + "epoch": 2.52, + "learning_rate": 1.3273433619483612e-06, + "loss": 0.2735, + "step": 14759 + }, + { + "epoch": 2.52, + "learning_rate": 1.3264267818511522e-06, + "loss": 0.3264, + "step": 14760 + }, + { + "epoch": 2.52, + "learning_rate": 1.3255104958497467e-06, + "loss": 0.3087, + "step": 14761 + }, + { + "epoch": 2.52, + "learning_rate": 1.3245945039752151e-06, + "loss": 0.2886, + "step": 14762 + }, + { + "epoch": 2.52, + "learning_rate": 1.3236788062586126e-06, + "loss": 0.2959, + "step": 14763 + }, + { + "epoch": 2.52, + "learning_rate": 1.3227634027309876e-06, + "loss": 0.3092, + "step": 14764 + }, + { + "epoch": 2.52, + "learning_rate": 1.3218482934233824e-06, + "loss": 0.298, + "step": 14765 + }, + { + "epoch": 2.52, + "learning_rate": 1.3209334783668248e-06, + "loss": 0.3231, + "step": 14766 + }, + { + "epoch": 2.52, + "learning_rate": 1.3200189575923294e-06, + "loss": 0.319, + "step": 14767 + }, + { + "epoch": 2.52, + "learning_rate": 1.319104731130909e-06, + "loss": 0.3078, + "step": 14768 + }, + { + "epoch": 2.52, + "learning_rate": 1.3181907990135624e-06, + "loss": 0.3302, + "step": 14769 + }, + { + "epoch": 2.52, + "learning_rate": 1.3172771612712764e-06, + "loss": 0.3052, + "step": 14770 + }, + { + "epoch": 2.52, + "learning_rate": 1.3163638179350314e-06, + "loss": 0.32, + "step": 14771 + }, + { + "epoch": 2.52, + "learning_rate": 1.3154507690357943e-06, + "loss": 0.3006, + "step": 14772 + }, + { + "epoch": 2.52, + "learning_rate": 1.314538014604526e-06, + "loss": 0.3131, + "step": 14773 + }, + { + "epoch": 2.52, + "learning_rate": 1.3136255546721778e-06, + "loss": 0.3103, + "step": 14774 + }, + { + "epoch": 2.52, + "learning_rate": 1.3127133892696831e-06, + "loss": 0.3115, + "step": 14775 + }, + { + "epoch": 2.52, + "learning_rate": 1.311801518427972e-06, + "loss": 0.34, + "step": 14776 + }, + { + "epoch": 2.52, + "learning_rate": 1.3108899421779652e-06, + "loss": 0.3342, + "step": 14777 + }, + { + "epoch": 2.52, + "learning_rate": 1.3099786605505738e-06, + "loss": 0.329, + "step": 14778 + }, + { + "epoch": 2.52, + "learning_rate": 1.3090676735766906e-06, + "loss": 0.304, + "step": 14779 + }, + { + "epoch": 2.52, + "learning_rate": 1.3081569812872085e-06, + "loss": 0.3079, + "step": 14780 + }, + { + "epoch": 2.52, + "learning_rate": 1.3072465837130078e-06, + "loss": 0.3173, + "step": 14781 + }, + { + "epoch": 2.52, + "learning_rate": 1.3063364808849532e-06, + "loss": 0.3076, + "step": 14782 + }, + { + "epoch": 2.52, + "learning_rate": 1.305426672833906e-06, + "loss": 0.3064, + "step": 14783 + }, + { + "epoch": 2.52, + "learning_rate": 1.3045171595907157e-06, + "loss": 0.3176, + "step": 14784 + }, + { + "epoch": 2.52, + "learning_rate": 1.3036079411862202e-06, + "loss": 0.3151, + "step": 14785 + }, + { + "epoch": 2.52, + "learning_rate": 1.3026990176512489e-06, + "loss": 0.3169, + "step": 14786 + }, + { + "epoch": 2.52, + "learning_rate": 1.3017903890166206e-06, + "loss": 0.3031, + "step": 14787 + }, + { + "epoch": 2.52, + "learning_rate": 1.300882055313144e-06, + "loss": 0.2887, + "step": 14788 + }, + { + "epoch": 2.52, + "learning_rate": 1.2999740165716213e-06, + "loss": 0.3167, + "step": 14789 + }, + { + "epoch": 2.52, + "learning_rate": 1.2990662728228364e-06, + "loss": 0.3012, + "step": 14790 + }, + { + "epoch": 2.52, + "learning_rate": 1.2981588240975696e-06, + "loss": 0.3164, + "step": 14791 + }, + { + "epoch": 2.52, + "learning_rate": 1.2972516704265925e-06, + "loss": 0.2989, + "step": 14792 + }, + { + "epoch": 2.52, + "learning_rate": 1.296344811840663e-06, + "loss": 0.3368, + "step": 14793 + }, + { + "epoch": 2.52, + "learning_rate": 1.2954382483705286e-06, + "loss": 0.3108, + "step": 14794 + }, + { + "epoch": 2.52, + "learning_rate": 1.294531980046928e-06, + "loss": 0.3192, + "step": 14795 + }, + { + "epoch": 2.52, + "learning_rate": 1.2936260069005935e-06, + "loss": 0.3121, + "step": 14796 + }, + { + "epoch": 2.52, + "learning_rate": 1.2927203289622404e-06, + "loss": 0.3311, + "step": 14797 + }, + { + "epoch": 2.52, + "learning_rate": 1.2918149462625784e-06, + "loss": 0.3174, + "step": 14798 + }, + { + "epoch": 2.52, + "learning_rate": 1.2909098588323066e-06, + "loss": 0.2937, + "step": 14799 + }, + { + "epoch": 2.52, + "learning_rate": 1.2900050667021146e-06, + "loss": 0.3411, + "step": 14800 + }, + { + "epoch": 2.52, + "learning_rate": 1.2891005699026814e-06, + "loss": 0.3186, + "step": 14801 + }, + { + "epoch": 2.52, + "learning_rate": 1.2881963684646748e-06, + "loss": 0.3393, + "step": 14802 + }, + { + "epoch": 2.52, + "learning_rate": 1.2872924624187544e-06, + "loss": 0.3301, + "step": 14803 + }, + { + "epoch": 2.52, + "learning_rate": 1.2863888517955714e-06, + "loss": 0.3185, + "step": 14804 + }, + { + "epoch": 2.52, + "learning_rate": 1.28548553662576e-06, + "loss": 0.3033, + "step": 14805 + }, + { + "epoch": 2.53, + "learning_rate": 1.2845825169399506e-06, + "loss": 0.3154, + "step": 14806 + }, + { + "epoch": 2.53, + "learning_rate": 1.2836797927687627e-06, + "loss": 0.3054, + "step": 14807 + }, + { + "epoch": 2.53, + "learning_rate": 1.2827773641428054e-06, + "loss": 0.3015, + "step": 14808 + }, + { + "epoch": 2.53, + "learning_rate": 1.2818752310926796e-06, + "loss": 0.3026, + "step": 14809 + }, + { + "epoch": 2.53, + "learning_rate": 1.2809733936489677e-06, + "loss": 0.3191, + "step": 14810 + }, + { + "epoch": 2.53, + "learning_rate": 1.280071851842255e-06, + "loss": 0.3213, + "step": 14811 + }, + { + "epoch": 2.53, + "learning_rate": 1.2791706057031061e-06, + "loss": 0.3145, + "step": 14812 + }, + { + "epoch": 2.53, + "learning_rate": 1.2782696552620799e-06, + "loss": 0.3198, + "step": 14813 + }, + { + "epoch": 2.53, + "learning_rate": 1.2773690005497253e-06, + "loss": 0.2944, + "step": 14814 + }, + { + "epoch": 2.53, + "learning_rate": 1.276468641596582e-06, + "loss": 0.2919, + "step": 14815 + }, + { + "epoch": 2.53, + "learning_rate": 1.2755685784331784e-06, + "loss": 0.3211, + "step": 14816 + }, + { + "epoch": 2.53, + "learning_rate": 1.274668811090033e-06, + "loss": 0.2954, + "step": 14817 + }, + { + "epoch": 2.53, + "learning_rate": 1.2737693395976536e-06, + "loss": 0.3107, + "step": 14818 + }, + { + "epoch": 2.53, + "learning_rate": 1.2728701639865416e-06, + "loss": 0.2877, + "step": 14819 + }, + { + "epoch": 2.53, + "learning_rate": 1.271971284287181e-06, + "loss": 0.2971, + "step": 14820 + }, + { + "epoch": 2.53, + "learning_rate": 1.2710727005300517e-06, + "loss": 0.3543, + "step": 14821 + }, + { + "epoch": 2.53, + "learning_rate": 1.2701744127456228e-06, + "loss": 0.3049, + "step": 14822 + }, + { + "epoch": 2.53, + "learning_rate": 1.2692764209643526e-06, + "loss": 0.303, + "step": 14823 + }, + { + "epoch": 2.53, + "learning_rate": 1.268378725216689e-06, + "loss": 0.3208, + "step": 14824 + }, + { + "epoch": 2.53, + "learning_rate": 1.267481325533072e-06, + "loss": 0.3087, + "step": 14825 + }, + { + "epoch": 2.53, + "learning_rate": 1.2665842219439273e-06, + "loss": 0.3106, + "step": 14826 + }, + { + "epoch": 2.53, + "learning_rate": 1.2656874144796761e-06, + "loss": 0.3023, + "step": 14827 + }, + { + "epoch": 2.53, + "learning_rate": 1.2647909031707217e-06, + "loss": 0.3106, + "step": 14828 + }, + { + "epoch": 2.53, + "learning_rate": 1.2638946880474657e-06, + "loss": 0.3218, + "step": 14829 + }, + { + "epoch": 2.53, + "learning_rate": 1.2629987691402957e-06, + "loss": 0.3144, + "step": 14830 + }, + { + "epoch": 2.53, + "learning_rate": 1.2621031464795896e-06, + "loss": 0.2852, + "step": 14831 + }, + { + "epoch": 2.53, + "learning_rate": 1.2612078200957146e-06, + "loss": 0.328, + "step": 14832 + }, + { + "epoch": 2.53, + "learning_rate": 1.2603127900190293e-06, + "loss": 0.2932, + "step": 14833 + }, + { + "epoch": 2.53, + "learning_rate": 1.2594180562798851e-06, + "loss": 0.3321, + "step": 14834 + }, + { + "epoch": 2.53, + "learning_rate": 1.2585236189086125e-06, + "loss": 0.342, + "step": 14835 + }, + { + "epoch": 2.53, + "learning_rate": 1.2576294779355446e-06, + "loss": 0.3079, + "step": 14836 + }, + { + "epoch": 2.53, + "learning_rate": 1.2567356333909976e-06, + "loss": 0.3276, + "step": 14837 + }, + { + "epoch": 2.53, + "learning_rate": 1.255842085305279e-06, + "loss": 0.3296, + "step": 14838 + }, + { + "epoch": 2.53, + "learning_rate": 1.254948833708687e-06, + "loss": 0.2984, + "step": 14839 + }, + { + "epoch": 2.53, + "learning_rate": 1.2540558786315105e-06, + "loss": 0.3202, + "step": 14840 + }, + { + "epoch": 2.53, + "learning_rate": 1.2531632201040234e-06, + "loss": 0.3283, + "step": 14841 + }, + { + "epoch": 2.53, + "learning_rate": 1.2522708581564946e-06, + "loss": 0.3208, + "step": 14842 + }, + { + "epoch": 2.53, + "learning_rate": 1.2513787928191856e-06, + "loss": 0.2986, + "step": 14843 + }, + { + "epoch": 2.53, + "learning_rate": 1.2504870241223377e-06, + "loss": 0.3171, + "step": 14844 + }, + { + "epoch": 2.53, + "learning_rate": 1.24959555209619e-06, + "loss": 0.325, + "step": 14845 + }, + { + "epoch": 2.53, + "learning_rate": 1.2487043767709717e-06, + "loss": 0.3006, + "step": 14846 + }, + { + "epoch": 2.53, + "learning_rate": 1.2478134981768986e-06, + "loss": 0.3086, + "step": 14847 + }, + { + "epoch": 2.53, + "learning_rate": 1.2469229163441776e-06, + "loss": 0.3015, + "step": 14848 + }, + { + "epoch": 2.53, + "learning_rate": 1.246032631303008e-06, + "loss": 0.316, + "step": 14849 + }, + { + "epoch": 2.53, + "learning_rate": 1.2451426430835733e-06, + "loss": 0.2939, + "step": 14850 + }, + { + "epoch": 2.53, + "learning_rate": 1.2442529517160518e-06, + "loss": 0.3309, + "step": 14851 + }, + { + "epoch": 2.53, + "learning_rate": 1.2433635572306114e-06, + "loss": 0.2853, + "step": 14852 + }, + { + "epoch": 2.53, + "learning_rate": 1.2424744596574078e-06, + "loss": 0.3114, + "step": 14853 + }, + { + "epoch": 2.53, + "learning_rate": 1.2415856590265874e-06, + "loss": 0.3171, + "step": 14854 + }, + { + "epoch": 2.53, + "learning_rate": 1.2406971553682878e-06, + "loss": 0.3248, + "step": 14855 + }, + { + "epoch": 2.53, + "learning_rate": 1.2398089487126385e-06, + "loss": 0.3035, + "step": 14856 + }, + { + "epoch": 2.53, + "learning_rate": 1.23892103908975e-06, + "loss": 0.3592, + "step": 14857 + }, + { + "epoch": 2.53, + "learning_rate": 1.2380334265297333e-06, + "loss": 0.3124, + "step": 14858 + }, + { + "epoch": 2.53, + "learning_rate": 1.2371461110626813e-06, + "loss": 0.3174, + "step": 14859 + }, + { + "epoch": 2.53, + "learning_rate": 1.2362590927186824e-06, + "loss": 0.3015, + "step": 14860 + }, + { + "epoch": 2.53, + "learning_rate": 1.2353723715278122e-06, + "loss": 0.315, + "step": 14861 + }, + { + "epoch": 2.53, + "learning_rate": 1.2344859475201377e-06, + "loss": 0.3263, + "step": 14862 + }, + { + "epoch": 2.53, + "learning_rate": 1.2335998207257138e-06, + "loss": 0.3134, + "step": 14863 + }, + { + "epoch": 2.54, + "learning_rate": 1.23271399117459e-06, + "loss": 0.3244, + "step": 14864 + }, + { + "epoch": 2.54, + "learning_rate": 1.2318284588967967e-06, + "loss": 0.3125, + "step": 14865 + }, + { + "epoch": 2.54, + "learning_rate": 1.230943223922363e-06, + "loss": 0.3055, + "step": 14866 + }, + { + "epoch": 2.54, + "learning_rate": 1.2300582862813026e-06, + "loss": 0.3145, + "step": 14867 + }, + { + "epoch": 2.54, + "learning_rate": 1.2291736460036242e-06, + "loss": 0.2898, + "step": 14868 + }, + { + "epoch": 2.54, + "learning_rate": 1.2282893031193211e-06, + "loss": 0.3236, + "step": 14869 + }, + { + "epoch": 2.54, + "learning_rate": 1.2274052576583794e-06, + "loss": 0.3149, + "step": 14870 + }, + { + "epoch": 2.54, + "learning_rate": 1.2265215096507776e-06, + "loss": 0.3143, + "step": 14871 + }, + { + "epoch": 2.54, + "learning_rate": 1.225638059126476e-06, + "loss": 0.308, + "step": 14872 + }, + { + "epoch": 2.54, + "learning_rate": 1.2247549061154307e-06, + "loss": 0.3159, + "step": 14873 + }, + { + "epoch": 2.54, + "learning_rate": 1.2238720506475909e-06, + "loss": 0.3135, + "step": 14874 + }, + { + "epoch": 2.54, + "learning_rate": 1.2229894927528873e-06, + "loss": 0.2846, + "step": 14875 + }, + { + "epoch": 2.54, + "learning_rate": 1.222107232461247e-06, + "loss": 0.2964, + "step": 14876 + }, + { + "epoch": 2.54, + "learning_rate": 1.221225269802583e-06, + "loss": 0.3141, + "step": 14877 + }, + { + "epoch": 2.54, + "learning_rate": 1.2203436048068018e-06, + "loss": 0.307, + "step": 14878 + }, + { + "epoch": 2.54, + "learning_rate": 1.2194622375038013e-06, + "loss": 0.2885, + "step": 14879 + }, + { + "epoch": 2.54, + "learning_rate": 1.2185811679234593e-06, + "loss": 0.289, + "step": 14880 + }, + { + "epoch": 2.54, + "learning_rate": 1.2177003960956535e-06, + "loss": 0.3271, + "step": 14881 + }, + { + "epoch": 2.54, + "learning_rate": 1.2168199220502485e-06, + "loss": 0.3041, + "step": 14882 + }, + { + "epoch": 2.54, + "learning_rate": 1.2159397458170996e-06, + "loss": 0.3152, + "step": 14883 + }, + { + "epoch": 2.54, + "learning_rate": 1.2150598674260493e-06, + "loss": 0.3129, + "step": 14884 + }, + { + "epoch": 2.54, + "learning_rate": 1.214180286906933e-06, + "loss": 0.3029, + "step": 14885 + }, + { + "epoch": 2.54, + "learning_rate": 1.2133010042895755e-06, + "loss": 0.2913, + "step": 14886 + }, + { + "epoch": 2.54, + "learning_rate": 1.2124220196037883e-06, + "loss": 0.3203, + "step": 14887 + }, + { + "epoch": 2.54, + "learning_rate": 1.2115433328793757e-06, + "loss": 0.2919, + "step": 14888 + }, + { + "epoch": 2.54, + "learning_rate": 1.2106649441461327e-06, + "loss": 0.3123, + "step": 14889 + }, + { + "epoch": 2.54, + "learning_rate": 1.2097868534338442e-06, + "loss": 0.3084, + "step": 14890 + }, + { + "epoch": 2.54, + "learning_rate": 1.2089090607722787e-06, + "loss": 0.3152, + "step": 14891 + }, + { + "epoch": 2.54, + "learning_rate": 1.2080315661912046e-06, + "loss": 0.3004, + "step": 14892 + }, + { + "epoch": 2.54, + "learning_rate": 1.2071543697203725e-06, + "loss": 0.3272, + "step": 14893 + }, + { + "epoch": 2.54, + "learning_rate": 1.2062774713895288e-06, + "loss": 0.3184, + "step": 14894 + }, + { + "epoch": 2.54, + "learning_rate": 1.2054008712284016e-06, + "loss": 0.3334, + "step": 14895 + }, + { + "epoch": 2.54, + "learning_rate": 1.2045245692667162e-06, + "loss": 0.3007, + "step": 14896 + }, + { + "epoch": 2.54, + "learning_rate": 1.2036485655341867e-06, + "loss": 0.2918, + "step": 14897 + }, + { + "epoch": 2.54, + "learning_rate": 1.202772860060515e-06, + "loss": 0.3199, + "step": 14898 + }, + { + "epoch": 2.54, + "learning_rate": 1.2018974528753935e-06, + "loss": 0.3101, + "step": 14899 + }, + { + "epoch": 2.54, + "learning_rate": 1.2010223440085044e-06, + "loss": 0.326, + "step": 14900 + }, + { + "epoch": 2.54, + "learning_rate": 1.200147533489524e-06, + "loss": 0.296, + "step": 14901 + }, + { + "epoch": 2.54, + "learning_rate": 1.1992730213481074e-06, + "loss": 0.3169, + "step": 14902 + }, + { + "epoch": 2.54, + "learning_rate": 1.198398807613912e-06, + "loss": 0.3218, + "step": 14903 + }, + { + "epoch": 2.54, + "learning_rate": 1.1975248923165784e-06, + "loss": 0.3256, + "step": 14904 + }, + { + "epoch": 2.54, + "learning_rate": 1.1966512754857407e-06, + "loss": 0.3445, + "step": 14905 + }, + { + "epoch": 2.54, + "learning_rate": 1.1957779571510152e-06, + "loss": 0.2942, + "step": 14906 + }, + { + "epoch": 2.54, + "learning_rate": 1.1949049373420186e-06, + "loss": 0.3155, + "step": 14907 + }, + { + "epoch": 2.54, + "learning_rate": 1.1940322160883499e-06, + "loss": 0.3005, + "step": 14908 + }, + { + "epoch": 2.54, + "learning_rate": 1.1931597934196049e-06, + "loss": 0.3057, + "step": 14909 + }, + { + "epoch": 2.54, + "learning_rate": 1.1922876693653584e-06, + "loss": 0.3306, + "step": 14910 + }, + { + "epoch": 2.54, + "learning_rate": 1.191415843955186e-06, + "loss": 0.3149, + "step": 14911 + }, + { + "epoch": 2.54, + "learning_rate": 1.1905443172186459e-06, + "loss": 0.3322, + "step": 14912 + }, + { + "epoch": 2.54, + "learning_rate": 1.1896730891852926e-06, + "loss": 0.3292, + "step": 14913 + }, + { + "epoch": 2.54, + "learning_rate": 1.1888021598846643e-06, + "loss": 0.3226, + "step": 14914 + }, + { + "epoch": 2.54, + "learning_rate": 1.1879315293462923e-06, + "loss": 0.3298, + "step": 14915 + }, + { + "epoch": 2.54, + "learning_rate": 1.1870611975996993e-06, + "loss": 0.3302, + "step": 14916 + }, + { + "epoch": 2.54, + "learning_rate": 1.1861911646743919e-06, + "loss": 0.3117, + "step": 14917 + }, + { + "epoch": 2.54, + "learning_rate": 1.1853214305998716e-06, + "loss": 0.3239, + "step": 14918 + }, + { + "epoch": 2.54, + "learning_rate": 1.18445199540563e-06, + "loss": 0.3076, + "step": 14919 + }, + { + "epoch": 2.54, + "learning_rate": 1.1835828591211462e-06, + "loss": 0.322, + "step": 14920 + }, + { + "epoch": 2.54, + "learning_rate": 1.1827140217758926e-06, + "loss": 0.3007, + "step": 14921 + }, + { + "epoch": 2.54, + "learning_rate": 1.1818454833993254e-06, + "loss": 0.3145, + "step": 14922 + }, + { + "epoch": 2.55, + "learning_rate": 1.1809772440208944e-06, + "loss": 0.3045, + "step": 14923 + }, + { + "epoch": 2.55, + "learning_rate": 1.1801093036700417e-06, + "loss": 0.3064, + "step": 14924 + }, + { + "epoch": 2.55, + "learning_rate": 1.1792416623761938e-06, + "loss": 0.306, + "step": 14925 + }, + { + "epoch": 2.55, + "learning_rate": 1.1783743201687714e-06, + "loss": 0.2949, + "step": 14926 + }, + { + "epoch": 2.55, + "learning_rate": 1.1775072770771833e-06, + "loss": 0.2989, + "step": 14927 + }, + { + "epoch": 2.55, + "learning_rate": 1.1766405331308283e-06, + "loss": 0.3093, + "step": 14928 + }, + { + "epoch": 2.55, + "learning_rate": 1.175774088359095e-06, + "loss": 0.3284, + "step": 14929 + }, + { + "epoch": 2.55, + "learning_rate": 1.1749079427913623e-06, + "loss": 0.314, + "step": 14930 + }, + { + "epoch": 2.55, + "learning_rate": 1.1740420964570022e-06, + "loss": 0.319, + "step": 14931 + }, + { + "epoch": 2.55, + "learning_rate": 1.1731765493853663e-06, + "loss": 0.3276, + "step": 14932 + }, + { + "epoch": 2.55, + "learning_rate": 1.1723113016058075e-06, + "loss": 0.3023, + "step": 14933 + }, + { + "epoch": 2.55, + "learning_rate": 1.1714463531476616e-06, + "loss": 0.3214, + "step": 14934 + }, + { + "epoch": 2.55, + "learning_rate": 1.1705817040402567e-06, + "loss": 0.2987, + "step": 14935 + }, + { + "epoch": 2.55, + "learning_rate": 1.1697173543129125e-06, + "loss": 0.3287, + "step": 14936 + }, + { + "epoch": 2.55, + "learning_rate": 1.1688533039949368e-06, + "loss": 0.3283, + "step": 14937 + }, + { + "epoch": 2.55, + "learning_rate": 1.1679895531156238e-06, + "loss": 0.3127, + "step": 14938 + }, + { + "epoch": 2.55, + "learning_rate": 1.1671261017042657e-06, + "loss": 0.301, + "step": 14939 + }, + { + "epoch": 2.55, + "learning_rate": 1.1662629497901334e-06, + "loss": 0.3032, + "step": 14940 + }, + { + "epoch": 2.55, + "learning_rate": 1.165400097402497e-06, + "loss": 0.3203, + "step": 14941 + }, + { + "epoch": 2.55, + "learning_rate": 1.1645375445706131e-06, + "loss": 0.3148, + "step": 14942 + }, + { + "epoch": 2.55, + "learning_rate": 1.1636752913237292e-06, + "loss": 0.3243, + "step": 14943 + }, + { + "epoch": 2.55, + "learning_rate": 1.1628133376910812e-06, + "loss": 0.3174, + "step": 14944 + }, + { + "epoch": 2.55, + "learning_rate": 1.1619516837018962e-06, + "loss": 0.3215, + "step": 14945 + }, + { + "epoch": 2.55, + "learning_rate": 1.1610903293853891e-06, + "loss": 0.3111, + "step": 14946 + }, + { + "epoch": 2.55, + "learning_rate": 1.1602292747707678e-06, + "loss": 0.2901, + "step": 14947 + }, + { + "epoch": 2.55, + "learning_rate": 1.1593685198872263e-06, + "loss": 0.2768, + "step": 14948 + }, + { + "epoch": 2.55, + "learning_rate": 1.1585080647639503e-06, + "loss": 0.2947, + "step": 14949 + }, + { + "epoch": 2.55, + "learning_rate": 1.1576479094301163e-06, + "loss": 0.2716, + "step": 14950 + }, + { + "epoch": 2.55, + "learning_rate": 1.156788053914889e-06, + "loss": 0.2967, + "step": 14951 + }, + { + "epoch": 2.55, + "learning_rate": 1.1559284982474262e-06, + "loss": 0.3183, + "step": 14952 + }, + { + "epoch": 2.55, + "learning_rate": 1.1550692424568688e-06, + "loss": 0.3115, + "step": 14953 + }, + { + "epoch": 2.55, + "learning_rate": 1.1542102865723537e-06, + "loss": 0.2928, + "step": 14954 + }, + { + "epoch": 2.55, + "learning_rate": 1.1533516306230075e-06, + "loss": 0.3146, + "step": 14955 + }, + { + "epoch": 2.55, + "learning_rate": 1.1524932746379403e-06, + "loss": 0.3038, + "step": 14956 + }, + { + "epoch": 2.55, + "learning_rate": 1.1516352186462588e-06, + "loss": 0.299, + "step": 14957 + }, + { + "epoch": 2.55, + "learning_rate": 1.1507774626770585e-06, + "loss": 0.313, + "step": 14958 + }, + { + "epoch": 2.55, + "learning_rate": 1.1499200067594207e-06, + "loss": 0.3144, + "step": 14959 + }, + { + "epoch": 2.55, + "learning_rate": 1.1490628509224221e-06, + "loss": 0.3139, + "step": 14960 + }, + { + "epoch": 2.55, + "learning_rate": 1.1482059951951252e-06, + "loss": 0.3148, + "step": 14961 + }, + { + "epoch": 2.55, + "learning_rate": 1.1473494396065843e-06, + "loss": 0.3134, + "step": 14962 + }, + { + "epoch": 2.55, + "learning_rate": 1.1464931841858407e-06, + "loss": 0.3057, + "step": 14963 + }, + { + "epoch": 2.55, + "learning_rate": 1.1456372289619288e-06, + "loss": 0.3181, + "step": 14964 + }, + { + "epoch": 2.55, + "learning_rate": 1.1447815739638712e-06, + "loss": 0.3265, + "step": 14965 + }, + { + "epoch": 2.55, + "learning_rate": 1.1439262192206801e-06, + "loss": 0.319, + "step": 14966 + }, + { + "epoch": 2.55, + "learning_rate": 1.1430711647613601e-06, + "loss": 0.2867, + "step": 14967 + }, + { + "epoch": 2.55, + "learning_rate": 1.1422164106149047e-06, + "loss": 0.3109, + "step": 14968 + }, + { + "epoch": 2.55, + "learning_rate": 1.1413619568102906e-06, + "loss": 0.3247, + "step": 14969 + }, + { + "epoch": 2.55, + "learning_rate": 1.140507803376496e-06, + "loss": 0.3046, + "step": 14970 + }, + { + "epoch": 2.55, + "learning_rate": 1.1396539503424775e-06, + "loss": 0.3193, + "step": 14971 + }, + { + "epoch": 2.55, + "learning_rate": 1.1388003977371898e-06, + "loss": 0.319, + "step": 14972 + }, + { + "epoch": 2.55, + "learning_rate": 1.137947145589574e-06, + "loss": 0.3302, + "step": 14973 + }, + { + "epoch": 2.55, + "learning_rate": 1.1370941939285606e-06, + "loss": 0.3015, + "step": 14974 + }, + { + "epoch": 2.55, + "learning_rate": 1.136241542783072e-06, + "loss": 0.327, + "step": 14975 + }, + { + "epoch": 2.55, + "learning_rate": 1.1353891921820182e-06, + "loss": 0.332, + "step": 14976 + }, + { + "epoch": 2.55, + "learning_rate": 1.1345371421543016e-06, + "loss": 0.3015, + "step": 14977 + }, + { + "epoch": 2.55, + "learning_rate": 1.1336853927288104e-06, + "loss": 0.3008, + "step": 14978 + }, + { + "epoch": 2.55, + "learning_rate": 1.132833943934425e-06, + "loss": 0.3095, + "step": 14979 + }, + { + "epoch": 2.55, + "learning_rate": 1.1319827958000174e-06, + "loss": 0.302, + "step": 14980 + }, + { + "epoch": 2.55, + "learning_rate": 1.131131948354446e-06, + "loss": 0.3295, + "step": 14981 + }, + { + "epoch": 2.56, + "learning_rate": 1.1302814016265617e-06, + "loss": 0.3105, + "step": 14982 + }, + { + "epoch": 2.56, + "learning_rate": 1.1294311556452042e-06, + "loss": 0.2992, + "step": 14983 + }, + { + "epoch": 2.56, + "learning_rate": 1.1285812104392036e-06, + "loss": 0.3143, + "step": 14984 + }, + { + "epoch": 2.56, + "learning_rate": 1.1277315660373766e-06, + "loss": 0.276, + "step": 14985 + }, + { + "epoch": 2.56, + "learning_rate": 1.126882222468535e-06, + "loss": 0.3072, + "step": 14986 + }, + { + "epoch": 2.56, + "learning_rate": 1.1260331797614732e-06, + "loss": 0.3354, + "step": 14987 + }, + { + "epoch": 2.56, + "learning_rate": 1.1251844379449839e-06, + "loss": 0.3028, + "step": 14988 + }, + { + "epoch": 2.56, + "learning_rate": 1.124335997047844e-06, + "loss": 0.316, + "step": 14989 + }, + { + "epoch": 2.56, + "learning_rate": 1.1234878570988206e-06, + "loss": 0.3016, + "step": 14990 + }, + { + "epoch": 2.56, + "learning_rate": 1.122640018126674e-06, + "loss": 0.3292, + "step": 14991 + }, + { + "epoch": 2.56, + "learning_rate": 1.1217924801601532e-06, + "loss": 0.3332, + "step": 14992 + }, + { + "epoch": 2.56, + "learning_rate": 1.120945243227991e-06, + "loss": 0.3022, + "step": 14993 + }, + { + "epoch": 2.56, + "learning_rate": 1.1200983073589177e-06, + "loss": 0.3012, + "step": 14994 + }, + { + "epoch": 2.56, + "learning_rate": 1.1192516725816493e-06, + "loss": 0.3228, + "step": 14995 + }, + { + "epoch": 2.56, + "learning_rate": 1.118405338924894e-06, + "loss": 0.286, + "step": 14996 + }, + { + "epoch": 2.56, + "learning_rate": 1.1175593064173474e-06, + "loss": 0.3174, + "step": 14997 + }, + { + "epoch": 2.56, + "learning_rate": 1.1167135750876968e-06, + "loss": 0.3415, + "step": 14998 + }, + { + "epoch": 2.56, + "learning_rate": 1.1158681449646202e-06, + "loss": 0.2952, + "step": 14999 + }, + { + "epoch": 2.56, + "learning_rate": 1.1150230160767795e-06, + "loss": 0.3004, + "step": 15000 + }, + { + "epoch": 2.56, + "learning_rate": 1.1141781884528335e-06, + "loss": 0.3145, + "step": 15001 + }, + { + "epoch": 2.56, + "learning_rate": 1.1133336621214287e-06, + "loss": 0.3208, + "step": 15002 + }, + { + "epoch": 2.56, + "learning_rate": 1.1124894371111973e-06, + "loss": 0.3177, + "step": 15003 + }, + { + "epoch": 2.56, + "learning_rate": 1.1116455134507665e-06, + "loss": 0.311, + "step": 15004 + }, + { + "epoch": 2.56, + "learning_rate": 1.1108018911687501e-06, + "loss": 0.3069, + "step": 15005 + }, + { + "epoch": 2.56, + "learning_rate": 1.1099585702937542e-06, + "loss": 0.3108, + "step": 15006 + }, + { + "epoch": 2.56, + "learning_rate": 1.109115550854376e-06, + "loss": 0.2883, + "step": 15007 + }, + { + "epoch": 2.56, + "learning_rate": 1.1082728328791947e-06, + "loss": 0.3163, + "step": 15008 + }, + { + "epoch": 2.56, + "learning_rate": 1.1074304163967864e-06, + "loss": 0.3066, + "step": 15009 + }, + { + "epoch": 2.56, + "learning_rate": 1.1065883014357147e-06, + "loss": 0.3146, + "step": 15010 + }, + { + "epoch": 2.56, + "learning_rate": 1.1057464880245338e-06, + "loss": 0.3058, + "step": 15011 + }, + { + "epoch": 2.56, + "learning_rate": 1.1049049761917884e-06, + "loss": 0.3184, + "step": 15012 + }, + { + "epoch": 2.56, + "learning_rate": 1.1040637659660092e-06, + "loss": 0.3602, + "step": 15013 + }, + { + "epoch": 2.56, + "learning_rate": 1.1032228573757232e-06, + "loss": 0.2951, + "step": 15014 + }, + { + "epoch": 2.56, + "learning_rate": 1.1023822504494387e-06, + "loss": 0.3111, + "step": 15015 + }, + { + "epoch": 2.56, + "learning_rate": 1.1015419452156594e-06, + "loss": 0.3133, + "step": 15016 + }, + { + "epoch": 2.56, + "learning_rate": 1.1007019417028807e-06, + "loss": 0.2783, + "step": 15017 + }, + { + "epoch": 2.56, + "learning_rate": 1.0998622399395808e-06, + "loss": 0.2917, + "step": 15018 + }, + { + "epoch": 2.56, + "learning_rate": 1.0990228399542325e-06, + "loss": 0.29, + "step": 15019 + }, + { + "epoch": 2.56, + "learning_rate": 1.0981837417752983e-06, + "loss": 0.2817, + "step": 15020 + }, + { + "epoch": 2.56, + "learning_rate": 1.097344945431229e-06, + "loss": 0.316, + "step": 15021 + }, + { + "epoch": 2.56, + "learning_rate": 1.0965064509504674e-06, + "loss": 0.2935, + "step": 15022 + }, + { + "epoch": 2.56, + "learning_rate": 1.0956682583614419e-06, + "loss": 0.3009, + "step": 15023 + }, + { + "epoch": 2.56, + "learning_rate": 1.0948303676925742e-06, + "loss": 0.2921, + "step": 15024 + }, + { + "epoch": 2.56, + "learning_rate": 1.093992778972275e-06, + "loss": 0.3148, + "step": 15025 + }, + { + "epoch": 2.56, + "learning_rate": 1.0931554922289444e-06, + "loss": 0.3202, + "step": 15026 + }, + { + "epoch": 2.56, + "learning_rate": 1.0923185074909714e-06, + "loss": 0.2932, + "step": 15027 + }, + { + "epoch": 2.56, + "learning_rate": 1.0914818247867375e-06, + "loss": 0.3021, + "step": 15028 + }, + { + "epoch": 2.56, + "learning_rate": 1.0906454441446134e-06, + "loss": 0.2921, + "step": 15029 + }, + { + "epoch": 2.56, + "learning_rate": 1.0898093655929554e-06, + "loss": 0.3124, + "step": 15030 + }, + { + "epoch": 2.56, + "learning_rate": 1.0889735891601116e-06, + "loss": 0.3324, + "step": 15031 + }, + { + "epoch": 2.56, + "learning_rate": 1.088138114874424e-06, + "loss": 0.3145, + "step": 15032 + }, + { + "epoch": 2.56, + "learning_rate": 1.0873029427642211e-06, + "loss": 0.3074, + "step": 15033 + }, + { + "epoch": 2.56, + "learning_rate": 1.0864680728578181e-06, + "loss": 0.3077, + "step": 15034 + }, + { + "epoch": 2.56, + "learning_rate": 1.0856335051835242e-06, + "loss": 0.2995, + "step": 15035 + }, + { + "epoch": 2.56, + "learning_rate": 1.0847992397696383e-06, + "loss": 0.3031, + "step": 15036 + }, + { + "epoch": 2.56, + "learning_rate": 1.0839652766444497e-06, + "loss": 0.2914, + "step": 15037 + }, + { + "epoch": 2.56, + "learning_rate": 1.0831316158362304e-06, + "loss": 0.3281, + "step": 15038 + }, + { + "epoch": 2.56, + "learning_rate": 1.082298257373251e-06, + "loss": 0.3156, + "step": 15039 + }, + { + "epoch": 2.57, + "learning_rate": 1.0814652012837669e-06, + "loss": 0.3005, + "step": 15040 + }, + { + "epoch": 2.57, + "learning_rate": 1.0806324475960261e-06, + "loss": 0.3084, + "step": 15041 + }, + { + "epoch": 2.57, + "learning_rate": 1.0797999963382643e-06, + "loss": 0.3074, + "step": 15042 + }, + { + "epoch": 2.57, + "learning_rate": 1.0789678475387067e-06, + "loss": 0.3193, + "step": 15043 + }, + { + "epoch": 2.57, + "learning_rate": 1.0781360012255716e-06, + "loss": 0.342, + "step": 15044 + }, + { + "epoch": 2.57, + "learning_rate": 1.077304457427061e-06, + "loss": 0.285, + "step": 15045 + }, + { + "epoch": 2.57, + "learning_rate": 1.076473216171372e-06, + "loss": 0.3146, + "step": 15046 + }, + { + "epoch": 2.57, + "learning_rate": 1.0756422774866893e-06, + "loss": 0.2922, + "step": 15047 + }, + { + "epoch": 2.57, + "learning_rate": 1.074811641401189e-06, + "loss": 0.3166, + "step": 15048 + }, + { + "epoch": 2.57, + "learning_rate": 1.0739813079430328e-06, + "loss": 0.2999, + "step": 15049 + }, + { + "epoch": 2.57, + "learning_rate": 1.0731512771403764e-06, + "loss": 0.3114, + "step": 15050 + }, + { + "epoch": 2.57, + "learning_rate": 1.0723215490213635e-06, + "loss": 0.3047, + "step": 15051 + }, + { + "epoch": 2.57, + "learning_rate": 1.0714921236141306e-06, + "loss": 0.3089, + "step": 15052 + }, + { + "epoch": 2.57, + "learning_rate": 1.0706630009467954e-06, + "loss": 0.3197, + "step": 15053 + }, + { + "epoch": 2.57, + "learning_rate": 1.0698341810474755e-06, + "loss": 0.2773, + "step": 15054 + }, + { + "epoch": 2.57, + "learning_rate": 1.0690056639442714e-06, + "loss": 0.322, + "step": 15055 + }, + { + "epoch": 2.57, + "learning_rate": 1.0681774496652764e-06, + "loss": 0.3103, + "step": 15056 + }, + { + "epoch": 2.57, + "learning_rate": 1.0673495382385745e-06, + "loss": 0.3021, + "step": 15057 + }, + { + "epoch": 2.57, + "learning_rate": 1.0665219296922359e-06, + "loss": 0.2969, + "step": 15058 + }, + { + "epoch": 2.57, + "learning_rate": 1.0656946240543253e-06, + "loss": 0.3145, + "step": 15059 + }, + { + "epoch": 2.57, + "learning_rate": 1.0648676213528897e-06, + "loss": 0.309, + "step": 15060 + }, + { + "epoch": 2.57, + "learning_rate": 1.0640409216159719e-06, + "loss": 0.3094, + "step": 15061 + }, + { + "epoch": 2.57, + "learning_rate": 1.0632145248716052e-06, + "loss": 0.3095, + "step": 15062 + }, + { + "epoch": 2.57, + "learning_rate": 1.0623884311478073e-06, + "loss": 0.309, + "step": 15063 + }, + { + "epoch": 2.57, + "learning_rate": 1.061562640472593e-06, + "loss": 0.3362, + "step": 15064 + }, + { + "epoch": 2.57, + "learning_rate": 1.0607371528739573e-06, + "loss": 0.302, + "step": 15065 + }, + { + "epoch": 2.57, + "learning_rate": 1.059911968379892e-06, + "loss": 0.3199, + "step": 15066 + }, + { + "epoch": 2.57, + "learning_rate": 1.05908708701838e-06, + "loss": 0.3112, + "step": 15067 + }, + { + "epoch": 2.57, + "learning_rate": 1.0582625088173847e-06, + "loss": 0.3158, + "step": 15068 + }, + { + "epoch": 2.57, + "learning_rate": 1.0574382338048695e-06, + "loss": 0.2969, + "step": 15069 + }, + { + "epoch": 2.57, + "learning_rate": 1.0566142620087804e-06, + "loss": 0.3038, + "step": 15070 + }, + { + "epoch": 2.57, + "learning_rate": 1.055790593457059e-06, + "loss": 0.3247, + "step": 15071 + }, + { + "epoch": 2.57, + "learning_rate": 1.0549672281776313e-06, + "loss": 0.3033, + "step": 15072 + }, + { + "epoch": 2.57, + "learning_rate": 1.0541441661984153e-06, + "loss": 0.2888, + "step": 15073 + }, + { + "epoch": 2.57, + "learning_rate": 1.053321407547322e-06, + "loss": 0.3092, + "step": 15074 + }, + { + "epoch": 2.57, + "learning_rate": 1.0524989522522433e-06, + "loss": 0.3202, + "step": 15075 + }, + { + "epoch": 2.57, + "learning_rate": 1.0516768003410694e-06, + "loss": 0.2947, + "step": 15076 + }, + { + "epoch": 2.57, + "learning_rate": 1.0508549518416767e-06, + "loss": 0.3221, + "step": 15077 + }, + { + "epoch": 2.57, + "learning_rate": 1.0500334067819306e-06, + "loss": 0.3223, + "step": 15078 + }, + { + "epoch": 2.57, + "learning_rate": 1.0492121651896892e-06, + "loss": 0.3069, + "step": 15079 + }, + { + "epoch": 2.57, + "learning_rate": 1.0483912270928e-06, + "loss": 0.2994, + "step": 15080 + }, + { + "epoch": 2.57, + "learning_rate": 1.0475705925190937e-06, + "loss": 0.3077, + "step": 15081 + }, + { + "epoch": 2.57, + "learning_rate": 1.0467502614964009e-06, + "loss": 0.3249, + "step": 15082 + }, + { + "epoch": 2.57, + "learning_rate": 1.0459302340525313e-06, + "loss": 0.3059, + "step": 15083 + }, + { + "epoch": 2.57, + "learning_rate": 1.0451105102152924e-06, + "loss": 0.3143, + "step": 15084 + }, + { + "epoch": 2.57, + "learning_rate": 1.044291090012478e-06, + "loss": 0.2841, + "step": 15085 + }, + { + "epoch": 2.57, + "learning_rate": 1.0434719734718746e-06, + "loss": 0.3152, + "step": 15086 + }, + { + "epoch": 2.57, + "learning_rate": 1.042653160621253e-06, + "loss": 0.3041, + "step": 15087 + }, + { + "epoch": 2.57, + "learning_rate": 1.0418346514883782e-06, + "loss": 0.3187, + "step": 15088 + }, + { + "epoch": 2.57, + "learning_rate": 1.0410164461010053e-06, + "loss": 0.2974, + "step": 15089 + }, + { + "epoch": 2.57, + "learning_rate": 1.0401985444868735e-06, + "loss": 0.2713, + "step": 15090 + }, + { + "epoch": 2.57, + "learning_rate": 1.0393809466737182e-06, + "loss": 0.295, + "step": 15091 + }, + { + "epoch": 2.57, + "learning_rate": 1.0385636526892595e-06, + "loss": 0.3124, + "step": 15092 + }, + { + "epoch": 2.57, + "learning_rate": 1.037746662561212e-06, + "loss": 0.2879, + "step": 15093 + }, + { + "epoch": 2.57, + "learning_rate": 1.0369299763172758e-06, + "loss": 0.3092, + "step": 15094 + }, + { + "epoch": 2.57, + "learning_rate": 1.0361135939851453e-06, + "loss": 0.3425, + "step": 15095 + }, + { + "epoch": 2.57, + "learning_rate": 1.035297515592497e-06, + "loss": 0.3269, + "step": 15096 + }, + { + "epoch": 2.57, + "learning_rate": 1.034481741167004e-06, + "loss": 0.3175, + "step": 15097 + }, + { + "epoch": 2.57, + "learning_rate": 1.0336662707363287e-06, + "loss": 0.314, + "step": 15098 + }, + { + "epoch": 2.58, + "learning_rate": 1.0328511043281187e-06, + "loss": 0.317, + "step": 15099 + }, + { + "epoch": 2.58, + "learning_rate": 1.0320362419700135e-06, + "loss": 0.31, + "step": 15100 + }, + { + "epoch": 2.58, + "learning_rate": 1.0312216836896449e-06, + "loss": 0.3288, + "step": 15101 + }, + { + "epoch": 2.58, + "learning_rate": 1.0304074295146304e-06, + "loss": 0.3121, + "step": 15102 + }, + { + "epoch": 2.58, + "learning_rate": 1.0295934794725815e-06, + "loss": 0.3041, + "step": 15103 + }, + { + "epoch": 2.58, + "learning_rate": 1.0287798335910958e-06, + "loss": 0.3114, + "step": 15104 + }, + { + "epoch": 2.58, + "learning_rate": 1.0279664918977605e-06, + "loss": 0.3073, + "step": 15105 + }, + { + "epoch": 2.58, + "learning_rate": 1.0271534544201533e-06, + "loss": 0.3292, + "step": 15106 + }, + { + "epoch": 2.58, + "learning_rate": 1.0263407211858433e-06, + "loss": 0.3001, + "step": 15107 + }, + { + "epoch": 2.58, + "learning_rate": 1.0255282922223886e-06, + "loss": 0.3385, + "step": 15108 + }, + { + "epoch": 2.58, + "learning_rate": 1.0247161675573348e-06, + "loss": 0.3188, + "step": 15109 + }, + { + "epoch": 2.58, + "learning_rate": 1.0239043472182186e-06, + "loss": 0.3432, + "step": 15110 + }, + { + "epoch": 2.58, + "learning_rate": 1.0230928312325706e-06, + "loss": 0.2842, + "step": 15111 + }, + { + "epoch": 2.58, + "learning_rate": 1.0222816196279007e-06, + "loss": 0.3073, + "step": 15112 + }, + { + "epoch": 2.58, + "learning_rate": 1.021470712431719e-06, + "loss": 0.2889, + "step": 15113 + }, + { + "epoch": 2.58, + "learning_rate": 1.0206601096715185e-06, + "loss": 0.311, + "step": 15114 + }, + { + "epoch": 2.58, + "learning_rate": 1.0198498113747857e-06, + "loss": 0.3082, + "step": 15115 + }, + { + "epoch": 2.58, + "learning_rate": 1.0190398175689953e-06, + "loss": 0.3136, + "step": 15116 + }, + { + "epoch": 2.58, + "learning_rate": 1.018230128281611e-06, + "loss": 0.3088, + "step": 15117 + }, + { + "epoch": 2.58, + "learning_rate": 1.0174207435400885e-06, + "loss": 0.2972, + "step": 15118 + }, + { + "epoch": 2.58, + "learning_rate": 1.0166116633718727e-06, + "loss": 0.3239, + "step": 15119 + }, + { + "epoch": 2.58, + "learning_rate": 1.0158028878043936e-06, + "loss": 0.3251, + "step": 15120 + }, + { + "epoch": 2.58, + "learning_rate": 1.0149944168650761e-06, + "loss": 0.3002, + "step": 15121 + }, + { + "epoch": 2.58, + "learning_rate": 1.0141862505813338e-06, + "loss": 0.2976, + "step": 15122 + }, + { + "epoch": 2.58, + "learning_rate": 1.0133783889805692e-06, + "loss": 0.3239, + "step": 15123 + }, + { + "epoch": 2.58, + "learning_rate": 1.012570832090174e-06, + "loss": 0.3172, + "step": 15124 + }, + { + "epoch": 2.58, + "learning_rate": 1.0117635799375292e-06, + "loss": 0.3268, + "step": 15125 + }, + { + "epoch": 2.58, + "learning_rate": 1.0109566325500087e-06, + "loss": 0.3139, + "step": 15126 + }, + { + "epoch": 2.58, + "learning_rate": 1.0101499899549748e-06, + "loss": 0.3358, + "step": 15127 + }, + { + "epoch": 2.58, + "learning_rate": 1.0093436521797728e-06, + "loss": 0.2998, + "step": 15128 + }, + { + "epoch": 2.58, + "learning_rate": 1.0085376192517504e-06, + "loss": 0.3326, + "step": 15129 + }, + { + "epoch": 2.58, + "learning_rate": 1.0077318911982314e-06, + "loss": 0.2947, + "step": 15130 + }, + { + "epoch": 2.58, + "learning_rate": 1.0069264680465385e-06, + "loss": 0.3178, + "step": 15131 + }, + { + "epoch": 2.58, + "learning_rate": 1.0061213498239808e-06, + "loss": 0.3428, + "step": 15132 + }, + { + "epoch": 2.58, + "learning_rate": 1.0053165365578576e-06, + "loss": 0.3007, + "step": 15133 + }, + { + "epoch": 2.58, + "learning_rate": 1.0045120282754605e-06, + "loss": 0.3108, + "step": 15134 + }, + { + "epoch": 2.58, + "learning_rate": 1.0037078250040644e-06, + "loss": 0.3084, + "step": 15135 + }, + { + "epoch": 2.58, + "learning_rate": 1.0029039267709385e-06, + "loss": 0.3266, + "step": 15136 + }, + { + "epoch": 2.58, + "learning_rate": 1.0021003336033409e-06, + "loss": 0.3075, + "step": 15137 + }, + { + "epoch": 2.58, + "learning_rate": 1.0012970455285187e-06, + "loss": 0.3027, + "step": 15138 + }, + { + "epoch": 2.58, + "learning_rate": 1.0004940625737103e-06, + "loss": 0.3356, + "step": 15139 + }, + { + "epoch": 2.58, + "learning_rate": 9.996913847661415e-07, + "loss": 0.3198, + "step": 15140 + }, + { + "epoch": 2.58, + "learning_rate": 9.988890121330287e-07, + "loss": 0.3089, + "step": 15141 + }, + { + "epoch": 2.58, + "learning_rate": 9.980869447015805e-07, + "loss": 0.3185, + "step": 15142 + }, + { + "epoch": 2.58, + "learning_rate": 9.972851824989892e-07, + "loss": 0.3078, + "step": 15143 + }, + { + "epoch": 2.58, + "learning_rate": 9.964837255524407e-07, + "loss": 0.3197, + "step": 15144 + }, + { + "epoch": 2.58, + "learning_rate": 9.95682573889114e-07, + "loss": 0.3198, + "step": 15145 + }, + { + "epoch": 2.58, + "learning_rate": 9.948817275361677e-07, + "loss": 0.2959, + "step": 15146 + }, + { + "epoch": 2.58, + "learning_rate": 9.940811865207612e-07, + "loss": 0.314, + "step": 15147 + }, + { + "epoch": 2.58, + "learning_rate": 9.932809508700348e-07, + "loss": 0.3132, + "step": 15148 + }, + { + "epoch": 2.58, + "learning_rate": 9.924810206111245e-07, + "loss": 0.3043, + "step": 15149 + }, + { + "epoch": 2.58, + "learning_rate": 9.916813957711557e-07, + "loss": 0.3261, + "step": 15150 + }, + { + "epoch": 2.58, + "learning_rate": 9.908820763772364e-07, + "loss": 0.2983, + "step": 15151 + }, + { + "epoch": 2.58, + "learning_rate": 9.900830624564729e-07, + "loss": 0.3108, + "step": 15152 + }, + { + "epoch": 2.58, + "learning_rate": 9.892843540359553e-07, + "loss": 0.3153, + "step": 15153 + }, + { + "epoch": 2.58, + "learning_rate": 9.884859511427658e-07, + "loss": 0.3042, + "step": 15154 + }, + { + "epoch": 2.58, + "learning_rate": 9.87687853803977e-07, + "loss": 0.3, + "step": 15155 + }, + { + "epoch": 2.58, + "learning_rate": 9.868900620466503e-07, + "loss": 0.3294, + "step": 15156 + }, + { + "epoch": 2.58, + "learning_rate": 9.860925758978367e-07, + "loss": 0.295, + "step": 15157 + }, + { + "epoch": 2.59, + "learning_rate": 9.852953953845746e-07, + "loss": 0.3221, + "step": 15158 + }, + { + "epoch": 2.59, + "learning_rate": 9.844985205338952e-07, + "loss": 0.3019, + "step": 15159 + }, + { + "epoch": 2.59, + "learning_rate": 9.837019513728196e-07, + "loss": 0.2937, + "step": 15160 + }, + { + "epoch": 2.59, + "learning_rate": 9.82905687928355e-07, + "loss": 0.3016, + "step": 15161 + }, + { + "epoch": 2.59, + "learning_rate": 9.821097302275006e-07, + "loss": 0.3032, + "step": 15162 + }, + { + "epoch": 2.59, + "learning_rate": 9.813140782972453e-07, + "loss": 0.2928, + "step": 15163 + }, + { + "epoch": 2.59, + "learning_rate": 9.80518732164568e-07, + "loss": 0.2971, + "step": 15164 + }, + { + "epoch": 2.59, + "learning_rate": 9.79723691856439e-07, + "loss": 0.2925, + "step": 15165 + }, + { + "epoch": 2.59, + "learning_rate": 9.789289573998107e-07, + "loss": 0.3118, + "step": 15166 + }, + { + "epoch": 2.59, + "learning_rate": 9.781345288216327e-07, + "loss": 0.2995, + "step": 15167 + }, + { + "epoch": 2.59, + "learning_rate": 9.773404061488412e-07, + "loss": 0.3182, + "step": 15168 + }, + { + "epoch": 2.59, + "learning_rate": 9.765465894083637e-07, + "loss": 0.2866, + "step": 15169 + }, + { + "epoch": 2.59, + "learning_rate": 9.757530786271164e-07, + "loss": 0.3152, + "step": 15170 + }, + { + "epoch": 2.59, + "learning_rate": 9.749598738320032e-07, + "loss": 0.3191, + "step": 15171 + }, + { + "epoch": 2.59, + "learning_rate": 9.741669750499227e-07, + "loss": 0.306, + "step": 15172 + }, + { + "epoch": 2.59, + "learning_rate": 9.733743823077558e-07, + "loss": 0.3173, + "step": 15173 + }, + { + "epoch": 2.59, + "learning_rate": 9.725820956323785e-07, + "loss": 0.3145, + "step": 15174 + }, + { + "epoch": 2.59, + "learning_rate": 9.717901150506549e-07, + "loss": 0.2945, + "step": 15175 + }, + { + "epoch": 2.59, + "learning_rate": 9.709984405894413e-07, + "loss": 0.2666, + "step": 15176 + }, + { + "epoch": 2.59, + "learning_rate": 9.702070722755774e-07, + "loss": 0.2984, + "step": 15177 + }, + { + "epoch": 2.59, + "learning_rate": 9.694160101358974e-07, + "loss": 0.3161, + "step": 15178 + }, + { + "epoch": 2.59, + "learning_rate": 9.68625254197224e-07, + "loss": 0.3235, + "step": 15179 + }, + { + "epoch": 2.59, + "learning_rate": 9.678348044863717e-07, + "loss": 0.3213, + "step": 15180 + }, + { + "epoch": 2.59, + "learning_rate": 9.670446610301386e-07, + "loss": 0.3003, + "step": 15181 + }, + { + "epoch": 2.59, + "learning_rate": 9.662548238553183e-07, + "loss": 0.3225, + "step": 15182 + }, + { + "epoch": 2.59, + "learning_rate": 9.65465292988691e-07, + "loss": 0.3025, + "step": 15183 + }, + { + "epoch": 2.59, + "learning_rate": 9.646760684570277e-07, + "loss": 0.3102, + "step": 15184 + }, + { + "epoch": 2.59, + "learning_rate": 9.638871502870895e-07, + "loss": 0.299, + "step": 15185 + }, + { + "epoch": 2.59, + "learning_rate": 9.630985385056257e-07, + "loss": 0.3099, + "step": 15186 + }, + { + "epoch": 2.59, + "learning_rate": 9.623102331393785e-07, + "loss": 0.3173, + "step": 15187 + }, + { + "epoch": 2.59, + "learning_rate": 9.61522234215072e-07, + "loss": 0.304, + "step": 15188 + }, + { + "epoch": 2.59, + "learning_rate": 9.60734541759427e-07, + "loss": 0.3403, + "step": 15189 + }, + { + "epoch": 2.59, + "learning_rate": 9.599471557991525e-07, + "loss": 0.3275, + "step": 15190 + }, + { + "epoch": 2.59, + "learning_rate": 9.591600763609464e-07, + "loss": 0.3149, + "step": 15191 + }, + { + "epoch": 2.59, + "learning_rate": 9.583733034714982e-07, + "loss": 0.3376, + "step": 15192 + }, + { + "epoch": 2.59, + "learning_rate": 9.575868371574814e-07, + "loss": 0.3095, + "step": 15193 + }, + { + "epoch": 2.59, + "learning_rate": 9.56800677445565e-07, + "loss": 0.3272, + "step": 15194 + }, + { + "epoch": 2.59, + "learning_rate": 9.560148243624056e-07, + "loss": 0.3192, + "step": 15195 + }, + { + "epoch": 2.59, + "learning_rate": 9.552292779346473e-07, + "loss": 0.3146, + "step": 15196 + }, + { + "epoch": 2.59, + "learning_rate": 9.544440381889275e-07, + "loss": 0.3377, + "step": 15197 + }, + { + "epoch": 2.59, + "learning_rate": 9.536591051518696e-07, + "loss": 0.3188, + "step": 15198 + }, + { + "epoch": 2.59, + "learning_rate": 9.52874478850091e-07, + "loss": 0.292, + "step": 15199 + }, + { + "epoch": 2.59, + "learning_rate": 9.520901593101939e-07, + "loss": 0.3347, + "step": 15200 + }, + { + "epoch": 2.59, + "learning_rate": 9.513061465587736e-07, + "loss": 0.2969, + "step": 15201 + }, + { + "epoch": 2.59, + "learning_rate": 9.505224406224144e-07, + "loss": 0.3239, + "step": 15202 + }, + { + "epoch": 2.59, + "learning_rate": 9.497390415276875e-07, + "loss": 0.3075, + "step": 15203 + }, + { + "epoch": 2.59, + "learning_rate": 9.489559493011558e-07, + "loss": 0.3258, + "step": 15204 + }, + { + "epoch": 2.59, + "learning_rate": 9.481731639693725e-07, + "loss": 0.3107, + "step": 15205 + }, + { + "epoch": 2.59, + "learning_rate": 9.473906855588799e-07, + "loss": 0.294, + "step": 15206 + }, + { + "epoch": 2.59, + "learning_rate": 9.466085140962111e-07, + "loss": 0.3077, + "step": 15207 + }, + { + "epoch": 2.59, + "learning_rate": 9.458266496078838e-07, + "loss": 0.3132, + "step": 15208 + }, + { + "epoch": 2.59, + "learning_rate": 9.450450921204102e-07, + "loss": 0.3176, + "step": 15209 + }, + { + "epoch": 2.59, + "learning_rate": 9.442638416602923e-07, + "loss": 0.3044, + "step": 15210 + }, + { + "epoch": 2.59, + "learning_rate": 9.434828982540168e-07, + "loss": 0.3004, + "step": 15211 + }, + { + "epoch": 2.59, + "learning_rate": 9.427022619280657e-07, + "loss": 0.319, + "step": 15212 + }, + { + "epoch": 2.59, + "learning_rate": 9.419219327089068e-07, + "loss": 0.2943, + "step": 15213 + }, + { + "epoch": 2.59, + "learning_rate": 9.41141910622999e-07, + "loss": 0.3203, + "step": 15214 + }, + { + "epoch": 2.59, + "learning_rate": 9.403621956967923e-07, + "loss": 0.3042, + "step": 15215 + }, + { + "epoch": 2.6, + "learning_rate": 9.395827879567221e-07, + "loss": 0.3208, + "step": 15216 + }, + { + "epoch": 2.6, + "learning_rate": 9.388036874292205e-07, + "loss": 0.303, + "step": 15217 + }, + { + "epoch": 2.6, + "learning_rate": 9.380248941406977e-07, + "loss": 0.3136, + "step": 15218 + }, + { + "epoch": 2.6, + "learning_rate": 9.372464081175648e-07, + "loss": 0.3272, + "step": 15219 + }, + { + "epoch": 2.6, + "learning_rate": 9.36468229386216e-07, + "loss": 0.3128, + "step": 15220 + }, + { + "epoch": 2.6, + "learning_rate": 9.356903579730392e-07, + "loss": 0.2961, + "step": 15221 + }, + { + "epoch": 2.6, + "learning_rate": 9.349127939044079e-07, + "loss": 0.2976, + "step": 15222 + }, + { + "epoch": 2.6, + "learning_rate": 9.341355372066896e-07, + "loss": 0.3162, + "step": 15223 + }, + { + "epoch": 2.6, + "learning_rate": 9.333585879062357e-07, + "loss": 0.3047, + "step": 15224 + }, + { + "epoch": 2.6, + "learning_rate": 9.325819460293939e-07, + "loss": 0.3118, + "step": 15225 + }, + { + "epoch": 2.6, + "learning_rate": 9.318056116024932e-07, + "loss": 0.3033, + "step": 15226 + }, + { + "epoch": 2.6, + "learning_rate": 9.310295846518591e-07, + "loss": 0.3134, + "step": 15227 + }, + { + "epoch": 2.6, + "learning_rate": 9.30253865203804e-07, + "loss": 0.304, + "step": 15228 + }, + { + "epoch": 2.6, + "learning_rate": 9.294784532846313e-07, + "loss": 0.3, + "step": 15229 + }, + { + "epoch": 2.6, + "learning_rate": 9.287033489206332e-07, + "loss": 0.3223, + "step": 15230 + }, + { + "epoch": 2.6, + "learning_rate": 9.279285521380898e-07, + "loss": 0.3134, + "step": 15231 + }, + { + "epoch": 2.6, + "learning_rate": 9.271540629632758e-07, + "loss": 0.2932, + "step": 15232 + }, + { + "epoch": 2.6, + "learning_rate": 9.263798814224467e-07, + "loss": 0.3204, + "step": 15233 + }, + { + "epoch": 2.6, + "learning_rate": 9.25606007541856e-07, + "loss": 0.3056, + "step": 15234 + }, + { + "epoch": 2.6, + "learning_rate": 9.248324413477416e-07, + "loss": 0.3448, + "step": 15235 + }, + { + "epoch": 2.6, + "learning_rate": 9.240591828663347e-07, + "loss": 0.2904, + "step": 15236 + }, + { + "epoch": 2.6, + "learning_rate": 9.232862321238534e-07, + "loss": 0.3181, + "step": 15237 + }, + { + "epoch": 2.6, + "learning_rate": 9.225135891465064e-07, + "loss": 0.3111, + "step": 15238 + }, + { + "epoch": 2.6, + "learning_rate": 9.217412539604942e-07, + "loss": 0.2767, + "step": 15239 + }, + { + "epoch": 2.6, + "learning_rate": 9.20969226591999e-07, + "loss": 0.3248, + "step": 15240 + }, + { + "epoch": 2.6, + "learning_rate": 9.201975070672042e-07, + "loss": 0.3212, + "step": 15241 + }, + { + "epoch": 2.6, + "learning_rate": 9.194260954122702e-07, + "loss": 0.3062, + "step": 15242 + }, + { + "epoch": 2.6, + "learning_rate": 9.186549916533582e-07, + "loss": 0.3182, + "step": 15243 + }, + { + "epoch": 2.6, + "learning_rate": 9.178841958166118e-07, + "loss": 0.3336, + "step": 15244 + }, + { + "epoch": 2.6, + "learning_rate": 9.171137079281667e-07, + "loss": 0.3386, + "step": 15245 + }, + { + "epoch": 2.6, + "learning_rate": 9.163435280141486e-07, + "loss": 0.2989, + "step": 15246 + }, + { + "epoch": 2.6, + "learning_rate": 9.155736561006734e-07, + "loss": 0.3308, + "step": 15247 + }, + { + "epoch": 2.6, + "learning_rate": 9.148040922138424e-07, + "loss": 0.3219, + "step": 15248 + }, + { + "epoch": 2.6, + "learning_rate": 9.140348363797502e-07, + "loss": 0.2963, + "step": 15249 + }, + { + "epoch": 2.6, + "learning_rate": 9.132658886244805e-07, + "loss": 0.3119, + "step": 15250 + }, + { + "epoch": 2.6, + "learning_rate": 9.124972489741069e-07, + "loss": 0.3134, + "step": 15251 + }, + { + "epoch": 2.6, + "learning_rate": 9.117289174546895e-07, + "loss": 0.3085, + "step": 15252 + }, + { + "epoch": 2.6, + "learning_rate": 9.109608940922821e-07, + "loss": 0.2978, + "step": 15253 + }, + { + "epoch": 2.6, + "learning_rate": 9.10193178912927e-07, + "loss": 0.3171, + "step": 15254 + }, + { + "epoch": 2.6, + "learning_rate": 9.094257719426525e-07, + "loss": 0.299, + "step": 15255 + }, + { + "epoch": 2.6, + "learning_rate": 9.086586732074809e-07, + "loss": 0.3097, + "step": 15256 + }, + { + "epoch": 2.6, + "learning_rate": 9.078918827334227e-07, + "loss": 0.288, + "step": 15257 + }, + { + "epoch": 2.6, + "learning_rate": 9.071254005464758e-07, + "loss": 0.3101, + "step": 15258 + }, + { + "epoch": 2.6, + "learning_rate": 9.063592266726295e-07, + "loss": 0.3166, + "step": 15259 + }, + { + "epoch": 2.6, + "learning_rate": 9.055933611378631e-07, + "loss": 0.3275, + "step": 15260 + }, + { + "epoch": 2.6, + "learning_rate": 9.048278039681457e-07, + "loss": 0.2944, + "step": 15261 + }, + { + "epoch": 2.6, + "learning_rate": 9.040625551894367e-07, + "loss": 0.3206, + "step": 15262 + }, + { + "epoch": 2.6, + "learning_rate": 9.032976148276795e-07, + "loss": 0.3359, + "step": 15263 + }, + { + "epoch": 2.6, + "learning_rate": 9.025329829088125e-07, + "loss": 0.2989, + "step": 15264 + }, + { + "epoch": 2.6, + "learning_rate": 9.017686594587627e-07, + "loss": 0.3055, + "step": 15265 + }, + { + "epoch": 2.6, + "learning_rate": 9.010046445034459e-07, + "loss": 0.3067, + "step": 15266 + }, + { + "epoch": 2.6, + "learning_rate": 9.002409380687671e-07, + "loss": 0.2981, + "step": 15267 + }, + { + "epoch": 2.6, + "learning_rate": 8.994775401806221e-07, + "loss": 0.3373, + "step": 15268 + }, + { + "epoch": 2.6, + "learning_rate": 8.98714450864896e-07, + "loss": 0.3375, + "step": 15269 + }, + { + "epoch": 2.6, + "learning_rate": 8.979516701474645e-07, + "loss": 0.3018, + "step": 15270 + }, + { + "epoch": 2.6, + "learning_rate": 8.971891980541858e-07, + "loss": 0.3241, + "step": 15271 + }, + { + "epoch": 2.6, + "learning_rate": 8.964270346109194e-07, + "loss": 0.2985, + "step": 15272 + }, + { + "epoch": 2.6, + "learning_rate": 8.956651798435024e-07, + "loss": 0.315, + "step": 15273 + }, + { + "epoch": 2.6, + "learning_rate": 8.949036337777706e-07, + "loss": 0.3029, + "step": 15274 + }, + { + "epoch": 2.61, + "learning_rate": 8.941423964395446e-07, + "loss": 0.306, + "step": 15275 + }, + { + "epoch": 2.61, + "learning_rate": 8.93381467854636e-07, + "loss": 0.32, + "step": 15276 + }, + { + "epoch": 2.61, + "learning_rate": 8.926208480488474e-07, + "loss": 0.3118, + "step": 15277 + }, + { + "epoch": 2.61, + "learning_rate": 8.918605370479671e-07, + "loss": 0.3137, + "step": 15278 + }, + { + "epoch": 2.61, + "learning_rate": 8.911005348777746e-07, + "loss": 0.329, + "step": 15279 + }, + { + "epoch": 2.61, + "learning_rate": 8.903408415640402e-07, + "loss": 0.3175, + "step": 15280 + }, + { + "epoch": 2.61, + "learning_rate": 8.895814571325234e-07, + "loss": 0.293, + "step": 15281 + }, + { + "epoch": 2.61, + "learning_rate": 8.888223816089725e-07, + "loss": 0.295, + "step": 15282 + }, + { + "epoch": 2.61, + "learning_rate": 8.880636150191257e-07, + "loss": 0.3254, + "step": 15283 + }, + { + "epoch": 2.61, + "learning_rate": 8.873051573887103e-07, + "loss": 0.3071, + "step": 15284 + }, + { + "epoch": 2.61, + "learning_rate": 8.865470087434447e-07, + "loss": 0.2897, + "step": 15285 + }, + { + "epoch": 2.61, + "learning_rate": 8.857891691090336e-07, + "loss": 0.3053, + "step": 15286 + }, + { + "epoch": 2.61, + "learning_rate": 8.850316385111735e-07, + "loss": 0.3184, + "step": 15287 + }, + { + "epoch": 2.61, + "learning_rate": 8.842744169755512e-07, + "loss": 0.3148, + "step": 15288 + }, + { + "epoch": 2.61, + "learning_rate": 8.835175045278399e-07, + "loss": 0.3296, + "step": 15289 + }, + { + "epoch": 2.61, + "learning_rate": 8.827609011937066e-07, + "loss": 0.3369, + "step": 15290 + }, + { + "epoch": 2.61, + "learning_rate": 8.820046069988031e-07, + "loss": 0.297, + "step": 15291 + }, + { + "epoch": 2.61, + "learning_rate": 8.812486219687766e-07, + "loss": 0.2993, + "step": 15292 + }, + { + "epoch": 2.61, + "learning_rate": 8.804929461292566e-07, + "loss": 0.3365, + "step": 15293 + }, + { + "epoch": 2.61, + "learning_rate": 8.797375795058683e-07, + "loss": 0.324, + "step": 15294 + }, + { + "epoch": 2.61, + "learning_rate": 8.78982522124222e-07, + "loss": 0.3096, + "step": 15295 + }, + { + "epoch": 2.61, + "learning_rate": 8.782277740099221e-07, + "loss": 0.3015, + "step": 15296 + }, + { + "epoch": 2.61, + "learning_rate": 8.774733351885578e-07, + "loss": 0.3125, + "step": 15297 + }, + { + "epoch": 2.61, + "learning_rate": 8.76719205685711e-07, + "loss": 0.3067, + "step": 15298 + }, + { + "epoch": 2.61, + "learning_rate": 8.759653855269512e-07, + "loss": 0.3001, + "step": 15299 + }, + { + "epoch": 2.61, + "learning_rate": 8.752118747378413e-07, + "loss": 0.3163, + "step": 15300 + }, + { + "epoch": 2.61, + "learning_rate": 8.744586733439265e-07, + "loss": 0.3118, + "step": 15301 + }, + { + "epoch": 2.61, + "learning_rate": 8.737057813707473e-07, + "loss": 0.3101, + "step": 15302 + }, + { + "epoch": 2.61, + "learning_rate": 8.729531988438322e-07, + "loss": 0.3129, + "step": 15303 + }, + { + "epoch": 2.61, + "learning_rate": 8.722009257887009e-07, + "loss": 0.3085, + "step": 15304 + }, + { + "epoch": 2.61, + "learning_rate": 8.714489622308575e-07, + "loss": 0.2973, + "step": 15305 + }, + { + "epoch": 2.61, + "learning_rate": 8.706973081957992e-07, + "loss": 0.3205, + "step": 15306 + }, + { + "epoch": 2.61, + "learning_rate": 8.699459637090169e-07, + "loss": 0.3267, + "step": 15307 + }, + { + "epoch": 2.61, + "learning_rate": 8.691949287959811e-07, + "loss": 0.2925, + "step": 15308 + }, + { + "epoch": 2.61, + "learning_rate": 8.684442034821594e-07, + "loss": 0.316, + "step": 15309 + }, + { + "epoch": 2.61, + "learning_rate": 8.676937877930059e-07, + "loss": 0.3087, + "step": 15310 + }, + { + "epoch": 2.61, + "learning_rate": 8.669436817539668e-07, + "loss": 0.2868, + "step": 15311 + }, + { + "epoch": 2.61, + "learning_rate": 8.66193885390475e-07, + "loss": 0.3102, + "step": 15312 + }, + { + "epoch": 2.61, + "learning_rate": 8.654443987279537e-07, + "loss": 0.3012, + "step": 15313 + }, + { + "epoch": 2.61, + "learning_rate": 8.646952217918159e-07, + "loss": 0.3034, + "step": 15314 + }, + { + "epoch": 2.61, + "learning_rate": 8.639463546074666e-07, + "loss": 0.3353, + "step": 15315 + }, + { + "epoch": 2.61, + "learning_rate": 8.631977972002936e-07, + "loss": 0.3057, + "step": 15316 + }, + { + "epoch": 2.61, + "learning_rate": 8.624495495956797e-07, + "loss": 0.316, + "step": 15317 + }, + { + "epoch": 2.61, + "learning_rate": 8.617016118189958e-07, + "loss": 0.3049, + "step": 15318 + }, + { + "epoch": 2.61, + "learning_rate": 8.60953983895606e-07, + "loss": 0.3351, + "step": 15319 + }, + { + "epoch": 2.61, + "learning_rate": 8.602066658508535e-07, + "loss": 0.315, + "step": 15320 + }, + { + "epoch": 2.61, + "learning_rate": 8.594596577100822e-07, + "loss": 0.3106, + "step": 15321 + }, + { + "epoch": 2.61, + "learning_rate": 8.58712959498621e-07, + "loss": 0.3252, + "step": 15322 + }, + { + "epoch": 2.61, + "learning_rate": 8.579665712417861e-07, + "loss": 0.3181, + "step": 15323 + }, + { + "epoch": 2.61, + "learning_rate": 8.572204929648864e-07, + "loss": 0.3106, + "step": 15324 + }, + { + "epoch": 2.61, + "learning_rate": 8.564747246932181e-07, + "loss": 0.3187, + "step": 15325 + }, + { + "epoch": 2.61, + "learning_rate": 8.5572926645207e-07, + "loss": 0.3028, + "step": 15326 + }, + { + "epoch": 2.61, + "learning_rate": 8.549841182667185e-07, + "loss": 0.3369, + "step": 15327 + }, + { + "epoch": 2.61, + "learning_rate": 8.542392801624278e-07, + "loss": 0.2927, + "step": 15328 + }, + { + "epoch": 2.61, + "learning_rate": 8.534947521644543e-07, + "loss": 0.3212, + "step": 15329 + }, + { + "epoch": 2.61, + "learning_rate": 8.527505342980435e-07, + "loss": 0.3472, + "step": 15330 + }, + { + "epoch": 2.61, + "learning_rate": 8.520066265884275e-07, + "loss": 0.3267, + "step": 15331 + }, + { + "epoch": 2.61, + "learning_rate": 8.512630290608315e-07, + "loss": 0.3058, + "step": 15332 + }, + { + "epoch": 2.61, + "learning_rate": 8.505197417404687e-07, + "loss": 0.2846, + "step": 15333 + }, + { + "epoch": 2.62, + "learning_rate": 8.497767646525413e-07, + "loss": 0.319, + "step": 15334 + }, + { + "epoch": 2.62, + "learning_rate": 8.490340978222433e-07, + "loss": 0.329, + "step": 15335 + }, + { + "epoch": 2.62, + "learning_rate": 8.482917412747527e-07, + "loss": 0.3366, + "step": 15336 + }, + { + "epoch": 2.62, + "learning_rate": 8.475496950352436e-07, + "loss": 0.2886, + "step": 15337 + }, + { + "epoch": 2.62, + "learning_rate": 8.46807959128878e-07, + "loss": 0.3207, + "step": 15338 + }, + { + "epoch": 2.62, + "learning_rate": 8.460665335808016e-07, + "loss": 0.3167, + "step": 15339 + }, + { + "epoch": 2.62, + "learning_rate": 8.453254184161564e-07, + "loss": 0.3378, + "step": 15340 + }, + { + "epoch": 2.62, + "learning_rate": 8.445846136600721e-07, + "loss": 0.2989, + "step": 15341 + }, + { + "epoch": 2.62, + "learning_rate": 8.438441193376656e-07, + "loss": 0.3372, + "step": 15342 + }, + { + "epoch": 2.62, + "learning_rate": 8.431039354740467e-07, + "loss": 0.314, + "step": 15343 + }, + { + "epoch": 2.62, + "learning_rate": 8.423640620943119e-07, + "loss": 0.2885, + "step": 15344 + }, + { + "epoch": 2.62, + "learning_rate": 8.41624499223549e-07, + "loss": 0.3408, + "step": 15345 + }, + { + "epoch": 2.62, + "learning_rate": 8.408852468868334e-07, + "loss": 0.3073, + "step": 15346 + }, + { + "epoch": 2.62, + "learning_rate": 8.401463051092307e-07, + "loss": 0.2849, + "step": 15347 + }, + { + "epoch": 2.62, + "learning_rate": 8.394076739157975e-07, + "loss": 0.3167, + "step": 15348 + }, + { + "epoch": 2.62, + "learning_rate": 8.386693533315771e-07, + "loss": 0.3267, + "step": 15349 + }, + { + "epoch": 2.62, + "learning_rate": 8.379313433816061e-07, + "loss": 0.3378, + "step": 15350 + }, + { + "epoch": 2.62, + "learning_rate": 8.371936440909078e-07, + "loss": 0.3163, + "step": 15351 + }, + { + "epoch": 2.62, + "learning_rate": 8.364562554844935e-07, + "loss": 0.2831, + "step": 15352 + }, + { + "epoch": 2.62, + "learning_rate": 8.357191775873685e-07, + "loss": 0.3124, + "step": 15353 + }, + { + "epoch": 2.62, + "learning_rate": 8.349824104245229e-07, + "loss": 0.3234, + "step": 15354 + }, + { + "epoch": 2.62, + "learning_rate": 8.34245954020938e-07, + "loss": 0.3118, + "step": 15355 + }, + { + "epoch": 2.62, + "learning_rate": 8.335098084015858e-07, + "loss": 0.2929, + "step": 15356 + }, + { + "epoch": 2.62, + "learning_rate": 8.327739735914275e-07, + "loss": 0.298, + "step": 15357 + }, + { + "epoch": 2.62, + "learning_rate": 8.320384496154122e-07, + "loss": 0.3148, + "step": 15358 + }, + { + "epoch": 2.62, + "learning_rate": 8.313032364984797e-07, + "loss": 0.3164, + "step": 15359 + }, + { + "epoch": 2.62, + "learning_rate": 8.305683342655613e-07, + "loss": 0.2976, + "step": 15360 + }, + { + "epoch": 2.62, + "learning_rate": 8.298337429415714e-07, + "loss": 0.3028, + "step": 15361 + }, + { + "epoch": 2.62, + "learning_rate": 8.290994625514203e-07, + "loss": 0.3212, + "step": 15362 + }, + { + "epoch": 2.62, + "learning_rate": 8.283654931200036e-07, + "loss": 0.3094, + "step": 15363 + }, + { + "epoch": 2.62, + "learning_rate": 8.276318346722089e-07, + "loss": 0.3103, + "step": 15364 + }, + { + "epoch": 2.62, + "learning_rate": 8.268984872329145e-07, + "loss": 0.328, + "step": 15365 + }, + { + "epoch": 2.62, + "learning_rate": 8.261654508269845e-07, + "loss": 0.3272, + "step": 15366 + }, + { + "epoch": 2.62, + "learning_rate": 8.254327254792726e-07, + "loss": 0.3196, + "step": 15367 + }, + { + "epoch": 2.62, + "learning_rate": 8.247003112146246e-07, + "loss": 0.2996, + "step": 15368 + }, + { + "epoch": 2.62, + "learning_rate": 8.239682080578759e-07, + "loss": 0.3022, + "step": 15369 + }, + { + "epoch": 2.62, + "learning_rate": 8.232364160338479e-07, + "loss": 0.3362, + "step": 15370 + }, + { + "epoch": 2.62, + "learning_rate": 8.22504935167353e-07, + "loss": 0.306, + "step": 15371 + }, + { + "epoch": 2.62, + "learning_rate": 8.217737654831959e-07, + "loss": 0.3189, + "step": 15372 + }, + { + "epoch": 2.62, + "learning_rate": 8.210429070061676e-07, + "loss": 0.3129, + "step": 15373 + }, + { + "epoch": 2.62, + "learning_rate": 8.203123597610485e-07, + "loss": 0.3308, + "step": 15374 + }, + { + "epoch": 2.62, + "learning_rate": 8.195821237726132e-07, + "loss": 0.3054, + "step": 15375 + }, + { + "epoch": 2.62, + "learning_rate": 8.188521990656162e-07, + "loss": 0.3051, + "step": 15376 + }, + { + "epoch": 2.62, + "learning_rate": 8.1812258566481e-07, + "loss": 0.3158, + "step": 15377 + }, + { + "epoch": 2.62, + "learning_rate": 8.173932835949339e-07, + "loss": 0.3091, + "step": 15378 + }, + { + "epoch": 2.62, + "learning_rate": 8.166642928807145e-07, + "loss": 0.3083, + "step": 15379 + }, + { + "epoch": 2.62, + "learning_rate": 8.159356135468721e-07, + "loss": 0.3577, + "step": 15380 + }, + { + "epoch": 2.62, + "learning_rate": 8.152072456181137e-07, + "loss": 0.3074, + "step": 15381 + }, + { + "epoch": 2.62, + "learning_rate": 8.144791891191373e-07, + "loss": 0.3237, + "step": 15382 + }, + { + "epoch": 2.62, + "learning_rate": 8.137514440746263e-07, + "loss": 0.2992, + "step": 15383 + }, + { + "epoch": 2.62, + "learning_rate": 8.13024010509258e-07, + "loss": 0.3263, + "step": 15384 + }, + { + "epoch": 2.62, + "learning_rate": 8.122968884476967e-07, + "loss": 0.3076, + "step": 15385 + }, + { + "epoch": 2.62, + "learning_rate": 8.115700779145975e-07, + "loss": 0.3278, + "step": 15386 + }, + { + "epoch": 2.62, + "learning_rate": 8.108435789346048e-07, + "loss": 0.3187, + "step": 15387 + }, + { + "epoch": 2.62, + "learning_rate": 8.101173915323513e-07, + "loss": 0.3019, + "step": 15388 + }, + { + "epoch": 2.62, + "learning_rate": 8.093915157324606e-07, + "loss": 0.3112, + "step": 15389 + }, + { + "epoch": 2.62, + "learning_rate": 8.086659515595475e-07, + "loss": 0.3252, + "step": 15390 + }, + { + "epoch": 2.62, + "learning_rate": 8.079406990382077e-07, + "loss": 0.2858, + "step": 15391 + }, + { + "epoch": 2.63, + "learning_rate": 8.072157581930373e-07, + "loss": 0.3147, + "step": 15392 + }, + { + "epoch": 2.63, + "learning_rate": 8.064911290486155e-07, + "loss": 0.2969, + "step": 15393 + }, + { + "epoch": 2.63, + "learning_rate": 8.057668116295115e-07, + "loss": 0.2955, + "step": 15394 + }, + { + "epoch": 2.63, + "learning_rate": 8.050428059602866e-07, + "loss": 0.289, + "step": 15395 + }, + { + "epoch": 2.63, + "learning_rate": 8.043191120654891e-07, + "loss": 0.2896, + "step": 15396 + }, + { + "epoch": 2.63, + "learning_rate": 8.035957299696584e-07, + "loss": 0.2928, + "step": 15397 + }, + { + "epoch": 2.63, + "learning_rate": 8.028726596973191e-07, + "loss": 0.3135, + "step": 15398 + }, + { + "epoch": 2.63, + "learning_rate": 8.021499012729905e-07, + "loss": 0.2841, + "step": 15399 + }, + { + "epoch": 2.63, + "learning_rate": 8.014274547211809e-07, + "loss": 0.2819, + "step": 15400 + }, + { + "epoch": 2.63, + "learning_rate": 8.007053200663828e-07, + "loss": 0.3167, + "step": 15401 + }, + { + "epoch": 2.63, + "learning_rate": 7.999834973330844e-07, + "loss": 0.304, + "step": 15402 + }, + { + "epoch": 2.63, + "learning_rate": 7.992619865457584e-07, + "loss": 0.2964, + "step": 15403 + }, + { + "epoch": 2.63, + "learning_rate": 7.98540787728872e-07, + "loss": 0.2931, + "step": 15404 + }, + { + "epoch": 2.63, + "learning_rate": 7.978199009068776e-07, + "loss": 0.2917, + "step": 15405 + }, + { + "epoch": 2.63, + "learning_rate": 7.970993261042171e-07, + "loss": 0.3151, + "step": 15406 + }, + { + "epoch": 2.63, + "learning_rate": 7.96379063345325e-07, + "loss": 0.2962, + "step": 15407 + }, + { + "epoch": 2.63, + "learning_rate": 7.956591126546209e-07, + "loss": 0.3434, + "step": 15408 + }, + { + "epoch": 2.63, + "learning_rate": 7.949394740565197e-07, + "loss": 0.3333, + "step": 15409 + }, + { + "epoch": 2.63, + "learning_rate": 7.942201475754196e-07, + "loss": 0.3168, + "step": 15410 + }, + { + "epoch": 2.63, + "learning_rate": 7.935011332357113e-07, + "loss": 0.3039, + "step": 15411 + }, + { + "epoch": 2.63, + "learning_rate": 7.92782431061776e-07, + "loss": 0.3395, + "step": 15412 + }, + { + "epoch": 2.63, + "learning_rate": 7.920640410779834e-07, + "loss": 0.3194, + "step": 15413 + }, + { + "epoch": 2.63, + "learning_rate": 7.913459633086884e-07, + "loss": 0.2865, + "step": 15414 + }, + { + "epoch": 2.63, + "learning_rate": 7.906281977782427e-07, + "loss": 0.3075, + "step": 15415 + }, + { + "epoch": 2.63, + "learning_rate": 7.899107445109799e-07, + "loss": 0.2906, + "step": 15416 + }, + { + "epoch": 2.63, + "learning_rate": 7.891936035312287e-07, + "loss": 0.2969, + "step": 15417 + }, + { + "epoch": 2.63, + "learning_rate": 7.884767748633048e-07, + "loss": 0.3024, + "step": 15418 + }, + { + "epoch": 2.63, + "learning_rate": 7.877602585315159e-07, + "loss": 0.3171, + "step": 15419 + }, + { + "epoch": 2.63, + "learning_rate": 7.870440545601565e-07, + "loss": 0.2968, + "step": 15420 + }, + { + "epoch": 2.63, + "learning_rate": 7.863281629735075e-07, + "loss": 0.3155, + "step": 15421 + }, + { + "epoch": 2.63, + "learning_rate": 7.856125837958462e-07, + "loss": 0.3088, + "step": 15422 + }, + { + "epoch": 2.63, + "learning_rate": 7.848973170514341e-07, + "loss": 0.3091, + "step": 15423 + }, + { + "epoch": 2.63, + "learning_rate": 7.841823627645251e-07, + "loss": 0.3299, + "step": 15424 + }, + { + "epoch": 2.63, + "learning_rate": 7.8346772095936e-07, + "loss": 0.3102, + "step": 15425 + }, + { + "epoch": 2.63, + "learning_rate": 7.827533916601715e-07, + "loss": 0.2787, + "step": 15426 + }, + { + "epoch": 2.63, + "learning_rate": 7.820393748911792e-07, + "loss": 0.3034, + "step": 15427 + }, + { + "epoch": 2.63, + "learning_rate": 7.813256706765959e-07, + "loss": 0.3286, + "step": 15428 + }, + { + "epoch": 2.63, + "learning_rate": 7.806122790406178e-07, + "loss": 0.3406, + "step": 15429 + }, + { + "epoch": 2.63, + "learning_rate": 7.798992000074346e-07, + "loss": 0.2992, + "step": 15430 + }, + { + "epoch": 2.63, + "learning_rate": 7.79186433601229e-07, + "loss": 0.3168, + "step": 15431 + }, + { + "epoch": 2.63, + "learning_rate": 7.784739798461627e-07, + "loss": 0.3119, + "step": 15432 + }, + { + "epoch": 2.63, + "learning_rate": 7.777618387663955e-07, + "loss": 0.2953, + "step": 15433 + }, + { + "epoch": 2.63, + "learning_rate": 7.770500103860745e-07, + "loss": 0.3084, + "step": 15434 + }, + { + "epoch": 2.63, + "learning_rate": 7.763384947293373e-07, + "loss": 0.3173, + "step": 15435 + }, + { + "epoch": 2.63, + "learning_rate": 7.756272918203056e-07, + "loss": 0.2916, + "step": 15436 + }, + { + "epoch": 2.63, + "learning_rate": 7.749164016830968e-07, + "loss": 0.3106, + "step": 15437 + }, + { + "epoch": 2.63, + "learning_rate": 7.742058243418138e-07, + "loss": 0.3289, + "step": 15438 + }, + { + "epoch": 2.63, + "learning_rate": 7.734955598205518e-07, + "loss": 0.327, + "step": 15439 + }, + { + "epoch": 2.63, + "learning_rate": 7.727856081433916e-07, + "loss": 0.3127, + "step": 15440 + }, + { + "epoch": 2.63, + "learning_rate": 7.720759693344082e-07, + "loss": 0.3151, + "step": 15441 + }, + { + "epoch": 2.63, + "learning_rate": 7.713666434176614e-07, + "loss": 0.3145, + "step": 15442 + }, + { + "epoch": 2.63, + "learning_rate": 7.706576304172054e-07, + "loss": 0.3271, + "step": 15443 + }, + { + "epoch": 2.63, + "learning_rate": 7.699489303570762e-07, + "loss": 0.3104, + "step": 15444 + }, + { + "epoch": 2.63, + "learning_rate": 7.69240543261307e-07, + "loss": 0.3402, + "step": 15445 + }, + { + "epoch": 2.63, + "learning_rate": 7.685324691539153e-07, + "loss": 0.3158, + "step": 15446 + }, + { + "epoch": 2.63, + "learning_rate": 7.67824708058913e-07, + "loss": 0.3189, + "step": 15447 + }, + { + "epoch": 2.63, + "learning_rate": 7.671172600002941e-07, + "loss": 0.3149, + "step": 15448 + }, + { + "epoch": 2.63, + "learning_rate": 7.664101250020484e-07, + "loss": 0.3098, + "step": 15449 + }, + { + "epoch": 2.63, + "learning_rate": 7.657033030881545e-07, + "loss": 0.3197, + "step": 15450 + }, + { + "epoch": 2.64, + "learning_rate": 7.649967942825742e-07, + "loss": 0.291, + "step": 15451 + }, + { + "epoch": 2.64, + "learning_rate": 7.642905986092652e-07, + "loss": 0.2943, + "step": 15452 + }, + { + "epoch": 2.64, + "learning_rate": 7.635847160921739e-07, + "loss": 0.3369, + "step": 15453 + }, + { + "epoch": 2.64, + "learning_rate": 7.628791467552332e-07, + "loss": 0.3094, + "step": 15454 + }, + { + "epoch": 2.64, + "learning_rate": 7.621738906223686e-07, + "loss": 0.3105, + "step": 15455 + }, + { + "epoch": 2.64, + "learning_rate": 7.614689477174908e-07, + "loss": 0.3085, + "step": 15456 + }, + { + "epoch": 2.64, + "learning_rate": 7.607643180645052e-07, + "loss": 0.2993, + "step": 15457 + }, + { + "epoch": 2.64, + "learning_rate": 7.600600016873039e-07, + "loss": 0.2874, + "step": 15458 + }, + { + "epoch": 2.64, + "learning_rate": 7.593559986097642e-07, + "loss": 0.3296, + "step": 15459 + }, + { + "epoch": 2.64, + "learning_rate": 7.586523088557595e-07, + "loss": 0.3282, + "step": 15460 + }, + { + "epoch": 2.64, + "learning_rate": 7.579489324491496e-07, + "loss": 0.3019, + "step": 15461 + }, + { + "epoch": 2.64, + "learning_rate": 7.572458694137863e-07, + "loss": 0.3307, + "step": 15462 + }, + { + "epoch": 2.64, + "learning_rate": 7.56543119773504e-07, + "loss": 0.3035, + "step": 15463 + }, + { + "epoch": 2.64, + "learning_rate": 7.558406835521337e-07, + "loss": 0.2996, + "step": 15464 + }, + { + "epoch": 2.64, + "learning_rate": 7.551385607734929e-07, + "loss": 0.3143, + "step": 15465 + }, + { + "epoch": 2.64, + "learning_rate": 7.54436751461387e-07, + "loss": 0.3106, + "step": 15466 + }, + { + "epoch": 2.64, + "learning_rate": 7.537352556396138e-07, + "loss": 0.3052, + "step": 15467 + }, + { + "epoch": 2.64, + "learning_rate": 7.530340733319585e-07, + "loss": 0.3074, + "step": 15468 + }, + { + "epoch": 2.64, + "learning_rate": 7.523332045621956e-07, + "loss": 0.301, + "step": 15469 + }, + { + "epoch": 2.64, + "learning_rate": 7.516326493540905e-07, + "loss": 0.3166, + "step": 15470 + }, + { + "epoch": 2.64, + "learning_rate": 7.509324077313962e-07, + "loss": 0.3373, + "step": 15471 + }, + { + "epoch": 2.64, + "learning_rate": 7.502324797178573e-07, + "loss": 0.3224, + "step": 15472 + }, + { + "epoch": 2.64, + "learning_rate": 7.49532865337207e-07, + "loss": 0.3161, + "step": 15473 + }, + { + "epoch": 2.64, + "learning_rate": 7.488335646131628e-07, + "loss": 0.2964, + "step": 15474 + }, + { + "epoch": 2.64, + "learning_rate": 7.481345775694393e-07, + "loss": 0.3323, + "step": 15475 + }, + { + "epoch": 2.64, + "learning_rate": 7.474359042297363e-07, + "loss": 0.3077, + "step": 15476 + }, + { + "epoch": 2.64, + "learning_rate": 7.46737544617745e-07, + "loss": 0.307, + "step": 15477 + }, + { + "epoch": 2.64, + "learning_rate": 7.460394987571451e-07, + "loss": 0.3166, + "step": 15478 + }, + { + "epoch": 2.64, + "learning_rate": 7.453417666716012e-07, + "loss": 0.3085, + "step": 15479 + }, + { + "epoch": 2.64, + "learning_rate": 7.446443483847776e-07, + "loss": 0.2824, + "step": 15480 + }, + { + "epoch": 2.64, + "learning_rate": 7.439472439203154e-07, + "loss": 0.3158, + "step": 15481 + }, + { + "epoch": 2.64, + "learning_rate": 7.432504533018548e-07, + "loss": 0.3381, + "step": 15482 + }, + { + "epoch": 2.64, + "learning_rate": 7.425539765530221e-07, + "loss": 0.3004, + "step": 15483 + }, + { + "epoch": 2.64, + "learning_rate": 7.418578136974319e-07, + "loss": 0.3175, + "step": 15484 + }, + { + "epoch": 2.64, + "learning_rate": 7.411619647586888e-07, + "loss": 0.3024, + "step": 15485 + }, + { + "epoch": 2.64, + "learning_rate": 7.404664297603881e-07, + "loss": 0.2832, + "step": 15486 + }, + { + "epoch": 2.64, + "learning_rate": 7.397712087261122e-07, + "loss": 0.3315, + "step": 15487 + }, + { + "epoch": 2.64, + "learning_rate": 7.390763016794378e-07, + "loss": 0.3089, + "step": 15488 + }, + { + "epoch": 2.64, + "learning_rate": 7.383817086439216e-07, + "loss": 0.306, + "step": 15489 + }, + { + "epoch": 2.64, + "learning_rate": 7.37687429643118e-07, + "loss": 0.2954, + "step": 15490 + }, + { + "epoch": 2.64, + "learning_rate": 7.369934647005672e-07, + "loss": 0.298, + "step": 15491 + }, + { + "epoch": 2.64, + "learning_rate": 7.362998138398004e-07, + "loss": 0.2773, + "step": 15492 + }, + { + "epoch": 2.64, + "learning_rate": 7.356064770843374e-07, + "loss": 0.3053, + "step": 15493 + }, + { + "epoch": 2.64, + "learning_rate": 7.349134544576875e-07, + "loss": 0.3271, + "step": 15494 + }, + { + "epoch": 2.64, + "learning_rate": 7.342207459833484e-07, + "loss": 0.323, + "step": 15495 + }, + { + "epoch": 2.64, + "learning_rate": 7.33528351684808e-07, + "loss": 0.2912, + "step": 15496 + }, + { + "epoch": 2.64, + "learning_rate": 7.32836271585542e-07, + "loss": 0.3168, + "step": 15497 + }, + { + "epoch": 2.64, + "learning_rate": 7.321445057090193e-07, + "loss": 0.3121, + "step": 15498 + }, + { + "epoch": 2.64, + "learning_rate": 7.314530540786935e-07, + "loss": 0.3061, + "step": 15499 + }, + { + "epoch": 2.64, + "learning_rate": 7.307619167180102e-07, + "loss": 0.303, + "step": 15500 + }, + { + "epoch": 2.64, + "learning_rate": 7.300710936504052e-07, + "loss": 0.2863, + "step": 15501 + }, + { + "epoch": 2.64, + "learning_rate": 7.293805848993019e-07, + "loss": 0.305, + "step": 15502 + }, + { + "epoch": 2.64, + "learning_rate": 7.286903904881137e-07, + "loss": 0.3016, + "step": 15503 + }, + { + "epoch": 2.64, + "learning_rate": 7.28000510440241e-07, + "loss": 0.3011, + "step": 15504 + }, + { + "epoch": 2.64, + "learning_rate": 7.273109447790772e-07, + "loss": 0.2915, + "step": 15505 + }, + { + "epoch": 2.64, + "learning_rate": 7.266216935280035e-07, + "loss": 0.3026, + "step": 15506 + }, + { + "epoch": 2.64, + "learning_rate": 7.259327567103902e-07, + "loss": 0.3029, + "step": 15507 + }, + { + "epoch": 2.64, + "learning_rate": 7.252441343495975e-07, + "loss": 0.3138, + "step": 15508 + }, + { + "epoch": 2.65, + "learning_rate": 7.245558264689756e-07, + "loss": 0.3184, + "step": 15509 + }, + { + "epoch": 2.65, + "learning_rate": 7.238678330918614e-07, + "loss": 0.2901, + "step": 15510 + }, + { + "epoch": 2.65, + "learning_rate": 7.231801542415818e-07, + "loss": 0.305, + "step": 15511 + }, + { + "epoch": 2.65, + "learning_rate": 7.224927899414569e-07, + "loss": 0.3073, + "step": 15512 + }, + { + "epoch": 2.65, + "learning_rate": 7.218057402147905e-07, + "loss": 0.3186, + "step": 15513 + }, + { + "epoch": 2.65, + "learning_rate": 7.211190050848804e-07, + "loss": 0.3118, + "step": 15514 + }, + { + "epoch": 2.65, + "learning_rate": 7.204325845750104e-07, + "loss": 0.3406, + "step": 15515 + }, + { + "epoch": 2.65, + "learning_rate": 7.197464787084552e-07, + "loss": 0.3069, + "step": 15516 + }, + { + "epoch": 2.65, + "learning_rate": 7.190606875084793e-07, + "loss": 0.3127, + "step": 15517 + }, + { + "epoch": 2.65, + "learning_rate": 7.183752109983377e-07, + "loss": 0.3174, + "step": 15518 + }, + { + "epoch": 2.65, + "learning_rate": 7.176900492012695e-07, + "loss": 0.3228, + "step": 15519 + }, + { + "epoch": 2.65, + "learning_rate": 7.170052021405083e-07, + "loss": 0.3394, + "step": 15520 + }, + { + "epoch": 2.65, + "learning_rate": 7.163206698392744e-07, + "loss": 0.2993, + "step": 15521 + }, + { + "epoch": 2.65, + "learning_rate": 7.156364523207793e-07, + "loss": 0.3108, + "step": 15522 + }, + { + "epoch": 2.65, + "learning_rate": 7.149525496082221e-07, + "loss": 0.3219, + "step": 15523 + }, + { + "epoch": 2.65, + "learning_rate": 7.142689617247922e-07, + "loss": 0.2955, + "step": 15524 + }, + { + "epoch": 2.65, + "learning_rate": 7.13585688693671e-07, + "loss": 0.298, + "step": 15525 + }, + { + "epoch": 2.65, + "learning_rate": 7.12902730538021e-07, + "loss": 0.3009, + "step": 15526 + }, + { + "epoch": 2.65, + "learning_rate": 7.122200872810037e-07, + "loss": 0.2897, + "step": 15527 + }, + { + "epoch": 2.65, + "learning_rate": 7.115377589457617e-07, + "loss": 0.29, + "step": 15528 + }, + { + "epoch": 2.65, + "learning_rate": 7.108557455554332e-07, + "loss": 0.3252, + "step": 15529 + }, + { + "epoch": 2.65, + "learning_rate": 7.101740471331431e-07, + "loss": 0.298, + "step": 15530 + }, + { + "epoch": 2.65, + "learning_rate": 7.09492663702005e-07, + "loss": 0.3094, + "step": 15531 + }, + { + "epoch": 2.65, + "learning_rate": 7.088115952851238e-07, + "loss": 0.3278, + "step": 15532 + }, + { + "epoch": 2.65, + "learning_rate": 7.081308419055944e-07, + "loss": 0.2999, + "step": 15533 + }, + { + "epoch": 2.65, + "learning_rate": 7.074504035864937e-07, + "loss": 0.2855, + "step": 15534 + }, + { + "epoch": 2.65, + "learning_rate": 7.06770280350898e-07, + "loss": 0.2989, + "step": 15535 + }, + { + "epoch": 2.65, + "learning_rate": 7.060904722218665e-07, + "loss": 0.3002, + "step": 15536 + }, + { + "epoch": 2.65, + "learning_rate": 7.054109792224495e-07, + "loss": 0.3081, + "step": 15537 + }, + { + "epoch": 2.65, + "learning_rate": 7.047318013756865e-07, + "loss": 0.2897, + "step": 15538 + }, + { + "epoch": 2.65, + "learning_rate": 7.04052938704608e-07, + "loss": 0.3113, + "step": 15539 + }, + { + "epoch": 2.65, + "learning_rate": 7.033743912322311e-07, + "loss": 0.3129, + "step": 15540 + }, + { + "epoch": 2.65, + "learning_rate": 7.026961589815651e-07, + "loss": 0.309, + "step": 15541 + }, + { + "epoch": 2.65, + "learning_rate": 7.020182419756027e-07, + "loss": 0.3143, + "step": 15542 + }, + { + "epoch": 2.65, + "learning_rate": 7.013406402373358e-07, + "loss": 0.3107, + "step": 15543 + }, + { + "epoch": 2.65, + "learning_rate": 7.006633537897345e-07, + "loss": 0.3029, + "step": 15544 + }, + { + "epoch": 2.65, + "learning_rate": 6.999863826557663e-07, + "loss": 0.293, + "step": 15545 + }, + { + "epoch": 2.65, + "learning_rate": 6.993097268583848e-07, + "loss": 0.3047, + "step": 15546 + }, + { + "epoch": 2.65, + "learning_rate": 6.986333864205341e-07, + "loss": 0.3299, + "step": 15547 + }, + { + "epoch": 2.65, + "learning_rate": 6.97957361365147e-07, + "loss": 0.3205, + "step": 15548 + }, + { + "epoch": 2.65, + "learning_rate": 6.972816517151448e-07, + "loss": 0.3033, + "step": 15549 + }, + { + "epoch": 2.65, + "learning_rate": 6.966062574934385e-07, + "loss": 0.3183, + "step": 15550 + }, + { + "epoch": 2.65, + "learning_rate": 6.959311787229295e-07, + "loss": 0.3116, + "step": 15551 + }, + { + "epoch": 2.65, + "learning_rate": 6.952564154265074e-07, + "loss": 0.3136, + "step": 15552 + }, + { + "epoch": 2.65, + "learning_rate": 6.945819676270515e-07, + "loss": 0.2992, + "step": 15553 + }, + { + "epoch": 2.65, + "learning_rate": 6.939078353474315e-07, + "loss": 0.2929, + "step": 15554 + }, + { + "epoch": 2.65, + "learning_rate": 6.932340186105036e-07, + "loss": 0.3119, + "step": 15555 + }, + { + "epoch": 2.65, + "learning_rate": 6.925605174391182e-07, + "loss": 0.3054, + "step": 15556 + }, + { + "epoch": 2.65, + "learning_rate": 6.91887331856107e-07, + "loss": 0.3038, + "step": 15557 + }, + { + "epoch": 2.65, + "learning_rate": 6.912144618842987e-07, + "loss": 0.3109, + "step": 15558 + }, + { + "epoch": 2.65, + "learning_rate": 6.905419075465092e-07, + "loss": 0.3156, + "step": 15559 + }, + { + "epoch": 2.65, + "learning_rate": 6.898696688655393e-07, + "loss": 0.3137, + "step": 15560 + }, + { + "epoch": 2.65, + "learning_rate": 6.891977458641852e-07, + "loss": 0.3134, + "step": 15561 + }, + { + "epoch": 2.65, + "learning_rate": 6.885261385652297e-07, + "loss": 0.314, + "step": 15562 + }, + { + "epoch": 2.65, + "learning_rate": 6.87854846991447e-07, + "loss": 0.3123, + "step": 15563 + }, + { + "epoch": 2.65, + "learning_rate": 6.871838711655943e-07, + "loss": 0.2985, + "step": 15564 + }, + { + "epoch": 2.65, + "learning_rate": 6.865132111104245e-07, + "loss": 0.3023, + "step": 15565 + }, + { + "epoch": 2.65, + "learning_rate": 6.858428668486783e-07, + "loss": 0.3123, + "step": 15566 + }, + { + "epoch": 2.65, + "learning_rate": 6.851728384030842e-07, + "loss": 0.291, + "step": 15567 + }, + { + "epoch": 2.66, + "learning_rate": 6.845031257963619e-07, + "loss": 0.3045, + "step": 15568 + }, + { + "epoch": 2.66, + "learning_rate": 6.838337290512187e-07, + "loss": 0.2971, + "step": 15569 + }, + { + "epoch": 2.66, + "learning_rate": 6.831646481903531e-07, + "loss": 0.324, + "step": 15570 + }, + { + "epoch": 2.66, + "learning_rate": 6.824958832364514e-07, + "loss": 0.3002, + "step": 15571 + }, + { + "epoch": 2.66, + "learning_rate": 6.818274342121889e-07, + "loss": 0.3286, + "step": 15572 + }, + { + "epoch": 2.66, + "learning_rate": 6.811593011402296e-07, + "loss": 0.3025, + "step": 15573 + }, + { + "epoch": 2.66, + "learning_rate": 6.804914840432309e-07, + "loss": 0.29, + "step": 15574 + }, + { + "epoch": 2.66, + "learning_rate": 6.798239829438324e-07, + "loss": 0.3203, + "step": 15575 + }, + { + "epoch": 2.66, + "learning_rate": 6.791567978646707e-07, + "loss": 0.298, + "step": 15576 + }, + { + "epoch": 2.66, + "learning_rate": 6.784899288283664e-07, + "loss": 0.322, + "step": 15577 + }, + { + "epoch": 2.66, + "learning_rate": 6.778233758575337e-07, + "loss": 0.3151, + "step": 15578 + }, + { + "epoch": 2.66, + "learning_rate": 6.771571389747689e-07, + "loss": 0.2968, + "step": 15579 + }, + { + "epoch": 2.66, + "learning_rate": 6.764912182026662e-07, + "loss": 0.2991, + "step": 15580 + }, + { + "epoch": 2.66, + "learning_rate": 6.75825613563802e-07, + "loss": 0.2942, + "step": 15581 + }, + { + "epoch": 2.66, + "learning_rate": 6.75160325080747e-07, + "loss": 0.2717, + "step": 15582 + }, + { + "epoch": 2.66, + "learning_rate": 6.744953527760589e-07, + "loss": 0.3076, + "step": 15583 + }, + { + "epoch": 2.66, + "learning_rate": 6.738306966722852e-07, + "loss": 0.337, + "step": 15584 + }, + { + "epoch": 2.66, + "learning_rate": 6.731663567919621e-07, + "loss": 0.2958, + "step": 15585 + }, + { + "epoch": 2.66, + "learning_rate": 6.725023331576175e-07, + "loss": 0.3099, + "step": 15586 + }, + { + "epoch": 2.66, + "learning_rate": 6.718386257917619e-07, + "loss": 0.2926, + "step": 15587 + }, + { + "epoch": 2.66, + "learning_rate": 6.711752347169031e-07, + "loss": 0.2997, + "step": 15588 + }, + { + "epoch": 2.66, + "learning_rate": 6.705121599555331e-07, + "loss": 0.3097, + "step": 15589 + }, + { + "epoch": 2.66, + "learning_rate": 6.698494015301382e-07, + "loss": 0.3015, + "step": 15590 + }, + { + "epoch": 2.66, + "learning_rate": 6.691869594631861e-07, + "loss": 0.3158, + "step": 15591 + }, + { + "epoch": 2.66, + "learning_rate": 6.685248337771411e-07, + "loss": 0.3115, + "step": 15592 + }, + { + "epoch": 2.66, + "learning_rate": 6.678630244944539e-07, + "loss": 0.292, + "step": 15593 + }, + { + "epoch": 2.66, + "learning_rate": 6.672015316375624e-07, + "loss": 0.3043, + "step": 15594 + }, + { + "epoch": 2.66, + "learning_rate": 6.665403552288985e-07, + "loss": 0.318, + "step": 15595 + }, + { + "epoch": 2.66, + "learning_rate": 6.658794952908787e-07, + "loss": 0.3157, + "step": 15596 + }, + { + "epoch": 2.66, + "learning_rate": 6.652189518459118e-07, + "loss": 0.318, + "step": 15597 + }, + { + "epoch": 2.66, + "learning_rate": 6.645587249163943e-07, + "loss": 0.3255, + "step": 15598 + }, + { + "epoch": 2.66, + "learning_rate": 6.638988145247149e-07, + "loss": 0.2752, + "step": 15599 + }, + { + "epoch": 2.66, + "learning_rate": 6.632392206932459e-07, + "loss": 0.3065, + "step": 15600 + }, + { + "epoch": 2.66, + "learning_rate": 6.62579943444357e-07, + "loss": 0.2787, + "step": 15601 + }, + { + "epoch": 2.66, + "learning_rate": 6.619209828003959e-07, + "loss": 0.3347, + "step": 15602 + }, + { + "epoch": 2.66, + "learning_rate": 6.612623387837102e-07, + "loss": 0.3006, + "step": 15603 + }, + { + "epoch": 2.66, + "learning_rate": 6.606040114166323e-07, + "loss": 0.3122, + "step": 15604 + }, + { + "epoch": 2.66, + "learning_rate": 6.599460007214842e-07, + "loss": 0.3127, + "step": 15605 + }, + { + "epoch": 2.66, + "learning_rate": 6.592883067205769e-07, + "loss": 0.283, + "step": 15606 + }, + { + "epoch": 2.66, + "learning_rate": 6.586309294362103e-07, + "loss": 0.3202, + "step": 15607 + }, + { + "epoch": 2.66, + "learning_rate": 6.579738688906756e-07, + "loss": 0.2944, + "step": 15608 + }, + { + "epoch": 2.66, + "learning_rate": 6.573171251062482e-07, + "loss": 0.3105, + "step": 15609 + }, + { + "epoch": 2.66, + "learning_rate": 6.566606981052004e-07, + "loss": 0.309, + "step": 15610 + }, + { + "epoch": 2.66, + "learning_rate": 6.560045879097876e-07, + "loss": 0.3164, + "step": 15611 + }, + { + "epoch": 2.66, + "learning_rate": 6.553487945422576e-07, + "loss": 0.2826, + "step": 15612 + }, + { + "epoch": 2.66, + "learning_rate": 6.546933180248461e-07, + "loss": 0.3162, + "step": 15613 + }, + { + "epoch": 2.66, + "learning_rate": 6.540381583797784e-07, + "loss": 0.3098, + "step": 15614 + }, + { + "epoch": 2.66, + "learning_rate": 6.53383315629268e-07, + "loss": 0.3193, + "step": 15615 + }, + { + "epoch": 2.66, + "learning_rate": 6.527287897955226e-07, + "loss": 0.2978, + "step": 15616 + }, + { + "epoch": 2.66, + "learning_rate": 6.520745809007312e-07, + "loss": 0.3358, + "step": 15617 + }, + { + "epoch": 2.66, + "learning_rate": 6.514206889670771e-07, + "loss": 0.3085, + "step": 15618 + }, + { + "epoch": 2.66, + "learning_rate": 6.507671140167316e-07, + "loss": 0.3155, + "step": 15619 + }, + { + "epoch": 2.66, + "learning_rate": 6.501138560718568e-07, + "loss": 0.2973, + "step": 15620 + }, + { + "epoch": 2.66, + "learning_rate": 6.494609151546038e-07, + "loss": 0.3266, + "step": 15621 + }, + { + "epoch": 2.66, + "learning_rate": 6.488082912871085e-07, + "loss": 0.3076, + "step": 15622 + }, + { + "epoch": 2.66, + "learning_rate": 6.48155984491502e-07, + "loss": 0.3014, + "step": 15623 + }, + { + "epoch": 2.66, + "learning_rate": 6.475039947899031e-07, + "loss": 0.3006, + "step": 15624 + }, + { + "epoch": 2.66, + "learning_rate": 6.468523222044143e-07, + "loss": 0.3074, + "step": 15625 + }, + { + "epoch": 2.66, + "learning_rate": 6.462009667571356e-07, + "loss": 0.3132, + "step": 15626 + }, + { + "epoch": 2.67, + "learning_rate": 6.455499284701516e-07, + "loss": 0.3087, + "step": 15627 + }, + { + "epoch": 2.67, + "learning_rate": 6.448992073655381e-07, + "loss": 0.3316, + "step": 15628 + }, + { + "epoch": 2.67, + "learning_rate": 6.442488034653582e-07, + "loss": 0.3113, + "step": 15629 + }, + { + "epoch": 2.67, + "learning_rate": 6.435987167916658e-07, + "loss": 0.2962, + "step": 15630 + }, + { + "epoch": 2.67, + "learning_rate": 6.429489473665041e-07, + "loss": 0.2823, + "step": 15631 + }, + { + "epoch": 2.67, + "learning_rate": 6.422994952119022e-07, + "loss": 0.347, + "step": 15632 + }, + { + "epoch": 2.67, + "learning_rate": 6.416503603498825e-07, + "loss": 0.2851, + "step": 15633 + }, + { + "epoch": 2.67, + "learning_rate": 6.410015428024563e-07, + "loss": 0.301, + "step": 15634 + }, + { + "epoch": 2.67, + "learning_rate": 6.403530425916227e-07, + "loss": 0.3223, + "step": 15635 + }, + { + "epoch": 2.67, + "learning_rate": 6.397048597393696e-07, + "loss": 0.3008, + "step": 15636 + }, + { + "epoch": 2.67, + "learning_rate": 6.390569942676783e-07, + "loss": 0.3121, + "step": 15637 + }, + { + "epoch": 2.67, + "learning_rate": 6.384094461985113e-07, + "loss": 0.2971, + "step": 15638 + }, + { + "epoch": 2.67, + "learning_rate": 6.377622155538276e-07, + "loss": 0.3109, + "step": 15639 + }, + { + "epoch": 2.67, + "learning_rate": 6.371153023555721e-07, + "loss": 0.3295, + "step": 15640 + }, + { + "epoch": 2.67, + "learning_rate": 6.364687066256803e-07, + "loss": 0.3083, + "step": 15641 + }, + { + "epoch": 2.67, + "learning_rate": 6.358224283860759e-07, + "loss": 0.3103, + "step": 15642 + }, + { + "epoch": 2.67, + "learning_rate": 6.351764676586725e-07, + "loss": 0.3088, + "step": 15643 + }, + { + "epoch": 2.67, + "learning_rate": 6.345308244653736e-07, + "loss": 0.2979, + "step": 15644 + }, + { + "epoch": 2.67, + "learning_rate": 6.338854988280707e-07, + "loss": 0.3113, + "step": 15645 + }, + { + "epoch": 2.67, + "learning_rate": 6.332404907686452e-07, + "loss": 0.3194, + "step": 15646 + }, + { + "epoch": 2.67, + "learning_rate": 6.325958003089661e-07, + "loss": 0.3152, + "step": 15647 + }, + { + "epoch": 2.67, + "learning_rate": 6.319514274708926e-07, + "loss": 0.3226, + "step": 15648 + }, + { + "epoch": 2.67, + "learning_rate": 6.313073722762764e-07, + "loss": 0.3114, + "step": 15649 + }, + { + "epoch": 2.67, + "learning_rate": 6.30663634746953e-07, + "loss": 0.3101, + "step": 15650 + }, + { + "epoch": 2.67, + "learning_rate": 6.300202149047508e-07, + "loss": 0.2996, + "step": 15651 + }, + { + "epoch": 2.67, + "learning_rate": 6.293771127714854e-07, + "loss": 0.3261, + "step": 15652 + }, + { + "epoch": 2.67, + "learning_rate": 6.287343283689662e-07, + "loss": 0.3377, + "step": 15653 + }, + { + "epoch": 2.67, + "learning_rate": 6.280918617189824e-07, + "loss": 0.3102, + "step": 15654 + }, + { + "epoch": 2.67, + "learning_rate": 6.274497128433232e-07, + "loss": 0.3126, + "step": 15655 + }, + { + "epoch": 2.67, + "learning_rate": 6.268078817637579e-07, + "loss": 0.3103, + "step": 15656 + }, + { + "epoch": 2.67, + "learning_rate": 6.261663685020514e-07, + "loss": 0.2979, + "step": 15657 + }, + { + "epoch": 2.67, + "learning_rate": 6.255251730799539e-07, + "loss": 0.3229, + "step": 15658 + }, + { + "epoch": 2.67, + "learning_rate": 6.248842955192091e-07, + "loss": 0.2883, + "step": 15659 + }, + { + "epoch": 2.67, + "learning_rate": 6.242437358415454e-07, + "loss": 0.3132, + "step": 15660 + }, + { + "epoch": 2.67, + "learning_rate": 6.236034940686853e-07, + "loss": 0.2855, + "step": 15661 + }, + { + "epoch": 2.67, + "learning_rate": 6.229635702223325e-07, + "loss": 0.318, + "step": 15662 + }, + { + "epoch": 2.67, + "learning_rate": 6.223239643241885e-07, + "loss": 0.3395, + "step": 15663 + }, + { + "epoch": 2.67, + "learning_rate": 6.216846763959384e-07, + "loss": 0.3027, + "step": 15664 + }, + { + "epoch": 2.67, + "learning_rate": 6.210457064592612e-07, + "loss": 0.3319, + "step": 15665 + }, + { + "epoch": 2.67, + "learning_rate": 6.204070545358198e-07, + "loss": 0.2888, + "step": 15666 + }, + { + "epoch": 2.67, + "learning_rate": 6.197687206472714e-07, + "loss": 0.3128, + "step": 15667 + }, + { + "epoch": 2.67, + "learning_rate": 6.191307048152607e-07, + "loss": 0.2983, + "step": 15668 + }, + { + "epoch": 2.67, + "learning_rate": 6.18493007061417e-07, + "loss": 0.3213, + "step": 15669 + }, + { + "epoch": 2.67, + "learning_rate": 6.178556274073655e-07, + "loss": 0.3, + "step": 15670 + }, + { + "epoch": 2.67, + "learning_rate": 6.172185658747187e-07, + "loss": 0.3054, + "step": 15671 + }, + { + "epoch": 2.67, + "learning_rate": 6.16581822485075e-07, + "loss": 0.3087, + "step": 15672 + }, + { + "epoch": 2.67, + "learning_rate": 6.15945397260026e-07, + "loss": 0.2988, + "step": 15673 + }, + { + "epoch": 2.67, + "learning_rate": 6.153092902211499e-07, + "loss": 0.3114, + "step": 15674 + }, + { + "epoch": 2.67, + "learning_rate": 6.146735013900173e-07, + "loss": 0.3068, + "step": 15675 + }, + { + "epoch": 2.67, + "learning_rate": 6.140380307881866e-07, + "loss": 0.312, + "step": 15676 + }, + { + "epoch": 2.67, + "learning_rate": 6.134028784372004e-07, + "loss": 0.2928, + "step": 15677 + }, + { + "epoch": 2.67, + "learning_rate": 6.127680443585982e-07, + "loss": 0.3191, + "step": 15678 + }, + { + "epoch": 2.67, + "learning_rate": 6.12133528573905e-07, + "loss": 0.3157, + "step": 15679 + }, + { + "epoch": 2.67, + "learning_rate": 6.114993311046346e-07, + "loss": 0.3037, + "step": 15680 + }, + { + "epoch": 2.67, + "learning_rate": 6.108654519722923e-07, + "loss": 0.3296, + "step": 15681 + }, + { + "epoch": 2.67, + "learning_rate": 6.102318911983684e-07, + "loss": 0.3276, + "step": 15682 + }, + { + "epoch": 2.67, + "learning_rate": 6.095986488043503e-07, + "loss": 0.3171, + "step": 15683 + }, + { + "epoch": 2.67, + "learning_rate": 6.089657248117031e-07, + "loss": 0.3355, + "step": 15684 + }, + { + "epoch": 2.68, + "learning_rate": 6.083331192418906e-07, + "loss": 0.2878, + "step": 15685 + }, + { + "epoch": 2.68, + "learning_rate": 6.077008321163646e-07, + "loss": 0.319, + "step": 15686 + }, + { + "epoch": 2.68, + "learning_rate": 6.070688634565591e-07, + "loss": 0.2977, + "step": 15687 + }, + { + "epoch": 2.68, + "learning_rate": 6.064372132839058e-07, + "loss": 0.3234, + "step": 15688 + }, + { + "epoch": 2.68, + "learning_rate": 6.05805881619822e-07, + "loss": 0.313, + "step": 15689 + }, + { + "epoch": 2.68, + "learning_rate": 6.051748684857129e-07, + "loss": 0.2959, + "step": 15690 + }, + { + "epoch": 2.68, + "learning_rate": 6.045441739029768e-07, + "loss": 0.2732, + "step": 15691 + }, + { + "epoch": 2.68, + "learning_rate": 6.039137978929954e-07, + "loss": 0.2979, + "step": 15692 + }, + { + "epoch": 2.68, + "learning_rate": 6.032837404771452e-07, + "loss": 0.3001, + "step": 15693 + }, + { + "epoch": 2.68, + "learning_rate": 6.02654001676789e-07, + "loss": 0.3181, + "step": 15694 + }, + { + "epoch": 2.68, + "learning_rate": 6.020245815132786e-07, + "loss": 0.3001, + "step": 15695 + }, + { + "epoch": 2.68, + "learning_rate": 6.01395480007958e-07, + "loss": 0.3086, + "step": 15696 + }, + { + "epoch": 2.68, + "learning_rate": 6.007666971821557e-07, + "loss": 0.2943, + "step": 15697 + }, + { + "epoch": 2.68, + "learning_rate": 6.001382330571959e-07, + "loss": 0.3369, + "step": 15698 + }, + { + "epoch": 2.68, + "learning_rate": 5.995100876543825e-07, + "loss": 0.3318, + "step": 15699 + }, + { + "epoch": 2.68, + "learning_rate": 5.988822609950174e-07, + "loss": 0.3292, + "step": 15700 + }, + { + "epoch": 2.68, + "learning_rate": 5.982547531003879e-07, + "loss": 0.309, + "step": 15701 + }, + { + "epoch": 2.68, + "learning_rate": 5.976275639917728e-07, + "loss": 0.3136, + "step": 15702 + }, + { + "epoch": 2.68, + "learning_rate": 5.970006936904349e-07, + "loss": 0.3237, + "step": 15703 + }, + { + "epoch": 2.68, + "learning_rate": 5.963741422176305e-07, + "loss": 0.3064, + "step": 15704 + }, + { + "epoch": 2.68, + "learning_rate": 5.957479095946039e-07, + "loss": 0.3326, + "step": 15705 + }, + { + "epoch": 2.68, + "learning_rate": 5.951219958425925e-07, + "loss": 0.3268, + "step": 15706 + }, + { + "epoch": 2.68, + "learning_rate": 5.944964009828135e-07, + "loss": 0.2929, + "step": 15707 + }, + { + "epoch": 2.68, + "learning_rate": 5.938711250364837e-07, + "loss": 0.303, + "step": 15708 + }, + { + "epoch": 2.68, + "learning_rate": 5.932461680248014e-07, + "loss": 0.2954, + "step": 15709 + }, + { + "epoch": 2.68, + "learning_rate": 5.926215299689575e-07, + "loss": 0.3209, + "step": 15710 + }, + { + "epoch": 2.68, + "learning_rate": 5.91997210890134e-07, + "loss": 0.3052, + "step": 15711 + }, + { + "epoch": 2.68, + "learning_rate": 5.913732108094972e-07, + "loss": 0.3021, + "step": 15712 + }, + { + "epoch": 2.68, + "learning_rate": 5.90749529748208e-07, + "loss": 0.3234, + "step": 15713 + }, + { + "epoch": 2.68, + "learning_rate": 5.901261677274095e-07, + "loss": 0.3011, + "step": 15714 + }, + { + "epoch": 2.68, + "learning_rate": 5.895031247682414e-07, + "loss": 0.3237, + "step": 15715 + }, + { + "epoch": 2.68, + "learning_rate": 5.888804008918281e-07, + "loss": 0.3038, + "step": 15716 + }, + { + "epoch": 2.68, + "learning_rate": 5.882579961192836e-07, + "loss": 0.3084, + "step": 15717 + }, + { + "epoch": 2.68, + "learning_rate": 5.876359104717144e-07, + "loss": 0.3008, + "step": 15718 + }, + { + "epoch": 2.68, + "learning_rate": 5.870141439702115e-07, + "loss": 0.3297, + "step": 15719 + }, + { + "epoch": 2.68, + "learning_rate": 5.863926966358568e-07, + "loss": 0.3245, + "step": 15720 + }, + { + "epoch": 2.68, + "learning_rate": 5.857715684897248e-07, + "loss": 0.3136, + "step": 15721 + }, + { + "epoch": 2.68, + "learning_rate": 5.851507595528716e-07, + "loss": 0.3313, + "step": 15722 + }, + { + "epoch": 2.68, + "learning_rate": 5.845302698463496e-07, + "loss": 0.3214, + "step": 15723 + }, + { + "epoch": 2.68, + "learning_rate": 5.839100993911984e-07, + "loss": 0.3122, + "step": 15724 + }, + { + "epoch": 2.68, + "learning_rate": 5.832902482084446e-07, + "loss": 0.3093, + "step": 15725 + }, + { + "epoch": 2.68, + "learning_rate": 5.82670716319107e-07, + "loss": 0.3194, + "step": 15726 + }, + { + "epoch": 2.68, + "learning_rate": 5.820515037441921e-07, + "loss": 0.2915, + "step": 15727 + }, + { + "epoch": 2.68, + "learning_rate": 5.814326105046941e-07, + "loss": 0.3034, + "step": 15728 + }, + { + "epoch": 2.68, + "learning_rate": 5.808140366216008e-07, + "loss": 0.3404, + "step": 15729 + }, + { + "epoch": 2.68, + "learning_rate": 5.801957821158822e-07, + "loss": 0.3275, + "step": 15730 + }, + { + "epoch": 2.68, + "learning_rate": 5.795778470085045e-07, + "loss": 0.3188, + "step": 15731 + }, + { + "epoch": 2.68, + "learning_rate": 5.789602313204179e-07, + "loss": 0.3167, + "step": 15732 + }, + { + "epoch": 2.68, + "learning_rate": 5.783429350725678e-07, + "loss": 0.3172, + "step": 15733 + }, + { + "epoch": 2.68, + "learning_rate": 5.777259582858807e-07, + "loss": 0.3148, + "step": 15734 + }, + { + "epoch": 2.68, + "learning_rate": 5.771093009812778e-07, + "loss": 0.314, + "step": 15735 + }, + { + "epoch": 2.68, + "learning_rate": 5.764929631796711e-07, + "loss": 0.3049, + "step": 15736 + }, + { + "epoch": 2.68, + "learning_rate": 5.758769449019541e-07, + "loss": 0.3166, + "step": 15737 + }, + { + "epoch": 2.68, + "learning_rate": 5.752612461690166e-07, + "loss": 0.3126, + "step": 15738 + }, + { + "epoch": 2.68, + "learning_rate": 5.746458670017352e-07, + "loss": 0.3438, + "step": 15739 + }, + { + "epoch": 2.68, + "learning_rate": 5.740308074209744e-07, + "loss": 0.2948, + "step": 15740 + }, + { + "epoch": 2.68, + "learning_rate": 5.734160674475908e-07, + "loss": 0.3251, + "step": 15741 + }, + { + "epoch": 2.68, + "learning_rate": 5.728016471024278e-07, + "loss": 0.2971, + "step": 15742 + }, + { + "epoch": 2.68, + "learning_rate": 5.721875464063187e-07, + "loss": 0.306, + "step": 15743 + }, + { + "epoch": 2.69, + "learning_rate": 5.715737653800868e-07, + "loss": 0.3307, + "step": 15744 + }, + { + "epoch": 2.69, + "learning_rate": 5.70960304044541e-07, + "loss": 0.2947, + "step": 15745 + }, + { + "epoch": 2.69, + "learning_rate": 5.703471624204848e-07, + "loss": 0.3321, + "step": 15746 + }, + { + "epoch": 2.69, + "learning_rate": 5.697343405287059e-07, + "loss": 0.2942, + "step": 15747 + }, + { + "epoch": 2.69, + "learning_rate": 5.691218383899843e-07, + "loss": 0.3065, + "step": 15748 + }, + { + "epoch": 2.69, + "learning_rate": 5.685096560250891e-07, + "loss": 0.3294, + "step": 15749 + }, + { + "epoch": 2.69, + "learning_rate": 5.678977934547758e-07, + "loss": 0.3108, + "step": 15750 + }, + { + "epoch": 2.69, + "learning_rate": 5.672862506997934e-07, + "loss": 0.276, + "step": 15751 + }, + { + "epoch": 2.69, + "learning_rate": 5.666750277808741e-07, + "loss": 0.309, + "step": 15752 + }, + { + "epoch": 2.69, + "learning_rate": 5.660641247187448e-07, + "loss": 0.3036, + "step": 15753 + }, + { + "epoch": 2.69, + "learning_rate": 5.654535415341189e-07, + "loss": 0.298, + "step": 15754 + }, + { + "epoch": 2.69, + "learning_rate": 5.648432782477009e-07, + "loss": 0.2926, + "step": 15755 + }, + { + "epoch": 2.69, + "learning_rate": 5.64233334880181e-07, + "loss": 0.3281, + "step": 15756 + }, + { + "epoch": 2.69, + "learning_rate": 5.636237114522414e-07, + "loss": 0.3006, + "step": 15757 + }, + { + "epoch": 2.69, + "learning_rate": 5.630144079845524e-07, + "loss": 0.3213, + "step": 15758 + }, + { + "epoch": 2.69, + "learning_rate": 5.624054244977772e-07, + "loss": 0.3155, + "step": 15759 + }, + { + "epoch": 2.69, + "learning_rate": 5.617967610125585e-07, + "loss": 0.3042, + "step": 15760 + }, + { + "epoch": 2.69, + "learning_rate": 5.611884175495386e-07, + "loss": 0.3138, + "step": 15761 + }, + { + "epoch": 2.69, + "learning_rate": 5.605803941293431e-07, + "loss": 0.3072, + "step": 15762 + }, + { + "epoch": 2.69, + "learning_rate": 5.599726907725889e-07, + "loss": 0.303, + "step": 15763 + }, + { + "epoch": 2.69, + "learning_rate": 5.593653074998807e-07, + "loss": 0.3168, + "step": 15764 + }, + { + "epoch": 2.69, + "learning_rate": 5.587582443318163e-07, + "loss": 0.2962, + "step": 15765 + }, + { + "epoch": 2.69, + "learning_rate": 5.581515012889749e-07, + "loss": 0.3187, + "step": 15766 + }, + { + "epoch": 2.69, + "learning_rate": 5.575450783919323e-07, + "loss": 0.3155, + "step": 15767 + }, + { + "epoch": 2.69, + "learning_rate": 5.569389756612487e-07, + "loss": 0.3368, + "step": 15768 + }, + { + "epoch": 2.69, + "learning_rate": 5.563331931174765e-07, + "loss": 0.2981, + "step": 15769 + }, + { + "epoch": 2.69, + "learning_rate": 5.557277307811548e-07, + "loss": 0.3223, + "step": 15770 + }, + { + "epoch": 2.69, + "learning_rate": 5.551225886728151e-07, + "loss": 0.3249, + "step": 15771 + }, + { + "epoch": 2.69, + "learning_rate": 5.545177668129742e-07, + "loss": 0.2919, + "step": 15772 + }, + { + "epoch": 2.69, + "learning_rate": 5.539132652221402e-07, + "loss": 0.2956, + "step": 15773 + }, + { + "epoch": 2.69, + "learning_rate": 5.533090839208133e-07, + "loss": 0.3225, + "step": 15774 + }, + { + "epoch": 2.69, + "learning_rate": 5.527052229294738e-07, + "loss": 0.3152, + "step": 15775 + }, + { + "epoch": 2.69, + "learning_rate": 5.52101682268601e-07, + "loss": 0.3182, + "step": 15776 + }, + { + "epoch": 2.69, + "learning_rate": 5.514984619586572e-07, + "loss": 0.3176, + "step": 15777 + }, + { + "epoch": 2.69, + "learning_rate": 5.508955620200962e-07, + "loss": 0.3118, + "step": 15778 + }, + { + "epoch": 2.69, + "learning_rate": 5.502929824733616e-07, + "loss": 0.2764, + "step": 15779 + }, + { + "epoch": 2.69, + "learning_rate": 5.496907233388859e-07, + "loss": 0.2842, + "step": 15780 + }, + { + "epoch": 2.69, + "learning_rate": 5.490887846370874e-07, + "loss": 0.2971, + "step": 15781 + }, + { + "epoch": 2.69, + "learning_rate": 5.484871663883773e-07, + "loss": 0.3087, + "step": 15782 + }, + { + "epoch": 2.69, + "learning_rate": 5.478858686131561e-07, + "loss": 0.2957, + "step": 15783 + }, + { + "epoch": 2.69, + "learning_rate": 5.472848913318085e-07, + "loss": 0.2989, + "step": 15784 + }, + { + "epoch": 2.69, + "learning_rate": 5.466842345647149e-07, + "loss": 0.2968, + "step": 15785 + }, + { + "epoch": 2.69, + "learning_rate": 5.460838983322414e-07, + "loss": 0.3007, + "step": 15786 + }, + { + "epoch": 2.69, + "learning_rate": 5.454838826547426e-07, + "loss": 0.3395, + "step": 15787 + }, + { + "epoch": 2.69, + "learning_rate": 5.448841875525646e-07, + "loss": 0.326, + "step": 15788 + }, + { + "epoch": 2.69, + "learning_rate": 5.442848130460421e-07, + "loss": 0.2992, + "step": 15789 + }, + { + "epoch": 2.69, + "learning_rate": 5.436857591554945e-07, + "loss": 0.2937, + "step": 15790 + }, + { + "epoch": 2.69, + "learning_rate": 5.430870259012366e-07, + "loss": 0.3306, + "step": 15791 + }, + { + "epoch": 2.69, + "learning_rate": 5.4248861330357e-07, + "loss": 0.2904, + "step": 15792 + }, + { + "epoch": 2.69, + "learning_rate": 5.418905213827841e-07, + "loss": 0.3172, + "step": 15793 + }, + { + "epoch": 2.69, + "learning_rate": 5.412927501591592e-07, + "loss": 0.3047, + "step": 15794 + }, + { + "epoch": 2.69, + "learning_rate": 5.406952996529636e-07, + "loss": 0.2965, + "step": 15795 + }, + { + "epoch": 2.69, + "learning_rate": 5.400981698844565e-07, + "loss": 0.3181, + "step": 15796 + }, + { + "epoch": 2.69, + "learning_rate": 5.395013608738831e-07, + "loss": 0.3184, + "step": 15797 + }, + { + "epoch": 2.69, + "learning_rate": 5.389048726414803e-07, + "loss": 0.3009, + "step": 15798 + }, + { + "epoch": 2.69, + "learning_rate": 5.383087052074721e-07, + "loss": 0.3046, + "step": 15799 + }, + { + "epoch": 2.69, + "learning_rate": 5.377128585920732e-07, + "loss": 0.3037, + "step": 15800 + }, + { + "epoch": 2.69, + "learning_rate": 5.371173328154877e-07, + "loss": 0.2869, + "step": 15801 + }, + { + "epoch": 2.69, + "learning_rate": 5.365221278979071e-07, + "loss": 0.3292, + "step": 15802 + }, + { + "epoch": 2.7, + "learning_rate": 5.359272438595153e-07, + "loss": 0.3409, + "step": 15803 + }, + { + "epoch": 2.7, + "learning_rate": 5.353326807204828e-07, + "loss": 0.3198, + "step": 15804 + }, + { + "epoch": 2.7, + "learning_rate": 5.347384385009669e-07, + "loss": 0.3108, + "step": 15805 + }, + { + "epoch": 2.7, + "learning_rate": 5.34144517221118e-07, + "loss": 0.2977, + "step": 15806 + }, + { + "epoch": 2.7, + "learning_rate": 5.335509169010744e-07, + "loss": 0.3111, + "step": 15807 + }, + { + "epoch": 2.7, + "learning_rate": 5.329576375609635e-07, + "loss": 0.3077, + "step": 15808 + }, + { + "epoch": 2.7, + "learning_rate": 5.323646792209014e-07, + "loss": 0.3064, + "step": 15809 + }, + { + "epoch": 2.7, + "learning_rate": 5.317720419009942e-07, + "loss": 0.3124, + "step": 15810 + }, + { + "epoch": 2.7, + "learning_rate": 5.311797256213369e-07, + "loss": 0.2989, + "step": 15811 + }, + { + "epoch": 2.7, + "learning_rate": 5.305877304020113e-07, + "loss": 0.2932, + "step": 15812 + }, + { + "epoch": 2.7, + "learning_rate": 5.299960562630923e-07, + "loss": 0.331, + "step": 15813 + }, + { + "epoch": 2.7, + "learning_rate": 5.294047032246407e-07, + "loss": 0.3206, + "step": 15814 + }, + { + "epoch": 2.7, + "learning_rate": 5.288136713067083e-07, + "loss": 0.3143, + "step": 15815 + }, + { + "epoch": 2.7, + "learning_rate": 5.282229605293332e-07, + "loss": 0.33, + "step": 15816 + }, + { + "epoch": 2.7, + "learning_rate": 5.276325709125474e-07, + "loss": 0.3237, + "step": 15817 + }, + { + "epoch": 2.7, + "learning_rate": 5.27042502476367e-07, + "loss": 0.3099, + "step": 15818 + }, + { + "epoch": 2.7, + "learning_rate": 5.264527552408038e-07, + "loss": 0.3099, + "step": 15819 + }, + { + "epoch": 2.7, + "learning_rate": 5.258633292258486e-07, + "loss": 0.307, + "step": 15820 + }, + { + "epoch": 2.7, + "learning_rate": 5.252742244514908e-07, + "loss": 0.3293, + "step": 15821 + }, + { + "epoch": 2.7, + "learning_rate": 5.246854409377033e-07, + "loss": 0.2993, + "step": 15822 + }, + { + "epoch": 2.7, + "learning_rate": 5.240969787044514e-07, + "loss": 0.3407, + "step": 15823 + }, + { + "epoch": 2.7, + "learning_rate": 5.235088377716879e-07, + "loss": 0.2941, + "step": 15824 + }, + { + "epoch": 2.7, + "learning_rate": 5.229210181593558e-07, + "loss": 0.3048, + "step": 15825 + }, + { + "epoch": 2.7, + "learning_rate": 5.223335198873858e-07, + "loss": 0.3029, + "step": 15826 + }, + { + "epoch": 2.7, + "learning_rate": 5.217463429756964e-07, + "loss": 0.3243, + "step": 15827 + }, + { + "epoch": 2.7, + "learning_rate": 5.211594874441994e-07, + "loss": 0.3038, + "step": 15828 + }, + { + "epoch": 2.7, + "learning_rate": 5.205729533127935e-07, + "loss": 0.3213, + "step": 15829 + }, + { + "epoch": 2.7, + "learning_rate": 5.199867406013648e-07, + "loss": 0.3097, + "step": 15830 + }, + { + "epoch": 2.7, + "learning_rate": 5.194008493297897e-07, + "loss": 0.3043, + "step": 15831 + }, + { + "epoch": 2.7, + "learning_rate": 5.188152795179357e-07, + "loss": 0.3226, + "step": 15832 + }, + { + "epoch": 2.7, + "learning_rate": 5.18230031185657e-07, + "loss": 0.321, + "step": 15833 + }, + { + "epoch": 2.7, + "learning_rate": 5.176451043527986e-07, + "loss": 0.3064, + "step": 15834 + }, + { + "epoch": 2.7, + "learning_rate": 5.170604990391925e-07, + "loss": 0.3152, + "step": 15835 + }, + { + "epoch": 2.7, + "learning_rate": 5.164762152646607e-07, + "loss": 0.31, + "step": 15836 + }, + { + "epoch": 2.7, + "learning_rate": 5.158922530490163e-07, + "loss": 0.3137, + "step": 15837 + }, + { + "epoch": 2.7, + "learning_rate": 5.153086124120577e-07, + "loss": 0.317, + "step": 15838 + }, + { + "epoch": 2.7, + "learning_rate": 5.147252933735758e-07, + "loss": 0.2875, + "step": 15839 + }, + { + "epoch": 2.7, + "learning_rate": 5.141422959533493e-07, + "loss": 0.3052, + "step": 15840 + }, + { + "epoch": 2.7, + "learning_rate": 5.135596201711468e-07, + "loss": 0.316, + "step": 15841 + }, + { + "epoch": 2.7, + "learning_rate": 5.129772660467236e-07, + "loss": 0.309, + "step": 15842 + }, + { + "epoch": 2.7, + "learning_rate": 5.12395233599825e-07, + "loss": 0.3025, + "step": 15843 + }, + { + "epoch": 2.7, + "learning_rate": 5.118135228501874e-07, + "loss": 0.3354, + "step": 15844 + }, + { + "epoch": 2.7, + "learning_rate": 5.112321338175352e-07, + "loss": 0.3193, + "step": 15845 + }, + { + "epoch": 2.7, + "learning_rate": 5.106510665215802e-07, + "loss": 0.3126, + "step": 15846 + }, + { + "epoch": 2.7, + "learning_rate": 5.100703209820257e-07, + "loss": 0.2756, + "step": 15847 + }, + { + "epoch": 2.7, + "learning_rate": 5.094898972185636e-07, + "loss": 0.3044, + "step": 15848 + }, + { + "epoch": 2.7, + "learning_rate": 5.089097952508748e-07, + "loss": 0.3078, + "step": 15849 + }, + { + "epoch": 2.7, + "learning_rate": 5.083300150986259e-07, + "loss": 0.2976, + "step": 15850 + }, + { + "epoch": 2.7, + "learning_rate": 5.077505567814778e-07, + "loss": 0.3006, + "step": 15851 + }, + { + "epoch": 2.7, + "learning_rate": 5.07171420319078e-07, + "loss": 0.3117, + "step": 15852 + }, + { + "epoch": 2.7, + "learning_rate": 5.065926057310632e-07, + "loss": 0.3235, + "step": 15853 + }, + { + "epoch": 2.7, + "learning_rate": 5.060141130370588e-07, + "loss": 0.3206, + "step": 15854 + }, + { + "epoch": 2.7, + "learning_rate": 5.054359422566813e-07, + "loss": 0.3104, + "step": 15855 + }, + { + "epoch": 2.7, + "learning_rate": 5.048580934095349e-07, + "loss": 0.3059, + "step": 15856 + }, + { + "epoch": 2.7, + "learning_rate": 5.042805665152107e-07, + "loss": 0.2833, + "step": 15857 + }, + { + "epoch": 2.7, + "learning_rate": 5.03703361593293e-07, + "loss": 0.306, + "step": 15858 + }, + { + "epoch": 2.7, + "learning_rate": 5.031264786633505e-07, + "loss": 0.2995, + "step": 15859 + }, + { + "epoch": 2.7, + "learning_rate": 5.025499177449467e-07, + "loss": 0.3101, + "step": 15860 + }, + { + "epoch": 2.71, + "learning_rate": 5.019736788576301e-07, + "loss": 0.2969, + "step": 15861 + }, + { + "epoch": 2.71, + "learning_rate": 5.013977620209387e-07, + "loss": 0.2973, + "step": 15862 + }, + { + "epoch": 2.71, + "learning_rate": 5.008221672544001e-07, + "loss": 0.303, + "step": 15863 + }, + { + "epoch": 2.71, + "learning_rate": 5.00246894577533e-07, + "loss": 0.3199, + "step": 15864 + }, + { + "epoch": 2.71, + "learning_rate": 4.996719440098397e-07, + "loss": 0.3057, + "step": 15865 + }, + { + "epoch": 2.71, + "learning_rate": 4.990973155708168e-07, + "loss": 0.3099, + "step": 15866 + }, + { + "epoch": 2.71, + "learning_rate": 4.985230092799498e-07, + "loss": 0.3152, + "step": 15867 + }, + { + "epoch": 2.71, + "learning_rate": 4.979490251567099e-07, + "loss": 0.3101, + "step": 15868 + }, + { + "epoch": 2.71, + "learning_rate": 4.973753632205591e-07, + "loss": 0.3317, + "step": 15869 + }, + { + "epoch": 2.71, + "learning_rate": 4.968020234909498e-07, + "loss": 0.3087, + "step": 15870 + }, + { + "epoch": 2.71, + "learning_rate": 4.962290059873242e-07, + "loss": 0.331, + "step": 15871 + }, + { + "epoch": 2.71, + "learning_rate": 4.956563107291068e-07, + "loss": 0.3035, + "step": 15872 + }, + { + "epoch": 2.71, + "learning_rate": 4.950839377357197e-07, + "loss": 0.3088, + "step": 15873 + }, + { + "epoch": 2.71, + "learning_rate": 4.945118870265686e-07, + "loss": 0.302, + "step": 15874 + }, + { + "epoch": 2.71, + "learning_rate": 4.939401586210513e-07, + "loss": 0.2934, + "step": 15875 + }, + { + "epoch": 2.71, + "learning_rate": 4.933687525385533e-07, + "loss": 0.2917, + "step": 15876 + }, + { + "epoch": 2.71, + "learning_rate": 4.927976687984482e-07, + "loss": 0.315, + "step": 15877 + }, + { + "epoch": 2.71, + "learning_rate": 4.922269074201014e-07, + "loss": 0.3171, + "step": 15878 + }, + { + "epoch": 2.71, + "learning_rate": 4.916564684228653e-07, + "loss": 0.2987, + "step": 15879 + }, + { + "epoch": 2.71, + "learning_rate": 4.9108635182608e-07, + "loss": 0.3085, + "step": 15880 + }, + { + "epoch": 2.71, + "learning_rate": 4.905165576490789e-07, + "loss": 0.3219, + "step": 15881 + }, + { + "epoch": 2.71, + "learning_rate": 4.899470859111811e-07, + "loss": 0.3106, + "step": 15882 + }, + { + "epoch": 2.71, + "learning_rate": 4.893779366316942e-07, + "loss": 0.3185, + "step": 15883 + }, + { + "epoch": 2.71, + "learning_rate": 4.888091098299197e-07, + "loss": 0.2965, + "step": 15884 + }, + { + "epoch": 2.71, + "learning_rate": 4.882406055251432e-07, + "loss": 0.3177, + "step": 15885 + }, + { + "epoch": 2.71, + "learning_rate": 4.876724237366415e-07, + "loss": 0.3128, + "step": 15886 + }, + { + "epoch": 2.71, + "learning_rate": 4.87104564483678e-07, + "loss": 0.3055, + "step": 15887 + }, + { + "epoch": 2.71, + "learning_rate": 4.865370277855097e-07, + "loss": 0.3149, + "step": 15888 + }, + { + "epoch": 2.71, + "learning_rate": 4.859698136613788e-07, + "loss": 0.3247, + "step": 15889 + }, + { + "epoch": 2.71, + "learning_rate": 4.854029221305179e-07, + "loss": 0.3351, + "step": 15890 + }, + { + "epoch": 2.71, + "learning_rate": 4.848363532121492e-07, + "loss": 0.2923, + "step": 15891 + }, + { + "epoch": 2.71, + "learning_rate": 4.84270106925484e-07, + "loss": 0.3178, + "step": 15892 + }, + { + "epoch": 2.71, + "learning_rate": 4.837041832897205e-07, + "loss": 0.3013, + "step": 15893 + }, + { + "epoch": 2.71, + "learning_rate": 4.831385823240498e-07, + "loss": 0.2947, + "step": 15894 + }, + { + "epoch": 2.71, + "learning_rate": 4.825733040476465e-07, + "loss": 0.3135, + "step": 15895 + }, + { + "epoch": 2.71, + "learning_rate": 4.820083484796801e-07, + "loss": 0.3045, + "step": 15896 + }, + { + "epoch": 2.71, + "learning_rate": 4.814437156393048e-07, + "loss": 0.3202, + "step": 15897 + }, + { + "epoch": 2.71, + "learning_rate": 4.808794055456667e-07, + "loss": 0.3015, + "step": 15898 + }, + { + "epoch": 2.71, + "learning_rate": 4.803154182179004e-07, + "loss": 0.2867, + "step": 15899 + }, + { + "epoch": 2.71, + "learning_rate": 4.797517536751295e-07, + "loss": 0.3181, + "step": 15900 + }, + { + "epoch": 2.71, + "learning_rate": 4.791884119364653e-07, + "loss": 0.3241, + "step": 15901 + }, + { + "epoch": 2.71, + "learning_rate": 4.786253930210083e-07, + "loss": 0.2952, + "step": 15902 + }, + { + "epoch": 2.71, + "learning_rate": 4.780626969478497e-07, + "loss": 0.3103, + "step": 15903 + }, + { + "epoch": 2.71, + "learning_rate": 4.775003237360687e-07, + "loss": 0.3111, + "step": 15904 + }, + { + "epoch": 2.71, + "learning_rate": 4.769382734047345e-07, + "loss": 0.3092, + "step": 15905 + }, + { + "epoch": 2.71, + "learning_rate": 4.7637654597290416e-07, + "loss": 0.2929, + "step": 15906 + }, + { + "epoch": 2.71, + "learning_rate": 4.7581514145962346e-07, + "loss": 0.3298, + "step": 15907 + }, + { + "epoch": 2.71, + "learning_rate": 4.752540598839306e-07, + "loss": 0.298, + "step": 15908 + }, + { + "epoch": 2.71, + "learning_rate": 4.746933012648469e-07, + "loss": 0.3079, + "step": 15909 + }, + { + "epoch": 2.71, + "learning_rate": 4.741328656213884e-07, + "loss": 0.317, + "step": 15910 + }, + { + "epoch": 2.71, + "learning_rate": 4.7357275297255534e-07, + "loss": 0.2995, + "step": 15911 + }, + { + "epoch": 2.71, + "learning_rate": 4.730129633373415e-07, + "loss": 0.2875, + "step": 15912 + }, + { + "epoch": 2.71, + "learning_rate": 4.724534967347272e-07, + "loss": 0.282, + "step": 15913 + }, + { + "epoch": 2.71, + "learning_rate": 4.7189435318368284e-07, + "loss": 0.3075, + "step": 15914 + }, + { + "epoch": 2.71, + "learning_rate": 4.7133553270316655e-07, + "loss": 0.2724, + "step": 15915 + }, + { + "epoch": 2.71, + "learning_rate": 4.707770353121288e-07, + "loss": 0.3145, + "step": 15916 + }, + { + "epoch": 2.71, + "learning_rate": 4.702188610295022e-07, + "loss": 0.3081, + "step": 15917 + }, + { + "epoch": 2.71, + "learning_rate": 4.6966100987421603e-07, + "loss": 0.2985, + "step": 15918 + }, + { + "epoch": 2.71, + "learning_rate": 4.691034818651841e-07, + "loss": 0.3063, + "step": 15919 + }, + { + "epoch": 2.72, + "learning_rate": 4.685462770213112e-07, + "loss": 0.3297, + "step": 15920 + }, + { + "epoch": 2.72, + "learning_rate": 4.6798939536149e-07, + "loss": 0.2863, + "step": 15921 + }, + { + "epoch": 2.72, + "learning_rate": 4.674328369046033e-07, + "loss": 0.3187, + "step": 15922 + }, + { + "epoch": 2.72, + "learning_rate": 4.6687660166952477e-07, + "loss": 0.3229, + "step": 15923 + }, + { + "epoch": 2.72, + "learning_rate": 4.6632068967510937e-07, + "loss": 0.3094, + "step": 15924 + }, + { + "epoch": 2.72, + "learning_rate": 4.657651009402098e-07, + "loss": 0.3044, + "step": 15925 + }, + { + "epoch": 2.72, + "learning_rate": 4.652098354836654e-07, + "loss": 0.336, + "step": 15926 + }, + { + "epoch": 2.72, + "learning_rate": 4.6465489332430116e-07, + "loss": 0.3048, + "step": 15927 + }, + { + "epoch": 2.72, + "learning_rate": 4.641002744809342e-07, + "loss": 0.2803, + "step": 15928 + }, + { + "epoch": 2.72, + "learning_rate": 4.6354597897237065e-07, + "loss": 0.3166, + "step": 15929 + }, + { + "epoch": 2.72, + "learning_rate": 4.629920068174043e-07, + "loss": 0.304, + "step": 15930 + }, + { + "epoch": 2.72, + "learning_rate": 4.624383580348202e-07, + "loss": 0.3024, + "step": 15931 + }, + { + "epoch": 2.72, + "learning_rate": 4.618850326433899e-07, + "loss": 0.3367, + "step": 15932 + }, + { + "epoch": 2.72, + "learning_rate": 4.6133203066187517e-07, + "loss": 0.3078, + "step": 15933 + }, + { + "epoch": 2.72, + "learning_rate": 4.607793521090265e-07, + "loss": 0.3, + "step": 15934 + }, + { + "epoch": 2.72, + "learning_rate": 4.6022699700358443e-07, + "loss": 0.3303, + "step": 15935 + }, + { + "epoch": 2.72, + "learning_rate": 4.5967496536427626e-07, + "loss": 0.2926, + "step": 15936 + }, + { + "epoch": 2.72, + "learning_rate": 4.5912325720982145e-07, + "loss": 0.3354, + "step": 15937 + }, + { + "epoch": 2.72, + "learning_rate": 4.58571872558925e-07, + "loss": 0.3192, + "step": 15938 + }, + { + "epoch": 2.72, + "learning_rate": 4.5802081143028644e-07, + "loss": 0.3124, + "step": 15939 + }, + { + "epoch": 2.72, + "learning_rate": 4.574700738425864e-07, + "loss": 0.2969, + "step": 15940 + }, + { + "epoch": 2.72, + "learning_rate": 4.5691965981450204e-07, + "loss": 0.3086, + "step": 15941 + }, + { + "epoch": 2.72, + "learning_rate": 4.5636956936469303e-07, + "loss": 0.3278, + "step": 15942 + }, + { + "epoch": 2.72, + "learning_rate": 4.5581980251181324e-07, + "loss": 0.336, + "step": 15943 + }, + { + "epoch": 2.72, + "learning_rate": 4.5527035927450337e-07, + "loss": 0.3075, + "step": 15944 + }, + { + "epoch": 2.72, + "learning_rate": 4.5472123967139403e-07, + "loss": 0.3026, + "step": 15945 + }, + { + "epoch": 2.72, + "learning_rate": 4.541724437211037e-07, + "loss": 0.3063, + "step": 15946 + }, + { + "epoch": 2.72, + "learning_rate": 4.536239714422408e-07, + "loss": 0.3082, + "step": 15947 + }, + { + "epoch": 2.72, + "learning_rate": 4.530758228534027e-07, + "loss": 0.3058, + "step": 15948 + }, + { + "epoch": 2.72, + "learning_rate": 4.525279979731745e-07, + "loss": 0.2964, + "step": 15949 + }, + { + "epoch": 2.72, + "learning_rate": 4.5198049682013134e-07, + "loss": 0.3009, + "step": 15950 + }, + { + "epoch": 2.72, + "learning_rate": 4.514333194128384e-07, + "loss": 0.2976, + "step": 15951 + }, + { + "epoch": 2.72, + "learning_rate": 4.5088646576984863e-07, + "loss": 0.3225, + "step": 15952 + }, + { + "epoch": 2.72, + "learning_rate": 4.5033993590970383e-07, + "loss": 0.3103, + "step": 15953 + }, + { + "epoch": 2.72, + "learning_rate": 4.4979372985093696e-07, + "loss": 0.3098, + "step": 15954 + }, + { + "epoch": 2.72, + "learning_rate": 4.4924784761206543e-07, + "loss": 0.2992, + "step": 15955 + }, + { + "epoch": 2.72, + "learning_rate": 4.487022892116e-07, + "loss": 0.3362, + "step": 15956 + }, + { + "epoch": 2.72, + "learning_rate": 4.481570546680403e-07, + "loss": 0.3201, + "step": 15957 + }, + { + "epoch": 2.72, + "learning_rate": 4.4761214399987153e-07, + "loss": 0.3252, + "step": 15958 + }, + { + "epoch": 2.72, + "learning_rate": 4.4706755722557003e-07, + "loss": 0.3057, + "step": 15959 + }, + { + "epoch": 2.72, + "learning_rate": 4.465232943636022e-07, + "loss": 0.3278, + "step": 15960 + }, + { + "epoch": 2.72, + "learning_rate": 4.459793554324232e-07, + "loss": 0.3094, + "step": 15961 + }, + { + "epoch": 2.72, + "learning_rate": 4.45435740450475e-07, + "loss": 0.3103, + "step": 15962 + }, + { + "epoch": 2.72, + "learning_rate": 4.4489244943619057e-07, + "loss": 0.3154, + "step": 15963 + }, + { + "epoch": 2.72, + "learning_rate": 4.443494824079897e-07, + "loss": 0.3202, + "step": 15964 + }, + { + "epoch": 2.72, + "learning_rate": 4.4380683938428537e-07, + "loss": 0.3199, + "step": 15965 + }, + { + "epoch": 2.72, + "learning_rate": 4.432645203834762e-07, + "loss": 0.3199, + "step": 15966 + }, + { + "epoch": 2.72, + "learning_rate": 4.4272252542394977e-07, + "loss": 0.3235, + "step": 15967 + }, + { + "epoch": 2.72, + "learning_rate": 4.421808545240847e-07, + "loss": 0.3083, + "step": 15968 + }, + { + "epoch": 2.72, + "learning_rate": 4.4163950770224726e-07, + "loss": 0.3229, + "step": 15969 + }, + { + "epoch": 2.72, + "learning_rate": 4.410984849767919e-07, + "loss": 0.3195, + "step": 15970 + }, + { + "epoch": 2.72, + "learning_rate": 4.405577863660648e-07, + "loss": 0.2924, + "step": 15971 + }, + { + "epoch": 2.72, + "learning_rate": 4.400174118883982e-07, + "loss": 0.337, + "step": 15972 + }, + { + "epoch": 2.72, + "learning_rate": 4.394773615621162e-07, + "loss": 0.2975, + "step": 15973 + }, + { + "epoch": 2.72, + "learning_rate": 4.389376354055275e-07, + "loss": 0.3283, + "step": 15974 + }, + { + "epoch": 2.72, + "learning_rate": 4.3839823343693523e-07, + "loss": 0.2907, + "step": 15975 + }, + { + "epoch": 2.72, + "learning_rate": 4.37859155674627e-07, + "loss": 0.3268, + "step": 15976 + }, + { + "epoch": 2.72, + "learning_rate": 4.373204021368849e-07, + "loss": 0.3099, + "step": 15977 + }, + { + "epoch": 2.72, + "learning_rate": 4.36781972841972e-07, + "loss": 0.3079, + "step": 15978 + }, + { + "epoch": 2.73, + "learning_rate": 4.362438678081471e-07, + "loss": 0.3173, + "step": 15979 + }, + { + "epoch": 2.73, + "learning_rate": 4.357060870536556e-07, + "loss": 0.3115, + "step": 15980 + }, + { + "epoch": 2.73, + "learning_rate": 4.3516863059673286e-07, + "loss": 0.319, + "step": 15981 + }, + { + "epoch": 2.73, + "learning_rate": 4.34631498455601e-07, + "loss": 0.2907, + "step": 15982 + }, + { + "epoch": 2.73, + "learning_rate": 4.340946906484733e-07, + "loss": 0.319, + "step": 15983 + }, + { + "epoch": 2.73, + "learning_rate": 4.335582071935529e-07, + "loss": 0.3086, + "step": 15984 + }, + { + "epoch": 2.73, + "learning_rate": 4.330220481090286e-07, + "loss": 0.2875, + "step": 15985 + }, + { + "epoch": 2.73, + "learning_rate": 4.324862134130792e-07, + "loss": 0.3121, + "step": 15986 + }, + { + "epoch": 2.73, + "learning_rate": 4.3195070312387455e-07, + "loss": 0.3147, + "step": 15987 + }, + { + "epoch": 2.73, + "learning_rate": 4.3141551725957354e-07, + "loss": 0.3218, + "step": 15988 + }, + { + "epoch": 2.73, + "learning_rate": 4.308806558383205e-07, + "loss": 0.3242, + "step": 15989 + }, + { + "epoch": 2.73, + "learning_rate": 4.3034611887825097e-07, + "loss": 0.3066, + "step": 15990 + }, + { + "epoch": 2.73, + "learning_rate": 4.298119063974915e-07, + "loss": 0.3028, + "step": 15991 + }, + { + "epoch": 2.73, + "learning_rate": 4.292780184141554e-07, + "loss": 0.3048, + "step": 15992 + }, + { + "epoch": 2.73, + "learning_rate": 4.2874445494634377e-07, + "loss": 0.2963, + "step": 15993 + }, + { + "epoch": 2.73, + "learning_rate": 4.282112160121488e-07, + "loss": 0.3257, + "step": 15994 + }, + { + "epoch": 2.73, + "learning_rate": 4.2767830162965153e-07, + "loss": 0.3271, + "step": 15995 + }, + { + "epoch": 2.73, + "learning_rate": 4.271457118169209e-07, + "loss": 0.3166, + "step": 15996 + }, + { + "epoch": 2.73, + "learning_rate": 4.2661344659201577e-07, + "loss": 0.3083, + "step": 15997 + }, + { + "epoch": 2.73, + "learning_rate": 4.260815059729839e-07, + "loss": 0.284, + "step": 15998 + }, + { + "epoch": 2.73, + "learning_rate": 4.255498899778632e-07, + "loss": 0.3105, + "step": 15999 + }, + { + "epoch": 2.73, + "learning_rate": 4.2501859862467687e-07, + "loss": 0.3166, + "step": 16000 + }, + { + "epoch": 2.73, + "learning_rate": 4.244876319314406e-07, + "loss": 0.3092, + "step": 16001 + }, + { + "epoch": 2.73, + "learning_rate": 4.2395698991615663e-07, + "loss": 0.2951, + "step": 16002 + }, + { + "epoch": 2.73, + "learning_rate": 4.2342667259681944e-07, + "loss": 0.3065, + "step": 16003 + }, + { + "epoch": 2.73, + "learning_rate": 4.228966799914114e-07, + "loss": 0.3263, + "step": 16004 + }, + { + "epoch": 2.73, + "learning_rate": 4.223670121178991e-07, + "loss": 0.3092, + "step": 16005 + }, + { + "epoch": 2.73, + "learning_rate": 4.218376689942449e-07, + "loss": 0.3189, + "step": 16006 + }, + { + "epoch": 2.73, + "learning_rate": 4.2130865063839786e-07, + "loss": 0.332, + "step": 16007 + }, + { + "epoch": 2.73, + "learning_rate": 4.2077995706829356e-07, + "loss": 0.293, + "step": 16008 + }, + { + "epoch": 2.73, + "learning_rate": 4.2025158830185873e-07, + "loss": 0.3202, + "step": 16009 + }, + { + "epoch": 2.73, + "learning_rate": 4.1972354435700913e-07, + "loss": 0.3097, + "step": 16010 + }, + { + "epoch": 2.73, + "learning_rate": 4.1919582525164924e-07, + "loss": 0.2956, + "step": 16011 + }, + { + "epoch": 2.73, + "learning_rate": 4.186684310036737e-07, + "loss": 0.3106, + "step": 16012 + }, + { + "epoch": 2.73, + "learning_rate": 4.1814136163096265e-07, + "loss": 0.3078, + "step": 16013 + }, + { + "epoch": 2.73, + "learning_rate": 4.1761461715139063e-07, + "loss": 0.3114, + "step": 16014 + }, + { + "epoch": 2.73, + "learning_rate": 4.170881975828145e-07, + "loss": 0.3188, + "step": 16015 + }, + { + "epoch": 2.73, + "learning_rate": 4.165621029430855e-07, + "loss": 0.315, + "step": 16016 + }, + { + "epoch": 2.73, + "learning_rate": 4.160363332500428e-07, + "loss": 0.3123, + "step": 16017 + }, + { + "epoch": 2.73, + "learning_rate": 4.1551088852151086e-07, + "loss": 0.3159, + "step": 16018 + }, + { + "epoch": 2.73, + "learning_rate": 4.1498576877530894e-07, + "loss": 0.3118, + "step": 16019 + }, + { + "epoch": 2.73, + "learning_rate": 4.144609740292416e-07, + "loss": 0.318, + "step": 16020 + }, + { + "epoch": 2.73, + "learning_rate": 4.139365043011023e-07, + "loss": 0.2853, + "step": 16021 + }, + { + "epoch": 2.73, + "learning_rate": 4.134123596086748e-07, + "loss": 0.3115, + "step": 16022 + }, + { + "epoch": 2.73, + "learning_rate": 4.128885399697313e-07, + "loss": 0.3095, + "step": 16023 + }, + { + "epoch": 2.73, + "learning_rate": 4.123650454020323e-07, + "loss": 0.3059, + "step": 16024 + }, + { + "epoch": 2.73, + "learning_rate": 4.118418759233278e-07, + "loss": 0.3068, + "step": 16025 + }, + { + "epoch": 2.73, + "learning_rate": 4.1131903155135824e-07, + "loss": 0.3081, + "step": 16026 + }, + { + "epoch": 2.73, + "learning_rate": 4.107965123038515e-07, + "loss": 0.3042, + "step": 16027 + }, + { + "epoch": 2.73, + "learning_rate": 4.102743181985247e-07, + "loss": 0.3311, + "step": 16028 + }, + { + "epoch": 2.73, + "learning_rate": 4.097524492530858e-07, + "loss": 0.3212, + "step": 16029 + }, + { + "epoch": 2.73, + "learning_rate": 4.09230905485225e-07, + "loss": 0.3188, + "step": 16030 + }, + { + "epoch": 2.73, + "learning_rate": 4.087096869126295e-07, + "loss": 0.3125, + "step": 16031 + }, + { + "epoch": 2.73, + "learning_rate": 4.081887935529727e-07, + "loss": 0.2965, + "step": 16032 + }, + { + "epoch": 2.73, + "learning_rate": 4.07668225423915e-07, + "loss": 0.3047, + "step": 16033 + }, + { + "epoch": 2.73, + "learning_rate": 4.071479825431079e-07, + "loss": 0.3038, + "step": 16034 + }, + { + "epoch": 2.73, + "learning_rate": 4.0662806492819395e-07, + "loss": 0.3058, + "step": 16035 + }, + { + "epoch": 2.73, + "learning_rate": 4.0610847259679786e-07, + "loss": 0.3014, + "step": 16036 + }, + { + "epoch": 2.74, + "learning_rate": 4.0558920556654005e-07, + "loss": 0.3346, + "step": 16037 + }, + { + "epoch": 2.74, + "learning_rate": 4.0507026385502747e-07, + "loss": 0.3057, + "step": 16038 + }, + { + "epoch": 2.74, + "learning_rate": 4.0455164747985385e-07, + "loss": 0.3049, + "step": 16039 + }, + { + "epoch": 2.74, + "learning_rate": 4.040333564586052e-07, + "loss": 0.2842, + "step": 16040 + }, + { + "epoch": 2.74, + "learning_rate": 4.0351539080885517e-07, + "loss": 0.3074, + "step": 16041 + }, + { + "epoch": 2.74, + "learning_rate": 4.0299775054816747e-07, + "loss": 0.2997, + "step": 16042 + }, + { + "epoch": 2.74, + "learning_rate": 4.0248043569409255e-07, + "loss": 0.3095, + "step": 16043 + }, + { + "epoch": 2.74, + "learning_rate": 4.0196344626417193e-07, + "loss": 0.302, + "step": 16044 + }, + { + "epoch": 2.74, + "learning_rate": 4.014467822759349e-07, + "loss": 0.3161, + "step": 16045 + }, + { + "epoch": 2.74, + "learning_rate": 4.009304437468986e-07, + "loss": 0.3167, + "step": 16046 + }, + { + "epoch": 2.74, + "learning_rate": 4.004144306945734e-07, + "loss": 0.3219, + "step": 16047 + }, + { + "epoch": 2.74, + "learning_rate": 3.9989874313645316e-07, + "loss": 0.2974, + "step": 16048 + }, + { + "epoch": 2.74, + "learning_rate": 3.9938338109002494e-07, + "loss": 0.3182, + "step": 16049 + }, + { + "epoch": 2.74, + "learning_rate": 3.9886834457276257e-07, + "loss": 0.2915, + "step": 16050 + }, + { + "epoch": 2.74, + "learning_rate": 3.9835363360213096e-07, + "loss": 0.3419, + "step": 16051 + }, + { + "epoch": 2.74, + "learning_rate": 3.9783924819557953e-07, + "loss": 0.2805, + "step": 16052 + }, + { + "epoch": 2.74, + "learning_rate": 3.9732518837055313e-07, + "loss": 0.3005, + "step": 16053 + }, + { + "epoch": 2.74, + "learning_rate": 3.96811454144479e-07, + "loss": 0.3228, + "step": 16054 + }, + { + "epoch": 2.74, + "learning_rate": 3.9629804553477645e-07, + "loss": 0.301, + "step": 16055 + }, + { + "epoch": 2.74, + "learning_rate": 3.957849625588561e-07, + "loss": 0.3061, + "step": 16056 + }, + { + "epoch": 2.74, + "learning_rate": 3.9527220523411294e-07, + "loss": 0.2998, + "step": 16057 + }, + { + "epoch": 2.74, + "learning_rate": 3.9475977357793407e-07, + "loss": 0.3121, + "step": 16058 + }, + { + "epoch": 2.74, + "learning_rate": 3.942476676076956e-07, + "loss": 0.3425, + "step": 16059 + }, + { + "epoch": 2.74, + "learning_rate": 3.937358873407593e-07, + "loss": 0.2961, + "step": 16060 + }, + { + "epoch": 2.74, + "learning_rate": 3.9322443279447895e-07, + "loss": 0.3189, + "step": 16061 + }, + { + "epoch": 2.74, + "learning_rate": 3.927133039861963e-07, + "loss": 0.3304, + "step": 16062 + }, + { + "epoch": 2.74, + "learning_rate": 3.9220250093324417e-07, + "loss": 0.3295, + "step": 16063 + }, + { + "epoch": 2.74, + "learning_rate": 3.916920236529398e-07, + "loss": 0.2981, + "step": 16064 + }, + { + "epoch": 2.74, + "learning_rate": 3.9118187216259375e-07, + "loss": 0.3135, + "step": 16065 + }, + { + "epoch": 2.74, + "learning_rate": 3.906720464795022e-07, + "loss": 0.3018, + "step": 16066 + }, + { + "epoch": 2.74, + "learning_rate": 3.9016254662095574e-07, + "loss": 0.3032, + "step": 16067 + }, + { + "epoch": 2.74, + "learning_rate": 3.8965337260422397e-07, + "loss": 0.3241, + "step": 16068 + }, + { + "epoch": 2.74, + "learning_rate": 3.891445244465775e-07, + "loss": 0.3185, + "step": 16069 + }, + { + "epoch": 2.74, + "learning_rate": 3.886360021652658e-07, + "loss": 0.3296, + "step": 16070 + }, + { + "epoch": 2.74, + "learning_rate": 3.881278057775317e-07, + "loss": 0.3167, + "step": 16071 + }, + { + "epoch": 2.74, + "learning_rate": 3.876199353006083e-07, + "loss": 0.2965, + "step": 16072 + }, + { + "epoch": 2.74, + "learning_rate": 3.871123907517149e-07, + "loss": 0.3023, + "step": 16073 + }, + { + "epoch": 2.74, + "learning_rate": 3.866051721480635e-07, + "loss": 0.2936, + "step": 16074 + }, + { + "epoch": 2.74, + "learning_rate": 3.86098279506848e-07, + "loss": 0.3282, + "step": 16075 + }, + { + "epoch": 2.74, + "learning_rate": 3.855917128452591e-07, + "loss": 0.2954, + "step": 16076 + }, + { + "epoch": 2.74, + "learning_rate": 3.8508547218047085e-07, + "loss": 0.3143, + "step": 16077 + }, + { + "epoch": 2.74, + "learning_rate": 3.845795575296496e-07, + "loss": 0.314, + "step": 16078 + }, + { + "epoch": 2.74, + "learning_rate": 3.840739689099493e-07, + "loss": 0.3091, + "step": 16079 + }, + { + "epoch": 2.74, + "learning_rate": 3.8356870633851296e-07, + "loss": 0.3299, + "step": 16080 + }, + { + "epoch": 2.74, + "learning_rate": 3.830637698324735e-07, + "loss": 0.3057, + "step": 16081 + }, + { + "epoch": 2.74, + "learning_rate": 3.825591594089506e-07, + "loss": 0.3051, + "step": 16082 + }, + { + "epoch": 2.74, + "learning_rate": 3.820548750850539e-07, + "loss": 0.3264, + "step": 16083 + }, + { + "epoch": 2.74, + "learning_rate": 3.81550916877883e-07, + "loss": 0.2995, + "step": 16084 + }, + { + "epoch": 2.74, + "learning_rate": 3.810472848045266e-07, + "loss": 0.3157, + "step": 16085 + }, + { + "epoch": 2.74, + "learning_rate": 3.8054397888205863e-07, + "loss": 0.3169, + "step": 16086 + }, + { + "epoch": 2.74, + "learning_rate": 3.800409991275478e-07, + "loss": 0.3108, + "step": 16087 + }, + { + "epoch": 2.74, + "learning_rate": 3.795383455580459e-07, + "loss": 0.3125, + "step": 16088 + }, + { + "epoch": 2.74, + "learning_rate": 3.7903601819059945e-07, + "loss": 0.3154, + "step": 16089 + }, + { + "epoch": 2.74, + "learning_rate": 3.7853401704223804e-07, + "loss": 0.3078, + "step": 16090 + }, + { + "epoch": 2.74, + "learning_rate": 3.7803234212998364e-07, + "loss": 0.3109, + "step": 16091 + }, + { + "epoch": 2.74, + "learning_rate": 3.775309934708482e-07, + "loss": 0.3071, + "step": 16092 + }, + { + "epoch": 2.74, + "learning_rate": 3.7702997108183035e-07, + "loss": 0.3329, + "step": 16093 + }, + { + "epoch": 2.74, + "learning_rate": 3.7652927497991766e-07, + "loss": 0.2995, + "step": 16094 + }, + { + "epoch": 2.74, + "learning_rate": 3.760289051820876e-07, + "loss": 0.3187, + "step": 16095 + }, + { + "epoch": 2.75, + "learning_rate": 3.7552886170530657e-07, + "loss": 0.3008, + "step": 16096 + }, + { + "epoch": 2.75, + "learning_rate": 3.7502914456652995e-07, + "loss": 0.3031, + "step": 16097 + }, + { + "epoch": 2.75, + "learning_rate": 3.7452975378270086e-07, + "loss": 0.3274, + "step": 16098 + }, + { + "epoch": 2.75, + "learning_rate": 3.740306893707513e-07, + "loss": 0.2986, + "step": 16099 + }, + { + "epoch": 2.75, + "learning_rate": 3.7353195134760657e-07, + "loss": 0.3108, + "step": 16100 + }, + { + "epoch": 2.75, + "learning_rate": 3.730335397301732e-07, + "loss": 0.3062, + "step": 16101 + }, + { + "epoch": 2.75, + "learning_rate": 3.7253545453535325e-07, + "loss": 0.3263, + "step": 16102 + }, + { + "epoch": 2.75, + "learning_rate": 3.7203769578003533e-07, + "loss": 0.3041, + "step": 16103 + }, + { + "epoch": 2.75, + "learning_rate": 3.715402634810972e-07, + "loss": 0.3057, + "step": 16104 + }, + { + "epoch": 2.75, + "learning_rate": 3.710431576554041e-07, + "loss": 0.3248, + "step": 16105 + }, + { + "epoch": 2.75, + "learning_rate": 3.7054637831981157e-07, + "loss": 0.3175, + "step": 16106 + }, + { + "epoch": 2.75, + "learning_rate": 3.700499254911649e-07, + "loss": 0.3158, + "step": 16107 + }, + { + "epoch": 2.75, + "learning_rate": 3.695537991862963e-07, + "loss": 0.2925, + "step": 16108 + }, + { + "epoch": 2.75, + "learning_rate": 3.6905799942203003e-07, + "loss": 0.3226, + "step": 16109 + }, + { + "epoch": 2.75, + "learning_rate": 3.6856252621517487e-07, + "loss": 0.3124, + "step": 16110 + }, + { + "epoch": 2.75, + "learning_rate": 3.680673795825318e-07, + "loss": 0.3098, + "step": 16111 + }, + { + "epoch": 2.75, + "learning_rate": 3.6757255954089075e-07, + "loss": 0.3228, + "step": 16112 + }, + { + "epoch": 2.75, + "learning_rate": 3.6707806610702833e-07, + "loss": 0.3095, + "step": 16113 + }, + { + "epoch": 2.75, + "learning_rate": 3.6658389929771223e-07, + "loss": 0.3152, + "step": 16114 + }, + { + "epoch": 2.75, + "learning_rate": 3.6609005912969675e-07, + "loss": 0.3175, + "step": 16115 + }, + { + "epoch": 2.75, + "learning_rate": 3.655965456197286e-07, + "loss": 0.355, + "step": 16116 + }, + { + "epoch": 2.75, + "learning_rate": 3.651033587845398e-07, + "loss": 0.3222, + "step": 16117 + }, + { + "epoch": 2.75, + "learning_rate": 3.6461049864085384e-07, + "loss": 0.3421, + "step": 16118 + }, + { + "epoch": 2.75, + "learning_rate": 3.6411796520538165e-07, + "loss": 0.3019, + "step": 16119 + }, + { + "epoch": 2.75, + "learning_rate": 3.6362575849482553e-07, + "loss": 0.3288, + "step": 16120 + }, + { + "epoch": 2.75, + "learning_rate": 3.631338785258709e-07, + "loss": 0.3205, + "step": 16121 + }, + { + "epoch": 2.75, + "learning_rate": 3.6264232531519895e-07, + "loss": 0.317, + "step": 16122 + }, + { + "epoch": 2.75, + "learning_rate": 3.621510988794752e-07, + "loss": 0.3177, + "step": 16123 + }, + { + "epoch": 2.75, + "learning_rate": 3.616601992353574e-07, + "loss": 0.3193, + "step": 16124 + }, + { + "epoch": 2.75, + "learning_rate": 3.61169626399489e-07, + "loss": 0.3235, + "step": 16125 + }, + { + "epoch": 2.75, + "learning_rate": 3.606793803885056e-07, + "loss": 0.3065, + "step": 16126 + }, + { + "epoch": 2.75, + "learning_rate": 3.6018946121902933e-07, + "loss": 0.3257, + "step": 16127 + }, + { + "epoch": 2.75, + "learning_rate": 3.5969986890767137e-07, + "loss": 0.2998, + "step": 16128 + }, + { + "epoch": 2.75, + "learning_rate": 3.5921060347103186e-07, + "loss": 0.3138, + "step": 16129 + }, + { + "epoch": 2.75, + "learning_rate": 3.5872166492570195e-07, + "loss": 0.3053, + "step": 16130 + }, + { + "epoch": 2.75, + "learning_rate": 3.582330532882583e-07, + "loss": 0.3101, + "step": 16131 + }, + { + "epoch": 2.75, + "learning_rate": 3.5774476857527107e-07, + "loss": 0.3149, + "step": 16132 + }, + { + "epoch": 2.75, + "learning_rate": 3.572568108032937e-07, + "loss": 0.306, + "step": 16133 + }, + { + "epoch": 2.75, + "learning_rate": 3.567691799888728e-07, + "loss": 0.3043, + "step": 16134 + }, + { + "epoch": 2.75, + "learning_rate": 3.5628187614854315e-07, + "loss": 0.3243, + "step": 16135 + }, + { + "epoch": 2.75, + "learning_rate": 3.5579489929882695e-07, + "loss": 0.3229, + "step": 16136 + }, + { + "epoch": 2.75, + "learning_rate": 3.553082494562354e-07, + "loss": 0.316, + "step": 16137 + }, + { + "epoch": 2.75, + "learning_rate": 3.548219266372699e-07, + "loss": 0.2965, + "step": 16138 + }, + { + "epoch": 2.75, + "learning_rate": 3.5433593085842045e-07, + "loss": 0.3349, + "step": 16139 + }, + { + "epoch": 2.75, + "learning_rate": 3.538502621361661e-07, + "loss": 0.2915, + "step": 16140 + }, + { + "epoch": 2.75, + "learning_rate": 3.533649204869749e-07, + "loss": 0.3216, + "step": 16141 + }, + { + "epoch": 2.75, + "learning_rate": 3.528799059273036e-07, + "loss": 0.2755, + "step": 16142 + }, + { + "epoch": 2.75, + "learning_rate": 3.523952184735957e-07, + "loss": 0.2997, + "step": 16143 + }, + { + "epoch": 2.75, + "learning_rate": 3.519108581422859e-07, + "loss": 0.2875, + "step": 16144 + }, + { + "epoch": 2.75, + "learning_rate": 3.5142682494979873e-07, + "loss": 0.3292, + "step": 16145 + }, + { + "epoch": 2.75, + "learning_rate": 3.5094311891254674e-07, + "loss": 0.296, + "step": 16146 + }, + { + "epoch": 2.75, + "learning_rate": 3.5045974004693006e-07, + "loss": 0.3187, + "step": 16147 + }, + { + "epoch": 2.75, + "learning_rate": 3.4997668836933783e-07, + "loss": 0.2997, + "step": 16148 + }, + { + "epoch": 2.75, + "learning_rate": 3.494939638961503e-07, + "loss": 0.3185, + "step": 16149 + }, + { + "epoch": 2.75, + "learning_rate": 3.490115666437355e-07, + "loss": 0.2925, + "step": 16150 + }, + { + "epoch": 2.75, + "learning_rate": 3.4852949662844803e-07, + "loss": 0.2848, + "step": 16151 + }, + { + "epoch": 2.75, + "learning_rate": 3.4804775386663493e-07, + "loss": 0.2884, + "step": 16152 + }, + { + "epoch": 2.75, + "learning_rate": 3.4756633837463085e-07, + "loss": 0.2819, + "step": 16153 + }, + { + "epoch": 2.76, + "learning_rate": 3.470852501687583e-07, + "loss": 0.2937, + "step": 16154 + }, + { + "epoch": 2.76, + "learning_rate": 3.4660448926533087e-07, + "loss": 0.3082, + "step": 16155 + }, + { + "epoch": 2.76, + "learning_rate": 3.4612405568064997e-07, + "loss": 0.3056, + "step": 16156 + }, + { + "epoch": 2.76, + "learning_rate": 3.456439494310049e-07, + "loss": 0.3193, + "step": 16157 + }, + { + "epoch": 2.76, + "learning_rate": 3.451641705326736e-07, + "loss": 0.3281, + "step": 16158 + }, + { + "epoch": 2.76, + "learning_rate": 3.4468471900192536e-07, + "loss": 0.3324, + "step": 16159 + }, + { + "epoch": 2.76, + "learning_rate": 3.442055948550171e-07, + "loss": 0.3093, + "step": 16160 + }, + { + "epoch": 2.76, + "learning_rate": 3.4372679810819374e-07, + "loss": 0.3352, + "step": 16161 + }, + { + "epoch": 2.76, + "learning_rate": 3.4324832877769e-07, + "loss": 0.3063, + "step": 16162 + }, + { + "epoch": 2.76, + "learning_rate": 3.4277018687973173e-07, + "loss": 0.3239, + "step": 16163 + }, + { + "epoch": 2.76, + "learning_rate": 3.4229237243052716e-07, + "loss": 0.2892, + "step": 16164 + }, + { + "epoch": 2.76, + "learning_rate": 3.418148854462822e-07, + "loss": 0.3134, + "step": 16165 + }, + { + "epoch": 2.76, + "learning_rate": 3.413377259431816e-07, + "loss": 0.3081, + "step": 16166 + }, + { + "epoch": 2.76, + "learning_rate": 3.408608939374092e-07, + "loss": 0.2812, + "step": 16167 + }, + { + "epoch": 2.76, + "learning_rate": 3.4038438944512976e-07, + "loss": 0.2883, + "step": 16168 + }, + { + "epoch": 2.76, + "learning_rate": 3.3990821248250263e-07, + "loss": 0.3003, + "step": 16169 + }, + { + "epoch": 2.76, + "learning_rate": 3.3943236306567265e-07, + "loss": 0.3425, + "step": 16170 + }, + { + "epoch": 2.76, + "learning_rate": 3.3895684121077466e-07, + "loss": 0.3209, + "step": 16171 + }, + { + "epoch": 2.76, + "learning_rate": 3.384816469339325e-07, + "loss": 0.2902, + "step": 16172 + }, + { + "epoch": 2.76, + "learning_rate": 3.3800678025125767e-07, + "loss": 0.3141, + "step": 16173 + }, + { + "epoch": 2.76, + "learning_rate": 3.3753224117885176e-07, + "loss": 0.327, + "step": 16174 + }, + { + "epoch": 2.76, + "learning_rate": 3.3705802973280523e-07, + "loss": 0.3239, + "step": 16175 + }, + { + "epoch": 2.76, + "learning_rate": 3.3658414592919743e-07, + "loss": 0.3217, + "step": 16176 + }, + { + "epoch": 2.76, + "learning_rate": 3.361105897840955e-07, + "loss": 0.3115, + "step": 16177 + }, + { + "epoch": 2.76, + "learning_rate": 3.3563736131355774e-07, + "loss": 0.3037, + "step": 16178 + }, + { + "epoch": 2.76, + "learning_rate": 3.3516446053363015e-07, + "loss": 0.3059, + "step": 16179 + }, + { + "epoch": 2.76, + "learning_rate": 3.3469188746034663e-07, + "loss": 0.3248, + "step": 16180 + }, + { + "epoch": 2.76, + "learning_rate": 3.34219642109731e-07, + "loss": 0.2984, + "step": 16181 + }, + { + "epoch": 2.76, + "learning_rate": 3.337477244977938e-07, + "loss": 0.3235, + "step": 16182 + }, + { + "epoch": 2.76, + "learning_rate": 3.332761346405389e-07, + "loss": 0.3195, + "step": 16183 + }, + { + "epoch": 2.76, + "learning_rate": 3.3280487255395564e-07, + "loss": 0.2853, + "step": 16184 + }, + { + "epoch": 2.76, + "learning_rate": 3.323339382540236e-07, + "loss": 0.2777, + "step": 16185 + }, + { + "epoch": 2.76, + "learning_rate": 3.3186333175670993e-07, + "loss": 0.2871, + "step": 16186 + }, + { + "epoch": 2.76, + "learning_rate": 3.31393053077973e-07, + "loss": 0.2944, + "step": 16187 + }, + { + "epoch": 2.76, + "learning_rate": 3.3092310223375667e-07, + "loss": 0.2907, + "step": 16188 + }, + { + "epoch": 2.76, + "learning_rate": 3.3045347923999606e-07, + "loss": 0.3358, + "step": 16189 + }, + { + "epoch": 2.76, + "learning_rate": 3.2998418411261613e-07, + "loss": 0.3084, + "step": 16190 + }, + { + "epoch": 2.76, + "learning_rate": 3.295152168675286e-07, + "loss": 0.2955, + "step": 16191 + }, + { + "epoch": 2.76, + "learning_rate": 3.2904657752063417e-07, + "loss": 0.322, + "step": 16192 + }, + { + "epoch": 2.76, + "learning_rate": 3.285782660878234e-07, + "loss": 0.3046, + "step": 16193 + }, + { + "epoch": 2.76, + "learning_rate": 3.281102825849769e-07, + "loss": 0.3107, + "step": 16194 + }, + { + "epoch": 2.76, + "learning_rate": 3.276426270279598e-07, + "loss": 0.31, + "step": 16195 + }, + { + "epoch": 2.76, + "learning_rate": 3.271752994326305e-07, + "loss": 0.321, + "step": 16196 + }, + { + "epoch": 2.76, + "learning_rate": 3.2670829981483523e-07, + "loss": 0.3028, + "step": 16197 + }, + { + "epoch": 2.76, + "learning_rate": 3.2624162819040794e-07, + "loss": 0.3246, + "step": 16198 + }, + { + "epoch": 2.76, + "learning_rate": 3.257752845751716e-07, + "loss": 0.3129, + "step": 16199 + }, + { + "epoch": 2.76, + "learning_rate": 3.253092689849391e-07, + "loss": 0.3542, + "step": 16200 + }, + { + "epoch": 2.76, + "learning_rate": 3.248435814355122e-07, + "loss": 0.2997, + "step": 16201 + }, + { + "epoch": 2.76, + "learning_rate": 3.2437822194268055e-07, + "loss": 0.3207, + "step": 16202 + }, + { + "epoch": 2.76, + "learning_rate": 3.239131905222237e-07, + "loss": 0.3014, + "step": 16203 + }, + { + "epoch": 2.76, + "learning_rate": 3.23448487189908e-07, + "loss": 0.2763, + "step": 16204 + }, + { + "epoch": 2.76, + "learning_rate": 3.229841119614907e-07, + "loss": 0.3219, + "step": 16205 + }, + { + "epoch": 2.76, + "learning_rate": 3.2252006485271826e-07, + "loss": 0.282, + "step": 16206 + }, + { + "epoch": 2.76, + "learning_rate": 3.2205634587932464e-07, + "loss": 0.3173, + "step": 16207 + }, + { + "epoch": 2.76, + "learning_rate": 3.2159295505703405e-07, + "loss": 0.3225, + "step": 16208 + }, + { + "epoch": 2.76, + "learning_rate": 3.2112989240155714e-07, + "loss": 0.3279, + "step": 16209 + }, + { + "epoch": 2.76, + "learning_rate": 3.20667157928598e-07, + "loss": 0.3269, + "step": 16210 + }, + { + "epoch": 2.76, + "learning_rate": 3.2020475165384204e-07, + "loss": 0.3129, + "step": 16211 + }, + { + "epoch": 2.76, + "learning_rate": 3.197426735929732e-07, + "loss": 0.2953, + "step": 16212 + }, + { + "epoch": 2.77, + "learning_rate": 3.192809237616545e-07, + "loss": 0.3431, + "step": 16213 + }, + { + "epoch": 2.77, + "learning_rate": 3.188195021755458e-07, + "loss": 0.2907, + "step": 16214 + }, + { + "epoch": 2.77, + "learning_rate": 3.1835840885029e-07, + "loss": 0.3053, + "step": 16215 + }, + { + "epoch": 2.77, + "learning_rate": 3.1789764380152354e-07, + "loss": 0.3205, + "step": 16216 + }, + { + "epoch": 2.77, + "learning_rate": 3.1743720704487056e-07, + "loss": 0.3153, + "step": 16217 + }, + { + "epoch": 2.77, + "learning_rate": 3.1697709859593975e-07, + "loss": 0.3024, + "step": 16218 + }, + { + "epoch": 2.77, + "learning_rate": 3.1651731847033515e-07, + "loss": 0.3187, + "step": 16219 + }, + { + "epoch": 2.77, + "learning_rate": 3.1605786668364445e-07, + "loss": 0.3186, + "step": 16220 + }, + { + "epoch": 2.77, + "learning_rate": 3.155987432514462e-07, + "loss": 0.3088, + "step": 16221 + }, + { + "epoch": 2.77, + "learning_rate": 3.1513994818931025e-07, + "loss": 0.3188, + "step": 16222 + }, + { + "epoch": 2.77, + "learning_rate": 3.1468148151279076e-07, + "loss": 0.3094, + "step": 16223 + }, + { + "epoch": 2.77, + "learning_rate": 3.142233432374353e-07, + "loss": 0.3237, + "step": 16224 + }, + { + "epoch": 2.77, + "learning_rate": 3.13765533378777e-07, + "loss": 0.303, + "step": 16225 + }, + { + "epoch": 2.77, + "learning_rate": 3.1330805195233684e-07, + "loss": 0.3044, + "step": 16226 + }, + { + "epoch": 2.77, + "learning_rate": 3.12850898973629e-07, + "loss": 0.2919, + "step": 16227 + }, + { + "epoch": 2.77, + "learning_rate": 3.123940744581544e-07, + "loss": 0.3053, + "step": 16228 + }, + { + "epoch": 2.77, + "learning_rate": 3.1193757842140183e-07, + "loss": 0.3175, + "step": 16229 + }, + { + "epoch": 2.77, + "learning_rate": 3.1148141087884884e-07, + "loss": 0.3328, + "step": 16230 + }, + { + "epoch": 2.77, + "learning_rate": 3.1102557184596317e-07, + "loss": 0.3146, + "step": 16231 + }, + { + "epoch": 2.77, + "learning_rate": 3.1057006133820344e-07, + "loss": 0.3071, + "step": 16232 + }, + { + "epoch": 2.77, + "learning_rate": 3.101148793710118e-07, + "loss": 0.3305, + "step": 16233 + }, + { + "epoch": 2.77, + "learning_rate": 3.096600259598226e-07, + "loss": 0.3089, + "step": 16234 + }, + { + "epoch": 2.77, + "learning_rate": 3.0920550112005897e-07, + "loss": 0.3212, + "step": 16235 + }, + { + "epoch": 2.77, + "learning_rate": 3.087513048671331e-07, + "loss": 0.2835, + "step": 16236 + }, + { + "epoch": 2.77, + "learning_rate": 3.082974372164449e-07, + "loss": 0.3247, + "step": 16237 + }, + { + "epoch": 2.77, + "learning_rate": 3.078438981833842e-07, + "loss": 0.3109, + "step": 16238 + }, + { + "epoch": 2.77, + "learning_rate": 3.073906877833277e-07, + "loss": 0.3288, + "step": 16239 + }, + { + "epoch": 2.77, + "learning_rate": 3.0693780603164637e-07, + "loss": 0.3096, + "step": 16240 + }, + { + "epoch": 2.77, + "learning_rate": 3.0648525294369123e-07, + "loss": 0.3237, + "step": 16241 + }, + { + "epoch": 2.77, + "learning_rate": 3.06033028534809e-07, + "loss": 0.3015, + "step": 16242 + }, + { + "epoch": 2.77, + "learning_rate": 3.055811328203351e-07, + "loss": 0.3154, + "step": 16243 + }, + { + "epoch": 2.77, + "learning_rate": 3.051295658155895e-07, + "loss": 0.3125, + "step": 16244 + }, + { + "epoch": 2.77, + "learning_rate": 3.0467832753588447e-07, + "loss": 0.3079, + "step": 16245 + }, + { + "epoch": 2.77, + "learning_rate": 3.042274179965199e-07, + "loss": 0.306, + "step": 16246 + }, + { + "epoch": 2.77, + "learning_rate": 3.0377683721278585e-07, + "loss": 0.3205, + "step": 16247 + }, + { + "epoch": 2.77, + "learning_rate": 3.0332658519995897e-07, + "loss": 0.3332, + "step": 16248 + }, + { + "epoch": 2.77, + "learning_rate": 3.028766619733059e-07, + "loss": 0.2934, + "step": 16249 + }, + { + "epoch": 2.77, + "learning_rate": 3.0242706754808335e-07, + "loss": 0.2944, + "step": 16250 + }, + { + "epoch": 2.77, + "learning_rate": 3.0197780193953474e-07, + "loss": 0.3074, + "step": 16251 + }, + { + "epoch": 2.77, + "learning_rate": 3.015288651628945e-07, + "loss": 0.3296, + "step": 16252 + }, + { + "epoch": 2.77, + "learning_rate": 3.0108025723338375e-07, + "loss": 0.3112, + "step": 16253 + }, + { + "epoch": 2.77, + "learning_rate": 3.006319781662137e-07, + "loss": 0.2995, + "step": 16254 + }, + { + "epoch": 2.77, + "learning_rate": 3.001840279765855e-07, + "loss": 0.3036, + "step": 16255 + }, + { + "epoch": 2.77, + "learning_rate": 2.9973640667968597e-07, + "loss": 0.3446, + "step": 16256 + }, + { + "epoch": 2.77, + "learning_rate": 2.9928911429069397e-07, + "loss": 0.2933, + "step": 16257 + }, + { + "epoch": 2.77, + "learning_rate": 2.988421508247741e-07, + "loss": 0.3184, + "step": 16258 + }, + { + "epoch": 2.77, + "learning_rate": 2.9839551629708417e-07, + "loss": 0.3174, + "step": 16259 + }, + { + "epoch": 2.77, + "learning_rate": 2.9794921072276663e-07, + "loss": 0.3013, + "step": 16260 + }, + { + "epoch": 2.77, + "learning_rate": 2.975032341169537e-07, + "loss": 0.3061, + "step": 16261 + }, + { + "epoch": 2.77, + "learning_rate": 2.9705758649477e-07, + "loss": 0.3086, + "step": 16262 + }, + { + "epoch": 2.77, + "learning_rate": 2.9661226787132344e-07, + "loss": 0.2943, + "step": 16263 + }, + { + "epoch": 2.77, + "learning_rate": 2.961672782617142e-07, + "loss": 0.3244, + "step": 16264 + }, + { + "epoch": 2.77, + "learning_rate": 2.957226176810302e-07, + "loss": 0.3101, + "step": 16265 + }, + { + "epoch": 2.77, + "learning_rate": 2.952782861443493e-07, + "loss": 0.3393, + "step": 16266 + }, + { + "epoch": 2.77, + "learning_rate": 2.948342836667384e-07, + "loss": 0.3258, + "step": 16267 + }, + { + "epoch": 2.77, + "learning_rate": 2.9439061026324987e-07, + "loss": 0.303, + "step": 16268 + }, + { + "epoch": 2.77, + "learning_rate": 2.939472659489295e-07, + "loss": 0.3497, + "step": 16269 + }, + { + "epoch": 2.77, + "learning_rate": 2.935042507388108e-07, + "loss": 0.3062, + "step": 16270 + }, + { + "epoch": 2.77, + "learning_rate": 2.930615646479118e-07, + "loss": 0.2921, + "step": 16271 + }, + { + "epoch": 2.78, + "learning_rate": 2.9261920769124375e-07, + "loss": 0.2927, + "step": 16272 + }, + { + "epoch": 2.78, + "learning_rate": 2.921771798838069e-07, + "loss": 0.321, + "step": 16273 + }, + { + "epoch": 2.78, + "learning_rate": 2.9173548124058924e-07, + "loss": 0.2874, + "step": 16274 + }, + { + "epoch": 2.78, + "learning_rate": 2.9129411177656663e-07, + "loss": 0.3117, + "step": 16275 + }, + { + "epoch": 2.78, + "learning_rate": 2.908530715067048e-07, + "loss": 0.313, + "step": 16276 + }, + { + "epoch": 2.78, + "learning_rate": 2.9041236044595856e-07, + "loss": 0.3076, + "step": 16277 + }, + { + "epoch": 2.78, + "learning_rate": 2.899719786092703e-07, + "loss": 0.2852, + "step": 16278 + }, + { + "epoch": 2.78, + "learning_rate": 2.8953192601157145e-07, + "loss": 0.2942, + "step": 16279 + }, + { + "epoch": 2.78, + "learning_rate": 2.890922026677856e-07, + "loss": 0.3068, + "step": 16280 + }, + { + "epoch": 2.78, + "learning_rate": 2.8865280859281973e-07, + "loss": 0.3078, + "step": 16281 + }, + { + "epoch": 2.78, + "learning_rate": 2.882137438015742e-07, + "loss": 0.3068, + "step": 16282 + }, + { + "epoch": 2.78, + "learning_rate": 2.877750083089359e-07, + "loss": 0.3319, + "step": 16283 + }, + { + "epoch": 2.78, + "learning_rate": 2.873366021297808e-07, + "loss": 0.2935, + "step": 16284 + }, + { + "epoch": 2.78, + "learning_rate": 2.8689852527897465e-07, + "loss": 0.2943, + "step": 16285 + }, + { + "epoch": 2.78, + "learning_rate": 2.8646077777137014e-07, + "loss": 0.3394, + "step": 16286 + }, + { + "epoch": 2.78, + "learning_rate": 2.8602335962181205e-07, + "loss": 0.3338, + "step": 16287 + }, + { + "epoch": 2.78, + "learning_rate": 2.855862708451296e-07, + "loss": 0.3207, + "step": 16288 + }, + { + "epoch": 2.78, + "learning_rate": 2.8514951145614424e-07, + "loss": 0.3195, + "step": 16289 + }, + { + "epoch": 2.78, + "learning_rate": 2.8471308146966634e-07, + "loss": 0.3172, + "step": 16290 + }, + { + "epoch": 2.78, + "learning_rate": 2.8427698090049307e-07, + "loss": 0.2885, + "step": 16291 + }, + { + "epoch": 2.78, + "learning_rate": 2.838412097634102e-07, + "loss": 0.3068, + "step": 16292 + }, + { + "epoch": 2.78, + "learning_rate": 2.834057680731961e-07, + "loss": 0.2985, + "step": 16293 + }, + { + "epoch": 2.78, + "learning_rate": 2.829706558446121e-07, + "loss": 0.3072, + "step": 16294 + }, + { + "epoch": 2.78, + "learning_rate": 2.8253587309241325e-07, + "loss": 0.3148, + "step": 16295 + }, + { + "epoch": 2.78, + "learning_rate": 2.821014198313421e-07, + "loss": 0.3381, + "step": 16296 + }, + { + "epoch": 2.78, + "learning_rate": 2.816672960761291e-07, + "loss": 0.3289, + "step": 16297 + }, + { + "epoch": 2.78, + "learning_rate": 2.8123350184149465e-07, + "loss": 0.3149, + "step": 16298 + }, + { + "epoch": 2.78, + "learning_rate": 2.808000371421482e-07, + "loss": 0.312, + "step": 16299 + }, + { + "epoch": 2.78, + "learning_rate": 2.803669019927857e-07, + "loss": 0.3258, + "step": 16300 + }, + { + "epoch": 2.78, + "learning_rate": 2.799340964080943e-07, + "loss": 0.301, + "step": 16301 + }, + { + "epoch": 2.78, + "learning_rate": 2.7950162040274784e-07, + "loss": 0.3384, + "step": 16302 + }, + { + "epoch": 2.78, + "learning_rate": 2.790694739914124e-07, + "loss": 0.3047, + "step": 16303 + }, + { + "epoch": 2.78, + "learning_rate": 2.7863765718873946e-07, + "loss": 0.3154, + "step": 16304 + }, + { + "epoch": 2.78, + "learning_rate": 2.7820617000937076e-07, + "loss": 0.3103, + "step": 16305 + }, + { + "epoch": 2.78, + "learning_rate": 2.77775012467939e-07, + "loss": 0.3233, + "step": 16306 + }, + { + "epoch": 2.78, + "learning_rate": 2.773441845790603e-07, + "loss": 0.2931, + "step": 16307 + }, + { + "epoch": 2.78, + "learning_rate": 2.769136863573452e-07, + "loss": 0.325, + "step": 16308 + }, + { + "epoch": 2.78, + "learning_rate": 2.764835178173886e-07, + "loss": 0.2973, + "step": 16309 + }, + { + "epoch": 2.78, + "learning_rate": 2.7605367897377665e-07, + "loss": 0.311, + "step": 16310 + }, + { + "epoch": 2.78, + "learning_rate": 2.756241698410844e-07, + "loss": 0.3239, + "step": 16311 + }, + { + "epoch": 2.78, + "learning_rate": 2.7519499043387575e-07, + "loss": 0.3279, + "step": 16312 + }, + { + "epoch": 2.78, + "learning_rate": 2.747661407667035e-07, + "loss": 0.3146, + "step": 16313 + }, + { + "epoch": 2.78, + "learning_rate": 2.743376208541071e-07, + "loss": 0.3435, + "step": 16314 + }, + { + "epoch": 2.78, + "learning_rate": 2.739094307106183e-07, + "loss": 0.3329, + "step": 16315 + }, + { + "epoch": 2.78, + "learning_rate": 2.734815703507532e-07, + "loss": 0.3061, + "step": 16316 + }, + { + "epoch": 2.78, + "learning_rate": 2.7305403978902026e-07, + "loss": 0.3075, + "step": 16317 + }, + { + "epoch": 2.78, + "learning_rate": 2.726268390399167e-07, + "loss": 0.3073, + "step": 16318 + }, + { + "epoch": 2.78, + "learning_rate": 2.721999681179277e-07, + "loss": 0.3132, + "step": 16319 + }, + { + "epoch": 2.78, + "learning_rate": 2.717734270375272e-07, + "loss": 0.322, + "step": 16320 + }, + { + "epoch": 2.78, + "learning_rate": 2.713472158131758e-07, + "loss": 0.2948, + "step": 16321 + }, + { + "epoch": 2.78, + "learning_rate": 2.7092133445932867e-07, + "loss": 0.302, + "step": 16322 + }, + { + "epoch": 2.78, + "learning_rate": 2.7049578299042424e-07, + "loss": 0.3182, + "step": 16323 + }, + { + "epoch": 2.78, + "learning_rate": 2.7007056142089205e-07, + "loss": 0.2932, + "step": 16324 + }, + { + "epoch": 2.78, + "learning_rate": 2.6964566976514836e-07, + "loss": 0.3239, + "step": 16325 + }, + { + "epoch": 2.78, + "learning_rate": 2.692211080376017e-07, + "loss": 0.3205, + "step": 16326 + }, + { + "epoch": 2.78, + "learning_rate": 2.687968762526483e-07, + "loss": 0.3129, + "step": 16327 + }, + { + "epoch": 2.78, + "learning_rate": 2.6837297442467215e-07, + "loss": 0.3094, + "step": 16328 + }, + { + "epoch": 2.78, + "learning_rate": 2.6794940256804514e-07, + "loss": 0.3257, + "step": 16329 + }, + { + "epoch": 2.79, + "learning_rate": 2.675261606971324e-07, + "loss": 0.3021, + "step": 16330 + }, + { + "epoch": 2.79, + "learning_rate": 2.671032488262826e-07, + "loss": 0.3132, + "step": 16331 + }, + { + "epoch": 2.79, + "learning_rate": 2.666806669698352e-07, + "loss": 0.3102, + "step": 16332 + }, + { + "epoch": 2.79, + "learning_rate": 2.662584151421188e-07, + "loss": 0.3059, + "step": 16333 + }, + { + "epoch": 2.79, + "learning_rate": 2.6583649335745197e-07, + "loss": 0.321, + "step": 16334 + }, + { + "epoch": 2.79, + "learning_rate": 2.6541490163013993e-07, + "loss": 0.3301, + "step": 16335 + }, + { + "epoch": 2.79, + "learning_rate": 2.649936399744779e-07, + "loss": 0.3216, + "step": 16336 + }, + { + "epoch": 2.79, + "learning_rate": 2.645727084047511e-07, + "loss": 0.3048, + "step": 16337 + }, + { + "epoch": 2.79, + "learning_rate": 2.641521069352293e-07, + "loss": 0.3171, + "step": 16338 + }, + { + "epoch": 2.79, + "learning_rate": 2.6373183558017546e-07, + "loss": 0.2973, + "step": 16339 + }, + { + "epoch": 2.79, + "learning_rate": 2.633118943538404e-07, + "loss": 0.3262, + "step": 16340 + }, + { + "epoch": 2.79, + "learning_rate": 2.6289228327046166e-07, + "loss": 0.3153, + "step": 16341 + }, + { + "epoch": 2.79, + "learning_rate": 2.624730023442668e-07, + "loss": 0.3144, + "step": 16342 + }, + { + "epoch": 2.79, + "learning_rate": 2.620540515894743e-07, + "loss": 0.2965, + "step": 16343 + }, + { + "epoch": 2.79, + "learning_rate": 2.6163543102028734e-07, + "loss": 0.3041, + "step": 16344 + }, + { + "epoch": 2.79, + "learning_rate": 2.6121714065090343e-07, + "loss": 0.2985, + "step": 16345 + }, + { + "epoch": 2.79, + "learning_rate": 2.6079918049550236e-07, + "loss": 0.3127, + "step": 16346 + }, + { + "epoch": 2.79, + "learning_rate": 2.603815505682572e-07, + "loss": 0.3039, + "step": 16347 + }, + { + "epoch": 2.79, + "learning_rate": 2.599642508833278e-07, + "loss": 0.3213, + "step": 16348 + }, + { + "epoch": 2.79, + "learning_rate": 2.5954728145486384e-07, + "loss": 0.3164, + "step": 16349 + }, + { + "epoch": 2.79, + "learning_rate": 2.5913064229700524e-07, + "loss": 0.3116, + "step": 16350 + }, + { + "epoch": 2.79, + "learning_rate": 2.587143334238773e-07, + "loss": 0.2898, + "step": 16351 + }, + { + "epoch": 2.79, + "learning_rate": 2.582983548495954e-07, + "loss": 0.2777, + "step": 16352 + }, + { + "epoch": 2.79, + "learning_rate": 2.578827065882672e-07, + "loss": 0.3021, + "step": 16353 + }, + { + "epoch": 2.79, + "learning_rate": 2.574673886539825e-07, + "loss": 0.3258, + "step": 16354 + }, + { + "epoch": 2.79, + "learning_rate": 2.570524010608266e-07, + "loss": 0.3078, + "step": 16355 + }, + { + "epoch": 2.79, + "learning_rate": 2.5663774382286733e-07, + "loss": 0.3271, + "step": 16356 + }, + { + "epoch": 2.79, + "learning_rate": 2.5622341695416663e-07, + "loss": 0.3324, + "step": 16357 + }, + { + "epoch": 2.79, + "learning_rate": 2.558094204687722e-07, + "loss": 0.3085, + "step": 16358 + }, + { + "epoch": 2.79, + "learning_rate": 2.5539575438072175e-07, + "loss": 0.3218, + "step": 16359 + }, + { + "epoch": 2.79, + "learning_rate": 2.5498241870404283e-07, + "loss": 0.3073, + "step": 16360 + }, + { + "epoch": 2.79, + "learning_rate": 2.5456941345274877e-07, + "loss": 0.3195, + "step": 16361 + }, + { + "epoch": 2.79, + "learning_rate": 2.5415673864084387e-07, + "loss": 0.3076, + "step": 16362 + }, + { + "epoch": 2.79, + "learning_rate": 2.537443942823203e-07, + "loss": 0.3274, + "step": 16363 + }, + { + "epoch": 2.79, + "learning_rate": 2.5333238039116025e-07, + "loss": 0.3428, + "step": 16364 + }, + { + "epoch": 2.79, + "learning_rate": 2.5292069698133357e-07, + "loss": 0.2752, + "step": 16365 + }, + { + "epoch": 2.79, + "learning_rate": 2.5250934406679917e-07, + "loss": 0.2839, + "step": 16366 + }, + { + "epoch": 2.79, + "learning_rate": 2.520983216615047e-07, + "loss": 0.3166, + "step": 16367 + }, + { + "epoch": 2.79, + "learning_rate": 2.516876297793891e-07, + "loss": 0.3166, + "step": 16368 + }, + { + "epoch": 2.79, + "learning_rate": 2.5127726843437337e-07, + "loss": 0.3263, + "step": 16369 + }, + { + "epoch": 2.79, + "learning_rate": 2.508672376403742e-07, + "loss": 0.3212, + "step": 16370 + }, + { + "epoch": 2.79, + "learning_rate": 2.5045753741129606e-07, + "loss": 0.3058, + "step": 16371 + }, + { + "epoch": 2.79, + "learning_rate": 2.5004816776102783e-07, + "loss": 0.3252, + "step": 16372 + }, + { + "epoch": 2.79, + "learning_rate": 2.496391287034505e-07, + "loss": 0.328, + "step": 16373 + }, + { + "epoch": 2.79, + "learning_rate": 2.4923042025243425e-07, + "loss": 0.3238, + "step": 16374 + }, + { + "epoch": 2.79, + "learning_rate": 2.4882204242183794e-07, + "loss": 0.3132, + "step": 16375 + }, + { + "epoch": 2.79, + "learning_rate": 2.4841399522550714e-07, + "loss": 0.3097, + "step": 16376 + }, + { + "epoch": 2.79, + "learning_rate": 2.4800627867727744e-07, + "loss": 0.3371, + "step": 16377 + }, + { + "epoch": 2.79, + "learning_rate": 2.475988927909745e-07, + "loss": 0.296, + "step": 16378 + }, + { + "epoch": 2.79, + "learning_rate": 2.4719183758041056e-07, + "loss": 0.3094, + "step": 16379 + }, + { + "epoch": 2.79, + "learning_rate": 2.467851130593879e-07, + "loss": 0.3104, + "step": 16380 + }, + { + "epoch": 2.79, + "learning_rate": 2.4637871924169775e-07, + "loss": 0.3075, + "step": 16381 + }, + { + "epoch": 2.79, + "learning_rate": 2.459726561411191e-07, + "loss": 0.3029, + "step": 16382 + }, + { + "epoch": 2.79, + "learning_rate": 2.4556692377142197e-07, + "loss": 0.3146, + "step": 16383 + }, + { + "epoch": 2.79, + "learning_rate": 2.4516152214636213e-07, + "loss": 0.3184, + "step": 16384 + }, + { + "epoch": 2.79, + "learning_rate": 2.4475645127968516e-07, + "loss": 0.3065, + "step": 16385 + }, + { + "epoch": 2.79, + "learning_rate": 2.443517111851279e-07, + "loss": 0.3115, + "step": 16386 + }, + { + "epoch": 2.79, + "learning_rate": 2.4394730187641156e-07, + "loss": 0.308, + "step": 16387 + }, + { + "epoch": 2.79, + "learning_rate": 2.435432233672497e-07, + "loss": 0.3221, + "step": 16388 + }, + { + "epoch": 2.8, + "learning_rate": 2.4313947567134343e-07, + "loss": 0.3104, + "step": 16389 + }, + { + "epoch": 2.8, + "learning_rate": 2.4273605880238304e-07, + "loss": 0.3162, + "step": 16390 + }, + { + "epoch": 2.8, + "learning_rate": 2.4233297277404646e-07, + "loss": 0.3079, + "step": 16391 + }, + { + "epoch": 2.8, + "learning_rate": 2.4193021760000044e-07, + "loss": 0.2969, + "step": 16392 + }, + { + "epoch": 2.8, + "learning_rate": 2.415277932939031e-07, + "loss": 0.3148, + "step": 16393 + }, + { + "epoch": 2.8, + "learning_rate": 2.411256998693978e-07, + "loss": 0.3236, + "step": 16394 + }, + { + "epoch": 2.8, + "learning_rate": 2.4072393734012046e-07, + "loss": 0.2912, + "step": 16395 + }, + { + "epoch": 2.8, + "learning_rate": 2.403225057196912e-07, + "loss": 0.2861, + "step": 16396 + }, + { + "epoch": 2.8, + "learning_rate": 2.3992140502172243e-07, + "loss": 0.3132, + "step": 16397 + }, + { + "epoch": 2.8, + "learning_rate": 2.395206352598167e-07, + "loss": 0.3261, + "step": 16398 + }, + { + "epoch": 2.8, + "learning_rate": 2.3912019644755867e-07, + "loss": 0.3219, + "step": 16399 + }, + { + "epoch": 2.8, + "learning_rate": 2.387200885985286e-07, + "loss": 0.3182, + "step": 16400 + }, + { + "epoch": 2.8, + "learning_rate": 2.383203117262922e-07, + "loss": 0.2991, + "step": 16401 + }, + { + "epoch": 2.8, + "learning_rate": 2.3792086584440655e-07, + "loss": 0.3193, + "step": 16402 + }, + { + "epoch": 2.8, + "learning_rate": 2.3752175096641294e-07, + "loss": 0.3346, + "step": 16403 + }, + { + "epoch": 2.8, + "learning_rate": 2.3712296710584504e-07, + "loss": 0.3164, + "step": 16404 + }, + { + "epoch": 2.8, + "learning_rate": 2.3672451427622645e-07, + "loss": 0.3203, + "step": 16405 + }, + { + "epoch": 2.8, + "learning_rate": 2.3632639249106525e-07, + "loss": 0.2917, + "step": 16406 + }, + { + "epoch": 2.8, + "learning_rate": 2.3592860176386067e-07, + "loss": 0.2723, + "step": 16407 + }, + { + "epoch": 2.8, + "learning_rate": 2.355311421081019e-07, + "loss": 0.3256, + "step": 16408 + }, + { + "epoch": 2.8, + "learning_rate": 2.3513401353726483e-07, + "loss": 0.3099, + "step": 16409 + }, + { + "epoch": 2.8, + "learning_rate": 2.3473721606481537e-07, + "loss": 0.3121, + "step": 16410 + }, + { + "epoch": 2.8, + "learning_rate": 2.3434074970420827e-07, + "loss": 0.3344, + "step": 16411 + }, + { + "epoch": 2.8, + "learning_rate": 2.3394461446888506e-07, + "loss": 0.2922, + "step": 16412 + }, + { + "epoch": 2.8, + "learning_rate": 2.335488103722794e-07, + "loss": 0.2987, + "step": 16413 + }, + { + "epoch": 2.8, + "learning_rate": 2.3315333742780942e-07, + "loss": 0.3173, + "step": 16414 + }, + { + "epoch": 2.8, + "learning_rate": 2.3275819564888668e-07, + "loss": 0.336, + "step": 16415 + }, + { + "epoch": 2.8, + "learning_rate": 2.323633850489082e-07, + "loss": 0.3309, + "step": 16416 + }, + { + "epoch": 2.8, + "learning_rate": 2.3196890564126107e-07, + "loss": 0.3057, + "step": 16417 + }, + { + "epoch": 2.8, + "learning_rate": 2.3157475743932235e-07, + "loss": 0.3229, + "step": 16418 + }, + { + "epoch": 2.8, + "learning_rate": 2.3118094045645468e-07, + "loss": 0.3008, + "step": 16419 + }, + { + "epoch": 2.8, + "learning_rate": 2.3078745470601183e-07, + "loss": 0.2925, + "step": 16420 + }, + { + "epoch": 2.8, + "learning_rate": 2.3039430020133425e-07, + "loss": 0.2973, + "step": 16421 + }, + { + "epoch": 2.8, + "learning_rate": 2.300014769557557e-07, + "loss": 0.3161, + "step": 16422 + }, + { + "epoch": 2.8, + "learning_rate": 2.296089849825922e-07, + "loss": 0.2928, + "step": 16423 + }, + { + "epoch": 2.8, + "learning_rate": 2.2921682429515536e-07, + "loss": 0.3089, + "step": 16424 + }, + { + "epoch": 2.8, + "learning_rate": 2.2882499490674003e-07, + "loss": 0.309, + "step": 16425 + }, + { + "epoch": 2.8, + "learning_rate": 2.2843349683063343e-07, + "loss": 0.3106, + "step": 16426 + }, + { + "epoch": 2.8, + "learning_rate": 2.2804233008010822e-07, + "loss": 0.3167, + "step": 16427 + }, + { + "epoch": 2.8, + "learning_rate": 2.2765149466843051e-07, + "loss": 0.3352, + "step": 16428 + }, + { + "epoch": 2.8, + "learning_rate": 2.2726099060885075e-07, + "loss": 0.2804, + "step": 16429 + }, + { + "epoch": 2.8, + "learning_rate": 2.268708179146084e-07, + "loss": 0.3323, + "step": 16430 + }, + { + "epoch": 2.8, + "learning_rate": 2.264809765989362e-07, + "loss": 0.3042, + "step": 16431 + }, + { + "epoch": 2.8, + "learning_rate": 2.260914666750491e-07, + "loss": 0.3144, + "step": 16432 + }, + { + "epoch": 2.8, + "learning_rate": 2.257022881561577e-07, + "loss": 0.3121, + "step": 16433 + }, + { + "epoch": 2.8, + "learning_rate": 2.2531344105545582e-07, + "loss": 0.3162, + "step": 16434 + }, + { + "epoch": 2.8, + "learning_rate": 2.2492492538612852e-07, + "loss": 0.3353, + "step": 16435 + }, + { + "epoch": 2.8, + "learning_rate": 2.2453674116135084e-07, + "loss": 0.3094, + "step": 16436 + }, + { + "epoch": 2.8, + "learning_rate": 2.2414888839428107e-07, + "loss": 0.3085, + "step": 16437 + }, + { + "epoch": 2.8, + "learning_rate": 2.237613670980743e-07, + "loss": 0.3272, + "step": 16438 + }, + { + "epoch": 2.8, + "learning_rate": 2.2337417728586664e-07, + "loss": 0.3174, + "step": 16439 + }, + { + "epoch": 2.8, + "learning_rate": 2.2298731897078983e-07, + "loss": 0.3205, + "step": 16440 + }, + { + "epoch": 2.8, + "learning_rate": 2.226007921659601e-07, + "loss": 0.3096, + "step": 16441 + }, + { + "epoch": 2.8, + "learning_rate": 2.2221459688448244e-07, + "loss": 0.3004, + "step": 16442 + }, + { + "epoch": 2.8, + "learning_rate": 2.2182873313945307e-07, + "loss": 0.3344, + "step": 16443 + }, + { + "epoch": 2.8, + "learning_rate": 2.2144320094395378e-07, + "loss": 0.3089, + "step": 16444 + }, + { + "epoch": 2.8, + "learning_rate": 2.2105800031105738e-07, + "loss": 0.2923, + "step": 16445 + }, + { + "epoch": 2.8, + "learning_rate": 2.2067313125382682e-07, + "loss": 0.3272, + "step": 16446 + }, + { + "epoch": 2.8, + "learning_rate": 2.2028859378530942e-07, + "loss": 0.3143, + "step": 16447 + }, + { + "epoch": 2.81, + "learning_rate": 2.1990438791854475e-07, + "loss": 0.3169, + "step": 16448 + }, + { + "epoch": 2.81, + "learning_rate": 2.1952051366656125e-07, + "loss": 0.3006, + "step": 16449 + }, + { + "epoch": 2.81, + "learning_rate": 2.1913697104237297e-07, + "loss": 0.3225, + "step": 16450 + }, + { + "epoch": 2.81, + "learning_rate": 2.187537600589862e-07, + "loss": 0.3036, + "step": 16451 + }, + { + "epoch": 2.81, + "learning_rate": 2.1837088072939384e-07, + "loss": 0.3136, + "step": 16452 + }, + { + "epoch": 2.81, + "learning_rate": 2.1798833306657774e-07, + "loss": 0.3118, + "step": 16453 + }, + { + "epoch": 2.81, + "learning_rate": 2.1760611708350977e-07, + "loss": 0.3189, + "step": 16454 + }, + { + "epoch": 2.81, + "learning_rate": 2.1722423279314954e-07, + "loss": 0.3037, + "step": 16455 + }, + { + "epoch": 2.81, + "learning_rate": 2.1684268020844558e-07, + "loss": 0.2997, + "step": 16456 + }, + { + "epoch": 2.81, + "learning_rate": 2.1646145934233532e-07, + "loss": 0.3088, + "step": 16457 + }, + { + "epoch": 2.81, + "learning_rate": 2.160805702077451e-07, + "loss": 0.3065, + "step": 16458 + }, + { + "epoch": 2.81, + "learning_rate": 2.1570001281758902e-07, + "loss": 0.3139, + "step": 16459 + }, + { + "epoch": 2.81, + "learning_rate": 2.1531978718477232e-07, + "loss": 0.3162, + "step": 16460 + }, + { + "epoch": 2.81, + "learning_rate": 2.1493989332218468e-07, + "loss": 0.286, + "step": 16461 + }, + { + "epoch": 2.81, + "learning_rate": 2.1456033124270913e-07, + "loss": 0.2844, + "step": 16462 + }, + { + "epoch": 2.81, + "learning_rate": 2.1418110095921542e-07, + "loss": 0.3091, + "step": 16463 + }, + { + "epoch": 2.81, + "learning_rate": 2.1380220248456207e-07, + "loss": 0.3112, + "step": 16464 + }, + { + "epoch": 2.81, + "learning_rate": 2.1342363583159664e-07, + "loss": 0.3034, + "step": 16465 + }, + { + "epoch": 2.81, + "learning_rate": 2.130454010131544e-07, + "loss": 0.3171, + "step": 16466 + }, + { + "epoch": 2.81, + "learning_rate": 2.1266749804206176e-07, + "loss": 0.3071, + "step": 16467 + }, + { + "epoch": 2.81, + "learning_rate": 2.122899269311296e-07, + "loss": 0.3178, + "step": 16468 + }, + { + "epoch": 2.81, + "learning_rate": 2.1191268769316208e-07, + "loss": 0.3087, + "step": 16469 + }, + { + "epoch": 2.81, + "learning_rate": 2.1153578034095013e-07, + "loss": 0.3027, + "step": 16470 + }, + { + "epoch": 2.81, + "learning_rate": 2.111592048872735e-07, + "loss": 0.2987, + "step": 16471 + }, + { + "epoch": 2.81, + "learning_rate": 2.1078296134490084e-07, + "loss": 0.3057, + "step": 16472 + }, + { + "epoch": 2.81, + "learning_rate": 2.1040704972658976e-07, + "loss": 0.3041, + "step": 16473 + }, + { + "epoch": 2.81, + "learning_rate": 2.1003147004508563e-07, + "loss": 0.3358, + "step": 16474 + }, + { + "epoch": 2.81, + "learning_rate": 2.0965622231312377e-07, + "loss": 0.3276, + "step": 16475 + }, + { + "epoch": 2.81, + "learning_rate": 2.0928130654342739e-07, + "loss": 0.2988, + "step": 16476 + }, + { + "epoch": 2.81, + "learning_rate": 2.089067227487085e-07, + "loss": 0.3124, + "step": 16477 + }, + { + "epoch": 2.81, + "learning_rate": 2.0853247094166917e-07, + "loss": 0.3145, + "step": 16478 + }, + { + "epoch": 2.81, + "learning_rate": 2.0815855113499928e-07, + "loss": 0.3215, + "step": 16479 + }, + { + "epoch": 2.81, + "learning_rate": 2.0778496334137755e-07, + "loss": 0.2989, + "step": 16480 + }, + { + "epoch": 2.81, + "learning_rate": 2.074117075734694e-07, + "loss": 0.2948, + "step": 16481 + }, + { + "epoch": 2.81, + "learning_rate": 2.070387838439314e-07, + "loss": 0.3195, + "step": 16482 + }, + { + "epoch": 2.81, + "learning_rate": 2.0666619216541006e-07, + "loss": 0.3146, + "step": 16483 + }, + { + "epoch": 2.81, + "learning_rate": 2.0629393255053753e-07, + "loss": 0.2978, + "step": 16484 + }, + { + "epoch": 2.81, + "learning_rate": 2.0592200501193594e-07, + "loss": 0.3109, + "step": 16485 + }, + { + "epoch": 2.81, + "learning_rate": 2.0555040956221628e-07, + "loss": 0.3336, + "step": 16486 + }, + { + "epoch": 2.81, + "learning_rate": 2.051791462139785e-07, + "loss": 0.3337, + "step": 16487 + }, + { + "epoch": 2.81, + "learning_rate": 2.048082149798125e-07, + "loss": 0.3359, + "step": 16488 + }, + { + "epoch": 2.81, + "learning_rate": 2.0443761587229271e-07, + "loss": 0.3096, + "step": 16489 + }, + { + "epoch": 2.81, + "learning_rate": 2.0406734890398682e-07, + "loss": 0.3139, + "step": 16490 + }, + { + "epoch": 2.81, + "learning_rate": 2.0369741408744926e-07, + "loss": 0.3147, + "step": 16491 + }, + { + "epoch": 2.81, + "learning_rate": 2.033278114352233e-07, + "loss": 0.2888, + "step": 16492 + }, + { + "epoch": 2.81, + "learning_rate": 2.0295854095984112e-07, + "loss": 0.3089, + "step": 16493 + }, + { + "epoch": 2.81, + "learning_rate": 2.0258960267382387e-07, + "loss": 0.2806, + "step": 16494 + }, + { + "epoch": 2.81, + "learning_rate": 2.0222099658968264e-07, + "loss": 0.3207, + "step": 16495 + }, + { + "epoch": 2.81, + "learning_rate": 2.0185272271991186e-07, + "loss": 0.3079, + "step": 16496 + }, + { + "epoch": 2.81, + "learning_rate": 2.0148478107700152e-07, + "loss": 0.3117, + "step": 16497 + }, + { + "epoch": 2.81, + "learning_rate": 2.0111717167342727e-07, + "loss": 0.2996, + "step": 16498 + }, + { + "epoch": 2.81, + "learning_rate": 2.007498945216535e-07, + "loss": 0.3244, + "step": 16499 + }, + { + "epoch": 2.81, + "learning_rate": 2.0038294963413251e-07, + "loss": 0.2898, + "step": 16500 + }, + { + "epoch": 2.81, + "learning_rate": 2.0001633702330658e-07, + "loss": 0.3096, + "step": 16501 + }, + { + "epoch": 2.81, + "learning_rate": 1.99650056701608e-07, + "loss": 0.2997, + "step": 16502 + }, + { + "epoch": 2.81, + "learning_rate": 1.9928410868145566e-07, + "loss": 0.3279, + "step": 16503 + }, + { + "epoch": 2.81, + "learning_rate": 1.9891849297525634e-07, + "loss": 0.3268, + "step": 16504 + }, + { + "epoch": 2.81, + "learning_rate": 1.9855320959540903e-07, + "loss": 0.3325, + "step": 16505 + }, + { + "epoch": 2.82, + "learning_rate": 1.9818825855429824e-07, + "loss": 0.3066, + "step": 16506 + }, + { + "epoch": 2.82, + "learning_rate": 1.9782363986429852e-07, + "loss": 0.3077, + "step": 16507 + }, + { + "epoch": 2.82, + "learning_rate": 1.9745935353777222e-07, + "loss": 0.3013, + "step": 16508 + }, + { + "epoch": 2.82, + "learning_rate": 1.970953995870739e-07, + "loss": 0.3092, + "step": 16509 + }, + { + "epoch": 2.82, + "learning_rate": 1.9673177802454257e-07, + "loss": 0.3213, + "step": 16510 + }, + { + "epoch": 2.82, + "learning_rate": 1.9636848886250615e-07, + "loss": 0.3018, + "step": 16511 + }, + { + "epoch": 2.82, + "learning_rate": 1.9600553211328477e-07, + "loss": 0.3336, + "step": 16512 + }, + { + "epoch": 2.82, + "learning_rate": 1.9564290778918415e-07, + "loss": 0.3312, + "step": 16513 + }, + { + "epoch": 2.82, + "learning_rate": 1.9528061590250113e-07, + "loss": 0.3002, + "step": 16514 + }, + { + "epoch": 2.82, + "learning_rate": 1.9491865646551922e-07, + "loss": 0.328, + "step": 16515 + }, + { + "epoch": 2.82, + "learning_rate": 1.945570294905097e-07, + "loss": 0.2913, + "step": 16516 + }, + { + "epoch": 2.82, + "learning_rate": 1.941957349897372e-07, + "loss": 0.3023, + "step": 16517 + }, + { + "epoch": 2.82, + "learning_rate": 1.9383477297545083e-07, + "loss": 0.3157, + "step": 16518 + }, + { + "epoch": 2.82, + "learning_rate": 1.9347414345988967e-07, + "loss": 0.2935, + "step": 16519 + }, + { + "epoch": 2.82, + "learning_rate": 1.9311384645528176e-07, + "loss": 0.2928, + "step": 16520 + }, + { + "epoch": 2.82, + "learning_rate": 1.927538819738428e-07, + "loss": 0.2996, + "step": 16521 + }, + { + "epoch": 2.82, + "learning_rate": 1.9239425002777977e-07, + "loss": 0.3004, + "step": 16522 + }, + { + "epoch": 2.82, + "learning_rate": 1.9203495062928623e-07, + "loss": 0.3228, + "step": 16523 + }, + { + "epoch": 2.82, + "learning_rate": 1.9167598379054463e-07, + "loss": 0.2961, + "step": 16524 + }, + { + "epoch": 2.82, + "learning_rate": 1.913173495237264e-07, + "loss": 0.3248, + "step": 16525 + }, + { + "epoch": 2.82, + "learning_rate": 1.9095904784099283e-07, + "loss": 0.2954, + "step": 16526 + }, + { + "epoch": 2.82, + "learning_rate": 1.9060107875449208e-07, + "loss": 0.2893, + "step": 16527 + }, + { + "epoch": 2.82, + "learning_rate": 1.9024344227636105e-07, + "loss": 0.3385, + "step": 16528 + }, + { + "epoch": 2.82, + "learning_rate": 1.8988613841872783e-07, + "loss": 0.3193, + "step": 16529 + }, + { + "epoch": 2.82, + "learning_rate": 1.8952916719370717e-07, + "loss": 0.3147, + "step": 16530 + }, + { + "epoch": 2.82, + "learning_rate": 1.8917252861340162e-07, + "loss": 0.316, + "step": 16531 + }, + { + "epoch": 2.82, + "learning_rate": 1.8881622268990373e-07, + "loss": 0.3124, + "step": 16532 + }, + { + "epoch": 2.82, + "learning_rate": 1.884602494352983e-07, + "loss": 0.3021, + "step": 16533 + }, + { + "epoch": 2.82, + "learning_rate": 1.8810460886165116e-07, + "loss": 0.3052, + "step": 16534 + }, + { + "epoch": 2.82, + "learning_rate": 1.8774930098102162e-07, + "loss": 0.3033, + "step": 16535 + }, + { + "epoch": 2.82, + "learning_rate": 1.8739432580546002e-07, + "loss": 0.2947, + "step": 16536 + }, + { + "epoch": 2.82, + "learning_rate": 1.8703968334700007e-07, + "loss": 0.3231, + "step": 16537 + }, + { + "epoch": 2.82, + "learning_rate": 1.866853736176666e-07, + "loss": 0.3286, + "step": 16538 + }, + { + "epoch": 2.82, + "learning_rate": 1.8633139662947442e-07, + "loss": 0.3106, + "step": 16539 + }, + { + "epoch": 2.82, + "learning_rate": 1.8597775239442505e-07, + "loss": 0.3387, + "step": 16540 + }, + { + "epoch": 2.82, + "learning_rate": 1.8562444092451116e-07, + "loss": 0.3153, + "step": 16541 + }, + { + "epoch": 2.82, + "learning_rate": 1.852714622317109e-07, + "loss": 0.2938, + "step": 16542 + }, + { + "epoch": 2.82, + "learning_rate": 1.8491881632799248e-07, + "loss": 0.3134, + "step": 16543 + }, + { + "epoch": 2.82, + "learning_rate": 1.845665032253141e-07, + "loss": 0.3027, + "step": 16544 + }, + { + "epoch": 2.82, + "learning_rate": 1.842145229356207e-07, + "loss": 0.2924, + "step": 16545 + }, + { + "epoch": 2.82, + "learning_rate": 1.8386287547084934e-07, + "loss": 0.2969, + "step": 16546 + }, + { + "epoch": 2.82, + "learning_rate": 1.8351156084291944e-07, + "loss": 0.2965, + "step": 16547 + }, + { + "epoch": 2.82, + "learning_rate": 1.8316057906374695e-07, + "loss": 0.296, + "step": 16548 + }, + { + "epoch": 2.82, + "learning_rate": 1.828099301452302e-07, + "loss": 0.3166, + "step": 16549 + }, + { + "epoch": 2.82, + "learning_rate": 1.824596140992585e-07, + "loss": 0.2986, + "step": 16550 + }, + { + "epoch": 2.82, + "learning_rate": 1.8210963093771128e-07, + "loss": 0.2977, + "step": 16551 + }, + { + "epoch": 2.82, + "learning_rate": 1.817599806724546e-07, + "loss": 0.3183, + "step": 16552 + }, + { + "epoch": 2.82, + "learning_rate": 1.8141066331534563e-07, + "loss": 0.3359, + "step": 16553 + }, + { + "epoch": 2.82, + "learning_rate": 1.810616788782271e-07, + "loss": 0.3341, + "step": 16554 + }, + { + "epoch": 2.82, + "learning_rate": 1.8071302737293294e-07, + "loss": 0.3121, + "step": 16555 + }, + { + "epoch": 2.82, + "learning_rate": 1.8036470881128476e-07, + "loss": 0.3311, + "step": 16556 + }, + { + "epoch": 2.82, + "learning_rate": 1.800167232050931e-07, + "loss": 0.3153, + "step": 16557 + }, + { + "epoch": 2.82, + "learning_rate": 1.7966907056615635e-07, + "loss": 0.309, + "step": 16558 + }, + { + "epoch": 2.82, + "learning_rate": 1.793217509062628e-07, + "loss": 0.3021, + "step": 16559 + }, + { + "epoch": 2.82, + "learning_rate": 1.7897476423718862e-07, + "loss": 0.3288, + "step": 16560 + }, + { + "epoch": 2.82, + "learning_rate": 1.7862811057070105e-07, + "loss": 0.3232, + "step": 16561 + }, + { + "epoch": 2.82, + "learning_rate": 1.7828178991855295e-07, + "loss": 0.3106, + "step": 16562 + }, + { + "epoch": 2.82, + "learning_rate": 1.7793580229248596e-07, + "loss": 0.29, + "step": 16563 + }, + { + "epoch": 2.82, + "learning_rate": 1.77590147704233e-07, + "loss": 0.2814, + "step": 16564 + }, + { + "epoch": 2.83, + "learning_rate": 1.7724482616551352e-07, + "loss": 0.3392, + "step": 16565 + }, + { + "epoch": 2.83, + "learning_rate": 1.768998376880371e-07, + "loss": 0.3109, + "step": 16566 + }, + { + "epoch": 2.83, + "learning_rate": 1.7655518228349987e-07, + "loss": 0.3103, + "step": 16567 + }, + { + "epoch": 2.83, + "learning_rate": 1.7621085996358921e-07, + "loss": 0.328, + "step": 16568 + }, + { + "epoch": 2.83, + "learning_rate": 1.758668707399791e-07, + "loss": 0.3072, + "step": 16569 + }, + { + "epoch": 2.83, + "learning_rate": 1.7552321462433463e-07, + "loss": 0.324, + "step": 16570 + }, + { + "epoch": 2.83, + "learning_rate": 1.7517989162830872e-07, + "loss": 0.3064, + "step": 16571 + }, + { + "epoch": 2.83, + "learning_rate": 1.7483690176353985e-07, + "loss": 0.28, + "step": 16572 + }, + { + "epoch": 2.83, + "learning_rate": 1.744942450416598e-07, + "loss": 0.3299, + "step": 16573 + }, + { + "epoch": 2.83, + "learning_rate": 1.7415192147428595e-07, + "loss": 0.3003, + "step": 16574 + }, + { + "epoch": 2.83, + "learning_rate": 1.738099310730268e-07, + "loss": 0.3115, + "step": 16575 + }, + { + "epoch": 2.83, + "learning_rate": 1.7346827384947639e-07, + "loss": 0.301, + "step": 16576 + }, + { + "epoch": 2.83, + "learning_rate": 1.7312694981522216e-07, + "loss": 0.3129, + "step": 16577 + }, + { + "epoch": 2.83, + "learning_rate": 1.727859589818348e-07, + "loss": 0.2913, + "step": 16578 + }, + { + "epoch": 2.83, + "learning_rate": 1.7244530136087845e-07, + "loss": 0.3213, + "step": 16579 + }, + { + "epoch": 2.83, + "learning_rate": 1.721049769639005e-07, + "loss": 0.3142, + "step": 16580 + }, + { + "epoch": 2.83, + "learning_rate": 1.7176498580244394e-07, + "loss": 0.3232, + "step": 16581 + }, + { + "epoch": 2.83, + "learning_rate": 1.71425327888034e-07, + "loss": 0.302, + "step": 16582 + }, + { + "epoch": 2.83, + "learning_rate": 1.7108600323219038e-07, + "loss": 0.3077, + "step": 16583 + }, + { + "epoch": 2.83, + "learning_rate": 1.7074701184641607e-07, + "loss": 0.3116, + "step": 16584 + }, + { + "epoch": 2.83, + "learning_rate": 1.7040835374220633e-07, + "loss": 0.3194, + "step": 16585 + }, + { + "epoch": 2.83, + "learning_rate": 1.7007002893104528e-07, + "loss": 0.3009, + "step": 16586 + }, + { + "epoch": 2.83, + "learning_rate": 1.6973203742440268e-07, + "loss": 0.3062, + "step": 16587 + }, + { + "epoch": 2.83, + "learning_rate": 1.6939437923373936e-07, + "loss": 0.33, + "step": 16588 + }, + { + "epoch": 2.83, + "learning_rate": 1.69057054370505e-07, + "loss": 0.3214, + "step": 16589 + }, + { + "epoch": 2.83, + "learning_rate": 1.6872006284613608e-07, + "loss": 0.3304, + "step": 16590 + }, + { + "epoch": 2.83, + "learning_rate": 1.6838340467206004e-07, + "loss": 0.296, + "step": 16591 + }, + { + "epoch": 2.83, + "learning_rate": 1.6804707985969338e-07, + "loss": 0.3065, + "step": 16592 + }, + { + "epoch": 2.83, + "learning_rate": 1.6771108842043583e-07, + "loss": 0.2898, + "step": 16593 + }, + { + "epoch": 2.83, + "learning_rate": 1.6737543036568382e-07, + "loss": 0.3199, + "step": 16594 + }, + { + "epoch": 2.83, + "learning_rate": 1.6704010570681718e-07, + "loss": 0.3228, + "step": 16595 + }, + { + "epoch": 2.83, + "learning_rate": 1.6670511445520453e-07, + "loss": 0.3039, + "step": 16596 + }, + { + "epoch": 2.83, + "learning_rate": 1.6637045662220575e-07, + "loss": 0.2897, + "step": 16597 + }, + { + "epoch": 2.83, + "learning_rate": 1.6603613221916725e-07, + "loss": 0.3202, + "step": 16598 + }, + { + "epoch": 2.83, + "learning_rate": 1.6570214125742667e-07, + "loss": 0.3276, + "step": 16599 + }, + { + "epoch": 2.83, + "learning_rate": 1.6536848374830716e-07, + "loss": 0.2868, + "step": 16600 + }, + { + "epoch": 2.83, + "learning_rate": 1.650351597031241e-07, + "loss": 0.3168, + "step": 16601 + }, + { + "epoch": 2.83, + "learning_rate": 1.6470216913317628e-07, + "loss": 0.3242, + "step": 16602 + }, + { + "epoch": 2.83, + "learning_rate": 1.6436951204975682e-07, + "loss": 0.3299, + "step": 16603 + }, + { + "epoch": 2.83, + "learning_rate": 1.640371884641445e-07, + "loss": 0.3005, + "step": 16604 + }, + { + "epoch": 2.83, + "learning_rate": 1.6370519838760701e-07, + "loss": 0.3139, + "step": 16605 + }, + { + "epoch": 2.83, + "learning_rate": 1.6337354183140198e-07, + "loss": 0.3026, + "step": 16606 + }, + { + "epoch": 2.83, + "learning_rate": 1.6304221880677486e-07, + "loss": 0.3242, + "step": 16607 + }, + { + "epoch": 2.83, + "learning_rate": 1.6271122932495997e-07, + "loss": 0.3152, + "step": 16608 + }, + { + "epoch": 2.83, + "learning_rate": 1.623805733971795e-07, + "loss": 0.3006, + "step": 16609 + }, + { + "epoch": 2.83, + "learning_rate": 1.6205025103464556e-07, + "loss": 0.3175, + "step": 16610 + }, + { + "epoch": 2.83, + "learning_rate": 1.6172026224855808e-07, + "loss": 0.3223, + "step": 16611 + }, + { + "epoch": 2.83, + "learning_rate": 1.6139060705010589e-07, + "loss": 0.3069, + "step": 16612 + }, + { + "epoch": 2.83, + "learning_rate": 1.6106128545046783e-07, + "loss": 0.3086, + "step": 16613 + }, + { + "epoch": 2.83, + "learning_rate": 1.6073229746080832e-07, + "loss": 0.3059, + "step": 16614 + }, + { + "epoch": 2.83, + "learning_rate": 1.6040364309228396e-07, + "loss": 0.3235, + "step": 16615 + }, + { + "epoch": 2.83, + "learning_rate": 1.6007532235603806e-07, + "loss": 0.3358, + "step": 16616 + }, + { + "epoch": 2.83, + "learning_rate": 1.5974733526320284e-07, + "loss": 0.3196, + "step": 16617 + }, + { + "epoch": 2.83, + "learning_rate": 1.594196818248994e-07, + "loss": 0.308, + "step": 16618 + }, + { + "epoch": 2.83, + "learning_rate": 1.5909236205223665e-07, + "loss": 0.2858, + "step": 16619 + }, + { + "epoch": 2.83, + "learning_rate": 1.587653759563157e-07, + "loss": 0.3248, + "step": 16620 + }, + { + "epoch": 2.83, + "learning_rate": 1.5843872354822099e-07, + "loss": 0.3238, + "step": 16621 + }, + { + "epoch": 2.83, + "learning_rate": 1.5811240483902924e-07, + "loss": 0.3188, + "step": 16622 + }, + { + "epoch": 2.83, + "learning_rate": 1.5778641983980603e-07, + "loss": 0.3094, + "step": 16623 + }, + { + "epoch": 2.84, + "learning_rate": 1.5746076856160253e-07, + "loss": 0.3026, + "step": 16624 + }, + { + "epoch": 2.84, + "learning_rate": 1.5713545101546212e-07, + "loss": 0.3278, + "step": 16625 + }, + { + "epoch": 2.84, + "learning_rate": 1.56810467212416e-07, + "loss": 0.2909, + "step": 16626 + }, + { + "epoch": 2.84, + "learning_rate": 1.5648581716348198e-07, + "loss": 0.3074, + "step": 16627 + }, + { + "epoch": 2.84, + "learning_rate": 1.5616150087966797e-07, + "loss": 0.3318, + "step": 16628 + }, + { + "epoch": 2.84, + "learning_rate": 1.558375183719707e-07, + "loss": 0.3271, + "step": 16629 + }, + { + "epoch": 2.84, + "learning_rate": 1.5551386965137694e-07, + "loss": 0.3156, + "step": 16630 + }, + { + "epoch": 2.84, + "learning_rate": 1.55190554728859e-07, + "loss": 0.3155, + "step": 16631 + }, + { + "epoch": 2.84, + "learning_rate": 1.5486757361538041e-07, + "loss": 0.3113, + "step": 16632 + }, + { + "epoch": 2.84, + "learning_rate": 1.5454492632189233e-07, + "loss": 0.2834, + "step": 16633 + }, + { + "epoch": 2.84, + "learning_rate": 1.542226128593338e-07, + "loss": 0.3065, + "step": 16634 + }, + { + "epoch": 2.84, + "learning_rate": 1.5390063323863614e-07, + "loss": 0.2905, + "step": 16635 + }, + { + "epoch": 2.84, + "learning_rate": 1.5357898747071387e-07, + "loss": 0.3125, + "step": 16636 + }, + { + "epoch": 2.84, + "learning_rate": 1.5325767556647387e-07, + "loss": 0.3219, + "step": 16637 + }, + { + "epoch": 2.84, + "learning_rate": 1.5293669753681295e-07, + "loss": 0.3012, + "step": 16638 + }, + { + "epoch": 2.84, + "learning_rate": 1.5261605339261243e-07, + "loss": 0.2996, + "step": 16639 + }, + { + "epoch": 2.84, + "learning_rate": 1.5229574314474472e-07, + "loss": 0.3111, + "step": 16640 + }, + { + "epoch": 2.84, + "learning_rate": 1.5197576680407e-07, + "loss": 0.2952, + "step": 16641 + }, + { + "epoch": 2.84, + "learning_rate": 1.5165612438144074e-07, + "loss": 0.3068, + "step": 16642 + }, + { + "epoch": 2.84, + "learning_rate": 1.5133681588769044e-07, + "loss": 0.3181, + "step": 16643 + }, + { + "epoch": 2.84, + "learning_rate": 1.510178413336494e-07, + "loss": 0.3133, + "step": 16644 + }, + { + "epoch": 2.84, + "learning_rate": 1.506992007301322e-07, + "loss": 0.2989, + "step": 16645 + }, + { + "epoch": 2.84, + "learning_rate": 1.5038089408794366e-07, + "loss": 0.2943, + "step": 16646 + }, + { + "epoch": 2.84, + "learning_rate": 1.5006292141787504e-07, + "loss": 0.2973, + "step": 16647 + }, + { + "epoch": 2.84, + "learning_rate": 1.4974528273070887e-07, + "loss": 0.302, + "step": 16648 + }, + { + "epoch": 2.84, + "learning_rate": 1.4942797803721543e-07, + "loss": 0.3143, + "step": 16649 + }, + { + "epoch": 2.84, + "learning_rate": 1.4911100734815277e-07, + "loss": 0.3177, + "step": 16650 + }, + { + "epoch": 2.84, + "learning_rate": 1.4879437067426895e-07, + "loss": 0.3283, + "step": 16651 + }, + { + "epoch": 2.84, + "learning_rate": 1.4847806802630093e-07, + "loss": 0.3162, + "step": 16652 + }, + { + "epoch": 2.84, + "learning_rate": 1.481620994149735e-07, + "loss": 0.2738, + "step": 16653 + }, + { + "epoch": 2.84, + "learning_rate": 1.478464648510003e-07, + "loss": 0.2862, + "step": 16654 + }, + { + "epoch": 2.84, + "learning_rate": 1.4753116434508163e-07, + "loss": 0.2944, + "step": 16655 + }, + { + "epoch": 2.84, + "learning_rate": 1.472161979079101e-07, + "loss": 0.3187, + "step": 16656 + }, + { + "epoch": 2.84, + "learning_rate": 1.46901565550166e-07, + "loss": 0.3282, + "step": 16657 + }, + { + "epoch": 2.84, + "learning_rate": 1.4658726728251638e-07, + "loss": 0.3381, + "step": 16658 + }, + { + "epoch": 2.84, + "learning_rate": 1.4627330311561827e-07, + "loss": 0.3179, + "step": 16659 + }, + { + "epoch": 2.84, + "learning_rate": 1.459596730601165e-07, + "loss": 0.3086, + "step": 16660 + }, + { + "epoch": 2.84, + "learning_rate": 1.456463771266481e-07, + "loss": 0.331, + "step": 16661 + }, + { + "epoch": 2.84, + "learning_rate": 1.4533341532583345e-07, + "loss": 0.3211, + "step": 16662 + }, + { + "epoch": 2.84, + "learning_rate": 1.450207876682852e-07, + "loss": 0.3237, + "step": 16663 + }, + { + "epoch": 2.84, + "learning_rate": 1.4470849416460264e-07, + "loss": 0.3143, + "step": 16664 + }, + { + "epoch": 2.84, + "learning_rate": 1.443965348253762e-07, + "loss": 0.3242, + "step": 16665 + }, + { + "epoch": 2.84, + "learning_rate": 1.4408490966118293e-07, + "loss": 0.3019, + "step": 16666 + }, + { + "epoch": 2.84, + "learning_rate": 1.4377361868259e-07, + "loss": 0.2923, + "step": 16667 + }, + { + "epoch": 2.84, + "learning_rate": 1.4346266190015113e-07, + "loss": 0.3082, + "step": 16668 + }, + { + "epoch": 2.84, + "learning_rate": 1.4315203932440903e-07, + "loss": 0.3208, + "step": 16669 + }, + { + "epoch": 2.84, + "learning_rate": 1.4284175096589859e-07, + "loss": 0.3109, + "step": 16670 + }, + { + "epoch": 2.84, + "learning_rate": 1.4253179683513917e-07, + "loss": 0.3087, + "step": 16671 + }, + { + "epoch": 2.84, + "learning_rate": 1.4222217694264017e-07, + "loss": 0.338, + "step": 16672 + }, + { + "epoch": 2.84, + "learning_rate": 1.4191289129890097e-07, + "loss": 0.2991, + "step": 16673 + }, + { + "epoch": 2.84, + "learning_rate": 1.416039399144087e-07, + "loss": 0.3214, + "step": 16674 + }, + { + "epoch": 2.84, + "learning_rate": 1.412953227996372e-07, + "loss": 0.314, + "step": 16675 + }, + { + "epoch": 2.84, + "learning_rate": 1.409870399650537e-07, + "loss": 0.2963, + "step": 16676 + }, + { + "epoch": 2.84, + "learning_rate": 1.406790914211076e-07, + "loss": 0.2996, + "step": 16677 + }, + { + "epoch": 2.84, + "learning_rate": 1.4037147717824272e-07, + "loss": 0.2982, + "step": 16678 + }, + { + "epoch": 2.84, + "learning_rate": 1.400641972468897e-07, + "loss": 0.3296, + "step": 16679 + }, + { + "epoch": 2.84, + "learning_rate": 1.397572516374668e-07, + "loss": 0.3063, + "step": 16680 + }, + { + "epoch": 2.84, + "learning_rate": 1.3945064036038125e-07, + "loss": 0.3167, + "step": 16681 + }, + { + "epoch": 2.85, + "learning_rate": 1.3914436342603032e-07, + "loss": 0.2945, + "step": 16682 + }, + { + "epoch": 2.85, + "learning_rate": 1.3883842084479903e-07, + "loss": 0.292, + "step": 16683 + }, + { + "epoch": 2.85, + "learning_rate": 1.385328126270591e-07, + "loss": 0.3028, + "step": 16684 + }, + { + "epoch": 2.85, + "learning_rate": 1.382275387831744e-07, + "loss": 0.314, + "step": 16685 + }, + { + "epoch": 2.85, + "learning_rate": 1.3792259932349672e-07, + "loss": 0.2745, + "step": 16686 + }, + { + "epoch": 2.85, + "learning_rate": 1.376179942583633e-07, + "loss": 0.3117, + "step": 16687 + }, + { + "epoch": 2.85, + "learning_rate": 1.3731372359810368e-07, + "loss": 0.2907, + "step": 16688 + }, + { + "epoch": 2.85, + "learning_rate": 1.3700978735303627e-07, + "loss": 0.3154, + "step": 16689 + }, + { + "epoch": 2.85, + "learning_rate": 1.3670618553346505e-07, + "loss": 0.3063, + "step": 16690 + }, + { + "epoch": 2.85, + "learning_rate": 1.3640291814968398e-07, + "loss": 0.3042, + "step": 16691 + }, + { + "epoch": 2.85, + "learning_rate": 1.360999852119771e-07, + "loss": 0.3021, + "step": 16692 + }, + { + "epoch": 2.85, + "learning_rate": 1.3579738673061394e-07, + "loss": 0.2829, + "step": 16693 + }, + { + "epoch": 2.85, + "learning_rate": 1.354951227158574e-07, + "loss": 0.2926, + "step": 16694 + }, + { + "epoch": 2.85, + "learning_rate": 1.3519319317795487e-07, + "loss": 0.3074, + "step": 16695 + }, + { + "epoch": 2.85, + "learning_rate": 1.348915981271437e-07, + "loss": 0.3263, + "step": 16696 + }, + { + "epoch": 2.85, + "learning_rate": 1.3459033757365126e-07, + "loss": 0.2838, + "step": 16697 + }, + { + "epoch": 2.85, + "learning_rate": 1.342894115276916e-07, + "loss": 0.3042, + "step": 16698 + }, + { + "epoch": 2.85, + "learning_rate": 1.3398881999946878e-07, + "loss": 0.3247, + "step": 16699 + }, + { + "epoch": 2.85, + "learning_rate": 1.3368856299917354e-07, + "loss": 0.3299, + "step": 16700 + }, + { + "epoch": 2.85, + "learning_rate": 1.3338864053698887e-07, + "loss": 0.3058, + "step": 16701 + }, + { + "epoch": 2.85, + "learning_rate": 1.3308905262308213e-07, + "loss": 0.3024, + "step": 16702 + }, + { + "epoch": 2.85, + "learning_rate": 1.3278979926761304e-07, + "loss": 0.3015, + "step": 16703 + }, + { + "epoch": 2.85, + "learning_rate": 1.32490880480729e-07, + "loss": 0.2963, + "step": 16704 + }, + { + "epoch": 2.85, + "learning_rate": 1.3219229627256304e-07, + "loss": 0.3047, + "step": 16705 + }, + { + "epoch": 2.85, + "learning_rate": 1.3189404665324145e-07, + "loss": 0.318, + "step": 16706 + }, + { + "epoch": 2.85, + "learning_rate": 1.315961316328762e-07, + "loss": 0.2997, + "step": 16707 + }, + { + "epoch": 2.85, + "learning_rate": 1.312985512215692e-07, + "loss": 0.3008, + "step": 16708 + }, + { + "epoch": 2.85, + "learning_rate": 1.3100130542940903e-07, + "loss": 0.3064, + "step": 16709 + }, + { + "epoch": 2.85, + "learning_rate": 1.307043942664754e-07, + "loss": 0.2945, + "step": 16710 + }, + { + "epoch": 2.85, + "learning_rate": 1.3040781774283695e-07, + "loss": 0.318, + "step": 16711 + }, + { + "epoch": 2.85, + "learning_rate": 1.3011157586854784e-07, + "loss": 0.305, + "step": 16712 + }, + { + "epoch": 2.85, + "learning_rate": 1.2981566865365336e-07, + "loss": 0.2998, + "step": 16713 + }, + { + "epoch": 2.85, + "learning_rate": 1.2952009610818772e-07, + "loss": 0.3097, + "step": 16714 + }, + { + "epoch": 2.85, + "learning_rate": 1.292248582421729e-07, + "loss": 0.2994, + "step": 16715 + }, + { + "epoch": 2.85, + "learning_rate": 1.2892995506561755e-07, + "loss": 0.3153, + "step": 16716 + }, + { + "epoch": 2.85, + "learning_rate": 1.286353865885226e-07, + "loss": 0.3089, + "step": 16717 + }, + { + "epoch": 2.85, + "learning_rate": 1.2834115282087556e-07, + "loss": 0.2905, + "step": 16718 + }, + { + "epoch": 2.85, + "learning_rate": 1.2804725377265403e-07, + "loss": 0.3046, + "step": 16719 + }, + { + "epoch": 2.85, + "learning_rate": 1.2775368945382228e-07, + "loss": 0.2882, + "step": 16720 + }, + { + "epoch": 2.85, + "learning_rate": 1.2746045987433454e-07, + "loss": 0.318, + "step": 16721 + }, + { + "epoch": 2.85, + "learning_rate": 1.2716756504413396e-07, + "loss": 0.3124, + "step": 16722 + }, + { + "epoch": 2.85, + "learning_rate": 1.2687500497315043e-07, + "loss": 0.3186, + "step": 16723 + }, + { + "epoch": 2.85, + "learning_rate": 1.2658277967130372e-07, + "loss": 0.3049, + "step": 16724 + }, + { + "epoch": 2.85, + "learning_rate": 1.2629088914850375e-07, + "loss": 0.3302, + "step": 16725 + }, + { + "epoch": 2.85, + "learning_rate": 1.2599933341464699e-07, + "loss": 0.3247, + "step": 16726 + }, + { + "epoch": 2.85, + "learning_rate": 1.257081124796189e-07, + "loss": 0.3384, + "step": 16727 + }, + { + "epoch": 2.85, + "learning_rate": 1.254172263532949e-07, + "loss": 0.2907, + "step": 16728 + }, + { + "epoch": 2.85, + "learning_rate": 1.251266750455382e-07, + "loss": 0.3187, + "step": 16729 + }, + { + "epoch": 2.85, + "learning_rate": 1.2483645856619874e-07, + "loss": 0.3093, + "step": 16730 + }, + { + "epoch": 2.85, + "learning_rate": 1.2454657692511862e-07, + "loss": 0.3166, + "step": 16731 + }, + { + "epoch": 2.85, + "learning_rate": 1.2425703013212552e-07, + "loss": 0.3081, + "step": 16732 + }, + { + "epoch": 2.85, + "learning_rate": 1.239678181970383e-07, + "loss": 0.3195, + "step": 16733 + }, + { + "epoch": 2.85, + "learning_rate": 1.236789411296635e-07, + "loss": 0.3241, + "step": 16734 + }, + { + "epoch": 2.85, + "learning_rate": 1.2339039893979553e-07, + "loss": 0.3084, + "step": 16735 + }, + { + "epoch": 2.85, + "learning_rate": 1.2310219163721772e-07, + "loss": 0.3039, + "step": 16736 + }, + { + "epoch": 2.85, + "learning_rate": 1.228143192317033e-07, + "loss": 0.3005, + "step": 16737 + }, + { + "epoch": 2.85, + "learning_rate": 1.2252678173301225e-07, + "loss": 0.2889, + "step": 16738 + }, + { + "epoch": 2.85, + "learning_rate": 1.2223957915089457e-07, + "loss": 0.3179, + "step": 16739 + }, + { + "epoch": 2.85, + "learning_rate": 1.2195271149508802e-07, + "loss": 0.2969, + "step": 16740 + }, + { + "epoch": 2.86, + "learning_rate": 1.2166617877531928e-07, + "loss": 0.287, + "step": 16741 + }, + { + "epoch": 2.86, + "learning_rate": 1.21379981001305e-07, + "loss": 0.3121, + "step": 16742 + }, + { + "epoch": 2.86, + "learning_rate": 1.2109411818274851e-07, + "loss": 0.3102, + "step": 16743 + }, + { + "epoch": 2.86, + "learning_rate": 1.2080859032934433e-07, + "loss": 0.2923, + "step": 16744 + }, + { + "epoch": 2.86, + "learning_rate": 1.2052339745077025e-07, + "loss": 0.3067, + "step": 16745 + }, + { + "epoch": 2.86, + "learning_rate": 1.202385395566985e-07, + "loss": 0.3051, + "step": 16746 + }, + { + "epoch": 2.86, + "learning_rate": 1.1995401665678918e-07, + "loss": 0.3202, + "step": 16747 + }, + { + "epoch": 2.86, + "learning_rate": 1.1966982876068678e-07, + "loss": 0.282, + "step": 16748 + }, + { + "epoch": 2.86, + "learning_rate": 1.1938597587802914e-07, + "loss": 0.3153, + "step": 16749 + }, + { + "epoch": 2.86, + "learning_rate": 1.1910245801843967e-07, + "loss": 0.2894, + "step": 16750 + }, + { + "epoch": 2.86, + "learning_rate": 1.1881927519153402e-07, + "loss": 0.3524, + "step": 16751 + }, + { + "epoch": 2.86, + "learning_rate": 1.1853642740691118e-07, + "loss": 0.3014, + "step": 16752 + }, + { + "epoch": 2.86, + "learning_rate": 1.1825391467416348e-07, + "loss": 0.2909, + "step": 16753 + }, + { + "epoch": 2.86, + "learning_rate": 1.179717370028699e-07, + "loss": 0.3084, + "step": 16754 + }, + { + "epoch": 2.86, + "learning_rate": 1.1768989440259726e-07, + "loss": 0.339, + "step": 16755 + }, + { + "epoch": 2.86, + "learning_rate": 1.1740838688290345e-07, + "loss": 0.2781, + "step": 16756 + }, + { + "epoch": 2.86, + "learning_rate": 1.1712721445333198e-07, + "loss": 0.3075, + "step": 16757 + }, + { + "epoch": 2.86, + "learning_rate": 1.1684637712341851e-07, + "loss": 0.3029, + "step": 16758 + }, + { + "epoch": 2.86, + "learning_rate": 1.1656587490268434e-07, + "loss": 0.3109, + "step": 16759 + }, + { + "epoch": 2.86, + "learning_rate": 1.1628570780063964e-07, + "loss": 0.3325, + "step": 16760 + }, + { + "epoch": 2.86, + "learning_rate": 1.1600587582678568e-07, + "loss": 0.2902, + "step": 16761 + }, + { + "epoch": 2.86, + "learning_rate": 1.1572637899061045e-07, + "loss": 0.3167, + "step": 16762 + }, + { + "epoch": 2.86, + "learning_rate": 1.154472173015897e-07, + "loss": 0.3356, + "step": 16763 + }, + { + "epoch": 2.86, + "learning_rate": 1.1516839076918918e-07, + "loss": 0.316, + "step": 16764 + }, + { + "epoch": 2.86, + "learning_rate": 1.1488989940286465e-07, + "loss": 0.303, + "step": 16765 + }, + { + "epoch": 2.86, + "learning_rate": 1.1461174321205859e-07, + "loss": 0.3217, + "step": 16766 + }, + { + "epoch": 2.86, + "learning_rate": 1.143339222062012e-07, + "loss": 0.3318, + "step": 16767 + }, + { + "epoch": 2.86, + "learning_rate": 1.1405643639471275e-07, + "loss": 0.3358, + "step": 16768 + }, + { + "epoch": 2.86, + "learning_rate": 1.1377928578700347e-07, + "loss": 0.3125, + "step": 16769 + }, + { + "epoch": 2.86, + "learning_rate": 1.135024703924692e-07, + "loss": 0.3256, + "step": 16770 + }, + { + "epoch": 2.86, + "learning_rate": 1.1322599022049685e-07, + "loss": 0.3068, + "step": 16771 + }, + { + "epoch": 2.86, + "learning_rate": 1.1294984528046004e-07, + "loss": 0.3372, + "step": 16772 + }, + { + "epoch": 2.86, + "learning_rate": 1.1267403558172352e-07, + "loss": 0.3191, + "step": 16773 + }, + { + "epoch": 2.86, + "learning_rate": 1.1239856113363867e-07, + "loss": 0.313, + "step": 16774 + }, + { + "epoch": 2.86, + "learning_rate": 1.1212342194554471e-07, + "loss": 0.3035, + "step": 16775 + }, + { + "epoch": 2.86, + "learning_rate": 1.1184861802677305e-07, + "loss": 0.2991, + "step": 16776 + }, + { + "epoch": 2.86, + "learning_rate": 1.1157414938663957e-07, + "loss": 0.2918, + "step": 16777 + }, + { + "epoch": 2.86, + "learning_rate": 1.1130001603445128e-07, + "loss": 0.3046, + "step": 16778 + }, + { + "epoch": 2.86, + "learning_rate": 1.1102621797950297e-07, + "loss": 0.341, + "step": 16779 + }, + { + "epoch": 2.86, + "learning_rate": 1.1075275523107943e-07, + "loss": 0.2949, + "step": 16780 + }, + { + "epoch": 2.86, + "learning_rate": 1.1047962779845323e-07, + "loss": 0.3498, + "step": 16781 + }, + { + "epoch": 2.86, + "learning_rate": 1.1020683569088364e-07, + "loss": 0.3135, + "step": 16782 + }, + { + "epoch": 2.86, + "learning_rate": 1.0993437891762104e-07, + "loss": 0.309, + "step": 16783 + }, + { + "epoch": 2.86, + "learning_rate": 1.096622574879036e-07, + "loss": 0.2778, + "step": 16784 + }, + { + "epoch": 2.86, + "learning_rate": 1.0939047141095949e-07, + "loss": 0.316, + "step": 16785 + }, + { + "epoch": 2.86, + "learning_rate": 1.0911902069600133e-07, + "loss": 0.3122, + "step": 16786 + }, + { + "epoch": 2.86, + "learning_rate": 1.088479053522351e-07, + "loss": 0.2968, + "step": 16787 + }, + { + "epoch": 2.86, + "learning_rate": 1.0857712538885456e-07, + "loss": 0.3077, + "step": 16788 + }, + { + "epoch": 2.86, + "learning_rate": 1.0830668081503904e-07, + "loss": 0.3105, + "step": 16789 + }, + { + "epoch": 2.86, + "learning_rate": 1.0803657163995896e-07, + "loss": 0.3203, + "step": 16790 + }, + { + "epoch": 2.86, + "learning_rate": 1.0776679787277366e-07, + "loss": 0.3307, + "step": 16791 + }, + { + "epoch": 2.86, + "learning_rate": 1.0749735952263029e-07, + "loss": 0.3228, + "step": 16792 + }, + { + "epoch": 2.86, + "learning_rate": 1.0722825659866376e-07, + "loss": 0.326, + "step": 16793 + }, + { + "epoch": 2.86, + "learning_rate": 1.0695948910999898e-07, + "loss": 0.2976, + "step": 16794 + }, + { + "epoch": 2.86, + "learning_rate": 1.0669105706574978e-07, + "loss": 0.3011, + "step": 16795 + }, + { + "epoch": 2.86, + "learning_rate": 1.0642296047501888e-07, + "loss": 0.304, + "step": 16796 + }, + { + "epoch": 2.86, + "learning_rate": 1.0615519934689344e-07, + "loss": 0.2957, + "step": 16797 + }, + { + "epoch": 2.86, + "learning_rate": 1.0588777369045511e-07, + "loss": 0.3243, + "step": 16798 + }, + { + "epoch": 2.87, + "learning_rate": 1.0562068351477106e-07, + "loss": 0.3084, + "step": 16799 + }, + { + "epoch": 2.87, + "learning_rate": 1.0535392882889627e-07, + "loss": 0.303, + "step": 16800 + }, + { + "epoch": 2.87, + "learning_rate": 1.0508750964187797e-07, + "loss": 0.3105, + "step": 16801 + }, + { + "epoch": 2.87, + "learning_rate": 1.0482142596274669e-07, + "loss": 0.3001, + "step": 16802 + }, + { + "epoch": 2.87, + "learning_rate": 1.0455567780052744e-07, + "loss": 0.3196, + "step": 16803 + }, + { + "epoch": 2.87, + "learning_rate": 1.0429026516422968e-07, + "loss": 0.3072, + "step": 16804 + }, + { + "epoch": 2.87, + "learning_rate": 1.0402518806285178e-07, + "loss": 0.2968, + "step": 16805 + }, + { + "epoch": 2.87, + "learning_rate": 1.0376044650538209e-07, + "loss": 0.3206, + "step": 16806 + }, + { + "epoch": 2.87, + "learning_rate": 1.0349604050079898e-07, + "loss": 0.3144, + "step": 16807 + }, + { + "epoch": 2.87, + "learning_rate": 1.032319700580664e-07, + "loss": 0.3151, + "step": 16808 + }, + { + "epoch": 2.87, + "learning_rate": 1.029682351861383e-07, + "loss": 0.3073, + "step": 16809 + }, + { + "epoch": 2.87, + "learning_rate": 1.0270483589395752e-07, + "loss": 0.3004, + "step": 16810 + }, + { + "epoch": 2.87, + "learning_rate": 1.024417721904547e-07, + "loss": 0.3251, + "step": 16811 + }, + { + "epoch": 2.87, + "learning_rate": 1.0217904408455048e-07, + "loss": 0.3354, + "step": 16812 + }, + { + "epoch": 2.87, + "learning_rate": 1.0191665158515108e-07, + "loss": 0.3214, + "step": 16813 + }, + { + "epoch": 2.87, + "learning_rate": 1.0165459470115602e-07, + "loss": 0.3129, + "step": 16814 + }, + { + "epoch": 2.87, + "learning_rate": 1.0139287344144933e-07, + "loss": 0.3369, + "step": 16815 + }, + { + "epoch": 2.87, + "learning_rate": 1.0113148781490611e-07, + "loss": 0.3258, + "step": 16816 + }, + { + "epoch": 2.87, + "learning_rate": 1.008704378303882e-07, + "loss": 0.3101, + "step": 16817 + }, + { + "epoch": 2.87, + "learning_rate": 1.0060972349674847e-07, + "loss": 0.3205, + "step": 16818 + }, + { + "epoch": 2.87, + "learning_rate": 1.0034934482282543e-07, + "loss": 0.3302, + "step": 16819 + }, + { + "epoch": 2.87, + "learning_rate": 1.0008930181744869e-07, + "loss": 0.3126, + "step": 16820 + }, + { + "epoch": 2.87, + "learning_rate": 9.982959448943452e-08, + "loss": 0.3109, + "step": 16821 + }, + { + "epoch": 2.87, + "learning_rate": 9.957022284759033e-08, + "loss": 0.289, + "step": 16822 + }, + { + "epoch": 2.87, + "learning_rate": 9.931118690071017e-08, + "loss": 0.3193, + "step": 16823 + }, + { + "epoch": 2.87, + "learning_rate": 9.905248665757595e-08, + "loss": 0.3091, + "step": 16824 + }, + { + "epoch": 2.87, + "learning_rate": 9.879412212696171e-08, + "loss": 0.3097, + "step": 16825 + }, + { + "epoch": 2.87, + "learning_rate": 9.853609331762604e-08, + "loss": 0.3171, + "step": 16826 + }, + { + "epoch": 2.87, + "learning_rate": 9.827840023831858e-08, + "loss": 0.3269, + "step": 16827 + }, + { + "epoch": 2.87, + "learning_rate": 9.802104289777681e-08, + "loss": 0.2981, + "step": 16828 + }, + { + "epoch": 2.87, + "learning_rate": 9.776402130472817e-08, + "loss": 0.3021, + "step": 16829 + }, + { + "epoch": 2.87, + "learning_rate": 9.75073354678846e-08, + "loss": 0.298, + "step": 16830 + }, + { + "epoch": 2.87, + "learning_rate": 9.725098539595246e-08, + "loss": 0.3132, + "step": 16831 + }, + { + "epoch": 2.87, + "learning_rate": 9.699497109762367e-08, + "loss": 0.3034, + "step": 16832 + }, + { + "epoch": 2.87, + "learning_rate": 9.673929258157689e-08, + "loss": 0.313, + "step": 16833 + }, + { + "epoch": 2.87, + "learning_rate": 9.648394985648401e-08, + "loss": 0.3028, + "step": 16834 + }, + { + "epoch": 2.87, + "learning_rate": 9.622894293100038e-08, + "loss": 0.3126, + "step": 16835 + }, + { + "epoch": 2.87, + "learning_rate": 9.59742718137735e-08, + "loss": 0.2884, + "step": 16836 + }, + { + "epoch": 2.87, + "learning_rate": 9.571993651343869e-08, + "loss": 0.3063, + "step": 16837 + }, + { + "epoch": 2.87, + "learning_rate": 9.546593703862018e-08, + "loss": 0.2957, + "step": 16838 + }, + { + "epoch": 2.87, + "learning_rate": 9.521227339792993e-08, + "loss": 0.2847, + "step": 16839 + }, + { + "epoch": 2.87, + "learning_rate": 9.495894559996888e-08, + "loss": 0.3127, + "step": 16840 + }, + { + "epoch": 2.87, + "learning_rate": 9.47059536533268e-08, + "loss": 0.3079, + "step": 16841 + }, + { + "epoch": 2.87, + "learning_rate": 9.445329756658239e-08, + "loss": 0.2899, + "step": 16842 + }, + { + "epoch": 2.87, + "learning_rate": 9.420097734830103e-08, + "loss": 0.3027, + "step": 16843 + }, + { + "epoch": 2.87, + "learning_rate": 9.39489930070403e-08, + "loss": 0.3286, + "step": 16844 + }, + { + "epoch": 2.87, + "learning_rate": 9.369734455134227e-08, + "loss": 0.3126, + "step": 16845 + }, + { + "epoch": 2.87, + "learning_rate": 9.344603198974011e-08, + "loss": 0.2956, + "step": 16846 + }, + { + "epoch": 2.87, + "learning_rate": 9.31950553307559e-08, + "loss": 0.3035, + "step": 16847 + }, + { + "epoch": 2.87, + "learning_rate": 9.29444145828995e-08, + "loss": 0.3151, + "step": 16848 + }, + { + "epoch": 2.87, + "learning_rate": 9.269410975466853e-08, + "loss": 0.2956, + "step": 16849 + }, + { + "epoch": 2.87, + "learning_rate": 9.244414085455066e-08, + "loss": 0.3207, + "step": 16850 + }, + { + "epoch": 2.87, + "learning_rate": 9.219450789102135e-08, + "loss": 0.3039, + "step": 16851 + }, + { + "epoch": 2.87, + "learning_rate": 9.194521087254382e-08, + "loss": 0.3069, + "step": 16852 + }, + { + "epoch": 2.87, + "learning_rate": 9.169624980757353e-08, + "loss": 0.28, + "step": 16853 + }, + { + "epoch": 2.87, + "learning_rate": 9.144762470455037e-08, + "loss": 0.2939, + "step": 16854 + }, + { + "epoch": 2.87, + "learning_rate": 9.119933557190542e-08, + "loss": 0.3176, + "step": 16855 + }, + { + "epoch": 2.87, + "learning_rate": 9.095138241805745e-08, + "loss": 0.2957, + "step": 16856 + }, + { + "epoch": 2.87, + "learning_rate": 9.0703765251412e-08, + "loss": 0.3285, + "step": 16857 + }, + { + "epoch": 2.88, + "learning_rate": 9.045648408036679e-08, + "loss": 0.3029, + "step": 16858 + }, + { + "epoch": 2.88, + "learning_rate": 9.020953891330509e-08, + "loss": 0.3043, + "step": 16859 + }, + { + "epoch": 2.88, + "learning_rate": 8.996292975860243e-08, + "loss": 0.3048, + "step": 16860 + }, + { + "epoch": 2.88, + "learning_rate": 8.97166566246177e-08, + "loss": 0.3311, + "step": 16861 + }, + { + "epoch": 2.88, + "learning_rate": 8.94707195197042e-08, + "loss": 0.3228, + "step": 16862 + }, + { + "epoch": 2.88, + "learning_rate": 8.922511845219972e-08, + "loss": 0.2809, + "step": 16863 + }, + { + "epoch": 2.88, + "learning_rate": 8.897985343042981e-08, + "loss": 0.3168, + "step": 16864 + }, + { + "epoch": 2.88, + "learning_rate": 8.873492446271448e-08, + "loss": 0.3, + "step": 16865 + }, + { + "epoch": 2.88, + "learning_rate": 8.849033155735598e-08, + "loss": 0.302, + "step": 16866 + }, + { + "epoch": 2.88, + "learning_rate": 8.824607472264768e-08, + "loss": 0.3128, + "step": 16867 + }, + { + "epoch": 2.88, + "learning_rate": 8.800215396687295e-08, + "loss": 0.3051, + "step": 16868 + }, + { + "epoch": 2.88, + "learning_rate": 8.775856929830184e-08, + "loss": 0.3089, + "step": 16869 + }, + { + "epoch": 2.88, + "learning_rate": 8.751532072519332e-08, + "loss": 0.3044, + "step": 16870 + }, + { + "epoch": 2.88, + "learning_rate": 8.72724082557963e-08, + "loss": 0.3159, + "step": 16871 + }, + { + "epoch": 2.88, + "learning_rate": 8.702983189834536e-08, + "loss": 0.3235, + "step": 16872 + }, + { + "epoch": 2.88, + "learning_rate": 8.67875916610672e-08, + "loss": 0.3127, + "step": 16873 + }, + { + "epoch": 2.88, + "learning_rate": 8.65456875521753e-08, + "loss": 0.3169, + "step": 16874 + }, + { + "epoch": 2.88, + "learning_rate": 8.630411957987084e-08, + "loss": 0.3092, + "step": 16875 + }, + { + "epoch": 2.88, + "learning_rate": 8.606288775234617e-08, + "loss": 0.3053, + "step": 16876 + }, + { + "epoch": 2.88, + "learning_rate": 8.58219920777803e-08, + "loss": 0.3038, + "step": 16877 + }, + { + "epoch": 2.88, + "learning_rate": 8.558143256434004e-08, + "loss": 0.3025, + "step": 16878 + }, + { + "epoch": 2.88, + "learning_rate": 8.53412092201844e-08, + "loss": 0.3242, + "step": 16879 + }, + { + "epoch": 2.88, + "learning_rate": 8.510132205345578e-08, + "loss": 0.3104, + "step": 16880 + }, + { + "epoch": 2.88, + "learning_rate": 8.486177107229099e-08, + "loss": 0.3165, + "step": 16881 + }, + { + "epoch": 2.88, + "learning_rate": 8.46225562848102e-08, + "loss": 0.3215, + "step": 16882 + }, + { + "epoch": 2.88, + "learning_rate": 8.43836776991258e-08, + "loss": 0.288, + "step": 16883 + }, + { + "epoch": 2.88, + "learning_rate": 8.41451353233369e-08, + "loss": 0.3049, + "step": 16884 + }, + { + "epoch": 2.88, + "learning_rate": 8.390692916553145e-08, + "loss": 0.312, + "step": 16885 + }, + { + "epoch": 2.88, + "learning_rate": 8.366905923378854e-08, + "loss": 0.2927, + "step": 16886 + }, + { + "epoch": 2.88, + "learning_rate": 8.343152553617062e-08, + "loss": 0.2978, + "step": 16887 + }, + { + "epoch": 2.88, + "learning_rate": 8.319432808073347e-08, + "loss": 0.3215, + "step": 16888 + }, + { + "epoch": 2.88, + "learning_rate": 8.295746687551953e-08, + "loss": 0.3437, + "step": 16889 + }, + { + "epoch": 2.88, + "learning_rate": 8.272094192855906e-08, + "loss": 0.2897, + "step": 16890 + }, + { + "epoch": 2.88, + "learning_rate": 8.248475324787452e-08, + "loss": 0.3115, + "step": 16891 + }, + { + "epoch": 2.88, + "learning_rate": 8.224890084147174e-08, + "loss": 0.3125, + "step": 16892 + }, + { + "epoch": 2.88, + "learning_rate": 8.201338471734876e-08, + "loss": 0.322, + "step": 16893 + }, + { + "epoch": 2.88, + "learning_rate": 8.177820488349142e-08, + "loss": 0.3191, + "step": 16894 + }, + { + "epoch": 2.88, + "learning_rate": 8.154336134787332e-08, + "loss": 0.3171, + "step": 16895 + }, + { + "epoch": 2.88, + "learning_rate": 8.130885411845813e-08, + "loss": 0.3237, + "step": 16896 + }, + { + "epoch": 2.88, + "learning_rate": 8.107468320319834e-08, + "loss": 0.2932, + "step": 16897 + }, + { + "epoch": 2.88, + "learning_rate": 8.084084861003094e-08, + "loss": 0.2872, + "step": 16898 + }, + { + "epoch": 2.88, + "learning_rate": 8.060735034688738e-08, + "loss": 0.3221, + "step": 16899 + }, + { + "epoch": 2.88, + "learning_rate": 8.037418842168243e-08, + "loss": 0.3134, + "step": 16900 + }, + { + "epoch": 2.88, + "learning_rate": 8.014136284232532e-08, + "loss": 0.2959, + "step": 16901 + }, + { + "epoch": 2.88, + "learning_rate": 7.990887361670752e-08, + "loss": 0.3276, + "step": 16902 + }, + { + "epoch": 2.88, + "learning_rate": 7.967672075271271e-08, + "loss": 0.3155, + "step": 16903 + }, + { + "epoch": 2.88, + "learning_rate": 7.94449042582135e-08, + "loss": 0.3075, + "step": 16904 + }, + { + "epoch": 2.88, + "learning_rate": 7.921342414106914e-08, + "loss": 0.3051, + "step": 16905 + }, + { + "epoch": 2.88, + "learning_rate": 7.898228040912893e-08, + "loss": 0.3218, + "step": 16906 + }, + { + "epoch": 2.88, + "learning_rate": 7.8751473070231e-08, + "loss": 0.3074, + "step": 16907 + }, + { + "epoch": 2.88, + "learning_rate": 7.852100213219916e-08, + "loss": 0.3252, + "step": 16908 + }, + { + "epoch": 2.88, + "learning_rate": 7.829086760285042e-08, + "loss": 0.2912, + "step": 16909 + }, + { + "epoch": 2.88, + "learning_rate": 7.806106948998749e-08, + "loss": 0.3003, + "step": 16910 + }, + { + "epoch": 2.88, + "learning_rate": 7.783160780139964e-08, + "loss": 0.3173, + "step": 16911 + }, + { + "epoch": 2.88, + "learning_rate": 7.760248254487068e-08, + "loss": 0.3223, + "step": 16912 + }, + { + "epoch": 2.88, + "learning_rate": 7.737369372816883e-08, + "loss": 0.3145, + "step": 16913 + }, + { + "epoch": 2.88, + "learning_rate": 7.71452413590501e-08, + "loss": 0.2872, + "step": 16914 + }, + { + "epoch": 2.88, + "learning_rate": 7.691712544526053e-08, + "loss": 0.3293, + "step": 16915 + }, + { + "epoch": 2.88, + "learning_rate": 7.668934599453725e-08, + "loss": 0.3211, + "step": 16916 + }, + { + "epoch": 2.89, + "learning_rate": 7.646190301460187e-08, + "loss": 0.3269, + "step": 16917 + }, + { + "epoch": 2.89, + "learning_rate": 7.623479651316712e-08, + "loss": 0.3085, + "step": 16918 + }, + { + "epoch": 2.89, + "learning_rate": 7.600802649793237e-08, + "loss": 0.3006, + "step": 16919 + }, + { + "epoch": 2.89, + "learning_rate": 7.578159297658705e-08, + "loss": 0.3334, + "step": 16920 + }, + { + "epoch": 2.89, + "learning_rate": 7.555549595680945e-08, + "loss": 0.3289, + "step": 16921 + }, + { + "epoch": 2.89, + "learning_rate": 7.532973544626676e-08, + "loss": 0.323, + "step": 16922 + }, + { + "epoch": 2.89, + "learning_rate": 7.510431145261176e-08, + "loss": 0.3237, + "step": 16923 + }, + { + "epoch": 2.89, + "learning_rate": 7.487922398348946e-08, + "loss": 0.3078, + "step": 16924 + }, + { + "epoch": 2.89, + "learning_rate": 7.46544730465315e-08, + "loss": 0.3191, + "step": 16925 + }, + { + "epoch": 2.89, + "learning_rate": 7.443005864935848e-08, + "loss": 0.3266, + "step": 16926 + }, + { + "epoch": 2.89, + "learning_rate": 7.420598079957875e-08, + "loss": 0.3294, + "step": 16927 + }, + { + "epoch": 2.89, + "learning_rate": 7.398223950479177e-08, + "loss": 0.3087, + "step": 16928 + }, + { + "epoch": 2.89, + "learning_rate": 7.375883477258373e-08, + "loss": 0.3167, + "step": 16929 + }, + { + "epoch": 2.89, + "learning_rate": 7.353576661052741e-08, + "loss": 0.2976, + "step": 16930 + }, + { + "epoch": 2.89, + "learning_rate": 7.331303502618903e-08, + "loss": 0.2938, + "step": 16931 + }, + { + "epoch": 2.89, + "learning_rate": 7.30906400271203e-08, + "loss": 0.3041, + "step": 16932 + }, + { + "epoch": 2.89, + "learning_rate": 7.286858162086185e-08, + "loss": 0.2983, + "step": 16933 + }, + { + "epoch": 2.89, + "learning_rate": 7.264685981494213e-08, + "loss": 0.2787, + "step": 16934 + }, + { + "epoch": 2.89, + "learning_rate": 7.242547461687954e-08, + "loss": 0.3293, + "step": 16935 + }, + { + "epoch": 2.89, + "learning_rate": 7.220442603418032e-08, + "loss": 0.3209, + "step": 16936 + }, + { + "epoch": 2.89, + "learning_rate": 7.198371407434068e-08, + "loss": 0.2993, + "step": 16937 + }, + { + "epoch": 2.89, + "learning_rate": 7.176333874484353e-08, + "loss": 0.3094, + "step": 16938 + }, + { + "epoch": 2.89, + "learning_rate": 7.15433000531618e-08, + "loss": 0.3038, + "step": 16939 + }, + { + "epoch": 2.89, + "learning_rate": 7.132359800675503e-08, + "loss": 0.3039, + "step": 16940 + }, + { + "epoch": 2.89, + "learning_rate": 7.110423261307397e-08, + "loss": 0.3214, + "step": 16941 + }, + { + "epoch": 2.89, + "learning_rate": 7.088520387955599e-08, + "loss": 0.2917, + "step": 16942 + }, + { + "epoch": 2.89, + "learning_rate": 7.066651181362738e-08, + "loss": 0.2984, + "step": 16943 + }, + { + "epoch": 2.89, + "learning_rate": 7.044815642270553e-08, + "loss": 0.3314, + "step": 16944 + }, + { + "epoch": 2.89, + "learning_rate": 7.023013771419118e-08, + "loss": 0.3032, + "step": 16945 + }, + { + "epoch": 2.89, + "learning_rate": 7.001245569547843e-08, + "loss": 0.3329, + "step": 16946 + }, + { + "epoch": 2.89, + "learning_rate": 6.979511037394915e-08, + "loss": 0.3143, + "step": 16947 + }, + { + "epoch": 2.89, + "learning_rate": 6.957810175697078e-08, + "loss": 0.3063, + "step": 16948 + }, + { + "epoch": 2.89, + "learning_rate": 6.936142985190187e-08, + "loss": 0.2875, + "step": 16949 + }, + { + "epoch": 2.89, + "learning_rate": 6.914509466608987e-08, + "loss": 0.2983, + "step": 16950 + }, + { + "epoch": 2.89, + "learning_rate": 6.892909620687116e-08, + "loss": 0.3245, + "step": 16951 + }, + { + "epoch": 2.89, + "learning_rate": 6.871343448156653e-08, + "loss": 0.2975, + "step": 16952 + }, + { + "epoch": 2.89, + "learning_rate": 6.849810949749126e-08, + "loss": 0.3146, + "step": 16953 + }, + { + "epoch": 2.89, + "learning_rate": 6.828312126194614e-08, + "loss": 0.3158, + "step": 16954 + }, + { + "epoch": 2.89, + "learning_rate": 6.80684697822187e-08, + "loss": 0.3036, + "step": 16955 + }, + { + "epoch": 2.89, + "learning_rate": 6.785415506558978e-08, + "loss": 0.3078, + "step": 16956 + }, + { + "epoch": 2.89, + "learning_rate": 6.764017711932358e-08, + "loss": 0.3038, + "step": 16957 + }, + { + "epoch": 2.89, + "learning_rate": 6.742653595067761e-08, + "loss": 0.309, + "step": 16958 + }, + { + "epoch": 2.89, + "learning_rate": 6.721323156689497e-08, + "loss": 0.3234, + "step": 16959 + }, + { + "epoch": 2.89, + "learning_rate": 6.700026397520765e-08, + "loss": 0.3162, + "step": 16960 + }, + { + "epoch": 2.89, + "learning_rate": 6.678763318283765e-08, + "loss": 0.307, + "step": 16961 + }, + { + "epoch": 2.89, + "learning_rate": 6.657533919699477e-08, + "loss": 0.2978, + "step": 16962 + }, + { + "epoch": 2.89, + "learning_rate": 6.636338202487658e-08, + "loss": 0.2876, + "step": 16963 + }, + { + "epoch": 2.89, + "learning_rate": 6.615176167367065e-08, + "loss": 0.3141, + "step": 16964 + }, + { + "epoch": 2.89, + "learning_rate": 6.594047815055127e-08, + "loss": 0.3003, + "step": 16965 + }, + { + "epoch": 2.89, + "learning_rate": 6.572953146268379e-08, + "loss": 0.3375, + "step": 16966 + }, + { + "epoch": 2.89, + "learning_rate": 6.551892161722029e-08, + "loss": 0.3039, + "step": 16967 + }, + { + "epoch": 2.89, + "learning_rate": 6.53086486213006e-08, + "loss": 0.3116, + "step": 16968 + }, + { + "epoch": 2.89, + "learning_rate": 6.50987124820579e-08, + "loss": 0.2967, + "step": 16969 + }, + { + "epoch": 2.89, + "learning_rate": 6.488911320660651e-08, + "loss": 0.3068, + "step": 16970 + }, + { + "epoch": 2.89, + "learning_rate": 6.467985080205519e-08, + "loss": 0.3154, + "step": 16971 + }, + { + "epoch": 2.89, + "learning_rate": 6.447092527550047e-08, + "loss": 0.3023, + "step": 16972 + }, + { + "epoch": 2.89, + "learning_rate": 6.426233663402447e-08, + "loss": 0.3226, + "step": 16973 + }, + { + "epoch": 2.89, + "learning_rate": 6.405408488470155e-08, + "loss": 0.293, + "step": 16974 + }, + { + "epoch": 2.9, + "learning_rate": 6.384617003459159e-08, + "loss": 0.3439, + "step": 16975 + }, + { + "epoch": 2.9, + "learning_rate": 6.363859209074453e-08, + "loss": 0.2964, + "step": 16976 + }, + { + "epoch": 2.9, + "learning_rate": 6.343135106019916e-08, + "loss": 0.3162, + "step": 16977 + }, + { + "epoch": 2.9, + "learning_rate": 6.32244469499832e-08, + "loss": 0.2974, + "step": 16978 + }, + { + "epoch": 2.9, + "learning_rate": 6.301787976711105e-08, + "loss": 0.3202, + "step": 16979 + }, + { + "epoch": 2.9, + "learning_rate": 6.281164951858599e-08, + "loss": 0.3206, + "step": 16980 + }, + { + "epoch": 2.9, + "learning_rate": 6.260575621140242e-08, + "loss": 0.3229, + "step": 16981 + }, + { + "epoch": 2.9, + "learning_rate": 6.240019985254142e-08, + "loss": 0.2907, + "step": 16982 + }, + { + "epoch": 2.9, + "learning_rate": 6.219498044897188e-08, + "loss": 0.2933, + "step": 16983 + }, + { + "epoch": 2.9, + "learning_rate": 6.199009800765265e-08, + "loss": 0.3352, + "step": 16984 + }, + { + "epoch": 2.9, + "learning_rate": 6.178555253553042e-08, + "loss": 0.292, + "step": 16985 + }, + { + "epoch": 2.9, + "learning_rate": 6.158134403954186e-08, + "loss": 0.3353, + "step": 16986 + }, + { + "epoch": 2.9, + "learning_rate": 6.13774725266092e-08, + "loss": 0.3504, + "step": 16987 + }, + { + "epoch": 2.9, + "learning_rate": 6.11739380036469e-08, + "loss": 0.3034, + "step": 16988 + }, + { + "epoch": 2.9, + "learning_rate": 6.097074047755503e-08, + "loss": 0.3043, + "step": 16989 + }, + { + "epoch": 2.9, + "learning_rate": 6.07678799552236e-08, + "loss": 0.3228, + "step": 16990 + }, + { + "epoch": 2.9, + "learning_rate": 6.056535644353268e-08, + "loss": 0.2925, + "step": 16991 + }, + { + "epoch": 2.9, + "learning_rate": 6.036316994934677e-08, + "loss": 0.3237, + "step": 16992 + }, + { + "epoch": 2.9, + "learning_rate": 6.016132047952372e-08, + "loss": 0.2897, + "step": 16993 + }, + { + "epoch": 2.9, + "learning_rate": 5.995980804090474e-08, + "loss": 0.327, + "step": 16994 + }, + { + "epoch": 2.9, + "learning_rate": 5.975863264032544e-08, + "loss": 0.3106, + "step": 16995 + }, + { + "epoch": 2.9, + "learning_rate": 5.955779428460595e-08, + "loss": 0.3079, + "step": 16996 + }, + { + "epoch": 2.9, + "learning_rate": 5.935729298055526e-08, + "loss": 0.301, + "step": 16997 + }, + { + "epoch": 2.9, + "learning_rate": 5.915712873497348e-08, + "loss": 0.333, + "step": 16998 + }, + { + "epoch": 2.9, + "learning_rate": 5.895730155464741e-08, + "loss": 0.3065, + "step": 16999 + }, + { + "epoch": 2.9, + "learning_rate": 5.875781144635051e-08, + "loss": 0.3107, + "step": 17000 + }, + { + "epoch": 2.9, + "learning_rate": 5.855865841684849e-08, + "loss": 0.2864, + "step": 17001 + }, + { + "epoch": 2.9, + "learning_rate": 5.835984247289483e-08, + "loss": 0.3001, + "step": 17002 + }, + { + "epoch": 2.9, + "learning_rate": 5.816136362122971e-08, + "loss": 0.3128, + "step": 17003 + }, + { + "epoch": 2.9, + "learning_rate": 5.7963221868583274e-08, + "loss": 0.3345, + "step": 17004 + }, + { + "epoch": 2.9, + "learning_rate": 5.7765417221673504e-08, + "loss": 0.3076, + "step": 17005 + }, + { + "epoch": 2.9, + "learning_rate": 5.756794968720725e-08, + "loss": 0.3106, + "step": 17006 + }, + { + "epoch": 2.9, + "learning_rate": 5.737081927188137e-08, + "loss": 0.3294, + "step": 17007 + }, + { + "epoch": 2.9, + "learning_rate": 5.717402598237942e-08, + "loss": 0.3028, + "step": 17008 + }, + { + "epoch": 2.9, + "learning_rate": 5.697756982537273e-08, + "loss": 0.309, + "step": 17009 + }, + { + "epoch": 2.9, + "learning_rate": 5.678145080752484e-08, + "loss": 0.3211, + "step": 17010 + }, + { + "epoch": 2.9, + "learning_rate": 5.658566893548378e-08, + "loss": 0.3185, + "step": 17011 + }, + { + "epoch": 2.9, + "learning_rate": 5.639022421588869e-08, + "loss": 0.3085, + "step": 17012 + }, + { + "epoch": 2.9, + "learning_rate": 5.6195116655365364e-08, + "loss": 0.3304, + "step": 17013 + }, + { + "epoch": 2.9, + "learning_rate": 5.600034626053186e-08, + "loss": 0.2838, + "step": 17014 + }, + { + "epoch": 2.9, + "learning_rate": 5.580591303799066e-08, + "loss": 0.3298, + "step": 17015 + }, + { + "epoch": 2.9, + "learning_rate": 5.561181699433427e-08, + "loss": 0.2991, + "step": 17016 + }, + { + "epoch": 2.9, + "learning_rate": 5.5418058136144094e-08, + "loss": 0.3157, + "step": 17017 + }, + { + "epoch": 2.9, + "learning_rate": 5.522463646998932e-08, + "loss": 0.3246, + "step": 17018 + }, + { + "epoch": 2.9, + "learning_rate": 5.5031552002430265e-08, + "loss": 0.3014, + "step": 17019 + }, + { + "epoch": 2.9, + "learning_rate": 5.483880474001169e-08, + "loss": 0.3168, + "step": 17020 + }, + { + "epoch": 2.9, + "learning_rate": 5.464639468927058e-08, + "loss": 0.3206, + "step": 17021 + }, + { + "epoch": 2.9, + "learning_rate": 5.445432185673172e-08, + "loss": 0.3342, + "step": 17022 + }, + { + "epoch": 2.9, + "learning_rate": 5.4262586248904345e-08, + "loss": 0.2964, + "step": 17023 + }, + { + "epoch": 2.9, + "learning_rate": 5.407118787229326e-08, + "loss": 0.3023, + "step": 17024 + }, + { + "epoch": 2.9, + "learning_rate": 5.388012673338661e-08, + "loss": 0.285, + "step": 17025 + }, + { + "epoch": 2.9, + "learning_rate": 5.368940283866253e-08, + "loss": 0.3076, + "step": 17026 + }, + { + "epoch": 2.9, + "learning_rate": 5.349901619458919e-08, + "loss": 0.3008, + "step": 17027 + }, + { + "epoch": 2.9, + "learning_rate": 5.330896680762032e-08, + "loss": 0.296, + "step": 17028 + }, + { + "epoch": 2.9, + "learning_rate": 5.311925468420076e-08, + "loss": 0.3062, + "step": 17029 + }, + { + "epoch": 2.9, + "learning_rate": 5.292987983076314e-08, + "loss": 0.2935, + "step": 17030 + }, + { + "epoch": 2.9, + "learning_rate": 5.2740842253728995e-08, + "loss": 0.3199, + "step": 17031 + }, + { + "epoch": 2.9, + "learning_rate": 5.255214195950764e-08, + "loss": 0.3009, + "step": 17032 + }, + { + "epoch": 2.9, + "learning_rate": 5.236377895449729e-08, + "loss": 0.3262, + "step": 17033 + }, + { + "epoch": 2.91, + "learning_rate": 5.217575324508395e-08, + "loss": 0.3091, + "step": 17034 + }, + { + "epoch": 2.91, + "learning_rate": 5.1988064837644734e-08, + "loss": 0.3034, + "step": 17035 + }, + { + "epoch": 2.91, + "learning_rate": 5.180071373854345e-08, + "loss": 0.308, + "step": 17036 + }, + { + "epoch": 2.91, + "learning_rate": 5.161369995413168e-08, + "loss": 0.2974, + "step": 17037 + }, + { + "epoch": 2.91, + "learning_rate": 5.1427023490751016e-08, + "loss": 0.2755, + "step": 17038 + }, + { + "epoch": 2.91, + "learning_rate": 5.124068435472973e-08, + "loss": 0.3126, + "step": 17039 + }, + { + "epoch": 2.91, + "learning_rate": 5.1054682552389434e-08, + "loss": 0.3078, + "step": 17040 + }, + { + "epoch": 2.91, + "learning_rate": 5.086901809003286e-08, + "loss": 0.3165, + "step": 17041 + }, + { + "epoch": 2.91, + "learning_rate": 5.068369097395831e-08, + "loss": 0.3134, + "step": 17042 + }, + { + "epoch": 2.91, + "learning_rate": 5.049870121044742e-08, + "loss": 0.321, + "step": 17043 + }, + { + "epoch": 2.91, + "learning_rate": 5.0314048805775175e-08, + "loss": 0.3095, + "step": 17044 + }, + { + "epoch": 2.91, + "learning_rate": 5.012973376620101e-08, + "loss": 0.3305, + "step": 17045 + }, + { + "epoch": 2.91, + "learning_rate": 4.994575609797436e-08, + "loss": 0.3098, + "step": 17046 + }, + { + "epoch": 2.91, + "learning_rate": 4.976211580733359e-08, + "loss": 0.3372, + "step": 17047 + }, + { + "epoch": 2.91, + "learning_rate": 4.957881290050592e-08, + "loss": 0.3171, + "step": 17048 + }, + { + "epoch": 2.91, + "learning_rate": 4.93958473837064e-08, + "loss": 0.3233, + "step": 17049 + }, + { + "epoch": 2.91, + "learning_rate": 4.921321926313894e-08, + "loss": 0.3229, + "step": 17050 + }, + { + "epoch": 2.91, + "learning_rate": 4.9030928544995247e-08, + "loss": 0.314, + "step": 17051 + }, + { + "epoch": 2.91, + "learning_rate": 4.8848975235457065e-08, + "loss": 0.3056, + "step": 17052 + }, + { + "epoch": 2.91, + "learning_rate": 4.866735934069389e-08, + "loss": 0.3065, + "step": 17053 + }, + { + "epoch": 2.91, + "learning_rate": 4.8486080866864127e-08, + "loss": 0.309, + "step": 17054 + }, + { + "epoch": 2.91, + "learning_rate": 4.8305139820112866e-08, + "loss": 0.3187, + "step": 17055 + }, + { + "epoch": 2.91, + "learning_rate": 4.8124536206576314e-08, + "loss": 0.3032, + "step": 17056 + }, + { + "epoch": 2.91, + "learning_rate": 4.7944270032378446e-08, + "loss": 0.3333, + "step": 17057 + }, + { + "epoch": 2.91, + "learning_rate": 4.776434130363106e-08, + "loss": 0.3304, + "step": 17058 + }, + { + "epoch": 2.91, + "learning_rate": 4.7584750026435923e-08, + "loss": 0.3042, + "step": 17059 + }, + { + "epoch": 2.91, + "learning_rate": 4.740549620688151e-08, + "loss": 0.3071, + "step": 17060 + }, + { + "epoch": 2.91, + "learning_rate": 4.722657985104629e-08, + "loss": 0.3025, + "step": 17061 + }, + { + "epoch": 2.91, + "learning_rate": 4.70480009649954e-08, + "loss": 0.3253, + "step": 17062 + }, + { + "epoch": 2.91, + "learning_rate": 4.686975955478623e-08, + "loss": 0.3224, + "step": 17063 + }, + { + "epoch": 2.91, + "learning_rate": 4.6691855626460614e-08, + "loss": 0.2914, + "step": 17064 + }, + { + "epoch": 2.91, + "learning_rate": 4.65142891860515e-08, + "loss": 0.3265, + "step": 17065 + }, + { + "epoch": 2.91, + "learning_rate": 4.6337060239580734e-08, + "loss": 0.3005, + "step": 17066 + }, + { + "epoch": 2.91, + "learning_rate": 4.616016879305574e-08, + "loss": 0.2946, + "step": 17067 + }, + { + "epoch": 2.91, + "learning_rate": 4.598361485247505e-08, + "loss": 0.2957, + "step": 17068 + }, + { + "epoch": 2.91, + "learning_rate": 4.580739842382498e-08, + "loss": 0.3039, + "step": 17069 + }, + { + "epoch": 2.91, + "learning_rate": 4.5631519513080755e-08, + "loss": 0.3392, + "step": 17070 + }, + { + "epoch": 2.91, + "learning_rate": 4.545597812620761e-08, + "loss": 0.3214, + "step": 17071 + }, + { + "epoch": 2.91, + "learning_rate": 4.528077426915412e-08, + "loss": 0.2964, + "step": 17072 + }, + { + "epoch": 2.91, + "learning_rate": 4.510590794786329e-08, + "loss": 0.3149, + "step": 17073 + }, + { + "epoch": 2.91, + "learning_rate": 4.4931379168263736e-08, + "loss": 0.3013, + "step": 17074 + }, + { + "epoch": 2.91, + "learning_rate": 4.475718793627404e-08, + "loss": 0.3207, + "step": 17075 + }, + { + "epoch": 2.91, + "learning_rate": 4.458333425779837e-08, + "loss": 0.3122, + "step": 17076 + }, + { + "epoch": 2.91, + "learning_rate": 4.440981813873424e-08, + "loss": 0.3215, + "step": 17077 + }, + { + "epoch": 2.91, + "learning_rate": 4.42366395849636e-08, + "loss": 0.2967, + "step": 17078 + }, + { + "epoch": 2.91, + "learning_rate": 4.406379860235843e-08, + "loss": 0.3053, + "step": 17079 + }, + { + "epoch": 2.91, + "learning_rate": 4.3891295196779595e-08, + "loss": 0.3303, + "step": 17080 + }, + { + "epoch": 2.91, + "learning_rate": 4.3719129374075744e-08, + "loss": 0.307, + "step": 17081 + }, + { + "epoch": 2.91, + "learning_rate": 4.354730114008665e-08, + "loss": 0.3194, + "step": 17082 + }, + { + "epoch": 2.91, + "learning_rate": 4.337581050063544e-08, + "loss": 0.3008, + "step": 17083 + }, + { + "epoch": 2.91, + "learning_rate": 4.3204657461537455e-08, + "loss": 0.3038, + "step": 17084 + }, + { + "epoch": 2.91, + "learning_rate": 4.303384202859806e-08, + "loss": 0.3176, + "step": 17085 + }, + { + "epoch": 2.91, + "learning_rate": 4.286336420760706e-08, + "loss": 0.3135, + "step": 17086 + }, + { + "epoch": 2.91, + "learning_rate": 4.26932240043465e-08, + "loss": 0.3184, + "step": 17087 + }, + { + "epoch": 2.91, + "learning_rate": 4.252342142458399e-08, + "loss": 0.3064, + "step": 17088 + }, + { + "epoch": 2.91, + "learning_rate": 4.235395647407714e-08, + "loss": 0.3081, + "step": 17089 + }, + { + "epoch": 2.91, + "learning_rate": 4.2184829158572474e-08, + "loss": 0.3048, + "step": 17090 + }, + { + "epoch": 2.91, + "learning_rate": 4.201603948380428e-08, + "loss": 0.3086, + "step": 17091 + }, + { + "epoch": 2.91, + "learning_rate": 4.184758745549689e-08, + "loss": 0.286, + "step": 17092 + }, + { + "epoch": 2.92, + "learning_rate": 4.167947307936016e-08, + "loss": 0.2984, + "step": 17093 + }, + { + "epoch": 2.92, + "learning_rate": 4.1511696361095086e-08, + "loss": 0.3005, + "step": 17094 + }, + { + "epoch": 2.92, + "learning_rate": 4.134425730639158e-08, + "loss": 0.3152, + "step": 17095 + }, + { + "epoch": 2.92, + "learning_rate": 4.1177155920926194e-08, + "loss": 0.2887, + "step": 17096 + }, + { + "epoch": 2.92, + "learning_rate": 4.1010392210364405e-08, + "loss": 0.3204, + "step": 17097 + }, + { + "epoch": 2.92, + "learning_rate": 4.084396618036057e-08, + "loss": 0.3179, + "step": 17098 + }, + { + "epoch": 2.92, + "learning_rate": 4.067787783655908e-08, + "loss": 0.2964, + "step": 17099 + }, + { + "epoch": 2.92, + "learning_rate": 4.051212718459097e-08, + "loss": 0.3057, + "step": 17100 + }, + { + "epoch": 2.92, + "learning_rate": 4.034671423007508e-08, + "loss": 0.308, + "step": 17101 + }, + { + "epoch": 2.92, + "learning_rate": 4.0181638978621374e-08, + "loss": 0.3215, + "step": 17102 + }, + { + "epoch": 2.92, + "learning_rate": 4.001690143582648e-08, + "loss": 0.3209, + "step": 17103 + }, + { + "epoch": 2.92, + "learning_rate": 3.985250160727705e-08, + "loss": 0.3005, + "step": 17104 + }, + { + "epoch": 2.92, + "learning_rate": 3.96884394985475e-08, + "loss": 0.3289, + "step": 17105 + }, + { + "epoch": 2.92, + "learning_rate": 3.952471511519895e-08, + "loss": 0.3023, + "step": 17106 + }, + { + "epoch": 2.92, + "learning_rate": 3.936132846278362e-08, + "loss": 0.3129, + "step": 17107 + }, + { + "epoch": 2.92, + "learning_rate": 3.9198279546842635e-08, + "loss": 0.2993, + "step": 17108 + }, + { + "epoch": 2.92, + "learning_rate": 3.9035568372902676e-08, + "loss": 0.3, + "step": 17109 + }, + { + "epoch": 2.92, + "learning_rate": 3.8873194946481566e-08, + "loss": 0.3161, + "step": 17110 + }, + { + "epoch": 2.92, + "learning_rate": 3.8711159273086e-08, + "loss": 0.3325, + "step": 17111 + }, + { + "epoch": 2.92, + "learning_rate": 3.854946135820936e-08, + "loss": 0.3025, + "step": 17112 + }, + { + "epoch": 2.92, + "learning_rate": 3.838810120733394e-08, + "loss": 0.3216, + "step": 17113 + }, + { + "epoch": 2.92, + "learning_rate": 3.822707882593091e-08, + "loss": 0.3086, + "step": 17114 + }, + { + "epoch": 2.92, + "learning_rate": 3.806639421946146e-08, + "loss": 0.3218, + "step": 17115 + }, + { + "epoch": 2.92, + "learning_rate": 3.790604739337234e-08, + "loss": 0.2852, + "step": 17116 + }, + { + "epoch": 2.92, + "learning_rate": 3.7746038353100313e-08, + "loss": 0.3229, + "step": 17117 + }, + { + "epoch": 2.92, + "learning_rate": 3.758636710407326e-08, + "loss": 0.3008, + "step": 17118 + }, + { + "epoch": 2.92, + "learning_rate": 3.7427033651702414e-08, + "loss": 0.3117, + "step": 17119 + }, + { + "epoch": 2.92, + "learning_rate": 3.7268038001391224e-08, + "loss": 0.2999, + "step": 17120 + }, + { + "epoch": 2.92, + "learning_rate": 3.710938015853094e-08, + "loss": 0.314, + "step": 17121 + }, + { + "epoch": 2.92, + "learning_rate": 3.695106012850169e-08, + "loss": 0.3231, + "step": 17122 + }, + { + "epoch": 2.92, + "learning_rate": 3.6793077916670304e-08, + "loss": 0.3354, + "step": 17123 + }, + { + "epoch": 2.92, + "learning_rate": 3.6635433528394716e-08, + "loss": 0.2949, + "step": 17124 + }, + { + "epoch": 2.92, + "learning_rate": 3.6478126969020645e-08, + "loss": 0.313, + "step": 17125 + }, + { + "epoch": 2.92, + "learning_rate": 3.6321158243880494e-08, + "loss": 0.3271, + "step": 17126 + }, + { + "epoch": 2.92, + "learning_rate": 3.6164527358296675e-08, + "loss": 0.3311, + "step": 17127 + }, + { + "epoch": 2.92, + "learning_rate": 3.6008234317581606e-08, + "loss": 0.3244, + "step": 17128 + }, + { + "epoch": 2.92, + "learning_rate": 3.5852279127032154e-08, + "loss": 0.3025, + "step": 17129 + }, + { + "epoch": 2.92, + "learning_rate": 3.569666179193965e-08, + "loss": 0.3157, + "step": 17130 + }, + { + "epoch": 2.92, + "learning_rate": 3.554138231757764e-08, + "loss": 0.3139, + "step": 17131 + }, + { + "epoch": 2.92, + "learning_rate": 3.538644070921304e-08, + "loss": 0.3017, + "step": 17132 + }, + { + "epoch": 2.92, + "learning_rate": 3.523183697209831e-08, + "loss": 0.3192, + "step": 17133 + }, + { + "epoch": 2.92, + "learning_rate": 3.5077571111477024e-08, + "loss": 0.3498, + "step": 17134 + }, + { + "epoch": 2.92, + "learning_rate": 3.492364313257834e-08, + "loss": 0.2978, + "step": 17135 + }, + { + "epoch": 2.92, + "learning_rate": 3.477005304062253e-08, + "loss": 0.2966, + "step": 17136 + }, + { + "epoch": 2.92, + "learning_rate": 3.4616800840816533e-08, + "loss": 0.2852, + "step": 17137 + }, + { + "epoch": 2.92, + "learning_rate": 3.4463886538357307e-08, + "loss": 0.2849, + "step": 17138 + }, + { + "epoch": 2.92, + "learning_rate": 3.431131013842959e-08, + "loss": 0.287, + "step": 17139 + }, + { + "epoch": 2.92, + "learning_rate": 3.415907164620702e-08, + "loss": 0.3068, + "step": 17140 + }, + { + "epoch": 2.92, + "learning_rate": 3.400717106685214e-08, + "loss": 0.3094, + "step": 17141 + }, + { + "epoch": 2.92, + "learning_rate": 3.385560840551416e-08, + "loss": 0.3158, + "step": 17142 + }, + { + "epoch": 2.92, + "learning_rate": 3.370438366733231e-08, + "loss": 0.3033, + "step": 17143 + }, + { + "epoch": 2.92, + "learning_rate": 3.35534968574347e-08, + "loss": 0.3062, + "step": 17144 + }, + { + "epoch": 2.92, + "learning_rate": 3.340294798093835e-08, + "loss": 0.3131, + "step": 17145 + }, + { + "epoch": 2.92, + "learning_rate": 3.3252737042945847e-08, + "loss": 0.3341, + "step": 17146 + }, + { + "epoch": 2.92, + "learning_rate": 3.3102864048552006e-08, + "loss": 0.2985, + "step": 17147 + }, + { + "epoch": 2.92, + "learning_rate": 3.295332900283721e-08, + "loss": 0.3061, + "step": 17148 + }, + { + "epoch": 2.92, + "learning_rate": 3.2804131910872946e-08, + "loss": 0.3193, + "step": 17149 + }, + { + "epoch": 2.92, + "learning_rate": 3.265527277771852e-08, + "loss": 0.3012, + "step": 17150 + }, + { + "epoch": 2.93, + "learning_rate": 3.250675160841987e-08, + "loss": 0.3256, + "step": 17151 + }, + { + "epoch": 2.93, + "learning_rate": 3.23585684080141e-08, + "loss": 0.3402, + "step": 17152 + }, + { + "epoch": 2.93, + "learning_rate": 3.221072318152496e-08, + "loss": 0.2919, + "step": 17153 + }, + { + "epoch": 2.93, + "learning_rate": 3.206321593396511e-08, + "loss": 0.3175, + "step": 17154 + }, + { + "epoch": 2.93, + "learning_rate": 3.191604667033721e-08, + "loss": 0.311, + "step": 17155 + }, + { + "epoch": 2.93, + "learning_rate": 3.1769215395630604e-08, + "loss": 0.3266, + "step": 17156 + }, + { + "epoch": 2.93, + "learning_rate": 3.162272211482354e-08, + "loss": 0.3248, + "step": 17157 + }, + { + "epoch": 2.93, + "learning_rate": 3.1476566832884246e-08, + "loss": 0.307, + "step": 17158 + }, + { + "epoch": 2.93, + "learning_rate": 3.133074955476767e-08, + "loss": 0.2956, + "step": 17159 + }, + { + "epoch": 2.93, + "learning_rate": 3.1185270285418734e-08, + "loss": 0.3238, + "step": 17160 + }, + { + "epoch": 2.93, + "learning_rate": 3.104012902976905e-08, + "loss": 0.2951, + "step": 17161 + }, + { + "epoch": 2.93, + "learning_rate": 3.0895325792741346e-08, + "loss": 0.335, + "step": 17162 + }, + { + "epoch": 2.93, + "learning_rate": 3.075086057924392e-08, + "loss": 0.3124, + "step": 17163 + }, + { + "epoch": 2.93, + "learning_rate": 3.0606733394176194e-08, + "loss": 0.2961, + "step": 17164 + }, + { + "epoch": 2.93, + "learning_rate": 3.046294424242535e-08, + "loss": 0.3212, + "step": 17165 + }, + { + "epoch": 2.93, + "learning_rate": 3.03194931288664e-08, + "loss": 0.312, + "step": 17166 + }, + { + "epoch": 2.93, + "learning_rate": 3.0176380058364316e-08, + "loss": 0.3278, + "step": 17167 + }, + { + "epoch": 2.93, + "learning_rate": 3.0033605035769684e-08, + "loss": 0.3074, + "step": 17168 + }, + { + "epoch": 2.93, + "learning_rate": 2.9891168065924184e-08, + "loss": 0.3185, + "step": 17169 + }, + { + "epoch": 2.93, + "learning_rate": 2.9749069153659492e-08, + "loss": 0.311, + "step": 17170 + }, + { + "epoch": 2.93, + "learning_rate": 2.960730830379066e-08, + "loss": 0.3217, + "step": 17171 + }, + { + "epoch": 2.93, + "learning_rate": 2.9465885521127168e-08, + "loss": 0.285, + "step": 17172 + }, + { + "epoch": 2.93, + "learning_rate": 2.9324800810461852e-08, + "loss": 0.307, + "step": 17173 + }, + { + "epoch": 2.93, + "learning_rate": 2.9184054176579767e-08, + "loss": 0.3025, + "step": 17174 + }, + { + "epoch": 2.93, + "learning_rate": 2.9043645624253767e-08, + "loss": 0.3162, + "step": 17175 + }, + { + "epoch": 2.93, + "learning_rate": 2.890357515824338e-08, + "loss": 0.3219, + "step": 17176 + }, + { + "epoch": 2.93, + "learning_rate": 2.8763842783299245e-08, + "loss": 0.3191, + "step": 17177 + }, + { + "epoch": 2.93, + "learning_rate": 2.8624448504158685e-08, + "loss": 0.3459, + "step": 17178 + }, + { + "epoch": 2.93, + "learning_rate": 2.8485392325547924e-08, + "loss": 0.3122, + "step": 17179 + }, + { + "epoch": 2.93, + "learning_rate": 2.834667425218207e-08, + "loss": 0.3249, + "step": 17180 + }, + { + "epoch": 2.93, + "learning_rate": 2.8208294288765147e-08, + "loss": 0.3136, + "step": 17181 + }, + { + "epoch": 2.93, + "learning_rate": 2.8070252439987843e-08, + "loss": 0.2994, + "step": 17182 + }, + { + "epoch": 2.93, + "learning_rate": 2.793254871053308e-08, + "loss": 0.3215, + "step": 17183 + }, + { + "epoch": 2.93, + "learning_rate": 2.7795183105068234e-08, + "loss": 0.3155, + "step": 17184 + }, + { + "epoch": 2.93, + "learning_rate": 2.7658155628250695e-08, + "loss": 0.3115, + "step": 17185 + }, + { + "epoch": 2.93, + "learning_rate": 2.752146628472785e-08, + "loss": 0.3321, + "step": 17186 + }, + { + "epoch": 2.93, + "learning_rate": 2.7385115079134884e-08, + "loss": 0.2847, + "step": 17187 + }, + { + "epoch": 2.93, + "learning_rate": 2.7249102016093653e-08, + "loss": 0.3027, + "step": 17188 + }, + { + "epoch": 2.93, + "learning_rate": 2.7113427100216028e-08, + "loss": 0.3079, + "step": 17189 + }, + { + "epoch": 2.93, + "learning_rate": 2.697809033610277e-08, + "loss": 0.2936, + "step": 17190 + }, + { + "epoch": 2.93, + "learning_rate": 2.684309172834243e-08, + "loss": 0.306, + "step": 17191 + }, + { + "epoch": 2.93, + "learning_rate": 2.670843128151357e-08, + "loss": 0.2995, + "step": 17192 + }, + { + "epoch": 2.93, + "learning_rate": 2.6574109000181426e-08, + "loss": 0.3024, + "step": 17193 + }, + { + "epoch": 2.93, + "learning_rate": 2.6440124888900132e-08, + "loss": 0.3175, + "step": 17194 + }, + { + "epoch": 2.93, + "learning_rate": 2.6306478952212722e-08, + "loss": 0.297, + "step": 17195 + }, + { + "epoch": 2.93, + "learning_rate": 2.6173171194651126e-08, + "loss": 0.3187, + "step": 17196 + }, + { + "epoch": 2.93, + "learning_rate": 2.604020162073506e-08, + "loss": 0.2876, + "step": 17197 + }, + { + "epoch": 2.93, + "learning_rate": 2.5907570234974257e-08, + "loss": 0.3199, + "step": 17198 + }, + { + "epoch": 2.93, + "learning_rate": 2.5775277041864e-08, + "loss": 0.3222, + "step": 17199 + }, + { + "epoch": 2.93, + "learning_rate": 2.5643322045891815e-08, + "loss": 0.3347, + "step": 17200 + }, + { + "epoch": 2.93, + "learning_rate": 2.5511705251529685e-08, + "loss": 0.3155, + "step": 17201 + }, + { + "epoch": 2.93, + "learning_rate": 2.5380426663242918e-08, + "loss": 0.3148, + "step": 17202 + }, + { + "epoch": 2.93, + "learning_rate": 2.5249486285481294e-08, + "loss": 0.3169, + "step": 17203 + }, + { + "epoch": 2.93, + "learning_rate": 2.5118884122684594e-08, + "loss": 0.2951, + "step": 17204 + }, + { + "epoch": 2.93, + "learning_rate": 2.49886201792815e-08, + "loss": 0.3084, + "step": 17205 + }, + { + "epoch": 2.93, + "learning_rate": 2.485869445968958e-08, + "loss": 0.3391, + "step": 17206 + }, + { + "epoch": 2.93, + "learning_rate": 2.47291069683131e-08, + "loss": 0.3122, + "step": 17207 + }, + { + "epoch": 2.93, + "learning_rate": 2.4599857709546314e-08, + "loss": 0.3043, + "step": 17208 + }, + { + "epoch": 2.93, + "learning_rate": 2.447094668777128e-08, + "loss": 0.3136, + "step": 17209 + }, + { + "epoch": 2.94, + "learning_rate": 2.4342373907361162e-08, + "loss": 0.3266, + "step": 17210 + }, + { + "epoch": 2.94, + "learning_rate": 2.4214139372672473e-08, + "loss": 0.2943, + "step": 17211 + }, + { + "epoch": 2.94, + "learning_rate": 2.4086243088055073e-08, + "loss": 0.2963, + "step": 17212 + }, + { + "epoch": 2.94, + "learning_rate": 2.3958685057844378e-08, + "loss": 0.2984, + "step": 17213 + }, + { + "epoch": 2.94, + "learning_rate": 2.383146528636693e-08, + "loss": 0.2894, + "step": 17214 + }, + { + "epoch": 2.94, + "learning_rate": 2.3704583777935942e-08, + "loss": 0.3077, + "step": 17215 + }, + { + "epoch": 2.94, + "learning_rate": 2.357804053685353e-08, + "loss": 0.2789, + "step": 17216 + }, + { + "epoch": 2.94, + "learning_rate": 2.3451835567409598e-08, + "loss": 0.3116, + "step": 17217 + }, + { + "epoch": 2.94, + "learning_rate": 2.3325968873884053e-08, + "loss": 0.3082, + "step": 17218 + }, + { + "epoch": 2.94, + "learning_rate": 2.3200440460545704e-08, + "loss": 0.3051, + "step": 17219 + }, + { + "epoch": 2.94, + "learning_rate": 2.3075250331648925e-08, + "loss": 0.3283, + "step": 17220 + }, + { + "epoch": 2.94, + "learning_rate": 2.2950398491439207e-08, + "loss": 0.2981, + "step": 17221 + }, + { + "epoch": 2.94, + "learning_rate": 2.2825884944150944e-08, + "loss": 0.2795, + "step": 17222 + }, + { + "epoch": 2.94, + "learning_rate": 2.2701709694004094e-08, + "loss": 0.3238, + "step": 17223 + }, + { + "epoch": 2.94, + "learning_rate": 2.257787274521084e-08, + "loss": 0.2929, + "step": 17224 + }, + { + "epoch": 2.94, + "learning_rate": 2.2454374101970045e-08, + "loss": 0.3193, + "step": 17225 + }, + { + "epoch": 2.94, + "learning_rate": 2.2331213768468363e-08, + "loss": 0.2993, + "step": 17226 + }, + { + "epoch": 2.94, + "learning_rate": 2.220839174888245e-08, + "loss": 0.3131, + "step": 17227 + }, + { + "epoch": 2.94, + "learning_rate": 2.208590804737565e-08, + "loss": 0.2972, + "step": 17228 + }, + { + "epoch": 2.94, + "learning_rate": 2.1963762668102406e-08, + "loss": 0.3067, + "step": 17229 + }, + { + "epoch": 2.94, + "learning_rate": 2.1841955615203858e-08, + "loss": 0.3439, + "step": 17230 + }, + { + "epoch": 2.94, + "learning_rate": 2.1720486892810034e-08, + "loss": 0.2865, + "step": 17231 + }, + { + "epoch": 2.94, + "learning_rate": 2.159935650503986e-08, + "loss": 0.3212, + "step": 17232 + }, + { + "epoch": 2.94, + "learning_rate": 2.147856445600005e-08, + "loss": 0.3281, + "step": 17233 + }, + { + "epoch": 2.94, + "learning_rate": 2.135811074978622e-08, + "loss": 0.3128, + "step": 17234 + }, + { + "epoch": 2.94, + "learning_rate": 2.123799539048288e-08, + "loss": 0.3067, + "step": 17235 + }, + { + "epoch": 2.94, + "learning_rate": 2.1118218382163436e-08, + "loss": 0.3051, + "step": 17236 + }, + { + "epoch": 2.94, + "learning_rate": 2.0998779728887975e-08, + "loss": 0.3157, + "step": 17237 + }, + { + "epoch": 2.94, + "learning_rate": 2.08796794347077e-08, + "loss": 0.3149, + "step": 17238 + }, + { + "epoch": 2.94, + "learning_rate": 2.0760917503659382e-08, + "loss": 0.32, + "step": 17239 + }, + { + "epoch": 2.94, + "learning_rate": 2.0642493939770914e-08, + "loss": 0.3145, + "step": 17240 + }, + { + "epoch": 2.94, + "learning_rate": 2.0524408747057966e-08, + "loss": 0.3144, + "step": 17241 + }, + { + "epoch": 2.94, + "learning_rate": 2.0406661929524007e-08, + "loss": 0.3107, + "step": 17242 + }, + { + "epoch": 2.94, + "learning_rate": 2.028925349116029e-08, + "loss": 0.3387, + "step": 17243 + }, + { + "epoch": 2.94, + "learning_rate": 2.0172183435950288e-08, + "loss": 0.2943, + "step": 17244 + }, + { + "epoch": 2.94, + "learning_rate": 2.0055451767861944e-08, + "loss": 0.3306, + "step": 17245 + }, + { + "epoch": 2.94, + "learning_rate": 1.9939058490853203e-08, + "loss": 0.3171, + "step": 17246 + }, + { + "epoch": 2.94, + "learning_rate": 1.9823003608870906e-08, + "loss": 0.3137, + "step": 17247 + }, + { + "epoch": 2.94, + "learning_rate": 1.9707287125850793e-08, + "loss": 0.3018, + "step": 17248 + }, + { + "epoch": 2.94, + "learning_rate": 1.9591909045715286e-08, + "loss": 0.2979, + "step": 17249 + }, + { + "epoch": 2.94, + "learning_rate": 1.9476869372377916e-08, + "loss": 0.3338, + "step": 17250 + }, + { + "epoch": 2.94, + "learning_rate": 1.936216810973779e-08, + "loss": 0.3008, + "step": 17251 + }, + { + "epoch": 2.94, + "learning_rate": 1.9247805261685127e-08, + "loss": 0.3194, + "step": 17252 + }, + { + "epoch": 2.94, + "learning_rate": 1.9133780832096825e-08, + "loss": 0.3113, + "step": 17253 + }, + { + "epoch": 2.94, + "learning_rate": 1.9020094824839795e-08, + "loss": 0.3118, + "step": 17254 + }, + { + "epoch": 2.94, + "learning_rate": 1.890674724376984e-08, + "loss": 0.3104, + "step": 17255 + }, + { + "epoch": 2.94, + "learning_rate": 1.879373809272833e-08, + "loss": 0.3231, + "step": 17256 + }, + { + "epoch": 2.94, + "learning_rate": 1.8681067375547756e-08, + "loss": 0.2895, + "step": 17257 + }, + { + "epoch": 2.94, + "learning_rate": 1.8568735096048395e-08, + "loss": 0.3358, + "step": 17258 + }, + { + "epoch": 2.94, + "learning_rate": 1.8456741258039423e-08, + "loss": 0.2849, + "step": 17259 + }, + { + "epoch": 2.94, + "learning_rate": 1.83450858653178e-08, + "loss": 0.305, + "step": 17260 + }, + { + "epoch": 2.94, + "learning_rate": 1.8233768921669393e-08, + "loss": 0.3257, + "step": 17261 + }, + { + "epoch": 2.94, + "learning_rate": 1.8122790430870062e-08, + "loss": 0.2968, + "step": 17262 + }, + { + "epoch": 2.94, + "learning_rate": 1.801215039668125e-08, + "loss": 0.3289, + "step": 17263 + }, + { + "epoch": 2.94, + "learning_rate": 1.7901848822854397e-08, + "loss": 0.3198, + "step": 17264 + }, + { + "epoch": 2.94, + "learning_rate": 1.779188571312984e-08, + "loss": 0.2947, + "step": 17265 + }, + { + "epoch": 2.94, + "learning_rate": 1.7682261071236827e-08, + "loss": 0.3205, + "step": 17266 + }, + { + "epoch": 2.94, + "learning_rate": 1.7572974900891272e-08, + "loss": 0.3222, + "step": 17267 + }, + { + "epoch": 2.94, + "learning_rate": 1.7464027205799094e-08, + "loss": 0.3072, + "step": 17268 + }, + { + "epoch": 2.95, + "learning_rate": 1.735541798965512e-08, + "loss": 0.3112, + "step": 17269 + }, + { + "epoch": 2.95, + "learning_rate": 1.7247147256141962e-08, + "loss": 0.3144, + "step": 17270 + }, + { + "epoch": 2.95, + "learning_rate": 1.71392150089289e-08, + "loss": 0.2939, + "step": 17271 + }, + { + "epoch": 2.95, + "learning_rate": 1.7031621251677455e-08, + "loss": 0.294, + "step": 17272 + }, + { + "epoch": 2.95, + "learning_rate": 1.692436598803582e-08, + "loss": 0.3166, + "step": 17273 + }, + { + "epoch": 2.95, + "learning_rate": 1.681744922163997e-08, + "loss": 0.292, + "step": 17274 + }, + { + "epoch": 2.95, + "learning_rate": 1.6710870956114788e-08, + "loss": 0.3007, + "step": 17275 + }, + { + "epoch": 2.95, + "learning_rate": 1.660463119507516e-08, + "loss": 0.3134, + "step": 17276 + }, + { + "epoch": 2.95, + "learning_rate": 1.649872994212265e-08, + "loss": 0.2889, + "step": 17277 + }, + { + "epoch": 2.95, + "learning_rate": 1.6393167200848827e-08, + "loss": 0.3149, + "step": 17278 + }, + { + "epoch": 2.95, + "learning_rate": 1.628794297483194e-08, + "loss": 0.3199, + "step": 17279 + }, + { + "epoch": 2.95, + "learning_rate": 1.6183057267640245e-08, + "loss": 0.3057, + "step": 17280 + }, + { + "epoch": 2.95, + "learning_rate": 1.60785100828309e-08, + "loss": 0.2956, + "step": 17281 + }, + { + "epoch": 2.95, + "learning_rate": 1.5974301423946625e-08, + "loss": 0.326, + "step": 17282 + }, + { + "epoch": 2.95, + "learning_rate": 1.587043129452348e-08, + "loss": 0.3294, + "step": 17283 + }, + { + "epoch": 2.95, + "learning_rate": 1.5766899698081984e-08, + "loss": 0.3162, + "step": 17284 + }, + { + "epoch": 2.95, + "learning_rate": 1.566370663813266e-08, + "loss": 0.3153, + "step": 17285 + }, + { + "epoch": 2.95, + "learning_rate": 1.556085211817493e-08, + "loss": 0.2964, + "step": 17286 + }, + { + "epoch": 2.95, + "learning_rate": 1.5458336141696006e-08, + "loss": 0.337, + "step": 17287 + }, + { + "epoch": 2.95, + "learning_rate": 1.5356158712171997e-08, + "loss": 0.3234, + "step": 17288 + }, + { + "epoch": 2.95, + "learning_rate": 1.52543198330668e-08, + "loss": 0.2752, + "step": 17289 + }, + { + "epoch": 2.95, + "learning_rate": 1.5152819507835426e-08, + "loss": 0.3272, + "step": 17290 + }, + { + "epoch": 2.95, + "learning_rate": 1.5051657739916236e-08, + "loss": 0.326, + "step": 17291 + }, + { + "epoch": 2.95, + "learning_rate": 1.495083453274315e-08, + "loss": 0.2979, + "step": 17292 + }, + { + "epoch": 2.95, + "learning_rate": 1.4850349889731219e-08, + "loss": 0.3369, + "step": 17293 + }, + { + "epoch": 2.95, + "learning_rate": 1.4750203814289932e-08, + "loss": 0.3036, + "step": 17294 + }, + { + "epoch": 2.95, + "learning_rate": 1.4650396309814352e-08, + "loss": 0.3037, + "step": 17295 + }, + { + "epoch": 2.95, + "learning_rate": 1.455092737968844e-08, + "loss": 0.3192, + "step": 17296 + }, + { + "epoch": 2.95, + "learning_rate": 1.4451797027283943e-08, + "loss": 0.3335, + "step": 17297 + }, + { + "epoch": 2.95, + "learning_rate": 1.4353005255964836e-08, + "loss": 0.3234, + "step": 17298 + }, + { + "epoch": 2.95, + "learning_rate": 1.4254552069077332e-08, + "loss": 0.3018, + "step": 17299 + }, + { + "epoch": 2.95, + "learning_rate": 1.4156437469963203e-08, + "loss": 0.318, + "step": 17300 + }, + { + "epoch": 2.95, + "learning_rate": 1.4058661461946454e-08, + "loss": 0.3265, + "step": 17301 + }, + { + "epoch": 2.95, + "learning_rate": 1.3961224048344436e-08, + "loss": 0.3, + "step": 17302 + }, + { + "epoch": 2.95, + "learning_rate": 1.3864125232458947e-08, + "loss": 0.2831, + "step": 17303 + }, + { + "epoch": 2.95, + "learning_rate": 1.3767365017584022e-08, + "loss": 0.2968, + "step": 17304 + }, + { + "epoch": 2.95, + "learning_rate": 1.367094340699926e-08, + "loss": 0.312, + "step": 17305 + }, + { + "epoch": 2.95, + "learning_rate": 1.3574860403975376e-08, + "loss": 0.3154, + "step": 17306 + }, + { + "epoch": 2.95, + "learning_rate": 1.3479116011769766e-08, + "loss": 0.2951, + "step": 17307 + }, + { + "epoch": 2.95, + "learning_rate": 1.3383710233627611e-08, + "loss": 0.3044, + "step": 17308 + }, + { + "epoch": 2.95, + "learning_rate": 1.3288643072786323e-08, + "loss": 0.3262, + "step": 17309 + }, + { + "epoch": 2.95, + "learning_rate": 1.3193914532466657e-08, + "loss": 0.3095, + "step": 17310 + }, + { + "epoch": 2.95, + "learning_rate": 1.3099524615882709e-08, + "loss": 0.3151, + "step": 17311 + }, + { + "epoch": 2.95, + "learning_rate": 1.300547332623303e-08, + "loss": 0.2994, + "step": 17312 + }, + { + "epoch": 2.95, + "learning_rate": 1.2911760666708406e-08, + "loss": 0.2977, + "step": 17313 + }, + { + "epoch": 2.95, + "learning_rate": 1.281838664048629e-08, + "loss": 0.3216, + "step": 17314 + }, + { + "epoch": 2.95, + "learning_rate": 1.2725351250730822e-08, + "loss": 0.3091, + "step": 17315 + }, + { + "epoch": 2.95, + "learning_rate": 1.2632654500598363e-08, + "loss": 0.306, + "step": 17316 + }, + { + "epoch": 2.95, + "learning_rate": 1.2540296393231954e-08, + "loss": 0.3179, + "step": 17317 + }, + { + "epoch": 2.95, + "learning_rate": 1.2448276931762426e-08, + "loss": 0.3452, + "step": 17318 + }, + { + "epoch": 2.95, + "learning_rate": 1.2356596119309505e-08, + "loss": 0.315, + "step": 17319 + }, + { + "epoch": 2.95, + "learning_rate": 1.2265253958982925e-08, + "loss": 0.3195, + "step": 17320 + }, + { + "epoch": 2.95, + "learning_rate": 1.21742504538791e-08, + "loss": 0.3, + "step": 17321 + }, + { + "epoch": 2.95, + "learning_rate": 1.2083585607084447e-08, + "loss": 0.321, + "step": 17322 + }, + { + "epoch": 2.95, + "learning_rate": 1.1993259421672066e-08, + "loss": 0.3075, + "step": 17323 + }, + { + "epoch": 2.95, + "learning_rate": 1.190327190070617e-08, + "loss": 0.3367, + "step": 17324 + }, + { + "epoch": 2.95, + "learning_rate": 1.1813623047236544e-08, + "loss": 0.3025, + "step": 17325 + }, + { + "epoch": 2.95, + "learning_rate": 1.1724312864302978e-08, + "loss": 0.3255, + "step": 17326 + }, + { + "epoch": 2.96, + "learning_rate": 1.163534135493527e-08, + "loss": 0.2895, + "step": 17327 + }, + { + "epoch": 2.96, + "learning_rate": 1.1546708522148785e-08, + "loss": 0.3089, + "step": 17328 + }, + { + "epoch": 2.96, + "learning_rate": 1.1458414368948901e-08, + "loss": 0.3115, + "step": 17329 + }, + { + "epoch": 2.96, + "learning_rate": 1.1370458898329884e-08, + "loss": 0.3363, + "step": 17330 + }, + { + "epoch": 2.96, + "learning_rate": 1.1282842113273796e-08, + "loss": 0.2957, + "step": 17331 + }, + { + "epoch": 2.96, + "learning_rate": 1.1195564016751592e-08, + "loss": 0.3138, + "step": 17332 + }, + { + "epoch": 2.96, + "learning_rate": 1.110862461172202e-08, + "loss": 0.3315, + "step": 17333 + }, + { + "epoch": 2.96, + "learning_rate": 1.1022023901133827e-08, + "loss": 0.317, + "step": 17334 + }, + { + "epoch": 2.96, + "learning_rate": 1.0935761887922446e-08, + "loss": 0.3167, + "step": 17335 + }, + { + "epoch": 2.96, + "learning_rate": 1.0849838575013317e-08, + "loss": 0.3121, + "step": 17336 + }, + { + "epoch": 2.96, + "learning_rate": 1.076425396532077e-08, + "loss": 0.3259, + "step": 17337 + }, + { + "epoch": 2.96, + "learning_rate": 1.0679008061745822e-08, + "loss": 0.3075, + "step": 17338 + }, + { + "epoch": 2.96, + "learning_rate": 1.0594100867178381e-08, + "loss": 0.315, + "step": 17339 + }, + { + "epoch": 2.96, + "learning_rate": 1.0509532384498367e-08, + "loss": 0.2988, + "step": 17340 + }, + { + "epoch": 2.96, + "learning_rate": 1.0425302616572375e-08, + "loss": 0.2857, + "step": 17341 + }, + { + "epoch": 2.96, + "learning_rate": 1.0341411566257008e-08, + "loss": 0.33, + "step": 17342 + }, + { + "epoch": 2.96, + "learning_rate": 1.0257859236396661e-08, + "loss": 0.3028, + "step": 17343 + }, + { + "epoch": 2.96, + "learning_rate": 1.017464562982462e-08, + "loss": 0.3016, + "step": 17344 + }, + { + "epoch": 2.96, + "learning_rate": 1.0091770749361962e-08, + "loss": 0.3455, + "step": 17345 + }, + { + "epoch": 2.96, + "learning_rate": 1.0009234597818663e-08, + "loss": 0.3185, + "step": 17346 + }, + { + "epoch": 2.96, + "learning_rate": 9.927037177993593e-09, + "loss": 0.2789, + "step": 17347 + }, + { + "epoch": 2.96, + "learning_rate": 9.845178492673414e-09, + "loss": 0.3021, + "step": 17348 + }, + { + "epoch": 2.96, + "learning_rate": 9.763658544634791e-09, + "loss": 0.2907, + "step": 17349 + }, + { + "epoch": 2.96, + "learning_rate": 9.682477336639962e-09, + "loss": 0.2989, + "step": 17350 + }, + { + "epoch": 2.96, + "learning_rate": 9.601634871443388e-09, + "loss": 0.3123, + "step": 17351 + }, + { + "epoch": 2.96, + "learning_rate": 9.521131151786211e-09, + "loss": 0.305, + "step": 17352 + }, + { + "epoch": 2.96, + "learning_rate": 9.440966180396249e-09, + "loss": 0.2842, + "step": 17353 + }, + { + "epoch": 2.96, + "learning_rate": 9.361139959993549e-09, + "loss": 0.304, + "step": 17354 + }, + { + "epoch": 2.96, + "learning_rate": 9.281652493282612e-09, + "loss": 0.3117, + "step": 17355 + }, + { + "epoch": 2.96, + "learning_rate": 9.202503782961281e-09, + "loss": 0.3431, + "step": 17356 + }, + { + "epoch": 2.96, + "learning_rate": 9.123693831711854e-09, + "loss": 0.3099, + "step": 17357 + }, + { + "epoch": 2.96, + "learning_rate": 9.045222642206642e-09, + "loss": 0.3347, + "step": 17358 + }, + { + "epoch": 2.96, + "learning_rate": 8.967090217105734e-09, + "loss": 0.3085, + "step": 17359 + }, + { + "epoch": 2.96, + "learning_rate": 8.889296559060345e-09, + "loss": 0.3229, + "step": 17360 + }, + { + "epoch": 2.96, + "learning_rate": 8.811841670706145e-09, + "loss": 0.3078, + "step": 17361 + }, + { + "epoch": 2.96, + "learning_rate": 8.73472555466992e-09, + "loss": 0.3065, + "step": 17362 + }, + { + "epoch": 2.96, + "learning_rate": 8.657948213567357e-09, + "loss": 0.3234, + "step": 17363 + }, + { + "epoch": 2.96, + "learning_rate": 8.58150965000082e-09, + "loss": 0.3149, + "step": 17364 + }, + { + "epoch": 2.96, + "learning_rate": 8.505409866562675e-09, + "loss": 0.3017, + "step": 17365 + }, + { + "epoch": 2.96, + "learning_rate": 8.429648865833084e-09, + "loss": 0.3049, + "step": 17366 + }, + { + "epoch": 2.96, + "learning_rate": 8.354226650381104e-09, + "loss": 0.3139, + "step": 17367 + }, + { + "epoch": 2.96, + "learning_rate": 8.279143222763576e-09, + "loss": 0.3097, + "step": 17368 + }, + { + "epoch": 2.96, + "learning_rate": 8.204398585526242e-09, + "loss": 0.3048, + "step": 17369 + }, + { + "epoch": 2.96, + "learning_rate": 8.12999274120374e-09, + "loss": 0.309, + "step": 17370 + }, + { + "epoch": 2.96, + "learning_rate": 8.05592569231961e-09, + "loss": 0.2997, + "step": 17371 + }, + { + "epoch": 2.96, + "learning_rate": 7.982197441384066e-09, + "loss": 0.3084, + "step": 17372 + }, + { + "epoch": 2.96, + "learning_rate": 7.908807990897327e-09, + "loss": 0.3228, + "step": 17373 + }, + { + "epoch": 2.96, + "learning_rate": 7.835757343349626e-09, + "loss": 0.3254, + "step": 17374 + }, + { + "epoch": 2.96, + "learning_rate": 7.763045501214539e-09, + "loss": 0.3044, + "step": 17375 + }, + { + "epoch": 2.96, + "learning_rate": 7.690672466961203e-09, + "loss": 0.2983, + "step": 17376 + }, + { + "epoch": 2.96, + "learning_rate": 7.618638243040987e-09, + "loss": 0.3081, + "step": 17377 + }, + { + "epoch": 2.96, + "learning_rate": 7.546942831896387e-09, + "loss": 0.3144, + "step": 17378 + }, + { + "epoch": 2.96, + "learning_rate": 7.475586235959898e-09, + "loss": 0.3107, + "step": 17379 + }, + { + "epoch": 2.96, + "learning_rate": 7.404568457649586e-09, + "loss": 0.3294, + "step": 17380 + }, + { + "epoch": 2.96, + "learning_rate": 7.333889499374636e-09, + "loss": 0.3254, + "step": 17381 + }, + { + "epoch": 2.96, + "learning_rate": 7.263549363530909e-09, + "loss": 0.2813, + "step": 17382 + }, + { + "epoch": 2.96, + "learning_rate": 7.193548052504273e-09, + "loss": 0.3174, + "step": 17383 + }, + { + "epoch": 2.96, + "learning_rate": 7.123885568667277e-09, + "loss": 0.3082, + "step": 17384 + }, + { + "epoch": 2.96, + "learning_rate": 7.054561914381364e-09, + "loss": 0.29, + "step": 17385 + }, + { + "epoch": 2.97, + "learning_rate": 6.985577091999096e-09, + "loss": 0.3228, + "step": 17386 + }, + { + "epoch": 2.97, + "learning_rate": 6.9169311038574935e-09, + "loss": 0.2937, + "step": 17387 + }, + { + "epoch": 2.97, + "learning_rate": 6.848623952285804e-09, + "loss": 0.3079, + "step": 17388 + }, + { + "epoch": 2.97, + "learning_rate": 6.780655639598843e-09, + "loss": 0.3051, + "step": 17389 + }, + { + "epoch": 2.97, + "learning_rate": 6.713026168102544e-09, + "loss": 0.3037, + "step": 17390 + }, + { + "epoch": 2.97, + "learning_rate": 6.645735540088405e-09, + "loss": 0.3036, + "step": 17391 + }, + { + "epoch": 2.97, + "learning_rate": 6.578783757840157e-09, + "loss": 0.3129, + "step": 17392 + }, + { + "epoch": 2.97, + "learning_rate": 6.512170823625985e-09, + "loss": 0.3163, + "step": 17393 + }, + { + "epoch": 2.97, + "learning_rate": 6.445896739705193e-09, + "loss": 0.3005, + "step": 17394 + }, + { + "epoch": 2.97, + "learning_rate": 6.379961508324872e-09, + "loss": 0.2896, + "step": 17395 + }, + { + "epoch": 2.97, + "learning_rate": 6.314365131721012e-09, + "loss": 0.2982, + "step": 17396 + }, + { + "epoch": 2.97, + "learning_rate": 6.2491076121173884e-09, + "loss": 0.2898, + "step": 17397 + }, + { + "epoch": 2.97, + "learning_rate": 6.1841889517277876e-09, + "loss": 0.2997, + "step": 17398 + }, + { + "epoch": 2.97, + "learning_rate": 6.11960915275267e-09, + "loss": 0.3238, + "step": 17399 + }, + { + "epoch": 2.97, + "learning_rate": 6.055368217380287e-09, + "loss": 0.3172, + "step": 17400 + }, + { + "epoch": 2.97, + "learning_rate": 5.991466147791114e-09, + "loss": 0.3264, + "step": 17401 + }, + { + "epoch": 2.97, + "learning_rate": 5.927902946151198e-09, + "loss": 0.3212, + "step": 17402 + }, + { + "epoch": 2.97, + "learning_rate": 5.864678614615482e-09, + "loss": 0.306, + "step": 17403 + }, + { + "epoch": 2.97, + "learning_rate": 5.8017931553266956e-09, + "loss": 0.32, + "step": 17404 + }, + { + "epoch": 2.97, + "learning_rate": 5.739246570419799e-09, + "loss": 0.3067, + "step": 17405 + }, + { + "epoch": 2.97, + "learning_rate": 5.677038862013095e-09, + "loss": 0.3025, + "step": 17406 + }, + { + "epoch": 2.97, + "learning_rate": 5.615170032217121e-09, + "loss": 0.3314, + "step": 17407 + }, + { + "epoch": 2.97, + "learning_rate": 5.553640083129086e-09, + "loss": 0.3176, + "step": 17408 + }, + { + "epoch": 2.97, + "learning_rate": 5.49244901683621e-09, + "loss": 0.3262, + "step": 17409 + }, + { + "epoch": 2.97, + "learning_rate": 5.431596835411279e-09, + "loss": 0.321, + "step": 17410 + }, + { + "epoch": 2.97, + "learning_rate": 5.371083540920419e-09, + "loss": 0.2993, + "step": 17411 + }, + { + "epoch": 2.97, + "learning_rate": 5.3109091354131e-09, + "loss": 0.2946, + "step": 17412 + }, + { + "epoch": 2.97, + "learning_rate": 5.251073620931024e-09, + "loss": 0.3059, + "step": 17413 + }, + { + "epoch": 2.97, + "learning_rate": 5.191576999502568e-09, + "loss": 0.3236, + "step": 17414 + }, + { + "epoch": 2.97, + "learning_rate": 5.132419273143896e-09, + "loss": 0.3072, + "step": 17415 + }, + { + "epoch": 2.97, + "learning_rate": 5.073600443863402e-09, + "loss": 0.3171, + "step": 17416 + }, + { + "epoch": 2.97, + "learning_rate": 5.015120513653937e-09, + "loss": 0.3209, + "step": 17417 + }, + { + "epoch": 2.97, + "learning_rate": 4.9569794844983584e-09, + "loss": 0.2917, + "step": 17418 + }, + { + "epoch": 2.97, + "learning_rate": 4.899177358368423e-09, + "loss": 0.3033, + "step": 17419 + }, + { + "epoch": 2.97, + "learning_rate": 4.841714137223675e-09, + "loss": 0.325, + "step": 17420 + }, + { + "epoch": 2.97, + "learning_rate": 4.784589823012553e-09, + "loss": 0.319, + "step": 17421 + }, + { + "epoch": 2.97, + "learning_rate": 4.727804417672399e-09, + "loss": 0.3296, + "step": 17422 + }, + { + "epoch": 2.97, + "learning_rate": 4.671357923129449e-09, + "loss": 0.3222, + "step": 17423 + }, + { + "epoch": 2.97, + "learning_rate": 4.615250341295507e-09, + "loss": 0.3225, + "step": 17424 + }, + { + "epoch": 2.97, + "learning_rate": 4.559481674074606e-09, + "loss": 0.3149, + "step": 17425 + }, + { + "epoch": 2.97, + "learning_rate": 4.504051923356345e-09, + "loss": 0.3289, + "step": 17426 + }, + { + "epoch": 2.97, + "learning_rate": 4.448961091021442e-09, + "loss": 0.3241, + "step": 17427 + }, + { + "epoch": 2.97, + "learning_rate": 4.3942091789384025e-09, + "loss": 0.3167, + "step": 17428 + }, + { + "epoch": 2.97, + "learning_rate": 4.339796188962409e-09, + "loss": 0.2857, + "step": 17429 + }, + { + "epoch": 2.97, + "learning_rate": 4.285722122937542e-09, + "loss": 0.3073, + "step": 17430 + }, + { + "epoch": 2.97, + "learning_rate": 4.231986982700109e-09, + "loss": 0.3231, + "step": 17431 + }, + { + "epoch": 2.97, + "learning_rate": 4.178590770069768e-09, + "loss": 0.2961, + "step": 17432 + }, + { + "epoch": 2.97, + "learning_rate": 4.1255334868584015e-09, + "loss": 0.3289, + "step": 17433 + }, + { + "epoch": 2.97, + "learning_rate": 4.072815134864572e-09, + "loss": 0.3038, + "step": 17434 + }, + { + "epoch": 2.97, + "learning_rate": 4.020435715874626e-09, + "loss": 0.299, + "step": 17435 + }, + { + "epoch": 2.97, + "learning_rate": 3.968395231667144e-09, + "loss": 0.322, + "step": 17436 + }, + { + "epoch": 2.97, + "learning_rate": 3.916693684004047e-09, + "loss": 0.3316, + "step": 17437 + }, + { + "epoch": 2.97, + "learning_rate": 3.865331074639489e-09, + "loss": 0.3143, + "step": 17438 + }, + { + "epoch": 2.97, + "learning_rate": 3.8143074053165196e-09, + "loss": 0.2873, + "step": 17439 + }, + { + "epoch": 2.97, + "learning_rate": 3.763622677762646e-09, + "loss": 0.3074, + "step": 17440 + }, + { + "epoch": 2.97, + "learning_rate": 3.713276893697604e-09, + "loss": 0.2951, + "step": 17441 + }, + { + "epoch": 2.97, + "learning_rate": 3.6632700548278057e-09, + "loss": 0.3082, + "step": 17442 + }, + { + "epoch": 2.97, + "learning_rate": 3.613602162850782e-09, + "loss": 0.3125, + "step": 17443 + }, + { + "epoch": 2.98, + "learning_rate": 3.564273219448522e-09, + "loss": 0.3128, + "step": 17444 + }, + { + "epoch": 2.98, + "learning_rate": 3.515283226295241e-09, + "loss": 0.3106, + "step": 17445 + }, + { + "epoch": 2.98, + "learning_rate": 3.466632185049612e-09, + "loss": 0.3012, + "step": 17446 + }, + { + "epoch": 2.98, + "learning_rate": 3.418320097364758e-09, + "loss": 0.292, + "step": 17447 + }, + { + "epoch": 2.98, + "learning_rate": 3.3703469648760367e-09, + "loss": 0.2987, + "step": 17448 + }, + { + "epoch": 2.98, + "learning_rate": 3.3227127892099253e-09, + "loss": 0.3323, + "step": 17449 + }, + { + "epoch": 2.98, + "learning_rate": 3.2754175719840187e-09, + "loss": 0.3276, + "step": 17450 + }, + { + "epoch": 2.98, + "learning_rate": 3.2284613148003686e-09, + "loss": 0.2961, + "step": 17451 + }, + { + "epoch": 2.98, + "learning_rate": 3.1818440192510347e-09, + "loss": 0.3018, + "step": 17452 + }, + { + "epoch": 2.98, + "learning_rate": 3.135565686916975e-09, + "loss": 0.3234, + "step": 17453 + }, + { + "epoch": 2.98, + "learning_rate": 3.0896263193669342e-09, + "loss": 0.2772, + "step": 17454 + }, + { + "epoch": 2.98, + "learning_rate": 3.0440259181596653e-09, + "loss": 0.3296, + "step": 17455 + }, + { + "epoch": 2.98, + "learning_rate": 2.998764484839489e-09, + "loss": 0.3101, + "step": 17456 + }, + { + "epoch": 2.98, + "learning_rate": 2.953842020942954e-09, + "loss": 0.3043, + "step": 17457 + }, + { + "epoch": 2.98, + "learning_rate": 2.909258527993286e-09, + "loss": 0.3216, + "step": 17458 + }, + { + "epoch": 2.98, + "learning_rate": 2.8650140075003885e-09, + "loss": 0.3057, + "step": 17459 + }, + { + "epoch": 2.98, + "learning_rate": 2.821108460966393e-09, + "loss": 0.3215, + "step": 17460 + }, + { + "epoch": 2.98, + "learning_rate": 2.777541889877888e-09, + "loss": 0.3248, + "step": 17461 + }, + { + "epoch": 2.98, + "learning_rate": 2.7343142957136915e-09, + "loss": 0.2972, + "step": 17462 + }, + { + "epoch": 2.98, + "learning_rate": 2.691425679939297e-09, + "loss": 0.3081, + "step": 17463 + }, + { + "epoch": 2.98, + "learning_rate": 2.6488760440090965e-09, + "loss": 0.3308, + "step": 17464 + }, + { + "epoch": 2.98, + "learning_rate": 2.6066653893652704e-09, + "loss": 0.2816, + "step": 17465 + }, + { + "epoch": 2.98, + "learning_rate": 2.564793717440006e-09, + "loss": 0.3085, + "step": 17466 + }, + { + "epoch": 2.98, + "learning_rate": 2.5232610296510585e-09, + "loss": 0.3237, + "step": 17467 + }, + { + "epoch": 2.98, + "learning_rate": 2.4820673274095207e-09, + "loss": 0.2956, + "step": 17468 + }, + { + "epoch": 2.98, + "learning_rate": 2.4412126121098333e-09, + "loss": 0.3073, + "step": 17469 + }, + { + "epoch": 2.98, + "learning_rate": 2.4006968851375546e-09, + "loss": 0.3456, + "step": 17470 + }, + { + "epoch": 2.98, + "learning_rate": 2.36052014786714e-09, + "loss": 0.3167, + "step": 17471 + }, + { + "epoch": 2.98, + "learning_rate": 2.320682401660834e-09, + "loss": 0.3171, + "step": 17472 + }, + { + "epoch": 2.98, + "learning_rate": 2.281183647869778e-09, + "loss": 0.3217, + "step": 17473 + }, + { + "epoch": 2.98, + "learning_rate": 2.2420238878317903e-09, + "loss": 0.3179, + "step": 17474 + }, + { + "epoch": 2.98, + "learning_rate": 2.203203122876918e-09, + "loss": 0.3291, + "step": 17475 + }, + { + "epoch": 2.98, + "learning_rate": 2.1647213543185553e-09, + "loss": 0.3424, + "step": 17476 + }, + { + "epoch": 2.98, + "learning_rate": 2.126578583464545e-09, + "loss": 0.2935, + "step": 17477 + }, + { + "epoch": 2.98, + "learning_rate": 2.088774811606076e-09, + "loss": 0.3041, + "step": 17478 + }, + { + "epoch": 2.98, + "learning_rate": 2.051310040025456e-09, + "loss": 0.3215, + "step": 17479 + }, + { + "epoch": 2.98, + "learning_rate": 2.014184269993891e-09, + "loss": 0.2809, + "step": 17480 + }, + { + "epoch": 2.98, + "learning_rate": 1.9773975027681523e-09, + "loss": 0.3086, + "step": 17481 + }, + { + "epoch": 2.98, + "learning_rate": 1.940949739598352e-09, + "loss": 0.3153, + "step": 17482 + }, + { + "epoch": 2.98, + "learning_rate": 1.904840981717948e-09, + "loss": 0.2991, + "step": 17483 + }, + { + "epoch": 2.98, + "learning_rate": 1.8690712303526258e-09, + "loss": 0.3179, + "step": 17484 + }, + { + "epoch": 2.98, + "learning_rate": 1.8336404867158597e-09, + "loss": 0.3418, + "step": 17485 + }, + { + "epoch": 2.98, + "learning_rate": 1.7985487520066903e-09, + "loss": 0.2948, + "step": 17486 + }, + { + "epoch": 2.98, + "learning_rate": 1.7637960274163867e-09, + "loss": 0.3185, + "step": 17487 + }, + { + "epoch": 2.98, + "learning_rate": 1.7293823141240062e-09, + "loss": 0.2984, + "step": 17488 + }, + { + "epoch": 2.98, + "learning_rate": 1.6953076132952827e-09, + "loss": 0.3278, + "step": 17489 + }, + { + "epoch": 2.98, + "learning_rate": 1.6615719260859586e-09, + "loss": 0.285, + "step": 17490 + }, + { + "epoch": 2.98, + "learning_rate": 1.6281752536395635e-09, + "loss": 0.2978, + "step": 17491 + }, + { + "epoch": 2.98, + "learning_rate": 1.5951175970885247e-09, + "loss": 0.3069, + "step": 17492 + }, + { + "epoch": 2.98, + "learning_rate": 1.5623989575541676e-09, + "loss": 0.2879, + "step": 17493 + }, + { + "epoch": 2.98, + "learning_rate": 1.5300193361467152e-09, + "loss": 0.3168, + "step": 17494 + }, + { + "epoch": 2.98, + "learning_rate": 1.497978733961958e-09, + "loss": 0.308, + "step": 17495 + }, + { + "epoch": 2.98, + "learning_rate": 1.4662771520879138e-09, + "loss": 0.2939, + "step": 17496 + }, + { + "epoch": 2.98, + "learning_rate": 1.434914591599279e-09, + "loss": 0.3053, + "step": 17497 + }, + { + "epoch": 2.98, + "learning_rate": 1.4038910535585371e-09, + "loss": 0.3194, + "step": 17498 + }, + { + "epoch": 2.98, + "learning_rate": 1.3732065390192894e-09, + "loss": 0.3026, + "step": 17499 + }, + { + "epoch": 2.98, + "learning_rate": 1.3428610490195947e-09, + "loss": 0.3102, + "step": 17500 + }, + { + "epoch": 2.98, + "learning_rate": 1.3128545845908503e-09, + "loss": 0.3389, + "step": 17501 + }, + { + "epoch": 2.98, + "learning_rate": 1.2831871467489098e-09, + "loss": 0.3139, + "step": 17502 + }, + { + "epoch": 2.99, + "learning_rate": 1.2538587364996357e-09, + "loss": 0.2939, + "step": 17503 + }, + { + "epoch": 2.99, + "learning_rate": 1.224869354838898e-09, + "loss": 0.3085, + "step": 17504 + }, + { + "epoch": 2.99, + "learning_rate": 1.1962190027481336e-09, + "loss": 0.3376, + "step": 17505 + }, + { + "epoch": 2.99, + "learning_rate": 1.167907681199898e-09, + "loss": 0.2877, + "step": 17506 + }, + { + "epoch": 2.99, + "learning_rate": 1.139935391153424e-09, + "loss": 0.3083, + "step": 17507 + }, + { + "epoch": 2.99, + "learning_rate": 1.1123021335568418e-09, + "loss": 0.3188, + "step": 17508 + }, + { + "epoch": 2.99, + "learning_rate": 1.0850079093482902e-09, + "loss": 0.3201, + "step": 17509 + }, + { + "epoch": 2.99, + "learning_rate": 1.0580527194525847e-09, + "loss": 0.3426, + "step": 17510 + }, + { + "epoch": 2.99, + "learning_rate": 1.031436564783439e-09, + "loss": 0.3174, + "step": 17511 + }, + { + "epoch": 2.99, + "learning_rate": 1.0051594462445747e-09, + "loss": 0.3272, + "step": 17512 + }, + { + "epoch": 2.99, + "learning_rate": 9.792213647252801e-10, + "loss": 0.297, + "step": 17513 + }, + { + "epoch": 2.99, + "learning_rate": 9.536223211059625e-10, + "loss": 0.3175, + "step": 17514 + }, + { + "epoch": 2.99, + "learning_rate": 9.283623162537059e-10, + "loss": 0.322, + "step": 17515 + }, + { + "epoch": 2.99, + "learning_rate": 9.034413510267126e-10, + "loss": 0.2864, + "step": 17516 + }, + { + "epoch": 2.99, + "learning_rate": 8.788594262687522e-10, + "loss": 0.2979, + "step": 17517 + }, + { + "epoch": 2.99, + "learning_rate": 8.546165428124919e-10, + "loss": 0.3018, + "step": 17518 + }, + { + "epoch": 2.99, + "learning_rate": 8.307127014828276e-10, + "loss": 0.3247, + "step": 17519 + }, + { + "epoch": 2.99, + "learning_rate": 8.071479030868911e-10, + "loss": 0.307, + "step": 17520 + }, + { + "epoch": 2.99, + "learning_rate": 7.839221484262638e-10, + "loss": 0.3042, + "step": 17521 + }, + { + "epoch": 2.99, + "learning_rate": 7.610354382869833e-10, + "loss": 0.3143, + "step": 17522 + }, + { + "epoch": 2.99, + "learning_rate": 7.384877734450957e-10, + "loss": 0.2961, + "step": 17523 + }, + { + "epoch": 2.99, + "learning_rate": 7.162791546666548e-10, + "loss": 0.3129, + "step": 17524 + }, + { + "epoch": 2.99, + "learning_rate": 6.944095827032815e-10, + "loss": 0.304, + "step": 17525 + }, + { + "epoch": 2.99, + "learning_rate": 6.72879058296605e-10, + "loss": 0.3013, + "step": 17526 + }, + { + "epoch": 2.99, + "learning_rate": 6.516875821771518e-10, + "loss": 0.3409, + "step": 17527 + }, + { + "epoch": 2.99, + "learning_rate": 6.308351550621261e-10, + "loss": 0.3042, + "step": 17528 + }, + { + "epoch": 2.99, + "learning_rate": 6.103217776609605e-10, + "loss": 0.3124, + "step": 17529 + }, + { + "epoch": 2.99, + "learning_rate": 5.901474506675442e-10, + "loss": 0.3072, + "step": 17530 + }, + { + "epoch": 2.99, + "learning_rate": 5.703121747657748e-10, + "loss": 0.3092, + "step": 17531 + }, + { + "epoch": 2.99, + "learning_rate": 5.508159506295574e-10, + "loss": 0.302, + "step": 17532 + }, + { + "epoch": 2.99, + "learning_rate": 5.316587789194749e-10, + "loss": 0.3, + "step": 17533 + }, + { + "epoch": 2.99, + "learning_rate": 5.128406602838976e-10, + "loss": 0.2726, + "step": 17534 + }, + { + "epoch": 2.99, + "learning_rate": 4.943615953612035e-10, + "loss": 0.3191, + "step": 17535 + }, + { + "epoch": 2.99, + "learning_rate": 4.76221584779779e-10, + "loss": 0.307, + "step": 17536 + }, + { + "epoch": 2.99, + "learning_rate": 4.584206291524673e-10, + "loss": 0.3119, + "step": 17537 + }, + { + "epoch": 2.99, + "learning_rate": 4.409587290843398e-10, + "loss": 0.3163, + "step": 17538 + }, + { + "epoch": 2.99, + "learning_rate": 4.2383588516714536e-10, + "loss": 0.3074, + "step": 17539 + }, + { + "epoch": 2.99, + "learning_rate": 4.0705209798042044e-10, + "loss": 0.3149, + "step": 17540 + }, + { + "epoch": 2.99, + "learning_rate": 3.906073680948197e-10, + "loss": 0.3158, + "step": 17541 + }, + { + "epoch": 2.99, + "learning_rate": 3.745016960665648e-10, + "loss": 0.2941, + "step": 17542 + }, + { + "epoch": 2.99, + "learning_rate": 3.5873508244299584e-10, + "loss": 0.3028, + "step": 17543 + }, + { + "epoch": 2.99, + "learning_rate": 3.433075277581299e-10, + "loss": 0.2955, + "step": 17544 + }, + { + "epoch": 2.99, + "learning_rate": 3.2821903253488217e-10, + "loss": 0.2804, + "step": 17545 + }, + { + "epoch": 2.99, + "learning_rate": 3.1346959728395523e-10, + "loss": 0.3042, + "step": 17546 + }, + { + "epoch": 2.99, + "learning_rate": 2.9905922250828e-10, + "loss": 0.3087, + "step": 17547 + }, + { + "epoch": 2.99, + "learning_rate": 2.8498790869413427e-10, + "loss": 0.2878, + "step": 17548 + }, + { + "epoch": 2.99, + "learning_rate": 2.7125565631891393e-10, + "loss": 0.2993, + "step": 17549 + }, + { + "epoch": 2.99, + "learning_rate": 2.578624658489126e-10, + "loss": 0.2937, + "step": 17550 + }, + { + "epoch": 2.99, + "learning_rate": 2.4480833773710134e-10, + "loss": 0.2953, + "step": 17551 + }, + { + "epoch": 2.99, + "learning_rate": 2.320932724275693e-10, + "loss": 0.2925, + "step": 17552 + }, + { + "epoch": 2.99, + "learning_rate": 2.1971727035108304e-10, + "loss": 0.3235, + "step": 17553 + }, + { + "epoch": 2.99, + "learning_rate": 2.0768033192619663e-10, + "loss": 0.3052, + "step": 17554 + }, + { + "epoch": 2.99, + "learning_rate": 1.9598245756258238e-10, + "loss": 0.3018, + "step": 17555 + }, + { + "epoch": 2.99, + "learning_rate": 1.8462364765547968e-10, + "loss": 0.3185, + "step": 17556 + }, + { + "epoch": 2.99, + "learning_rate": 1.7360390259124615e-10, + "loss": 0.3012, + "step": 17557 + }, + { + "epoch": 2.99, + "learning_rate": 1.629232227429167e-10, + "loss": 0.3323, + "step": 17558 + }, + { + "epoch": 2.99, + "learning_rate": 1.525816084724241e-10, + "loss": 0.3049, + "step": 17559 + }, + { + "epoch": 2.99, + "learning_rate": 1.4257906013059875e-10, + "loss": 0.3269, + "step": 17560 + }, + { + "epoch": 2.99, + "learning_rate": 1.3291557805716892e-10, + "loss": 0.28, + "step": 17561 + }, + { + "epoch": 3.0, + "learning_rate": 1.2359116257965043e-10, + "loss": 0.322, + "step": 17562 + }, + { + "epoch": 3.0, + "learning_rate": 1.1460581401223636e-10, + "loss": 0.2877, + "step": 17563 + }, + { + "epoch": 3.0, + "learning_rate": 1.059595326624585e-10, + "loss": 0.311, + "step": 17564 + }, + { + "epoch": 3.0, + "learning_rate": 9.765231882230552e-11, + "loss": 0.3495, + "step": 17565 + }, + { + "epoch": 3.0, + "learning_rate": 8.968417277377406e-11, + "loss": 0.3037, + "step": 17566 + }, + { + "epoch": 3.0, + "learning_rate": 8.205509478553808e-11, + "loss": 0.3093, + "step": 17567 + }, + { + "epoch": 3.0, + "learning_rate": 7.476508511738978e-11, + "loss": 0.3143, + "step": 17568 + }, + { + "epoch": 3.0, + "learning_rate": 6.78141440180191e-11, + "loss": 0.3272, + "step": 17569 + }, + { + "epoch": 3.0, + "learning_rate": 6.120227172057291e-11, + "loss": 0.2875, + "step": 17570 + }, + { + "epoch": 3.0, + "learning_rate": 5.4929468450426457e-11, + "loss": 0.3334, + "step": 17571 + }, + { + "epoch": 3.0, + "learning_rate": 4.899573441963235e-11, + "loss": 0.3232, + "step": 17572 + }, + { + "epoch": 3.0, + "learning_rate": 4.340106983025116e-11, + "loss": 0.3146, + "step": 17573 + }, + { + "epoch": 3.0, + "learning_rate": 3.814547487213105e-11, + "loss": 0.3214, + "step": 17574 + }, + { + "epoch": 3.0, + "learning_rate": 3.322894972290769e-11, + "loss": 0.2895, + "step": 17575 + }, + { + "epoch": 3.0, + "learning_rate": 2.8651494549114533e-11, + "loss": 0.3197, + "step": 17576 + }, + { + "epoch": 3.0, + "learning_rate": 2.4413109505072586e-11, + "loss": 0.3054, + "step": 17577 + }, + { + "epoch": 3.0, + "learning_rate": 2.0513794737331283e-11, + "loss": 0.3019, + "step": 17578 + }, + { + "epoch": 3.0, + "learning_rate": 1.6953550375786722e-11, + "loss": 0.311, + "step": 17579 + }, + { + "epoch": 3.0, + "learning_rate": 1.3732376541453207e-11, + "loss": 0.3255, + "step": 17580 + }, + { + "epoch": 3.0, + "learning_rate": 1.0850273344242822e-11, + "loss": 0.3049, + "step": 17581 + }, + { + "epoch": 3.0, + "learning_rate": 8.30724088074497e-12, + "loss": 0.3105, + "step": 17582 + }, + { + "epoch": 3.0, + "learning_rate": 6.10327923866727e-12, + "loss": 0.2946, + "step": 17583 + }, + { + "epoch": 3.0, + "learning_rate": 4.238388492394663e-12, + "loss": 0.3272, + "step": 17584 + }, + { + "epoch": 3.0, + "learning_rate": 2.7125687040996385e-12, + "loss": 0.3121, + "step": 17585 + }, + { + "epoch": 3.0, + "learning_rate": 1.5258199259626793e-12, + "loss": 0.3142, + "step": 17586 + }, + { + "epoch": 3.0, + "learning_rate": 6.781421990620374e-13, + "loss": 0.287, + "step": 17587 + }, + { + "epoch": 3.0, + "learning_rate": 1.6953555115328812e-13, + "loss": 0.3348, + "step": 17588 + }, + { + "epoch": 3.0, + "learning_rate": 0.0, + "loss": 0.2905, + "step": 17589 + }, + { + "epoch": 3.0, + "step": 17589, + "total_flos": 5.244831522040578e+18, + "train_loss": 0.4220799316686377, + "train_runtime": 30453.2262, + "train_samples_per_second": 73.934, + "train_steps_per_second": 0.578 + } + ], + "max_steps": 17589, + "num_train_epochs": 3, + "total_flos": 5.244831522040578e+18, + "trial_name": null, + "trial_params": null +}