diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,4770 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 790, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 8.333333333333333e-07, + "loss": 11.1556, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.6666666666666667e-06, + "loss": 11.1531, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 2.5e-06, + "loss": 11.1614, + "step": 3 + }, + { + "epoch": 0.01, + "learning_rate": 3.3333333333333333e-06, + "loss": 11.1446, + "step": 4 + }, + { + "epoch": 0.01, + "learning_rate": 4.166666666666667e-06, + "loss": 11.0054, + "step": 5 + }, + { + "epoch": 0.01, + "learning_rate": 5e-06, + "loss": 10.6955, + "step": 6 + }, + { + "epoch": 0.01, + "learning_rate": 5.833333333333334e-06, + "loss": 10.4968, + "step": 7 + }, + { + "epoch": 0.01, + "learning_rate": 6.666666666666667e-06, + "loss": 9.706, + "step": 8 + }, + { + "epoch": 0.01, + "learning_rate": 7.500000000000001e-06, + "loss": 9.5405, + "step": 9 + }, + { + "epoch": 0.01, + "learning_rate": 8.333333333333334e-06, + "loss": 9.2296, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 9.166666666666666e-06, + "loss": 9.7789, + "step": 11 + }, + { + "epoch": 0.02, + "learning_rate": 1e-05, + "loss": 8.9721, + "step": 12 + }, + { + "epoch": 0.02, + "learning_rate": 1.0833333333333334e-05, + "loss": 8.9369, + "step": 13 + }, + { + "epoch": 0.02, + "learning_rate": 1.1666666666666668e-05, + "loss": 9.1961, + "step": 14 + }, + { + "epoch": 0.02, + "learning_rate": 1.25e-05, + "loss": 9.0081, + "step": 15 + }, + { + "epoch": 0.02, + "learning_rate": 1.3333333333333333e-05, + "loss": 9.1296, + "step": 16 + }, + { + "epoch": 0.02, + "learning_rate": 1.416666666666667e-05, + "loss": 8.9271, + "step": 17 + }, + { + "epoch": 0.02, + "learning_rate": 1.5000000000000002e-05, + "loss": 8.6372, + "step": 18 + }, + { + "epoch": 0.02, + "learning_rate": 1.5833333333333333e-05, + "loss": 8.2846, + "step": 19 + }, + { + "epoch": 0.03, + "learning_rate": 1.6666666666666667e-05, + "loss": 8.1478, + "step": 20 + }, + { + "epoch": 0.03, + "learning_rate": 1.7500000000000002e-05, + "loss": 8.2667, + "step": 21 + }, + { + "epoch": 0.03, + "learning_rate": 1.8333333333333333e-05, + "loss": 8.1677, + "step": 22 + }, + { + "epoch": 0.03, + "learning_rate": 1.916666666666667e-05, + "loss": 8.1679, + "step": 23 + }, + { + "epoch": 0.03, + "learning_rate": 2e-05, + "loss": 8.2426, + "step": 24 + }, + { + "epoch": 0.03, + "learning_rate": 1.9999915896977905e-05, + "loss": 8.207, + "step": 25 + }, + { + "epoch": 0.03, + "learning_rate": 1.999966358932628e-05, + "loss": 8.1476, + "step": 26 + }, + { + "epoch": 0.03, + "learning_rate": 1.999924308128909e-05, + "loss": 7.929, + "step": 27 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998654379939535e-05, + "loss": 7.7032, + "step": 28 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997897495179932e-05, + "loss": 7.4376, + "step": 29 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996972439741537e-05, + "loss": 7.3027, + "step": 30 + }, + { + "epoch": 0.04, + "learning_rate": 1.9995879229184343e-05, + "loss": 7.2694, + "step": 31 + }, + { + "epoch": 0.04, + "learning_rate": 1.999461788189681e-05, + "loss": 7.2901, + "step": 32 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993188419095562e-05, + "loss": 7.2255, + "step": 33 + }, + { + "epoch": 0.04, + "learning_rate": 1.9991590864825026e-05, + "loss": 7.0736, + "step": 34 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989825245957038e-05, + "loss": 7.0355, + "step": 35 + }, + { + "epoch": 0.05, + "learning_rate": 1.9987891592190367e-05, + "loss": 6.9484, + "step": 36 + }, + { + "epoch": 0.05, + "learning_rate": 1.998578993605024e-05, + "loss": 7.0153, + "step": 37 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983520312887785e-05, + "loss": 6.9302, + "step": 38 + }, + { + "epoch": 0.05, + "learning_rate": 1.9981082760879432e-05, + "loss": 6.9273, + "step": 39 + }, + { + "epoch": 0.05, + "learning_rate": 1.9978477321026282e-05, + "loss": 6.8868, + "step": 40 + }, + { + "epoch": 0.05, + "learning_rate": 1.997570403715341e-05, + "loss": 6.8568, + "step": 41 + }, + { + "epoch": 0.05, + "learning_rate": 1.997276295590912e-05, + "loss": 6.8307, + "step": 42 + }, + { + "epoch": 0.05, + "learning_rate": 1.9969654126764183e-05, + "loss": 6.7708, + "step": 43 + }, + { + "epoch": 0.06, + "learning_rate": 1.9966377602010984e-05, + "loss": 6.6811, + "step": 44 + }, + { + "epoch": 0.06, + "learning_rate": 1.9962933436762644e-05, + "loss": 6.6613, + "step": 45 + }, + { + "epoch": 0.06, + "learning_rate": 1.995932168895211e-05, + "loss": 6.6736, + "step": 46 + }, + { + "epoch": 0.06, + "learning_rate": 1.9955542419331162e-05, + "loss": 6.5919, + "step": 47 + }, + { + "epoch": 0.06, + "learning_rate": 1.9951595691469397e-05, + "loss": 6.5106, + "step": 48 + }, + { + "epoch": 0.06, + "learning_rate": 1.9947481571753165e-05, + "loss": 6.4874, + "step": 49 + }, + { + "epoch": 0.06, + "learning_rate": 1.9943200129384444e-05, + "loss": 6.4935, + "step": 50 + }, + { + "epoch": 0.06, + "learning_rate": 1.9938751436379684e-05, + "loss": 6.4783, + "step": 51 + }, + { + "epoch": 0.07, + "learning_rate": 1.9934135567568594e-05, + "loss": 6.5052, + "step": 52 + }, + { + "epoch": 0.07, + "learning_rate": 1.992935260059287e-05, + "loss": 6.5075, + "step": 53 + }, + { + "epoch": 0.07, + "learning_rate": 1.992440261590491e-05, + "loss": 6.4666, + "step": 54 + }, + { + "epoch": 0.07, + "learning_rate": 1.9919285696766453e-05, + "loss": 6.406, + "step": 55 + }, + { + "epoch": 0.07, + "learning_rate": 1.991400192924717e-05, + "loss": 6.4111, + "step": 56 + }, + { + "epoch": 0.07, + "learning_rate": 1.9908551402223218e-05, + "loss": 6.4167, + "step": 57 + }, + { + "epoch": 0.07, + "learning_rate": 1.9902934207375758e-05, + "loss": 6.4207, + "step": 58 + }, + { + "epoch": 0.07, + "learning_rate": 1.989715043918941e-05, + "loss": 6.422, + "step": 59 + }, + { + "epoch": 0.08, + "learning_rate": 1.9891200194950644e-05, + "loss": 6.3422, + "step": 60 + }, + { + "epoch": 0.08, + "learning_rate": 1.9885083574746167e-05, + "loss": 6.2456, + "step": 61 + }, + { + "epoch": 0.08, + "learning_rate": 1.9878800681461222e-05, + "loss": 6.2807, + "step": 62 + }, + { + "epoch": 0.08, + "learning_rate": 1.9872351620777883e-05, + "loss": 6.3047, + "step": 63 + }, + { + "epoch": 0.08, + "learning_rate": 1.9865736501173237e-05, + "loss": 6.2419, + "step": 64 + }, + { + "epoch": 0.08, + "learning_rate": 1.9858955433917602e-05, + "loss": 6.2308, + "step": 65 + }, + { + "epoch": 0.08, + "learning_rate": 1.9852008533072627e-05, + "loss": 6.2322, + "step": 66 + }, + { + "epoch": 0.08, + "learning_rate": 1.9844895915489378e-05, + "loss": 6.2124, + "step": 67 + }, + { + "epoch": 0.09, + "learning_rate": 1.9837617700806385e-05, + "loss": 6.1977, + "step": 68 + }, + { + "epoch": 0.09, + "learning_rate": 1.9830174011447617e-05, + "loss": 6.1633, + "step": 69 + }, + { + "epoch": 0.09, + "learning_rate": 1.982256497262043e-05, + "loss": 6.1553, + "step": 70 + }, + { + "epoch": 0.09, + "learning_rate": 1.9814790712313456e-05, + "loss": 6.1803, + "step": 71 + }, + { + "epoch": 0.09, + "learning_rate": 1.980685136129445e-05, + "loss": 6.1184, + "step": 72 + }, + { + "epoch": 0.09, + "learning_rate": 1.9798747053108098e-05, + "loss": 6.155, + "step": 73 + }, + { + "epoch": 0.09, + "learning_rate": 1.9790477924073755e-05, + "loss": 6.101, + "step": 74 + }, + { + "epoch": 0.09, + "learning_rate": 1.978204411328318e-05, + "loss": 6.0193, + "step": 75 + }, + { + "epoch": 0.1, + "learning_rate": 1.977344576259816e-05, + "loss": 6.03, + "step": 76 + }, + { + "epoch": 0.1, + "learning_rate": 1.9764683016648156e-05, + "loss": 5.9886, + "step": 77 + }, + { + "epoch": 0.1, + "learning_rate": 1.9755756022827847e-05, + "loss": 6.0071, + "step": 78 + }, + { + "epoch": 0.1, + "learning_rate": 1.9746664931294667e-05, + "loss": 5.9421, + "step": 79 + }, + { + "epoch": 0.1, + "learning_rate": 1.9737409894966267e-05, + "loss": 5.9954, + "step": 80 + }, + { + "epoch": 0.1, + "learning_rate": 1.972799106951796e-05, + "loss": 5.9408, + "step": 81 + }, + { + "epoch": 0.1, + "learning_rate": 1.9718408613380077e-05, + "loss": 5.9167, + "step": 82 + }, + { + "epoch": 0.11, + "learning_rate": 1.9708662687735316e-05, + "loss": 5.9676, + "step": 83 + }, + { + "epoch": 0.11, + "learning_rate": 1.9698753456516047e-05, + "loss": 5.8603, + "step": 84 + }, + { + "epoch": 0.11, + "learning_rate": 1.9688681086401526e-05, + "loss": 5.827, + "step": 85 + }, + { + "epoch": 0.11, + "learning_rate": 1.9678445746815107e-05, + "loss": 5.8725, + "step": 86 + }, + { + "epoch": 0.11, + "learning_rate": 1.9668047609921385e-05, + "loss": 5.8081, + "step": 87 + }, + { + "epoch": 0.11, + "learning_rate": 1.9657486850623308e-05, + "loss": 5.728, + "step": 88 + }, + { + "epoch": 0.11, + "learning_rate": 1.9646763646559234e-05, + "loss": 5.8512, + "step": 89 + }, + { + "epoch": 0.11, + "learning_rate": 1.963587817809993e-05, + "loss": 5.7728, + "step": 90 + }, + { + "epoch": 0.12, + "learning_rate": 1.9624830628345562e-05, + "loss": 5.7605, + "step": 91 + }, + { + "epoch": 0.12, + "learning_rate": 1.961362118312259e-05, + "loss": 5.7939, + "step": 92 + }, + { + "epoch": 0.12, + "learning_rate": 1.9602250030980657e-05, + "loss": 5.7981, + "step": 93 + }, + { + "epoch": 0.12, + "learning_rate": 1.9590717363189424e-05, + "loss": 5.6852, + "step": 94 + }, + { + "epoch": 0.12, + "learning_rate": 1.957902337373532e-05, + "loss": 5.8011, + "step": 95 + }, + { + "epoch": 0.12, + "learning_rate": 1.9567168259318324e-05, + "loss": 5.649, + "step": 96 + }, + { + "epoch": 0.12, + "learning_rate": 1.955515221934863e-05, + "loss": 5.6449, + "step": 97 + }, + { + "epoch": 0.12, + "learning_rate": 1.9542975455943284e-05, + "loss": 5.5907, + "step": 98 + }, + { + "epoch": 0.13, + "learning_rate": 1.953063817392281e-05, + "loss": 5.6107, + "step": 99 + }, + { + "epoch": 0.13, + "learning_rate": 1.9518140580807746e-05, + "loss": 5.5694, + "step": 100 + }, + { + "epoch": 0.13, + "learning_rate": 1.9505482886815167e-05, + "loss": 5.6611, + "step": 101 + }, + { + "epoch": 0.13, + "learning_rate": 1.949266530485513e-05, + "loss": 5.5533, + "step": 102 + }, + { + "epoch": 0.13, + "learning_rate": 1.947968805052712e-05, + "loss": 5.4755, + "step": 103 + }, + { + "epoch": 0.13, + "learning_rate": 1.946655134211639e-05, + "loss": 5.6145, + "step": 104 + }, + { + "epoch": 0.13, + "learning_rate": 1.945325540059032e-05, + "loss": 5.6456, + "step": 105 + }, + { + "epoch": 0.13, + "learning_rate": 1.943980044959468e-05, + "loss": 5.4775, + "step": 106 + }, + { + "epoch": 0.14, + "learning_rate": 1.942618671544988e-05, + "loss": 5.7434, + "step": 107 + }, + { + "epoch": 0.14, + "learning_rate": 1.941241442714716e-05, + "loss": 5.6461, + "step": 108 + }, + { + "epoch": 0.14, + "learning_rate": 1.9398483816344728e-05, + "loss": 5.8886, + "step": 109 + }, + { + "epoch": 0.14, + "learning_rate": 1.938439511736388e-05, + "loss": 6.0387, + "step": 110 + }, + { + "epoch": 0.14, + "learning_rate": 1.9370148567185043e-05, + "loss": 5.8731, + "step": 111 + }, + { + "epoch": 0.14, + "learning_rate": 1.935574440544381e-05, + "loss": 5.7988, + "step": 112 + }, + { + "epoch": 0.14, + "learning_rate": 1.934118287442689e-05, + "loss": 5.5043, + "step": 113 + }, + { + "epoch": 0.14, + "learning_rate": 1.9326464219068023e-05, + "loss": 5.5951, + "step": 114 + }, + { + "epoch": 0.15, + "learning_rate": 1.9311588686943897e-05, + "loss": 5.7133, + "step": 115 + }, + { + "epoch": 0.15, + "learning_rate": 1.9296556528269954e-05, + "loss": 5.4865, + "step": 116 + }, + { + "epoch": 0.15, + "learning_rate": 1.9281367995896187e-05, + "loss": 5.5316, + "step": 117 + }, + { + "epoch": 0.15, + "learning_rate": 1.9266023345302886e-05, + "loss": 5.5545, + "step": 118 + }, + { + "epoch": 0.15, + "learning_rate": 1.9250522834596357e-05, + "loss": 5.5339, + "step": 119 + }, + { + "epoch": 0.15, + "learning_rate": 1.9234866724504554e-05, + "loss": 5.4314, + "step": 120 + }, + { + "epoch": 0.15, + "learning_rate": 1.9219055278372713e-05, + "loss": 5.4064, + "step": 121 + }, + { + "epoch": 0.15, + "learning_rate": 1.9203088762158914e-05, + "loss": 5.3135, + "step": 122 + }, + { + "epoch": 0.16, + "learning_rate": 1.9186967444429613e-05, + "loss": 5.3226, + "step": 123 + }, + { + "epoch": 0.16, + "learning_rate": 1.9170691596355113e-05, + "loss": 5.3193, + "step": 124 + }, + { + "epoch": 0.16, + "learning_rate": 1.915426149170502e-05, + "loss": 5.1942, + "step": 125 + }, + { + "epoch": 0.16, + "learning_rate": 1.913767740684362e-05, + "loss": 5.3009, + "step": 126 + }, + { + "epoch": 0.16, + "learning_rate": 1.912093962072525e-05, + "loss": 5.1603, + "step": 127 + }, + { + "epoch": 0.16, + "learning_rate": 1.910404841488959e-05, + "loss": 5.2039, + "step": 128 + }, + { + "epoch": 0.16, + "learning_rate": 1.9087004073456926e-05, + "loss": 5.1635, + "step": 129 + }, + { + "epoch": 0.16, + "learning_rate": 1.9069806883123386e-05, + "loss": 5.2517, + "step": 130 + }, + { + "epoch": 0.17, + "learning_rate": 1.9052457133156103e-05, + "loss": 5.2288, + "step": 131 + }, + { + "epoch": 0.17, + "learning_rate": 1.9034955115388364e-05, + "loss": 5.1153, + "step": 132 + }, + { + "epoch": 0.17, + "learning_rate": 1.901730112421468e-05, + "loss": 5.0183, + "step": 133 + }, + { + "epoch": 0.17, + "learning_rate": 1.8999495456585856e-05, + "loss": 5.1861, + "step": 134 + }, + { + "epoch": 0.17, + "learning_rate": 1.898153841200398e-05, + "loss": 5.0127, + "step": 135 + }, + { + "epoch": 0.17, + "learning_rate": 1.8963430292517398e-05, + "loss": 5.0361, + "step": 136 + }, + { + "epoch": 0.17, + "learning_rate": 1.8945171402715628e-05, + "loss": 4.976, + "step": 137 + }, + { + "epoch": 0.17, + "learning_rate": 1.892676204972423e-05, + "loss": 4.9992, + "step": 138 + }, + { + "epoch": 0.18, + "learning_rate": 1.8908202543199646e-05, + "loss": 4.9535, + "step": 139 + }, + { + "epoch": 0.18, + "learning_rate": 1.8889493195324e-05, + "loss": 5.0195, + "step": 140 + }, + { + "epoch": 0.18, + "learning_rate": 1.8870634320799822e-05, + "loss": 4.9628, + "step": 141 + }, + { + "epoch": 0.18, + "learning_rate": 1.8851626236844787e-05, + "loss": 4.9592, + "step": 142 + }, + { + "epoch": 0.18, + "learning_rate": 1.8832469263186352e-05, + "loss": 4.8603, + "step": 143 + }, + { + "epoch": 0.18, + "learning_rate": 1.8813163722056397e-05, + "loss": 4.9138, + "step": 144 + }, + { + "epoch": 0.18, + "learning_rate": 1.879370993818579e-05, + "loss": 5.0571, + "step": 145 + }, + { + "epoch": 0.18, + "learning_rate": 1.8774108238798932e-05, + "loss": 4.9104, + "step": 146 + }, + { + "epoch": 0.19, + "learning_rate": 1.875435895360826e-05, + "loss": 4.9345, + "step": 147 + }, + { + "epoch": 0.19, + "learning_rate": 1.873446241480868e-05, + "loss": 4.8746, + "step": 148 + }, + { + "epoch": 0.19, + "learning_rate": 1.8714418957072008e-05, + "loss": 4.8118, + "step": 149 + }, + { + "epoch": 0.19, + "learning_rate": 1.8694228917541313e-05, + "loss": 4.9833, + "step": 150 + }, + { + "epoch": 0.19, + "learning_rate": 1.8673892635825264e-05, + "loss": 4.777, + "step": 151 + }, + { + "epoch": 0.19, + "learning_rate": 1.8653410453992415e-05, + "loss": 4.8645, + "step": 152 + }, + { + "epoch": 0.19, + "learning_rate": 1.8632782716565438e-05, + "loss": 4.7069, + "step": 153 + }, + { + "epoch": 0.19, + "learning_rate": 1.861200977051535e-05, + "loss": 4.7141, + "step": 154 + }, + { + "epoch": 0.2, + "learning_rate": 1.8591091965255654e-05, + "loss": 4.7068, + "step": 155 + }, + { + "epoch": 0.2, + "learning_rate": 1.857002965263648e-05, + "loss": 4.8115, + "step": 156 + }, + { + "epoch": 0.2, + "learning_rate": 1.854882318693866e-05, + "loss": 4.7032, + "step": 157 + }, + { + "epoch": 0.2, + "learning_rate": 1.852747292486776e-05, + "loss": 4.7396, + "step": 158 + }, + { + "epoch": 0.2, + "learning_rate": 1.850597922554809e-05, + "loss": 4.577, + "step": 159 + }, + { + "epoch": 0.2, + "learning_rate": 1.8484342450516672e-05, + "loss": 4.6603, + "step": 160 + }, + { + "epoch": 0.2, + "learning_rate": 1.8462562963717134e-05, + "loss": 4.6679, + "step": 161 + }, + { + "epoch": 0.21, + "learning_rate": 1.844064113149361e-05, + "loss": 4.734, + "step": 162 + }, + { + "epoch": 0.21, + "learning_rate": 1.841857732258457e-05, + "loss": 4.6387, + "step": 163 + }, + { + "epoch": 0.21, + "learning_rate": 1.839637190811661e-05, + "loss": 4.7254, + "step": 164 + }, + { + "epoch": 0.21, + "learning_rate": 1.8374025261598224e-05, + "loss": 4.5621, + "step": 165 + }, + { + "epoch": 0.21, + "learning_rate": 1.8351537758913518e-05, + "loss": 4.5779, + "step": 166 + }, + { + "epoch": 0.21, + "learning_rate": 1.8328909778315876e-05, + "loss": 4.5663, + "step": 167 + }, + { + "epoch": 0.21, + "learning_rate": 1.8306141700421606e-05, + "loss": 4.444, + "step": 168 + }, + { + "epoch": 0.21, + "learning_rate": 1.8283233908203547e-05, + "loss": 4.4928, + "step": 169 + }, + { + "epoch": 0.22, + "learning_rate": 1.8260186786984603e-05, + "loss": 4.6391, + "step": 170 + }, + { + "epoch": 0.22, + "learning_rate": 1.8237000724431283e-05, + "loss": 4.5151, + "step": 171 + }, + { + "epoch": 0.22, + "learning_rate": 1.8213676110547177e-05, + "loss": 4.5845, + "step": 172 + }, + { + "epoch": 0.22, + "learning_rate": 1.8190213337666384e-05, + "loss": 4.6617, + "step": 173 + }, + { + "epoch": 0.22, + "learning_rate": 1.816661280044693e-05, + "loss": 4.4839, + "step": 174 + }, + { + "epoch": 0.22, + "learning_rate": 1.8142874895864113e-05, + "loss": 4.674, + "step": 175 + }, + { + "epoch": 0.22, + "learning_rate": 1.8119000023203838e-05, + "loss": 4.3447, + "step": 176 + }, + { + "epoch": 0.22, + "learning_rate": 1.809498858405589e-05, + "loss": 4.5803, + "step": 177 + }, + { + "epoch": 0.23, + "learning_rate": 1.807084098230719e-05, + "loss": 4.5345, + "step": 178 + }, + { + "epoch": 0.23, + "learning_rate": 1.8046557624134997e-05, + "loss": 4.4603, + "step": 179 + }, + { + "epoch": 0.23, + "learning_rate": 1.802213891800007e-05, + "loss": 4.4549, + "step": 180 + }, + { + "epoch": 0.23, + "learning_rate": 1.799758527463981e-05, + "loss": 4.5114, + "step": 181 + }, + { + "epoch": 0.23, + "learning_rate": 1.797289710706133e-05, + "loss": 4.3743, + "step": 182 + }, + { + "epoch": 0.23, + "learning_rate": 1.7948074830534535e-05, + "loss": 4.441, + "step": 183 + }, + { + "epoch": 0.23, + "learning_rate": 1.7923118862585123e-05, + "loss": 4.5071, + "step": 184 + }, + { + "epoch": 0.23, + "learning_rate": 1.7898029622987555e-05, + "loss": 4.3857, + "step": 185 + }, + { + "epoch": 0.24, + "learning_rate": 1.7872807533758007e-05, + "loss": 4.4135, + "step": 186 + }, + { + "epoch": 0.24, + "learning_rate": 1.7847453019147264e-05, + "loss": 4.4598, + "step": 187 + }, + { + "epoch": 0.24, + "learning_rate": 1.7821966505633587e-05, + "loss": 4.4086, + "step": 188 + }, + { + "epoch": 0.24, + "learning_rate": 1.7796348421915536e-05, + "loss": 4.3538, + "step": 189 + }, + { + "epoch": 0.24, + "learning_rate": 1.7770599198904762e-05, + "loss": 4.4076, + "step": 190 + }, + { + "epoch": 0.24, + "learning_rate": 1.774471926971877e-05, + "loss": 4.3045, + "step": 191 + }, + { + "epoch": 0.24, + "learning_rate": 1.7718709069673595e-05, + "loss": 4.5118, + "step": 192 + }, + { + "epoch": 0.24, + "learning_rate": 1.7692569036276533e-05, + "loss": 4.4237, + "step": 193 + }, + { + "epoch": 0.25, + "learning_rate": 1.7666299609218745e-05, + "loss": 4.3038, + "step": 194 + }, + { + "epoch": 0.25, + "learning_rate": 1.763990123036787e-05, + "loss": 4.3774, + "step": 195 + }, + { + "epoch": 0.25, + "learning_rate": 1.7613374343760595e-05, + "loss": 4.282, + "step": 196 + }, + { + "epoch": 0.25, + "learning_rate": 1.7586719395595185e-05, + "loss": 4.2489, + "step": 197 + }, + { + "epoch": 0.25, + "learning_rate": 1.7559936834223982e-05, + "loss": 4.2206, + "step": 198 + }, + { + "epoch": 0.25, + "learning_rate": 1.7533027110145857e-05, + "loss": 4.1831, + "step": 199 + }, + { + "epoch": 0.25, + "learning_rate": 1.7505990675998632e-05, + "loss": 4.4274, + "step": 200 + }, + { + "epoch": 0.25, + "learning_rate": 1.747882798655147e-05, + "loss": 4.2656, + "step": 201 + }, + { + "epoch": 0.26, + "learning_rate": 1.7451539498697225e-05, + "loss": 4.355, + "step": 202 + }, + { + "epoch": 0.26, + "learning_rate": 1.742412567144476e-05, + "loss": 4.3059, + "step": 203 + }, + { + "epoch": 0.26, + "learning_rate": 1.739658696591121e-05, + "loss": 4.2162, + "step": 204 + }, + { + "epoch": 0.26, + "learning_rate": 1.7368923845314262e-05, + "loss": 4.2575, + "step": 205 + }, + { + "epoch": 0.26, + "learning_rate": 1.7341136774964305e-05, + "loss": 4.1695, + "step": 206 + }, + { + "epoch": 0.26, + "learning_rate": 1.7313226222256675e-05, + "loss": 4.1604, + "step": 207 + }, + { + "epoch": 0.26, + "learning_rate": 1.728519265666373e-05, + "loss": 4.2444, + "step": 208 + }, + { + "epoch": 0.26, + "learning_rate": 1.7257036549726988e-05, + "loss": 4.1928, + "step": 209 + }, + { + "epoch": 0.27, + "learning_rate": 1.7228758375049186e-05, + "loss": 4.2985, + "step": 210 + }, + { + "epoch": 0.27, + "learning_rate": 1.7200358608286314e-05, + "loss": 4.3212, + "step": 211 + }, + { + "epoch": 0.27, + "learning_rate": 1.7171837727139613e-05, + "loss": 4.2987, + "step": 212 + }, + { + "epoch": 0.27, + "learning_rate": 1.714319621134755e-05, + "loss": 4.2621, + "step": 213 + }, + { + "epoch": 0.27, + "learning_rate": 1.711443454267772e-05, + "loss": 4.2537, + "step": 214 + }, + { + "epoch": 0.27, + "learning_rate": 1.708555320491878e-05, + "loss": 4.1606, + "step": 215 + }, + { + "epoch": 0.27, + "learning_rate": 1.705655268387229e-05, + "loss": 4.0981, + "step": 216 + }, + { + "epoch": 0.27, + "learning_rate": 1.702743346734454e-05, + "loss": 4.1428, + "step": 217 + }, + { + "epoch": 0.28, + "learning_rate": 1.6998196045138354e-05, + "loss": 4.0398, + "step": 218 + }, + { + "epoch": 0.28, + "learning_rate": 1.696884090904484e-05, + "loss": 4.1158, + "step": 219 + }, + { + "epoch": 0.28, + "learning_rate": 1.6939368552835137e-05, + "loss": 4.2338, + "step": 220 + }, + { + "epoch": 0.28, + "learning_rate": 1.6909779472252084e-05, + "loss": 4.2012, + "step": 221 + }, + { + "epoch": 0.28, + "learning_rate": 1.6880074165001906e-05, + "loss": 4.1079, + "step": 222 + }, + { + "epoch": 0.28, + "learning_rate": 1.685025313074582e-05, + "loss": 4.2466, + "step": 223 + }, + { + "epoch": 0.28, + "learning_rate": 1.682031687109165e-05, + "loss": 4.0848, + "step": 224 + }, + { + "epoch": 0.28, + "learning_rate": 1.679026588958538e-05, + "loss": 4.2162, + "step": 225 + }, + { + "epoch": 0.29, + "learning_rate": 1.6760100691702676e-05, + "loss": 4.1318, + "step": 226 + }, + { + "epoch": 0.29, + "learning_rate": 1.6729821784840398e-05, + "loss": 4.2007, + "step": 227 + }, + { + "epoch": 0.29, + "learning_rate": 1.669942967830807e-05, + "loss": 4.1688, + "step": 228 + }, + { + "epoch": 0.29, + "learning_rate": 1.6668924883319288e-05, + "loss": 4.1256, + "step": 229 + }, + { + "epoch": 0.29, + "learning_rate": 1.6638307912983135e-05, + "loss": 4.0463, + "step": 230 + }, + { + "epoch": 0.29, + "learning_rate": 1.6607579282295572e-05, + "loss": 4.1453, + "step": 231 + }, + { + "epoch": 0.29, + "learning_rate": 1.6576739508130725e-05, + "loss": 4.0776, + "step": 232 + }, + { + "epoch": 0.29, + "learning_rate": 1.6545789109232247e-05, + "loss": 4.2646, + "step": 233 + }, + { + "epoch": 0.3, + "learning_rate": 1.651472860620455e-05, + "loss": 4.0992, + "step": 234 + }, + { + "epoch": 0.3, + "learning_rate": 1.6483558521504068e-05, + "loss": 4.0812, + "step": 235 + }, + { + "epoch": 0.3, + "learning_rate": 1.6452279379430466e-05, + "loss": 4.0722, + "step": 236 + }, + { + "epoch": 0.3, + "learning_rate": 1.6420891706117818e-05, + "loss": 4.0237, + "step": 237 + }, + { + "epoch": 0.3, + "learning_rate": 1.638939602952576e-05, + "loss": 4.1031, + "step": 238 + }, + { + "epoch": 0.3, + "learning_rate": 1.6357792879430615e-05, + "loss": 4.0923, + "step": 239 + }, + { + "epoch": 0.3, + "learning_rate": 1.632608278741646e-05, + "loss": 4.0142, + "step": 240 + }, + { + "epoch": 0.31, + "learning_rate": 1.629426628686622e-05, + "loss": 4.0109, + "step": 241 + }, + { + "epoch": 0.31, + "learning_rate": 1.6262343912952656e-05, + "loss": 3.9415, + "step": 242 + }, + { + "epoch": 0.31, + "learning_rate": 1.6230316202629393e-05, + "loss": 4.0262, + "step": 243 + }, + { + "epoch": 0.31, + "learning_rate": 1.619818369462188e-05, + "loss": 3.951, + "step": 244 + }, + { + "epoch": 0.31, + "learning_rate": 1.6165946929418322e-05, + "loss": 4.0882, + "step": 245 + }, + { + "epoch": 0.31, + "learning_rate": 1.613360644926059e-05, + "loss": 4.054, + "step": 246 + }, + { + "epoch": 0.31, + "learning_rate": 1.610116279813511e-05, + "loss": 4.0486, + "step": 247 + }, + { + "epoch": 0.31, + "learning_rate": 1.6068616521763708e-05, + "loss": 4.0278, + "step": 248 + }, + { + "epoch": 0.32, + "learning_rate": 1.603596816759442e-05, + "loss": 4.0248, + "step": 249 + }, + { + "epoch": 0.32, + "learning_rate": 1.60032182847923e-05, + "loss": 4.0702, + "step": 250 + }, + { + "epoch": 0.32, + "learning_rate": 1.5970367424230162e-05, + "loss": 3.8974, + "step": 251 + }, + { + "epoch": 0.32, + "learning_rate": 1.5937416138479344e-05, + "loss": 4.0375, + "step": 252 + }, + { + "epoch": 0.32, + "learning_rate": 1.590436498180039e-05, + "loss": 4.0119, + "step": 253 + }, + { + "epoch": 0.32, + "learning_rate": 1.5871214510133734e-05, + "loss": 4.0036, + "step": 254 + }, + { + "epoch": 0.32, + "learning_rate": 1.5837965281090334e-05, + "loss": 3.9336, + "step": 255 + }, + { + "epoch": 0.32, + "learning_rate": 1.580461785394233e-05, + "loss": 3.806, + "step": 256 + }, + { + "epoch": 0.33, + "learning_rate": 1.57711727896136e-05, + "loss": 3.9693, + "step": 257 + }, + { + "epoch": 0.33, + "learning_rate": 1.5737630650670336e-05, + "loss": 3.9221, + "step": 258 + }, + { + "epoch": 0.33, + "learning_rate": 1.57039920013116e-05, + "loss": 3.8656, + "step": 259 + }, + { + "epoch": 0.33, + "learning_rate": 1.567025740735979e-05, + "loss": 4.0068, + "step": 260 + }, + { + "epoch": 0.33, + "learning_rate": 1.5636427436251182e-05, + "loss": 3.9293, + "step": 261 + }, + { + "epoch": 0.33, + "learning_rate": 1.5602502657026327e-05, + "loss": 3.9401, + "step": 262 + }, + { + "epoch": 0.33, + "learning_rate": 1.556848364032052e-05, + "loss": 3.9274, + "step": 263 + }, + { + "epoch": 0.33, + "learning_rate": 1.5534370958354184e-05, + "loss": 4.1849, + "step": 264 + }, + { + "epoch": 0.34, + "learning_rate": 1.550016518492325e-05, + "loss": 3.9264, + "step": 265 + }, + { + "epoch": 0.34, + "learning_rate": 1.5465866895389497e-05, + "loss": 3.8007, + "step": 266 + }, + { + "epoch": 0.34, + "learning_rate": 1.5431476666670885e-05, + "loss": 3.8248, + "step": 267 + }, + { + "epoch": 0.34, + "learning_rate": 1.5396995077231856e-05, + "loss": 3.891, + "step": 268 + }, + { + "epoch": 0.34, + "learning_rate": 1.5362422707073574e-05, + "loss": 3.801, + "step": 269 + }, + { + "epoch": 0.34, + "learning_rate": 1.5327760137724213e-05, + "loss": 3.9024, + "step": 270 + }, + { + "epoch": 0.34, + "learning_rate": 1.5293007952229127e-05, + "loss": 3.9204, + "step": 271 + }, + { + "epoch": 0.34, + "learning_rate": 1.5258166735141094e-05, + "loss": 3.9807, + "step": 272 + }, + { + "epoch": 0.35, + "learning_rate": 1.5223237072510433e-05, + "loss": 3.906, + "step": 273 + }, + { + "epoch": 0.35, + "learning_rate": 1.518821955187519e-05, + "loss": 3.8716, + "step": 274 + }, + { + "epoch": 0.35, + "learning_rate": 1.5153114762251221e-05, + "loss": 3.8384, + "step": 275 + }, + { + "epoch": 0.35, + "learning_rate": 1.5117923294122312e-05, + "loss": 3.7808, + "step": 276 + }, + { + "epoch": 0.35, + "learning_rate": 1.5082645739430224e-05, + "loss": 3.9122, + "step": 277 + }, + { + "epoch": 0.35, + "learning_rate": 1.5047282691564749e-05, + "loss": 3.7987, + "step": 278 + }, + { + "epoch": 0.35, + "learning_rate": 1.5011834745353725e-05, + "loss": 3.967, + "step": 279 + }, + { + "epoch": 0.35, + "learning_rate": 1.4976302497053036e-05, + "loss": 3.9335, + "step": 280 + }, + { + "epoch": 0.36, + "learning_rate": 1.4940686544336571e-05, + "loss": 3.8081, + "step": 281 + }, + { + "epoch": 0.36, + "learning_rate": 1.4904987486286184e-05, + "loss": 3.9681, + "step": 282 + }, + { + "epoch": 0.36, + "learning_rate": 1.4869205923381609e-05, + "loss": 3.8072, + "step": 283 + }, + { + "epoch": 0.36, + "learning_rate": 1.4833342457490363e-05, + "loss": 3.809, + "step": 284 + }, + { + "epoch": 0.36, + "learning_rate": 1.4797397691857614e-05, + "loss": 3.8076, + "step": 285 + }, + { + "epoch": 0.36, + "learning_rate": 1.4761372231096047e-05, + "loss": 3.8011, + "step": 286 + }, + { + "epoch": 0.36, + "learning_rate": 1.472526668117569e-05, + "loss": 3.9507, + "step": 287 + }, + { + "epoch": 0.36, + "learning_rate": 1.468908164941371e-05, + "loss": 3.8249, + "step": 288 + }, + { + "epoch": 0.37, + "learning_rate": 1.4652817744464214e-05, + "loss": 3.8342, + "step": 289 + }, + { + "epoch": 0.37, + "learning_rate": 1.4616475576308005e-05, + "loss": 3.7757, + "step": 290 + }, + { + "epoch": 0.37, + "learning_rate": 1.4580055756242315e-05, + "loss": 3.9205, + "step": 291 + }, + { + "epoch": 0.37, + "learning_rate": 1.454355889687053e-05, + "loss": 3.8701, + "step": 292 + }, + { + "epoch": 0.37, + "learning_rate": 1.4506985612091889e-05, + "loss": 3.8211, + "step": 293 + }, + { + "epoch": 0.37, + "learning_rate": 1.4470336517091139e-05, + "loss": 3.9225, + "step": 294 + }, + { + "epoch": 0.37, + "learning_rate": 1.4433612228328215e-05, + "loss": 3.7957, + "step": 295 + }, + { + "epoch": 0.37, + "learning_rate": 1.439681336352785e-05, + "loss": 3.745, + "step": 296 + }, + { + "epoch": 0.38, + "learning_rate": 1.435994054166919e-05, + "loss": 3.8174, + "step": 297 + }, + { + "epoch": 0.38, + "learning_rate": 1.4322994382975386e-05, + "loss": 3.7996, + "step": 298 + }, + { + "epoch": 0.38, + "learning_rate": 1.428597550890316e-05, + "loss": 3.789, + "step": 299 + }, + { + "epoch": 0.38, + "learning_rate": 1.4248884542132348e-05, + "loss": 3.9083, + "step": 300 + }, + { + "epoch": 0.38, + "learning_rate": 1.421172210655543e-05, + "loss": 3.6434, + "step": 301 + }, + { + "epoch": 0.38, + "learning_rate": 1.4174488827267032e-05, + "loss": 3.6945, + "step": 302 + }, + { + "epoch": 0.38, + "learning_rate": 1.4137185330553416e-05, + "loss": 3.8261, + "step": 303 + }, + { + "epoch": 0.38, + "learning_rate": 1.4099812243881947e-05, + "loss": 3.8346, + "step": 304 + }, + { + "epoch": 0.39, + "learning_rate": 1.406237019589053e-05, + "loss": 3.7792, + "step": 305 + }, + { + "epoch": 0.39, + "learning_rate": 1.4024859816377046e-05, + "loss": 3.7122, + "step": 306 + }, + { + "epoch": 0.39, + "learning_rate": 1.3987281736288743e-05, + "loss": 3.7728, + "step": 307 + }, + { + "epoch": 0.39, + "learning_rate": 1.3949636587711645e-05, + "loss": 3.6981, + "step": 308 + }, + { + "epoch": 0.39, + "learning_rate": 1.3911925003859907e-05, + "loss": 3.8742, + "step": 309 + }, + { + "epoch": 0.39, + "learning_rate": 1.3874147619065161e-05, + "loss": 3.7379, + "step": 310 + }, + { + "epoch": 0.39, + "learning_rate": 1.3836305068765852e-05, + "loss": 3.7293, + "step": 311 + }, + { + "epoch": 0.39, + "learning_rate": 1.3798397989496549e-05, + "loss": 3.7363, + "step": 312 + }, + { + "epoch": 0.4, + "learning_rate": 1.3760427018877236e-05, + "loss": 3.6561, + "step": 313 + }, + { + "epoch": 0.4, + "learning_rate": 1.3722392795602595e-05, + "loss": 3.8963, + "step": 314 + }, + { + "epoch": 0.4, + "learning_rate": 1.3684295959431241e-05, + "loss": 3.6974, + "step": 315 + }, + { + "epoch": 0.4, + "learning_rate": 1.3646137151174992e-05, + "loss": 3.9303, + "step": 316 + }, + { + "epoch": 0.4, + "learning_rate": 1.3607917012688063e-05, + "loss": 3.5819, + "step": 317 + }, + { + "epoch": 0.4, + "learning_rate": 1.3569636186856288e-05, + "loss": 3.7086, + "step": 318 + }, + { + "epoch": 0.4, + "learning_rate": 1.3531295317586291e-05, + "loss": 3.6635, + "step": 319 + }, + { + "epoch": 0.41, + "learning_rate": 1.349289504979467e-05, + "loss": 3.7356, + "step": 320 + }, + { + "epoch": 0.41, + "learning_rate": 1.3454436029397135e-05, + "loss": 3.8162, + "step": 321 + }, + { + "epoch": 0.41, + "learning_rate": 1.341591890329766e-05, + "loss": 3.7543, + "step": 322 + }, + { + "epoch": 0.41, + "learning_rate": 1.3377344319377585e-05, + "loss": 3.627, + "step": 323 + }, + { + "epoch": 0.41, + "learning_rate": 1.3338712926484725e-05, + "loss": 3.7216, + "step": 324 + }, + { + "epoch": 0.41, + "learning_rate": 1.3300025374422459e-05, + "loss": 3.6936, + "step": 325 + }, + { + "epoch": 0.41, + "learning_rate": 1.3261282313938795e-05, + "loss": 3.8305, + "step": 326 + }, + { + "epoch": 0.41, + "learning_rate": 1.322248439671543e-05, + "loss": 3.6515, + "step": 327 + }, + { + "epoch": 0.42, + "learning_rate": 1.3183632275356777e-05, + "loss": 3.7233, + "step": 328 + }, + { + "epoch": 0.42, + "learning_rate": 1.3144726603379008e-05, + "loss": 3.6564, + "step": 329 + }, + { + "epoch": 0.42, + "learning_rate": 1.3105768035199033e-05, + "loss": 3.8174, + "step": 330 + }, + { + "epoch": 0.42, + "learning_rate": 1.3066757226123522e-05, + "loss": 3.6796, + "step": 331 + }, + { + "epoch": 0.42, + "learning_rate": 1.3027694832337858e-05, + "loss": 3.6757, + "step": 332 + }, + { + "epoch": 0.42, + "learning_rate": 1.2988581510895118e-05, + "loss": 3.7172, + "step": 333 + }, + { + "epoch": 0.42, + "learning_rate": 1.2949417919705008e-05, + "loss": 3.6418, + "step": 334 + }, + { + "epoch": 0.42, + "learning_rate": 1.2910204717522805e-05, + "loss": 3.6337, + "step": 335 + }, + { + "epoch": 0.43, + "learning_rate": 1.2870942563938265e-05, + "loss": 3.7748, + "step": 336 + }, + { + "epoch": 0.43, + "learning_rate": 1.283163211936455e-05, + "loss": 3.6219, + "step": 337 + }, + { + "epoch": 0.43, + "learning_rate": 1.279227404502709e-05, + "loss": 3.6606, + "step": 338 + }, + { + "epoch": 0.43, + "learning_rate": 1.2752869002952492e-05, + "loss": 3.5741, + "step": 339 + }, + { + "epoch": 0.43, + "learning_rate": 1.2713417655957377e-05, + "loss": 3.6364, + "step": 340 + }, + { + "epoch": 0.43, + "learning_rate": 1.2673920667637244e-05, + "loss": 3.7044, + "step": 341 + }, + { + "epoch": 0.43, + "learning_rate": 1.2634378702355314e-05, + "loss": 3.7507, + "step": 342 + }, + { + "epoch": 0.43, + "learning_rate": 1.2594792425231339e-05, + "loss": 3.695, + "step": 343 + }, + { + "epoch": 0.44, + "learning_rate": 1.2555162502130434e-05, + "loss": 3.6151, + "step": 344 + }, + { + "epoch": 0.44, + "learning_rate": 1.251548959965185e-05, + "loss": 3.6708, + "step": 345 + }, + { + "epoch": 0.44, + "learning_rate": 1.2475774385117787e-05, + "loss": 3.5673, + "step": 346 + }, + { + "epoch": 0.44, + "learning_rate": 1.243601752656216e-05, + "loss": 3.5632, + "step": 347 + }, + { + "epoch": 0.44, + "learning_rate": 1.2396219692719364e-05, + "loss": 3.6272, + "step": 348 + }, + { + "epoch": 0.44, + "learning_rate": 1.2356381553013014e-05, + "loss": 3.6401, + "step": 349 + }, + { + "epoch": 0.44, + "learning_rate": 1.23165037775447e-05, + "loss": 3.6209, + "step": 350 + }, + { + "epoch": 0.44, + "learning_rate": 1.2276587037082706e-05, + "loss": 3.6672, + "step": 351 + }, + { + "epoch": 0.45, + "learning_rate": 1.2236632003050736e-05, + "loss": 3.7134, + "step": 352 + }, + { + "epoch": 0.45, + "learning_rate": 1.2196639347516613e-05, + "loss": 3.7017, + "step": 353 + }, + { + "epoch": 0.45, + "learning_rate": 1.215660974318097e-05, + "loss": 3.7002, + "step": 354 + }, + { + "epoch": 0.45, + "learning_rate": 1.211654386336595e-05, + "loss": 3.7195, + "step": 355 + }, + { + "epoch": 0.45, + "learning_rate": 1.207644238200387e-05, + "loss": 3.6528, + "step": 356 + }, + { + "epoch": 0.45, + "learning_rate": 1.2036305973625881e-05, + "loss": 3.5246, + "step": 357 + }, + { + "epoch": 0.45, + "learning_rate": 1.1996135313350636e-05, + "loss": 3.5674, + "step": 358 + }, + { + "epoch": 0.45, + "learning_rate": 1.1955931076872916e-05, + "loss": 3.6463, + "step": 359 + }, + { + "epoch": 0.46, + "learning_rate": 1.191569394045228e-05, + "loss": 3.543, + "step": 360 + }, + { + "epoch": 0.46, + "learning_rate": 1.1875424580901684e-05, + "loss": 3.6849, + "step": 361 + }, + { + "epoch": 0.46, + "learning_rate": 1.1835123675576092e-05, + "loss": 3.6798, + "step": 362 + }, + { + "epoch": 0.46, + "learning_rate": 1.1794791902361093e-05, + "loss": 3.639, + "step": 363 + }, + { + "epoch": 0.46, + "learning_rate": 1.1754429939661492e-05, + "loss": 3.6447, + "step": 364 + }, + { + "epoch": 0.46, + "learning_rate": 1.1714038466389892e-05, + "loss": 3.6637, + "step": 365 + }, + { + "epoch": 0.46, + "learning_rate": 1.1673618161955288e-05, + "loss": 3.6719, + "step": 366 + }, + { + "epoch": 0.46, + "learning_rate": 1.1633169706251637e-05, + "loss": 3.7565, + "step": 367 + }, + { + "epoch": 0.47, + "learning_rate": 1.1592693779646405e-05, + "loss": 3.645, + "step": 368 + }, + { + "epoch": 0.47, + "learning_rate": 1.1552191062969147e-05, + "loss": 3.5035, + "step": 369 + }, + { + "epoch": 0.47, + "learning_rate": 1.1511662237500032e-05, + "loss": 3.6058, + "step": 370 + }, + { + "epoch": 0.47, + "learning_rate": 1.1471107984958405e-05, + "loss": 3.5748, + "step": 371 + }, + { + "epoch": 0.47, + "learning_rate": 1.1430528987491305e-05, + "loss": 3.4794, + "step": 372 + }, + { + "epoch": 0.47, + "learning_rate": 1.1389925927661996e-05, + "loss": 3.6624, + "step": 373 + }, + { + "epoch": 0.47, + "learning_rate": 1.1349299488438485e-05, + "loss": 3.5667, + "step": 374 + }, + { + "epoch": 0.47, + "learning_rate": 1.1308650353182036e-05, + "loss": 3.5315, + "step": 375 + }, + { + "epoch": 0.48, + "learning_rate": 1.1267979205635675e-05, + "loss": 3.5055, + "step": 376 + }, + { + "epoch": 0.48, + "learning_rate": 1.1227286729912684e-05, + "loss": 3.5781, + "step": 377 + }, + { + "epoch": 0.48, + "learning_rate": 1.1186573610485099e-05, + "loss": 3.4713, + "step": 378 + }, + { + "epoch": 0.48, + "learning_rate": 1.1145840532172197e-05, + "loss": 3.5313, + "step": 379 + }, + { + "epoch": 0.48, + "learning_rate": 1.1105088180128975e-05, + "loss": 3.5475, + "step": 380 + }, + { + "epoch": 0.48, + "learning_rate": 1.1064317239834628e-05, + "loss": 3.5527, + "step": 381 + }, + { + "epoch": 0.48, + "learning_rate": 1.1023528397081011e-05, + "loss": 3.5274, + "step": 382 + }, + { + "epoch": 0.48, + "learning_rate": 1.0982722337961116e-05, + "loss": 3.5427, + "step": 383 + }, + { + "epoch": 0.49, + "learning_rate": 1.094189974885752e-05, + "loss": 3.4859, + "step": 384 + }, + { + "epoch": 0.49, + "learning_rate": 1.0901061316430848e-05, + "loss": 3.5526, + "step": 385 + }, + { + "epoch": 0.49, + "learning_rate": 1.0860207727608214e-05, + "loss": 3.544, + "step": 386 + }, + { + "epoch": 0.49, + "learning_rate": 1.0819339669571674e-05, + "loss": 3.525, + "step": 387 + }, + { + "epoch": 0.49, + "learning_rate": 1.0778457829746668e-05, + "loss": 3.6301, + "step": 388 + }, + { + "epoch": 0.49, + "learning_rate": 1.0737562895790447e-05, + "loss": 3.607, + "step": 389 + }, + { + "epoch": 0.49, + "learning_rate": 1.0696655555580524e-05, + "loss": 3.5375, + "step": 390 + }, + { + "epoch": 0.49, + "learning_rate": 1.0655736497203084e-05, + "loss": 3.4769, + "step": 391 + }, + { + "epoch": 0.5, + "learning_rate": 1.0614806408941422e-05, + "loss": 3.6209, + "step": 392 + }, + { + "epoch": 0.5, + "learning_rate": 1.0573865979264362e-05, + "loss": 3.5285, + "step": 393 + }, + { + "epoch": 0.5, + "learning_rate": 1.0532915896814673e-05, + "loss": 3.4459, + "step": 394 + }, + { + "epoch": 0.5, + "learning_rate": 1.0491956850397496e-05, + "loss": 3.5381, + "step": 395 + }, + { + "epoch": 0.5, + "learning_rate": 1.0450989528968747e-05, + "loss": 3.6046, + "step": 396 + }, + { + "epoch": 0.5, + "learning_rate": 1.0410014621623531e-05, + "loss": 3.5079, + "step": 397 + }, + { + "epoch": 0.5, + "learning_rate": 1.036903281758456e-05, + "loss": 3.4709, + "step": 398 + }, + { + "epoch": 0.51, + "learning_rate": 1.0328044806190549e-05, + "loss": 3.5259, + "step": 399 + }, + { + "epoch": 0.51, + "learning_rate": 1.028705127688462e-05, + "loss": 3.615, + "step": 400 + }, + { + "epoch": 0.51, + "learning_rate": 1.0246052919202713e-05, + "loss": 3.6054, + "step": 401 + }, + { + "epoch": 0.51, + "learning_rate": 1.0205050422761989e-05, + "loss": 3.5232, + "step": 402 + }, + { + "epoch": 0.51, + "learning_rate": 1.0164044477249215e-05, + "loss": 3.5266, + "step": 403 + }, + { + "epoch": 0.51, + "learning_rate": 1.0123035772409184e-05, + "loss": 3.4681, + "step": 404 + }, + { + "epoch": 0.51, + "learning_rate": 1.0082024998033092e-05, + "loss": 3.4198, + "step": 405 + }, + { + "epoch": 0.51, + "learning_rate": 1.004101284394696e-05, + "loss": 3.4836, + "step": 406 + }, + { + "epoch": 0.52, + "learning_rate": 1e-05, + "loss": 3.3732, + "step": 407 + }, + { + "epoch": 0.52, + "learning_rate": 9.958987156053046e-06, + "loss": 3.631, + "step": 408 + }, + { + "epoch": 0.52, + "learning_rate": 9.91797500196691e-06, + "loss": 3.3575, + "step": 409 + }, + { + "epoch": 0.52, + "learning_rate": 9.876964227590821e-06, + "loss": 3.5228, + "step": 410 + }, + { + "epoch": 0.52, + "learning_rate": 9.835955522750789e-06, + "loss": 3.4126, + "step": 411 + }, + { + "epoch": 0.52, + "learning_rate": 9.794949577238014e-06, + "loss": 3.5752, + "step": 412 + }, + { + "epoch": 0.52, + "learning_rate": 9.753947080797289e-06, + "loss": 3.6233, + "step": 413 + }, + { + "epoch": 0.52, + "learning_rate": 9.712948723115384e-06, + "loss": 3.5602, + "step": 414 + }, + { + "epoch": 0.53, + "learning_rate": 9.671955193809453e-06, + "loss": 3.3987, + "step": 415 + }, + { + "epoch": 0.53, + "learning_rate": 9.630967182415441e-06, + "loss": 3.5806, + "step": 416 + }, + { + "epoch": 0.53, + "learning_rate": 9.589985378376474e-06, + "loss": 3.5712, + "step": 417 + }, + { + "epoch": 0.53, + "learning_rate": 9.549010471031256e-06, + "loss": 3.5768, + "step": 418 + }, + { + "epoch": 0.53, + "learning_rate": 9.508043149602509e-06, + "loss": 3.5503, + "step": 419 + }, + { + "epoch": 0.53, + "learning_rate": 9.46708410318533e-06, + "loss": 3.4928, + "step": 420 + }, + { + "epoch": 0.53, + "learning_rate": 9.426134020735642e-06, + "loss": 3.5639, + "step": 421 + }, + { + "epoch": 0.53, + "learning_rate": 9.38519359105858e-06, + "loss": 3.4968, + "step": 422 + }, + { + "epoch": 0.54, + "learning_rate": 9.344263502796918e-06, + "loss": 3.5206, + "step": 423 + }, + { + "epoch": 0.54, + "learning_rate": 9.303344444419476e-06, + "loss": 3.5223, + "step": 424 + }, + { + "epoch": 0.54, + "learning_rate": 9.262437104209555e-06, + "loss": 3.5115, + "step": 425 + }, + { + "epoch": 0.54, + "learning_rate": 9.221542170253339e-06, + "loss": 3.5129, + "step": 426 + }, + { + "epoch": 0.54, + "learning_rate": 9.18066033042833e-06, + "loss": 3.4762, + "step": 427 + }, + { + "epoch": 0.54, + "learning_rate": 9.139792272391791e-06, + "loss": 3.4295, + "step": 428 + }, + { + "epoch": 0.54, + "learning_rate": 9.098938683569155e-06, + "loss": 3.402, + "step": 429 + }, + { + "epoch": 0.54, + "learning_rate": 9.058100251142483e-06, + "loss": 3.4283, + "step": 430 + }, + { + "epoch": 0.55, + "learning_rate": 9.017277662038884e-06, + "loss": 3.5585, + "step": 431 + }, + { + "epoch": 0.55, + "learning_rate": 8.97647160291899e-06, + "loss": 3.4686, + "step": 432 + }, + { + "epoch": 0.55, + "learning_rate": 8.935682760165377e-06, + "loss": 3.3924, + "step": 433 + }, + { + "epoch": 0.55, + "learning_rate": 8.894911819871027e-06, + "loss": 3.5207, + "step": 434 + }, + { + "epoch": 0.55, + "learning_rate": 8.854159467827808e-06, + "loss": 3.4214, + "step": 435 + }, + { + "epoch": 0.55, + "learning_rate": 8.813426389514903e-06, + "loss": 3.5747, + "step": 436 + }, + { + "epoch": 0.55, + "learning_rate": 8.77271327008732e-06, + "loss": 3.4337, + "step": 437 + }, + { + "epoch": 0.55, + "learning_rate": 8.732020794364327e-06, + "loss": 3.4204, + "step": 438 + }, + { + "epoch": 0.56, + "learning_rate": 8.691349646817965e-06, + "loss": 3.4279, + "step": 439 + }, + { + "epoch": 0.56, + "learning_rate": 8.650700511561515e-06, + "loss": 3.4796, + "step": 440 + }, + { + "epoch": 0.56, + "learning_rate": 8.610074072338006e-06, + "loss": 3.3757, + "step": 441 + }, + { + "epoch": 0.56, + "learning_rate": 8.5694710125087e-06, + "loss": 3.4833, + "step": 442 + }, + { + "epoch": 0.56, + "learning_rate": 8.528892015041598e-06, + "loss": 3.6173, + "step": 443 + }, + { + "epoch": 0.56, + "learning_rate": 8.488337762499971e-06, + "loss": 3.3047, + "step": 444 + }, + { + "epoch": 0.56, + "learning_rate": 8.447808937030856e-06, + "loss": 3.5363, + "step": 445 + }, + { + "epoch": 0.56, + "learning_rate": 8.407306220353597e-06, + "loss": 3.4376, + "step": 446 + }, + { + "epoch": 0.57, + "learning_rate": 8.366830293748364e-06, + "loss": 3.4486, + "step": 447 + }, + { + "epoch": 0.57, + "learning_rate": 8.326381838044713e-06, + "loss": 3.399, + "step": 448 + }, + { + "epoch": 0.57, + "learning_rate": 8.28596153361011e-06, + "loss": 3.4425, + "step": 449 + }, + { + "epoch": 0.57, + "learning_rate": 8.245570060338511e-06, + "loss": 3.4965, + "step": 450 + }, + { + "epoch": 0.57, + "learning_rate": 8.20520809763891e-06, + "loss": 3.4606, + "step": 451 + }, + { + "epoch": 0.57, + "learning_rate": 8.16487632442391e-06, + "loss": 3.3802, + "step": 452 + }, + { + "epoch": 0.57, + "learning_rate": 8.124575419098321e-06, + "loss": 3.4718, + "step": 453 + }, + { + "epoch": 0.57, + "learning_rate": 8.084306059547722e-06, + "loss": 3.4686, + "step": 454 + }, + { + "epoch": 0.58, + "learning_rate": 8.044068923127088e-06, + "loss": 3.4968, + "step": 455 + }, + { + "epoch": 0.58, + "learning_rate": 8.003864686649366e-06, + "loss": 3.3585, + "step": 456 + }, + { + "epoch": 0.58, + "learning_rate": 7.96369402637412e-06, + "loss": 3.4358, + "step": 457 + }, + { + "epoch": 0.58, + "learning_rate": 7.923557617996132e-06, + "loss": 3.3583, + "step": 458 + }, + { + "epoch": 0.58, + "learning_rate": 7.883456136634053e-06, + "loss": 3.4183, + "step": 459 + }, + { + "epoch": 0.58, + "learning_rate": 7.843390256819034e-06, + "loss": 3.4337, + "step": 460 + }, + { + "epoch": 0.58, + "learning_rate": 7.803360652483392e-06, + "loss": 3.4407, + "step": 461 + }, + { + "epoch": 0.58, + "learning_rate": 7.763367996949267e-06, + "loss": 3.2601, + "step": 462 + }, + { + "epoch": 0.59, + "learning_rate": 7.723412962917294e-06, + "loss": 3.2422, + "step": 463 + }, + { + "epoch": 0.59, + "learning_rate": 7.683496222455303e-06, + "loss": 3.4645, + "step": 464 + }, + { + "epoch": 0.59, + "learning_rate": 7.643618446986988e-06, + "loss": 3.5709, + "step": 465 + }, + { + "epoch": 0.59, + "learning_rate": 7.6037803072806396e-06, + "loss": 3.2944, + "step": 466 + }, + { + "epoch": 0.59, + "learning_rate": 7.563982473437843e-06, + "loss": 3.4792, + "step": 467 + }, + { + "epoch": 0.59, + "learning_rate": 7.524225614882216e-06, + "loss": 3.3571, + "step": 468 + }, + { + "epoch": 0.59, + "learning_rate": 7.4845104003481565e-06, + "loss": 3.3765, + "step": 469 + }, + { + "epoch": 0.59, + "learning_rate": 7.444837497869569e-06, + "loss": 3.4416, + "step": 470 + }, + { + "epoch": 0.6, + "learning_rate": 7.4052075747686625e-06, + "loss": 3.357, + "step": 471 + }, + { + "epoch": 0.6, + "learning_rate": 7.365621297644686e-06, + "loss": 3.5674, + "step": 472 + }, + { + "epoch": 0.6, + "learning_rate": 7.326079332362756e-06, + "loss": 3.2106, + "step": 473 + }, + { + "epoch": 0.6, + "learning_rate": 7.286582344042625e-06, + "loss": 3.3728, + "step": 474 + }, + { + "epoch": 0.6, + "learning_rate": 7.247130997047511e-06, + "loss": 3.4861, + "step": 475 + }, + { + "epoch": 0.6, + "learning_rate": 7.207725954972913e-06, + "loss": 3.4695, + "step": 476 + }, + { + "epoch": 0.6, + "learning_rate": 7.168367880635454e-06, + "loss": 3.3745, + "step": 477 + }, + { + "epoch": 0.61, + "learning_rate": 7.12905743606174e-06, + "loss": 3.366, + "step": 478 + }, + { + "epoch": 0.61, + "learning_rate": 7.089795282477199e-06, + "loss": 3.4958, + "step": 479 + }, + { + "epoch": 0.61, + "learning_rate": 7.050582080294996e-06, + "loss": 3.2569, + "step": 480 + }, + { + "epoch": 0.61, + "learning_rate": 7.011418489104883e-06, + "loss": 3.4334, + "step": 481 + }, + { + "epoch": 0.61, + "learning_rate": 6.972305167662144e-06, + "loss": 3.491, + "step": 482 + }, + { + "epoch": 0.61, + "learning_rate": 6.933242773876481e-06, + "loss": 3.3694, + "step": 483 + }, + { + "epoch": 0.61, + "learning_rate": 6.89423196480097e-06, + "loss": 3.5006, + "step": 484 + }, + { + "epoch": 0.61, + "learning_rate": 6.855273396620999e-06, + "loss": 3.3757, + "step": 485 + }, + { + "epoch": 0.62, + "learning_rate": 6.816367724643225e-06, + "loss": 3.4249, + "step": 486 + }, + { + "epoch": 0.62, + "learning_rate": 6.777515603284575e-06, + "loss": 3.3526, + "step": 487 + }, + { + "epoch": 0.62, + "learning_rate": 6.738717686061207e-06, + "loss": 3.4817, + "step": 488 + }, + { + "epoch": 0.62, + "learning_rate": 6.699974625577545e-06, + "loss": 3.455, + "step": 489 + }, + { + "epoch": 0.62, + "learning_rate": 6.661287073515276e-06, + "loss": 3.3259, + "step": 490 + }, + { + "epoch": 0.62, + "learning_rate": 6.622655680622416e-06, + "loss": 3.2455, + "step": 491 + }, + { + "epoch": 0.62, + "learning_rate": 6.5840810967023405e-06, + "loss": 3.2447, + "step": 492 + }, + { + "epoch": 0.62, + "learning_rate": 6.545563970602867e-06, + "loss": 3.2857, + "step": 493 + }, + { + "epoch": 0.63, + "learning_rate": 6.507104950205336e-06, + "loss": 3.3836, + "step": 494 + }, + { + "epoch": 0.63, + "learning_rate": 6.4687046824137115e-06, + "loss": 3.4351, + "step": 495 + }, + { + "epoch": 0.63, + "learning_rate": 6.430363813143716e-06, + "loss": 3.3191, + "step": 496 + }, + { + "epoch": 0.63, + "learning_rate": 6.392082987311938e-06, + "loss": 3.496, + "step": 497 + }, + { + "epoch": 0.63, + "learning_rate": 6.353862848825011e-06, + "loss": 3.2581, + "step": 498 + }, + { + "epoch": 0.63, + "learning_rate": 6.315704040568761e-06, + "loss": 3.3718, + "step": 499 + }, + { + "epoch": 0.63, + "learning_rate": 6.277607204397409e-06, + "loss": 3.445, + "step": 500 + }, + { + "epoch": 0.63, + "learning_rate": 6.239572981122766e-06, + "loss": 3.2989, + "step": 501 + }, + { + "epoch": 0.64, + "learning_rate": 6.201602010503454e-06, + "loss": 3.2606, + "step": 502 + }, + { + "epoch": 0.64, + "learning_rate": 6.163694931234153e-06, + "loss": 3.3261, + "step": 503 + }, + { + "epoch": 0.64, + "learning_rate": 6.125852380934841e-06, + "loss": 3.4656, + "step": 504 + }, + { + "epoch": 0.64, + "learning_rate": 6.088074996140096e-06, + "loss": 3.3822, + "step": 505 + }, + { + "epoch": 0.64, + "learning_rate": 6.050363412288356e-06, + "loss": 3.3442, + "step": 506 + }, + { + "epoch": 0.64, + "learning_rate": 6.012718263711261e-06, + "loss": 3.3625, + "step": 507 + }, + { + "epoch": 0.64, + "learning_rate": 5.975140183622958e-06, + "loss": 3.3852, + "step": 508 + }, + { + "epoch": 0.64, + "learning_rate": 5.93762980410947e-06, + "loss": 3.4399, + "step": 509 + }, + { + "epoch": 0.65, + "learning_rate": 5.900187756118055e-06, + "loss": 3.1638, + "step": 510 + }, + { + "epoch": 0.65, + "learning_rate": 5.862814669446586e-06, + "loss": 3.2364, + "step": 511 + }, + { + "epoch": 0.65, + "learning_rate": 5.825511172732971e-06, + "loss": 3.2059, + "step": 512 + }, + { + "epoch": 0.65, + "learning_rate": 5.788277893444574e-06, + "loss": 3.239, + "step": 513 + }, + { + "epoch": 0.65, + "learning_rate": 5.7511154578676535e-06, + "loss": 3.3294, + "step": 514 + }, + { + "epoch": 0.65, + "learning_rate": 5.714024491096841e-06, + "loss": 3.2771, + "step": 515 + }, + { + "epoch": 0.65, + "learning_rate": 5.677005617024618e-06, + "loss": 3.4053, + "step": 516 + }, + { + "epoch": 0.65, + "learning_rate": 5.640059458330811e-06, + "loss": 3.25, + "step": 517 + }, + { + "epoch": 0.66, + "learning_rate": 5.603186636472156e-06, + "loss": 3.2526, + "step": 518 + }, + { + "epoch": 0.66, + "learning_rate": 5.566387771671788e-06, + "loss": 3.4599, + "step": 519 + }, + { + "epoch": 0.66, + "learning_rate": 5.529663482908864e-06, + "loss": 3.3457, + "step": 520 + }, + { + "epoch": 0.66, + "learning_rate": 5.4930143879081146e-06, + "loss": 3.2894, + "step": 521 + }, + { + "epoch": 0.66, + "learning_rate": 5.4564411031294695e-06, + "loss": 3.3749, + "step": 522 + }, + { + "epoch": 0.66, + "learning_rate": 5.419944243757685e-06, + "loss": 3.3198, + "step": 523 + }, + { + "epoch": 0.66, + "learning_rate": 5.383524423691995e-06, + "loss": 3.3991, + "step": 524 + }, + { + "epoch": 0.66, + "learning_rate": 5.34718225553579e-06, + "loss": 3.429, + "step": 525 + }, + { + "epoch": 0.67, + "learning_rate": 5.310918350586292e-06, + "loss": 3.3088, + "step": 526 + }, + { + "epoch": 0.67, + "learning_rate": 5.274733318824317e-06, + "loss": 3.2755, + "step": 527 + }, + { + "epoch": 0.67, + "learning_rate": 5.238627768903957e-06, + "loss": 3.1154, + "step": 528 + }, + { + "epoch": 0.67, + "learning_rate": 5.202602308142391e-06, + "loss": 3.1803, + "step": 529 + }, + { + "epoch": 0.67, + "learning_rate": 5.16665754250964e-06, + "loss": 3.2609, + "step": 530 + }, + { + "epoch": 0.67, + "learning_rate": 5.130794076618391e-06, + "loss": 3.1746, + "step": 531 + }, + { + "epoch": 0.67, + "learning_rate": 5.095012513713815e-06, + "loss": 3.2586, + "step": 532 + }, + { + "epoch": 0.67, + "learning_rate": 5.059313455663429e-06, + "loss": 3.3339, + "step": 533 + }, + { + "epoch": 0.68, + "learning_rate": 5.02369750294697e-06, + "loss": 3.178, + "step": 534 + }, + { + "epoch": 0.68, + "learning_rate": 4.988165254646278e-06, + "loss": 3.3395, + "step": 535 + }, + { + "epoch": 0.68, + "learning_rate": 4.952717308435254e-06, + "loss": 3.3983, + "step": 536 + }, + { + "epoch": 0.68, + "learning_rate": 4.917354260569779e-06, + "loss": 3.1509, + "step": 537 + }, + { + "epoch": 0.68, + "learning_rate": 4.8820767058776895e-06, + "loss": 3.3076, + "step": 538 + }, + { + "epoch": 0.68, + "learning_rate": 4.846885237748779e-06, + "loss": 3.2966, + "step": 539 + }, + { + "epoch": 0.68, + "learning_rate": 4.811780448124812e-06, + "loss": 3.4145, + "step": 540 + }, + { + "epoch": 0.68, + "learning_rate": 4.776762927489568e-06, + "loss": 3.2694, + "step": 541 + }, + { + "epoch": 0.69, + "learning_rate": 4.74183326485891e-06, + "loss": 3.2737, + "step": 542 + }, + { + "epoch": 0.69, + "learning_rate": 4.706992047770877e-06, + "loss": 3.2447, + "step": 543 + }, + { + "epoch": 0.69, + "learning_rate": 4.672239862275794e-06, + "loss": 3.416, + "step": 544 + }, + { + "epoch": 0.69, + "learning_rate": 4.637577292926428e-06, + "loss": 3.3194, + "step": 545 + }, + { + "epoch": 0.69, + "learning_rate": 4.6030049227681484e-06, + "loss": 3.3098, + "step": 546 + }, + { + "epoch": 0.69, + "learning_rate": 4.568523333329116e-06, + "loss": 3.2507, + "step": 547 + }, + { + "epoch": 0.69, + "learning_rate": 4.534133104610507e-06, + "loss": 3.2184, + "step": 548 + }, + { + "epoch": 0.69, + "learning_rate": 4.4998348150767525e-06, + "loss": 3.3042, + "step": 549 + }, + { + "epoch": 0.7, + "learning_rate": 4.465629041645819e-06, + "loss": 3.2976, + "step": 550 + }, + { + "epoch": 0.7, + "learning_rate": 4.43151635967948e-06, + "loss": 3.2555, + "step": 551 + }, + { + "epoch": 0.7, + "learning_rate": 4.397497342973677e-06, + "loss": 3.2512, + "step": 552 + }, + { + "epoch": 0.7, + "learning_rate": 4.363572563748823e-06, + "loss": 3.2136, + "step": 553 + }, + { + "epoch": 0.7, + "learning_rate": 4.329742592640212e-06, + "loss": 3.4037, + "step": 554 + }, + { + "epoch": 0.7, + "learning_rate": 4.296007998688405e-06, + "loss": 3.2952, + "step": 555 + }, + { + "epoch": 0.7, + "learning_rate": 4.262369349329665e-06, + "loss": 3.3537, + "step": 556 + }, + { + "epoch": 0.71, + "learning_rate": 4.228827210386404e-06, + "loss": 3.1924, + "step": 557 + }, + { + "epoch": 0.71, + "learning_rate": 4.195382146057672e-06, + "loss": 3.2783, + "step": 558 + }, + { + "epoch": 0.71, + "learning_rate": 4.162034718909671e-06, + "loss": 3.263, + "step": 559 + }, + { + "epoch": 0.71, + "learning_rate": 4.12878548986627e-06, + "loss": 3.3612, + "step": 560 + }, + { + "epoch": 0.71, + "learning_rate": 4.095635018199612e-06, + "loss": 3.2958, + "step": 561 + }, + { + "epoch": 0.71, + "learning_rate": 4.062583861520657e-06, + "loss": 3.0953, + "step": 562 + }, + { + "epoch": 0.71, + "learning_rate": 4.029632575769841e-06, + "loss": 3.1859, + "step": 563 + }, + { + "epoch": 0.71, + "learning_rate": 3.996781715207706e-06, + "loss": 3.1945, + "step": 564 + }, + { + "epoch": 0.72, + "learning_rate": 3.964031832405581e-06, + "loss": 3.1413, + "step": 565 + }, + { + "epoch": 0.72, + "learning_rate": 3.931383478236292e-06, + "loss": 3.3722, + "step": 566 + }, + { + "epoch": 0.72, + "learning_rate": 3.8988372018648905e-06, + "loss": 3.2463, + "step": 567 + }, + { + "epoch": 0.72, + "learning_rate": 3.866393550739415e-06, + "loss": 3.221, + "step": 568 + }, + { + "epoch": 0.72, + "learning_rate": 3.834053070581685e-06, + "loss": 3.3415, + "step": 569 + }, + { + "epoch": 0.72, + "learning_rate": 3.8018163053781243e-06, + "loss": 3.2295, + "step": 570 + }, + { + "epoch": 0.72, + "learning_rate": 3.769683797370609e-06, + "loss": 3.2582, + "step": 571 + }, + { + "epoch": 0.72, + "learning_rate": 3.7376560870473465e-06, + "loss": 3.2662, + "step": 572 + }, + { + "epoch": 0.73, + "learning_rate": 3.7057337131337822e-06, + "loss": 3.2425, + "step": 573 + }, + { + "epoch": 0.73, + "learning_rate": 3.6739172125835386e-06, + "loss": 3.3492, + "step": 574 + }, + { + "epoch": 0.73, + "learning_rate": 3.6422071205693866e-06, + "loss": 3.2789, + "step": 575 + }, + { + "epoch": 0.73, + "learning_rate": 3.610603970474239e-06, + "loss": 3.1523, + "step": 576 + }, + { + "epoch": 0.73, + "learning_rate": 3.5791082938821874e-06, + "loss": 3.3764, + "step": 577 + }, + { + "epoch": 0.73, + "learning_rate": 3.5477206205695392e-06, + "loss": 3.2249, + "step": 578 + }, + { + "epoch": 0.73, + "learning_rate": 3.5164414784959368e-06, + "loss": 3.1767, + "step": 579 + }, + { + "epoch": 0.73, + "learning_rate": 3.485271393795453e-06, + "loss": 3.1767, + "step": 580 + }, + { + "epoch": 0.74, + "learning_rate": 3.454210890767755e-06, + "loss": 3.296, + "step": 581 + }, + { + "epoch": 0.74, + "learning_rate": 3.423260491869276e-06, + "loss": 3.3156, + "step": 582 + }, + { + "epoch": 0.74, + "learning_rate": 3.392420717704431e-06, + "loss": 3.3446, + "step": 583 + }, + { + "epoch": 0.74, + "learning_rate": 3.3616920870168633e-06, + "loss": 3.3076, + "step": 584 + }, + { + "epoch": 0.74, + "learning_rate": 3.331075116680715e-06, + "loss": 3.3178, + "step": 585 + }, + { + "epoch": 0.74, + "learning_rate": 3.3005703216919336e-06, + "loss": 3.158, + "step": 586 + }, + { + "epoch": 0.74, + "learning_rate": 3.2701782151596038e-06, + "loss": 3.2794, + "step": 587 + }, + { + "epoch": 0.74, + "learning_rate": 3.2398993082973294e-06, + "loss": 3.2356, + "step": 588 + }, + { + "epoch": 0.75, + "learning_rate": 3.209734110414625e-06, + "loss": 3.2024, + "step": 589 + }, + { + "epoch": 0.75, + "learning_rate": 3.179683128908352e-06, + "loss": 3.3495, + "step": 590 + }, + { + "epoch": 0.75, + "learning_rate": 3.1497468692541812e-06, + "loss": 3.2733, + "step": 591 + }, + { + "epoch": 0.75, + "learning_rate": 3.1199258349980965e-06, + "loss": 3.2758, + "step": 592 + }, + { + "epoch": 0.75, + "learning_rate": 3.090220527747916e-06, + "loss": 3.1778, + "step": 593 + }, + { + "epoch": 0.75, + "learning_rate": 3.0606314471648646e-06, + "loss": 3.2262, + "step": 594 + }, + { + "epoch": 0.75, + "learning_rate": 3.0311590909551626e-06, + "loss": 3.3652, + "step": 595 + }, + { + "epoch": 0.75, + "learning_rate": 3.0018039548616497e-06, + "loss": 3.341, + "step": 596 + }, + { + "epoch": 0.76, + "learning_rate": 2.972566532655462e-06, + "loss": 3.1817, + "step": 597 + }, + { + "epoch": 0.76, + "learning_rate": 2.943447316127712e-06, + "loss": 3.386, + "step": 598 + }, + { + "epoch": 0.76, + "learning_rate": 2.9144467950812203e-06, + "loss": 3.138, + "step": 599 + }, + { + "epoch": 0.76, + "learning_rate": 2.8855654573222824e-06, + "loss": 3.2193, + "step": 600 + }, + { + "epoch": 0.76, + "learning_rate": 2.8568037886524548e-06, + "loss": 3.3578, + "step": 601 + }, + { + "epoch": 0.76, + "learning_rate": 2.8281622728603862e-06, + "loss": 3.2952, + "step": 602 + }, + { + "epoch": 0.76, + "learning_rate": 2.79964139171369e-06, + "loss": 3.2715, + "step": 603 + }, + { + "epoch": 0.76, + "learning_rate": 2.771241624950818e-06, + "loss": 3.2274, + "step": 604 + }, + { + "epoch": 0.77, + "learning_rate": 2.742963450273016e-06, + "loss": 3.1811, + "step": 605 + }, + { + "epoch": 0.77, + "learning_rate": 2.7148073433362732e-06, + "loss": 3.3272, + "step": 606 + }, + { + "epoch": 0.77, + "learning_rate": 2.6867737777433276e-06, + "loss": 3.2594, + "step": 607 + }, + { + "epoch": 0.77, + "learning_rate": 2.658863225035695e-06, + "loss": 3.3312, + "step": 608 + }, + { + "epoch": 0.77, + "learning_rate": 2.6310761546857433e-06, + "loss": 3.3856, + "step": 609 + }, + { + "epoch": 0.77, + "learning_rate": 2.60341303408879e-06, + "loss": 3.4417, + "step": 610 + }, + { + "epoch": 0.77, + "learning_rate": 2.5758743285552435e-06, + "loss": 3.4408, + "step": 611 + }, + { + "epoch": 0.77, + "learning_rate": 2.5484605013027787e-06, + "loss": 3.147, + "step": 612 + }, + { + "epoch": 0.78, + "learning_rate": 2.521172013448534e-06, + "loss": 3.1479, + "step": 613 + }, + { + "epoch": 0.78, + "learning_rate": 2.494009324001372e-06, + "loss": 3.1448, + "step": 614 + }, + { + "epoch": 0.78, + "learning_rate": 2.4669728898541456e-06, + "loss": 3.0974, + "step": 615 + }, + { + "epoch": 0.78, + "learning_rate": 2.440063165776019e-06, + "loss": 3.1905, + "step": 616 + }, + { + "epoch": 0.78, + "learning_rate": 2.4132806044048163e-06, + "loss": 3.4658, + "step": 617 + }, + { + "epoch": 0.78, + "learning_rate": 2.3866256562394084e-06, + "loss": 3.2968, + "step": 618 + }, + { + "epoch": 0.78, + "learning_rate": 2.3600987696321332e-06, + "loss": 3.3054, + "step": 619 + }, + { + "epoch": 0.78, + "learning_rate": 2.333700390781256e-06, + "loss": 3.0319, + "step": 620 + }, + { + "epoch": 0.79, + "learning_rate": 2.3074309637234702e-06, + "loss": 3.2635, + "step": 621 + }, + { + "epoch": 0.79, + "learning_rate": 2.2812909303264084e-06, + "loss": 3.2026, + "step": 622 + }, + { + "epoch": 0.79, + "learning_rate": 2.2552807302812353e-06, + "loss": 3.2623, + "step": 623 + }, + { + "epoch": 0.79, + "learning_rate": 2.2294008010952383e-06, + "loss": 3.2525, + "step": 624 + }, + { + "epoch": 0.79, + "learning_rate": 2.203651578084467e-06, + "loss": 3.2043, + "step": 625 + }, + { + "epoch": 0.79, + "learning_rate": 2.178033494366416e-06, + "loss": 3.1327, + "step": 626 + }, + { + "epoch": 0.79, + "learning_rate": 2.1525469808527376e-06, + "loss": 3.1885, + "step": 627 + }, + { + "epoch": 0.79, + "learning_rate": 2.127192466241994e-06, + "loss": 3.2633, + "step": 628 + }, + { + "epoch": 0.8, + "learning_rate": 2.1019703770124454e-06, + "loss": 3.124, + "step": 629 + }, + { + "epoch": 0.8, + "learning_rate": 2.07688113741488e-06, + "loss": 3.3028, + "step": 630 + }, + { + "epoch": 0.8, + "learning_rate": 2.0519251694654674e-06, + "loss": 3.1655, + "step": 631 + }, + { + "epoch": 0.8, + "learning_rate": 2.027102892938674e-06, + "loss": 3.4117, + "step": 632 + }, + { + "epoch": 0.8, + "learning_rate": 2.0024147253601957e-06, + "loss": 3.1954, + "step": 633 + }, + { + "epoch": 0.8, + "learning_rate": 1.977861081999931e-06, + "loss": 3.1659, + "step": 634 + }, + { + "epoch": 0.8, + "learning_rate": 1.9534423758650043e-06, + "loss": 3.1071, + "step": 635 + }, + { + "epoch": 0.81, + "learning_rate": 1.92915901769281e-06, + "loss": 3.2877, + "step": 636 + }, + { + "epoch": 0.81, + "learning_rate": 1.9050114159441135e-06, + "loss": 3.3129, + "step": 637 + }, + { + "epoch": 0.81, + "learning_rate": 1.880999976796164e-06, + "loss": 3.3743, + "step": 638 + }, + { + "epoch": 0.81, + "learning_rate": 1.8571251041358895e-06, + "loss": 3.129, + "step": 639 + }, + { + "epoch": 0.81, + "learning_rate": 1.8333871995530728e-06, + "loss": 3.2712, + "step": 640 + }, + { + "epoch": 0.81, + "learning_rate": 1.809786662333619e-06, + "loss": 3.1361, + "step": 641 + }, + { + "epoch": 0.81, + "learning_rate": 1.786323889452828e-06, + "loss": 3.2079, + "step": 642 + }, + { + "epoch": 0.81, + "learning_rate": 1.762999275568721e-06, + "loss": 3.2124, + "step": 643 + }, + { + "epoch": 0.82, + "learning_rate": 1.739813213015401e-06, + "loss": 3.328, + "step": 644 + }, + { + "epoch": 0.82, + "learning_rate": 1.7167660917964557e-06, + "loss": 3.1001, + "step": 645 + }, + { + "epoch": 0.82, + "learning_rate": 1.6938582995783958e-06, + "loss": 3.3397, + "step": 646 + }, + { + "epoch": 0.82, + "learning_rate": 1.6710902216841241e-06, + "loss": 3.2774, + "step": 647 + }, + { + "epoch": 0.82, + "learning_rate": 1.6484622410864837e-06, + "loss": 3.3643, + "step": 648 + }, + { + "epoch": 0.82, + "learning_rate": 1.6259747384017766e-06, + "loss": 3.2594, + "step": 649 + }, + { + "epoch": 0.82, + "learning_rate": 1.6036280918833924e-06, + "loss": 3.1878, + "step": 650 + }, + { + "epoch": 0.82, + "learning_rate": 1.5814226774154328e-06, + "loss": 3.2801, + "step": 651 + }, + { + "epoch": 0.83, + "learning_rate": 1.5593588685063899e-06, + "loss": 3.1553, + "step": 652 + }, + { + "epoch": 0.83, + "learning_rate": 1.5374370362828662e-06, + "loss": 3.1934, + "step": 653 + }, + { + "epoch": 0.83, + "learning_rate": 1.515657549483328e-06, + "loss": 3.1606, + "step": 654 + }, + { + "epoch": 0.83, + "learning_rate": 1.494020774451912e-06, + "loss": 3.2433, + "step": 655 + }, + { + "epoch": 0.83, + "learning_rate": 1.472527075132245e-06, + "loss": 3.2234, + "step": 656 + }, + { + "epoch": 0.83, + "learning_rate": 1.4511768130613434e-06, + "loss": 3.1697, + "step": 657 + }, + { + "epoch": 0.83, + "learning_rate": 1.4299703473635217e-06, + "loss": 3.3123, + "step": 658 + }, + { + "epoch": 0.83, + "learning_rate": 1.4089080347443485e-06, + "loss": 3.2725, + "step": 659 + }, + { + "epoch": 0.84, + "learning_rate": 1.3879902294846537e-06, + "loss": 3.2027, + "step": 660 + }, + { + "epoch": 0.84, + "learning_rate": 1.3672172834345632e-06, + "loss": 3.2765, + "step": 661 + }, + { + "epoch": 0.84, + "learning_rate": 1.3465895460075872e-06, + "loss": 3.1554, + "step": 662 + }, + { + "epoch": 0.84, + "learning_rate": 1.3261073641747358e-06, + "loss": 3.2031, + "step": 663 + }, + { + "epoch": 0.84, + "learning_rate": 1.30577108245869e-06, + "loss": 3.2662, + "step": 664 + }, + { + "epoch": 0.84, + "learning_rate": 1.2855810429279958e-06, + "loss": 3.2323, + "step": 665 + }, + { + "epoch": 0.84, + "learning_rate": 1.2655375851913232e-06, + "loss": 3.2345, + "step": 666 + }, + { + "epoch": 0.84, + "learning_rate": 1.2456410463917445e-06, + "loss": 3.1215, + "step": 667 + }, + { + "epoch": 0.85, + "learning_rate": 1.22589176120107e-06, + "loss": 3.3559, + "step": 668 + }, + { + "epoch": 0.85, + "learning_rate": 1.2062900618142136e-06, + "loss": 3.3381, + "step": 669 + }, + { + "epoch": 0.85, + "learning_rate": 1.186836277943606e-06, + "loss": 3.1843, + "step": 670 + }, + { + "epoch": 0.85, + "learning_rate": 1.1675307368136513e-06, + "loss": 3.1898, + "step": 671 + }, + { + "epoch": 0.85, + "learning_rate": 1.1483737631552161e-06, + "loss": 3.1972, + "step": 672 + }, + { + "epoch": 0.85, + "learning_rate": 1.1293656792001817e-06, + "loss": 3.2011, + "step": 673 + }, + { + "epoch": 0.85, + "learning_rate": 1.1105068046760048e-06, + "loss": 3.2577, + "step": 674 + }, + { + "epoch": 0.85, + "learning_rate": 1.0917974568003552e-06, + "loss": 3.2048, + "step": 675 + }, + { + "epoch": 0.86, + "learning_rate": 1.0732379502757717e-06, + "loss": 3.1821, + "step": 676 + }, + { + "epoch": 0.86, + "learning_rate": 1.054828597284372e-06, + "loss": 3.2322, + "step": 677 + }, + { + "epoch": 0.86, + "learning_rate": 1.036569707482602e-06, + "loss": 3.3726, + "step": 678 + }, + { + "epoch": 0.86, + "learning_rate": 1.0184615879960224e-06, + "loss": 3.1546, + "step": 679 + }, + { + "epoch": 0.86, + "learning_rate": 1.0005045434141503e-06, + "loss": 3.2868, + "step": 680 + }, + { + "epoch": 0.86, + "learning_rate": 9.826988757853228e-07, + "loss": 3.2473, + "step": 681 + }, + { + "epoch": 0.86, + "learning_rate": 9.6504488461164e-07, + "loss": 3.2338, + "step": 682 + }, + { + "epoch": 0.86, + "learning_rate": 9.47542866843899e-07, + "loss": 3.3132, + "step": 683 + }, + { + "epoch": 0.87, + "learning_rate": 9.301931168766165e-07, + "loss": 3.266, + "step": 684 + }, + { + "epoch": 0.87, + "learning_rate": 9.129959265430766e-07, + "loss": 3.2878, + "step": 685 + }, + { + "epoch": 0.87, + "learning_rate": 8.959515851104117e-07, + "loss": 3.1873, + "step": 686 + }, + { + "epoch": 0.87, + "learning_rate": 8.790603792747499e-07, + "loss": 3.0949, + "step": 687 + }, + { + "epoch": 0.87, + "learning_rate": 8.623225931563806e-07, + "loss": 3.1953, + "step": 688 + }, + { + "epoch": 0.87, + "learning_rate": 8.457385082949842e-07, + "loss": 3.1766, + "step": 689 + }, + { + "epoch": 0.87, + "learning_rate": 8.293084036448895e-07, + "loss": 3.3156, + "step": 690 + }, + { + "epoch": 0.87, + "learning_rate": 8.130325555703911e-07, + "loss": 3.1429, + "step": 691 + }, + { + "epoch": 0.88, + "learning_rate": 7.969112378410882e-07, + "loss": 3.2375, + "step": 692 + }, + { + "epoch": 0.88, + "learning_rate": 7.809447216272892e-07, + "loss": 3.2285, + "step": 693 + }, + { + "epoch": 0.88, + "learning_rate": 7.651332754954477e-07, + "loss": 3.2257, + "step": 694 + }, + { + "epoch": 0.88, + "learning_rate": 7.494771654036448e-07, + "loss": 3.1945, + "step": 695 + }, + { + "epoch": 0.88, + "learning_rate": 7.33976654697115e-07, + "loss": 3.1229, + "step": 696 + }, + { + "epoch": 0.88, + "learning_rate": 7.186320041038175e-07, + "loss": 3.2002, + "step": 697 + }, + { + "epoch": 0.88, + "learning_rate": 7.034434717300509e-07, + "loss": 3.1972, + "step": 698 + }, + { + "epoch": 0.88, + "learning_rate": 6.884113130561043e-07, + "loss": 3.0296, + "step": 699 + }, + { + "epoch": 0.89, + "learning_rate": 6.735357809319809e-07, + "loss": 3.3183, + "step": 700 + }, + { + "epoch": 0.89, + "learning_rate": 6.588171255731157e-07, + "loss": 3.2142, + "step": 701 + }, + { + "epoch": 0.89, + "learning_rate": 6.442555945561901e-07, + "loss": 3.1247, + "step": 702 + }, + { + "epoch": 0.89, + "learning_rate": 6.298514328149574e-07, + "loss": 3.2135, + "step": 703 + }, + { + "epoch": 0.89, + "learning_rate": 6.156048826361239e-07, + "loss": 3.2563, + "step": 704 + }, + { + "epoch": 0.89, + "learning_rate": 6.015161836552764e-07, + "loss": 3.1705, + "step": 705 + }, + { + "epoch": 0.89, + "learning_rate": 5.875855728528412e-07, + "loss": 3.3331, + "step": 706 + }, + { + "epoch": 0.89, + "learning_rate": 5.738132845501199e-07, + "loss": 3.2448, + "step": 707 + }, + { + "epoch": 0.9, + "learning_rate": 5.601995504053193e-07, + "loss": 3.2878, + "step": 708 + }, + { + "epoch": 0.9, + "learning_rate": 5.467445994096821e-07, + "loss": 3.2399, + "step": 709 + }, + { + "epoch": 0.9, + "learning_rate": 5.334486578836118e-07, + "loss": 3.3755, + "step": 710 + }, + { + "epoch": 0.9, + "learning_rate": 5.203119494728826e-07, + "loss": 3.2725, + "step": 711 + }, + { + "epoch": 0.9, + "learning_rate": 5.073346951448699e-07, + "loss": 3.3156, + "step": 712 + }, + { + "epoch": 0.9, + "learning_rate": 4.945171131848358e-07, + "loss": 3.2566, + "step": 713 + }, + { + "epoch": 0.9, + "learning_rate": 4.818594191922577e-07, + "loss": 3.2779, + "step": 714 + }, + { + "epoch": 0.91, + "learning_rate": 4.6936182607719373e-07, + "loss": 3.1843, + "step": 715 + }, + { + "epoch": 0.91, + "learning_rate": 4.5702454405672004e-07, + "loss": 3.2624, + "step": 716 + }, + { + "epoch": 0.91, + "learning_rate": 4.448477806513729e-07, + "loss": 3.2075, + "step": 717 + }, + { + "epoch": 0.91, + "learning_rate": 4.328317406816751e-07, + "loss": 3.2446, + "step": 718 + }, + { + "epoch": 0.91, + "learning_rate": 4.2097662626468085e-07, + "loss": 3.1551, + "step": 719 + }, + { + "epoch": 0.91, + "learning_rate": 4.0928263681057956e-07, + "loss": 3.2224, + "step": 720 + }, + { + "epoch": 0.91, + "learning_rate": 3.97749969019342e-07, + "loss": 3.2041, + "step": 721 + }, + { + "epoch": 0.91, + "learning_rate": 3.863788168774119e-07, + "loss": 3.2021, + "step": 722 + }, + { + "epoch": 0.92, + "learning_rate": 3.7516937165444136e-07, + "loss": 3.2621, + "step": 723 + }, + { + "epoch": 0.92, + "learning_rate": 3.6412182190007086e-07, + "loss": 3.3452, + "step": 724 + }, + { + "epoch": 0.92, + "learning_rate": 3.5323635344077123e-07, + "loss": 3.1605, + "step": 725 + }, + { + "epoch": 0.92, + "learning_rate": 3.4251314937669313e-07, + "loss": 3.2814, + "step": 726 + }, + { + "epoch": 0.92, + "learning_rate": 3.3195239007861815e-07, + "loss": 3.2987, + "step": 727 + }, + { + "epoch": 0.92, + "learning_rate": 3.2155425318489584e-07, + "loss": 3.1901, + "step": 728 + }, + { + "epoch": 0.92, + "learning_rate": 3.1131891359847397e-07, + "loss": 3.3101, + "step": 729 + }, + { + "epoch": 0.92, + "learning_rate": 3.012465434839529e-07, + "loss": 3.0684, + "step": 730 + }, + { + "epoch": 0.93, + "learning_rate": 2.913373122646845e-07, + "loss": 3.1588, + "step": 731 + }, + { + "epoch": 0.93, + "learning_rate": 2.8159138661992824e-07, + "loss": 3.2064, + "step": 732 + }, + { + "epoch": 0.93, + "learning_rate": 2.720089304820417e-07, + "loss": 3.1514, + "step": 733 + }, + { + "epoch": 0.93, + "learning_rate": 2.6259010503373206e-07, + "loss": 3.3542, + "step": 734 + }, + { + "epoch": 0.93, + "learning_rate": 2.533350687053338e-07, + "loss": 3.1581, + "step": 735 + }, + { + "epoch": 0.93, + "learning_rate": 2.442439771721539e-07, + "loss": 3.217, + "step": 736 + }, + { + "epoch": 0.93, + "learning_rate": 2.353169833518454e-07, + "loss": 3.1335, + "step": 737 + }, + { + "epoch": 0.93, + "learning_rate": 2.2655423740183925e-07, + "loss": 3.122, + "step": 738 + }, + { + "epoch": 0.94, + "learning_rate": 2.179558867168219e-07, + "loss": 3.0938, + "step": 739 + }, + { + "epoch": 0.94, + "learning_rate": 2.0952207592624508e-07, + "loss": 3.1366, + "step": 740 + }, + { + "epoch": 0.94, + "learning_rate": 2.0125294689190666e-07, + "loss": 3.1353, + "step": 741 + }, + { + "epoch": 0.94, + "learning_rate": 1.9314863870555257e-07, + "loss": 3.208, + "step": 742 + }, + { + "epoch": 0.94, + "learning_rate": 1.8520928768654745e-07, + "loss": 3.1476, + "step": 743 + }, + { + "epoch": 0.94, + "learning_rate": 1.7743502737957107e-07, + "loss": 3.0601, + "step": 744 + }, + { + "epoch": 0.94, + "learning_rate": 1.6982598855238564e-07, + "loss": 3.238, + "step": 745 + }, + { + "epoch": 0.94, + "learning_rate": 1.6238229919361858e-07, + "loss": 3.1862, + "step": 746 + }, + { + "epoch": 0.95, + "learning_rate": 1.5510408451062552e-07, + "loss": 3.1664, + "step": 747 + }, + { + "epoch": 0.95, + "learning_rate": 1.4799146692737742e-07, + "loss": 3.2697, + "step": 748 + }, + { + "epoch": 0.95, + "learning_rate": 1.410445660823989e-07, + "loss": 3.149, + "step": 749 + }, + { + "epoch": 0.95, + "learning_rate": 1.3426349882676326e-07, + "loss": 3.2801, + "step": 750 + }, + { + "epoch": 0.95, + "learning_rate": 1.2764837922211947e-07, + "loss": 3.2374, + "step": 751 + }, + { + "epoch": 0.95, + "learning_rate": 1.211993185387772e-07, + "loss": 3.2966, + "step": 752 + }, + { + "epoch": 0.95, + "learning_rate": 1.1491642525383595e-07, + "loss": 3.2834, + "step": 753 + }, + { + "epoch": 0.95, + "learning_rate": 1.0879980504935772e-07, + "loss": 3.1645, + "step": 754 + }, + { + "epoch": 0.96, + "learning_rate": 1.0284956081059171e-07, + "loss": 3.1979, + "step": 755 + }, + { + "epoch": 0.96, + "learning_rate": 9.706579262424132e-08, + "loss": 3.2138, + "step": 756 + }, + { + "epoch": 0.96, + "learning_rate": 9.144859777678539e-08, + "loss": 3.284, + "step": 757 + }, + { + "epoch": 0.96, + "learning_rate": 8.599807075283406e-08, + "loss": 3.1706, + "step": 758 + }, + { + "epoch": 0.96, + "learning_rate": 8.071430323354778e-08, + "loss": 3.354, + "step": 759 + }, + { + "epoch": 0.96, + "learning_rate": 7.559738409508854e-08, + "loss": 3.2734, + "step": 760 + }, + { + "epoch": 0.96, + "learning_rate": 7.064739940713217e-08, + "loss": 3.1273, + "step": 761 + }, + { + "epoch": 0.96, + "learning_rate": 6.586443243140839e-08, + "loss": 3.2327, + "step": 762 + }, + { + "epoch": 0.97, + "learning_rate": 6.12485636203164e-08, + "loss": 3.2064, + "step": 763 + }, + { + "epoch": 0.97, + "learning_rate": 5.679987061555703e-08, + "loss": 3.2127, + "step": 764 + }, + { + "epoch": 0.97, + "learning_rate": 5.251842824683717e-08, + "loss": 3.1752, + "step": 765 + }, + { + "epoch": 0.97, + "learning_rate": 4.840430853060518e-08, + "loss": 3.198, + "step": 766 + }, + { + "epoch": 0.97, + "learning_rate": 4.4457580668839653e-08, + "loss": 3.1497, + "step": 767 + }, + { + "epoch": 0.97, + "learning_rate": 4.067831104789033e-08, + "loss": 3.2571, + "step": 768 + }, + { + "epoch": 0.97, + "learning_rate": 3.706656323735569e-08, + "loss": 3.1735, + "step": 769 + }, + { + "epoch": 0.97, + "learning_rate": 3.362239798901712e-08, + "loss": 3.1884, + "step": 770 + }, + { + "epoch": 0.98, + "learning_rate": 3.034587323581639e-08, + "loss": 3.1669, + "step": 771 + }, + { + "epoch": 0.98, + "learning_rate": 2.7237044090879795e-08, + "loss": 3.2778, + "step": 772 + }, + { + "epoch": 0.98, + "learning_rate": 2.429596284659219e-08, + "loss": 3.2229, + "step": 773 + }, + { + "epoch": 0.98, + "learning_rate": 2.1522678973718848e-08, + "loss": 3.1956, + "step": 774 + }, + { + "epoch": 0.98, + "learning_rate": 1.891723912056942e-08, + "loss": 3.074, + "step": 775 + }, + { + "epoch": 0.98, + "learning_rate": 1.6479687112217478e-08, + "loss": 3.1022, + "step": 776 + }, + { + "epoch": 0.98, + "learning_rate": 1.421006394976221e-08, + "loss": 3.268, + "step": 777 + }, + { + "epoch": 0.98, + "learning_rate": 1.2108407809635624e-08, + "loss": 3.2414, + "step": 778 + }, + { + "epoch": 0.99, + "learning_rate": 1.0174754042964197e-08, + "loss": 3.1635, + "step": 779 + }, + { + "epoch": 0.99, + "learning_rate": 8.40913517497377e-09, + "loss": 3.3233, + "step": 780 + }, + { + "epoch": 0.99, + "learning_rate": 6.8115809044411174e-09, + "loss": 3.2068, + "step": 781 + }, + { + "epoch": 0.99, + "learning_rate": 5.3821181031932235e-09, + "loss": 3.1507, + "step": 782 + }, + { + "epoch": 0.99, + "learning_rate": 4.120770815659869e-09, + "loss": 3.2037, + "step": 783 + }, + { + "epoch": 0.99, + "learning_rate": 3.0275602584650677e-09, + "loss": 3.1867, + "step": 784 + }, + { + "epoch": 0.99, + "learning_rate": 2.102504820069573e-09, + "loss": 3.2221, + "step": 785 + }, + { + "epoch": 0.99, + "learning_rate": 1.345620060465569e-09, + "loss": 3.1024, + "step": 786 + }, + { + "epoch": 1.0, + "learning_rate": 7.569187109124354e-10, + "loss": 3.1997, + "step": 787 + }, + { + "epoch": 1.0, + "learning_rate": 3.3641067372358617e-10, + "loss": 3.1689, + "step": 788 + }, + { + "epoch": 1.0, + "learning_rate": 8.410302209660437e-11, + "loss": 3.1614, + "step": 789 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 3.105, + "step": 790 + }, + { + "epoch": 1.0, + "step": 790, + "total_flos": 261024117719040.0, + "train_loss": 4.121155046209505, + "train_runtime": 3165.0402, + "train_samples_per_second": 31.949, + "train_steps_per_second": 0.25 + } + ], + "logging_steps": 1.0, + "max_steps": 790, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 50000, + "total_flos": 261024117719040.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}