{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.999458581483487,
  "global_step": 3692,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "learning_rate": 5.405405405405406e-06, "loss": 2.6714, "step": 2 },
    { "epoch": 0.0, "learning_rate": 1.0810810810810812e-05, "loss": 2.4616, "step": 4 },
    { "epoch": 0.01, "learning_rate": 1.6216216216216218e-05, "loss": 2.5089, "step": 6 },
    { "epoch": 0.01, "learning_rate": 2.1621621621621624e-05, "loss": 2.5389, "step": 8 },
    { "epoch": 0.01, "learning_rate": 2.702702702702703e-05, "loss": 2.4892, "step": 10 },
    { "epoch": 0.01, "learning_rate": 3.2432432432432436e-05, "loss": 2.4949, "step": 12 },
    { "epoch": 0.02, "learning_rate": 3.783783783783784e-05, "loss": 2.5016, "step": 14 },
    { "epoch": 0.02, "learning_rate": 4.324324324324325e-05, "loss": 2.5727, "step": 16 },
    { "epoch": 0.02, "learning_rate": 4.8648648648648654e-05, "loss": 2.605, "step": 18 },
    { "epoch": 0.02, "learning_rate": 5.405405405405406e-05, "loss": 2.5045, "step": 20 },
    { "epoch": 0.02, "learning_rate": 5.9459459459459466e-05, "loss": 2.5509, "step": 22 },
    { "epoch": 0.03, "learning_rate": 6.486486486486487e-05, "loss": 2.6363, "step": 24 },
    { "epoch": 0.03, "learning_rate": 7.027027027027028e-05, "loss": 2.3918, "step": 26 },
    { "epoch": 0.03, "learning_rate": 7.567567567567568e-05, "loss": 2.5172, "step": 28 },
    { "epoch": 0.03, "learning_rate": 8.108108108108109e-05, "loss": 2.6391, "step": 30 },
    { "epoch": 0.03, "learning_rate": 8.64864864864865e-05, "loss": 2.3696, "step": 32 },
    { "epoch": 0.04, "learning_rate": 9.18918918918919e-05, "loss": 2.4359, "step": 34 },
    { "epoch": 0.04, "learning_rate": 9.729729729729731e-05, "loss": 2.5197, "step": 36 },
    { "epoch": 0.04, "learning_rate": 9.999998153008212e-05, "loss": 2.5001, "step": 38 },
    { "epoch": 0.04, "learning_rate": 9.999983377082087e-05, "loss": 2.7154, "step": 40 },
    { "epoch": 0.05, "learning_rate": 9.999953825273504e-05, "loss": 2.5842, "step": 42 },
    { "epoch": 0.05, "learning_rate": 9.999909497669792e-05, "loss": 2.4231, "step": 44 },
    { "epoch": 0.05, "learning_rate": 9.99985039440195e-05, "loss": 2.6799, "step": 46 },
    { "epoch": 0.05, "learning_rate": 9.999776515644638e-05, "loss": 2.5285, "step": 48 },
    { "epoch": 0.05, "learning_rate": 9.999687861616181e-05, "loss": 2.6049, "step": 50 },
    { "epoch": 0.06, "learning_rate": 9.999584432578569e-05, "loss": 2.46, "step": 52 },
    { "epoch": 0.06, "learning_rate": 9.999466228837451e-05, "loss": 2.5835, "step": 54 },
    { "epoch": 0.06, "learning_rate": 9.999333250742145e-05, "loss": 2.6909, "step": 56 },
    { "epoch": 0.06, "learning_rate": 9.999185498685624e-05, "loss": 2.4055, "step": 58 },
    { "epoch": 0.06, "learning_rate": 9.999022973104525e-05, "loss": 2.5816, "step": 60 },
    { "epoch": 0.07, "learning_rate": 9.99884567447914e-05, "loss": 2.5871, "step": 62 },
    { "epoch": 0.07, "learning_rate": 9.998653603333418e-05, "loss": 2.701, "step": 64 },
    { "epoch": 0.07, "learning_rate": 9.998446760234966e-05, "loss": 2.5853, "step": 66 },
    { "epoch": 0.07, "learning_rate": 9.998225145795046e-05, "loss": 2.4068, "step": 68 },
    { "epoch": 0.08, "learning_rate": 9.997988760668566e-05, "loss": 2.5417, "step": 70 },
    { "epoch": 0.08, "learning_rate": 9.997737605554092e-05, "loss": 2.531, "step": 72 },
    { "epoch": 0.08, "learning_rate": 9.997471681193833e-05, "loss": 2.5509, "step": 74 },
    { "epoch": 0.08, "learning_rate": 9.997190988373644e-05, "loss": 2.4764, "step": 76 },
    { "epoch": 0.08, "learning_rate": 9.996895527923023e-05, "loss": 2.4522, "step": 78 },
    { "epoch": 0.09, "learning_rate": 9.996585300715116e-05, "loss": 2.332, "step": 80 },
    { "epoch": 0.09, "learning_rate": 9.996260307666696e-05, "loss": 2.518, "step": 82 },
    { "epoch": 0.09, "learning_rate": 9.995920549738183e-05, "loss": 2.5257, "step": 84 },
    { "epoch": 0.09, "learning_rate": 9.995566027933621e-05, "loss": 2.4631, "step": 86 },
    { "epoch": 0.1, "learning_rate": 9.995196743300692e-05, "loss": 2.6791, "step": 88 },
    { "epoch": 0.1, "learning_rate": 9.994812696930698e-05, "loss": 2.5043, "step": 90 },
    { "epoch": 0.1, "learning_rate": 9.994413889958568e-05, "loss": 2.5534, "step": 92 },
    { "epoch": 0.1, "learning_rate": 9.994000323562852e-05, "loss": 2.3725, "step": 94 },
    { "epoch": 0.1, "learning_rate": 9.993571998965714e-05, "loss": 2.7813, "step": 96 },
    { "epoch": 0.11, "learning_rate": 9.993128917432934e-05, "loss": 2.4455, "step": 98 },
    { "epoch": 0.11, "learning_rate": 9.992671080273903e-05, "loss": 2.4049, "step": 100 },
    { "epoch": 0.11, "learning_rate": 9.992198488841611e-05, "loss": 2.4881, "step": 102 },
    { "epoch": 0.11, "learning_rate": 9.991711144532654e-05, "loss": 2.4836, "step": 104 },
    { "epoch": 0.11, "learning_rate": 9.991209048787228e-05, "loss": 2.4244, "step": 106 },
    { "epoch": 0.12, "learning_rate": 9.990692203089119e-05, "loss": 2.5105, "step": 108 },
    { "epoch": 0.12, "learning_rate": 9.9901606089657e-05, "loss": 2.6217, "step": 110 },
    { "epoch": 0.12, "learning_rate": 9.989614267987933e-05, "loss": 2.5529, "step": 112 },
    { "epoch": 0.12, "learning_rate": 9.989053181770356e-05, "loss": 2.512, "step": 114 },
    { "epoch": 0.13, "learning_rate": 9.988477351971084e-05, "loss": 2.5365, "step": 116 },
    { "epoch": 0.13, "learning_rate": 9.9878867802918e-05, "loss": 2.6218, "step": 118 },
    { "epoch": 0.13, "learning_rate": 9.987281468477756e-05, "loss": 2.4998, "step": 120 },
    { "epoch": 0.13, "learning_rate": 9.986661418317759e-05, "loss": 2.4597, "step": 122 },
    { "epoch": 0.13, "learning_rate": 9.986026631644173e-05, "loss": 2.6517, "step": 124 },
    { "epoch": 0.14, "learning_rate": 9.985377110332912e-05, "loss": 2.6685, "step": 126 },
    { "epoch": 0.14, "learning_rate": 9.984712856303432e-05, "loss": 2.5139, "step": 128 },
    { "epoch": 0.14, "learning_rate": 9.984033871518727e-05, "loss": 2.4927, "step": 130 },
    { "epoch": 0.14, "learning_rate": 9.983340157985324e-05, "loss": 2.4832, "step": 132 },
    { "epoch": 0.15, "learning_rate": 9.982631717753275e-05, "loss": 2.5724, "step": 134 },
    { "epoch": 0.15, "learning_rate": 9.981908552916153e-05, "loss": 2.5026, "step": 136 },
    { "epoch": 0.15, "learning_rate": 9.981170665611046e-05, "loss": 2.4104, "step": 138 },
    { "epoch": 0.15, "learning_rate": 9.980418058018547e-05, "loss": 2.4806, "step": 140 },
    { "epoch": 0.15, "learning_rate": 9.979650732362753e-05, "loss": 2.3937, "step": 142 },
    { "epoch": 0.16, "learning_rate": 9.978868690911253e-05, "loss": 2.7488, "step": 144 },
    { "epoch": 0.16, "learning_rate": 9.978071935975126e-05, "loss": 2.5604, "step": 146 },
    { "epoch": 0.16, "learning_rate": 9.977260469908931e-05, "loss": 2.4197, "step": 148 },
    { "epoch": 0.16, "learning_rate": 9.976434295110701e-05, "loss": 2.6003, "step": 150 },
    { "epoch": 0.16, "learning_rate": 9.975593414021938e-05, "loss": 2.4822, "step": 152 },
    { "epoch": 0.17, "learning_rate": 9.974737829127602e-05, "loss": 2.4176, "step": 154 },
    { "epoch": 0.17, "learning_rate": 9.973867542956104e-05, "loss": 2.5665, "step": 156 },
    { "epoch": 0.17, "learning_rate": 9.972982558079303e-05, "loss": 2.7229, "step": 158 },
    { "epoch": 0.17, "learning_rate": 9.972082877112494e-05, "loss": 2.7438, "step": 160 },
    { "epoch": 0.18, "learning_rate": 9.971168502714401e-05, "loss": 2.5602, "step": 162 },
    { "epoch": 0.18, "learning_rate": 9.970239437587174e-05, "loss": 2.5438, "step": 164 },
    { "epoch": 0.18, "learning_rate": 9.96929568447637e-05, "loss": 2.5334, "step": 166 },
    { "epoch": 0.18, "learning_rate": 9.968337246170956e-05, "loss": 2.496, "step": 168 },
    { "epoch": 0.18, "learning_rate": 9.967364125503295e-05, "loss": 2.4688, "step": 170 },
    { "epoch": 0.19, "learning_rate": 9.966376325349143e-05, "loss": 2.5845, "step": 172 },
    { "epoch": 0.19, "learning_rate": 9.965373848627631e-05, "loss": 2.3486, "step": 174 },
    { "epoch": 0.19, "learning_rate": 9.964356698301264e-05, "loss": 2.6254, "step": 176 },
    { "epoch": 0.19, "learning_rate": 9.963324877375912e-05, "loss": 2.2205, "step": 178 },
    { "epoch": 0.19, "learning_rate": 9.9622783889008e-05, "loss": 2.4156, "step": 180 },
    { "epoch": 0.2, "learning_rate": 9.961217235968494e-05, "loss": 2.6328, "step": 182 },
    { "epoch": 0.2, "learning_rate": 9.960141421714897e-05, "loss": 2.5354, "step": 184 },
    { "epoch": 0.2, "learning_rate": 9.959050949319244e-05, "loss": 2.4779, "step": 186 },
    { "epoch": 0.2, "learning_rate": 9.957945822004083e-05, "loss": 2.4148, "step": 188 },
    { "epoch": 0.21, "learning_rate": 9.956826043035268e-05, "loss": 2.5947, "step": 190 },
    { "epoch": 0.21, "learning_rate": 9.95569161572196e-05, "loss": 2.6662, "step": 192 },
    { "epoch": 0.21, "learning_rate": 9.954542543416599e-05, "loss": 2.4853, "step": 194 },
    { "epoch": 0.21, "learning_rate": 9.953378829514908e-05, "loss": 2.4561, "step": 196 },
    { "epoch": 0.21, "learning_rate": 9.952200477455881e-05, "loss": 2.5139, "step": 198 },
    { "epoch": 0.22, "learning_rate": 9.951007490721766e-05, "loss": 2.5978, "step": 200 },
    { "epoch": 0.22, "learning_rate": 9.949799872838061e-05, "loss": 2.6275, "step": 202 },
    { "epoch": 0.22, "learning_rate": 9.948577627373503e-05, "loss": 2.4584, "step": 204 },
    { "epoch": 0.22, "learning_rate": 9.947340757940053e-05, "loss": 2.3811, "step": 206 },
    { "epoch": 0.23, "learning_rate": 9.946089268192895e-05, "loss": 2.4847, "step": 208 },
    { "epoch": 0.23, "learning_rate": 9.944823161830407e-05, "loss": 2.5872, "step": 210 },
    { "epoch": 0.23, "learning_rate": 9.943542442594177e-05, "loss": 2.7864, "step": 212 },
    { "epoch": 0.23, "learning_rate": 9.942247114268964e-05, "loss": 2.5991, "step": 214 },
    { "epoch": 0.23, "learning_rate": 9.940937180682706e-05, "loss": 2.4734, "step": 216 },
    { "epoch": 0.24, "learning_rate": 9.9396126457065e-05, "loss": 2.6016, "step": 218 },
    { "epoch": 0.24, "learning_rate": 9.938273513254597e-05, "loss": 2.5428, "step": 220 },
    { "epoch": 0.24, "learning_rate": 9.936919787284378e-05, "loss": 2.6381, "step": 222 },
    { "epoch": 0.24, "learning_rate": 9.935551471796358e-05, "loss": 2.6325, "step": 224 },
    { "epoch": 0.24, "learning_rate": 9.934168570834165e-05, "loss": 2.5066, "step": 226 },
    { "epoch": 0.25, "learning_rate": 9.932771088484527e-05, "loss": 2.5039, "step": 228 },
    { "epoch": 0.25, "learning_rate": 9.931359028877267e-05, "loss": 2.7124, "step": 230 },
    { "epoch": 0.25, "learning_rate": 9.929932396185281e-05, "loss": 2.4925, "step": 232 },
    { "epoch": 0.25, "learning_rate": 9.928491194624539e-05, "loss": 2.579, "step": 234 },
    { "epoch": 0.26, "learning_rate": 9.927035428454055e-05, "loss": 2.6093, "step": 236 },
    { "epoch": 0.26, "learning_rate": 9.925565101975893e-05, "loss": 2.3589, "step": 238 },
    { "epoch": 0.26, "learning_rate": 9.924080219535141e-05, "loss": 2.6058, "step": 240 },
    { "epoch": 0.26, "learning_rate": 9.922580785519901e-05, "loss": 2.5016, "step": 242 },
    { "epoch": 0.26, "learning_rate": 9.921066804361284e-05, "loss": 2.6041, "step": 244 },
    { "epoch": 0.27, "learning_rate": 9.919538280533382e-05, "loss": 2.4222, "step": 246 },
    { "epoch": 0.27, "learning_rate": 9.917995218553271e-05, "loss": 2.5596, "step": 248 },
    { "epoch": 0.27, "learning_rate": 9.916437622980985e-05, "loss": 2.5427, "step": 250 },
    { "epoch": 0.27, "learning_rate": 9.91486549841951e-05, "loss": 2.5865, "step": 252 },
    { "epoch": 0.28, "learning_rate": 9.913278849514765e-05, "loss": 2.4464, "step": 254 },
    { "epoch": 0.28, "learning_rate": 9.911677680955596e-05, "loss": 2.4279, "step": 256 },
    { "epoch": 0.28, "learning_rate": 9.910061997473752e-05, "loss": 2.4858, "step": 258 },
    { "epoch": 0.28, "learning_rate": 9.908431803843881e-05, "loss": 2.3309, "step": 260 },
    { "epoch": 0.28, "learning_rate": 9.906787104883506e-05, "loss": 2.5427, "step": 262 },
    { "epoch": 0.29, "learning_rate": 9.905127905453023e-05, "loss": 2.568, "step": 264 },
    { "epoch": 0.29, "learning_rate": 9.90345421045567e-05, "loss": 2.5368, "step": 266 },
    { "epoch": 0.29, "learning_rate": 9.90176602483753e-05, "loss": 2.562, "step": 268 },
    { "epoch": 0.29, "learning_rate": 9.90006335358751e-05, "loss": 2.43, "step": 270 },
    { "epoch": 0.29, "learning_rate": 9.898346201737317e-05, "loss": 2.6544, "step": 272 },
    { "epoch": 0.3, "learning_rate": 9.896614574361454e-05, "loss": 2.5701, "step": 274 },
    { "epoch": 0.3, "learning_rate": 9.894868476577201e-05, "loss": 2.5294, "step": 276 },
    { "epoch": 0.3, "learning_rate": 9.893107913544609e-05, "loss": 2.6586, "step": 278 },
    { "epoch": 0.3, "learning_rate": 9.891332890466463e-05, "loss": 2.4904, "step": 280 },
    { "epoch": 0.31, "learning_rate": 9.88954341258829e-05, "loss": 2.5686, "step": 282 },
    { "epoch": 0.31, "learning_rate": 9.887739485198331e-05, "loss": 2.5522, "step": 284 },
    { "epoch": 0.31, "learning_rate": 9.885921113627525e-05, "loss": 2.6227, "step": 286 },
    { "epoch": 0.31, "learning_rate": 9.884088303249501e-05, "loss": 2.6265, "step": 288 },
    { "epoch": 0.31, "learning_rate": 9.882241059480555e-05, "loss": 2.6851, "step": 290 },
    { "epoch": 0.32, "learning_rate": 9.880379387779637e-05, "loss": 2.4501, "step": 292 },
    { "epoch": 0.32, "learning_rate": 9.878503293648332e-05, "loss": 2.3563, "step": 294 },
    { "epoch": 0.32, "learning_rate": 9.876612782630848e-05, "loss": 2.3076, "step": 296 },
    { "epoch": 0.32, "learning_rate": 9.874707860313997e-05, "loss": 2.5158, "step": 298 },
    { "epoch": 0.32, "learning_rate": 9.87278853232718e-05, "loss": 2.4241, "step": 300 },
    { "epoch": 0.33, "learning_rate": 9.87085480434237e-05, "loss": 2.6496, "step": 302 },
    { "epoch": 0.33, "learning_rate": 9.868906682074093e-05, "loss": 2.5175, "step": 304 },
    { "epoch": 0.33, "learning_rate": 9.866944171279411e-05, "loss": 2.4658, "step": 306 },
    { "epoch": 0.33, "learning_rate": 9.864967277757911e-05, "loss": 2.3329, "step": 308 },
    { "epoch": 0.34, "learning_rate": 9.862976007351683e-05, "loss": 2.6255, "step": 310 },
    { "epoch": 0.34, "learning_rate": 9.860970365945299e-05, "loss": 2.5342, "step": 312 },
    { "epoch": 0.34, "learning_rate": 9.858950359465805e-05, "loss": 2.3472, "step": 314 },
    { "epoch": 0.34, "learning_rate": 9.856915993882696e-05, "loss": 2.4315, "step": 316 },
    { "epoch": 0.34, "learning_rate": 9.854867275207901e-05, "loss": 2.5811, "step": 318 },
    { "epoch": 0.35, "learning_rate": 9.852804209495766e-05, "loss": 2.5231, "step": 320 },
    { "epoch": 0.35, "learning_rate": 9.850726802843034e-05, "loss": 2.6642, "step": 322 },
    { "epoch": 0.35, "learning_rate": 9.84863506138883e-05, "loss": 2.5686, "step": 324 },
    { "epoch": 0.35, "learning_rate": 9.846528991314639e-05, "loss": 2.7031, "step": 326 },
    { "epoch": 0.36, "learning_rate": 9.844408598844288e-05, "loss": 2.4843, "step": 328 },
    { "epoch": 0.36, "learning_rate": 9.842273890243936e-05, "loss": 2.6201, "step": 330 },
    { "epoch": 0.36, "learning_rate": 9.840124871822041e-05, "loss": 2.4424, "step": 332 },
    { "epoch": 0.36, "learning_rate": 9.837961549929356e-05, "loss": 2.5032, "step": 334 },
    { "epoch": 0.36, "learning_rate": 9.835783930958897e-05, "loss": 2.6527, "step": 336 },
    { "epoch": 0.37, "learning_rate": 9.833592021345937e-05, "loss": 2.4956, "step": 338 },
    { "epoch": 0.37, "learning_rate": 9.831385827567975e-05, "loss": 2.3891, "step": 340 },
    { "epoch": 0.37, "learning_rate": 9.829165356144727e-05, "loss": 2.2116, "step": 342 },
    { "epoch": 0.37, "learning_rate": 9.826930613638098e-05, "loss": 2.5029, "step": 344 },
    { "epoch": 0.37, "learning_rate": 9.824681606652168e-05, "loss": 2.5519, "step": 346 },
    { "epoch": 0.38, "learning_rate": 9.822418341833172e-05, "loss": 2.5432, "step": 348 },
    { "epoch": 0.38, "learning_rate": 9.82014082586948e-05, "loss": 2.5587, "step": 350 },
    { "epoch": 0.38, "learning_rate": 9.817849065491575e-05, "loss": 2.552, "step": 352 },
    { "epoch": 0.38, "learning_rate": 9.815543067472039e-05, "loss": 2.4258, "step": 354 },
    { "epoch": 0.39, "learning_rate": 9.813222838625521e-05, "loss": 2.4393, "step": 356 },
    { "epoch": 0.39, "learning_rate": 9.810888385808732e-05, "loss": 2.5239, "step": 358 },
    { "epoch": 0.39, "learning_rate": 9.808539715920414e-05, "loss": 2.4571, "step": 360 },
    { "epoch": 0.39, "learning_rate": 9.806176835901328e-05, "loss": 2.5202, "step": 362 },
    { "epoch": 0.39, "learning_rate": 9.803799752734219e-05, "loss": 2.4761, "step": 364 },
    { "epoch": 0.4, "learning_rate": 9.801408473443816e-05, "loss": 2.8216, "step": 366 },
    { "epoch": 0.4, "learning_rate": 9.79900300509679e-05, "loss": 2.6249, "step": 368 },
    { "epoch": 0.4, "learning_rate": 9.796583354801752e-05, "loss": 2.5059, "step": 370 },
    { "epoch": 0.4, "learning_rate": 9.794149529709216e-05, "loss": 2.5326, "step": 372 },
    { "epoch": 0.4, "learning_rate": 9.791701537011591e-05, "loss": 2.7352, "step": 374 },
    { "epoch": 0.41, "learning_rate": 9.789239383943152e-05, "loss": 2.6958, "step": 376 },
    { "epoch": 0.41, "learning_rate": 9.78676307778002e-05, "loss": 2.4794, "step": 378 },
    { "epoch": 0.41, "learning_rate": 9.784272625840136e-05, "loss": 2.5559, "step": 380 },
    { "epoch": 0.41, "learning_rate": 9.781768035483256e-05, "loss": 2.4731, "step": 382 },
    { "epoch": 0.42, "learning_rate": 9.779249314110909e-05, "loss": 2.5427, "step": 384 },
    { "epoch": 0.42, "learning_rate": 9.776716469166384e-05, "loss": 2.6534, "step": 386 },
    { "epoch": 0.42, "learning_rate": 9.774169508134715e-05, "loss": 2.5991, "step": 388 },
    { "epoch": 0.42, "learning_rate": 9.771608438542639e-05, "loss": 2.4887, "step": 390 },
    { "epoch": 0.42, "learning_rate": 9.769033267958598e-05, "loss": 2.5762, "step": 392 },
    { "epoch": 0.43, "learning_rate": 9.766444003992703e-05, "loss": 2.3775, "step": 394 },
    { "epoch": 0.43, "learning_rate": 9.763840654296706e-05, "loss": 2.4067, "step": 396 },
    { "epoch": 0.43, "learning_rate": 9.761223226563996e-05, "loss": 2.3338, "step": 398 },
    { "epoch": 0.43, "learning_rate": 9.758591728529555e-05, "loss": 2.4981, "step": 400 },
    { "epoch": 0.44, "learning_rate": 9.755946167969952e-05, "loss": 2.402, "step": 402 },
    { "epoch": 0.44, "learning_rate": 9.753286552703312e-05, "loss": 2.7678, "step": 404 },
    { "epoch": 0.44, "learning_rate": 9.750612890589293e-05, "loss": 2.5216, "step": 406 },
    { "epoch": 0.44, "learning_rate": 9.747925189529063e-05, "loss": 2.3811, "step": 408 },
    { "epoch": 0.44, "learning_rate": 9.745223457465282e-05, "loss": 2.4442, "step": 410 },
    { "epoch": 0.45, "learning_rate": 9.742507702382071e-05, "loss": 2.4474, "step": 412 },
    { "epoch": 0.45, "learning_rate": 9.739777932304992e-05, "loss": 2.5238, "step": 414 },
    { "epoch": 0.45, "learning_rate": 9.737034155301024e-05, "loss": 2.4573, "step": 416 },
    { "epoch": 0.45, "learning_rate": 9.734276379478538e-05, "loss": 2.5096, "step": 418 },
    { "epoch": 0.45, "learning_rate": 9.731504612987279e-05, "loss": 2.5997, "step": 420 },
    { "epoch": 0.46, "learning_rate": 9.728718864018329e-05, "loss": 2.3851, "step": 422 },
    { "epoch": 0.46, "learning_rate": 9.725919140804099e-05, "loss": 2.5155, "step": 424 },
    { "epoch": 0.46, "learning_rate": 9.72310545161829e-05, "loss": 2.614, "step": 426 },
    { "epoch": 0.46, "learning_rate": 9.72027780477588e-05, "loss": 2.5027, "step": 428 },
    { "epoch": 0.47, "learning_rate": 9.717436208633088e-05, "loss": 2.4011, "step": 430 },
    { "epoch": 0.47, "learning_rate": 9.714580671587366e-05, "loss": 2.5327, "step": 432 },
    { "epoch": 0.47, "learning_rate": 9.711711202077354e-05, "loss": 2.3772, "step": 434 },
    { "epoch": 0.47, "learning_rate": 9.708827808582871e-05, "loss": 2.5332, "step": 436 },
    { "epoch": 0.47, "learning_rate": 9.70593049962488e-05, "loss": 2.4211, "step": 438 },
    { "epoch": 0.48, "learning_rate": 9.703019283765471e-05, "loss": 2.5811, "step": 440 },
    { "epoch": 0.48, "learning_rate": 9.700094169607828e-05, "loss": 2.5859, "step": 442 },
    { "epoch": 0.48, "learning_rate": 9.697155165796209e-05, "loss": 2.7103, "step": 444 },
    { "epoch": 0.48, "learning_rate": 9.694202281015918e-05, "loss": 2.5435, "step": 446 },
    { "epoch": 0.49, "learning_rate": 9.691235523993278e-05, "loss": 2.4327, "step": 448 },
    { "epoch": 0.49, "learning_rate": 9.688254903495609e-05, "loss": 2.4772, "step": 450 },
    { "epoch": 0.49, "learning_rate": 9.685260428331202e-05, "loss": 2.5445, "step": 452 },
    { "epoch": 0.49, "learning_rate": 9.682252107349288e-05, "loss": 2.5334, "step": 454 },
    { "epoch": 0.49, "learning_rate": 9.679229949440015e-05, "loss": 2.4343, "step": 456 },
    { "epoch": 0.5, "learning_rate": 9.676193963534423e-05, "loss": 2.4341, "step": 458 },
    { "epoch": 0.5, "learning_rate": 9.673144158604419e-05, "loss": 2.5318, "step": 460 },
    { "epoch": 0.5, "learning_rate": 9.67008054366274e-05, "loss": 2.435, "step": 462 },
    { "epoch": 0.5, "learning_rate": 9.667003127762942e-05, "loss": 2.6514, "step": 464 },
    { "epoch": 0.5, "learning_rate": 9.663911919999362e-05, "loss": 2.4744, "step": 466 },
    { "epoch": 0.51, "learning_rate": 9.660806929507095e-05, "loss": 2.5498, "step": 468 },
    { "epoch": 0.51, "learning_rate": 9.657688165461965e-05, "loss": 2.6276, "step": 470 },
    { "epoch": 0.51, "learning_rate": 9.654555637080502e-05, "loss": 2.6097, "step": 472 },
    { "epoch": 0.51, "learning_rate": 9.65140935361991e-05, "loss": 2.3675, "step": 474 },
    { "epoch": 0.52, "learning_rate": 9.648249324378044e-05, "loss": 2.469, "step": 476 },
    { "epoch": 0.52, "learning_rate": 9.64507555869338e-05, "loss": 2.5554, "step": 478 },
    { "epoch": 0.52, "learning_rate": 9.641888065944984e-05, "loss": 2.35, "step": 480 },
    { "epoch": 0.52, "learning_rate": 9.638686855552494e-05, "loss": 2.495, "step": 482 },
    { "epoch": 0.52, "learning_rate": 9.635471936976081e-05, "loss": 2.4547, "step": 484 },
    { "epoch": 0.53, "learning_rate": 9.63224331971643e-05, "loss": 2.3311, "step": 486 },
    { "epoch": 0.53, "learning_rate": 9.629001013314705e-05, "loss": 2.5145, "step": 488 },
    { "epoch": 0.53, "learning_rate": 9.625745027352526e-05, "loss": 2.5413, "step": 490 },
    { "epoch": 0.53, "learning_rate": 9.622475371451939e-05, "loss": 2.5209, "step": 492 },
    { "epoch": 0.53, "learning_rate": 9.619192055275386e-05, "loss": 2.4376, "step": 494 },
    { "epoch": 0.54, "learning_rate": 9.615895088525677e-05, "loss": 2.455, "step": 496 },
    { "epoch": 0.54, "learning_rate": 9.612584480945964e-05, "loss": 2.3267, "step": 498 },
    { "epoch": 0.54, "learning_rate": 9.609260242319709e-05, "loss": 2.6783, "step": 500 },
    { "epoch": 0.54, "learning_rate": 9.605922382470658e-05, "loss": 2.4699, "step": 502 },
    { "epoch": 0.55, "learning_rate": 9.602570911262805e-05, "loss": 2.4731, "step": 504 },
    { "epoch": 0.55, "learning_rate": 9.599205838600375e-05, "loss": 2.4779, "step": 506 },
    { "epoch": 0.55, "learning_rate": 9.595827174427786e-05, "loss": 2.6002, "step": 508 },
    { "epoch": 0.55, "learning_rate": 9.592434928729616e-05, "loss": 2.5211, "step": 510 },
    { "epoch": 0.55, "learning_rate": 9.589029111530586e-05, "loss": 2.5308, "step": 512 },
    { "epoch": 0.56, "learning_rate": 9.585609732895517e-05, "loss": 2.5043, "step": 514 },
    { "epoch": 0.56, "learning_rate": 9.582176802929315e-05, "loss": 2.4683, "step": 516 },
    { "epoch": 0.56, "learning_rate": 9.578730331776924e-05, "loss": 2.4226, "step": 518 },
    { "epoch": 0.56, "learning_rate": 9.575270329623309e-05, "loss": 2.3188, "step": 520 },
    { "epoch": 0.57, "learning_rate": 9.571796806693422e-05, "loss": 2.6134, "step": 522 },
    { "epoch": 0.57, "learning_rate": 9.568309773252171e-05, "loss": 2.3866, "step": 524 },
    { "epoch": 0.57, "learning_rate": 9.564809239604388e-05, "loss": 2.6209, "step": 526 },
    { "epoch": 0.57, "learning_rate": 9.5612952160948e-05, "loss": 2.5562, "step": 528 },
    { "epoch": 0.57, "learning_rate": 9.557767713108009e-05, "loss": 2.6116, "step": 530 },
    { "epoch": 0.58, "learning_rate": 9.554226741068432e-05, "loss": 2.5081, "step": 532 },
    { "epoch": 0.58, "learning_rate": 9.550672310440311e-05, "loss": 2.7225, "step": 534 },
    { "epoch": 0.58, "learning_rate": 9.547104431727647e-05, "loss": 2.6353, "step": 536 },
    { "epoch": 0.58, "learning_rate": 9.543523115474187e-05, "loss": 2.5658, "step": 538 },
    { "epoch": 0.58, "learning_rate": 9.539928372263387e-05, "loss": 2.5224, "step": 540 },
    { "epoch": 0.59, "learning_rate": 9.536320212718382e-05, "loss": 2.702, "step": 542 },
    { "epoch": 0.59, "learning_rate": 9.532698647501958e-05, "loss": 2.4106, "step": 544 },
    { "epoch": 0.59, "learning_rate": 9.529063687316513e-05, "loss": 2.5009, "step": 546 },
    { "epoch": 0.59, "learning_rate": 9.525415342904034e-05, "loss": 2.4723, "step": 548 },
    { "epoch": 0.6, "learning_rate": 9.521753625046056e-05, "loss": 2.3118, "step": 550 },
    { "epoch": 0.6, "learning_rate": 9.51807854456364e-05, "loss": 2.6302, "step": 552 },
    { "epoch": 0.6, "learning_rate": 9.51439011231733e-05, "loss": 2.3981, "step": 554 },
    { "epoch": 0.6, "learning_rate": 9.510688339207133e-05, "loss": 2.4194, "step": 556 },
    { "epoch": 0.6, "learning_rate": 9.506973236172478e-05, "loss": 2.5114, "step": 558 },
    { "epoch": 0.61, "learning_rate": 9.503244814192187e-05, "loss": 2.4885, "step": 560 },
    { "epoch": 0.61, "learning_rate": 9.499503084284441e-05, "loss": 2.4262, "step": 562 },
    { "epoch": 0.61, "learning_rate": 9.49574805750675e-05, "loss": 2.2484, "step": 564 },
    { "epoch": 0.61, "learning_rate": 9.491979744955915e-05, "loss": 2.3817, "step": 566 },
    { "epoch": 0.62, "learning_rate": 9.488198157768005e-05, "loss": 2.455, "step": 568 },
    { "epoch": 0.62, "learning_rate": 9.484403307118312e-05, "loss": 2.3778, "step": 570 },
    { "epoch": 0.62, "learning_rate": 9.480595204221329e-05, "loss": 2.5499, "step": 572 },
    { "epoch": 0.62, "learning_rate": 9.47677386033071e-05, "loss": 2.5838, "step": 574 },
    { "epoch": 0.62, "learning_rate": 9.472939286739235e-05, "loss": 2.5077, "step": 576 },
    { "epoch": 0.63, "learning_rate": 9.469091494778785e-05, "loss": 2.6054, "step": 578 },
    { "epoch": 0.63, "learning_rate": 9.465230495820303e-05, "loss": 2.4767, "step": 580 },
    { "epoch": 0.63, "learning_rate": 9.461356301273758e-05, "loss": 2.6251, "step": 582 },
    { "epoch": 0.63, "learning_rate": 9.45746892258812e-05, "loss": 2.4725, "step": 584 },
    { "epoch": 0.63, "learning_rate": 9.453568371251316e-05, "loss": 2.5408, "step": 586 },
    { "epoch": 0.64, "learning_rate": 9.4496546587902e-05, "loss": 2.5397, "step": 588 },
    { "epoch": 0.64, "learning_rate": 9.445727796770524e-05, "loss": 2.666, "step": 590 },
    { "epoch": 0.64, "learning_rate": 9.441787796796896e-05, "loss": 2.3805, "step": 592 },
    { "epoch": 0.64, "learning_rate": 9.43783467051275e-05, "loss": 2.57, "step": 594 },
    { "epoch": 0.65, "learning_rate": 9.43386842960031e-05, "loss": 2.6776, "step": 596 },
    { "epoch": 0.65, "learning_rate": 9.429889085780557e-05, "loss": 2.447, "step": 598 },
    { "epoch": 0.65, "learning_rate": 9.425896650813196e-05, "loss": 2.6253, "step": 600 },
    { "epoch": 0.65, "learning_rate": 9.421891136496612e-05, "loss": 2.3422, "step": 602 },
    { "epoch": 0.65, "learning_rate": 9.41787255466785e-05, "loss": 2.3565, "step": 604 },
    { "epoch": 0.66, "learning_rate": 9.413840917202566e-05, "loss": 2.4135, "step": 606 },
    { "epoch": 0.66, "learning_rate": 9.409796236014999e-05, "loss": 2.6778, "step": 608 },
    { "epoch": 0.66, "learning_rate": 9.405738523057938e-05, "loss": 2.4313, "step": 610 },
    { "epoch": 0.66, "learning_rate": 9.401667790322679e-05, "loss": 2.4427, "step": 612 },
    { "epoch": 0.66, "learning_rate": 9.397584049838996e-05, "loss": 2.6661, "step": 614 },
    { "epoch": 0.67, "learning_rate": 9.393487313675102e-05, "loss": 2.4825, "step": 616 },
    { "epoch": 0.67, "learning_rate": 9.389377593937618e-05, "loss": 2.5834, "step": 618 },
    { "epoch": 0.67, "learning_rate": 9.38525490277153e-05, "loss": 2.4413, "step": 620 },
    { "epoch": 0.67, "learning_rate": 9.38111925236016e-05, "loss": 2.5265, "step": 622 },
    { "epoch": 0.68, "learning_rate": 9.376970654925124e-05, "loss": 2.5181, "step": 624 },
    { "epoch": 0.68, "learning_rate": 9.372809122726299e-05, "loss": 2.6319, "step": 626 },
    { "epoch": 0.68, "learning_rate": 9.368634668061791e-05, "loss": 2.7302, "step": 628 },
    { "epoch": 0.68, "learning_rate": 9.364447303267889e-05, "loss": 2.5624, "step": 630 },
    { "epoch": 0.68, "learning_rate": 9.360247040719039e-05, "loss": 2.4739, "step": 632 },
    { "epoch": 0.69, "learning_rate": 9.356033892827796e-05, "loss": 2.3922, "step": 634 },
    { "epoch": 0.69, "learning_rate": 9.3518078720448e-05, "loss": 2.5975, "step": 636 },
    { "epoch": 0.69, "learning_rate": 9.347568990858726e-05, "loss": 2.4533, "step": 638 },
    { "epoch": 0.69, "learning_rate": 9.343317261796262e-05, "loss": 2.4955, "step": 640 },
    { "epoch": 0.7, "learning_rate": 9.339052697422057e-05, "loss": 2.4098, "step": 642 },
    { "epoch": 0.7, "learning_rate": 9.334775310338694e-05, "loss": 2.705, "step": 644 },
    { "epoch": 0.7, "learning_rate": 9.330485113186648e-05, "loss": 2.4335, "step": 646 },
    { "epoch": 0.7, "learning_rate": 9.326182118644254e-05, "loss": 2.6452, "step": 648 },
    { "epoch": 0.7, "learning_rate": 9.321866339427658e-05, "loss": 2.4124, "step": 650 },
    { "epoch": 0.71, "learning_rate": 9.317537788290794e-05, "loss": 2.3303, "step": 652 },
    { "epoch": 0.71, "learning_rate": 9.313196478025337e-05, "loss": 2.4199, "step": 654 },
    { "epoch": 0.71, "learning_rate": 9.308842421460667e-05, "loss": 2.4577, "step": 656 },
    { "epoch": 0.71, "learning_rate": 9.304475631463834e-05, "loss": 2.5357, "step": 658 },
    { "epoch": 0.71, "learning_rate": 9.300096120939516e-05, "loss": 2.418, "step": 660 },
    { "epoch": 0.72, "learning_rate": 9.29570390282998e-05, "loss": 2.4678, "step": 662 },
    { "epoch": 0.72, "learning_rate": 9.29129899011505e-05, "loss": 2.3826, "step": 664 },
    { "epoch": 0.72, "learning_rate": 9.286881395812066e-05, "loss": 2.4426, "step": 666 },
    { "epoch": 0.72, "learning_rate": 9.28245113297584e-05, "loss": 2.3528, "step": 668 },
    { "epoch": 0.73, "learning_rate": 9.278008214698624e-05, "loss": 2.4743, "step": 670 },
    { "epoch": 0.73, "learning_rate": 9.27355265411007e-05, "loss": 2.3887, "step": 672 },
    { "epoch": 0.73, "learning_rate": 9.26908446437719e-05, "loss": 2.468, "step": 674 },
    { "epoch": 0.73, "learning_rate": 9.264603658704318e-05, "loss": 2.352, "step": 676 },
    { "epoch": 0.73, "learning_rate": 9.260110250333066e-05, "loss": 2.4429, "step": 678 },
    { "epoch": 0.74, "learning_rate": 9.255604252542296e-05, "loss": 2.3712, "step": 680 },
    { "epoch": 0.74, "learning_rate": 9.251085678648072e-05, "loss": 2.5003, "step": 682 },
    { "epoch": 0.74, "learning_rate": 9.246554542003618e-05, "loss": 2.5098, "step": 684 },
    { "epoch": 0.74, "learning_rate": 9.24201085599929e-05, "loss": 2.6184, "step": 686 },
    { "epoch": 0.74, "learning_rate": 9.237454634062525e-05, "loss": 2.4172, "step": 688 },
    { "epoch": 0.75, "learning_rate": 9.23288588965781e-05, "loss": 2.4234, "step": 690 },
    { "epoch": 0.75, "learning_rate": 9.228304636286633e-05, "loss": 2.6617, "step": 692 },
    { "epoch": 0.75, "learning_rate": 9.223710887487453e-05, "loss": 2.422, "step": 694 },
    { "epoch": 0.75, "learning_rate": 9.219104656835654e-05, "loss": 2.6878, "step": 696 },
    { "epoch": 0.76, "learning_rate": 9.214485957943503e-05, "loss": 2.6575, "step": 698 },
    { "epoch": 0.76, "learning_rate": 9.209854804460121e-05, "loss": 2.369, "step": 700 },
    { "epoch": 0.76, "learning_rate": 9.205211210071426e-05, "loss": 2.5432, "step": 702 },
    { "epoch": 0.76, "learning_rate": 9.200555188500103e-05, "loss": 2.5313, "step": 704 },
    { "epoch": 0.76, "learning_rate": 9.195886753505565e-05, "loss": 2.3887, "step": 706 },
    { "epoch": 0.77, "learning_rate": 9.191205918883909e-05, "loss": 2.6655, "step": 708 },
    { "epoch": 0.77, "learning_rate": 9.18651269846787e-05, "loss": 2.5833, "step": 710 },
    { "epoch": 0.77, "learning_rate": 9.181807106126792e-05, "loss": 2.6638, "step": 712 },
    { "epoch": 0.77, "learning_rate": 9.177089155766574e-05, "loss": 2.4395, "step": 714 },
    { "epoch": 0.78, "learning_rate": 9.172358861329641e-05, "loss": 2.5247, "step": 716 },
    { "epoch": 0.78, "learning_rate": 9.167616236794894e-05, "loss": 2.4711, "step": 718 },
    { "epoch": 0.78, "learning_rate": 9.162861296177671e-05, "loss": 2.4537, "step": 720 },
    { "epoch": 0.78, "learning_rate": 9.158094053529709e-05, "loss": 2.4404, "step": 722 },
    { "epoch": 0.78, "learning_rate": 9.153314522939096e-05, "loss": 2.4599, "step": 724 },
    { "epoch": 0.79, "learning_rate": 9.148522718530236e-05, "loss": 2.5289, "step": 726 },
    { "epoch": 0.79, "learning_rate": 9.143718654463804e-05, "loss": 2.2966, "step": 728 },
    { "epoch": 0.79, "learning_rate": 9.138902344936706e-05, "loss": 2.4635, "step": 730 },
    { "epoch": 0.79, "learning_rate": 9.134073804182033e-05, "loss": 2.6182, "step": 732 },
    { "epoch": 0.79, "learning_rate": 9.129233046469022e-05, "loss": 2.6568, "step": 734 },
    { "epoch": 0.8, "learning_rate": 9.124380086103013e-05, "loss": 2.5841, "step": 736 },
    { "epoch": 0.8, "learning_rate": 9.11951493742541e-05, "loss": 2.609, "step": 738 },
    { "epoch": 0.8, "learning_rate": 9.114637614813634e-05, "loss": 2.3299, "step": 740 },
    { "epoch": 0.8, "learning_rate": 9.109748132681082e-05, "loss": 2.5093, "step": 742 },
    { "epoch": 0.81, "learning_rate": 9.104846505477083e-05, "loss": 2.4223, "step": 744 },
    { "epoch": 0.81, "learning_rate": 9.09993274768686e-05, "loss": 2.4636, "step": 746 },
    { "epoch": 0.81, "learning_rate": 9.095006873831479e-05, "loss": 2.3136, "step": 748 },
    { "epoch": 0.81, "learning_rate": 9.090068898467823e-05, "loss": 2.557, "step": 750 },
    { "epoch": 0.81, "learning_rate": 9.085118836188521e-05, "loss": 2.4634, "step": 752 },
    { "epoch": 0.82, "learning_rate": 9.080156701621936e-05, "loss": 2.5238, "step": 754 },
    { "epoch": 0.82, "learning_rate": 9.075182509432095e-05, "loss": 2.4833, "step": 756 },
    { "epoch": 0.82, "learning_rate": 9.070196274318666e-05, "loss": 2.6603, "step": 758 },
    { "epoch": 0.82, "learning_rate": 9.0651980110169e-05, "loss": 2.4763, "step": 760 },
    { "epoch": 0.83, "learning_rate": 9.060187734297599e-05, "loss": 2.4662, "step": 762 },
    { "epoch": 0.83, "learning_rate": 9.055165458967063e-05, "loss": 2.4409, "step": 764 },
    { "epoch": 0.83, "learning_rate": 9.050131199867052e-05, "loss": 2.5474, "step": 766 },
    { "epoch": 0.83, "learning_rate": 9.045084971874738e-05, "loss": 2.5071, "step": 768 },
    { "epoch": 0.83, "learning_rate": 9.040026789902665e-05, "loss": 2.4774, "step": 770 },
    { "epoch": 0.84, "learning_rate": 9.034956668898706e-05, "loss": 2.4119, "step": 772 },
    { "epoch": 0.84, "learning_rate": 9.029874623846011e-05, "loss": 2.4335, "step": 774 },
    { "epoch": 0.84, "learning_rate": 9.02478066976297e-05, "loss": 2.3666, "step": 776 },
    { "epoch": 0.84, "learning_rate": 9.019674821703166e-05, "loss": 2.5817, "step": 778 },
    { "epoch": 0.84, "learning_rate": 9.014557094755331e-05, "loss": 2.2798, "step": 780 },
    { "epoch": 0.85, "learning_rate": 9.009427504043305e-05, "loss": 2.5065, "step": 782 },
    { "epoch": 0.85, "learning_rate": 9.004286064725982e-05, "loss": 2.4121, "step": 784 },
    { "epoch": 0.85, "learning_rate": 8.999132791997271e-05, "loss": 2.5618, "step": 786 },
    { "epoch": 0.85, "learning_rate": 8.993967701086057e-05, "loss": 2.7772, "step": 788 },
    { "epoch": 0.86, "learning_rate": 8.988790807256143e-05, "loss": 2.3717, "step": 790 },
    { "epoch": 0.86, "learning_rate": 8.983602125806216e-05, "loss": 2.5273, "step": 792 },
    { "epoch": 0.86, "learning_rate": 8.978401672069797e-05, "loss": 2.5246, "step": 794 },
    { "epoch": 0.86, "learning_rate": 8.973189461415194e-05, "loss": 2.5115, "step": 796 },
    { "epoch": 0.86, "learning_rate": 8.967965509245461e-05, "loss": 2.583, "step": 798 },
    { "epoch": 0.87, "learning_rate": 8.962729830998353e-05, "loss": 2.4989, "step": 800 },
    { "epoch": 0.87, "learning_rate": 8.957482442146272e-05, "loss": 2.4011, "step": 802 },
    { "epoch": 0.87, "learning_rate": 8.952223358196227e-05, "loss": 2.424, "step": 804 },
    { "epoch": 0.87, "learning_rate": 8.946952594689797e-05, "loss": 2.5144, "step": 806 },
    { "epoch": 0.87, "learning_rate": 8.941670167203067e-05, "loss": 2.4956, "step": 808 },
    { "epoch": 0.88, "learning_rate": 8.936376091346595e-05, "loss": 2.5917, "step": 810 },
    { "epoch": 0.88, "learning_rate": 8.931070382765359e-05, "loss": 2.3386, "step": 812 },
    { "epoch": 0.88, "learning_rate": 8.925753057138719e-05, "loss": 2.4911, "step": 814 },
    { "epoch": 0.88, "learning_rate": 8.920424130180363e-05, "loss": 2.5727, "step": 816 },
    { "epoch": 0.89, "learning_rate": 8.915083617638262e-05, "loss": 2.4148, "step": 818 },
    { "epoch": 0.89, "learning_rate": 8.909731535294628e-05, "loss": 2.4859, "step": 820 },
    { "epoch": 0.89, "learning_rate": 8.904367898965857e-05, "loss": 2.4704, "step": 822 },
    { "epoch": 0.89, "learning_rate": 8.898992724502498e-05, "loss": 2.5904, "step": 824 },
    { "epoch": 0.89, "learning_rate": 8.893606027789192e-05, "loss": 2.5586, "step": 826 },
    { "epoch": 0.9, "learning_rate": 8.888207824744629e-05, "loss": 2.4723, "step": 828 },
    { "epoch": 0.9, "learning_rate": 8.882798131321508e-05, "loss": 2.5011, "step": 830 },
    { "epoch": 0.9, "learning_rate": 8.877376963506477e-05, "loss": 2.6237, "step": 832 },
    { "epoch": 0.9, "learning_rate": 8.871944337320102e-05, "loss": 2.548, "step": 834 },
    { "epoch": 0.91, "learning_rate": 8.866500268816803e-05, "loss": 2.3527, "step": 836 },
    { "epoch": 0.91, "learning_rate": 8.861044774084815e-05, "loss": 2.6638, "step": 838 },
    { "epoch": 0.91, "learning_rate": 8.855577869246142e-05, "loss": 2.4873, "step": 840 },
    { "epoch": 0.91, "learning_rate": 8.850099570456509e-05, "loss": 2.4461, "step": 842 },
    { "epoch": 0.91, "learning_rate": 8.844609893905309e-05, "loss": 2.4031, "step": 844 },
    { "epoch": 0.92, "learning_rate": 8.839108855815557e-05, "loss": 2.5516, "step": 846 },
    { "epoch": 0.92, "learning_rate": 8.833596472443848e-05, "loss": 2.4283, "step": 848 },
    { "epoch": 0.92, "learning_rate": 8.828072760080299e-05, "loss": 2.2932, "step": 850 },
    { "epoch": 0.92, "learning_rate": 8.822537735048512e-05, "loss": 2.3761, "step": 852 },
    { "epoch": 0.92, "learning_rate": 8.816991413705516e-05, "loss": 2.4804, "step": 854 },
    { "epoch": 0.93, "learning_rate": 8.811433812441722e-05, "loss": 2.6496, "step": 856 },
    { "epoch": 0.93, "learning_rate": 8.80586494768088e-05, "loss": 2.4868, "step": 858 },
    { "epoch": 0.93, "learning_rate": 8.800284835880024e-05, "loss": 2.679, "step": 860 },
    { "epoch": 0.93, "learning_rate": 8.79469349352942e-05, "loss": 2.6229, "step": 862 },
    { "epoch": 0.94, "learning_rate": 8.78909093715253e-05, "loss": 2.5438, "step": 864 },
    { "epoch": 0.94, "learning_rate": 8.783477183305949e-05, "loss": 2.4863, "step": 866 },
    { "epoch": 0.94, "learning_rate": 8.777852248579367e-05, "loss": 2.5205, "step": 868 },
    { "epoch": 0.94, "learning_rate": 8.772216149595513e-05, "loss": 2.4179, "step": 870 },
    { "epoch": 0.94, "learning_rate": 8.766568903010113e-05, "loss": 2.4653, "step": 872 },
    { "epoch": 0.95, "learning_rate": 8.76091052551183e-05, "loss": 2.3727, "step": 874 },
    { "epoch": 0.95, "learning_rate": 8.755241033822224e-05, "loss": 2.5503, "step": 876 },
    { "epoch": 0.95, "learning_rate": 8.7495604446957e-05, "loss": 2.4288, "step": 878 },
    { "epoch": 0.95, "learning_rate": 8.743868774919458e-05, "loss": 2.451, "step": 880 },
    { "epoch": 0.96, "learning_rate": 8.738166041313439e-05, "loss": 2.3869, "step": 882 },
    { "epoch": 0.96, "learning_rate": 8.732452260730286e-05, "loss": 2.5419, "step": 884 },
    { "epoch": 0.96, "learning_rate": 8.726727450055287e-05, "loss": 2.3962, "step": 886 },
    { "epoch": 0.96, "learning_rate": 8.720991626206321e-05, "loss": 2.4672, "step": 888 },
    { "epoch": 0.96, "learning_rate": 8.715244806133816e-05, "loss": 2.4988, "step": 890 },
    { "epoch": 0.97, "learning_rate": 8.7094870068207e-05, "loss": 2.2557, "step": 892 },
    { "epoch": 0.97, "learning_rate": 8.703718245282337e-05, "loss": 2.5007, "step": 894 },
    { "epoch": 0.97, "learning_rate": 8.697938538566499e-05, "loss": 2.4908, "step": 896 },
    { "epoch": 0.97, "learning_rate": 8.69214790375329e-05, "loss": 2.4346, "step": 898 },
    { "epoch": 0.97, "learning_rate": 8.686346357955117e-05, "loss": 2.2897, "step": 900 },
    { "epoch": 0.98, "learning_rate": 8.68053391831663e-05, "loss": 2.3337, "step": 902 },
    { "epoch": 0.98, "learning_rate": 8.674710602014671e-05, "loss": 2.4618, "step": 904 },
    { "epoch": 0.98, "learning_rate": 8.668876426258221e-05, "loss": 2.5041, "step": 906 },
    { "epoch": 0.98, "learning_rate": 8.66303140828836e-05, "loss": 2.4207, "step": 908 },
    { "epoch": 0.99, "learning_rate": 8.657175565378206e-05, "loss": 2.4657, "step": 910 },
    { "epoch": 0.99, "learning_rate": 8.651308914832862e-05, "loss": 2.5422, "step": 912 },
    { "epoch": 0.99, "learning_rate": 8.645431473989376e-05, "loss": 2.4069, "step": 914 },
    { "epoch": 0.99, "learning_rate": 8.63954326021668e-05, "loss": 2.6141, "step": 916 },
    { "epoch": 0.99, "learning_rate": 8.633644290915545e-05, "loss": 2.7452, "step": 918 },
    { "epoch": 1.0, "learning_rate": 8.627734583518521e-05, "loss": 2.4625, "step": 920 },
    { "epoch": 1.0, "learning_rate": 8.621814155489895e-05, "loss": 2.3913, "step": 922 },
    { "epoch": 1.0, "learning_rate": 8.615883024325636e-05, "loss": 3.1472, "step": 924 },
    { "epoch": 1.0, "learning_rate": 8.609941207553342e-05, "loss": 2.4791, "step": 926 },
    { "epoch": 1.01, "learning_rate": 8.603988722732186e-05, "loss": 2.4555, "step": 928 },
    { "epoch": 1.01, "learning_rate": 8.598025587452873e-05, "loss": 2.5092, "step": 930 },
    { "epoch": 1.01, "learning_rate": 8.592051819337579e-05, "loss": 2.5088, "step": 932 },
    { "epoch": 1.01, "learning_rate": 8.586067436039899e-05, "loss": 2.5663, "step": 934 },
    { "epoch": 1.01, "learning_rate": 8.580072455244801e-05, "loss": 2.5562, "step": 936 },
    { "epoch": 1.02, "learning_rate": 8.574066894668573e-05, "loss": 2.4265, "step": 938 },
    { "epoch": 1.02, "learning_rate": 8.568050772058762e-05, "loss": 2.473, "step": 940 },
    { "epoch": 1.02, "learning_rate": 8.562024105194133e-05, "loss": 2.5223, "step": 942 },
    { "epoch": 1.02, "learning_rate": 8.555986911884609e-05, "loss": 2.3263, "step": 944 },
    { "epoch": 1.02, "learning_rate": 8.549939209971221e-05, "loss": 2.2938, "step": 946 },
    { "epoch": 1.03, "learning_rate": 8.543881017326057e-05, "loss": 2.321, "step": 948 },
    { "epoch": 1.03, "learning_rate": 8.537812351852201e-05, "loss": 2.4323, "step": 950 },
    { "epoch": 1.03, "learning_rate": 8.531733231483694e-05, "loss": 2.365, "step": 952 },
    { "epoch": 1.03, "learning_rate": 8.525643674185466e-05, "loss": 2.4085, "step": 954 },
    { "epoch": 1.04, "learning_rate": 8.519543697953296e-05, "loss": 2.4288, "step": 956 },
    { "epoch": 1.04, "learning_rate": 8.51343332081375e-05, "loss": 2.6551, "step": 958 },
    { "epoch": 1.04, "learning_rate": 8.50731256082413e-05, "loss": 2.4887, "step": 960 },
    { "epoch": 1.04, "learning_rate": 8.501181436072422e-05, "loss": 2.6168, "step": 962 },
    { "epoch": 1.04, "learning_rate": 8.495039964677241e-05, "loss": 2.4247, "step": 964 },
    { "epoch": 1.05, "learning_rate": 8.488888164787782e-05, "loss": 2.5132, "step": 966 },
    { "epoch": 1.05, "learning_rate": 8.482726054583761e-05, "loss": 2.5011, "step": 968 },
    { "epoch": 1.05, "learning_rate": 8.476553652275356e-05, "loss": 2.4964, "step": 970 },
    { "epoch": 1.05, "learning_rate": 8.47037097610317e-05, "loss": 2.3202, "step": 972 },
    { "epoch": 1.06, "learning_rate": 8.464178044338162e-05, "loss": 2.2058, "step": 974 },
    { "epoch": 1.06, "learning_rate": 8.4579748752816e-05, "loss": 2.585, "step": 976 },
    { "epoch": 1.06, "learning_rate": 8.451761487265003e-05, "loss": 2.3743, "step": 978 },
    { "epoch": 1.06, "learning_rate": 8.44553789865009e-05, "loss": 2.2927, "step": 980 },
    { "epoch": 1.06, "learning_rate": 8.439304127828728e-05, "loss": 2.3899, "step": 982 },
    { "epoch": 1.07, "learning_rate": 8.433060193222868e-05, "loss": 2.462, "step": 984 },
    { "epoch": 1.07, "learning_rate": 8.426806113284502e-05, "loss": 2.4369, "step": 986 },
    { "epoch": 1.07, "learning_rate": 8.420541906495599e-05, "loss": 2.4967, "step": 988 },
    { "epoch": 1.07, "learning_rate": 8.414267591368058e-05, "loss": 2.6217, "step": 990 },
    { "epoch": 1.07, "learning_rate": 8.407983186443653e-05, "loss": 2.6545, "step": 992 },
    { "epoch": 1.08, "learning_rate": 8.401688710293967e-05, "loss": 2.4993, "step": 994 },
    { "epoch": 1.08, "learning_rate": 8.395384181520351e-05, "loss": 2.3227, "step": 996 },
    { "epoch": 1.08, "learning_rate": 8.389069618753865e-05, "loss": 2.325, "step": 998 },
    { "epoch": 1.08, "learning_rate": 8.382745040655212e-05, "loss": 2.6491, "step": 1000 },
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.376410465914705e-05, |
|
"loss": 2.4874, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.370065913252188e-05, |
|
"loss": 2.505, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.363711401417e-05, |
|
"loss": 2.4867, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.357346949187906e-05, |
|
"loss": 2.2378, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.350972575373047e-05, |
|
"loss": 2.372, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.344588298809887e-05, |
|
"loss": 2.3432, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.338194138365151e-05, |
|
"loss": 2.6878, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.331790112934777e-05, |
|
"loss": 2.4083, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 8.325376241443849e-05, |
|
"loss": 2.4451, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.318952542846557e-05, |
|
"loss": 2.3759, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.312519036126125e-05, |
|
"loss": 2.5355, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.306075740294763e-05, |
|
"loss": 2.4161, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.299622674393614e-05, |
|
"loss": 2.3455, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 8.293159857492686e-05, |
|
"loss": 2.469, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.28668730869081e-05, |
|
"loss": 2.3113, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.280205047115572e-05, |
|
"loss": 2.4072, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.273713091923264e-05, |
|
"loss": 2.5218, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.267211462298822e-05, |
|
"loss": 2.374, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 8.260700177455773e-05, |
|
"loss": 2.452, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.254179256636179e-05, |
|
"loss": 2.4523, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.247648719110572e-05, |
|
"loss": 2.5231, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.241108584177911e-05, |
|
"loss": 2.5678, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.234558871165512e-05, |
|
"loss": 2.449, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.227999599428995e-05, |
|
"loss": 2.3786, |
|
"step": 1048 |
|
}, |
|
{
"epoch": 1.14,
"learning_rate": 8.221430788352233e-05,
"loss": 2.3994,
"step": 1050
},
{
"epoch": 1.14,
"learning_rate": 8.214852457347286e-05,
"loss": 2.5034,
"step": 1052
},
{
"epoch": 1.14,
"learning_rate": 8.208264625854347e-05,
"loss": 2.2819,
"step": 1054
},
{
"epoch": 1.14,
"learning_rate": 8.201667313341685e-05,
"loss": 2.4361,
"step": 1056
},
{
"epoch": 1.15,
"learning_rate": 8.19506053930559e-05,
"loss": 2.3855,
"step": 1058
},
{
"epoch": 1.15,
"learning_rate": 8.18844432327031e-05,
"loss": 2.4898,
"step": 1060
},
{
"epoch": 1.15,
"learning_rate": 8.181818684787992e-05,
"loss": 2.5017,
"step": 1062
},
{
"epoch": 1.15,
"learning_rate": 8.175183643438635e-05,
"loss": 2.402,
"step": 1064
},
{
"epoch": 1.15,
"learning_rate": 8.168539218830024e-05,
"loss": 2.3225,
"step": 1066
},
{
"epoch": 1.16,
"learning_rate": 8.16188543059767e-05,
"loss": 2.3171,
"step": 1068
},
{
"epoch": 1.16,
"learning_rate": 8.155222298404756e-05,
"loss": 2.5654,
"step": 1070
},
{
"epoch": 1.16,
"learning_rate": 8.148549841942082e-05,
"loss": 2.3448,
"step": 1072
},
{
"epoch": 1.16,
"learning_rate": 8.141868080927996e-05,
"loss": 2.2422,
"step": 1074
},
{
"epoch": 1.17,
"learning_rate": 8.135177035108352e-05,
"loss": 2.6608,
"step": 1076
},
{
"epoch": 1.17,
"learning_rate": 8.128476724256431e-05,
"loss": 2.486,
"step": 1078
},
{
"epoch": 1.17,
"learning_rate": 8.121767168172904e-05,
"loss": 2.3347,
"step": 1080
},
{
"epoch": 1.17,
"learning_rate": 8.115048386685757e-05,
"loss": 2.4229,
"step": 1082
},
{
"epoch": 1.17,
"learning_rate": 8.108320399650244e-05,
"loss": 2.6345,
"step": 1084
},
{
"epoch": 1.18,
"learning_rate": 8.101583226948819e-05,
"loss": 2.5783,
"step": 1086
},
{
"epoch": 1.18,
"learning_rate": 8.09483688849108e-05,
"loss": 2.4985,
"step": 1088
},
{
"epoch": 1.18,
"learning_rate": 8.088081404213718e-05,
"loss": 2.3184,
"step": 1090
},
{
"epoch": 1.18,
"learning_rate": 8.081316794080445e-05,
"loss": 2.324,
"step": 1092
},
{
"epoch": 1.19,
"learning_rate": 8.074543078081946e-05,
"loss": 2.504,
"step": 1094
},
{
"epoch": 1.19,
"learning_rate": 8.067760276235812e-05,
"loss": 2.3798,
"step": 1096
},
{
"epoch": 1.19,
"learning_rate": 8.060968408586489e-05,
"loss": 2.4197,
"step": 1098
},
{
"epoch": 1.19,
"learning_rate": 8.054167495205207e-05,
"loss": 2.4555,
"step": 1100
},
{
"epoch": 1.19,
"learning_rate": 8.047357556189936e-05,
"loss": 2.6626,
"step": 1102
},
{
"epoch": 1.2,
"learning_rate": 8.040538611665314e-05,
"loss": 2.5664,
"step": 1104
},
{
"epoch": 1.2,
"learning_rate": 8.033710681782592e-05,
"loss": 2.4436,
"step": 1106
},
{
"epoch": 1.2,
"learning_rate": 8.026873786719573e-05,
"loss": 2.5044,
"step": 1108
},
{
"epoch": 1.2,
"learning_rate": 8.02002794668056e-05,
"loss": 2.488,
"step": 1110
},
{
"epoch": 1.2,
"learning_rate": 8.013173181896283e-05,
"loss": 2.4565,
"step": 1112
},
{
"epoch": 1.21,
"learning_rate": 8.006309512623848e-05,
"loss": 2.5484,
"step": 1114
},
{
"epoch": 1.21,
"learning_rate": 7.99943695914668e-05,
"loss": 2.4438,
"step": 1116
},
{
"epoch": 1.21,
"learning_rate": 7.992555541774452e-05,
"loss": 2.4668,
"step": 1118
},
{
"epoch": 1.21,
"learning_rate": 7.985665280843035e-05,
"loss": 2.5129,
"step": 1120
},
{
"epoch": 1.22,
"learning_rate": 7.978766196714436e-05,
"loss": 2.2599,
"step": 1122
},
{
"epoch": 1.22,
"learning_rate": 7.97185830977673e-05,
"loss": 2.4388,
"step": 1124
},
{
"epoch": 1.22,
"learning_rate": 7.964941640444014e-05,
"loss": 2.5566,
"step": 1126
},
{
"epoch": 1.22,
"learning_rate": 7.958016209156331e-05,
"loss": 2.3852,
"step": 1128
},
{
"epoch": 1.22,
"learning_rate": 7.951082036379625e-05,
"loss": 2.3447,
"step": 1130
},
{
"epoch": 1.23,
"learning_rate": 7.944139142605665e-05,
"loss": 2.471,
"step": 1132
},
{
"epoch": 1.23,
"learning_rate": 7.937187548351996e-05,
"loss": 2.4846,
"step": 1134
},
{
"epoch": 1.23,
"learning_rate": 7.930227274161877e-05,
"loss": 2.433,
"step": 1136
},
{
"epoch": 1.23,
"learning_rate": 7.923258340604212e-05,
"loss": 2.7046,
"step": 1138
},
{
"epoch": 1.23,
"learning_rate": 7.916280768273498e-05,
"loss": 2.2928,
"step": 1140
},
{
"epoch": 1.24,
"learning_rate": 7.909294577789766e-05,
"loss": 2.5962,
"step": 1142
},
{
"epoch": 1.24,
"learning_rate": 7.902299789798505e-05,
"loss": 2.4707,
"step": 1144
},
{
"epoch": 1.24,
"learning_rate": 7.895296424970618e-05,
"loss": 2.4212,
"step": 1146
},
{
"epoch": 1.24,
"learning_rate": 7.888284504002352e-05,
"loss": 2.5168,
"step": 1148
},
{
"epoch": 1.25,
"learning_rate": 7.881264047615245e-05,
"loss": 2.5038,
"step": 1150
},
{
"epoch": 1.25,
"learning_rate": 7.874235076556046e-05,
"loss": 2.2647,
"step": 1152
},
{
"epoch": 1.25,
"learning_rate": 7.867197611596683e-05,
"loss": 2.5225,
"step": 1154
},
{
"epoch": 1.25,
"learning_rate": 7.860151673534168e-05,
"loss": 2.3552,
"step": 1156
},
{
"epoch": 1.25,
"learning_rate": 7.853097283190567e-05,
"loss": 2.5299,
"step": 1158
},
{
"epoch": 1.26,
"learning_rate": 7.846034461412912e-05,
"loss": 2.476,
"step": 1160
},
{
"epoch": 1.26,
"learning_rate": 7.838963229073162e-05,
"loss": 2.3523,
"step": 1162
},
{
"epoch": 1.26,
"learning_rate": 7.831883607068125e-05,
"loss": 2.4746,
"step": 1164
},
{
"epoch": 1.26,
"learning_rate": 7.824795616319402e-05,
"loss": 2.4551,
"step": 1166
},
{
"epoch": 1.27,
"learning_rate": 7.817699277773325e-05,
"loss": 2.4863,
"step": 1168
},
{
"epoch": 1.27,
"learning_rate": 7.810594612400898e-05,
"loss": 2.5789,
"step": 1170
},
{
"epoch": 1.27,
"learning_rate": 7.803481641197733e-05,
"loss": 2.487,
"step": 1172
},
{
"epoch": 1.27,
"learning_rate": 7.796360385183984e-05,
"loss": 2.5997,
"step": 1174
},
{
"epoch": 1.27,
"learning_rate": 7.789230865404287e-05,
"loss": 2.3587,
"step": 1176
},
{
"epoch": 1.28,
"learning_rate": 7.782093102927703e-05,
"loss": 2.7109,
"step": 1178
},
{
"epoch": 1.28,
"learning_rate": 7.77494711884765e-05,
"loss": 2.5783,
"step": 1180
},
{
"epoch": 1.28,
"learning_rate": 7.767792934281843e-05,
"loss": 2.4947,
"step": 1182
},
{
"epoch": 1.28,
"learning_rate": 7.76063057037223e-05,
"loss": 2.3812,
"step": 1184
},
{
"epoch": 1.28,
"learning_rate": 7.753460048284928e-05,
"loss": 2.3337,
"step": 1186
},
{
"epoch": 1.29,
"learning_rate": 7.74628138921017e-05,
"loss": 2.5691,
"step": 1188
},
{
"epoch": 1.29,
"learning_rate": 7.739094614362229e-05,
"loss": 2.4811,
"step": 1190
},
{
"epoch": 1.29,
"learning_rate": 7.731899744979364e-05,
"loss": 2.618,
"step": 1192
},
{
"epoch": 1.29,
"learning_rate": 7.724696802323755e-05,
"loss": 2.2892,
"step": 1194
},
{
"epoch": 1.3,
"learning_rate": 7.717485807681437e-05,
"loss": 2.3032,
"step": 1196
},
{
"epoch": 1.3,
"learning_rate": 7.710266782362247e-05,
"loss": 2.4592,
"step": 1198
},
{
"epoch": 1.3,
"learning_rate": 7.703039747699747e-05,
"loss": 2.3496,
"step": 1200
},
{
"epoch": 1.3,
"learning_rate": 7.695804725051172e-05,
"loss": 2.423,
"step": 1202
},
{
"epoch": 1.3,
"learning_rate": 7.68856173579736e-05,
"loss": 2.4122,
"step": 1204
},
{
"epoch": 1.31,
"learning_rate": 7.681310801342696e-05,
"loss": 2.3985,
"step": 1206
},
{
"epoch": 1.31,
"learning_rate": 7.674051943115042e-05,
"loss": 2.2799,
"step": 1208
},
{
"epoch": 1.31,
"learning_rate": 7.666785182565677e-05,
"loss": 2.3947,
"step": 1210
},
{
"epoch": 1.31,
"learning_rate": 7.65951054116923e-05,
"loss": 2.3299,
"step": 1212
},
{
"epoch": 1.32,
"learning_rate": 7.652228040423622e-05,
"loss": 2.274,
"step": 1214
},
{
"epoch": 1.32,
"learning_rate": 7.644937701850002e-05,
"loss": 2.3697,
"step": 1216
},
{
"epoch": 1.32,
"learning_rate": 7.637639546992677e-05,
"loss": 2.3167,
"step": 1218
},
{
"epoch": 1.32,
"learning_rate": 7.630333597419054e-05,
"loss": 2.4688,
"step": 1220
},
{
"epoch": 1.32,
"learning_rate": 7.623019874719579e-05,
"loss": 2.2979,
"step": 1222
},
{
"epoch": 1.33,
"learning_rate": 7.61569840050766e-05,
"loss": 2.4614,
"step": 1224
},
{
"epoch": 1.33,
"learning_rate": 7.60836919641962e-05,
"loss": 2.5093,
"step": 1226
},
{
"epoch": 1.33,
"learning_rate": 7.60103228411462e-05,
"loss": 2.4832,
"step": 1228
},
{
"epoch": 1.33,
"learning_rate": 7.593687685274609e-05,
"loss": 2.4112,
"step": 1230
},
{
"epoch": 1.33,
"learning_rate": 7.586335421604238e-05,
"loss": 2.3033,
"step": 1232
},
{
"epoch": 1.34,
"learning_rate": 7.578975514830821e-05,
"loss": 2.6554,
"step": 1234
},
{
"epoch": 1.34,
"learning_rate": 7.571607986704252e-05,
"loss": 2.3495,
"step": 1236
},
{
"epoch": 1.34,
"learning_rate": 7.564232858996949e-05,
"loss": 2.4517,
"step": 1238
},
{
"epoch": 1.34,
"learning_rate": 7.556850153503787e-05,
"loss": 2.4985,
"step": 1240
},
{
"epoch": 1.35,
"learning_rate": 7.549459892042041e-05,
"loss": 2.5046,
"step": 1242
},
{
"epoch": 1.35,
"learning_rate": 7.542062096451305e-05,
"loss": 2.5004,
"step": 1244
},
{
"epoch": 1.35,
"learning_rate": 7.534656788593446e-05,
"loss": 2.3215,
"step": 1246
},
{
"epoch": 1.35,
"learning_rate": 7.527243990352529e-05,
"loss": 2.5481,
"step": 1248
},
{
"epoch": 1.35,
"learning_rate": 7.519823723634753e-05,
"loss": 2.3608,
"step": 1250
},
{
"epoch": 1.36,
"learning_rate": 7.51239601036839e-05,
"loss": 2.2113,
"step": 1252
},
{
"epoch": 1.36,
"learning_rate": 7.504960872503715e-05,
"loss": 2.6318,
"step": 1254
},
{
"epoch": 1.36,
"learning_rate": 7.497518332012946e-05,
"loss": 2.3967,
"step": 1256
},
{
"epoch": 1.36,
"learning_rate": 7.490068410890175e-05,
"loss": 2.1024,
"step": 1258
},
{
"epoch": 1.36,
"learning_rate": 7.48261113115131e-05,
"loss": 2.5322,
"step": 1260
},
{
"epoch": 1.37,
"learning_rate": 7.475146514834001e-05,
"loss": 2.3737,
"step": 1262
},
{
"epoch": 1.37,
"learning_rate": 7.46767458399758e-05,
"loss": 2.4803,
"step": 1264
},
{
"epoch": 1.37,
"learning_rate": 7.460195360722995e-05,
"loss": 2.1737,
"step": 1266
},
{
"epoch": 1.37,
"learning_rate": 7.452708867112745e-05,
"loss": 2.5601,
"step": 1268
},
{
"epoch": 1.38,
"learning_rate": 7.44521512529081e-05,
"loss": 2.5452,
"step": 1270
},
{
"epoch": 1.38,
"learning_rate": 7.437714157402598e-05,
"loss": 2.3953,
"step": 1272
},
{
"epoch": 1.38,
"learning_rate": 7.430205985614864e-05,
"loss": 2.4914,
"step": 1274
},
{
"epoch": 1.38,
"learning_rate": 7.422690632115654e-05,
"loss": 2.3997,
"step": 1276
},
{
"epoch": 1.38,
"learning_rate": 7.41516811911424e-05,
"loss": 2.2561,
"step": 1278
},
{
"epoch": 1.39,
"learning_rate": 7.407638468841047e-05,
"loss": 2.6531,
"step": 1280
},
{
"epoch": 1.39,
"learning_rate": 7.400101703547597e-05,
"loss": 2.6299,
"step": 1282
},
{
"epoch": 1.39,
"learning_rate": 7.392557845506432e-05,
"loss": 2.4573,
"step": 1284
},
{
"epoch": 1.39,
"learning_rate": 7.385006917011063e-05,
"loss": 2.5633,
"step": 1286
},
{
"epoch": 1.4,
"learning_rate": 7.377448940375887e-05,
"loss": 2.6371,
"step": 1288
},
{
"epoch": 1.4,
"learning_rate": 7.369883937936136e-05,
"loss": 2.2814,
"step": 1290
},
{
"epoch": 1.4,
"learning_rate": 7.362311932047797e-05,
"loss": 2.6985,
"step": 1292
},
{
"epoch": 1.4,
"learning_rate": 7.354732945087563e-05,
"loss": 2.3274,
"step": 1294
},
{
"epoch": 1.4,
"learning_rate": 7.34714699945275e-05,
"loss": 2.2417,
"step": 1296
},
{
"epoch": 1.41,
"learning_rate": 7.33955411756124e-05,
"loss": 2.4285,
"step": 1298
},
{
"epoch": 1.41,
"learning_rate": 7.331954321851418e-05,
"loss": 2.4677,
"step": 1300
},
{
"epoch": 1.41,
"learning_rate": 7.32434763478209e-05,
"loss": 2.4342,
"step": 1302
},
{
"epoch": 1.41,
"learning_rate": 7.316734078832438e-05,
"loss": 2.3903,
"step": 1304
},
{
"epoch": 1.41,
"learning_rate": 7.309113676501939e-05,
"loss": 2.4379,
"step": 1306
},
{
"epoch": 1.42,
"learning_rate": 7.301486450310298e-05,
"loss": 2.4929,
"step": 1308
},
{
"epoch": 1.42,
"learning_rate": 7.293852422797391e-05,
"loss": 2.4626,
"step": 1310
},
{
"epoch": 1.42,
"learning_rate": 7.286211616523193e-05,
"loss": 2.5199,
"step": 1312
},
{
"epoch": 1.42,
"learning_rate": 7.278564054067709e-05,
"loss": 2.3659,
"step": 1314
},
{
"epoch": 1.43,
"learning_rate": 7.270909758030912e-05,
"loss": 2.4869,
"step": 1316
},
{
"epoch": 1.43,
"learning_rate": 7.263248751032671e-05,
"loss": 2.5166,
"step": 1318
},
{
"epoch": 1.43,
"learning_rate": 7.255581055712688e-05,
"loss": 2.139,
"step": 1320
},
{
"epoch": 1.43,
"learning_rate": 7.247906694730437e-05,
"loss": 2.4807,
"step": 1322
},
{
"epoch": 1.43,
"learning_rate": 7.24022569076508e-05,
"loss": 2.4607,
"step": 1324
},
{
"epoch": 1.44,
"learning_rate": 7.232538066515414e-05,
"loss": 2.3367,
"step": 1326
},
{
"epoch": 1.44,
"learning_rate": 7.224843844699803e-05,
"loss": 2.6005,
"step": 1328
},
{
"epoch": 1.44,
"learning_rate": 7.217143048056108e-05,
"loss": 2.3467,
"step": 1330
},
{
"epoch": 1.44,
"learning_rate": 7.209435699341613e-05,
"loss": 2.4132,
"step": 1332
},
{
"epoch": 1.45,
"learning_rate": 7.201721821332973e-05,
"loss": 2.3049,
"step": 1334
},
{
"epoch": 1.45,
"learning_rate": 7.194001436826135e-05,
"loss": 2.3176,
"step": 1336
},
{
"epoch": 1.45,
"learning_rate": 7.18627456863627e-05,
"loss": 2.5401,
"step": 1338
},
{
"epoch": 1.45,
"learning_rate": 7.178541239597717e-05,
"loss": 2.4131,
"step": 1340
},
{
"epoch": 1.45,
"learning_rate": 7.170801472563903e-05,
"loss": 2.4554,
"step": 1342
},
{
"epoch": 1.46,
"learning_rate": 7.163055290407282e-05,
"loss": 2.405,
"step": 1344
},
{
"epoch": 1.46,
"learning_rate": 7.155302716019263e-05,
"loss": 2.4435,
"step": 1346
},
{
"epoch": 1.46,
"learning_rate": 7.14754377231015e-05,
"loss": 2.4068,
"step": 1348
},
{
"epoch": 1.46,
"learning_rate": 7.139778482209068e-05,
"loss": 2.4863,
"step": 1350
},
{
"epoch": 1.46,
"learning_rate": 7.132006868663894e-05,
"loss": 2.3856,
"step": 1352
},
{
"epoch": 1.47,
"learning_rate": 7.124228954641196e-05,
"loss": 2.3076,
"step": 1354
},
{
"epoch": 1.47,
"learning_rate": 7.116444763126158e-05,
"loss": 2.334,
"step": 1356
},
{
"epoch": 1.47,
"learning_rate": 7.108654317122515e-05,
"loss": 2.3639,
"step": 1358
},
{
"epoch": 1.47,
"learning_rate": 7.100857639652489e-05,
"loss": 2.7099,
"step": 1360
},
{
"epoch": 1.48,
"learning_rate": 7.093054753756713e-05,
"loss": 2.6381,
"step": 1362
},
{
"epoch": 1.48,
"learning_rate": 7.085245682494168e-05,
"loss": 2.4935,
"step": 1364
},
{
"epoch": 1.48,
"learning_rate": 7.077430448942117e-05,
"loss": 2.3986,
"step": 1366
},
{
"epoch": 1.48,
"learning_rate": 7.069609076196029e-05,
"loss": 2.5647,
"step": 1368
},
{
"epoch": 1.48,
"learning_rate": 7.061781587369519e-05,
"loss": 2.34,
"step": 1370
},
{
"epoch": 1.49,
"learning_rate": 7.053948005594273e-05,
"loss": 2.5114,
"step": 1372
},
{
"epoch": 1.49,
"learning_rate": 7.046108354019987e-05,
"loss": 2.4023,
"step": 1374
},
{
"epoch": 1.49,
"learning_rate": 7.038262655814291e-05,
"loss": 2.5273,
"step": 1376
},
{
"epoch": 1.49,
"learning_rate": 7.030410934162684e-05,
"loss": 2.3996,
"step": 1378
},
{
"epoch": 1.49,
"learning_rate": 7.022553212268469e-05,
"loss": 2.5281,
"step": 1380
},
{
"epoch": 1.5,
"learning_rate": 7.014689513352675e-05,
"loss": 2.2748,
"step": 1382
},
{
"epoch": 1.5,
"learning_rate": 7.006819860654001e-05,
"loss": 2.6029,
"step": 1384
},
{
"epoch": 1.5,
"learning_rate": 6.998944277428734e-05,
"loss": 2.3091,
"step": 1386
},
{
"epoch": 1.5,
"learning_rate": 6.991062786950691e-05,
"loss": 2.4689,
"step": 1388
},
{
"epoch": 1.51,
"learning_rate": 6.983175412511145e-05,
"loss": 2.4879,
"step": 1390
},
{
"epoch": 1.51,
"learning_rate": 6.975282177418756e-05,
"loss": 2.5369,
"step": 1392
},
{
"epoch": 1.51,
"learning_rate": 6.967383104999505e-05,
"loss": 2.4159,
"step": 1394
},
{
"epoch": 1.51,
"learning_rate": 6.959478218596625e-05,
"loss": 2.5748,
"step": 1396
},
{
"epoch": 1.51,
"learning_rate": 6.951567541570523e-05,
"loss": 2.5503,
"step": 1398
},
{
"epoch": 1.52,
"learning_rate": 6.943651097298727e-05,
"loss": 2.5094,
"step": 1400
},
{
"epoch": 1.52,
"learning_rate": 6.935728909175805e-05,
"loss": 2.3161,
"step": 1402
},
{
"epoch": 1.52,
"learning_rate": 6.927801000613298e-05,
"loss": 2.3559,
"step": 1404
},
{
"epoch": 1.52,
"learning_rate": 6.919867395039652e-05,
"loss": 2.4446,
"step": 1406
},
{
"epoch": 1.53,
"learning_rate": 6.91192811590015e-05,
"loss": 2.3579,
"step": 1408
},
{
"epoch": 1.53,
"learning_rate": 6.903983186656844e-05,
"loss": 2.3263,
"step": 1410
},
{
"epoch": 1.53,
"learning_rate": 6.896032630788476e-05,
"loss": 2.4279,
"step": 1412
},
{
"epoch": 1.53,
"learning_rate": 6.888076471790424e-05,
"loss": 2.3288,
"step": 1414
},
{
"epoch": 1.53,
"learning_rate": 6.880114733174615e-05,
"loss": 2.3714,
"step": 1416
},
{
"epoch": 1.54,
"learning_rate": 6.872147438469476e-05,
"loss": 2.5845,
"step": 1418
},
{
"epoch": 1.54,
"learning_rate": 6.864174611219841e-05,
"loss": 2.2575,
"step": 1420
},
{
"epoch": 1.54,
"learning_rate": 6.856196274986907e-05,
"loss": 2.7716,
"step": 1422
},
{
"epoch": 1.54,
"learning_rate": 6.848212453348137e-05,
"loss": 2.408,
"step": 1424
},
{
"epoch": 1.54,
"learning_rate": 6.840223169897217e-05,
"loss": 2.5191,
"step": 1426
},
{
"epoch": 1.55,
"learning_rate": 6.832228448243964e-05,
"loss": 2.3474,
"step": 1428
},
{
"epoch": 1.55,
"learning_rate": 6.824228312014274e-05,
"loss": 2.5852,
"step": 1430
},
{
"epoch": 1.55,
"learning_rate": 6.816222784850038e-05,
"loss": 2.5364,
"step": 1432
},
{
"epoch": 1.55,
"learning_rate": 6.80821189040908e-05,
"loss": 2.26,
"step": 1434
},
{
"epoch": 1.56,
"learning_rate": 6.800195652365087e-05,
"loss": 2.4253,
"step": 1436
},
{
"epoch": 1.56,
"learning_rate": 6.792174094407533e-05,
"loss": 2.3855,
"step": 1438
},
{
"epoch": 1.56,
"learning_rate": 6.784147240241619e-05,
"loss": 2.2678,
"step": 1440
},
{
"epoch": 1.56,
"learning_rate": 6.776115113588194e-05,
"loss": 2.4646,
"step": 1442
},
{
"epoch": 1.56,
"learning_rate": 6.76807773818369e-05,
"loss": 2.6316,
"step": 1444
},
{
"epoch": 1.57,
"learning_rate": 6.760035137780046e-05,
"loss": 2.5357,
"step": 1446
},
{
"epoch": 1.57,
"learning_rate": 6.751987336144648e-05,
"loss": 2.5943,
"step": 1448
},
{
"epoch": 1.57,
"learning_rate": 6.743934357060246e-05,
"loss": 2.4468,
"step": 1450
},
{
"epoch": 1.57,
"learning_rate": 6.735876224324895e-05,
"loss": 2.3678,
"step": 1452
},
{
"epoch": 1.57,
"learning_rate": 6.72781296175188e-05,
"loss": 2.3095,
"step": 1454
},
{
"epoch": 1.58,
"learning_rate": 6.719744593169641e-05,
"loss": 2.4335,
"step": 1456
},
{
"epoch": 1.58,
"learning_rate": 6.711671142421714e-05,
"loss": 2.5255,
"step": 1458
},
{
"epoch": 1.58,
"learning_rate": 6.703592633366647e-05,
"loss": 2.3837,
"step": 1460
},
{
"epoch": 1.58,
"learning_rate": 6.695509089877943e-05,
"loss": 2.5474,
"step": 1462
},
{
"epoch": 1.59,
"learning_rate": 6.687420535843975e-05,
"loss": 2.5055,
"step": 1464
},
{
"epoch": 1.59,
"learning_rate": 6.679326995167932e-05,
"loss": 2.4212,
"step": 1466
},
{
"epoch": 1.59,
"learning_rate": 6.671228491767728e-05,
"loss": 2.4475,
"step": 1468
},
{
"epoch": 1.59,
"learning_rate": 6.663125049575956e-05,
"loss": 2.5926,
"step": 1470
},
{
"epoch": 1.59,
"learning_rate": 6.655016692539793e-05,
"loss": 2.2358,
"step": 1472
},
{
"epoch": 1.6,
"learning_rate": 6.646903444620949e-05,
"loss": 2.6164,
"step": 1474
},
{
"epoch": 1.6,
"learning_rate": 6.63878532979558e-05,
"loss": 2.5778,
"step": 1476
},
{
"epoch": 1.6,
"learning_rate": 6.630662372054227e-05,
"loss": 2.4401,
"step": 1478
},
{
"epoch": 1.6,
"learning_rate": 6.622534595401746e-05,
"loss": 2.4327,
"step": 1480
},
{
"epoch": 1.61,
"learning_rate": 6.614402023857232e-05,
"loss": 2.3336,
"step": 1482
},
{
"epoch": 1.61,
"learning_rate": 6.606264681453946e-05,
"loss": 2.4107,
"step": 1484
},
{
"epoch": 1.61,
"learning_rate": 6.598122592239255e-05,
"loss": 2.4793,
"step": 1486
},
{
"epoch": 1.61,
"learning_rate": 6.589975780274544e-05,
"loss": 2.6092,
"step": 1488
},
{
"epoch": 1.61,
"learning_rate": 6.581824269635166e-05,
"loss": 2.4823,
"step": 1490
},
{
"epoch": 1.62,
"learning_rate": 6.57366808441035e-05,
"loss": 2.3623,
"step": 1492
},
{
"epoch": 1.62,
"learning_rate": 6.565507248703144e-05,
"loss": 2.5841,
"step": 1494
},
{
"epoch": 1.62,
"learning_rate": 6.557341786630339e-05,
"loss": 2.3636,
"step": 1496
},
{
"epoch": 1.62,
"learning_rate": 6.549171722322395e-05,
"loss": 2.5033,
"step": 1498
},
{
"epoch": 1.62,
"learning_rate": 6.540997079923376e-05,
"loss": 2.5465,
"step": 1500
},
{
"epoch": 1.63,
"learning_rate": 6.532817883590874e-05,
"loss": 2.4308,
"step": 1502
},
{
"epoch": 1.63,
"learning_rate": 6.524634157495935e-05,
"loss": 2.6063,
"step": 1504
},
{
"epoch": 1.63,
"learning_rate": 6.516445925822997e-05,
"loss": 2.3648,
"step": 1506
},
{
"epoch": 1.63,
"learning_rate": 6.508253212769808e-05,
"loss": 2.5649,
"step": 1508
},
{
"epoch": 1.64,
"learning_rate": 6.500056042547364e-05,
"loss": 2.4303,
"step": 1510
},
{
"epoch": 1.64,
"learning_rate": 6.491854439379827e-05,
"loss": 2.2518,
"step": 1512
},
{
"epoch": 1.64,
"learning_rate": 6.483648427504467e-05,
"loss": 2.6185,
"step": 1514
},
{
"epoch": 1.64,
"learning_rate": 6.475438031171574e-05,
"loss": 2.4631,
"step": 1516
},
{
"epoch": 1.64,
"learning_rate": 6.4672232746444e-05,
"loss": 2.5055,
"step": 1518
},
{
"epoch": 1.65,
"learning_rate": 6.459004182199082e-05,
"loss": 2.4789,
"step": 1520
},
{
"epoch": 1.65,
"learning_rate": 6.45078077812457e-05,
"loss": 2.518,
"step": 1522
},
{
"epoch": 1.65,
"learning_rate": 6.442553086722554e-05,
"loss": 2.2487,
"step": 1524
},
{
"epoch": 1.65,
"learning_rate": 6.434321132307394e-05,
"loss": 2.4873,
"step": 1526
},
{
"epoch": 1.66,
"learning_rate": 6.426084939206051e-05,
"loss": 2.4427,
"step": 1528
},
{
"epoch": 1.66,
"learning_rate": 6.417844531758009e-05,
"loss": 2.5523,
"step": 1530
},
{
"epoch": 1.66,
"learning_rate": 6.40959993431521e-05,
"loss": 2.4331,
"step": 1532
},
{
"epoch": 1.66,
"learning_rate": 6.401351171241971e-05,
"loss": 2.2483,
"step": 1534
},
{
"epoch": 1.66,
"learning_rate": 6.393098266914925e-05,
"loss": 2.3769,
"step": 1536
},
{
"epoch": 1.67,
"learning_rate": 6.384841245722945e-05,
"loss": 2.4459,
"step": 1538
},
{
"epoch": 1.67,
"learning_rate": 6.376580132067065e-05,
"loss": 2.4104,
"step": 1540
},
{
"epoch": 1.67,
"learning_rate": 6.368314950360415e-05,
"loss": 2.3963,
"step": 1542
},
{
"epoch": 1.67,
"learning_rate": 6.360045725028146e-05,
"loss": 2.4358,
"step": 1544
},
{
"epoch": 1.67,
"learning_rate": 6.351772480507363e-05,
"loss": 2.3851,
"step": 1546
},
{
"epoch": 1.68,
"learning_rate": 6.34349524124704e-05,
"loss": 2.3434,
"step": 1548
},
{
"epoch": 1.68,
"learning_rate": 6.335214031707965e-05,
"loss": 2.3168,
"step": 1550
},
{
"epoch": 1.68,
"learning_rate": 6.326928876362652e-05,
"loss": 2.5622,
"step": 1552
},
{
"epoch": 1.68,
"learning_rate": 6.318639799695285e-05,
"loss": 2.4061,
"step": 1554
},
{
"epoch": 1.69,
"learning_rate": 6.310346826201621e-05,
"loss": 2.6289,
"step": 1556
},
{
"epoch": 1.69,
"learning_rate": 6.302049980388948e-05,
"loss": 2.4561,
"step": 1558
},
{
"epoch": 1.69,
"learning_rate": 6.29374928677599e-05,
"loss": 2.4697,
"step": 1560
},
{
"epoch": 1.69,
"learning_rate": 6.28544476989284e-05,
"loss": 2.4481,
"step": 1562
},
{
"epoch": 1.69,
"learning_rate": 6.277136454280898e-05,
"loss": 2.5529,
"step": 1564
},
{
"epoch": 1.7,
"learning_rate": 6.268824364492782e-05,
"loss": 2.4358,
"step": 1566
},
{
"epoch": 1.7,
"learning_rate": 6.260508525092266e-05,
"loss": 2.3754,
"step": 1568
},
{
"epoch": 1.7,
"learning_rate": 6.252188960654204e-05,
"loss": 2.5845,
"step": 1570
},
{
"epoch": 1.7,
"learning_rate": 6.243865695764459e-05,
"loss": 2.5552,
"step": 1572
},
{
"epoch": 1.7,
"learning_rate": 6.235538755019832e-05,
"loss": 2.4616,
"step": 1574
},
{
"epoch": 1.71,
"learning_rate": 6.227208163027982e-05,
"loss": 2.3196,
"step": 1576
},
{
"epoch": 1.71,
"learning_rate": 6.218873944407361e-05,
"loss": 2.4119,
"step": 1578
},
{
"epoch": 1.71,
"learning_rate": 6.210536123787138e-05,
"loss": 2.2707,
"step": 1580
},
{
"epoch": 1.71,
"learning_rate": 6.202194725807127e-05,
"loss": 2.7299,
"step": 1582
},
{
"epoch": 1.72,
"learning_rate": 6.19384977511771e-05,
"loss": 2.2659,
"step": 1584
},
{
"epoch": 1.72,
"learning_rate": 6.185501296379777e-05,
"loss": 2.5439,
"step": 1586
},
{
"epoch": 1.72,
"learning_rate": 6.177149314264631e-05,
"loss": 2.6154,
"step": 1588
},
{
"epoch": 1.72,
"learning_rate": 6.168793853453943e-05,
"loss": 2.5537,
"step": 1590
},
{
"epoch": 1.72,
"learning_rate": 6.160434938639648e-05,
"loss": 2.3475,
"step": 1592
},
{
"epoch": 1.73,
"learning_rate": 6.152072594523906e-05,
"loss": 2.3385,
"step": 1594
},
{
"epoch": 1.73,
"learning_rate": 6.143706845818992e-05,
"loss": 2.4313,
"step": 1596
},
{
"epoch": 1.73,
"learning_rate": 6.135337717247261e-05,
"loss": 2.3323,
"step": 1598
},
{
"epoch": 1.73,
"learning_rate": 6.12696523354104e-05,
"loss": 2.4587,
"step": 1600
},
{
"epoch": 1.74,
"learning_rate": 6.118589419442584e-05,
"loss": 2.6458,
"step": 1602
},
{
"epoch": 1.74,
"learning_rate": 6.110210299703982e-05,
"loss": 2.5148,
"step": 1604
},
{
"epoch": 1.74,
"learning_rate": 6.101827899087094e-05,
"loss": 2.5324,
"step": 1606
},
{
"epoch": 1.74,
"learning_rate": 6.0934422423634744e-05,
"loss": 2.4962,
"step": 1608
},
{
"epoch": 1.74,
"learning_rate": 6.085053354314302e-05,
"loss": 2.4868,
"step": 1610
},
{
"epoch": 1.75,
"learning_rate": 6.076661259730305e-05,
"loss": 2.506,
"step": 1612
},
{
"epoch": 1.75,
"learning_rate": 6.068265983411685e-05,
"loss": 2.3774,
"step": 1614
},
{
"epoch": 1.75,
"learning_rate": 6.05986755016805e-05,
"loss": 2.4287,
"step": 1616
},
{
"epoch": 1.75,
"learning_rate": 6.051465984818332e-05,
"loss": 2.3267,
"step": 1618
},
{
"epoch": 1.75,
"learning_rate": 6.043061312190723e-05,
"loss": 2.4453,
"step": 1620
},
{
"epoch": 1.76,
"learning_rate": 6.034653557122598e-05,
"loss": 2.4022,
"step": 1622
},
{
"epoch": 1.76,
"learning_rate": 6.0262427444604384e-05,
"loss": 2.5554,
"step": 1624
},
{
"epoch": 1.76,
"learning_rate": 6.017828899059763e-05,
"loss": 2.4102,
"step": 1626
},
{
"epoch": 1.76,
"learning_rate": 6.009412045785051e-05,
"loss": 2.3266,
"step": 1628
},
{
"epoch": 1.77,
"learning_rate": 6.000992209509676e-05,
"loss": 2.2382,
"step": 1630
},
{
"epoch": 1.77,
"learning_rate": 5.9925694151158184e-05,
"loss": 2.2763,
"step": 1632
},
{
"epoch": 1.77,
"learning_rate": 5.984143687494409e-05,
"loss": 2.481,
"step": 1634
},
{
"epoch": 1.77,
"learning_rate": 5.975715051545039e-05,
"loss": 2.5598,
"step": 1636
},
{
"epoch": 1.77,
"learning_rate": 5.9672835321759016e-05,
"loss": 2.3289,
"step": 1638
},
{
"epoch": 1.78,
"learning_rate": 5.958849154303704e-05,
"loss": 2.4317,
"step": 1640
},
{
"epoch": 1.78,
"learning_rate": 5.9504119428536076e-05,
"loss": 2.448,
"step": 1642
},
{
"epoch": 1.78,
"learning_rate": 5.9419719227591405e-05,
"loss": 2.2034,
"step": 1644
},
{
"epoch": 1.78,
"learning_rate": 5.933529118962138e-05,
"loss": 2.4841,
"step": 1646
},
{
"epoch": 1.79,
"learning_rate": 5.925083556412657e-05,
"loss": 2.5998,
"step": 1648
},
{
"epoch": 1.79,
"learning_rate": 5.916635260068909e-05,
"loss": 2.5288,
"step": 1650
},
{
"epoch": 1.79,
"learning_rate": 5.908184254897182e-05,
"loss": 2.5148,
"step": 1652
},
{
"epoch": 1.79,
"learning_rate": 5.899730565871774e-05,
"loss": 2.5166,
"step": 1654
},
{
"epoch": 1.79,
"learning_rate": 5.891274217974907e-05,
"loss": 2.4235,
"step": 1656
},
{
"epoch": 1.8,
"learning_rate": 5.8828152361966685e-05,
"loss": 2.5575,
"step": 1658
},
{
"epoch": 1.8,
"learning_rate": 5.874353645534922e-05,
"loss": 2.4232,
"step": 1660
},
{
"epoch": 1.8,
"learning_rate": 5.865889470995248e-05,
"loss": 2.2509,
"step": 1662
},
{
"epoch": 1.8,
"learning_rate": 5.857422737590857e-05,
"loss": 2.2636,
"step": 1664
},
{
"epoch": 1.8,
"learning_rate": 5.8489534703425256e-05,
"loss": 2.4923,
"step": 1666
},
{
"epoch": 1.81,
"learning_rate": 5.8404816942785134e-05,
"loss": 2.3899,
"step": 1668
},
{
"epoch": 1.81,
"learning_rate": 5.8320074344345e-05,
"loss": 2.4698,
"step": 1670
},
{
"epoch": 1.81,
"learning_rate": 5.8235307158535e-05,
"loss": 2.65,
"step": 1672
},
{
"epoch": 1.81,
"learning_rate": 5.8150515635858e-05,
"loss": 2.4687,
"step": 1674
},
{
"epoch": 1.82,
"learning_rate": 5.806570002688869e-05,
"loss": 2.4793,
"step": 1676
},
{
"epoch": 1.82,
"learning_rate": 5.798086058227304e-05,
"loss": 2.2238,
"step": 1678
},
{
"epoch": 1.82,
"learning_rate": 5.78959975527274e-05,
"loss": 2.4365,
"step": 1680
},
{
"epoch": 1.82,
"learning_rate": 5.781111118903785e-05,
"loss": 2.4891,
"step": 1682
},
{
"epoch": 1.82,
"learning_rate": 5.772620174205938e-05,
"loss": 2.3248,
"step": 1684
},
{
"epoch": 1.83,
"learning_rate": 5.764126946271526e-05,
"loss": 2.6325,
"step": 1686
},
{
"epoch": 1.83,
"learning_rate": 5.755631460199616e-05,
"loss": 2.4281,
"step": 1688
},
{
"epoch": 1.83,
"learning_rate": 5.747133741095956e-05,
"loss": 2.4829,
"step": 1690
},
{
"epoch": 1.83,
"learning_rate": 5.738633814072888e-05,
"loss": 2.3321,
"step": 1692
},
{
"epoch": 1.83,
"learning_rate": 5.730131704249278e-05,
"loss": 2.5413,
"step": 1694
},
{
"epoch": 1.84,
"learning_rate": 5.721627436750449e-05,
"loss": 2.0978,
"step": 1696
},
{
"epoch": 1.84,
"learning_rate": 5.713121036708091e-05,
"loss": 2.5072,
"step": 1698
},
{
"epoch": 1.84,
"learning_rate": 5.704612529260205e-05,
"loss": 2.4096,
"step": 1700
},
{
"epoch": 1.84,
"learning_rate": 5.6961019395510126e-05,
"loss": 2.3686,
"step": 1702
},
{
"epoch": 1.85,
"learning_rate": 5.6875892927308936e-05,
"loss": 2.475,
"step": 1704
},
{
"epoch": 1.85,
"learning_rate": 5.679074613956307e-05,
"loss": 2.375,
"step": 1706
},
{
"epoch": 1.85,
"learning_rate": 5.6705579283897116e-05,
"loss": 2.5238,
"step": 1708
},
{
"epoch": 1.85,
"learning_rate": 5.662039261199502e-05,
"loss": 2.5842,
"step": 1710
},
{
"epoch": 1.85,
"learning_rate": 5.6535186375599266e-05,
"loss": 2.5468,
"step": 1712
},
{
"epoch": 1.86,
"learning_rate": 5.644996082651017e-05,
"loss": 2.4626,
"step": 1714
},
{
"epoch": 1.86,
"learning_rate": 5.636471621658508e-05,
"loss": 2.5552,
"step": 1716
},
{
"epoch": 1.86,
"learning_rate": 5.627945279773774e-05,
"loss": 2.2431,
"step": 1718
},
{
"epoch": 1.86,
"learning_rate": 5.61941708219374e-05,
"loss": 2.36,
"step": 1720
},
{
"epoch": 1.87,
"learning_rate": 5.6108870541208224e-05,
"loss": 2.3865,
"step": 1722
},
{
"epoch": 1.87,
"learning_rate": 5.602355220762838e-05,
"loss": 2.5472,
"step": 1724
},
{
"epoch": 1.87,
"learning_rate": 5.593821607332952e-05,
"loss": 2.3935,
"step": 1726
},
{
"epoch": 1.87,
"learning_rate": 5.585286239049574e-05,
"loss": 2.5526,
"step": 1728
},
{
"epoch": 1.87,
"learning_rate": 5.576749141136313e-05,
"loss": 2.5119,
"step": 1730
},
{
"epoch": 1.88,
"learning_rate": 5.568210338821881e-05,
"loss": 2.3868,
"step": 1732
},
{
"epoch": 1.88,
"learning_rate": 5.5596698573400306e-05,
"loss": 2.4324,
"step": 1734
},
{
"epoch": 1.88,
"learning_rate": 5.5511277219294765e-05,
"loss": 2.4088,
"step": 1736
},
{
"epoch": 1.88,
"learning_rate": 5.54258395783382e-05,
"loss": 2.447,
"step": 1738
},
{
"epoch": 1.88,
"learning_rate": 5.534038590301476e-05,
"loss": 2.4857,
"step": 1740
},
{
"epoch": 1.89,
"learning_rate": 5.5254916445855974e-05,
"loss": 2.3698,
"step": 1742
},
{
"epoch": 1.89,
"learning_rate": 5.5169431459440014e-05,
"loss": 2.5048,
"step": 1744
},
{
"epoch": 1.89,
"learning_rate": 5.508393119639094e-05,
"loss": 2.5057,
"step": 1746
},
{
"epoch": 1.89,
"learning_rate": 5.499841590937795e-05,
"loss": 2.4211,
"step": 1748
},
{
"epoch": 1.9,
"learning_rate": 5.491288585111467e-05,
"loss": 2.7328,
"step": 1750
},
{
"epoch": 1.9,
"learning_rate": 5.4827341274358344e-05,
"loss": 2.5598,
"step": 1752
},
{
"epoch": 1.9,
"learning_rate": 5.4741782431909136e-05,
"loss": 2.2472,
"step": 1754
},
{
"epoch": 1.9,
"learning_rate": 5.465620957660938e-05,
"loss": 2.4122,
"step": 1756
},
{
"epoch": 1.9,
"learning_rate": 5.457062296134279e-05,
"loss": 2.4685,
"step": 1758
},
{
"epoch": 1.91,
"learning_rate": 5.448502283903377e-05,
"loss": 2.5201,
"step": 1760
},
{
"epoch": 1.91,
"learning_rate": 5.439940946264662e-05,
"loss": 2.5483,
"step": 1762
},
{
"epoch": 1.91,
"learning_rate": 5.4313783085184825e-05,
"loss": 2.4956,
"step": 1764
},
{
"epoch": 1.91,
"learning_rate": 5.422814395969029e-05,
"loss": 2.5378,
"step": 1766
},
{
"epoch": 1.91,
"learning_rate": 5.414249233924258e-05,
"loss": 2.218,
"step": 1768
},
{
"epoch": 1.92,
"learning_rate": 5.40568284769582e-05,
"loss": 2.5364,
"step": 1770
},
{
"epoch": 1.92,
"learning_rate": 5.39711526259898e-05,
"loss": 2.4421,
"step": 1772
},
{
"epoch": 1.92,
"learning_rate": 5.388546503952551e-05,
"loss": 2.3293,
"step": 1774
},
{
"epoch": 1.92,
"learning_rate": 5.379976597078808e-05,
"loss": 2.4037,
"step": 1776
},
{
"epoch": 1.93,
"learning_rate": 5.371405567303428e-05,
"loss": 2.4786,
"step": 1778
},
{
"epoch": 1.93,
"learning_rate": 5.362833439955396e-05,
"loss": 2.2926,
"step": 1780
},
{
"epoch": 1.93,
"learning_rate": 5.354260240366947e-05,
"loss": 2.5112,
"step": 1782
},
{
"epoch": 1.93,
"learning_rate": 5.3456859938734836e-05,
"loss": 2.2066,
"step": 1784
},
{
"epoch": 1.93,
"learning_rate": 5.337110725813501e-05,
"loss": 2.4357,
"step": 1786
},
{
"epoch": 1.94,
"learning_rate": 5.328534461528515e-05,
"loss": 2.502,
"step": 1788
},
{
"epoch": 1.94,
"learning_rate": 5.3199572263629824e-05,
"loss": 2.2781,
"step": 1790
},
{
"epoch": 1.94,
"learning_rate": 5.3113790456642345e-05,
"loss": 2.3274,
"step": 1792
},
{
"epoch": 1.94,
"learning_rate": 5.3027999447823905e-05,
"loss": 2.4531,
"step": 1794
},
{
"epoch": 1.95,
"learning_rate": 5.2942199490702924e-05,
"loss": 2.4264,
"step": 1796
},
{
"epoch": 1.95,
"learning_rate": 5.285639083883428e-05,
"loss": 2.3976,
"step": 1798
},
{
"epoch": 1.95,
"learning_rate": 5.27705737457985e-05,
"loss": 2.3159,
"step": 1800
},
{
"epoch": 1.95,
"learning_rate": 5.268474846520112e-05,
"loss": 2.3113,
"step": 1802
},
{
"epoch": 1.95,
"learning_rate": 5.259891525067179e-05,
"loss": 2.3999,
"step": 1804
},
{
"epoch": 1.96,
"learning_rate": 5.251307435586368e-05,
"loss": 2.4202,
"step": 1806
},
{
"epoch": 1.96,
"learning_rate": 5.2427226034452614e-05,
"loss": 2.4635,
"step": 1808
},
{
"epoch": 1.96,
"learning_rate": 5.23413705401364e-05,
"loss": 2.588,
"step": 1810
},
{
"epoch": 1.96,
"learning_rate": 5.225550812663399e-05,
"loss": 2.3913,
"step": 1812
},
{
"epoch": 1.96,
"learning_rate": 5.216963904768485e-05,
"loss": 2.3559,
"step": 1814
},
{
"epoch": 1.97,
"learning_rate": 5.2083763557048056e-05,
"loss": 2.3511,
"step": 1816
},
{
"epoch": 1.97,
"learning_rate": 5.1997881908501736e-05,
"loss": 2.0888,
"step": 1818
},
{
"epoch": 1.97,
"learning_rate": 5.191199435584211e-05,
"loss": 2.3658,
"step": 1820
},
{
"epoch": 1.97,
"learning_rate": 5.182610115288295e-05,
"loss": 2.4578,
"step": 1822
},
{
"epoch": 1.98,
"learning_rate": 5.174020255345464e-05,
"loss": 2.5353,
"step": 1824
},
{
"epoch": 1.98,
"learning_rate": 5.1654298811403556e-05,
"loss": 2.3506,
"step": 1826
},
{
"epoch": 1.98,
"learning_rate": 5.1568390180591265e-05,
"loss": 2.43,
"step": 1828
},
{
"epoch": 1.98,
"learning_rate": 5.148247691489377e-05,
"loss": 2.5092,
"step": 1830
},
{
"epoch": 1.98,
"learning_rate": 5.139655926820078e-05,
"loss": 2.4586,
"step": 1832
},
{
"epoch": 1.99,
"learning_rate": 5.131063749441496e-05,
"loss": 2.3623,
"step": 1834
},
{
"epoch": 1.99,
"learning_rate": 5.1224711847451145e-05,
"loss": 2.5055,
"step": 1836
},
{
"epoch": 1.99,
"learning_rate": 5.113878258123563e-05,
"loss": 2.3857,
"step": 1838
},
{
"epoch": 1.99,
"learning_rate": 5.105284994970543e-05,
"loss": 2.6249,
"step": 1840
},
{
"epoch": 2.0,
"learning_rate": 5.096691420680745e-05,
"loss": 2.408,
"step": 1842
},
{
"epoch": 2.0,
"learning_rate": 5.088097560649784e-05,
"loss": 2.5748,
"step": 1844
},
{
"epoch": 2.0,
"learning_rate": 5.0795034402741185e-05,
"loss": 2.2292,
"step": 1846
},
{
"epoch": 2.0,
"learning_rate": 5.06661182712092e-05,
"loss": 3.0448,
"step": 1848
},
{
"epoch": 2.0,
"learning_rate": 5.0580171669978546e-05,
"loss": 2.4891,
"step": 1850
},
{
"epoch": 2.01,
"learning_rate": 5.049422335423252e-05,
"loss": 2.5769,
"step": 1852
},
{
"epoch": 2.01,
"learning_rate": 5.04082735779644e-05,
"loss": 2.3243,
"step": 1854
},
{
"epoch": 2.01,
"learning_rate": 5.032232259517179e-05,
"loss": 2.5441,
"step": 1856
},
{
"epoch": 2.01,
"learning_rate": 5.023637065985585e-05,
"loss": 2.4429,
"step": 1858
},
{
"epoch": 2.02,
"learning_rate": 5.015041802602057e-05,
"loss": 2.3943,
"step": 1860
},
{
"epoch": 2.02,
"learning_rate": 5.0064464947672e-05,
"loss": 2.3169,
"step": 1862
},
{
"epoch": 2.02,
"learning_rate": 4.9978511678817496e-05,
"loss": 2.4604,
"step": 1864
},
{
"epoch": 2.02,
"learning_rate": 4.989255847346499e-05,
"loss": 2.3745,
"step": 1866
},
{
"epoch": 2.02,
"learning_rate": 4.980660558562222e-05,
"loss": 2.3082,
"step": 1868
},
{
"epoch": 2.03,
"learning_rate": 4.972065326929598e-05,
"loss": 2.4983,
"step": 1870
},
{
"epoch": 2.03,
"learning_rate": 4.963470177849135e-05,
"loss": 2.3494,
"step": 1872
},
{
"epoch": 2.03,
"learning_rate": 4.954875136721104e-05,
"loss": 2.2882,
"step": 1874
},
{
"epoch": 2.03,
"learning_rate": 4.946280228945453e-05,
"loss": 2.401,
"step": 1876
},
{
"epoch": 2.03,
"learning_rate": 4.9376854799217327e-05,
"loss": 2.3044,
"step": 1878
},
{
"epoch": 2.04,
"learning_rate": 4.929090915049029e-05,
"loss": 2.51,
"step": 1880
},
{
"epoch": 2.04,
"learning_rate": 4.920496559725883e-05,
"loss": 2.5332,
"step": 1882
},
{
"epoch": 2.04,
"learning_rate": 4.911902439350217e-05,
"loss": 2.449,
"step": 1884
},
{
"epoch": 2.04,
"learning_rate": 4.9033085793192574e-05,
"loss": 2.4766,
"step": 1886
},
{
"epoch": 2.05,
"learning_rate": 4.894715005029459e-05,
"loss": 2.5255,
"step": 1888
},
{
"epoch": 2.05,
"learning_rate": 4.8861217418764374e-05,
"loss": 2.4169,
"step": 1890
},
{
"epoch": 2.05,
"learning_rate": 4.8775288152548866e-05,
"loss": 2.4542,
"step": 1892
},
{
"epoch": 2.05,
"learning_rate": 4.868936250558506e-05,
"loss": 2.3703,
"step": 1894
},
{
"epoch": 2.05,
"learning_rate": 4.8603440731799216e-05,
"loss": 2.4712,
"step": 1896
},
{
"epoch": 2.06,
"learning_rate": 4.851752308510624e-05,
"loss": 2.3871,
"step": 1898
},
{
"epoch": 2.06,
"learning_rate": 4.843160981940875e-05,
"loss": 2.3593,
"step": 1900
},
{
"epoch": 2.06,
"learning_rate": 4.8345701188596456e-05,
"loss": 2.3834,
"step": 1902
},
{
"epoch": 2.06,
"learning_rate": 4.825979744654536e-05,
"loss": 2.5184,
"step": 1904
},
{
"epoch": 2.06,
"learning_rate": 4.817389884711705e-05,
"loss": 2.3947,
"step": 1906
},
{
"epoch": 2.07,
"learning_rate": 4.8088005644157895e-05,
"loss": 2.3947,
"step": 1908
},
{
"epoch": 2.07,
"learning_rate": 4.800211809149829e-05,
"loss": 2.332,
"step": 1910
},
{
"epoch": 2.07,
"learning_rate": 4.791623644295195e-05,
"loss": 2.4736,
"step": 1912
},
{
"epoch": 2.07,
"learning_rate": 4.7830360952315164e-05,
"loss": 2.3257,
"step": 1914
},
{
"epoch": 2.08,
"learning_rate": 4.774449187336602e-05,
"loss": 2.4029,
"step": 1916
},
{
"epoch": 2.08,
"learning_rate": 4.765862945986362e-05,
"loss": 2.2457,
"step": 1918
},
{
"epoch": 2.08,
"learning_rate": 4.7572773965547384e-05,
"loss": 2.4841,
"step": 1920
},
{
"epoch": 2.08,
"learning_rate": 4.7486925644136324e-05,
"loss": 2.2552,
"step": 1922
},
{
"epoch": 2.08,
"learning_rate": 4.740108474932822e-05,
"loss": 2.2952,
"step": 1924
},
{
"epoch": 2.09,
"learning_rate": 4.731525153479891e-05,
"loss": 2.6216,
"step": 1926
},
{
"epoch": 2.09,
"learning_rate": 4.72294262542015e-05,
"loss": 2.6685,
"step": 1928
},
{
"epoch": 2.09,
"learning_rate": 4.7143609161165736e-05,
"loss": 2.5377,
"step": 1930
},
{
"epoch": 2.09,
"learning_rate": 4.705780050929708e-05,
"loss": 2.3924,
"step": 1932
},
{
"epoch": 2.1,
"learning_rate": 4.697200055217612e-05,
"loss": 2.3375,
"step": 1934
},
{
"epoch": 2.1,
"learning_rate": 4.688620954335766e-05,
"loss": 2.5853,
"step": 1936
},
{
"epoch": 2.1,
"learning_rate": 4.680042773637018e-05,
"loss": 2.4731,
"step": 1938
},
{
"epoch": 2.1,
"learning_rate": 4.671465538471486e-05,
"loss": 2.3143,
"step": 1940
},
{
"epoch": 2.1,
"learning_rate": 4.6628892741865e-05,
"loss": 2.3821,
"step": 1942
},
{
"epoch": 2.11,
"learning_rate": 4.654314006126516e-05,
"loss": 2.5513,
"step": 1944
},
{
"epoch": 2.11,
"learning_rate": 4.645739759633054e-05,
"loss": 2.3581,
"step": 1946
},
{
"epoch": 2.11,
"learning_rate": 4.637166560044605e-05,
"loss": 2.3677,
"step": 1948
},
{
"epoch": 2.11,
"learning_rate": 4.628594432696573e-05,
"loss": 2.2615,
"step": 1950
},
{
"epoch": 2.11,
"learning_rate": 4.620023402921191e-05,
"loss": 2.4965,
"step": 1952
},
{
"epoch": 2.12,
"learning_rate": 4.61145349604745e-05,
"loss": 2.3364,
"step": 1954
},
{
"epoch": 2.12,
"learning_rate": 4.602884737401022e-05,
"loss": 2.1789,
"step": 1956
},
{
"epoch": 2.12,
"learning_rate": 4.594317152304183e-05,
"loss": 2.229,
"step": 1958
},
{
"epoch": 2.12,
"learning_rate": 4.5857507660757424e-05,
"loss": 2.7305,
"step": 1960
},
{
"epoch": 2.13,
"learning_rate": 4.5771856040309716e-05,
"loss": 2.4481,
"step": 1962
},
{
"epoch": 2.13,
"learning_rate": 4.568621691481519e-05,
"loss": 2.5205,
"step": 1964
},
{
"epoch": 2.13,
"learning_rate": 4.5600590537353397e-05,
"loss": 2.4775,
"step": 1966
},
{
"epoch": 2.13,
"learning_rate": 4.551497716096624e-05,
"loss": 2.5465,
"step": 1968
},
{
"epoch": 2.13,
"learning_rate": 4.5429377038657214e-05,
"loss": 2.4793,
"step": 1970
},
{
"epoch": 2.14,
"learning_rate": 4.534379042339063e-05,
"loss": 2.5299,
"step": 1972
},
{
"epoch": 2.14,
"learning_rate": 4.5258217568090876e-05,
"loss": 2.4299,
"step": 1974
},
{
"epoch": 2.14,
"learning_rate": 4.517265872564167e-05,
"loss": 2.4201,
"step": 1976
},
{
"epoch": 2.14,
"learning_rate": 4.508711414888534e-05,
"loss": 2.3312,
"step": 1978
},
{
"epoch": 2.15,
"learning_rate": 4.5001584090622065e-05,
"loss": 2.3622,
"step": 1980
},
{
"epoch": 2.15,
"learning_rate": 4.491606880360909e-05,
"loss": 2.3231,
"step": 1982
},
{
"epoch": 2.15,
"learning_rate": 4.483056854055999e-05,
"loss": 2.3689,
"step": 1984
},
{
"epoch": 2.15,
"learning_rate": 4.474508355414404e-05,
"loss": 2.4958,
"step": 1986
},
{
"epoch": 2.15,
"learning_rate": 4.465961409698525e-05,
"loss": 2.5048,
"step": 1988
},
{
"epoch": 2.16,
"learning_rate": 4.457416042166181e-05,
"loss": 2.4204,
"step": 1990
},
{
"epoch": 2.16,
"learning_rate": 4.448872278070523e-05,
"loss": 2.5445,
"step": 1992
},
{
"epoch": 2.16,
"learning_rate": 4.4403301426599706e-05,
"loss": 2.1568,
"step": 1994
},
{
"epoch": 2.16,
"learning_rate": 4.431789661178121e-05,
"loss": 2.2937,
"step": 1996
},
{
"epoch": 2.16,
"learning_rate": 4.423250858863689e-05,
"loss": 2.336,
"step": 1998
},
{
"epoch": 2.17,
"learning_rate": 4.4147137609504266e-05,
"loss": 2.6334,
"step": 2000
},
{
"epoch": 2.17,
"learning_rate": 4.4061783926670496e-05,
"loss": 2.2973,
"step": 2002
},
{
"epoch": 2.17,
"learning_rate": 4.3976447792371624e-05,
"loss": 2.5251,
"step": 2004
},
{
"epoch": 2.17,
"learning_rate": 4.38911294587918e-05,
"loss": 2.4591,
"step": 2006
},
{
"epoch": 2.18,
"learning_rate": 4.38058291780626e-05,
"loss": 2.4621,
"step": 2008
},
{
"epoch": 2.18,
"learning_rate": 4.372054720226227e-05,
"loss": 2.3918,
"step": 2010
},
{
"epoch": 2.18,
"learning_rate": 4.3635283783414924e-05,
"loss": 2.357,
"step": 2012
},
{
"epoch": 2.18,
"learning_rate": 4.3550039173489845e-05,
"loss": 2.3846,
"step": 2014
},
{
"epoch": 2.18,
"learning_rate": 4.346481362440074e-05,
"loss": 2.421,
"step": 2016
},
{
"epoch": 2.19,
"learning_rate": 4.337960738800498e-05,
"loss": 2.523,
"step": 2018
},
{
"epoch": 2.19,
"learning_rate": 4.3294420716102895e-05,
"loss": 2.3806,
"step": 2020
},
{
"epoch": 2.19,
"learning_rate": 4.320925386043696e-05,
"loss": 2.4476,
"step": 2022
},
{
"epoch": 2.19,
"learning_rate": 4.3124107072691055e-05,
"loss": 2.4976,
"step": 2024
},
{
"epoch": 2.19,
"learning_rate": 4.3038980604489885e-05,
"loss": 2.6172,
"step": 2026
},
{
"epoch": 2.2,
"learning_rate": 4.2953874707397964e-05,
"loss": 2.4828,
"step": 2028
},
{
"epoch": 2.2,
"learning_rate": 4.28687896329191e-05,
"loss": 2.3404,
"step": 2030
},
{
"epoch": 2.2,
"learning_rate": 4.278372563249552e-05,
"loss": 2.4298,
"step": 2032
},
{
"epoch": 2.2,
"learning_rate": 4.269868295750722e-05,
"loss": 2.5339,
"step": 2034
},
{
"epoch": 2.21,
"learning_rate": 4.261366185927114e-05,
"loss": 2.2219,
"step": 2036
},
{
"epoch": 2.21,
"learning_rate": 4.252866258904045e-05,
"loss": 2.3277,
"step": 2038
},
{
"epoch": 2.21,
"learning_rate": 4.2443685398003835e-05,
"loss": 2.3991,
"step": 2040
},
{
"epoch": 2.21,
"learning_rate": 4.235873053728475e-05,
"loss": 2.3344,
"step": 2042
},
{
"epoch": 2.21,
"learning_rate": 4.227379825794063e-05,
"loss": 2.3301,
"step": 2044
},
{
"epoch": 2.22,
"learning_rate": 4.218888881096217e-05,
"loss": 2.4981,
"step": 2046
},
{
"epoch": 2.22,
"learning_rate": 4.21040024472726e-05,
"loss": 2.4976,
"step": 2048
},
{
"epoch": 2.22,
"learning_rate": 4.201913941772696e-05,
"loss": 2.6412,
"step": 2050
},
{
"epoch": 2.22,
"learning_rate": 4.193429997311132e-05,
"loss": 2.3847,
"step": 2052
},
{
"epoch": 2.23,
"learning_rate": 4.184948436414203e-05,
"loss": 2.2447,
"step": 2054
},
{
"epoch": 2.23,
"learning_rate": 4.1764692841464995e-05,
"loss": 2.5925,
"step": 2056
},
{
"epoch": 2.23,
"learning_rate": 4.1679925655655e-05,
"loss": 2.3348,
"step": 2058
},
{
"epoch": 2.23,
"learning_rate": 4.159518305721487e-05,
"loss": 2.5109,
"step": 2060
},
{
"epoch": 2.23,
"learning_rate": 4.151046529657477e-05,
"loss": 2.5121,
"step": 2062
},
{
"epoch": 2.24,
"learning_rate": 4.142577262409144e-05,
"loss": 2.4563,
"step": 2064
},
{
"epoch": 2.24,
"learning_rate": 4.134110529004753e-05,
"loss": 2.4912,
"step": 2066
},
{
"epoch": 2.24,
"learning_rate": 4.1256463544650783e-05,
"loss": 2.4457,
"step": 2068
},
{
"epoch": 2.24,
"learning_rate": 4.117184763803334e-05,
"loss": 2.4428,
"step": 2070
},
{
"epoch": 2.24,
"learning_rate": 4.108725782025092e-05,
"loss": 2.3274,
"step": 2072
},
{
"epoch": 2.25,
"learning_rate": 4.1002694341282276e-05,
"loss": 2.4062,
"step": 2074
},
{
"epoch": 2.25,
"learning_rate": 4.0918157451028185e-05,
"loss": 2.6018,
"step": 2076
},
{
"epoch": 2.25,
"learning_rate": 4.083364739931092e-05,
"loss": 2.4493,
"step": 2078
},
{
"epoch": 2.25,
"learning_rate": 4.0749164435873425e-05,
"loss": 2.5798,
"step": 2080
},
{
"epoch": 2.26,
"learning_rate": 4.0664708810378625e-05,
"loss": 2.2729,
"step": 2082
},
{
"epoch": 2.26,
"learning_rate": 4.05802807724086e-05,
"loss": 2.3844,
"step": 2084
},
{
"epoch": 2.26,
"learning_rate": 4.049588057146394e-05,
"loss": 2.402,
"step": 2086
},
{
"epoch": 2.26,
"learning_rate": 4.041150845696296e-05,
"loss": 2.4163,
|
"step": 2088 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 4.032716467824099e-05, |
|
"loss": 2.428, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.0242849484549623e-05, |
|
"loss": 2.4803, |
|
"step": 2092 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.015856312505593e-05, |
|
"loss": 2.2398, |
|
"step": 2094 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 4.0074305848841814e-05, |
|
"loss": 2.2521, |
|
"step": 2096 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.9990077904903254e-05, |
|
"loss": 2.3918, |
|
"step": 2098 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.99058795421495e-05, |
|
"loss": 2.519, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.982171100940239e-05, |
|
"loss": 2.4067, |
|
"step": 2102 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.973757255539562e-05, |
|
"loss": 2.4408, |
|
"step": 2104 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.965346442877403e-05, |
|
"loss": 2.4309, |
|
"step": 2106 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 3.9569386878092774e-05, |
|
"loss": 2.2934, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.94853401518167e-05, |
|
"loss": 2.356, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.94013244983195e-05, |
|
"loss": 2.3314, |
|
"step": 2112 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.9317340165883156e-05, |
|
"loss": 2.4871, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.923338740269696e-05, |
|
"loss": 2.4932, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.9149466456857e-05, |
|
"loss": 2.2894, |
|
"step": 2118 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.906557757636526e-05, |
|
"loss": 2.4726, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.898172100912908e-05, |
|
"loss": 2.3281, |
|
"step": 2122 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.8897897002960195e-05, |
|
"loss": 2.6179, |
|
"step": 2124 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.8814105805574166e-05, |
|
"loss": 2.3778, |
|
"step": 2126 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.873034766458959e-05, |
|
"loss": 2.4198, |
|
"step": 2128 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.86466228275274e-05, |
|
"loss": 2.318, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.856293154181009e-05, |
|
"loss": 2.3894, |
|
"step": 2132 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.847927405476097e-05, |
|
"loss": 2.388, |
|
"step": 2134 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 3.839565061360352e-05, |
|
"loss": 2.4026, |
|
"step": 2136 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.831206146546059e-05, |
|
"loss": 2.3764, |
|
"step": 2138 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.82285068573537e-05, |
|
"loss": 2.2921, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.814498703620226e-05, |
|
"loss": 2.5344, |
|
"step": 2142 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.80615022488229e-05, |
|
"loss": 2.3933, |
|
"step": 2144 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.797805274192875e-05, |
|
"loss": 2.5373, |
|
"step": 2146 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.789463876212863e-05, |
|
"loss": 2.2517, |
|
"step": 2148 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.781126055592641e-05, |
|
"loss": 2.5876, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.772791836972019e-05, |
|
"loss": 2.5212, |
|
"step": 2152 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.764461244980169e-05, |
|
"loss": 2.4556, |
|
"step": 2154 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.7561343042355415e-05, |
|
"loss": 2.4632, |
|
"step": 2156 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.747811039345798e-05, |
|
"loss": 2.333, |
|
"step": 2158 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.739491474907735e-05, |
|
"loss": 2.3092, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.731175635507219e-05, |
|
"loss": 2.4144, |
|
"step": 2162 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.722863545719103e-05, |
|
"loss": 2.51, |
|
"step": 2164 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.7145552301071594e-05, |
|
"loss": 2.449, |
|
"step": 2166 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.706250713224011e-05, |
|
"loss": 2.4989, |
|
"step": 2168 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.697950019611054e-05, |
|
"loss": 2.4551, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.689653173798381e-05, |
|
"loss": 2.4758, |
|
"step": 2172 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.681360200304718e-05, |
|
"loss": 2.5718, |
|
"step": 2174 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.673071123637347e-05, |
|
"loss": 2.4931, |
|
"step": 2176 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.664785968292036e-05, |
|
"loss": 2.5029, |
|
"step": 2178 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.656504758752961e-05, |
|
"loss": 2.4353, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.64822751949264e-05, |
|
"loss": 2.2827, |
|
"step": 2182 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.639954274971854e-05, |
|
"loss": 2.4667, |
|
"step": 2184 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.631685049639586e-05, |
|
"loss": 2.3599, |
|
"step": 2186 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.623419867932937e-05, |
|
"loss": 2.3421, |
|
"step": 2188 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.6151587542770567e-05, |
|
"loss": 2.4358, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.6069017330850754e-05, |
|
"loss": 2.4472, |
|
"step": 2192 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.598648828758031e-05, |
|
"loss": 2.4576, |
|
"step": 2194 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.590400065684792e-05, |
|
"loss": 2.5057, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.582155468241993e-05, |
|
"loss": 2.4759, |
|
"step": 2198 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.573915060793949e-05, |
|
"loss": 2.1946, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.5656788676926066e-05, |
|
"loss": 2.562, |
|
"step": 2202 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.557446913277448e-05, |
|
"loss": 2.7023, |
|
"step": 2204 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.5492192218754326e-05, |
|
"loss": 2.5342, |
|
"step": 2206 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.540995817800917e-05, |
|
"loss": 2.4178, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 3.532776725355601e-05, |
|
"loss": 2.4072, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.5245619688284274e-05, |
|
"loss": 2.3832, |
|
"step": 2212 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.516351572495535e-05, |
|
"loss": 2.5958, |
|
"step": 2214 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.508145560620173e-05, |
|
"loss": 2.3778, |
|
"step": 2216 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.499943957452637e-05, |
|
"loss": 2.3753, |
|
"step": 2218 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.4917467872301934e-05, |
|
"loss": 2.4079, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.4835540741770054e-05, |
|
"loss": 2.4669, |
|
"step": 2222 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.4753658425040656e-05, |
|
"loss": 2.4278, |
|
"step": 2224 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.467182116409127e-05, |
|
"loss": 2.4575, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.459002920076625e-05, |
|
"loss": 2.5202, |
|
"step": 2228 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.450828277677606e-05, |
|
"loss": 2.4366, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.442658213369662e-05, |
|
"loss": 2.4061, |
|
"step": 2232 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.434492751296856e-05, |
|
"loss": 2.393, |
|
"step": 2234 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.426331915589651e-05, |
|
"loss": 2.3859, |
|
"step": 2236 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 3.418175730364836e-05, |
|
"loss": 2.516, |
|
"step": 2238 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.4100242197254564e-05, |
|
"loss": 2.2877, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.401877407760747e-05, |
|
"loss": 2.3637, |
|
"step": 2242 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.393735318546054e-05, |
|
"loss": 2.4131, |
|
"step": 2244 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.38559797614277e-05, |
|
"loss": 2.4939, |
|
"step": 2246 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.377465404598253e-05, |
|
"loss": 2.4648, |
|
"step": 2248 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.369337627945774e-05, |
|
"loss": 2.504, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.3612146702044226e-05, |
|
"loss": 2.5889, |
|
"step": 2252 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.3530965553790526e-05, |
|
"loss": 2.5468, |
|
"step": 2254 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.3449833074602064e-05, |
|
"loss": 2.3507, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.336874950424046e-05, |
|
"loss": 2.418, |
|
"step": 2258 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.328771508232273e-05, |
|
"loss": 2.424, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.320673004832071e-05, |
|
"loss": 2.4613, |
|
"step": 2262 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.312579464156025e-05, |
|
"loss": 2.5197, |
|
"step": 2264 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.304490910122058e-05, |
|
"loss": 2.3393, |
|
"step": 2266 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.2964073666333536e-05, |
|
"loss": 2.2088, |
|
"step": 2268 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.2883288575782875e-05, |
|
"loss": 2.4951, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.2802554068303596e-05, |
|
"loss": 2.3728, |
|
"step": 2272 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.272187038248121e-05, |
|
"loss": 2.2495, |
|
"step": 2274 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.264123775675106e-05, |
|
"loss": 2.3507, |
|
"step": 2276 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.256065642939756e-05, |
|
"loss": 2.3987, |
|
"step": 2278 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.248012663855353e-05, |
|
"loss": 2.4537, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.239964862219954e-05, |
|
"loss": 2.4883, |
|
"step": 2282 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.231922261816311e-05, |
|
"loss": 2.1862, |
|
"step": 2284 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.223884886411807e-05, |
|
"loss": 2.4353, |
|
"step": 2286 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.215852759758381e-05, |
|
"loss": 2.2626, |
|
"step": 2288 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.2078259055924675e-05, |
|
"loss": 2.3032, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.199804347634915e-05, |
|
"loss": 2.3917, |
|
"step": 2292 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.191788109590922e-05, |
|
"loss": 2.4218, |
|
"step": 2294 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.183777215149962e-05, |
|
"loss": 2.3915, |
|
"step": 2296 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.175771687985726e-05, |
|
"loss": 2.5188, |
|
"step": 2298 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.167771551756036e-05, |
|
"loss": 2.4356, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.159776830102784e-05, |
|
"loss": 2.3713, |
|
"step": 2302 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.1517875466518626e-05, |
|
"loss": 2.488, |
|
"step": 2304 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.1438037250130944e-05, |
|
"loss": 2.5058, |
|
"step": 2306 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.135825388780159e-05, |
|
"loss": 2.5503, |
|
"step": 2308 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.127852561530526e-05, |
|
"loss": 2.4376, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 3.1198852668253856e-05, |
|
"loss": 2.4054, |
|
"step": 2312 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.111923528209577e-05, |
|
"loss": 2.3189, |
|
"step": 2314 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.103967369211525e-05, |
|
"loss": 2.3736, |
|
"step": 2316 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.096016813343158e-05, |
|
"loss": 2.4304, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.08807188409985e-05, |
|
"loss": 2.3717, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.080132604960349e-05, |
|
"loss": 2.3996, |
|
"step": 2322 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.072198999386704e-05, |
|
"loss": 2.5024, |
|
"step": 2324 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.064271090824197e-05, |
|
"loss": 2.3109, |
|
"step": 2326 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.056348902701274e-05, |
|
"loss": 2.376, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.0484324584294783e-05, |
|
"loss": 2.5198, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.040521781403377e-05, |
|
"loss": 2.2787, |
|
"step": 2332 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.0326168950004964e-05, |
|
"loss": 2.3356, |
|
"step": 2334 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.0247178225812435e-05, |
|
"loss": 2.4738, |
|
"step": 2336 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.0168245874888557e-05, |
|
"loss": 2.41, |
|
"step": 2338 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 3.00893721304931e-05, |
|
"loss": 2.4233, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.0010557225712667e-05, |
|
"loss": 2.4596, |
|
"step": 2342 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.993180139345999e-05, |
|
"loss": 2.5253, |
|
"step": 2344 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.9853104866473246e-05, |
|
"loss": 2.3779, |
|
"step": 2346 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.977446787731532e-05, |
|
"loss": 2.397, |
|
"step": 2348 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.9695890658373164e-05, |
|
"loss": 2.4016, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.96173734418571e-05, |
|
"loss": 2.3168, |
|
"step": 2352 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.9538916459800136e-05, |
|
"loss": 2.2834, |
|
"step": 2354 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.9460519944057284e-05, |
|
"loss": 2.4692, |
|
"step": 2356 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.9382184126304834e-05, |
|
"loss": 2.1485, |
|
"step": 2358 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9303909238039718e-05, |
|
"loss": 2.1016, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9225695510578843e-05, |
|
"loss": 2.1819, |
|
"step": 2362 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9147543175058335e-05, |
|
"loss": 2.4526, |
|
"step": 2364 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.9069452462432883e-05, |
|
"loss": 2.5037, |
|
"step": 2366 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.899142360347511e-05, |
|
"loss": 2.223, |
|
"step": 2368 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.8913456828774854e-05, |
|
"loss": 2.4296, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.883555236873845e-05, |
|
"loss": 2.3899, |
|
"step": 2372 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.875771045358805e-05, |
|
"loss": 2.4758, |
|
"step": 2374 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.8679931313361053e-05, |
|
"loss": 2.4788, |
|
"step": 2376 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.860221517790933e-05, |
|
"loss": 2.4869, |
|
"step": 2378 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.8524562276898513e-05, |
|
"loss": 2.4313, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.8446972839807384e-05, |
|
"loss": 2.2432, |
|
"step": 2382 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.8369447095927195e-05, |
|
"loss": 2.2748, |
|
"step": 2384 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.8291985274360983e-05, |
|
"loss": 2.6419, |
|
"step": 2386 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.8214587604022847e-05, |
|
"loss": 2.4079, |
|
"step": 2388 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.8137254313637306e-05, |
|
"loss": 2.3603, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.805998563173866e-05, |
|
"loss": 2.2753, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.798278178667028e-05, |
|
"loss": 2.3884, |
|
"step": 2394 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.790564300658387e-05, |
|
"loss": 2.5817, |
|
"step": 2396 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.7828569519438942e-05, |
|
"loss": 2.4844, |
|
"step": 2398 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.775156155300197e-05, |
|
"loss": 2.4505, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.7674619334845876e-05, |
|
"loss": 2.443, |
|
"step": 2402 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.7597743092349217e-05, |
|
"loss": 2.3359, |
|
"step": 2404 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.752093305269565e-05, |
|
"loss": 2.4345, |
|
"step": 2406 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.7444189442873115e-05, |
|
"loss": 2.2828, |
|
"step": 2408 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.7367512489673312e-05, |
|
"loss": 2.5291, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.7290902419690895e-05, |
|
"loss": 2.391, |
|
"step": 2412 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.7214359459322924e-05, |
|
"loss": 2.06, |
|
"step": 2414 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.7137883834768073e-05, |
|
"loss": 2.6396, |
|
"step": 2416 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.7061475772026086e-05, |
|
"loss": 2.4143, |
|
"step": 2418 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.698513549689703e-05, |
|
"loss": 2.4331, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.6908863234980636e-05, |
|
"loss": 2.4801, |
|
"step": 2422 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.6832659211675627e-05, |
|
"loss": 2.3912, |
|
"step": 2424 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.67565236521791e-05, |
|
"loss": 2.4359, |
|
"step": 2426 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.668045678148584e-05, |
|
"loss": 2.5352, |
|
"step": 2428 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.6604458824387614e-05, |
|
"loss": 2.4982, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.6528530005472518e-05, |
|
"loss": 2.4898, |
|
"step": 2432 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.6452670549124375e-05, |
|
"loss": 2.5291, |
|
"step": 2434 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.637688067952204e-05, |
|
"loss": 2.3178, |
|
"step": 2436 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.630116062063867e-05, |
|
"loss": 2.5588, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.622551059624113e-05, |
|
"loss": 2.3862, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.614993082988937e-05, |
|
"loss": 2.3327, |
|
"step": 2442 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.607442154493568e-05, |
|
"loss": 2.3323, |
|
"step": 2444 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.599898296452406e-05, |
|
"loss": 2.237, |
|
"step": 2446 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.592361531158952e-05, |
|
"loss": 2.1117, |
|
"step": 2448 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.5848318808857606e-05, |
|
"loss": 2.3355, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.5773093678843473e-05, |
|
"loss": 2.3701, |
|
"step": 2452 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.5697940143851375e-05, |
|
"loss": 2.4158, |
|
"step": 2454 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.5622858425974018e-05, |
|
"loss": 2.4807, |
|
"step": 2456 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.5547848747091897e-05, |
|
"loss": 2.2695, |
|
"step": 2458 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.5472911328872574e-05, |
|
"loss": 2.4957, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.5398046392770054e-05, |
|
"loss": 2.3591, |
|
"step": 2462 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.532325416002419e-05, |
|
"loss": 2.6757, |
|
"step": 2464 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.524853485166e-05, |
|
"loss": 2.3271, |
|
"step": 2466 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.517388868848692e-05, |
|
"loss": 2.3084, |
|
"step": 2468 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.5099315891098264e-05, |
|
"loss": 2.3597, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.5024816679870556e-05, |
|
"loss": 2.5597, |
|
"step": 2472 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.495039127496287e-05, |
|
"loss": 2.4667, |
|
"step": 2474 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.4876039896316123e-05, |
|
"loss": 2.2991, |
|
"step": 2476 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.4801762763652474e-05, |
|
"loss": 2.27, |
|
"step": 2478 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.4727560096474706e-05, |
|
"loss": 2.5184, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.4653432114065544e-05, |
|
"loss": 2.5034, |
|
"step": 2482 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.457937903548695e-05, |
|
"loss": 2.3994, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 2.450540107957961e-05, |
|
"loss": 2.229, |
|
"step": 2486 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.443149846496212e-05, |
|
"loss": 2.4133, |
|
"step": 2488 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.4357671410030526e-05, |
|
"loss": 2.5226, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.4283920132957482e-05, |
|
"loss": 2.3836, |
|
"step": 2492 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.42102448516918e-05, |
|
"loss": 2.4287, |
|
"step": 2494 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.413664578395761e-05, |
|
"loss": 2.4322, |
|
"step": 2496 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.4063123147253923e-05, |
|
"loss": 2.3545, |
|
"step": 2498 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.398967715885379e-05, |
|
"loss": 2.3359, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.391630803580382e-05, |
|
"loss": 2.4889, |
|
"step": 2502 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.3843015994923412e-05, |
|
"loss": 2.3731, |
|
"step": 2504 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.3769801252804213e-05, |
|
"loss": 2.2901, |
|
"step": 2506 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.3696664025809458e-05, |
|
"loss": 2.3341, |
|
"step": 2508 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.3623604530073245e-05, |
|
"loss": 2.3624, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.3550622981499988e-05, |
|
"loss": 2.4377, |
|
"step": 2512 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.3477719595763774e-05, |
|
"loss": 2.2931, |
|
"step": 2514 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.340489458830772e-05, |
|
"loss": 2.3726, |
|
"step": 2516 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.3332148174343254e-05, |
|
"loss": 2.2644, |
|
"step": 2518 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.3259480568849586e-05, |
|
"loss": 2.5434, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.3186891986573035e-05, |
|
"loss": 2.2445, |
|
"step": 2522 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 2.3114382642026404e-05, |
|
"loss": 2.3012, |
|
"step": 2524 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.3041952749488304e-05, |
|
"loss": 2.298, |
|
"step": 2526 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.2969602523002543e-05, |
|
"loss": 2.3226, |
|
"step": 2528 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.2897332176377528e-05, |
|
"loss": 2.4809, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.2825141923185632e-05, |
|
"loss": 2.3514, |
|
"step": 2532 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.275303197676248e-05, |
|
"loss": 2.4344, |
|
"step": 2534 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.2681002550206355e-05, |
|
"loss": 2.3313, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.2609053856377714e-05, |
|
"loss": 2.1924, |
|
"step": 2538 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.2537186107898313e-05, |
|
"loss": 2.1984, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.2465399517150722e-05, |
|
"loss": 2.4612, |
|
"step": 2542 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.2393694296277707e-05, |
|
"loss": 2.3225, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.2322070657181583e-05, |
|
"loss": 2.3635, |
|
"step": 2546 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.2250528811523513e-05, |
|
"loss": 2.4144, |
|
"step": 2548 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.2179068970722978e-05, |
|
"loss": 2.5847, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.2107691345957133e-05, |
|
"loss": 2.3221, |
|
"step": 2552 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.203639614816017e-05, |
|
"loss": 2.4227, |
|
"step": 2554 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.196518358802268e-05, |
|
"loss": 2.4364, |
|
"step": 2556 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.1894053875991017e-05, |
|
"loss": 2.4288, |
|
"step": 2558 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.182300722226675e-05, |
|
"loss": 2.3931, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1752043836806002e-05, |
|
"loss": 2.4772, |
|
"step": 2562 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1681163929318777e-05, |
|
"loss": 2.3936, |
|
"step": 2564 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1610367709268387e-05, |
|
"loss": 2.3759, |
|
"step": 2566 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1539655385870877e-05, |
|
"loss": 2.4427, |
|
"step": 2568 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.1469027168094347e-05, |
|
"loss": 2.387, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1398483264658313e-05, |
|
"loss": 2.2637, |
|
"step": 2572 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.132802388403319e-05, |
|
"loss": 2.3364, |
|
"step": 2574 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.125764923443953e-05, |
|
"loss": 2.3348, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.118735952384757e-05, |
|
"loss": 2.34, |
|
"step": 2578 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.1117154959976482e-05, |
|
"loss": 2.2867, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.104703575029385e-05, |
|
"loss": 2.4191, |
|
"step": 2582 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.097700210201497e-05, |
|
"loss": 2.2275, |
|
"step": 2584 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.090705422210237e-05, |
|
"loss": 2.6198, |
|
"step": 2586 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.0837192317265016e-05, |
|
"loss": 2.364, |
|
"step": 2588 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.0767416593957894e-05, |
|
"loss": 2.2663, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.0697727258381238e-05, |
|
"loss": 2.2649, |
|
"step": 2592 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.0628124516480046e-05, |
|
"loss": 2.5761, |
|
"step": 2594 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.0558608573943354e-05, |
|
"loss": 2.2132, |
|
"step": 2596 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 2.0489179636203766e-05, |
|
"loss": 2.3719, |
|
"step": 2598 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.0419837908436688e-05, |
|
"loss": 2.4978, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.0350583595559865e-05, |
|
"loss": 2.2988, |
|
"step": 2602 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.0281416902232708e-05, |
|
"loss": 2.255, |
|
"step": 2604 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.021233803285567e-05, |
|
"loss": 2.2799, |
|
"step": 2606 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.014334719156966e-05, |
|
"loss": 2.2972, |
|
"step": 2608 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.0074444582255485e-05, |
|
"loss": 2.4158, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.0005630408533215e-05, |
|
"loss": 2.3353, |
|
"step": 2612 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.9936904873761536e-05, |
|
"loss": 2.3829, |
|
"step": 2614 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.9868268181037185e-05, |
|
"loss": 2.1709, |
|
"step": 2616 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.9799720533194404e-05, |
|
"loss": 2.549, |
|
"step": 2618 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.9731262132804274e-05, |
|
"loss": 2.5804, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.966289318217411e-05, |
|
"loss": 2.5311, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.959461388334686e-05, |
|
"loss": 2.3825, |
|
"step": 2624 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 1.9526424438100642e-05, |
|
"loss": 2.3505, |
|
"step": 2626 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.9458325047947938e-05, |
|
"loss": 2.3793, |
|
"step": 2628 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.9390315914135125e-05, |
|
"loss": 2.2617, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.9322397237641875e-05, |
|
"loss": 2.5081, |
|
"step": 2632 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.925456921918055e-05, |
|
"loss": 2.578, |
|
"step": 2634 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.918683205919557e-05, |
|
"loss": 2.3566, |
|
"step": 2636 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.9119185957862835e-05, |
|
"loss": 2.5683, |
|
"step": 2638 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.9051631115089196e-05, |
|
"loss": 2.213, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.8984167730511825e-05, |
|
"loss": 2.6764, |
|
"step": 2642 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.8916796003497572e-05, |
|
"loss": 2.422, |
|
"step": 2644 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.8849516133142432e-05, |
|
"loss": 2.1258, |
|
"step": 2646 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.8782328318270964e-05, |
|
"loss": 2.505, |
|
"step": 2648 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.8715232757435704e-05, |
|
"loss": 2.2427, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.864822964891651e-05, |
|
"loss": 2.3599, |
|
"step": 2652 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 1.8581319190720035e-05, |
|
"loss": 2.679, |
|
"step": 2654 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.851450158057918e-05, |
|
"loss": 2.3708, |
|
"step": 2656 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.844777701595244e-05, |
|
"loss": 2.414, |
|
"step": 2658 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.83811456940233e-05, |
|
"loss": 2.524, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.8314607811699762e-05, |
|
"loss": 2.4521, |
|
"step": 2662 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.824816356561364e-05, |
|
"loss": 2.3931, |
|
"step": 2664 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8181813152120092e-05, |
|
"loss": 2.3704, |
|
"step": 2666 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8115556767296914e-05, |
|
"loss": 2.5238, |
|
"step": 2668 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.804939460694411e-05, |
|
"loss": 2.5418, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.7983326866583144e-05, |
|
"loss": 2.6173, |
|
"step": 2672 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7917353741456545e-05, |
|
"loss": 2.4272, |
|
"step": 2674 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7851475426527142e-05, |
|
"loss": 2.4986, |
|
"step": 2676 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7785692116477682e-05, |
|
"loss": 2.4365, |
|
"step": 2678 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.772000400571005e-05, |
|
"loss": 2.5869, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.76544112883449e-05, |
|
"loss": 2.4987, |
|
"step": 2682 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.7588914158220898e-05, |
|
"loss": 2.4701, |
|
"step": 2684 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.7523512808894288e-05, |
|
"loss": 2.5142, |
|
"step": 2686 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.7458207433638223e-05, |
|
"loss": 2.518, |
|
"step": 2688 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.7392998225442263e-05, |
|
"loss": 2.386, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.732788537701179e-05, |
|
"loss": 2.2214, |
|
"step": 2692 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.726286908076738e-05, |
|
"loss": 2.358, |
|
"step": 2694 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.7197949528844286e-05, |
|
"loss": 2.5727, |
|
"step": 2696 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.7133126913091903e-05, |
|
"loss": 2.5317, |
|
"step": 2698 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.706840142507315e-05, |
|
"loss": 2.2929, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.700377325606388e-05, |
|
"loss": 2.4207, |
|
"step": 2702 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.6939242597052373e-05, |
|
"loss": 2.4398, |
|
"step": 2704 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.6874809638738754e-05, |
|
"loss": 2.3671, |
|
"step": 2706 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.681047457153444e-05, |
|
"loss": 2.5831, |
|
"step": 2708 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.6746237585561524e-05, |
|
"loss": 2.414, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.6682098870652236e-05, |
|
"loss": 2.2996, |
|
"step": 2712 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.6618058616348492e-05, |
|
"loss": 2.4037, |
|
"step": 2714 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.655411701190115e-05, |
|
"loss": 2.592, |
|
"step": 2716 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.6490274246269533e-05, |
|
"loss": 2.2498, |
|
"step": 2718 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.642653050812094e-05, |
|
"loss": 2.2538, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.636288598583e-05, |
|
"loss": 2.3926, |
|
"step": 2722 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.629934086747813e-05, |
|
"loss": 2.5224, |
|
"step": 2724 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 1.6235895340852964e-05, |
|
"loss": 2.3785, |
|
"step": 2726 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.6172549593447877e-05, |
|
"loss": 2.4254, |
|
"step": 2728 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.6109303812461375e-05, |
|
"loss": 2.2977, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.60461581847965e-05, |
|
"loss": 2.4296, |
|
"step": 2732 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.598311289706033e-05, |
|
"loss": 2.1917, |
|
"step": 2734 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 1.592016813556347e-05, |
|
"loss": 2.361, |
|
"step": 2736 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.5857324086319414e-05, |
|
"loss": 2.3198, |
|
"step": 2738 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.579458093504403e-05, |
|
"loss": 2.3945, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.5731938867155e-05, |
|
"loss": 2.2314, |
|
"step": 2742 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.5669398067771324e-05, |
|
"loss": 2.4571, |
|
"step": 2744 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.560695872171273e-05, |
|
"loss": 2.2473, |
|
"step": 2746 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.5544621013499094e-05, |
|
"loss": 2.4553, |
|
"step": 2748 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.548238512734998e-05, |
|
"loss": 2.3213, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.542025124718401e-05, |
|
"loss": 2.3302, |
|
"step": 2752 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.535821955661839e-05, |
|
"loss": 2.2468, |
|
"step": 2754 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.5296290238968303e-05, |
|
"loss": 2.3087, |
|
"step": 2756 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.5234463477246452e-05, |
|
"loss": 2.4679, |
|
"step": 2758 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.5172739454162405e-05, |
|
"loss": 2.3439, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.5111118352122183e-05, |
|
"loss": 2.2882, |
|
"step": 2762 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.5049600353227588e-05, |
|
"loss": 2.4456, |
|
"step": 2764 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.4988185639275798e-05, |
|
"loss": 2.3367, |
|
"step": 2766 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.4926874391758716e-05, |
|
"loss": 2.3341, |
|
"step": 2768 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.4865666791862521e-05, |
|
"loss": 2.7886, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.4804563020467044e-05, |
|
"loss": 2.6835, |
|
"step": 2772 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.4743563258145353e-05, |
|
"loss": 2.3864, |
|
"step": 2774 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.4682667685163071e-05, |
|
"loss": 2.3261, |
|
"step": 2776 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.4621876481477987e-05, |
|
"loss": 2.4467, |
|
"step": 2778 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.4561189826739446e-05, |
|
"loss": 2.3331, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.45006079002878e-05, |
|
"loss": 2.2346, |
|
"step": 2782 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.4440130881153917e-05, |
|
"loss": 2.3942, |
|
"step": 2784 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.437975894805867e-05, |
|
"loss": 2.441, |
|
"step": 2786 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.4319492279412388e-05, |
|
"loss": 2.3998, |
|
"step": 2788 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.425933105331429e-05, |
|
"loss": 2.518, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.419927544755199e-05, |
|
"loss": 2.3147, |
|
"step": 2792 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.4139325639601015e-05, |
|
"loss": 2.2925, |
|
"step": 2794 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.4079481806624217e-05, |
|
"loss": 2.5182, |
|
"step": 2796 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.4019744125471274e-05, |
|
"loss": 2.4969, |
|
"step": 2798 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.3960112772678125e-05, |
|
"loss": 2.4316, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.3900587924466585e-05, |
|
"loss": 2.3239, |
|
"step": 2802 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.3841169756743649e-05, |
|
"loss": 2.6349, |
|
"step": 2804 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.378185844510107e-05, |
|
"loss": 2.3982, |
|
"step": 2806 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.3722654164814796e-05, |
|
"loss": 2.4663, |
|
"step": 2808 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.366355709084456e-05, |
|
"loss": 2.4762, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.3604567397833201e-05, |
|
"loss": 2.4103, |
|
"step": 2812 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.354568526010624e-05, |
|
"loss": 2.4714, |
|
"step": 2814 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.3486910851671374e-05, |
|
"loss": 2.1582, |
|
"step": 2816 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.342824434621795e-05, |
|
"loss": 2.3474, |
|
"step": 2818 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.3369685917116408e-05, |
|
"loss": 2.3022, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.3311235737417793e-05, |
|
"loss": 2.2013, |
|
"step": 2822 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.3252893979853304e-05, |
|
"loss": 2.5426, |
|
"step": 2824 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.319466081683371e-05, |
|
"loss": 2.3739, |
|
"step": 2826 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.3136536420448841e-05, |
|
"loss": 2.3773, |
|
"step": 2828 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.307852096246711e-05, |
|
"loss": 2.4481, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.302061461433502e-05, |
|
"loss": 2.5957, |
|
"step": 2832 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.2962817547176625e-05, |
|
"loss": 2.5113, |
|
"step": 2834 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.2905129931793009e-05, |
|
"loss": 2.3745, |
|
"step": 2836 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.2847551938661839e-05, |
|
"loss": 2.3667, |
|
"step": 2838 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.2790083737936798e-05, |
|
"loss": 2.3051, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.2732725499447146e-05, |
|
"loss": 2.2803, |
|
"step": 2842 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.2675477392697139e-05, |
|
"loss": 2.3317, |
|
"step": 2844 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.2618339586865625e-05, |
|
"loss": 2.5069, |
|
"step": 2846 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.2561312250805435e-05, |
|
"loss": 2.403, |
|
"step": 2848 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.2504395553043008e-05, |
|
"loss": 2.3479, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.2447589661777759e-05, |
|
"loss": 2.2771, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.239089474488171e-05, |
|
"loss": 2.3368, |
|
"step": 2854 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.2334310969898871e-05, |
|
"loss": 2.3194, |
|
"step": 2856 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.227783850404487e-05, |
|
"loss": 2.3038, |
|
"step": 2858 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.2221477514206337e-05, |
|
"loss": 2.5084, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.216522816694053e-05, |
|
"loss": 2.2501, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.2109090628474718e-05, |
|
"loss": 2.2798, |
|
"step": 2864 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.2053065064705805e-05, |
|
"loss": 2.2456, |
|
"step": 2866 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.1997151641199772e-05, |
|
"loss": 2.4106, |
|
"step": 2868 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.1941350523191208e-05, |
|
"loss": 2.4705, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.1885661875582783e-05, |
|
"loss": 2.5891, |
|
"step": 2872 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 1.183008586294485e-05, |
|
"loss": 2.2367, |
|
"step": 2874 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.1774622649514889e-05, |
|
"loss": 2.5675, |
|
"step": 2876 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.1719272399197023e-05, |
|
"loss": 2.4596, |
|
"step": 2878 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.166403527556153e-05, |
|
"loss": 2.2995, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.1608911441844429e-05, |
|
"loss": 2.2225, |
|
"step": 2882 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.155390106094692e-05, |
|
"loss": 2.2498, |
|
"step": 2884 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.1499004295434918e-05, |
|
"loss": 2.3428, |
|
"step": 2886 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.1444221307538571e-05, |
|
"loss": 2.3654, |
|
"step": 2888 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.1389552259151864e-05, |
|
"loss": 2.3089, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.1334997311832002e-05, |
|
"loss": 2.3778, |
|
"step": 2892 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.1280556626799005e-05, |
|
"loss": 2.3831, |
|
"step": 2894 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.1226230364935226e-05, |
|
"loss": 2.4711, |
|
"step": 2896 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.1172018686784935e-05, |
|
"loss": 2.5057, |
|
"step": 2898 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.1117921752553723e-05, |
|
"loss": 2.3913, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.106393972210809e-05, |
|
"loss": 2.5023, |
|
"step": 2902 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.1010072754975014e-05, |
|
"loss": 2.3522, |
|
"step": 2904 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.095632101034143e-05, |
|
"loss": 2.6258, |
|
"step": 2906 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.0902684647053735e-05, |
|
"loss": 2.3644, |
|
"step": 2908 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.0849163823617375e-05, |
|
"loss": 2.4708, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 1.0795758698196368e-05, |
|
"loss": 2.2643, |
|
"step": 2912 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.0742469428612816e-05, |
|
"loss": 2.4429, |
|
"step": 2914 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.0689296172346431e-05, |
|
"loss": 2.2625, |
|
"step": 2916 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.0636239086534072e-05, |
|
"loss": 2.3484, |
|
"step": 2918 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.0583298327969338e-05, |
|
"loss": 2.4041, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.0530474053102034e-05, |
|
"loss": 2.1622, |
|
"step": 2922 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.047776641803772e-05, |
|
"loss": 2.417, |
|
"step": 2924 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.0425175578537299e-05, |
|
"loss": 2.3336, |
|
"step": 2926 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.0372701690016474e-05, |
|
"loss": 2.4013, |
|
"step": 2928 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.0320344907545388e-05, |
|
"loss": 2.2072, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.0268105385848064e-05, |
|
"loss": 2.5827, |
|
"step": 2932 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.0215983279302049e-05, |
|
"loss": 2.5077, |
|
"step": 2934 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.0163978741937847e-05, |
|
"loss": 2.512, |
|
"step": 2936 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.0112091927438583e-05, |
|
"loss": 2.457, |
|
"step": 2938 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.0060322989139442e-05, |
|
"loss": 2.5785, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.0008672080027298e-05, |
|
"loss": 2.4435, |
|
"step": 2942 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 9.957139352740191e-06, |
|
"loss": 2.2668, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 9.90572495956696e-06, |
|
"loss": 2.4811, |
|
"step": 2946 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 9.854429052446684e-06, |
|
"loss": 2.4208, |
|
"step": 2948 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.803251782968358e-06, |
|
"loss": 2.4254, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.752193302370315e-06, |
|
"loss": 2.3277, |
|
"step": 2952 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.701253761539897e-06, |
|
"loss": 2.2717, |
|
"step": 2954 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.650433311012946e-06, |
|
"loss": 2.4127, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 9.599732100973357e-06, |
|
"loss": 2.6305, |
|
"step": 2958 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 9.549150281252633e-06, |
|
"loss": 2.3212, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 9.498688001329486e-06, |
|
"loss": 2.3556, |
|
"step": 2962 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 9.448345410329379e-06, |
|
"loss": 2.3342, |
|
"step": 2964 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 9.398122657024022e-06, |
|
"loss": 2.454, |
|
"step": 2966 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 9.348019889831006e-06, |
|
"loss": 2.6068, |
|
"step": 2968 |
|
}, |
|
    {
      "epoch": 3.22,
      "learning_rate": 9.298037256813347e-06,
      "loss": 2.6167,
      "step": 2970
    },
    {
      "epoch": 3.22,
      "learning_rate": 9.248174905679058e-06,
      "loss": 2.3684,
      "step": 2972
    },
    {
      "epoch": 3.22,
      "learning_rate": 9.198432983780658e-06,
      "loss": 2.5119,
      "step": 2974
    },
    {
      "epoch": 3.22,
      "learning_rate": 9.14881163811479e-06,
      "loss": 2.3928,
      "step": 2976
    },
    {
      "epoch": 3.23,
      "learning_rate": 9.099311015321782e-06,
      "loss": 2.3656,
      "step": 2978
    },
    {
      "epoch": 3.23,
      "learning_rate": 9.049931261685207e-06,
      "loss": 2.3461,
      "step": 2980
    },
    {
      "epoch": 3.23,
      "learning_rate": 9.000672523131431e-06,
      "loss": 2.3134,
      "step": 2982
    },
    {
      "epoch": 3.23,
      "learning_rate": 8.951534945229172e-06,
      "loss": 2.3456,
      "step": 2984
    },
    {
      "epoch": 3.23,
      "learning_rate": 8.902518673189192e-06,
      "loss": 2.3656,
      "step": 2986
    },
    {
      "epoch": 3.24,
      "learning_rate": 8.853623851863663e-06,
      "loss": 2.3751,
      "step": 2988
    },
    {
      "epoch": 3.24,
      "learning_rate": 8.804850625745897e-06,
      "loss": 2.5872,
      "step": 2990
    },
    {
      "epoch": 3.24,
      "learning_rate": 8.756199138969866e-06,
      "loss": 2.2217,
      "step": 2992
    },
    {
      "epoch": 3.24,
      "learning_rate": 8.707669535309793e-06,
      "loss": 2.0714,
      "step": 2994
    },
    {
      "epoch": 3.25,
      "learning_rate": 8.659261958179688e-06,
      "loss": 2.5951,
      "step": 2996
    },
    {
      "epoch": 3.25,
      "learning_rate": 8.610976550632943e-06,
      "loss": 2.3067,
      "step": 2998
    },
    {
      "epoch": 3.25,
      "learning_rate": 8.562813455361957e-06,
      "loss": 2.3471,
      "step": 3000
    },
    {
      "epoch": 3.25,
      "learning_rate": 8.514772814697653e-06,
      "loss": 2.4585,
      "step": 3002
    },
    {
      "epoch": 3.25,
      "learning_rate": 8.466854770609062e-06,
      "loss": 2.199,
      "step": 3004
    },
    {
      "epoch": 3.26,
      "learning_rate": 8.419059464702927e-06,
      "loss": 2.2591,
      "step": 3006
    },
    {
      "epoch": 3.26,
      "learning_rate": 8.371387038223289e-06,
      "loss": 2.3367,
      "step": 3008
    },
    {
      "epoch": 3.26,
      "learning_rate": 8.323837632051062e-06,
      "loss": 2.5848,
      "step": 3010
    },
    {
      "epoch": 3.26,
      "learning_rate": 8.27641138670358e-06,
      "loss": 2.2525,
      "step": 3012
    },
    {
      "epoch": 3.27,
      "learning_rate": 8.229108442334255e-06,
      "loss": 2.7048,
      "step": 3014
    },
    {
      "epoch": 3.27,
      "learning_rate": 8.18192893873208e-06,
      "loss": 2.4397,
      "step": 3016
    },
    {
      "epoch": 3.27,
      "learning_rate": 8.134873015321303e-06,
      "loss": 2.3919,
      "step": 3018
    },
    {
      "epoch": 3.27,
      "learning_rate": 8.087940811160916e-06,
      "loss": 2.3169,
      "step": 3020
    },
    {
      "epoch": 3.27,
      "learning_rate": 8.041132464944351e-06,
      "loss": 2.4048,
      "step": 3022
    },
    {
      "epoch": 3.28,
      "learning_rate": 7.994448114998975e-06,
      "loss": 2.5458,
      "step": 3024
    },
    {
      "epoch": 3.28,
      "learning_rate": 7.947887899285761e-06,
      "loss": 2.2902,
      "step": 3026
    },
    {
      "epoch": 3.28,
      "learning_rate": 7.901451955398792e-06,
      "loss": 2.4315,
      "step": 3028
    },
    {
      "epoch": 3.28,
      "learning_rate": 7.855140420564965e-06,
      "loss": 2.5107,
      "step": 3030
    },
    {
      "epoch": 3.28,
      "learning_rate": 7.808953431643467e-06,
      "loss": 2.3578,
      "step": 3032
    },
    {
      "epoch": 3.29,
      "learning_rate": 7.762891125125476e-06,
      "loss": 2.3267,
      "step": 3034
    },
    {
      "epoch": 3.29,
      "learning_rate": 7.716953637133677e-06,
      "loss": 2.3038,
      "step": 3036
    },
    {
      "epoch": 3.29,
      "learning_rate": 7.671141103421919e-06,
      "loss": 2.2405,
      "step": 3038
    },
    {
      "epoch": 3.29,
      "learning_rate": 7.625453659374754e-06,
      "loss": 2.2669,
      "step": 3040
    },
    {
      "epoch": 3.3,
      "learning_rate": 7.579891440007103e-06,
      "loss": 2.4222,
      "step": 3042
    },
    {
      "epoch": 3.3,
      "learning_rate": 7.534454579963829e-06,
      "loss": 2.4086,
      "step": 3044
    },
    {
      "epoch": 3.3,
      "learning_rate": 7.489143213519301e-06,
      "loss": 2.3461,
      "step": 3046
    },
    {
      "epoch": 3.3,
      "learning_rate": 7.44395747457704e-06,
      "loss": 2.224,
      "step": 3048
    },
    {
      "epoch": 3.3,
      "learning_rate": 7.398897496669338e-06,
      "loss": 2.316,
      "step": 3050
    },
    {
      "epoch": 3.31,
      "learning_rate": 7.353963412956838e-06,
      "loss": 2.4673,
      "step": 3052
    },
    {
      "epoch": 3.31,
      "learning_rate": 7.309155356228109e-06,
      "loss": 2.3921,
      "step": 3054
    },
    {
      "epoch": 3.31,
      "learning_rate": 7.264473458899301e-06,
      "loss": 2.3709,
      "step": 3056
    },
    {
      "epoch": 3.31,
      "learning_rate": 7.219917853013764e-06,
      "loss": 2.5216,
      "step": 3058
    },
    {
      "epoch": 3.32,
      "learning_rate": 7.175488670241609e-06,
      "loss": 2.4435,
      "step": 3060
    },
    {
      "epoch": 3.32,
      "learning_rate": 7.131186041879357e-06,
      "loss": 2.4123,
      "step": 3062
    },
    {
      "epoch": 3.32,
      "learning_rate": 7.0870100988495004e-06,
      "loss": 2.2985,
      "step": 3064
    },
    {
      "epoch": 3.32,
      "learning_rate": 7.0429609717002076e-06,
      "loss": 2.4648,
      "step": 3066
    },
    {
      "epoch": 3.32,
      "learning_rate": 6.999038790604856e-06,
      "loss": 2.4027,
      "step": 3068
    },
    {
      "epoch": 3.33,
      "learning_rate": 6.955243685361673e-06,
      "loss": 2.5828,
      "step": 3070
    },
    {
      "epoch": 3.33,
      "learning_rate": 6.911575785393326e-06,
      "loss": 2.2331,
      "step": 3072
    },
    {
      "epoch": 3.33,
      "learning_rate": 6.868035219746638e-06,
      "loss": 2.3046,
      "step": 3074
    },
    {
      "epoch": 3.33,
      "learning_rate": 6.824622117092078e-06,
      "loss": 2.3877,
      "step": 3076
    },
    {
      "epoch": 3.33,
      "learning_rate": 6.781336605723432e-06,
      "loss": 2.307,
      "step": 3078
    },
    {
      "epoch": 3.34,
      "learning_rate": 6.738178813557472e-06,
      "loss": 2.4418,
      "step": 3080
    },
    {
      "epoch": 3.34,
      "learning_rate": 6.695148868133516e-06,
      "loss": 2.3749,
      "step": 3082
    },
    {
      "epoch": 3.34,
      "learning_rate": 6.652246896613068e-06,
      "loss": 2.4227,
      "step": 3084
    },
    {
      "epoch": 3.34,
      "learning_rate": 6.609473025779434e-06,
      "loss": 2.5151,
      "step": 3086
    },
    {
      "epoch": 3.35,
      "learning_rate": 6.566827382037383e-06,
      "loss": 2.4882,
      "step": 3088
    },
    {
      "epoch": 3.35,
      "learning_rate": 6.524310091412739e-06,
      "loss": 2.3111,
      "step": 3090
    },
    {
      "epoch": 3.35,
      "learning_rate": 6.481921279552023e-06,
      "loss": 2.3321,
      "step": 3092
    },
    {
      "epoch": 3.35,
      "learning_rate": 6.439661071722048e-06,
      "loss": 2.2051,
      "step": 3094
    },
    {
      "epoch": 3.35,
      "learning_rate": 6.397529592809614e-06,
      "loss": 2.3448,
      "step": 3096
    },
    {
      "epoch": 3.36,
      "learning_rate": 6.355526967321112e-06,
      "loss": 2.4095,
      "step": 3098
    },
    {
      "epoch": 3.36,
      "learning_rate": 6.313653319382107e-06,
      "loss": 2.1535,
      "step": 3100
    },
    {
      "epoch": 3.36,
      "learning_rate": 6.271908772737017e-06,
      "loss": 2.3662,
      "step": 3102
    },
    {
      "epoch": 3.36,
      "learning_rate": 6.2302934507487755e-06,
      "loss": 2.1468,
      "step": 3104
    },
    {
      "epoch": 3.36,
      "learning_rate": 6.188807476398412e-06,
      "loss": 2.4795,
      "step": 3106
    },
    {
      "epoch": 3.37,
      "learning_rate": 6.147450972284696e-06,
      "loss": 2.353,
      "step": 3108
    },
    {
      "epoch": 3.37,
      "learning_rate": 6.106224060623822e-06,
      "loss": 2.3579,
      "step": 3110
    },
    {
      "epoch": 3.37,
      "learning_rate": 6.065126863248976e-06,
      "loss": 2.3739,
      "step": 3112
    },
    {
      "epoch": 3.37,
      "learning_rate": 6.0241595016100545e-06,
      "loss": 2.3866,
      "step": 3114
    },
    {
      "epoch": 3.38,
      "learning_rate": 5.98332209677322e-06,
      "loss": 2.2253,
      "step": 3116
    },
    {
      "epoch": 3.38,
      "learning_rate": 5.942614769420629e-06,
      "loss": 2.3605,
      "step": 3118
    },
    {
      "epoch": 3.38,
      "learning_rate": 5.902037639850011e-06,
      "loss": 2.3848,
      "step": 3120
    },
    {
      "epoch": 3.38,
      "learning_rate": 5.86159082797435e-06,
      "loss": 2.1943,
      "step": 3122
    },
    {
      "epoch": 3.38,
      "learning_rate": 5.8212744533215016e-06,
      "loss": 2.3193,
      "step": 3124
    },
    {
      "epoch": 3.39,
      "learning_rate": 5.781088635033882e-06,
      "loss": 2.5142,
      "step": 3126
    },
    {
      "epoch": 3.39,
      "learning_rate": 5.741033491868047e-06,
      "loss": 2.5701,
      "step": 3128
    },
    {
      "epoch": 3.39,
      "learning_rate": 5.701109142194422e-06,
      "loss": 2.5069,
      "step": 3130
    },
    {
      "epoch": 3.39,
      "learning_rate": 5.6613157039969055e-06,
      "loss": 2.4168,
      "step": 3132
    },
    {
      "epoch": 3.4,
      "learning_rate": 5.621653294872514e-06,
      "loss": 2.4338,
      "step": 3134
    },
    {
      "epoch": 3.4,
      "learning_rate": 5.582122032031051e-06,
      "loss": 2.4563,
      "step": 3136
    },
    {
      "epoch": 3.4,
      "learning_rate": 5.542722032294761e-06,
      "loss": 2.2138,
      "step": 3138
    },
    {
      "epoch": 3.4,
      "learning_rate": 5.503453412098003e-06,
      "loss": 2.6032,
      "step": 3140
    },
    {
      "epoch": 3.4,
      "learning_rate": 5.464316287486859e-06,
      "loss": 2.3332,
      "step": 3142
    },
    {
      "epoch": 3.41,
      "learning_rate": 5.425310774118802e-06,
      "loss": 2.3154,
      "step": 3144
    },
    {
      "epoch": 3.41,
      "learning_rate": 5.386436987262416e-06,
      "loss": 2.6818,
      "step": 3146
    },
    {
      "epoch": 3.41,
      "learning_rate": 5.347695041796985e-06,
      "loss": 2.2799,
      "step": 3148
    },
    {
      "epoch": 3.41,
      "learning_rate": 5.309085052212165e-06,
      "loss": 2.5646,
      "step": 3150
    },
    {
      "epoch": 3.41,
      "learning_rate": 5.270607132607663e-06,
      "loss": 2.3395,
      "step": 3152
    },
    {
      "epoch": 3.42,
      "learning_rate": 5.232261396692911e-06,
      "loss": 2.4606,
      "step": 3154
    },
    {
      "epoch": 3.42,
      "learning_rate": 5.194047957786713e-06,
      "loss": 2.3552,
      "step": 3156
    },
    {
      "epoch": 3.42,
      "learning_rate": 5.155966928816885e-06,
      "loss": 2.5682,
      "step": 3158
    },
    {
      "epoch": 3.42,
      "learning_rate": 5.118018422319948e-06,
      "loss": 2.4571,
      "step": 3160
    },
    {
      "epoch": 3.43,
      "learning_rate": 5.080202550440849e-06,
      "loss": 2.24,
      "step": 3162
    },
    {
      "epoch": 3.43,
      "learning_rate": 5.042519424932513e-06,
      "loss": 2.5308,
      "step": 3164
    },
    {
      "epoch": 3.43,
      "learning_rate": 5.0049691571555925e-06,
      "loss": 2.4177,
      "step": 3166
    },
    {
      "epoch": 3.43,
      "learning_rate": 4.967551858078129e-06,
      "loss": 2.6177,
      "step": 3168
    },
    {
      "epoch": 3.43,
      "learning_rate": 4.930267638275221e-06,
      "loss": 2.505,
      "step": 3170
    },
    {
      "epoch": 3.44,
      "learning_rate": 4.893116607928677e-06,
      "loss": 2.5166,
      "step": 3172
    },
    {
      "epoch": 3.44,
      "learning_rate": 4.856098876826709e-06,
      "loss": 2.0793,
      "step": 3174
    },
    {
      "epoch": 3.44,
      "learning_rate": 4.819214554363616e-06,
      "loss": 2.4421,
      "step": 3176
    },
    {
      "epoch": 3.44,
      "learning_rate": 4.782463749539446e-06,
      "loss": 2.3317,
      "step": 3178
    },
    {
      "epoch": 3.45,
      "learning_rate": 4.745846570959672e-06,
      "loss": 2.4747,
      "step": 3180
    },
    {
      "epoch": 3.45,
      "learning_rate": 4.70936312683487e-06,
      "loss": 2.2323,
      "step": 3182
    },
    {
      "epoch": 3.45,
      "learning_rate": 4.673013524980424e-06,
      "loss": 2.3297,
      "step": 3184
    },
    {
      "epoch": 3.45,
      "learning_rate": 4.63679787281619e-06,
      "loss": 2.5994,
      "step": 3186
    },
    {
      "epoch": 3.45,
      "learning_rate": 4.6007162773661515e-06,
      "loss": 2.2933,
      "step": 3188
    },
    {
      "epoch": 3.46,
      "learning_rate": 4.564768845258139e-06,
      "loss": 2.4649,
      "step": 3190
    },
    {
      "epoch": 3.46,
      "learning_rate": 4.528955682723529e-06,
      "loss": 2.3754,
      "step": 3192
    },
    {
      "epoch": 3.46,
      "learning_rate": 4.4932768955968876e-06,
      "loss": 2.6034,
      "step": 3194
    },
    {
      "epoch": 3.46,
      "learning_rate": 4.4577325893156715e-06,
      "loss": 2.5477,
      "step": 3196
    },
    {
      "epoch": 3.46,
      "learning_rate": 4.422322868919937e-06,
      "loss": 2.3983,
      "step": 3198
    },
    {
      "epoch": 3.47,
      "learning_rate": 4.3870478390519884e-06,
      "loss": 2.3261,
      "step": 3200
    },
    {
      "epoch": 3.47,
      "learning_rate": 4.3519076039561345e-06,
      "loss": 2.4168,
      "step": 3202
    },
    {
      "epoch": 3.47,
      "learning_rate": 4.316902267478296e-06,
      "loss": 2.4235,
      "step": 3204
    },
    {
      "epoch": 3.47,
      "learning_rate": 4.2820319330657835e-06,
      "loss": 2.2992,
      "step": 3206
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.2472967037669066e-06,
      "loss": 2.4394,
      "step": 3208
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.2126966822307715e-06,
      "loss": 2.374,
      "step": 3210
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.178231970706858e-06,
      "loss": 2.4277,
      "step": 3212
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.1439026710448355e-06,
      "loss": 2.4958,
      "step": 3214
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.109708884694158e-06,
      "loss": 2.3339,
      "step": 3216
    },
    {
      "epoch": 3.49,
      "learning_rate": 4.075650712703849e-06,
      "loss": 2.3244,
      "step": 3218
    },
    {
      "epoch": 3.49,
      "learning_rate": 4.041728255722154e-06,
      "loss": 2.4202,
      "step": 3220
    },
    {
      "epoch": 3.49,
      "learning_rate": 4.0079416139962525e-06,
      "loss": 2.4348,
      "step": 3222
    },
    {
      "epoch": 3.49,
      "learning_rate": 3.974290887371951e-06,
      "loss": 2.5305,
      "step": 3224
    },
    {
      "epoch": 3.49,
      "learning_rate": 3.940776175293431e-06,
      "loss": 2.3909,
      "step": 3226
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.9073975768029124e-06,
      "loss": 2.4669,
      "step": 3228
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.8741551905403735e-06,
      "loss": 2.5117,
      "step": 3230
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.8410491147432395e-06,
      "loss": 2.3205,
      "step": 3232
    },
    {
      "epoch": 3.5,
      "learning_rate": 3.808079447246149e-06,
      "loss": 2.5002,
      "step": 3234
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.7752462854806213e-06,
      "loss": 2.5181,
      "step": 3236
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.7425497264747534e-06,
      "loss": 2.5206,
      "step": 3238
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.7099898668529642e-06,
      "loss": 2.4898,
      "step": 3240
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.677566802835708e-06,
      "loss": 2.4225,
      "step": 3242
    },
    {
      "epoch": 3.51,
      "learning_rate": 3.6452806302392007e-06,
      "loss": 2.3201,
      "step": 3244
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.6131314444750765e-06,
      "loss": 2.4289,
      "step": 3246
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.58111934055016e-06,
      "loss": 2.2184,
      "step": 3248
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.5492444130662108e-06,
      "loss": 2.5492,
      "step": 3250
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.517506756219563e-06,
      "loss": 2.5086,
      "step": 3252
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.4859064638009033e-06,
      "loss": 2.4952,
      "step": 3254
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.4544436291949867e-06,
      "loss": 2.469,
      "step": 3256
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.4231183453803604e-06,
      "loss": 2.3824,
      "step": 3258
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.391930704929064e-06,
      "loss": 2.4994,
      "step": 3260
    },
    {
      "epoch": 3.53,
      "learning_rate": 3.360880800006383e-06,
      "loss": 2.545,
      "step": 3262
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.3299687223705745e-06,
      "loss": 2.3291,
      "step": 3264
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.299194563372604e-06,
      "loss": 2.5543,
      "step": 3266
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.2685584139558243e-06,
      "loss": 2.3818,
      "step": 3268
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.238060364655765e-06,
      "loss": 2.6038,
      "step": 3270
    },
    {
      "epoch": 3.54,
      "learning_rate": 3.2077005055998533e-06,
      "loss": 2.4691,
      "step": 3272
    },
    {
      "epoch": 3.55,
      "learning_rate": 3.177478926507127e-06,
      "loss": 2.4399,
      "step": 3274
    },
    {
      "epoch": 3.55,
      "learning_rate": 3.1473957166879897e-06,
      "loss": 2.5692,
      "step": 3276
    },
    {
      "epoch": 3.55,
      "learning_rate": 3.117450965043911e-06,
      "loss": 2.4183,
      "step": 3278
    },
    {
      "epoch": 3.55,
      "learning_rate": 3.087644760067232e-06,
      "loss": 2.4085,
      "step": 3280
    },
    {
      "epoch": 3.56,
      "learning_rate": 3.0579771898408326e-06,
      "loss": 2.2894,
      "step": 3282
    },
    {
      "epoch": 3.56,
      "learning_rate": 3.0284483420379097e-06,
      "loss": 2.2705,
      "step": 3284
    },
    {
      "epoch": 3.56,
      "learning_rate": 2.9990583039217203e-06,
      "loss": 2.2714,
      "step": 3286
    },
    {
      "epoch": 3.56,
      "learning_rate": 2.9698071623452895e-06,
      "loss": 2.366,
      "step": 3288
    },
    {
      "epoch": 3.56,
      "learning_rate": 2.940695003751198e-06,
      "loss": 2.3525,
      "step": 3290
    },
    {
      "epoch": 3.57,
      "learning_rate": 2.9117219141712947e-06,
      "loss": 2.3377,
      "step": 3292
    },
    {
      "epoch": 3.57,
      "learning_rate": 2.8828879792264675e-06,
      "loss": 2.1998,
      "step": 3294
    },
    {
      "epoch": 3.57,
      "learning_rate": 2.854193284126344e-06,
      "loss": 2.3437,
      "step": 3296
    },
    {
      "epoch": 3.57,
      "learning_rate": 2.825637913669121e-06,
      "loss": 2.2963,
      "step": 3298
    },
    {
      "epoch": 3.57,
      "learning_rate": 2.797221952241219e-06,
      "loss": 2.3955,
      "step": 3300
    },
    {
      "epoch": 3.58,
      "learning_rate": 2.7689454838171147e-06,
      "loss": 2.2326,
      "step": 3302
    },
    {
      "epoch": 3.58,
      "learning_rate": 2.7408085919590264e-06,
      "loss": 2.3897,
      "step": 3304
    },
    {
      "epoch": 3.58,
      "learning_rate": 2.7128113598167137e-06,
      "loss": 2.4245,
      "step": 3306
    },
    {
      "epoch": 3.58,
      "learning_rate": 2.684953870127227e-06,
      "loss": 2.488,
      "step": 3308
    },
    {
      "epoch": 3.59,
      "learning_rate": 2.657236205214625e-06,
      "loss": 2.3614,
      "step": 3310
    },
    {
      "epoch": 3.59,
      "learning_rate": 2.6296584469897743e-06,
      "loss": 2.1686,
      "step": 3312
    },
    {
      "epoch": 3.59,
      "learning_rate": 2.6022206769500845e-06,
      "loss": 2.6152,
      "step": 3314
    },
    {
      "epoch": 3.59,
      "learning_rate": 2.574922976179295e-06,
      "loss": 2.3362,
      "step": 3316
    },
    {
      "epoch": 3.59,
      "learning_rate": 2.547765425347187e-06,
      "loss": 2.382,
      "step": 3318
    },
    {
      "epoch": 3.6,
      "learning_rate": 2.520748104709375e-06,
      "loss": 2.4045,
      "step": 3320
    },
    {
      "epoch": 3.6,
      "learning_rate": 2.493871094107081e-06,
      "loss": 2.2771,
      "step": 3322
    },
    {
      "epoch": 3.6,
      "learning_rate": 2.467134472966892e-06,
      "loss": 2.3296,
      "step": 3324
    },
    {
      "epoch": 3.6,
      "learning_rate": 2.4405383203004894e-06,
      "loss": 2.3129,
      "step": 3326
    },
    {
      "epoch": 3.61,
      "learning_rate": 2.414082714704463e-06,
      "loss": 2.2268,
      "step": 3328
    },
    {
      "epoch": 3.61,
      "learning_rate": 2.3877677343600524e-06,
      "loss": 2.476,
      "step": 3330
    },
    {
      "epoch": 3.61,
      "learning_rate": 2.36159345703294e-06,
      "loss": 2.5804,
      "step": 3332
    },
    {
      "epoch": 3.61,
      "learning_rate": 2.3355599600729915e-06,
      "loss": 2.4219,
      "step": 3334
    },
    {
      "epoch": 3.61,
      "learning_rate": 2.3096673204140108e-06,
      "loss": 2.4168,
      "step": 3336
    },
    {
      "epoch": 3.62,
      "learning_rate": 2.2839156145736174e-06,
      "loss": 2.3116,
      "step": 3338
    },
    {
      "epoch": 3.62,
      "learning_rate": 2.2583049186528704e-06,
      "loss": 2.3238,
      "step": 3340
    },
    {
      "epoch": 3.62,
      "learning_rate": 2.2328353083361562e-06,
      "loss": 2.4897,
      "step": 3342
    },
    {
      "epoch": 3.62,
      "learning_rate": 2.207506858890912e-06,
      "loss": 2.307,
      "step": 3344
    },
    {
      "epoch": 3.62,
      "learning_rate": 2.182319645167441e-06,
      "loss": 2.3267,
      "step": 3346
    },
    {
      "epoch": 3.63,
      "learning_rate": 2.1572737415986422e-06,
      "loss": 2.424,
      "step": 3348
    },
    {
      "epoch": 3.63,
      "learning_rate": 2.1323692221998257e-06,
      "loss": 2.4612,
      "step": 3350
    },
    {
      "epoch": 3.63,
      "learning_rate": 2.1076061605684818e-06,
      "loss": 2.4219,
      "step": 3352
    },
    {
      "epoch": 3.63,
      "learning_rate": 2.0829846298840884e-06,
      "loss": 2.4251,
      "step": 3354
    },
    {
      "epoch": 3.64,
      "learning_rate": 2.058504702907843e-06,
      "loss": 2.5063,
      "step": 3356
    },
    {
      "epoch": 3.64,
      "learning_rate": 2.0341664519824887e-06,
      "loss": 2.5947,
      "step": 3358
    },
    {
      "epoch": 3.64,
      "learning_rate": 2.009969949032098e-06,
      "loss": 2.4399,
      "step": 3360
    },
    {
      "epoch": 3.64,
      "learning_rate": 1.9859152655618498e-06,
      "loss": 2.405,
      "step": 3362
    },
    {
      "epoch": 3.64,
      "learning_rate": 1.962002472657809e-06,
      "loss": 2.3689,
      "step": 3364
    },
    {
      "epoch": 3.65,
      "learning_rate": 1.9382316409867264e-06,
      "loss": 2.451,
      "step": 3366
    },
    {
      "epoch": 3.65,
      "learning_rate": 1.9146028407958484e-06,
      "loss": 2.3364,
      "step": 3368
    },
    {
      "epoch": 3.65,
      "learning_rate": 1.8911161419126854e-06,
      "loss": 2.3012,
      "step": 3370
    },
    {
      "epoch": 3.65,
      "learning_rate": 1.8677716137447954e-06,
      "loss": 2.3833,
      "step": 3372
    },
    {
      "epoch": 3.66,
      "learning_rate": 1.844569325279627e-06,
      "loss": 2.5041,
      "step": 3374
    },
    {
      "epoch": 3.66,
      "learning_rate": 1.8215093450842435e-06,
      "loss": 2.2862,
      "step": 3376
    },
    {
      "epoch": 3.66,
      "learning_rate": 1.7985917413052055e-06,
      "loss": 2.3316,
      "step": 3378
    },
    {
      "epoch": 3.66,
      "learning_rate": 1.7758165816682826e-06,
      "loss": 2.2273,
      "step": 3380
    },
    {
      "epoch": 3.66,
      "learning_rate": 1.7531839334783306e-06,
      "loss": 2.2901,
      "step": 3382
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.7306938636190262e-06,
      "loss": 2.5521,
      "step": 3384
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.7083464385527325e-06,
      "loss": 2.3896,
      "step": 3386
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.686141724320245e-06,
      "loss": 2.2818,
      "step": 3388
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.6640797865406288e-06,
      "loss": 2.2733,
      "step": 3390
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.6421606904110264e-06,
      "loss": 2.4238,
      "step": 3392
    },
    {
      "epoch": 3.68,
      "learning_rate": 1.6203845007064455e-06,
      "loss": 2.6201,
      "step": 3394
    },
    {
      "epoch": 3.68,
      "learning_rate": 1.5987512817795924e-06,
      "loss": 2.422,
      "step": 3396
    },
    {
      "epoch": 3.68,
      "learning_rate": 1.5772610975606561e-06,
      "loss": 2.2933,
      "step": 3398
    },
    {
      "epoch": 3.68,
      "learning_rate": 1.5559140115571246e-06,
      "loss": 2.4014,
      "step": 3400
    },
    {
      "epoch": 3.69,
      "learning_rate": 1.5347100868536246e-06,
      "loss": 2.3193,
      "step": 3402
    },
    {
      "epoch": 3.69,
      "learning_rate": 1.5136493861117097e-06,
      "loss": 2.4959,
      "step": 3404
    },
    {
      "epoch": 3.69,
      "learning_rate": 1.4927319715696607e-06,
      "loss": 2.3566,
      "step": 3406
    },
    {
      "epoch": 3.69,
      "learning_rate": 1.4719579050423427e-06,
      "loss": 2.3291,
      "step": 3408
    },
    {
      "epoch": 3.69,
      "learning_rate": 1.4513272479209917e-06,
      "loss": 2.3138,
      "step": 3410
    },
    {
      "epoch": 3.7,
      "learning_rate": 1.43084006117305e-06,
      "loss": 2.2497,
      "step": 3412
    },
    {
      "epoch": 3.7,
      "learning_rate": 1.41049640534196e-06,
      "loss": 2.2461,
      "step": 3414
    },
    {
      "epoch": 3.7,
      "learning_rate": 1.3902963405470148e-06,
      "loss": 2.3886,
      "step": 3416
    },
    {
      "epoch": 3.7,
      "learning_rate": 1.37023992648318e-06,
      "loss": 2.2535,
      "step": 3418
    },
    {
      "epoch": 3.7,
      "learning_rate": 1.3503272224208884e-06,
      "loss": 2.3367,
      "step": 3420
    },
    {
      "epoch": 3.71,
      "learning_rate": 1.3305582872058963e-06,
      "loss": 2.4806,
      "step": 3422
    },
    {
      "epoch": 3.71,
      "learning_rate": 1.3109331792590773e-06,
      "loss": 2.4335,
      "step": 3424
    },
    {
      "epoch": 3.71,
      "learning_rate": 1.2914519565763062e-06,
      "loss": 2.4195,
      "step": 3426
    },
    {
      "epoch": 3.71,
      "learning_rate": 1.2721146767282033e-06,
      "loss": 2.4332,
      "step": 3428
    },
    {
      "epoch": 3.72,
      "learning_rate": 1.2529213968600406e-06,
      "loss": 2.2733,
      "step": 3430
    },
    {
      "epoch": 3.72,
      "learning_rate": 1.233872173691536e-06,
      "loss": 2.3522,
      "step": 3432
    },
    {
      "epoch": 3.72,
      "learning_rate": 1.2149670635166976e-06,
      "loss": 2.5166,
      "step": 3434
    },
    {
      "epoch": 3.72,
      "learning_rate": 1.196206122203647e-06,
      "loss": 2.4279,
      "step": 3436
    },
    {
      "epoch": 3.72,
      "learning_rate": 1.1775894051944514e-06,
      "loss": 2.2575,
      "step": 3438
    },
    {
      "epoch": 3.73,
      "learning_rate": 1.1591169675049863e-06,
      "loss": 2.514,
      "step": 3440
    },
    {
      "epoch": 3.73,
      "learning_rate": 1.140788863724751e-06,
      "loss": 2.4809,
      "step": 3442
    },
    {
      "epoch": 3.73,
      "learning_rate": 1.1226051480167032e-06,
      "loss": 2.444,
      "step": 3444
    },
    {
      "epoch": 3.73,
      "learning_rate": 1.1045658741171028e-06,
      "loss": 2.3813,
      "step": 3446
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.0866710953353731e-06,
      "loss": 2.3445,
      "step": 3448
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.068920864553924e-06,
      "loss": 2.4029,
      "step": 3450
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.0513152342279842e-06,
      "loss": 2.4019,
      "step": 3452
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.0338542563854748e-06,
      "loss": 2.3407,
      "step": 3454
    },
    {
      "epoch": 3.74,
      "learning_rate": 1.0165379826268417e-06,
      "loss": 2.414,
      "step": 3456
    },
    {
      "epoch": 3.75,
      "learning_rate": 9.993664641249012e-07,
      "loss": 2.4748,
      "step": 3458
    },
    {
      "epoch": 3.75,
      "learning_rate": 9.823397516246834e-07,
      "loss": 2.2681,
      "step": 3460
    },
    {
      "epoch": 3.75,
      "learning_rate": 9.654578954433059e-07,
      "loss": 2.3164,
      "step": 3462
    },
    {
      "epoch": 3.75,
      "learning_rate": 9.487209454697887e-07,
      "loss": 2.382,
      "step": 3464
    },
    {
      "epoch": 3.75,
      "learning_rate": 9.321289511649456e-07,
      "loss": 2.3299,
      "step": 3466
    },
    {
      "epoch": 3.76,
      "learning_rate": 9.156819615612044e-07,
      "loss": 2.3526,
      "step": 3468
    },
    {
      "epoch": 3.76,
      "learning_rate": 8.993800252624862e-07,
      "loss": 2.4167,
      "step": 3470
    },
    {
      "epoch": 3.76,
      "learning_rate": 8.832231904440491e-07,
      "loss": 2.5703,
      "step": 3472
    },
    {
      "epoch": 3.76,
      "learning_rate": 8.672115048523554e-07,
      "loss": 2.5794,
      "step": 3474
    },
    {
      "epoch": 3.77,
      "learning_rate": 8.513450158049108e-07,
      "loss": 2.3276,
      "step": 3476
    },
    {
      "epoch": 3.77,
      "learning_rate": 8.356237701901582e-07,
      "loss": 2.3394,
      "step": 3478
    },
    {
      "epoch": 3.77,
      "learning_rate": 8.200478144672952e-07,
      "loss": 2.3505,
      "step": 3480
    },
    {
      "epoch": 3.77,
      "learning_rate": 8.046171946661796e-07,
      "loss": 2.4532,
      "step": 3482
    },
    {
      "epoch": 3.77,
      "learning_rate": 7.893319563871682e-07,
      "loss": 2.513,
      "step": 3484
    },
    {
      "epoch": 3.78,
      "learning_rate": 7.741921448009837e-07,
      "loss": 2.4655,
      "step": 3486
    },
    {
      "epoch": 3.78,
      "learning_rate": 7.591978046485926e-07,
      "loss": 2.605,
      "step": 3488
    },
    {
      "epoch": 3.78,
      "learning_rate": 7.443489802410663e-07,
      "loss": 2.4451,
      "step": 3490
    },
    {
      "epoch": 3.78,
      "learning_rate": 7.296457154594482e-07,
      "loss": 2.5196,
      "step": 3492
    },
    {
      "epoch": 3.79,
      "learning_rate": 7.150880537546201e-07,
      "loss": 2.2368,
      "step": 3494
    },
    {
      "epoch": 3.79,
      "learning_rate": 7.006760381471856e-07,
      "loss": 2.4034,
      "step": 3496
    },
    {
      "epoch": 3.79,
      "learning_rate": 6.86409711227337e-07,
      "loss": 2.5032,
      "step": 3498
    },
    {
      "epoch": 3.79,
      "learning_rate": 6.722891151547284e-07,
      "loss": 2.3998,
      "step": 3500
    },
    {
      "epoch": 3.79,
      "learning_rate": 6.583142916583574e-07,
      "loss": 2.4205,
      "step": 3502
    },
    {
      "epoch": 3.8,
      "learning_rate": 6.444852820364222e-07,
      "loss": 2.3034,
      "step": 3504
    },
    {
      "epoch": 3.8,
      "learning_rate": 6.30802127156227e-07,
      "loss": 2.3639,
      "step": 3506
    },
    {
      "epoch": 3.8,
      "learning_rate": 6.172648674540426e-07,
      "loss": 2.3491,
      "step": 3508
    },
    {
      "epoch": 3.8,
      "learning_rate": 6.038735429349962e-07,
      "loss": 2.4925,
      "step": 3510
    },
    {
      "epoch": 3.8,
      "learning_rate": 5.90628193172943e-07,
      "loss": 2.2371,
      "step": 3512
    },
    {
      "epoch": 3.81,
      "learning_rate": 5.775288573103666e-07,
      "loss": 2.3831,
      "step": 3514
    },
    {
      "epoch": 3.81,
      "learning_rate": 5.645755740582404e-07,
      "loss": 2.6221,
      "step": 3516
    },
    {
      "epoch": 3.81,
      "learning_rate": 5.517683816959219e-07,
      "loss": 2.4171,
      "step": 3518
    },
    {
      "epoch": 3.81,
      "learning_rate": 5.391073180710638e-07,
      "loss": 2.3518,
      "step": 3520
    },
    {
      "epoch": 3.82,
      "learning_rate": 5.265924205994644e-07,
      "loss": 2.4263,
      "step": 3522
    },
    {
      "epoch": 3.82,
      "learning_rate": 5.14223726264973e-07,
      "loss": 2.3707,
      "step": 3524
    },
    {
      "epoch": 3.82,
      "learning_rate": 5.020012716193901e-07,
      "loss": 2.2659,
      "step": 3526
    },
    {
      "epoch": 3.82,
      "learning_rate": 4.899250927823396e-07,
      "loss": 2.3573,
      "step": 3528
    },
    {
      "epoch": 3.82,
      "learning_rate": 4.779952254411913e-07,
      "loss": 2.2359,
      "step": 3530
    },
    {
      "epoch": 3.83,
      "learning_rate": 4.662117048509218e-07,
      "loss": 2.3461,
      "step": 3532
    },
    {
      "epoch": 3.83,
      "learning_rate": 4.545745658340206e-07,
      "loss": 2.4581,
      "step": 3534
    },
    {
      "epoch": 3.83,
      "learning_rate": 4.4308384278041183e-07,
      "loss": 2.4515,
      "step": 3536
    },
    {
      "epoch": 3.83,
      "learning_rate": 4.317395696473214e-07,
      "loss": 2.4953,
      "step": 3538
    },
    {
      "epoch": 3.83,
      "learning_rate": 4.2054177995919374e-07,
      "loss": 2.5276,
      "step": 3540
    },
    {
      "epoch": 3.84,
      "learning_rate": 4.094905068075694e-07,
      "loss": 2.323,
      "step": 3542
    },
    {
      "epoch": 3.84,
      "learning_rate": 3.985857828510353e-07,
      "loss": 2.4943,
      "step": 3544
    },
    {
      "epoch": 3.84,
      "learning_rate": 3.878276403150749e-07,
      "loss": 2.4179,
      "step": 3546
    },
    {
      "epoch": 3.84,
      "learning_rate": 3.7721611099200693e-07,
      "loss": 2.3685,
      "step": 3548
    },
    {
      "epoch": 3.85,
      "learning_rate": 3.6675122624087454e-07,
      "loss": 2.4998,
      "step": 3550
    },
    {
      "epoch": 3.85,
      "learning_rate": 3.5643301698736196e-07,
      "loss": 2.484,
      "step": 3552
    },
    {
      "epoch": 3.85,
      "learning_rate": 3.462615137237002e-07,
      "loss": 2.3272,
      "step": 3554
    },
    {
      "epoch": 3.85,
      "learning_rate": 3.3623674650857806e-07,
      "loss": 2.3971,
      "step": 3556
    },
    {
      "epoch": 3.85,
      "learning_rate": 3.2635874496705356e-07,
      "loss": 2.544,
      "step": 3558
    },
    {
      "epoch": 3.86,
      "learning_rate": 3.1662753829045375e-07,
      "loss": 2.6006,
      "step": 3560
    },
    {
      "epoch": 3.86,
      "learning_rate": 3.0704315523631953e-07,
      "loss": 2.3817,
      "step": 3562
    },
    {
      "epoch": 3.86,
      "learning_rate": 2.976056241282721e-07,
      "loss": 2.4897,
      "step": 3564
    },
    {
      "epoch": 3.86,
      "learning_rate": 2.8831497285599085e-07,
      "loss": 2.3797,
      "step": 3566
    },
    {
      "epoch": 3.87,
      "learning_rate": 2.7917122887506364e-07,
      "loss": 2.4418,
      "step": 3568
    },
    {
      "epoch": 3.87,
      "learning_rate": 2.701744192069755e-07,
      "loss": 2.4906,
      "step": 3570
    },
    {
      "epoch": 3.87,
      "learning_rate": 2.613245704389644e-07,
      "loss": 2.4531,
      "step": 3572
    },
    {
      "epoch": 3.87,
      "learning_rate": 2.5262170872398796e-07,
      "loss": 2.3002,
      "step": 3574
    },
    {
      "epoch": 3.87,
      "learning_rate": 2.440658597806178e-07,
      "loss": 2.5243,
      "step": 3576
    },
    {
      "epoch": 3.88,
      "learning_rate": 2.3565704889298434e-07,
      "loss": 2.4181,
      "step": 3578
    },
    {
      "epoch": 3.88,
      "learning_rate": 2.2739530091069328e-07,
      "loss": 2.2211,
      "step": 3580
    },
    {
      "epoch": 3.88,
      "learning_rate": 2.1928064024874796e-07,
      "loss": 2.0089,
      "step": 3582
    },
    {
      "epoch": 3.88,
      "learning_rate": 2.113130908874772e-07,
      "loss": 2.269,
      "step": 3584
    },
    {
      "epoch": 3.88,
      "learning_rate": 2.0349267637247982e-07,
      "loss": 2.4197,
      "step": 3586
    },
    {
      "epoch": 3.89,
      "learning_rate": 1.9581941981453579e-07,
      "loss": 2.319,
      "step": 3588
    },
    {
      "epoch": 3.89,
      "learning_rate": 1.8829334388955067e-07,
      "loss": 2.2629,
      "step": 3590
    },
    {
      "epoch": 3.89,
      "learning_rate": 1.80914470838478e-07,
      "loss": 2.5493,
      "step": 3592
    },
    {
      "epoch": 3.89,
      "learning_rate": 1.7368282246726376e-07,
      "loss": 2.3166,
      "step": 3594
    },
    {
      "epoch": 3.9,
      "learning_rate": 1.6659842014677406e-07,
      "loss": 2.5148,
      "step": 3596
    },
    {
      "epoch": 3.9,
      "learning_rate": 1.596612848127399e-07,
      "loss": 2.339,
      "step": 3598
    },
    {
      "epoch": 3.9,
      "learning_rate": 1.5287143696568473e-07,
      "loss": 2.308,
      "step": 3600
    },
    {
      "epoch": 3.9,
      "learning_rate": 1.462288966708858e-07,
      "loss": 2.5463,
      "step": 3602
    },
    {
      "epoch": 3.9,
      "learning_rate": 1.397336835582741e-07,
      "loss": 2.6243,
      "step": 3604
    },
    {
      "epoch": 3.91,
      "learning_rate": 1.333858168224178e-07,
      "loss": 2.4714,
      "step": 3606
    },
    {
      "epoch": 3.91,
      "learning_rate": 1.2718531522244447e-07,
      "loss": 2.2877,
      "step": 3608
    },
    {
      "epoch": 3.91,
      "learning_rate": 1.211321970820023e-07,
      "loss": 2.3737,
      "step": 3610
    },
    {
      "epoch": 3.91,
      "learning_rate": 1.1522648028917116e-07,
      "loss": 2.6491,
      "step": 3612
    },
    {
      "epoch": 3.91,
      "learning_rate": 1.0946818229644607e-07,
      "loss": 2.1288,
      "step": 3614
    },
    {
      "epoch": 3.92,
      "learning_rate": 1.0385732012067607e-07,
      "loss": 2.4494,
      "step": 3616
    },
    {
      "epoch": 3.92,
      "learning_rate": 9.839391034300316e-08,
      "loss": 2.5278,
      "step": 3618
    },
    {
      "epoch": 3.92,
      "learning_rate": 9.307796910881794e-08,
      "loss": 2.2621,
      "step": 3620
    },
    {
      "epoch": 3.92,
      "learning_rate": 8.790951212771514e-08,
      "loss": 2.2438,
      "step": 3622
    },
    {
      "epoch": 3.93,
      "learning_rate": 8.28885546734548e-08,
      "loss": 2.5771,
      "step": 3624
    },
    {
      "epoch": 3.93,
      "learning_rate": 7.801511158390118e-08,
      "loss": 2.1801,
      "step": 3626
    },
    {
      "epoch": 3.93,
      "learning_rate": 7.328919726097838e-08,
      "loss": 2.3484,
      "step": 3628
    },
    {
      "epoch": 3.93,
      "learning_rate": 6.871082567065367e-08,
      "loss": 2.5062,
      "step": 3630
    },
    {
      "epoch": 3.93,
      "learning_rate": 6.42800103428598e-08,
      "loss": 2.2535,
      "step": 3632
    },
    {
      "epoch": 3.94,
      "learning_rate": 5.999676437148938e-08,
      "loss": 2.4677,
      "step": 3634
    },
    {
      "epoch": 3.94,
      "learning_rate": 5.5861100414322796e-08,
      "loss": 2.2292,
      "step": 3636
    },
    {
      "epoch": 3.94,
      "learning_rate": 5.1873030693028177e-08,
      "loss": 2.3609,
      "step": 3638
    },
    {
      "epoch": 3.94,
      "learning_rate": 4.8032566993089225e-08,
      "loss": 2.4992,
      "step": 3640
    },
    {
      "epoch": 3.95,
      "learning_rate": 4.4339720663788555e-08,
      "loss": 2.5409,
      "step": 3642
    },
    {
      "epoch": 3.95,
      "learning_rate": 4.079450261817997e-08,
      "loss": 2.4727,
      "step": 3644
    },
    {
      "epoch": 3.95,
      "learning_rate": 3.739692333304401e-08,
      "loss": 2.2859,
      "step": 3646
    },
    {
      "epoch": 3.95,
      "learning_rate": 3.4146992848854695e-08,
      "loss": 2.3062,
      "step": 3648
    },
    {
      "epoch": 3.95,
      "learning_rate": 3.104472076976839e-08,
      "loss": 2.3304,
      "step": 3650
    },
    {
      "epoch": 3.96,
      "learning_rate": 2.809011626357383e-08,
      "loss": 2.4489,
      "step": 3652
    },
    {
      "epoch": 3.96,
      "learning_rate": 2.528318806168106e-08,
      "loss": 2.3078,
      "step": 3654
    },
    {
      "epoch": 3.96,
      "learning_rate": 2.2623944459082557e-08,
      "loss": 2.3173,
      "step": 3656
    },
    {
      "epoch": 3.96,
      "learning_rate": 2.0112393314336565e-08,
      "loss": 2.3973,
      "step": 3658
    },
    {
      "epoch": 3.96,
      "learning_rate": 1.7748542049550453e-08,
      "loss": 2.4975,
      "step": 3660
    },
    {
      "epoch": 3.97,
      "learning_rate": 1.553239765034187e-08,
      "loss": 2.4494,
      "step": 3662
    },
    {
      "epoch": 3.97,
      "learning_rate": 1.346396666582761e-08,
      "loss": 2.4442,
      "step": 3664
    },
    {
      "epoch": 3.97,
      "learning_rate": 1.1543255208612546e-08,
      "loss": 2.4622,
      "step": 3666
    },
    {
      "epoch": 3.97,
      "learning_rate": 9.770268954756301e-09,
      "loss": 2.3676,
      "step": 3668
    },
    {
      "epoch": 3.98,
      "learning_rate": 8.145013143756597e-09,
      "loss": 2.5927,
      "step": 3670
    },
    {
      "epoch": 3.98,
      "learning_rate": 6.6674925785548125e-09,
      "loss": 2.4922,
      "step": 3672
    },
    {
      "epoch": 3.98,
      "learning_rate": 5.337711625497121e-09,
      "loss": 2.1202,
      "step": 3674
    },
    {
      "epoch": 3.98,
      "learning_rate": 4.155674214328942e-09,
      "loss": 2.5643,
      "step": 3676
    },
    {
      "epoch": 3.98,
      "learning_rate": 3.1213838382004867e-09,
      "loss": 2.2801,
      "step": 3678
    },
    {
      "epoch": 3.99,
      "learning_rate": 2.234843553627908e-09,
      "loss": 2.4424,
      "step": 3680
    },
    {
      "epoch": 3.99,
      "learning_rate": 1.496055980498845e-09,
      "loss": 2.6128,
      "step": 3682
    },
    {
      "epoch": 3.99,
      "learning_rate": 9.050233020779786e-10,
      "loss": 2.3174,
      "step": 3684
    },
    {
      "epoch": 3.99,
      "learning_rate": 4.6174726496817087e-10,
      "loss": 2.4364,
      "step": 3686
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.6622917913267088e-10,
      "loss": 2.5562,
      "step": 3688
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.8469917889563094e-11,
      "loss": 2.2758,
      "step": 3690
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.8469917889563094e-11,
      "loss": 2.4287,
      "step": 3692
    },
    {
      "epoch": 4.0,
      "step": 3692,
      "total_flos": 8.7881966778581e+16,
      "train_loss": 2.440685138283933,
      "train_runtime": 22362.2858,
      "train_samples_per_second": 10.572,
      "train_steps_per_second": 0.165
    }
  ],
  "max_steps": 3692,
  "num_train_epochs": 4,
  "total_flos": 8.7881966778581e+16,
  "trial_name": null,
  "trial_params": null
}