t5-small-kw2email-v2 / trainer_state.json
pszemraj's picture
Upload trainer_state.json
1b5a2fd
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.999458581483487,
"global_step": 3692,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.405405405405406e-06,
"loss": 2.6714,
"step": 2
},
{
"epoch": 0.0,
"learning_rate": 1.0810810810810812e-05,
"loss": 2.4616,
"step": 4
},
{
"epoch": 0.01,
"learning_rate": 1.6216216216216218e-05,
"loss": 2.5089,
"step": 6
},
{
"epoch": 0.01,
"learning_rate": 2.1621621621621624e-05,
"loss": 2.5389,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 2.702702702702703e-05,
"loss": 2.4892,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 3.2432432432432436e-05,
"loss": 2.4949,
"step": 12
},
{
"epoch": 0.02,
"learning_rate": 3.783783783783784e-05,
"loss": 2.5016,
"step": 14
},
{
"epoch": 0.02,
"learning_rate": 4.324324324324325e-05,
"loss": 2.5727,
"step": 16
},
{
"epoch": 0.02,
"learning_rate": 4.8648648648648654e-05,
"loss": 2.605,
"step": 18
},
{
"epoch": 0.02,
"learning_rate": 5.405405405405406e-05,
"loss": 2.5045,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 5.9459459459459466e-05,
"loss": 2.5509,
"step": 22
},
{
"epoch": 0.03,
"learning_rate": 6.486486486486487e-05,
"loss": 2.6363,
"step": 24
},
{
"epoch": 0.03,
"learning_rate": 7.027027027027028e-05,
"loss": 2.3918,
"step": 26
},
{
"epoch": 0.03,
"learning_rate": 7.567567567567568e-05,
"loss": 2.5172,
"step": 28
},
{
"epoch": 0.03,
"learning_rate": 8.108108108108109e-05,
"loss": 2.6391,
"step": 30
},
{
"epoch": 0.03,
"learning_rate": 8.64864864864865e-05,
"loss": 2.3696,
"step": 32
},
{
"epoch": 0.04,
"learning_rate": 9.18918918918919e-05,
"loss": 2.4359,
"step": 34
},
{
"epoch": 0.04,
"learning_rate": 9.729729729729731e-05,
"loss": 2.5197,
"step": 36
},
{
"epoch": 0.04,
"learning_rate": 9.999998153008212e-05,
"loss": 2.5001,
"step": 38
},
{
"epoch": 0.04,
"learning_rate": 9.999983377082087e-05,
"loss": 2.7154,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 9.999953825273504e-05,
"loss": 2.5842,
"step": 42
},
{
"epoch": 0.05,
"learning_rate": 9.999909497669792e-05,
"loss": 2.4231,
"step": 44
},
{
"epoch": 0.05,
"learning_rate": 9.99985039440195e-05,
"loss": 2.6799,
"step": 46
},
{
"epoch": 0.05,
"learning_rate": 9.999776515644638e-05,
"loss": 2.5285,
"step": 48
},
{
"epoch": 0.05,
"learning_rate": 9.999687861616181e-05,
"loss": 2.6049,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 9.999584432578569e-05,
"loss": 2.46,
"step": 52
},
{
"epoch": 0.06,
"learning_rate": 9.999466228837451e-05,
"loss": 2.5835,
"step": 54
},
{
"epoch": 0.06,
"learning_rate": 9.999333250742145e-05,
"loss": 2.6909,
"step": 56
},
{
"epoch": 0.06,
"learning_rate": 9.999185498685624e-05,
"loss": 2.4055,
"step": 58
},
{
"epoch": 0.06,
"learning_rate": 9.999022973104525e-05,
"loss": 2.5816,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 9.99884567447914e-05,
"loss": 2.5871,
"step": 62
},
{
"epoch": 0.07,
"learning_rate": 9.998653603333418e-05,
"loss": 2.701,
"step": 64
},
{
"epoch": 0.07,
"learning_rate": 9.998446760234966e-05,
"loss": 2.5853,
"step": 66
},
{
"epoch": 0.07,
"learning_rate": 9.998225145795046e-05,
"loss": 2.4068,
"step": 68
},
{
"epoch": 0.08,
"learning_rate": 9.997988760668566e-05,
"loss": 2.5417,
"step": 70
},
{
"epoch": 0.08,
"learning_rate": 9.997737605554092e-05,
"loss": 2.531,
"step": 72
},
{
"epoch": 0.08,
"learning_rate": 9.997471681193833e-05,
"loss": 2.5509,
"step": 74
},
{
"epoch": 0.08,
"learning_rate": 9.997190988373644e-05,
"loss": 2.4764,
"step": 76
},
{
"epoch": 0.08,
"learning_rate": 9.996895527923023e-05,
"loss": 2.4522,
"step": 78
},
{
"epoch": 0.09,
"learning_rate": 9.996585300715116e-05,
"loss": 2.332,
"step": 80
},
{
"epoch": 0.09,
"learning_rate": 9.996260307666696e-05,
"loss": 2.518,
"step": 82
},
{
"epoch": 0.09,
"learning_rate": 9.995920549738183e-05,
"loss": 2.5257,
"step": 84
},
{
"epoch": 0.09,
"learning_rate": 9.995566027933621e-05,
"loss": 2.4631,
"step": 86
},
{
"epoch": 0.1,
"learning_rate": 9.995196743300692e-05,
"loss": 2.6791,
"step": 88
},
{
"epoch": 0.1,
"learning_rate": 9.994812696930698e-05,
"loss": 2.5043,
"step": 90
},
{
"epoch": 0.1,
"learning_rate": 9.994413889958568e-05,
"loss": 2.5534,
"step": 92
},
{
"epoch": 0.1,
"learning_rate": 9.994000323562852e-05,
"loss": 2.3725,
"step": 94
},
{
"epoch": 0.1,
"learning_rate": 9.993571998965714e-05,
"loss": 2.7813,
"step": 96
},
{
"epoch": 0.11,
"learning_rate": 9.993128917432934e-05,
"loss": 2.4455,
"step": 98
},
{
"epoch": 0.11,
"learning_rate": 9.992671080273903e-05,
"loss": 2.4049,
"step": 100
},
{
"epoch": 0.11,
"learning_rate": 9.992198488841611e-05,
"loss": 2.4881,
"step": 102
},
{
"epoch": 0.11,
"learning_rate": 9.991711144532654e-05,
"loss": 2.4836,
"step": 104
},
{
"epoch": 0.11,
"learning_rate": 9.991209048787228e-05,
"loss": 2.4244,
"step": 106
},
{
"epoch": 0.12,
"learning_rate": 9.990692203089119e-05,
"loss": 2.5105,
"step": 108
},
{
"epoch": 0.12,
"learning_rate": 9.9901606089657e-05,
"loss": 2.6217,
"step": 110
},
{
"epoch": 0.12,
"learning_rate": 9.989614267987933e-05,
"loss": 2.5529,
"step": 112
},
{
"epoch": 0.12,
"learning_rate": 9.989053181770356e-05,
"loss": 2.512,
"step": 114
},
{
"epoch": 0.13,
"learning_rate": 9.988477351971084e-05,
"loss": 2.5365,
"step": 116
},
{
"epoch": 0.13,
"learning_rate": 9.9878867802918e-05,
"loss": 2.6218,
"step": 118
},
{
"epoch": 0.13,
"learning_rate": 9.987281468477756e-05,
"loss": 2.4998,
"step": 120
},
{
"epoch": 0.13,
"learning_rate": 9.986661418317759e-05,
"loss": 2.4597,
"step": 122
},
{
"epoch": 0.13,
"learning_rate": 9.986026631644173e-05,
"loss": 2.6517,
"step": 124
},
{
"epoch": 0.14,
"learning_rate": 9.985377110332912e-05,
"loss": 2.6685,
"step": 126
},
{
"epoch": 0.14,
"learning_rate": 9.984712856303432e-05,
"loss": 2.5139,
"step": 128
},
{
"epoch": 0.14,
"learning_rate": 9.984033871518727e-05,
"loss": 2.4927,
"step": 130
},
{
"epoch": 0.14,
"learning_rate": 9.983340157985324e-05,
"loss": 2.4832,
"step": 132
},
{
"epoch": 0.15,
"learning_rate": 9.982631717753275e-05,
"loss": 2.5724,
"step": 134
},
{
"epoch": 0.15,
"learning_rate": 9.981908552916153e-05,
"loss": 2.5026,
"step": 136
},
{
"epoch": 0.15,
"learning_rate": 9.981170665611046e-05,
"loss": 2.4104,
"step": 138
},
{
"epoch": 0.15,
"learning_rate": 9.980418058018547e-05,
"loss": 2.4806,
"step": 140
},
{
"epoch": 0.15,
"learning_rate": 9.979650732362753e-05,
"loss": 2.3937,
"step": 142
},
{
"epoch": 0.16,
"learning_rate": 9.978868690911253e-05,
"loss": 2.7488,
"step": 144
},
{
"epoch": 0.16,
"learning_rate": 9.978071935975126e-05,
"loss": 2.5604,
"step": 146
},
{
"epoch": 0.16,
"learning_rate": 9.977260469908931e-05,
"loss": 2.4197,
"step": 148
},
{
"epoch": 0.16,
"learning_rate": 9.976434295110701e-05,
"loss": 2.6003,
"step": 150
},
{
"epoch": 0.16,
"learning_rate": 9.975593414021938e-05,
"loss": 2.4822,
"step": 152
},
{
"epoch": 0.17,
"learning_rate": 9.974737829127602e-05,
"loss": 2.4176,
"step": 154
},
{
"epoch": 0.17,
"learning_rate": 9.973867542956104e-05,
"loss": 2.5665,
"step": 156
},
{
"epoch": 0.17,
"learning_rate": 9.972982558079303e-05,
"loss": 2.7229,
"step": 158
},
{
"epoch": 0.17,
"learning_rate": 9.972082877112494e-05,
"loss": 2.7438,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 9.971168502714401e-05,
"loss": 2.5602,
"step": 162
},
{
"epoch": 0.18,
"learning_rate": 9.970239437587174e-05,
"loss": 2.5438,
"step": 164
},
{
"epoch": 0.18,
"learning_rate": 9.96929568447637e-05,
"loss": 2.5334,
"step": 166
},
{
"epoch": 0.18,
"learning_rate": 9.968337246170956e-05,
"loss": 2.496,
"step": 168
},
{
"epoch": 0.18,
"learning_rate": 9.967364125503295e-05,
"loss": 2.4688,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 9.966376325349143e-05,
"loss": 2.5845,
"step": 172
},
{
"epoch": 0.19,
"learning_rate": 9.965373848627631e-05,
"loss": 2.3486,
"step": 174
},
{
"epoch": 0.19,
"learning_rate": 9.964356698301264e-05,
"loss": 2.6254,
"step": 176
},
{
"epoch": 0.19,
"learning_rate": 9.963324877375912e-05,
"loss": 2.2205,
"step": 178
},
{
"epoch": 0.19,
"learning_rate": 9.9622783889008e-05,
"loss": 2.4156,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 9.961217235968494e-05,
"loss": 2.6328,
"step": 182
},
{
"epoch": 0.2,
"learning_rate": 9.960141421714897e-05,
"loss": 2.5354,
"step": 184
},
{
"epoch": 0.2,
"learning_rate": 9.959050949319244e-05,
"loss": 2.4779,
"step": 186
},
{
"epoch": 0.2,
"learning_rate": 9.957945822004083e-05,
"loss": 2.4148,
"step": 188
},
{
"epoch": 0.21,
"learning_rate": 9.956826043035268e-05,
"loss": 2.5947,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 9.95569161572196e-05,
"loss": 2.6662,
"step": 192
},
{
"epoch": 0.21,
"learning_rate": 9.954542543416599e-05,
"loss": 2.4853,
"step": 194
},
{
"epoch": 0.21,
"learning_rate": 9.953378829514908e-05,
"loss": 2.4561,
"step": 196
},
{
"epoch": 0.21,
"learning_rate": 9.952200477455881e-05,
"loss": 2.5139,
"step": 198
},
{
"epoch": 0.22,
"learning_rate": 9.951007490721766e-05,
"loss": 2.5978,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 9.949799872838061e-05,
"loss": 2.6275,
"step": 202
},
{
"epoch": 0.22,
"learning_rate": 9.948577627373503e-05,
"loss": 2.4584,
"step": 204
},
{
"epoch": 0.22,
"learning_rate": 9.947340757940053e-05,
"loss": 2.3811,
"step": 206
},
{
"epoch": 0.23,
"learning_rate": 9.946089268192895e-05,
"loss": 2.4847,
"step": 208
},
{
"epoch": 0.23,
"learning_rate": 9.944823161830407e-05,
"loss": 2.5872,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 9.943542442594177e-05,
"loss": 2.7864,
"step": 212
},
{
"epoch": 0.23,
"learning_rate": 9.942247114268964e-05,
"loss": 2.5991,
"step": 214
},
{
"epoch": 0.23,
"learning_rate": 9.940937180682706e-05,
"loss": 2.4734,
"step": 216
},
{
"epoch": 0.24,
"learning_rate": 9.9396126457065e-05,
"loss": 2.6016,
"step": 218
},
{
"epoch": 0.24,
"learning_rate": 9.938273513254597e-05,
"loss": 2.5428,
"step": 220
},
{
"epoch": 0.24,
"learning_rate": 9.936919787284378e-05,
"loss": 2.6381,
"step": 222
},
{
"epoch": 0.24,
"learning_rate": 9.935551471796358e-05,
"loss": 2.6325,
"step": 224
},
{
"epoch": 0.24,
"learning_rate": 9.934168570834165e-05,
"loss": 2.5066,
"step": 226
},
{
"epoch": 0.25,
"learning_rate": 9.932771088484527e-05,
"loss": 2.5039,
"step": 228
},
{
"epoch": 0.25,
"learning_rate": 9.931359028877267e-05,
"loss": 2.7124,
"step": 230
},
{
"epoch": 0.25,
"learning_rate": 9.929932396185281e-05,
"loss": 2.4925,
"step": 232
},
{
"epoch": 0.25,
"learning_rate": 9.928491194624539e-05,
"loss": 2.579,
"step": 234
},
{
"epoch": 0.26,
"learning_rate": 9.927035428454055e-05,
"loss": 2.6093,
"step": 236
},
{
"epoch": 0.26,
"learning_rate": 9.925565101975893e-05,
"loss": 2.3589,
"step": 238
},
{
"epoch": 0.26,
"learning_rate": 9.924080219535141e-05,
"loss": 2.6058,
"step": 240
},
{
"epoch": 0.26,
"learning_rate": 9.922580785519901e-05,
"loss": 2.5016,
"step": 242
},
{
"epoch": 0.26,
"learning_rate": 9.921066804361284e-05,
"loss": 2.6041,
"step": 244
},
{
"epoch": 0.27,
"learning_rate": 9.919538280533382e-05,
"loss": 2.4222,
"step": 246
},
{
"epoch": 0.27,
"learning_rate": 9.917995218553271e-05,
"loss": 2.5596,
"step": 248
},
{
"epoch": 0.27,
"learning_rate": 9.916437622980985e-05,
"loss": 2.5427,
"step": 250
},
{
"epoch": 0.27,
"learning_rate": 9.91486549841951e-05,
"loss": 2.5865,
"step": 252
},
{
"epoch": 0.28,
"learning_rate": 9.913278849514765e-05,
"loss": 2.4464,
"step": 254
},
{
"epoch": 0.28,
"learning_rate": 9.911677680955596e-05,
"loss": 2.4279,
"step": 256
},
{
"epoch": 0.28,
"learning_rate": 9.910061997473752e-05,
"loss": 2.4858,
"step": 258
},
{
"epoch": 0.28,
"learning_rate": 9.908431803843881e-05,
"loss": 2.3309,
"step": 260
},
{
"epoch": 0.28,
"learning_rate": 9.906787104883506e-05,
"loss": 2.5427,
"step": 262
},
{
"epoch": 0.29,
"learning_rate": 9.905127905453023e-05,
"loss": 2.568,
"step": 264
},
{
"epoch": 0.29,
"learning_rate": 9.90345421045567e-05,
"loss": 2.5368,
"step": 266
},
{
"epoch": 0.29,
"learning_rate": 9.90176602483753e-05,
"loss": 2.562,
"step": 268
},
{
"epoch": 0.29,
"learning_rate": 9.90006335358751e-05,
"loss": 2.43,
"step": 270
},
{
"epoch": 0.29,
"learning_rate": 9.898346201737317e-05,
"loss": 2.6544,
"step": 272
},
{
"epoch": 0.3,
"learning_rate": 9.896614574361454e-05,
"loss": 2.5701,
"step": 274
},
{
"epoch": 0.3,
"learning_rate": 9.894868476577201e-05,
"loss": 2.5294,
"step": 276
},
{
"epoch": 0.3,
"learning_rate": 9.893107913544609e-05,
"loss": 2.6586,
"step": 278
},
{
"epoch": 0.3,
"learning_rate": 9.891332890466463e-05,
"loss": 2.4904,
"step": 280
},
{
"epoch": 0.31,
"learning_rate": 9.88954341258829e-05,
"loss": 2.5686,
"step": 282
},
{
"epoch": 0.31,
"learning_rate": 9.887739485198331e-05,
"loss": 2.5522,
"step": 284
},
{
"epoch": 0.31,
"learning_rate": 9.885921113627525e-05,
"loss": 2.6227,
"step": 286
},
{
"epoch": 0.31,
"learning_rate": 9.884088303249501e-05,
"loss": 2.6265,
"step": 288
},
{
"epoch": 0.31,
"learning_rate": 9.882241059480555e-05,
"loss": 2.6851,
"step": 290
},
{
"epoch": 0.32,
"learning_rate": 9.880379387779637e-05,
"loss": 2.4501,
"step": 292
},
{
"epoch": 0.32,
"learning_rate": 9.878503293648332e-05,
"loss": 2.3563,
"step": 294
},
{
"epoch": 0.32,
"learning_rate": 9.876612782630848e-05,
"loss": 2.3076,
"step": 296
},
{
"epoch": 0.32,
"learning_rate": 9.874707860313997e-05,
"loss": 2.5158,
"step": 298
},
{
"epoch": 0.32,
"learning_rate": 9.87278853232718e-05,
"loss": 2.4241,
"step": 300
},
{
"epoch": 0.33,
"learning_rate": 9.87085480434237e-05,
"loss": 2.6496,
"step": 302
},
{
"epoch": 0.33,
"learning_rate": 9.868906682074093e-05,
"loss": 2.5175,
"step": 304
},
{
"epoch": 0.33,
"learning_rate": 9.866944171279411e-05,
"loss": 2.4658,
"step": 306
},
{
"epoch": 0.33,
"learning_rate": 9.864967277757911e-05,
"loss": 2.3329,
"step": 308
},
{
"epoch": 0.34,
"learning_rate": 9.862976007351683e-05,
"loss": 2.6255,
"step": 310
},
{
"epoch": 0.34,
"learning_rate": 9.860970365945299e-05,
"loss": 2.5342,
"step": 312
},
{
"epoch": 0.34,
"learning_rate": 9.858950359465805e-05,
"loss": 2.3472,
"step": 314
},
{
"epoch": 0.34,
"learning_rate": 9.856915993882696e-05,
"loss": 2.4315,
"step": 316
},
{
"epoch": 0.34,
"learning_rate": 9.854867275207901e-05,
"loss": 2.5811,
"step": 318
},
{
"epoch": 0.35,
"learning_rate": 9.852804209495766e-05,
"loss": 2.5231,
"step": 320
},
{
"epoch": 0.35,
"learning_rate": 9.850726802843034e-05,
"loss": 2.6642,
"step": 322
},
{
"epoch": 0.35,
"learning_rate": 9.84863506138883e-05,
"loss": 2.5686,
"step": 324
},
{
"epoch": 0.35,
"learning_rate": 9.846528991314639e-05,
"loss": 2.7031,
"step": 326
},
{
"epoch": 0.36,
"learning_rate": 9.844408598844288e-05,
"loss": 2.4843,
"step": 328
},
{
"epoch": 0.36,
"learning_rate": 9.842273890243936e-05,
"loss": 2.6201,
"step": 330
},
{
"epoch": 0.36,
"learning_rate": 9.840124871822041e-05,
"loss": 2.4424,
"step": 332
},
{
"epoch": 0.36,
"learning_rate": 9.837961549929356e-05,
"loss": 2.5032,
"step": 334
},
{
"epoch": 0.36,
"learning_rate": 9.835783930958897e-05,
"loss": 2.6527,
"step": 336
},
{
"epoch": 0.37,
"learning_rate": 9.833592021345937e-05,
"loss": 2.4956,
"step": 338
},
{
"epoch": 0.37,
"learning_rate": 9.831385827567975e-05,
"loss": 2.3891,
"step": 340
},
{
"epoch": 0.37,
"learning_rate": 9.829165356144727e-05,
"loss": 2.2116,
"step": 342
},
{
"epoch": 0.37,
"learning_rate": 9.826930613638098e-05,
"loss": 2.5029,
"step": 344
},
{
"epoch": 0.37,
"learning_rate": 9.824681606652168e-05,
"loss": 2.5519,
"step": 346
},
{
"epoch": 0.38,
"learning_rate": 9.822418341833172e-05,
"loss": 2.5432,
"step": 348
},
{
"epoch": 0.38,
"learning_rate": 9.82014082586948e-05,
"loss": 2.5587,
"step": 350
},
{
"epoch": 0.38,
"learning_rate": 9.817849065491575e-05,
"loss": 2.552,
"step": 352
},
{
"epoch": 0.38,
"learning_rate": 9.815543067472039e-05,
"loss": 2.4258,
"step": 354
},
{
"epoch": 0.39,
"learning_rate": 9.813222838625521e-05,
"loss": 2.4393,
"step": 356
},
{
"epoch": 0.39,
"learning_rate": 9.810888385808732e-05,
"loss": 2.5239,
"step": 358
},
{
"epoch": 0.39,
"learning_rate": 9.808539715920414e-05,
"loss": 2.4571,
"step": 360
},
{
"epoch": 0.39,
"learning_rate": 9.806176835901328e-05,
"loss": 2.5202,
"step": 362
},
{
"epoch": 0.39,
"learning_rate": 9.803799752734219e-05,
"loss": 2.4761,
"step": 364
},
{
"epoch": 0.4,
"learning_rate": 9.801408473443816e-05,
"loss": 2.8216,
"step": 366
},
{
"epoch": 0.4,
"learning_rate": 9.79900300509679e-05,
"loss": 2.6249,
"step": 368
},
{
"epoch": 0.4,
"learning_rate": 9.796583354801752e-05,
"loss": 2.5059,
"step": 370
},
{
"epoch": 0.4,
"learning_rate": 9.794149529709216e-05,
"loss": 2.5326,
"step": 372
},
{
"epoch": 0.4,
"learning_rate": 9.791701537011591e-05,
"loss": 2.7352,
"step": 374
},
{
"epoch": 0.41,
"learning_rate": 9.789239383943152e-05,
"loss": 2.6958,
"step": 376
},
{
"epoch": 0.41,
"learning_rate": 9.78676307778002e-05,
"loss": 2.4794,
"step": 378
},
{
"epoch": 0.41,
"learning_rate": 9.784272625840136e-05,
"loss": 2.5559,
"step": 380
},
{
"epoch": 0.41,
"learning_rate": 9.781768035483256e-05,
"loss": 2.4731,
"step": 382
},
{
"epoch": 0.42,
"learning_rate": 9.779249314110909e-05,
"loss": 2.5427,
"step": 384
},
{
"epoch": 0.42,
"learning_rate": 9.776716469166384e-05,
"loss": 2.6534,
"step": 386
},
{
"epoch": 0.42,
"learning_rate": 9.774169508134715e-05,
"loss": 2.5991,
"step": 388
},
{
"epoch": 0.42,
"learning_rate": 9.771608438542639e-05,
"loss": 2.4887,
"step": 390
},
{
"epoch": 0.42,
"learning_rate": 9.769033267958598e-05,
"loss": 2.5762,
"step": 392
},
{
"epoch": 0.43,
"learning_rate": 9.766444003992703e-05,
"loss": 2.3775,
"step": 394
},
{
"epoch": 0.43,
"learning_rate": 9.763840654296706e-05,
"loss": 2.4067,
"step": 396
},
{
"epoch": 0.43,
"learning_rate": 9.761223226563996e-05,
"loss": 2.3338,
"step": 398
},
{
"epoch": 0.43,
"learning_rate": 9.758591728529555e-05,
"loss": 2.4981,
"step": 400
},
{
"epoch": 0.44,
"learning_rate": 9.755946167969952e-05,
"loss": 2.402,
"step": 402
},
{
"epoch": 0.44,
"learning_rate": 9.753286552703312e-05,
"loss": 2.7678,
"step": 404
},
{
"epoch": 0.44,
"learning_rate": 9.750612890589293e-05,
"loss": 2.5216,
"step": 406
},
{
"epoch": 0.44,
"learning_rate": 9.747925189529063e-05,
"loss": 2.3811,
"step": 408
},
{
"epoch": 0.44,
"learning_rate": 9.745223457465282e-05,
"loss": 2.4442,
"step": 410
},
{
"epoch": 0.45,
"learning_rate": 9.742507702382071e-05,
"loss": 2.4474,
"step": 412
},
{
"epoch": 0.45,
"learning_rate": 9.739777932304992e-05,
"loss": 2.5238,
"step": 414
},
{
"epoch": 0.45,
"learning_rate": 9.737034155301024e-05,
"loss": 2.4573,
"step": 416
},
{
"epoch": 0.45,
"learning_rate": 9.734276379478538e-05,
"loss": 2.5096,
"step": 418
},
{
"epoch": 0.45,
"learning_rate": 9.731504612987279e-05,
"loss": 2.5997,
"step": 420
},
{
"epoch": 0.46,
"learning_rate": 9.728718864018329e-05,
"loss": 2.3851,
"step": 422
},
{
"epoch": 0.46,
"learning_rate": 9.725919140804099e-05,
"loss": 2.5155,
"step": 424
},
{
"epoch": 0.46,
"learning_rate": 9.72310545161829e-05,
"loss": 2.614,
"step": 426
},
{
"epoch": 0.46,
"learning_rate": 9.72027780477588e-05,
"loss": 2.5027,
"step": 428
},
{
"epoch": 0.47,
"learning_rate": 9.717436208633088e-05,
"loss": 2.4011,
"step": 430
},
{
"epoch": 0.47,
"learning_rate": 9.714580671587366e-05,
"loss": 2.5327,
"step": 432
},
{
"epoch": 0.47,
"learning_rate": 9.711711202077354e-05,
"loss": 2.3772,
"step": 434
},
{
"epoch": 0.47,
"learning_rate": 9.708827808582871e-05,
"loss": 2.5332,
"step": 436
},
{
"epoch": 0.47,
"learning_rate": 9.70593049962488e-05,
"loss": 2.4211,
"step": 438
},
{
"epoch": 0.48,
"learning_rate": 9.703019283765471e-05,
"loss": 2.5811,
"step": 440
},
{
"epoch": 0.48,
"learning_rate": 9.700094169607828e-05,
"loss": 2.5859,
"step": 442
},
{
"epoch": 0.48,
"learning_rate": 9.697155165796209e-05,
"loss": 2.7103,
"step": 444
},
{
"epoch": 0.48,
"learning_rate": 9.694202281015918e-05,
"loss": 2.5435,
"step": 446
},
{
"epoch": 0.49,
"learning_rate": 9.691235523993278e-05,
"loss": 2.4327,
"step": 448
},
{
"epoch": 0.49,
"learning_rate": 9.688254903495609e-05,
"loss": 2.4772,
"step": 450
},
{
"epoch": 0.49,
"learning_rate": 9.685260428331202e-05,
"loss": 2.5445,
"step": 452
},
{
"epoch": 0.49,
"learning_rate": 9.682252107349288e-05,
"loss": 2.5334,
"step": 454
},
{
"epoch": 0.49,
"learning_rate": 9.679229949440015e-05,
"loss": 2.4343,
"step": 456
},
{
"epoch": 0.5,
"learning_rate": 9.676193963534423e-05,
"loss": 2.4341,
"step": 458
},
{
"epoch": 0.5,
"learning_rate": 9.673144158604419e-05,
"loss": 2.5318,
"step": 460
},
{
"epoch": 0.5,
"learning_rate": 9.67008054366274e-05,
"loss": 2.435,
"step": 462
},
{
"epoch": 0.5,
"learning_rate": 9.667003127762942e-05,
"loss": 2.6514,
"step": 464
},
{
"epoch": 0.5,
"learning_rate": 9.663911919999362e-05,
"loss": 2.4744,
"step": 466
},
{
"epoch": 0.51,
"learning_rate": 9.660806929507095e-05,
"loss": 2.5498,
"step": 468
},
{
"epoch": 0.51,
"learning_rate": 9.657688165461965e-05,
"loss": 2.6276,
"step": 470
},
{
"epoch": 0.51,
"learning_rate": 9.654555637080502e-05,
"loss": 2.6097,
"step": 472
},
{
"epoch": 0.51,
"learning_rate": 9.65140935361991e-05,
"loss": 2.3675,
"step": 474
},
{
"epoch": 0.52,
"learning_rate": 9.648249324378044e-05,
"loss": 2.469,
"step": 476
},
{
"epoch": 0.52,
"learning_rate": 9.64507555869338e-05,
"loss": 2.5554,
"step": 478
},
{
"epoch": 0.52,
"learning_rate": 9.641888065944984e-05,
"loss": 2.35,
"step": 480
},
{
"epoch": 0.52,
"learning_rate": 9.638686855552494e-05,
"loss": 2.495,
"step": 482
},
{
"epoch": 0.52,
"learning_rate": 9.635471936976081e-05,
"loss": 2.4547,
"step": 484
},
{
"epoch": 0.53,
"learning_rate": 9.63224331971643e-05,
"loss": 2.3311,
"step": 486
},
{
"epoch": 0.53,
"learning_rate": 9.629001013314705e-05,
"loss": 2.5145,
"step": 488
},
{
"epoch": 0.53,
"learning_rate": 9.625745027352526e-05,
"loss": 2.5413,
"step": 490
},
{
"epoch": 0.53,
"learning_rate": 9.622475371451939e-05,
"loss": 2.5209,
"step": 492
},
{
"epoch": 0.53,
"learning_rate": 9.619192055275386e-05,
"loss": 2.4376,
"step": 494
},
{
"epoch": 0.54,
"learning_rate": 9.615895088525677e-05,
"loss": 2.455,
"step": 496
},
{
"epoch": 0.54,
"learning_rate": 9.612584480945964e-05,
"loss": 2.3267,
"step": 498
},
{
"epoch": 0.54,
"learning_rate": 9.609260242319709e-05,
"loss": 2.6783,
"step": 500
},
{
"epoch": 0.54,
"learning_rate": 9.605922382470658e-05,
"loss": 2.4699,
"step": 502
},
{
"epoch": 0.55,
"learning_rate": 9.602570911262805e-05,
"loss": 2.4731,
"step": 504
},
{
"epoch": 0.55,
"learning_rate": 9.599205838600375e-05,
"loss": 2.4779,
"step": 506
},
{
"epoch": 0.55,
"learning_rate": 9.595827174427786e-05,
"loss": 2.6002,
"step": 508
},
{
"epoch": 0.55,
"learning_rate": 9.592434928729616e-05,
"loss": 2.5211,
"step": 510
},
{
"epoch": 0.55,
"learning_rate": 9.589029111530586e-05,
"loss": 2.5308,
"step": 512
},
{
"epoch": 0.56,
"learning_rate": 9.585609732895517e-05,
"loss": 2.5043,
"step": 514
},
{
"epoch": 0.56,
"learning_rate": 9.582176802929315e-05,
"loss": 2.4683,
"step": 516
},
{
"epoch": 0.56,
"learning_rate": 9.578730331776924e-05,
"loss": 2.4226,
"step": 518
},
{
"epoch": 0.56,
"learning_rate": 9.575270329623309e-05,
"loss": 2.3188,
"step": 520
},
{
"epoch": 0.57,
"learning_rate": 9.571796806693422e-05,
"loss": 2.6134,
"step": 522
},
{
"epoch": 0.57,
"learning_rate": 9.568309773252171e-05,
"loss": 2.3866,
"step": 524
},
{
"epoch": 0.57,
"learning_rate": 9.564809239604388e-05,
"loss": 2.6209,
"step": 526
},
{
"epoch": 0.57,
"learning_rate": 9.5612952160948e-05,
"loss": 2.5562,
"step": 528
},
{
"epoch": 0.57,
"learning_rate": 9.557767713108009e-05,
"loss": 2.6116,
"step": 530
},
{
"epoch": 0.58,
"learning_rate": 9.554226741068432e-05,
"loss": 2.5081,
"step": 532
},
{
"epoch": 0.58,
"learning_rate": 9.550672310440311e-05,
"loss": 2.7225,
"step": 534
},
{
"epoch": 0.58,
"learning_rate": 9.547104431727647e-05,
"loss": 2.6353,
"step": 536
},
{
"epoch": 0.58,
"learning_rate": 9.543523115474187e-05,
"loss": 2.5658,
"step": 538
},
{
"epoch": 0.58,
"learning_rate": 9.539928372263387e-05,
"loss": 2.5224,
"step": 540
},
{
"epoch": 0.59,
"learning_rate": 9.536320212718382e-05,
"loss": 2.702,
"step": 542
},
{
"epoch": 0.59,
"learning_rate": 9.532698647501958e-05,
"loss": 2.4106,
"step": 544
},
{
"epoch": 0.59,
"learning_rate": 9.529063687316513e-05,
"loss": 2.5009,
"step": 546
},
{
"epoch": 0.59,
"learning_rate": 9.525415342904034e-05,
"loss": 2.4723,
"step": 548
},
{
"epoch": 0.6,
"learning_rate": 9.521753625046056e-05,
"loss": 2.3118,
"step": 550
},
{
"epoch": 0.6,
"learning_rate": 9.51807854456364e-05,
"loss": 2.6302,
"step": 552
},
{
"epoch": 0.6,
"learning_rate": 9.51439011231733e-05,
"loss": 2.3981,
"step": 554
},
{
"epoch": 0.6,
"learning_rate": 9.510688339207133e-05,
"loss": 2.4194,
"step": 556
},
{
"epoch": 0.6,
"learning_rate": 9.506973236172478e-05,
"loss": 2.5114,
"step": 558
},
{
"epoch": 0.61,
"learning_rate": 9.503244814192187e-05,
"loss": 2.4885,
"step": 560
},
{
"epoch": 0.61,
"learning_rate": 9.499503084284441e-05,
"loss": 2.4262,
"step": 562
},
{
"epoch": 0.61,
"learning_rate": 9.49574805750675e-05,
"loss": 2.2484,
"step": 564
},
{
"epoch": 0.61,
"learning_rate": 9.491979744955915e-05,
"loss": 2.3817,
"step": 566
},
{
"epoch": 0.62,
"learning_rate": 9.488198157768005e-05,
"loss": 2.455,
"step": 568
},
{
"epoch": 0.62,
"learning_rate": 9.484403307118312e-05,
"loss": 2.3778,
"step": 570
},
{
"epoch": 0.62,
"learning_rate": 9.480595204221329e-05,
"loss": 2.5499,
"step": 572
},
{
"epoch": 0.62,
"learning_rate": 9.47677386033071e-05,
"loss": 2.5838,
"step": 574
},
{
"epoch": 0.62,
"learning_rate": 9.472939286739235e-05,
"loss": 2.5077,
"step": 576
},
{
"epoch": 0.63,
"learning_rate": 9.469091494778785e-05,
"loss": 2.6054,
"step": 578
},
{
"epoch": 0.63,
"learning_rate": 9.465230495820303e-05,
"loss": 2.4767,
"step": 580
},
{
"epoch": 0.63,
"learning_rate": 9.461356301273758e-05,
"loss": 2.6251,
"step": 582
},
{
"epoch": 0.63,
"learning_rate": 9.45746892258812e-05,
"loss": 2.4725,
"step": 584
},
{
"epoch": 0.63,
"learning_rate": 9.453568371251316e-05,
"loss": 2.5408,
"step": 586
},
{
"epoch": 0.64,
"learning_rate": 9.4496546587902e-05,
"loss": 2.5397,
"step": 588
},
{
"epoch": 0.64,
"learning_rate": 9.445727796770524e-05,
"loss": 2.666,
"step": 590
},
{
"epoch": 0.64,
"learning_rate": 9.441787796796896e-05,
"loss": 2.3805,
"step": 592
},
{
"epoch": 0.64,
"learning_rate": 9.43783467051275e-05,
"loss": 2.57,
"step": 594
},
{
"epoch": 0.65,
"learning_rate": 9.43386842960031e-05,
"loss": 2.6776,
"step": 596
},
{
"epoch": 0.65,
"learning_rate": 9.429889085780557e-05,
"loss": 2.447,
"step": 598
},
{
"epoch": 0.65,
"learning_rate": 9.425896650813196e-05,
"loss": 2.6253,
"step": 600
},
{
"epoch": 0.65,
"learning_rate": 9.421891136496612e-05,
"loss": 2.3422,
"step": 602
},
{
"epoch": 0.65,
"learning_rate": 9.41787255466785e-05,
"loss": 2.3565,
"step": 604
},
{
"epoch": 0.66,
"learning_rate": 9.413840917202566e-05,
"loss": 2.4135,
"step": 606
},
{
"epoch": 0.66,
"learning_rate": 9.409796236014999e-05,
"loss": 2.6778,
"step": 608
},
{
"epoch": 0.66,
"learning_rate": 9.405738523057938e-05,
"loss": 2.4313,
"step": 610
},
{
"epoch": 0.66,
"learning_rate": 9.401667790322679e-05,
"loss": 2.4427,
"step": 612
},
{
"epoch": 0.66,
"learning_rate": 9.397584049838996e-05,
"loss": 2.6661,
"step": 614
},
{
"epoch": 0.67,
"learning_rate": 9.393487313675102e-05,
"loss": 2.4825,
"step": 616
},
{
"epoch": 0.67,
"learning_rate": 9.389377593937618e-05,
"loss": 2.5834,
"step": 618
},
{
"epoch": 0.67,
"learning_rate": 9.38525490277153e-05,
"loss": 2.4413,
"step": 620
},
{
"epoch": 0.67,
"learning_rate": 9.38111925236016e-05,
"loss": 2.5265,
"step": 622
},
{
"epoch": 0.68,
"learning_rate": 9.376970654925124e-05,
"loss": 2.5181,
"step": 624
},
{
"epoch": 0.68,
"learning_rate": 9.372809122726299e-05,
"loss": 2.6319,
"step": 626
},
{
"epoch": 0.68,
"learning_rate": 9.368634668061791e-05,
"loss": 2.7302,
"step": 628
},
{
"epoch": 0.68,
"learning_rate": 9.364447303267889e-05,
"loss": 2.5624,
"step": 630
},
{
"epoch": 0.68,
"learning_rate": 9.360247040719039e-05,
"loss": 2.4739,
"step": 632
},
{
"epoch": 0.69,
"learning_rate": 9.356033892827796e-05,
"loss": 2.3922,
"step": 634
},
{
"epoch": 0.69,
"learning_rate": 9.3518078720448e-05,
"loss": 2.5975,
"step": 636
},
{
"epoch": 0.69,
"learning_rate": 9.347568990858726e-05,
"loss": 2.4533,
"step": 638
},
{
"epoch": 0.69,
"learning_rate": 9.343317261796262e-05,
"loss": 2.4955,
"step": 640
},
{
"epoch": 0.7,
"learning_rate": 9.339052697422057e-05,
"loss": 2.4098,
"step": 642
},
{
"epoch": 0.7,
"learning_rate": 9.334775310338694e-05,
"loss": 2.705,
"step": 644
},
{
"epoch": 0.7,
"learning_rate": 9.330485113186648e-05,
"loss": 2.4335,
"step": 646
},
{
"epoch": 0.7,
"learning_rate": 9.326182118644254e-05,
"loss": 2.6452,
"step": 648
},
{
"epoch": 0.7,
"learning_rate": 9.321866339427658e-05,
"loss": 2.4124,
"step": 650
},
{
"epoch": 0.71,
"learning_rate": 9.317537788290794e-05,
"loss": 2.3303,
"step": 652
},
{
"epoch": 0.71,
"learning_rate": 9.313196478025337e-05,
"loss": 2.4199,
"step": 654
},
{
"epoch": 0.71,
"learning_rate": 9.308842421460667e-05,
"loss": 2.4577,
"step": 656
},
{
"epoch": 0.71,
"learning_rate": 9.304475631463834e-05,
"loss": 2.5357,
"step": 658
},
{
"epoch": 0.71,
"learning_rate": 9.300096120939516e-05,
"loss": 2.418,
"step": 660
},
{
"epoch": 0.72,
"learning_rate": 9.29570390282998e-05,
"loss": 2.4678,
"step": 662
},
{
"epoch": 0.72,
"learning_rate": 9.29129899011505e-05,
"loss": 2.3826,
"step": 664
},
{
"epoch": 0.72,
"learning_rate": 9.286881395812066e-05,
"loss": 2.4426,
"step": 666
},
{
"epoch": 0.72,
"learning_rate": 9.28245113297584e-05,
"loss": 2.3528,
"step": 668
},
{
"epoch": 0.73,
"learning_rate": 9.278008214698624e-05,
"loss": 2.4743,
"step": 670
},
{
"epoch": 0.73,
"learning_rate": 9.27355265411007e-05,
"loss": 2.3887,
"step": 672
},
{
"epoch": 0.73,
"learning_rate": 9.26908446437719e-05,
"loss": 2.468,
"step": 674
},
{
"epoch": 0.73,
"learning_rate": 9.264603658704318e-05,
"loss": 2.352,
"step": 676
},
{
"epoch": 0.73,
"learning_rate": 9.260110250333066e-05,
"loss": 2.4429,
"step": 678
},
{
"epoch": 0.74,
"learning_rate": 9.255604252542296e-05,
"loss": 2.3712,
"step": 680
},
{
"epoch": 0.74,
"learning_rate": 9.251085678648072e-05,
"loss": 2.5003,
"step": 682
},
{
"epoch": 0.74,
"learning_rate": 9.246554542003618e-05,
"loss": 2.5098,
"step": 684
},
{
"epoch": 0.74,
"learning_rate": 9.24201085599929e-05,
"loss": 2.6184,
"step": 686
},
{
"epoch": 0.74,
"learning_rate": 9.237454634062525e-05,
"loss": 2.4172,
"step": 688
},
{
"epoch": 0.75,
"learning_rate": 9.23288588965781e-05,
"loss": 2.4234,
"step": 690
},
{
"epoch": 0.75,
"learning_rate": 9.228304636286633e-05,
"loss": 2.6617,
"step": 692
},
{
"epoch": 0.75,
"learning_rate": 9.223710887487453e-05,
"loss": 2.422,
"step": 694
},
{
"epoch": 0.75,
"learning_rate": 9.219104656835654e-05,
"loss": 2.6878,
"step": 696
},
{
"epoch": 0.76,
"learning_rate": 9.214485957943503e-05,
"loss": 2.6575,
"step": 698
},
{
"epoch": 0.76,
"learning_rate": 9.209854804460121e-05,
"loss": 2.369,
"step": 700
},
{
"epoch": 0.76,
"learning_rate": 9.205211210071426e-05,
"loss": 2.5432,
"step": 702
},
{
"epoch": 0.76,
"learning_rate": 9.200555188500103e-05,
"loss": 2.5313,
"step": 704
},
{
"epoch": 0.76,
"learning_rate": 9.195886753505565e-05,
"loss": 2.3887,
"step": 706
},
{
"epoch": 0.77,
"learning_rate": 9.191205918883909e-05,
"loss": 2.6655,
"step": 708
},
{
"epoch": 0.77,
"learning_rate": 9.18651269846787e-05,
"loss": 2.5833,
"step": 710
},
{
"epoch": 0.77,
"learning_rate": 9.181807106126792e-05,
"loss": 2.6638,
"step": 712
},
{
"epoch": 0.77,
"learning_rate": 9.177089155766574e-05,
"loss": 2.4395,
"step": 714
},
{
"epoch": 0.78,
"learning_rate": 9.172358861329641e-05,
"loss": 2.5247,
"step": 716
},
{
"epoch": 0.78,
"learning_rate": 9.167616236794894e-05,
"loss": 2.4711,
"step": 718
},
{
"epoch": 0.78,
"learning_rate": 9.162861296177671e-05,
"loss": 2.4537,
"step": 720
},
{
"epoch": 0.78,
"learning_rate": 9.158094053529709e-05,
"loss": 2.4404,
"step": 722
},
{
"epoch": 0.78,
"learning_rate": 9.153314522939096e-05,
"loss": 2.4599,
"step": 724
},
{
"epoch": 0.79,
"learning_rate": 9.148522718530236e-05,
"loss": 2.5289,
"step": 726
},
{
"epoch": 0.79,
"learning_rate": 9.143718654463804e-05,
"loss": 2.2966,
"step": 728
},
{
"epoch": 0.79,
"learning_rate": 9.138902344936706e-05,
"loss": 2.4635,
"step": 730
},
{
"epoch": 0.79,
"learning_rate": 9.134073804182033e-05,
"loss": 2.6182,
"step": 732
},
{
"epoch": 0.79,
"learning_rate": 9.129233046469022e-05,
"loss": 2.6568,
"step": 734
},
{
"epoch": 0.8,
"learning_rate": 9.124380086103013e-05,
"loss": 2.5841,
"step": 736
},
{
"epoch": 0.8,
"learning_rate": 9.11951493742541e-05,
"loss": 2.609,
"step": 738
},
{
"epoch": 0.8,
"learning_rate": 9.114637614813634e-05,
"loss": 2.3299,
"step": 740
},
{
"epoch": 0.8,
"learning_rate": 9.109748132681082e-05,
"loss": 2.5093,
"step": 742
},
{
"epoch": 0.81,
"learning_rate": 9.104846505477083e-05,
"loss": 2.4223,
"step": 744
},
{
"epoch": 0.81,
"learning_rate": 9.09993274768686e-05,
"loss": 2.4636,
"step": 746
},
{
"epoch": 0.81,
"learning_rate": 9.095006873831479e-05,
"loss": 2.3136,
"step": 748
},
{
"epoch": 0.81,
"learning_rate": 9.090068898467823e-05,
"loss": 2.557,
"step": 750
},
{
"epoch": 0.81,
"learning_rate": 9.085118836188521e-05,
"loss": 2.4634,
"step": 752
},
{
"epoch": 0.82,
"learning_rate": 9.080156701621936e-05,
"loss": 2.5238,
"step": 754
},
{
"epoch": 0.82,
"learning_rate": 9.075182509432095e-05,
"loss": 2.4833,
"step": 756
},
{
"epoch": 0.82,
"learning_rate": 9.070196274318666e-05,
"loss": 2.6603,
"step": 758
},
{
"epoch": 0.82,
"learning_rate": 9.0651980110169e-05,
"loss": 2.4763,
"step": 760
},
{
"epoch": 0.83,
"learning_rate": 9.060187734297599e-05,
"loss": 2.4662,
"step": 762
},
{
"epoch": 0.83,
"learning_rate": 9.055165458967063e-05,
"loss": 2.4409,
"step": 764
},
{
"epoch": 0.83,
"learning_rate": 9.050131199867052e-05,
"loss": 2.5474,
"step": 766
},
{
"epoch": 0.83,
"learning_rate": 9.045084971874738e-05,
"loss": 2.5071,
"step": 768
},
{
"epoch": 0.83,
"learning_rate": 9.040026789902665e-05,
"loss": 2.4774,
"step": 770
},
{
"epoch": 0.84,
"learning_rate": 9.034956668898706e-05,
"loss": 2.4119,
"step": 772
},
{
"epoch": 0.84,
"learning_rate": 9.029874623846011e-05,
"loss": 2.4335,
"step": 774
},
{
"epoch": 0.84,
"learning_rate": 9.02478066976297e-05,
"loss": 2.3666,
"step": 776
},
{
"epoch": 0.84,
"learning_rate": 9.019674821703166e-05,
"loss": 2.5817,
"step": 778
},
{
"epoch": 0.84,
"learning_rate": 9.014557094755331e-05,
"loss": 2.2798,
"step": 780
},
{
"epoch": 0.85,
"learning_rate": 9.009427504043305e-05,
"loss": 2.5065,
"step": 782
},
{
"epoch": 0.85,
"learning_rate": 9.004286064725982e-05,
"loss": 2.4121,
"step": 784
},
{
"epoch": 0.85,
"learning_rate": 8.999132791997271e-05,
"loss": 2.5618,
"step": 786
},
{
"epoch": 0.85,
"learning_rate": 8.993967701086057e-05,
"loss": 2.7772,
"step": 788
},
{
"epoch": 0.86,
"learning_rate": 8.988790807256143e-05,
"loss": 2.3717,
"step": 790
},
{
"epoch": 0.86,
"learning_rate": 8.983602125806216e-05,
"loss": 2.5273,
"step": 792
},
{
"epoch": 0.86,
"learning_rate": 8.978401672069797e-05,
"loss": 2.5246,
"step": 794
},
{
"epoch": 0.86,
"learning_rate": 8.973189461415194e-05,
"loss": 2.5115,
"step": 796
},
{
"epoch": 0.86,
"learning_rate": 8.967965509245461e-05,
"loss": 2.583,
"step": 798
},
{
"epoch": 0.87,
"learning_rate": 8.962729830998353e-05,
"loss": 2.4989,
"step": 800
},
{
"epoch": 0.87,
"learning_rate": 8.957482442146272e-05,
"loss": 2.4011,
"step": 802
},
{
"epoch": 0.87,
"learning_rate": 8.952223358196227e-05,
"loss": 2.424,
"step": 804
},
{
"epoch": 0.87,
"learning_rate": 8.946952594689797e-05,
"loss": 2.5144,
"step": 806
},
{
"epoch": 0.87,
"learning_rate": 8.941670167203067e-05,
"loss": 2.4956,
"step": 808
},
{
"epoch": 0.88,
"learning_rate": 8.936376091346595e-05,
"loss": 2.5917,
"step": 810
},
{
"epoch": 0.88,
"learning_rate": 8.931070382765359e-05,
"loss": 2.3386,
"step": 812
},
{
"epoch": 0.88,
"learning_rate": 8.925753057138719e-05,
"loss": 2.4911,
"step": 814
},
{
"epoch": 0.88,
"learning_rate": 8.920424130180363e-05,
"loss": 2.5727,
"step": 816
},
{
"epoch": 0.89,
"learning_rate": 8.915083617638262e-05,
"loss": 2.4148,
"step": 818
},
{
"epoch": 0.89,
"learning_rate": 8.909731535294628e-05,
"loss": 2.4859,
"step": 820
},
{
"epoch": 0.89,
"learning_rate": 8.904367898965857e-05,
"loss": 2.4704,
"step": 822
},
{
"epoch": 0.89,
"learning_rate": 8.898992724502498e-05,
"loss": 2.5904,
"step": 824
},
{
"epoch": 0.89,
"learning_rate": 8.893606027789192e-05,
"loss": 2.5586,
"step": 826
},
{
"epoch": 0.9,
"learning_rate": 8.888207824744629e-05,
"loss": 2.4723,
"step": 828
},
{
"epoch": 0.9,
"learning_rate": 8.882798131321508e-05,
"loss": 2.5011,
"step": 830
},
{
"epoch": 0.9,
"learning_rate": 8.877376963506477e-05,
"loss": 2.6237,
"step": 832
},
{
"epoch": 0.9,
"learning_rate": 8.871944337320102e-05,
"loss": 2.548,
"step": 834
},
{
"epoch": 0.91,
"learning_rate": 8.866500268816803e-05,
"loss": 2.3527,
"step": 836
},
{
"epoch": 0.91,
"learning_rate": 8.861044774084815e-05,
"loss": 2.6638,
"step": 838
},
{
"epoch": 0.91,
"learning_rate": 8.855577869246142e-05,
"loss": 2.4873,
"step": 840
},
{
"epoch": 0.91,
"learning_rate": 8.850099570456509e-05,
"loss": 2.4461,
"step": 842
},
{
"epoch": 0.91,
"learning_rate": 8.844609893905309e-05,
"loss": 2.4031,
"step": 844
},
{
"epoch": 0.92,
"learning_rate": 8.839108855815557e-05,
"loss": 2.5516,
"step": 846
},
{
"epoch": 0.92,
"learning_rate": 8.833596472443848e-05,
"loss": 2.4283,
"step": 848
},
{
"epoch": 0.92,
"learning_rate": 8.828072760080299e-05,
"loss": 2.2932,
"step": 850
},
{
"epoch": 0.92,
"learning_rate": 8.822537735048512e-05,
"loss": 2.3761,
"step": 852
},
{
"epoch": 0.92,
"learning_rate": 8.816991413705516e-05,
"loss": 2.4804,
"step": 854
},
{
"epoch": 0.93,
"learning_rate": 8.811433812441722e-05,
"loss": 2.6496,
"step": 856
},
{
"epoch": 0.93,
"learning_rate": 8.80586494768088e-05,
"loss": 2.4868,
"step": 858
},
{
"epoch": 0.93,
"learning_rate": 8.800284835880024e-05,
"loss": 2.679,
"step": 860
},
{
"epoch": 0.93,
"learning_rate": 8.79469349352942e-05,
"loss": 2.6229,
"step": 862
},
{
"epoch": 0.94,
"learning_rate": 8.78909093715253e-05,
"loss": 2.5438,
"step": 864
},
{
"epoch": 0.94,
"learning_rate": 8.783477183305949e-05,
"loss": 2.4863,
"step": 866
},
{
"epoch": 0.94,
"learning_rate": 8.777852248579367e-05,
"loss": 2.5205,
"step": 868
},
{
"epoch": 0.94,
"learning_rate": 8.772216149595513e-05,
"loss": 2.4179,
"step": 870
},
{
"epoch": 0.94,
"learning_rate": 8.766568903010113e-05,
"loss": 2.4653,
"step": 872
},
{
"epoch": 0.95,
"learning_rate": 8.76091052551183e-05,
"loss": 2.3727,
"step": 874
},
{
"epoch": 0.95,
"learning_rate": 8.755241033822224e-05,
"loss": 2.5503,
"step": 876
},
{
"epoch": 0.95,
"learning_rate": 8.7495604446957e-05,
"loss": 2.4288,
"step": 878
},
{
"epoch": 0.95,
"learning_rate": 8.743868774919458e-05,
"loss": 2.451,
"step": 880
},
{
"epoch": 0.96,
"learning_rate": 8.738166041313439e-05,
"loss": 2.3869,
"step": 882
},
{
"epoch": 0.96,
"learning_rate": 8.732452260730286e-05,
"loss": 2.5419,
"step": 884
},
{
"epoch": 0.96,
"learning_rate": 8.726727450055287e-05,
"loss": 2.3962,
"step": 886
},
{
"epoch": 0.96,
"learning_rate": 8.720991626206321e-05,
"loss": 2.4672,
"step": 888
},
{
"epoch": 0.96,
"learning_rate": 8.715244806133816e-05,
"loss": 2.4988,
"step": 890
},
{
"epoch": 0.97,
"learning_rate": 8.7094870068207e-05,
"loss": 2.2557,
"step": 892
},
{
"epoch": 0.97,
"learning_rate": 8.703718245282337e-05,
"loss": 2.5007,
"step": 894
},
{
"epoch": 0.97,
"learning_rate": 8.697938538566499e-05,
"loss": 2.4908,
"step": 896
},
{
"epoch": 0.97,
"learning_rate": 8.69214790375329e-05,
"loss": 2.4346,
"step": 898
},
{
"epoch": 0.97,
"learning_rate": 8.686346357955117e-05,
"loss": 2.2897,
"step": 900
},
{
"epoch": 0.98,
"learning_rate": 8.68053391831663e-05,
"loss": 2.3337,
"step": 902
},
{
"epoch": 0.98,
"learning_rate": 8.674710602014671e-05,
"loss": 2.4618,
"step": 904
},
{
"epoch": 0.98,
"learning_rate": 8.668876426258221e-05,
"loss": 2.5041,
"step": 906
},
{
"epoch": 0.98,
"learning_rate": 8.66303140828836e-05,
"loss": 2.4207,
"step": 908
},
{
"epoch": 0.99,
"learning_rate": 8.657175565378206e-05,
"loss": 2.4657,
"step": 910
},
{
"epoch": 0.99,
"learning_rate": 8.651308914832862e-05,
"loss": 2.5422,
"step": 912
},
{
"epoch": 0.99,
"learning_rate": 8.645431473989376e-05,
"loss": 2.4069,
"step": 914
},
{
"epoch": 0.99,
"learning_rate": 8.63954326021668e-05,
"loss": 2.6141,
"step": 916
},
{
"epoch": 0.99,
"learning_rate": 8.633644290915545e-05,
"loss": 2.7452,
"step": 918
},
{
"epoch": 1.0,
"learning_rate": 8.627734583518521e-05,
"loss": 2.4625,
"step": 920
},
{
"epoch": 1.0,
"learning_rate": 8.621814155489895e-05,
"loss": 2.3913,
"step": 922
},
{
"epoch": 1.0,
"learning_rate": 8.615883024325636e-05,
"loss": 3.1472,
"step": 924
},
{
"epoch": 1.0,
"learning_rate": 8.609941207553342e-05,
"loss": 2.4791,
"step": 926
},
{
"epoch": 1.01,
"learning_rate": 8.603988722732186e-05,
"loss": 2.4555,
"step": 928
},
{
"epoch": 1.01,
"learning_rate": 8.598025587452873e-05,
"loss": 2.5092,
"step": 930
},
{
"epoch": 1.01,
"learning_rate": 8.592051819337579e-05,
"loss": 2.5088,
"step": 932
},
{
"epoch": 1.01,
"learning_rate": 8.586067436039899e-05,
"loss": 2.5663,
"step": 934
},
{
"epoch": 1.01,
"learning_rate": 8.580072455244801e-05,
"loss": 2.5562,
"step": 936
},
{
"epoch": 1.02,
"learning_rate": 8.574066894668573e-05,
"loss": 2.4265,
"step": 938
},
{
"epoch": 1.02,
"learning_rate": 8.568050772058762e-05,
"loss": 2.473,
"step": 940
},
{
"epoch": 1.02,
"learning_rate": 8.562024105194133e-05,
"loss": 2.5223,
"step": 942
},
{
"epoch": 1.02,
"learning_rate": 8.555986911884609e-05,
"loss": 2.3263,
"step": 944
},
{
"epoch": 1.02,
"learning_rate": 8.549939209971221e-05,
"loss": 2.2938,
"step": 946
},
{
"epoch": 1.03,
"learning_rate": 8.543881017326057e-05,
"loss": 2.321,
"step": 948
},
{
"epoch": 1.03,
"learning_rate": 8.537812351852201e-05,
"loss": 2.4323,
"step": 950
},
{
"epoch": 1.03,
"learning_rate": 8.531733231483694e-05,
"loss": 2.365,
"step": 952
},
{
"epoch": 1.03,
"learning_rate": 8.525643674185466e-05,
"loss": 2.4085,
"step": 954
},
{
"epoch": 1.04,
"learning_rate": 8.519543697953296e-05,
"loss": 2.4288,
"step": 956
},
{
"epoch": 1.04,
"learning_rate": 8.51343332081375e-05,
"loss": 2.6551,
"step": 958
},
{
"epoch": 1.04,
"learning_rate": 8.50731256082413e-05,
"loss": 2.4887,
"step": 960
},
{
"epoch": 1.04,
"learning_rate": 8.501181436072422e-05,
"loss": 2.6168,
"step": 962
},
{
"epoch": 1.04,
"learning_rate": 8.495039964677241e-05,
"loss": 2.4247,
"step": 964
},
{
"epoch": 1.05,
"learning_rate": 8.488888164787782e-05,
"loss": 2.5132,
"step": 966
},
{
"epoch": 1.05,
"learning_rate": 8.482726054583761e-05,
"loss": 2.5011,
"step": 968
},
{
"epoch": 1.05,
"learning_rate": 8.476553652275356e-05,
"loss": 2.4964,
"step": 970
},
{
"epoch": 1.05,
"learning_rate": 8.47037097610317e-05,
"loss": 2.3202,
"step": 972
},
{
"epoch": 1.06,
"learning_rate": 8.464178044338162e-05,
"loss": 2.2058,
"step": 974
},
{
"epoch": 1.06,
"learning_rate": 8.4579748752816e-05,
"loss": 2.585,
"step": 976
},
{
"epoch": 1.06,
"learning_rate": 8.451761487265003e-05,
"loss": 2.3743,
"step": 978
},
{
"epoch": 1.06,
"learning_rate": 8.44553789865009e-05,
"loss": 2.2927,
"step": 980
},
{
"epoch": 1.06,
"learning_rate": 8.439304127828728e-05,
"loss": 2.3899,
"step": 982
},
{
"epoch": 1.07,
"learning_rate": 8.433060193222868e-05,
"loss": 2.462,
"step": 984
},
{
"epoch": 1.07,
"learning_rate": 8.426806113284502e-05,
"loss": 2.4369,
"step": 986
},
{
"epoch": 1.07,
"learning_rate": 8.420541906495599e-05,
"loss": 2.4967,
"step": 988
},
{
"epoch": 1.07,
"learning_rate": 8.414267591368058e-05,
"loss": 2.6217,
"step": 990
},
{
"epoch": 1.07,
"learning_rate": 8.407983186443653e-05,
"loss": 2.6545,
"step": 992
},
{
"epoch": 1.08,
"learning_rate": 8.401688710293967e-05,
"loss": 2.4993,
"step": 994
},
{
"epoch": 1.08,
"learning_rate": 8.395384181520351e-05,
"loss": 2.3227,
"step": 996
},
{
"epoch": 1.08,
"learning_rate": 8.389069618753865e-05,
"loss": 2.325,
"step": 998
},
{
"epoch": 1.08,
"learning_rate": 8.382745040655212e-05,
"loss": 2.6491,
"step": 1000
},
{
"epoch": 1.09,
"learning_rate": 8.376410465914705e-05,
"loss": 2.4874,
"step": 1002
},
{
"epoch": 1.09,
"learning_rate": 8.370065913252188e-05,
"loss": 2.505,
"step": 1004
},
{
"epoch": 1.09,
"learning_rate": 8.363711401417e-05,
"loss": 2.4867,
"step": 1006
},
{
"epoch": 1.09,
"learning_rate": 8.357346949187906e-05,
"loss": 2.2378,
"step": 1008
},
{
"epoch": 1.09,
"learning_rate": 8.350972575373047e-05,
"loss": 2.372,
"step": 1010
},
{
"epoch": 1.1,
"learning_rate": 8.344588298809887e-05,
"loss": 2.3432,
"step": 1012
},
{
"epoch": 1.1,
"learning_rate": 8.338194138365151e-05,
"loss": 2.6878,
"step": 1014
},
{
"epoch": 1.1,
"learning_rate": 8.331790112934777e-05,
"loss": 2.4083,
"step": 1016
},
{
"epoch": 1.1,
"learning_rate": 8.325376241443849e-05,
"loss": 2.4451,
"step": 1018
},
{
"epoch": 1.11,
"learning_rate": 8.318952542846557e-05,
"loss": 2.3759,
"step": 1020
},
{
"epoch": 1.11,
"learning_rate": 8.312519036126125e-05,
"loss": 2.5355,
"step": 1022
},
{
"epoch": 1.11,
"learning_rate": 8.306075740294763e-05,
"loss": 2.4161,
"step": 1024
},
{
"epoch": 1.11,
"learning_rate": 8.299622674393614e-05,
"loss": 2.3455,
"step": 1026
},
{
"epoch": 1.11,
"learning_rate": 8.293159857492686e-05,
"loss": 2.469,
"step": 1028
},
{
"epoch": 1.12,
"learning_rate": 8.28668730869081e-05,
"loss": 2.3113,
"step": 1030
},
{
"epoch": 1.12,
"learning_rate": 8.280205047115572e-05,
"loss": 2.4072,
"step": 1032
},
{
"epoch": 1.12,
"learning_rate": 8.273713091923264e-05,
"loss": 2.5218,
"step": 1034
},
{
"epoch": 1.12,
"learning_rate": 8.267211462298822e-05,
"loss": 2.374,
"step": 1036
},
{
"epoch": 1.12,
"learning_rate": 8.260700177455773e-05,
"loss": 2.452,
"step": 1038
},
{
"epoch": 1.13,
"learning_rate": 8.254179256636179e-05,
"loss": 2.4523,
"step": 1040
},
{
"epoch": 1.13,
"learning_rate": 8.247648719110572e-05,
"loss": 2.5231,
"step": 1042
},
{
"epoch": 1.13,
"learning_rate": 8.241108584177911e-05,
"loss": 2.5678,
"step": 1044
},
{
"epoch": 1.13,
"learning_rate": 8.234558871165512e-05,
"loss": 2.449,
"step": 1046
},
{
"epoch": 1.14,
"learning_rate": 8.227999599428995e-05,
"loss": 2.3786,
"step": 1048
},
{
"epoch": 1.14,
"learning_rate": 8.221430788352233e-05,
"loss": 2.3994,
"step": 1050
},
{
"epoch": 1.14,
"learning_rate": 8.214852457347286e-05,
"loss": 2.5034,
"step": 1052
},
{
"epoch": 1.14,
"learning_rate": 8.208264625854347e-05,
"loss": 2.2819,
"step": 1054
},
{
"epoch": 1.14,
"learning_rate": 8.201667313341685e-05,
"loss": 2.4361,
"step": 1056
},
{
"epoch": 1.15,
"learning_rate": 8.19506053930559e-05,
"loss": 2.3855,
"step": 1058
},
{
"epoch": 1.15,
"learning_rate": 8.18844432327031e-05,
"loss": 2.4898,
"step": 1060
},
{
"epoch": 1.15,
"learning_rate": 8.181818684787992e-05,
"loss": 2.5017,
"step": 1062
},
{
"epoch": 1.15,
"learning_rate": 8.175183643438635e-05,
"loss": 2.402,
"step": 1064
},
{
"epoch": 1.15,
"learning_rate": 8.168539218830024e-05,
"loss": 2.3225,
"step": 1066
},
{
"epoch": 1.16,
"learning_rate": 8.16188543059767e-05,
"loss": 2.3171,
"step": 1068
},
{
"epoch": 1.16,
"learning_rate": 8.155222298404756e-05,
"loss": 2.5654,
"step": 1070
},
{
"epoch": 1.16,
"learning_rate": 8.148549841942082e-05,
"loss": 2.3448,
"step": 1072
},
{
"epoch": 1.16,
"learning_rate": 8.141868080927996e-05,
"loss": 2.2422,
"step": 1074
},
{
"epoch": 1.17,
"learning_rate": 8.135177035108352e-05,
"loss": 2.6608,
"step": 1076
},
{
"epoch": 1.17,
"learning_rate": 8.128476724256431e-05,
"loss": 2.486,
"step": 1078
},
{
"epoch": 1.17,
"learning_rate": 8.121767168172904e-05,
"loss": 2.3347,
"step": 1080
},
{
"epoch": 1.17,
"learning_rate": 8.115048386685757e-05,
"loss": 2.4229,
"step": 1082
},
{
"epoch": 1.17,
"learning_rate": 8.108320399650244e-05,
"loss": 2.6345,
"step": 1084
},
{
"epoch": 1.18,
"learning_rate": 8.101583226948819e-05,
"loss": 2.5783,
"step": 1086
},
{
"epoch": 1.18,
"learning_rate": 8.09483688849108e-05,
"loss": 2.4985,
"step": 1088
},
{
"epoch": 1.18,
"learning_rate": 8.088081404213718e-05,
"loss": 2.3184,
"step": 1090
},
{
"epoch": 1.18,
"learning_rate": 8.081316794080445e-05,
"loss": 2.324,
"step": 1092
},
{
"epoch": 1.19,
"learning_rate": 8.074543078081946e-05,
"loss": 2.504,
"step": 1094
},
{
"epoch": 1.19,
"learning_rate": 8.067760276235812e-05,
"loss": 2.3798,
"step": 1096
},
{
"epoch": 1.19,
"learning_rate": 8.060968408586489e-05,
"loss": 2.4197,
"step": 1098
},
{
"epoch": 1.19,
"learning_rate": 8.054167495205207e-05,
"loss": 2.4555,
"step": 1100
},
{
"epoch": 1.19,
"learning_rate": 8.047357556189936e-05,
"loss": 2.6626,
"step": 1102
},
{
"epoch": 1.2,
"learning_rate": 8.040538611665314e-05,
"loss": 2.5664,
"step": 1104
},
{
"epoch": 1.2,
"learning_rate": 8.033710681782592e-05,
"loss": 2.4436,
"step": 1106
},
{
"epoch": 1.2,
"learning_rate": 8.026873786719573e-05,
"loss": 2.5044,
"step": 1108
},
{
"epoch": 1.2,
"learning_rate": 8.02002794668056e-05,
"loss": 2.488,
"step": 1110
},
{
"epoch": 1.2,
"learning_rate": 8.013173181896283e-05,
"loss": 2.4565,
"step": 1112
},
{
"epoch": 1.21,
"learning_rate": 8.006309512623848e-05,
"loss": 2.5484,
"step": 1114
},
{
"epoch": 1.21,
"learning_rate": 7.99943695914668e-05,
"loss": 2.4438,
"step": 1116
},
{
"epoch": 1.21,
"learning_rate": 7.992555541774452e-05,
"loss": 2.4668,
"step": 1118
},
{
"epoch": 1.21,
"learning_rate": 7.985665280843035e-05,
"loss": 2.5129,
"step": 1120
},
{
"epoch": 1.22,
"learning_rate": 7.978766196714436e-05,
"loss": 2.2599,
"step": 1122
},
{
"epoch": 1.22,
"learning_rate": 7.97185830977673e-05,
"loss": 2.4388,
"step": 1124
},
{
"epoch": 1.22,
"learning_rate": 7.964941640444014e-05,
"loss": 2.5566,
"step": 1126
},
{
"epoch": 1.22,
"learning_rate": 7.958016209156331e-05,
"loss": 2.3852,
"step": 1128
},
{
"epoch": 1.22,
"learning_rate": 7.951082036379625e-05,
"loss": 2.3447,
"step": 1130
},
{
"epoch": 1.23,
"learning_rate": 7.944139142605665e-05,
"loss": 2.471,
"step": 1132
},
{
"epoch": 1.23,
"learning_rate": 7.937187548351996e-05,
"loss": 2.4846,
"step": 1134
},
{
"epoch": 1.23,
"learning_rate": 7.930227274161877e-05,
"loss": 2.433,
"step": 1136
},
{
"epoch": 1.23,
"learning_rate": 7.923258340604212e-05,
"loss": 2.7046,
"step": 1138
},
{
"epoch": 1.23,
"learning_rate": 7.916280768273498e-05,
"loss": 2.2928,
"step": 1140
},
{
"epoch": 1.24,
"learning_rate": 7.909294577789766e-05,
"loss": 2.5962,
"step": 1142
},
{
"epoch": 1.24,
"learning_rate": 7.902299789798505e-05,
"loss": 2.4707,
"step": 1144
},
{
"epoch": 1.24,
"learning_rate": 7.895296424970618e-05,
"loss": 2.4212,
"step": 1146
},
{
"epoch": 1.24,
"learning_rate": 7.888284504002352e-05,
"loss": 2.5168,
"step": 1148
},
{
"epoch": 1.25,
"learning_rate": 7.881264047615245e-05,
"loss": 2.5038,
"step": 1150
},
{
"epoch": 1.25,
"learning_rate": 7.874235076556046e-05,
"loss": 2.2647,
"step": 1152
},
{
"epoch": 1.25,
"learning_rate": 7.867197611596683e-05,
"loss": 2.5225,
"step": 1154
},
{
"epoch": 1.25,
"learning_rate": 7.860151673534168e-05,
"loss": 2.3552,
"step": 1156
},
{
"epoch": 1.25,
"learning_rate": 7.853097283190567e-05,
"loss": 2.5299,
"step": 1158
},
{
"epoch": 1.26,
"learning_rate": 7.846034461412912e-05,
"loss": 2.476,
"step": 1160
},
{
"epoch": 1.26,
"learning_rate": 7.838963229073162e-05,
"loss": 2.3523,
"step": 1162
},
{
"epoch": 1.26,
"learning_rate": 7.831883607068125e-05,
"loss": 2.4746,
"step": 1164
},
{
"epoch": 1.26,
"learning_rate": 7.824795616319402e-05,
"loss": 2.4551,
"step": 1166
},
{
"epoch": 1.27,
"learning_rate": 7.817699277773325e-05,
"loss": 2.4863,
"step": 1168
},
{
"epoch": 1.27,
"learning_rate": 7.810594612400898e-05,
"loss": 2.5789,
"step": 1170
},
{
"epoch": 1.27,
"learning_rate": 7.803481641197733e-05,
"loss": 2.487,
"step": 1172
},
{
"epoch": 1.27,
"learning_rate": 7.796360385183984e-05,
"loss": 2.5997,
"step": 1174
},
{
"epoch": 1.27,
"learning_rate": 7.789230865404287e-05,
"loss": 2.3587,
"step": 1176
},
{
"epoch": 1.28,
"learning_rate": 7.782093102927703e-05,
"loss": 2.7109,
"step": 1178
},
{
"epoch": 1.28,
"learning_rate": 7.77494711884765e-05,
"loss": 2.5783,
"step": 1180
},
{
"epoch": 1.28,
"learning_rate": 7.767792934281843e-05,
"loss": 2.4947,
"step": 1182
},
{
"epoch": 1.28,
"learning_rate": 7.76063057037223e-05,
"loss": 2.3812,
"step": 1184
},
{
"epoch": 1.28,
"learning_rate": 7.753460048284928e-05,
"loss": 2.3337,
"step": 1186
},
{
"epoch": 1.29,
"learning_rate": 7.74628138921017e-05,
"loss": 2.5691,
"step": 1188
},
{
"epoch": 1.29,
"learning_rate": 7.739094614362229e-05,
"loss": 2.4811,
"step": 1190
},
{
"epoch": 1.29,
"learning_rate": 7.731899744979364e-05,
"loss": 2.618,
"step": 1192
},
{
"epoch": 1.29,
"learning_rate": 7.724696802323755e-05,
"loss": 2.2892,
"step": 1194
},
{
"epoch": 1.3,
"learning_rate": 7.717485807681437e-05,
"loss": 2.3032,
"step": 1196
},
{
"epoch": 1.3,
"learning_rate": 7.710266782362247e-05,
"loss": 2.4592,
"step": 1198
},
{
"epoch": 1.3,
"learning_rate": 7.703039747699747e-05,
"loss": 2.3496,
"step": 1200
},
{
"epoch": 1.3,
"learning_rate": 7.695804725051172e-05,
"loss": 2.423,
"step": 1202
},
{
"epoch": 1.3,
"learning_rate": 7.68856173579736e-05,
"loss": 2.4122,
"step": 1204
},
{
"epoch": 1.31,
"learning_rate": 7.681310801342696e-05,
"loss": 2.3985,
"step": 1206
},
{
"epoch": 1.31,
"learning_rate": 7.674051943115042e-05,
"loss": 2.2799,
"step": 1208
},
{
"epoch": 1.31,
"learning_rate": 7.666785182565677e-05,
"loss": 2.3947,
"step": 1210
},
{
"epoch": 1.31,
"learning_rate": 7.65951054116923e-05,
"loss": 2.3299,
"step": 1212
},
{
"epoch": 1.32,
"learning_rate": 7.652228040423622e-05,
"loss": 2.274,
"step": 1214
},
{
"epoch": 1.32,
"learning_rate": 7.644937701850002e-05,
"loss": 2.3697,
"step": 1216
},
{
"epoch": 1.32,
"learning_rate": 7.637639546992677e-05,
"loss": 2.3167,
"step": 1218
},
{
"epoch": 1.32,
"learning_rate": 7.630333597419054e-05,
"loss": 2.4688,
"step": 1220
},
{
"epoch": 1.32,
"learning_rate": 7.623019874719579e-05,
"loss": 2.2979,
"step": 1222
},
{
"epoch": 1.33,
"learning_rate": 7.61569840050766e-05,
"loss": 2.4614,
"step": 1224
},
{
"epoch": 1.33,
"learning_rate": 7.60836919641962e-05,
"loss": 2.5093,
"step": 1226
},
{
"epoch": 1.33,
"learning_rate": 7.60103228411462e-05,
"loss": 2.4832,
"step": 1228
},
{
"epoch": 1.33,
"learning_rate": 7.593687685274609e-05,
"loss": 2.4112,
"step": 1230
},
{
"epoch": 1.33,
"learning_rate": 7.586335421604238e-05,
"loss": 2.3033,
"step": 1232
},
{
"epoch": 1.34,
"learning_rate": 7.578975514830821e-05,
"loss": 2.6554,
"step": 1234
},
{
"epoch": 1.34,
"learning_rate": 7.571607986704252e-05,
"loss": 2.3495,
"step": 1236
},
{
"epoch": 1.34,
"learning_rate": 7.564232858996949e-05,
"loss": 2.4517,
"step": 1238
},
{
"epoch": 1.34,
"learning_rate": 7.556850153503787e-05,
"loss": 2.4985,
"step": 1240
},
{
"epoch": 1.35,
"learning_rate": 7.549459892042041e-05,
"loss": 2.5046,
"step": 1242
},
{
"epoch": 1.35,
"learning_rate": 7.542062096451305e-05,
"loss": 2.5004,
"step": 1244
},
{
"epoch": 1.35,
"learning_rate": 7.534656788593446e-05,
"loss": 2.3215,
"step": 1246
},
{
"epoch": 1.35,
"learning_rate": 7.527243990352529e-05,
"loss": 2.5481,
"step": 1248
},
{
"epoch": 1.35,
"learning_rate": 7.519823723634753e-05,
"loss": 2.3608,
"step": 1250
},
{
"epoch": 1.36,
"learning_rate": 7.51239601036839e-05,
"loss": 2.2113,
"step": 1252
},
{
"epoch": 1.36,
"learning_rate": 7.504960872503715e-05,
"loss": 2.6318,
"step": 1254
},
{
"epoch": 1.36,
"learning_rate": 7.497518332012946e-05,
"loss": 2.3967,
"step": 1256
},
{
"epoch": 1.36,
"learning_rate": 7.490068410890175e-05,
"loss": 2.1024,
"step": 1258
},
{
"epoch": 1.36,
"learning_rate": 7.48261113115131e-05,
"loss": 2.5322,
"step": 1260
},
{
"epoch": 1.37,
"learning_rate": 7.475146514834001e-05,
"loss": 2.3737,
"step": 1262
},
{
"epoch": 1.37,
"learning_rate": 7.46767458399758e-05,
"loss": 2.4803,
"step": 1264
},
{
"epoch": 1.37,
"learning_rate": 7.460195360722995e-05,
"loss": 2.1737,
"step": 1266
},
{
"epoch": 1.37,
"learning_rate": 7.452708867112745e-05,
"loss": 2.5601,
"step": 1268
},
{
"epoch": 1.38,
"learning_rate": 7.44521512529081e-05,
"loss": 2.5452,
"step": 1270
},
{
"epoch": 1.38,
"learning_rate": 7.437714157402598e-05,
"loss": 2.3953,
"step": 1272
},
{
"epoch": 1.38,
"learning_rate": 7.430205985614864e-05,
"loss": 2.4914,
"step": 1274
},
{
"epoch": 1.38,
"learning_rate": 7.422690632115654e-05,
"loss": 2.3997,
"step": 1276
},
{
"epoch": 1.38,
"learning_rate": 7.41516811911424e-05,
"loss": 2.2561,
"step": 1278
},
{
"epoch": 1.39,
"learning_rate": 7.407638468841047e-05,
"loss": 2.6531,
"step": 1280
},
{
"epoch": 1.39,
"learning_rate": 7.400101703547597e-05,
"loss": 2.6299,
"step": 1282
},
{
"epoch": 1.39,
"learning_rate": 7.392557845506432e-05,
"loss": 2.4573,
"step": 1284
},
{
"epoch": 1.39,
"learning_rate": 7.385006917011063e-05,
"loss": 2.5633,
"step": 1286
},
{
"epoch": 1.4,
"learning_rate": 7.377448940375887e-05,
"loss": 2.6371,
"step": 1288
},
{
"epoch": 1.4,
"learning_rate": 7.369883937936136e-05,
"loss": 2.2814,
"step": 1290
},
{
"epoch": 1.4,
"learning_rate": 7.362311932047797e-05,
"loss": 2.6985,
"step": 1292
},
{
"epoch": 1.4,
"learning_rate": 7.354732945087563e-05,
"loss": 2.3274,
"step": 1294
},
{
"epoch": 1.4,
"learning_rate": 7.34714699945275e-05,
"loss": 2.2417,
"step": 1296
},
{
"epoch": 1.41,
"learning_rate": 7.33955411756124e-05,
"loss": 2.4285,
"step": 1298
},
{
"epoch": 1.41,
"learning_rate": 7.331954321851418e-05,
"loss": 2.4677,
"step": 1300
},
{
"epoch": 1.41,
"learning_rate": 7.32434763478209e-05,
"loss": 2.4342,
"step": 1302
},
{
"epoch": 1.41,
"learning_rate": 7.316734078832438e-05,
"loss": 2.3903,
"step": 1304
},
{
"epoch": 1.41,
"learning_rate": 7.309113676501939e-05,
"loss": 2.4379,
"step": 1306
},
{
"epoch": 1.42,
"learning_rate": 7.301486450310298e-05,
"loss": 2.4929,
"step": 1308
},
{
"epoch": 1.42,
"learning_rate": 7.293852422797391e-05,
"loss": 2.4626,
"step": 1310
},
{
"epoch": 1.42,
"learning_rate": 7.286211616523193e-05,
"loss": 2.5199,
"step": 1312
},
{
"epoch": 1.42,
"learning_rate": 7.278564054067709e-05,
"loss": 2.3659,
"step": 1314
},
{
"epoch": 1.43,
"learning_rate": 7.270909758030912e-05,
"loss": 2.4869,
"step": 1316
},
{
"epoch": 1.43,
"learning_rate": 7.263248751032671e-05,
"loss": 2.5166,
"step": 1318
},
{
"epoch": 1.43,
"learning_rate": 7.255581055712688e-05,
"loss": 2.139,
"step": 1320
},
{
"epoch": 1.43,
"learning_rate": 7.247906694730437e-05,
"loss": 2.4807,
"step": 1322
},
{
"epoch": 1.43,
"learning_rate": 7.24022569076508e-05,
"loss": 2.4607,
"step": 1324
},
{
"epoch": 1.44,
"learning_rate": 7.232538066515414e-05,
"loss": 2.3367,
"step": 1326
},
{
"epoch": 1.44,
"learning_rate": 7.224843844699803e-05,
"loss": 2.6005,
"step": 1328
},
{
"epoch": 1.44,
"learning_rate": 7.217143048056108e-05,
"loss": 2.3467,
"step": 1330
},
{
"epoch": 1.44,
"learning_rate": 7.209435699341613e-05,
"loss": 2.4132,
"step": 1332
},
{
"epoch": 1.45,
"learning_rate": 7.201721821332973e-05,
"loss": 2.3049,
"step": 1334
},
{
"epoch": 1.45,
"learning_rate": 7.194001436826135e-05,
"loss": 2.3176,
"step": 1336
},
{
"epoch": 1.45,
"learning_rate": 7.18627456863627e-05,
"loss": 2.5401,
"step": 1338
},
{
"epoch": 1.45,
"learning_rate": 7.178541239597717e-05,
"loss": 2.4131,
"step": 1340
},
{
"epoch": 1.45,
"learning_rate": 7.170801472563903e-05,
"loss": 2.4554,
"step": 1342
},
{
"epoch": 1.46,
"learning_rate": 7.163055290407282e-05,
"loss": 2.405,
"step": 1344
},
{
"epoch": 1.46,
"learning_rate": 7.155302716019263e-05,
"loss": 2.4435,
"step": 1346
},
{
"epoch": 1.46,
"learning_rate": 7.14754377231015e-05,
"loss": 2.4068,
"step": 1348
},
{
"epoch": 1.46,
"learning_rate": 7.139778482209068e-05,
"loss": 2.4863,
"step": 1350
},
{
"epoch": 1.46,
"learning_rate": 7.132006868663894e-05,
"loss": 2.3856,
"step": 1352
},
{
"epoch": 1.47,
"learning_rate": 7.124228954641196e-05,
"loss": 2.3076,
"step": 1354
},
{
"epoch": 1.47,
"learning_rate": 7.116444763126158e-05,
"loss": 2.334,
"step": 1356
},
{
"epoch": 1.47,
"learning_rate": 7.108654317122515e-05,
"loss": 2.3639,
"step": 1358
},
{
"epoch": 1.47,
"learning_rate": 7.100857639652489e-05,
"loss": 2.7099,
"step": 1360
},
{
"epoch": 1.48,
"learning_rate": 7.093054753756713e-05,
"loss": 2.6381,
"step": 1362
},
{
"epoch": 1.48,
"learning_rate": 7.085245682494168e-05,
"loss": 2.4935,
"step": 1364
},
{
"epoch": 1.48,
"learning_rate": 7.077430448942117e-05,
"loss": 2.3986,
"step": 1366
},
{
"epoch": 1.48,
"learning_rate": 7.069609076196029e-05,
"loss": 2.5647,
"step": 1368
},
{
"epoch": 1.48,
"learning_rate": 7.061781587369519e-05,
"loss": 2.34,
"step": 1370
},
{
"epoch": 1.49,
"learning_rate": 7.053948005594273e-05,
"loss": 2.5114,
"step": 1372
},
{
"epoch": 1.49,
"learning_rate": 7.046108354019987e-05,
"loss": 2.4023,
"step": 1374
},
{
"epoch": 1.49,
"learning_rate": 7.038262655814291e-05,
"loss": 2.5273,
"step": 1376
},
{
"epoch": 1.49,
"learning_rate": 7.030410934162684e-05,
"loss": 2.3996,
"step": 1378
},
{
"epoch": 1.49,
"learning_rate": 7.022553212268469e-05,
"loss": 2.5281,
"step": 1380
},
{
"epoch": 1.5,
"learning_rate": 7.014689513352675e-05,
"loss": 2.2748,
"step": 1382
},
{
"epoch": 1.5,
"learning_rate": 7.006819860654001e-05,
"loss": 2.6029,
"step": 1384
},
{
"epoch": 1.5,
"learning_rate": 6.998944277428734e-05,
"loss": 2.3091,
"step": 1386
},
{
"epoch": 1.5,
"learning_rate": 6.991062786950691e-05,
"loss": 2.4689,
"step": 1388
},
{
"epoch": 1.51,
"learning_rate": 6.983175412511145e-05,
"loss": 2.4879,
"step": 1390
},
{
"epoch": 1.51,
"learning_rate": 6.975282177418756e-05,
"loss": 2.5369,
"step": 1392
},
{
"epoch": 1.51,
"learning_rate": 6.967383104999505e-05,
"loss": 2.4159,
"step": 1394
},
{
"epoch": 1.51,
"learning_rate": 6.959478218596625e-05,
"loss": 2.5748,
"step": 1396
},
{
"epoch": 1.51,
"learning_rate": 6.951567541570523e-05,
"loss": 2.5503,
"step": 1398
},
{
"epoch": 1.52,
"learning_rate": 6.943651097298727e-05,
"loss": 2.5094,
"step": 1400
},
{
"epoch": 1.52,
"learning_rate": 6.935728909175805e-05,
"loss": 2.3161,
"step": 1402
},
{
"epoch": 1.52,
"learning_rate": 6.927801000613298e-05,
"loss": 2.3559,
"step": 1404
},
{
"epoch": 1.52,
"learning_rate": 6.919867395039652e-05,
"loss": 2.4446,
"step": 1406
},
{
"epoch": 1.53,
"learning_rate": 6.91192811590015e-05,
"loss": 2.3579,
"step": 1408
},
{
"epoch": 1.53,
"learning_rate": 6.903983186656844e-05,
"loss": 2.3263,
"step": 1410
},
{
"epoch": 1.53,
"learning_rate": 6.896032630788476e-05,
"loss": 2.4279,
"step": 1412
},
{
"epoch": 1.53,
"learning_rate": 6.888076471790424e-05,
"loss": 2.3288,
"step": 1414
},
{
"epoch": 1.53,
"learning_rate": 6.880114733174615e-05,
"loss": 2.3714,
"step": 1416
},
{
"epoch": 1.54,
"learning_rate": 6.872147438469476e-05,
"loss": 2.5845,
"step": 1418
},
{
"epoch": 1.54,
"learning_rate": 6.864174611219841e-05,
"loss": 2.2575,
"step": 1420
},
{
"epoch": 1.54,
"learning_rate": 6.856196274986907e-05,
"loss": 2.7716,
"step": 1422
},
{
"epoch": 1.54,
"learning_rate": 6.848212453348137e-05,
"loss": 2.408,
"step": 1424
},
{
"epoch": 1.54,
"learning_rate": 6.840223169897217e-05,
"loss": 2.5191,
"step": 1426
},
{
"epoch": 1.55,
"learning_rate": 6.832228448243964e-05,
"loss": 2.3474,
"step": 1428
},
{
"epoch": 1.55,
"learning_rate": 6.824228312014274e-05,
"loss": 2.5852,
"step": 1430
},
{
"epoch": 1.55,
"learning_rate": 6.816222784850038e-05,
"loss": 2.5364,
"step": 1432
},
{
"epoch": 1.55,
"learning_rate": 6.80821189040908e-05,
"loss": 2.26,
"step": 1434
},
{
"epoch": 1.56,
"learning_rate": 6.800195652365087e-05,
"loss": 2.4253,
"step": 1436
},
{
"epoch": 1.56,
"learning_rate": 6.792174094407533e-05,
"loss": 2.3855,
"step": 1438
},
{
"epoch": 1.56,
"learning_rate": 6.784147240241619e-05,
"loss": 2.2678,
"step": 1440
},
{
"epoch": 1.56,
"learning_rate": 6.776115113588194e-05,
"loss": 2.4646,
"step": 1442
},
{
"epoch": 1.56,
"learning_rate": 6.76807773818369e-05,
"loss": 2.6316,
"step": 1444
},
{
"epoch": 1.57,
"learning_rate": 6.760035137780046e-05,
"loss": 2.5357,
"step": 1446
},
{
"epoch": 1.57,
"learning_rate": 6.751987336144648e-05,
"loss": 2.5943,
"step": 1448
},
{
"epoch": 1.57,
"learning_rate": 6.743934357060246e-05,
"loss": 2.4468,
"step": 1450
},
{
"epoch": 1.57,
"learning_rate": 6.735876224324895e-05,
"loss": 2.3678,
"step": 1452
},
{
"epoch": 1.57,
"learning_rate": 6.72781296175188e-05,
"loss": 2.3095,
"step": 1454
},
{
"epoch": 1.58,
"learning_rate": 6.719744593169641e-05,
"loss": 2.4335,
"step": 1456
},
{
"epoch": 1.58,
"learning_rate": 6.711671142421714e-05,
"loss": 2.5255,
"step": 1458
},
{
"epoch": 1.58,
"learning_rate": 6.703592633366647e-05,
"loss": 2.3837,
"step": 1460
},
{
"epoch": 1.58,
"learning_rate": 6.695509089877943e-05,
"loss": 2.5474,
"step": 1462
},
{
"epoch": 1.59,
"learning_rate": 6.687420535843975e-05,
"loss": 2.5055,
"step": 1464
},
{
"epoch": 1.59,
"learning_rate": 6.679326995167932e-05,
"loss": 2.4212,
"step": 1466
},
{
"epoch": 1.59,
"learning_rate": 6.671228491767728e-05,
"loss": 2.4475,
"step": 1468
},
{
"epoch": 1.59,
"learning_rate": 6.663125049575956e-05,
"loss": 2.5926,
"step": 1470
},
{
"epoch": 1.59,
"learning_rate": 6.655016692539793e-05,
"loss": 2.2358,
"step": 1472
},
{
"epoch": 1.6,
"learning_rate": 6.646903444620949e-05,
"loss": 2.6164,
"step": 1474
},
{
"epoch": 1.6,
"learning_rate": 6.63878532979558e-05,
"loss": 2.5778,
"step": 1476
},
{
"epoch": 1.6,
"learning_rate": 6.630662372054227e-05,
"loss": 2.4401,
"step": 1478
},
{
"epoch": 1.6,
"learning_rate": 6.622534595401746e-05,
"loss": 2.4327,
"step": 1480
},
{
"epoch": 1.61,
"learning_rate": 6.614402023857232e-05,
"loss": 2.3336,
"step": 1482
},
{
"epoch": 1.61,
"learning_rate": 6.606264681453946e-05,
"loss": 2.4107,
"step": 1484
},
{
"epoch": 1.61,
"learning_rate": 6.598122592239255e-05,
"loss": 2.4793,
"step": 1486
},
{
"epoch": 1.61,
"learning_rate": 6.589975780274544e-05,
"loss": 2.6092,
"step": 1488
},
{
"epoch": 1.61,
"learning_rate": 6.581824269635166e-05,
"loss": 2.4823,
"step": 1490
},
{
"epoch": 1.62,
"learning_rate": 6.57366808441035e-05,
"loss": 2.3623,
"step": 1492
},
{
"epoch": 1.62,
"learning_rate": 6.565507248703144e-05,
"loss": 2.5841,
"step": 1494
},
{
"epoch": 1.62,
"learning_rate": 6.557341786630339e-05,
"loss": 2.3636,
"step": 1496
},
{
"epoch": 1.62,
"learning_rate": 6.549171722322395e-05,
"loss": 2.5033,
"step": 1498
},
{
"epoch": 1.62,
"learning_rate": 6.540997079923376e-05,
"loss": 2.5465,
"step": 1500
},
{
"epoch": 1.63,
"learning_rate": 6.532817883590874e-05,
"loss": 2.4308,
"step": 1502
},
{
"epoch": 1.63,
"learning_rate": 6.524634157495935e-05,
"loss": 2.6063,
"step": 1504
},
{
"epoch": 1.63,
"learning_rate": 6.516445925822997e-05,
"loss": 2.3648,
"step": 1506
},
{
"epoch": 1.63,
"learning_rate": 6.508253212769808e-05,
"loss": 2.5649,
"step": 1508
},
{
"epoch": 1.64,
"learning_rate": 6.500056042547364e-05,
"loss": 2.4303,
"step": 1510
},
{
"epoch": 1.64,
"learning_rate": 6.491854439379827e-05,
"loss": 2.2518,
"step": 1512
},
{
"epoch": 1.64,
"learning_rate": 6.483648427504467e-05,
"loss": 2.6185,
"step": 1514
},
{
"epoch": 1.64,
"learning_rate": 6.475438031171574e-05,
"loss": 2.4631,
"step": 1516
},
{
"epoch": 1.64,
"learning_rate": 6.4672232746444e-05,
"loss": 2.5055,
"step": 1518
},
{
"epoch": 1.65,
"learning_rate": 6.459004182199082e-05,
"loss": 2.4789,
"step": 1520
},
{
"epoch": 1.65,
"learning_rate": 6.45078077812457e-05,
"loss": 2.518,
"step": 1522
},
{
"epoch": 1.65,
"learning_rate": 6.442553086722554e-05,
"loss": 2.2487,
"step": 1524
},
{
"epoch": 1.65,
"learning_rate": 6.434321132307394e-05,
"loss": 2.4873,
"step": 1526
},
{
"epoch": 1.66,
"learning_rate": 6.426084939206051e-05,
"loss": 2.4427,
"step": 1528
},
{
"epoch": 1.66,
"learning_rate": 6.417844531758009e-05,
"loss": 2.5523,
"step": 1530
},
{
"epoch": 1.66,
"learning_rate": 6.40959993431521e-05,
"loss": 2.4331,
"step": 1532
},
{
"epoch": 1.66,
"learning_rate": 6.401351171241971e-05,
"loss": 2.2483,
"step": 1534
},
{
"epoch": 1.66,
"learning_rate": 6.393098266914925e-05,
"loss": 2.3769,
"step": 1536
},
{
"epoch": 1.67,
"learning_rate": 6.384841245722945e-05,
"loss": 2.4459,
"step": 1538
},
{
"epoch": 1.67,
"learning_rate": 6.376580132067065e-05,
"loss": 2.4104,
"step": 1540
},
{
"epoch": 1.67,
"learning_rate": 6.368314950360415e-05,
"loss": 2.3963,
"step": 1542
},
{
"epoch": 1.67,
"learning_rate": 6.360045725028146e-05,
"loss": 2.4358,
"step": 1544
},
{
"epoch": 1.67,
"learning_rate": 6.351772480507363e-05,
"loss": 2.3851,
"step": 1546
},
{
"epoch": 1.68,
"learning_rate": 6.34349524124704e-05,
"loss": 2.3434,
"step": 1548
},
{
"epoch": 1.68,
"learning_rate": 6.335214031707965e-05,
"loss": 2.3168,
"step": 1550
},
{
"epoch": 1.68,
"learning_rate": 6.326928876362652e-05,
"loss": 2.5622,
"step": 1552
},
{
"epoch": 1.68,
"learning_rate": 6.318639799695285e-05,
"loss": 2.4061,
"step": 1554
},
{
"epoch": 1.69,
"learning_rate": 6.310346826201621e-05,
"loss": 2.6289,
"step": 1556
},
{
"epoch": 1.69,
"learning_rate": 6.302049980388948e-05,
"loss": 2.4561,
"step": 1558
},
{
"epoch": 1.69,
"learning_rate": 6.29374928677599e-05,
"loss": 2.4697,
"step": 1560
},
{
"epoch": 1.69,
"learning_rate": 6.28544476989284e-05,
"loss": 2.4481,
"step": 1562
},
{
"epoch": 1.69,
"learning_rate": 6.277136454280898e-05,
"loss": 2.5529,
"step": 1564
},
{
"epoch": 1.7,
"learning_rate": 6.268824364492782e-05,
"loss": 2.4358,
"step": 1566
},
{
"epoch": 1.7,
"learning_rate": 6.260508525092266e-05,
"loss": 2.3754,
"step": 1568
},
{
"epoch": 1.7,
"learning_rate": 6.252188960654204e-05,
"loss": 2.5845,
"step": 1570
},
{
"epoch": 1.7,
"learning_rate": 6.243865695764459e-05,
"loss": 2.5552,
"step": 1572
},
{
"epoch": 1.7,
"learning_rate": 6.235538755019832e-05,
"loss": 2.4616,
"step": 1574
},
{
"epoch": 1.71,
"learning_rate": 6.227208163027982e-05,
"loss": 2.3196,
"step": 1576
},
{
"epoch": 1.71,
"learning_rate": 6.218873944407361e-05,
"loss": 2.4119,
"step": 1578
},
{
"epoch": 1.71,
"learning_rate": 6.210536123787138e-05,
"loss": 2.2707,
"step": 1580
},
{
"epoch": 1.71,
"learning_rate": 6.202194725807127e-05,
"loss": 2.7299,
"step": 1582
},
{
"epoch": 1.72,
"learning_rate": 6.19384977511771e-05,
"loss": 2.2659,
"step": 1584
},
{
"epoch": 1.72,
"learning_rate": 6.185501296379777e-05,
"loss": 2.5439,
"step": 1586
},
{
"epoch": 1.72,
"learning_rate": 6.177149314264631e-05,
"loss": 2.6154,
"step": 1588
},
{
"epoch": 1.72,
"learning_rate": 6.168793853453943e-05,
"loss": 2.5537,
"step": 1590
},
{
"epoch": 1.72,
"learning_rate": 6.160434938639648e-05,
"loss": 2.3475,
"step": 1592
},
{
"epoch": 1.73,
"learning_rate": 6.152072594523906e-05,
"loss": 2.3385,
"step": 1594
},
{
"epoch": 1.73,
"learning_rate": 6.143706845818992e-05,
"loss": 2.4313,
"step": 1596
},
{
"epoch": 1.73,
"learning_rate": 6.135337717247261e-05,
"loss": 2.3323,
"step": 1598
},
{
"epoch": 1.73,
"learning_rate": 6.12696523354104e-05,
"loss": 2.4587,
"step": 1600
},
{
"epoch": 1.74,
"learning_rate": 6.118589419442584e-05,
"loss": 2.6458,
"step": 1602
},
{
"epoch": 1.74,
"learning_rate": 6.110210299703982e-05,
"loss": 2.5148,
"step": 1604
},
{
"epoch": 1.74,
"learning_rate": 6.101827899087094e-05,
"loss": 2.5324,
"step": 1606
},
{
"epoch": 1.74,
"learning_rate": 6.0934422423634744e-05,
"loss": 2.4962,
"step": 1608
},
{
"epoch": 1.74,
"learning_rate": 6.085053354314302e-05,
"loss": 2.4868,
"step": 1610
},
{
"epoch": 1.75,
"learning_rate": 6.076661259730305e-05,
"loss": 2.506,
"step": 1612
},
{
"epoch": 1.75,
"learning_rate": 6.068265983411685e-05,
"loss": 2.3774,
"step": 1614
},
{
"epoch": 1.75,
"learning_rate": 6.05986755016805e-05,
"loss": 2.4287,
"step": 1616
},
{
"epoch": 1.75,
"learning_rate": 6.051465984818332e-05,
"loss": 2.3267,
"step": 1618
},
{
"epoch": 1.75,
"learning_rate": 6.043061312190723e-05,
"loss": 2.4453,
"step": 1620
},
{
"epoch": 1.76,
"learning_rate": 6.034653557122598e-05,
"loss": 2.4022,
"step": 1622
},
{
"epoch": 1.76,
"learning_rate": 6.0262427444604384e-05,
"loss": 2.5554,
"step": 1624
},
{
"epoch": 1.76,
"learning_rate": 6.017828899059763e-05,
"loss": 2.4102,
"step": 1626
},
{
"epoch": 1.76,
"learning_rate": 6.009412045785051e-05,
"loss": 2.3266,
"step": 1628
},
{
"epoch": 1.77,
"learning_rate": 6.000992209509676e-05,
"loss": 2.2382,
"step": 1630
},
{
"epoch": 1.77,
"learning_rate": 5.9925694151158184e-05,
"loss": 2.2763,
"step": 1632
},
{
"epoch": 1.77,
"learning_rate": 5.984143687494409e-05,
"loss": 2.481,
"step": 1634
},
{
"epoch": 1.77,
"learning_rate": 5.975715051545039e-05,
"loss": 2.5598,
"step": 1636
},
{
"epoch": 1.77,
"learning_rate": 5.9672835321759016e-05,
"loss": 2.3289,
"step": 1638
},
{
"epoch": 1.78,
"learning_rate": 5.958849154303704e-05,
"loss": 2.4317,
"step": 1640
},
{
"epoch": 1.78,
"learning_rate": 5.9504119428536076e-05,
"loss": 2.448,
"step": 1642
},
{
"epoch": 1.78,
"learning_rate": 5.9419719227591405e-05,
"loss": 2.2034,
"step": 1644
},
{
"epoch": 1.78,
"learning_rate": 5.933529118962138e-05,
"loss": 2.4841,
"step": 1646
},
{
"epoch": 1.79,
"learning_rate": 5.925083556412657e-05,
"loss": 2.5998,
"step": 1648
},
{
"epoch": 1.79,
"learning_rate": 5.916635260068909e-05,
"loss": 2.5288,
"step": 1650
},
{
"epoch": 1.79,
"learning_rate": 5.908184254897182e-05,
"loss": 2.5148,
"step": 1652
},
{
"epoch": 1.79,
"learning_rate": 5.899730565871774e-05,
"loss": 2.5166,
"step": 1654
},
{
"epoch": 1.79,
"learning_rate": 5.891274217974907e-05,
"loss": 2.4235,
"step": 1656
},
{
"epoch": 1.8,
"learning_rate": 5.8828152361966685e-05,
"loss": 2.5575,
"step": 1658
},
{
"epoch": 1.8,
"learning_rate": 5.874353645534922e-05,
"loss": 2.4232,
"step": 1660
},
{
"epoch": 1.8,
"learning_rate": 5.865889470995248e-05,
"loss": 2.2509,
"step": 1662
},
{
"epoch": 1.8,
"learning_rate": 5.857422737590857e-05,
"loss": 2.2636,
"step": 1664
},
{
"epoch": 1.8,
"learning_rate": 5.8489534703425256e-05,
"loss": 2.4923,
"step": 1666
},
{
"epoch": 1.81,
"learning_rate": 5.8404816942785134e-05,
"loss": 2.3899,
"step": 1668
},
{
"epoch": 1.81,
"learning_rate": 5.8320074344345e-05,
"loss": 2.4698,
"step": 1670
},
{
"epoch": 1.81,
"learning_rate": 5.8235307158535e-05,
"loss": 2.65,
"step": 1672
},
{
"epoch": 1.81,
"learning_rate": 5.8150515635858e-05,
"loss": 2.4687,
"step": 1674
},
{
"epoch": 1.82,
"learning_rate": 5.806570002688869e-05,
"loss": 2.4793,
"step": 1676
},
{
"epoch": 1.82,
"learning_rate": 5.798086058227304e-05,
"loss": 2.2238,
"step": 1678
},
{
"epoch": 1.82,
"learning_rate": 5.78959975527274e-05,
"loss": 2.4365,
"step": 1680
},
{
"epoch": 1.82,
"learning_rate": 5.781111118903785e-05,
"loss": 2.4891,
"step": 1682
},
{
"epoch": 1.82,
"learning_rate": 5.772620174205938e-05,
"loss": 2.3248,
"step": 1684
},
{
"epoch": 1.83,
"learning_rate": 5.764126946271526e-05,
"loss": 2.6325,
"step": 1686
},
{
"epoch": 1.83,
"learning_rate": 5.755631460199616e-05,
"loss": 2.4281,
"step": 1688
},
{
"epoch": 1.83,
"learning_rate": 5.747133741095956e-05,
"loss": 2.4829,
"step": 1690
},
{
"epoch": 1.83,
"learning_rate": 5.738633814072888e-05,
"loss": 2.3321,
"step": 1692
},
{
"epoch": 1.83,
"learning_rate": 5.730131704249278e-05,
"loss": 2.5413,
"step": 1694
},
{
"epoch": 1.84,
"learning_rate": 5.721627436750449e-05,
"loss": 2.0978,
"step": 1696
},
{
"epoch": 1.84,
"learning_rate": 5.713121036708091e-05,
"loss": 2.5072,
"step": 1698
},
{
"epoch": 1.84,
"learning_rate": 5.704612529260205e-05,
"loss": 2.4096,
"step": 1700
},
{
"epoch": 1.84,
"learning_rate": 5.6961019395510126e-05,
"loss": 2.3686,
"step": 1702
},
{
"epoch": 1.85,
"learning_rate": 5.6875892927308936e-05,
"loss": 2.475,
"step": 1704
},
{
"epoch": 1.85,
"learning_rate": 5.679074613956307e-05,
"loss": 2.375,
"step": 1706
},
{
"epoch": 1.85,
"learning_rate": 5.6705579283897116e-05,
"loss": 2.5238,
"step": 1708
},
{
"epoch": 1.85,
"learning_rate": 5.662039261199502e-05,
"loss": 2.5842,
"step": 1710
},
{
"epoch": 1.85,
"learning_rate": 5.6535186375599266e-05,
"loss": 2.5468,
"step": 1712
},
{
"epoch": 1.86,
"learning_rate": 5.644996082651017e-05,
"loss": 2.4626,
"step": 1714
},
{
"epoch": 1.86,
"learning_rate": 5.636471621658508e-05,
"loss": 2.5552,
"step": 1716
},
{
"epoch": 1.86,
"learning_rate": 5.627945279773774e-05,
"loss": 2.2431,
"step": 1718
},
{
"epoch": 1.86,
"learning_rate": 5.61941708219374e-05,
"loss": 2.36,
"step": 1720
},
{
"epoch": 1.87,
"learning_rate": 5.6108870541208224e-05,
"loss": 2.3865,
"step": 1722
},
{
"epoch": 1.87,
"learning_rate": 5.602355220762838e-05,
"loss": 2.5472,
"step": 1724
},
{
"epoch": 1.87,
"learning_rate": 5.593821607332952e-05,
"loss": 2.3935,
"step": 1726
},
{
"epoch": 1.87,
"learning_rate": 5.585286239049574e-05,
"loss": 2.5526,
"step": 1728
},
{
"epoch": 1.87,
"learning_rate": 5.576749141136313e-05,
"loss": 2.5119,
"step": 1730
},
{
"epoch": 1.88,
"learning_rate": 5.568210338821881e-05,
"loss": 2.3868,
"step": 1732
},
{
"epoch": 1.88,
"learning_rate": 5.5596698573400306e-05,
"loss": 2.4324,
"step": 1734
},
{
"epoch": 1.88,
"learning_rate": 5.5511277219294765e-05,
"loss": 2.4088,
"step": 1736
},
{
"epoch": 1.88,
"learning_rate": 5.54258395783382e-05,
"loss": 2.447,
"step": 1738
},
{
"epoch": 1.88,
"learning_rate": 5.534038590301476e-05,
"loss": 2.4857,
"step": 1740
},
{
"epoch": 1.89,
"learning_rate": 5.5254916445855974e-05,
"loss": 2.3698,
"step": 1742
},
{
"epoch": 1.89,
"learning_rate": 5.5169431459440014e-05,
"loss": 2.5048,
"step": 1744
},
{
"epoch": 1.89,
"learning_rate": 5.508393119639094e-05,
"loss": 2.5057,
"step": 1746
},
{
"epoch": 1.89,
"learning_rate": 5.499841590937795e-05,
"loss": 2.4211,
"step": 1748
},
{
"epoch": 1.9,
"learning_rate": 5.491288585111467e-05,
"loss": 2.7328,
"step": 1750
},
{
"epoch": 1.9,
"learning_rate": 5.4827341274358344e-05,
"loss": 2.5598,
"step": 1752
},
{
"epoch": 1.9,
"learning_rate": 5.4741782431909136e-05,
"loss": 2.2472,
"step": 1754
},
{
"epoch": 1.9,
"learning_rate": 5.465620957660938e-05,
"loss": 2.4122,
"step": 1756
},
{
"epoch": 1.9,
"learning_rate": 5.457062296134279e-05,
"loss": 2.4685,
"step": 1758
},
{
"epoch": 1.91,
"learning_rate": 5.448502283903377e-05,
"loss": 2.5201,
"step": 1760
},
{
"epoch": 1.91,
"learning_rate": 5.439940946264662e-05,
"loss": 2.5483,
"step": 1762
},
{
"epoch": 1.91,
"learning_rate": 5.4313783085184825e-05,
"loss": 2.4956,
"step": 1764
},
{
"epoch": 1.91,
"learning_rate": 5.422814395969029e-05,
"loss": 2.5378,
"step": 1766
},
{
"epoch": 1.91,
"learning_rate": 5.414249233924258e-05,
"loss": 2.218,
"step": 1768
},
{
"epoch": 1.92,
"learning_rate": 5.40568284769582e-05,
"loss": 2.5364,
"step": 1770
},
{
"epoch": 1.92,
"learning_rate": 5.39711526259898e-05,
"loss": 2.4421,
"step": 1772
},
{
"epoch": 1.92,
"learning_rate": 5.388546503952551e-05,
"loss": 2.3293,
"step": 1774
},
{
"epoch": 1.92,
"learning_rate": 5.379976597078808e-05,
"loss": 2.4037,
"step": 1776
},
{
"epoch": 1.93,
"learning_rate": 5.371405567303428e-05,
"loss": 2.4786,
"step": 1778
},
{
"epoch": 1.93,
"learning_rate": 5.362833439955396e-05,
"loss": 2.2926,
"step": 1780
},
{
"epoch": 1.93,
"learning_rate": 5.354260240366947e-05,
"loss": 2.5112,
"step": 1782
},
{
"epoch": 1.93,
"learning_rate": 5.3456859938734836e-05,
"loss": 2.2066,
"step": 1784
},
{
"epoch": 1.93,
"learning_rate": 5.337110725813501e-05,
"loss": 2.4357,
"step": 1786
},
{
"epoch": 1.94,
"learning_rate": 5.328534461528515e-05,
"loss": 2.502,
"step": 1788
},
{
"epoch": 1.94,
"learning_rate": 5.3199572263629824e-05,
"loss": 2.2781,
"step": 1790
},
{
"epoch": 1.94,
"learning_rate": 5.3113790456642345e-05,
"loss": 2.3274,
"step": 1792
},
{
"epoch": 1.94,
"learning_rate": 5.3027999447823905e-05,
"loss": 2.4531,
"step": 1794
},
{
"epoch": 1.95,
"learning_rate": 5.2942199490702924e-05,
"loss": 2.4264,
"step": 1796
},
{
"epoch": 1.95,
"learning_rate": 5.285639083883428e-05,
"loss": 2.3976,
"step": 1798
},
{
"epoch": 1.95,
"learning_rate": 5.27705737457985e-05,
"loss": 2.3159,
"step": 1800
},
{
"epoch": 1.95,
"learning_rate": 5.268474846520112e-05,
"loss": 2.3113,
"step": 1802
},
{
"epoch": 1.95,
"learning_rate": 5.259891525067179e-05,
"loss": 2.3999,
"step": 1804
},
{
"epoch": 1.96,
"learning_rate": 5.251307435586368e-05,
"loss": 2.4202,
"step": 1806
},
{
"epoch": 1.96,
"learning_rate": 5.2427226034452614e-05,
"loss": 2.4635,
"step": 1808
},
{
"epoch": 1.96,
"learning_rate": 5.23413705401364e-05,
"loss": 2.588,
"step": 1810
},
{
"epoch": 1.96,
"learning_rate": 5.225550812663399e-05,
"loss": 2.3913,
"step": 1812
},
{
"epoch": 1.96,
"learning_rate": 5.216963904768485e-05,
"loss": 2.3559,
"step": 1814
},
{
"epoch": 1.97,
"learning_rate": 5.2083763557048056e-05,
"loss": 2.3511,
"step": 1816
},
{
"epoch": 1.97,
"learning_rate": 5.1997881908501736e-05,
"loss": 2.0888,
"step": 1818
},
{
"epoch": 1.97,
"learning_rate": 5.191199435584211e-05,
"loss": 2.3658,
"step": 1820
},
{
"epoch": 1.97,
"learning_rate": 5.182610115288295e-05,
"loss": 2.4578,
"step": 1822
},
{
"epoch": 1.98,
"learning_rate": 5.174020255345464e-05,
"loss": 2.5353,
"step": 1824
},
{
"epoch": 1.98,
"learning_rate": 5.1654298811403556e-05,
"loss": 2.3506,
"step": 1826
},
{
"epoch": 1.98,
"learning_rate": 5.1568390180591265e-05,
"loss": 2.43,
"step": 1828
},
{
"epoch": 1.98,
"learning_rate": 5.148247691489377e-05,
"loss": 2.5092,
"step": 1830
},
{
"epoch": 1.98,
"learning_rate": 5.139655926820078e-05,
"loss": 2.4586,
"step": 1832
},
{
"epoch": 1.99,
"learning_rate": 5.131063749441496e-05,
"loss": 2.3623,
"step": 1834
},
{
"epoch": 1.99,
"learning_rate": 5.1224711847451145e-05,
"loss": 2.5055,
"step": 1836
},
{
"epoch": 1.99,
"learning_rate": 5.113878258123563e-05,
"loss": 2.3857,
"step": 1838
},
{
"epoch": 1.99,
"learning_rate": 5.105284994970543e-05,
"loss": 2.6249,
"step": 1840
},
{
"epoch": 2.0,
"learning_rate": 5.096691420680745e-05,
"loss": 2.408,
"step": 1842
},
{
"epoch": 2.0,
"learning_rate": 5.088097560649784e-05,
"loss": 2.5748,
"step": 1844
},
{
"epoch": 2.0,
"learning_rate": 5.0795034402741185e-05,
"loss": 2.2292,
"step": 1846
},
{
"epoch": 2.0,
"learning_rate": 5.06661182712092e-05,
"loss": 3.0448,
"step": 1848
},
{
"epoch": 2.0,
"learning_rate": 5.0580171669978546e-05,
"loss": 2.4891,
"step": 1850
},
{
"epoch": 2.01,
"learning_rate": 5.049422335423252e-05,
"loss": 2.5769,
"step": 1852
},
{
"epoch": 2.01,
"learning_rate": 5.04082735779644e-05,
"loss": 2.3243,
"step": 1854
},
{
"epoch": 2.01,
"learning_rate": 5.032232259517179e-05,
"loss": 2.5441,
"step": 1856
},
{
"epoch": 2.01,
"learning_rate": 5.023637065985585e-05,
"loss": 2.4429,
"step": 1858
},
{
"epoch": 2.02,
"learning_rate": 5.015041802602057e-05,
"loss": 2.3943,
"step": 1860
},
{
"epoch": 2.02,
"learning_rate": 5.0064464947672e-05,
"loss": 2.3169,
"step": 1862
},
{
"epoch": 2.02,
"learning_rate": 4.9978511678817496e-05,
"loss": 2.4604,
"step": 1864
},
{
"epoch": 2.02,
"learning_rate": 4.989255847346499e-05,
"loss": 2.3745,
"step": 1866
},
{
"epoch": 2.02,
"learning_rate": 4.980660558562222e-05,
"loss": 2.3082,
"step": 1868
},
{
"epoch": 2.03,
"learning_rate": 4.972065326929598e-05,
"loss": 2.4983,
"step": 1870
},
{
"epoch": 2.03,
"learning_rate": 4.963470177849135e-05,
"loss": 2.3494,
"step": 1872
},
{
"epoch": 2.03,
"learning_rate": 4.954875136721104e-05,
"loss": 2.2882,
"step": 1874
},
{
"epoch": 2.03,
"learning_rate": 4.946280228945453e-05,
"loss": 2.401,
"step": 1876
},
{
"epoch": 2.03,
"learning_rate": 4.9376854799217327e-05,
"loss": 2.3044,
"step": 1878
},
{
"epoch": 2.04,
"learning_rate": 4.929090915049029e-05,
"loss": 2.51,
"step": 1880
},
{
"epoch": 2.04,
"learning_rate": 4.920496559725883e-05,
"loss": 2.5332,
"step": 1882
},
{
"epoch": 2.04,
"learning_rate": 4.911902439350217e-05,
"loss": 2.449,
"step": 1884
},
{
"epoch": 2.04,
"learning_rate": 4.9033085793192574e-05,
"loss": 2.4766,
"step": 1886
},
{
"epoch": 2.05,
"learning_rate": 4.894715005029459e-05,
"loss": 2.5255,
"step": 1888
},
{
"epoch": 2.05,
"learning_rate": 4.8861217418764374e-05,
"loss": 2.4169,
"step": 1890
},
{
"epoch": 2.05,
"learning_rate": 4.8775288152548866e-05,
"loss": 2.4542,
"step": 1892
},
{
"epoch": 2.05,
"learning_rate": 4.868936250558506e-05,
"loss": 2.3703,
"step": 1894
},
{
"epoch": 2.05,
"learning_rate": 4.8603440731799216e-05,
"loss": 2.4712,
"step": 1896
},
{
"epoch": 2.06,
"learning_rate": 4.851752308510624e-05,
"loss": 2.3871,
"step": 1898
},
{
"epoch": 2.06,
"learning_rate": 4.843160981940875e-05,
"loss": 2.3593,
"step": 1900
},
{
"epoch": 2.06,
"learning_rate": 4.8345701188596456e-05,
"loss": 2.3834,
"step": 1902
},
{
"epoch": 2.06,
"learning_rate": 4.825979744654536e-05,
"loss": 2.5184,
"step": 1904
},
{
"epoch": 2.06,
"learning_rate": 4.817389884711705e-05,
"loss": 2.3947,
"step": 1906
},
{
"epoch": 2.07,
"learning_rate": 4.8088005644157895e-05,
"loss": 2.3947,
"step": 1908
},
{
"epoch": 2.07,
"learning_rate": 4.800211809149829e-05,
"loss": 2.332,
"step": 1910
},
{
"epoch": 2.07,
"learning_rate": 4.791623644295195e-05,
"loss": 2.4736,
"step": 1912
},
{
"epoch": 2.07,
"learning_rate": 4.7830360952315164e-05,
"loss": 2.3257,
"step": 1914
},
{
"epoch": 2.08,
"learning_rate": 4.774449187336602e-05,
"loss": 2.4029,
"step": 1916
},
{
"epoch": 2.08,
"learning_rate": 4.765862945986362e-05,
"loss": 2.2457,
"step": 1918
},
{
"epoch": 2.08,
"learning_rate": 4.7572773965547384e-05,
"loss": 2.4841,
"step": 1920
},
{
"epoch": 2.08,
"learning_rate": 4.7486925644136324e-05,
"loss": 2.2552,
"step": 1922
},
{
"epoch": 2.08,
"learning_rate": 4.740108474932822e-05,
"loss": 2.2952,
"step": 1924
},
{
"epoch": 2.09,
"learning_rate": 4.731525153479891e-05,
"loss": 2.6216,
"step": 1926
},
{
"epoch": 2.09,
"learning_rate": 4.72294262542015e-05,
"loss": 2.6685,
"step": 1928
},
{
"epoch": 2.09,
"learning_rate": 4.7143609161165736e-05,
"loss": 2.5377,
"step": 1930
},
{
"epoch": 2.09,
"learning_rate": 4.705780050929708e-05,
"loss": 2.3924,
"step": 1932
},
{
"epoch": 2.1,
"learning_rate": 4.697200055217612e-05,
"loss": 2.3375,
"step": 1934
},
{
"epoch": 2.1,
"learning_rate": 4.688620954335766e-05,
"loss": 2.5853,
"step": 1936
},
{
"epoch": 2.1,
"learning_rate": 4.680042773637018e-05,
"loss": 2.4731,
"step": 1938
},
{
"epoch": 2.1,
"learning_rate": 4.671465538471486e-05,
"loss": 2.3143,
"step": 1940
},
{
"epoch": 2.1,
"learning_rate": 4.6628892741865e-05,
"loss": 2.3821,
"step": 1942
},
{
"epoch": 2.11,
"learning_rate": 4.654314006126516e-05,
"loss": 2.5513,
"step": 1944
},
{
"epoch": 2.11,
"learning_rate": 4.645739759633054e-05,
"loss": 2.3581,
"step": 1946
},
{
"epoch": 2.11,
"learning_rate": 4.637166560044605e-05,
"loss": 2.3677,
"step": 1948
},
{
"epoch": 2.11,
"learning_rate": 4.628594432696573e-05,
"loss": 2.2615,
"step": 1950
},
{
"epoch": 2.11,
"learning_rate": 4.620023402921191e-05,
"loss": 2.4965,
"step": 1952
},
{
"epoch": 2.12,
"learning_rate": 4.61145349604745e-05,
"loss": 2.3364,
"step": 1954
},
{
"epoch": 2.12,
"learning_rate": 4.602884737401022e-05,
"loss": 2.1789,
"step": 1956
},
{
"epoch": 2.12,
"learning_rate": 4.594317152304183e-05,
"loss": 2.229,
"step": 1958
},
{
"epoch": 2.12,
"learning_rate": 4.5857507660757424e-05,
"loss": 2.7305,
"step": 1960
},
{
"epoch": 2.13,
"learning_rate": 4.5771856040309716e-05,
"loss": 2.4481,
"step": 1962
},
{
"epoch": 2.13,
"learning_rate": 4.568621691481519e-05,
"loss": 2.5205,
"step": 1964
},
{
"epoch": 2.13,
"learning_rate": 4.5600590537353397e-05,
"loss": 2.4775,
"step": 1966
},
{
"epoch": 2.13,
"learning_rate": 4.551497716096624e-05,
"loss": 2.5465,
"step": 1968
},
{
"epoch": 2.13,
"learning_rate": 4.5429377038657214e-05,
"loss": 2.4793,
"step": 1970
},
{
"epoch": 2.14,
"learning_rate": 4.534379042339063e-05,
"loss": 2.5299,
"step": 1972
},
{
"epoch": 2.14,
"learning_rate": 4.5258217568090876e-05,
"loss": 2.4299,
"step": 1974
},
{
"epoch": 2.14,
"learning_rate": 4.517265872564167e-05,
"loss": 2.4201,
"step": 1976
},
{
"epoch": 2.14,
"learning_rate": 4.508711414888534e-05,
"loss": 2.3312,
"step": 1978
},
{
"epoch": 2.15,
"learning_rate": 4.5001584090622065e-05,
"loss": 2.3622,
"step": 1980
},
{
"epoch": 2.15,
"learning_rate": 4.491606880360909e-05,
"loss": 2.3231,
"step": 1982
},
{
"epoch": 2.15,
"learning_rate": 4.483056854055999e-05,
"loss": 2.3689,
"step": 1984
},
{
"epoch": 2.15,
"learning_rate": 4.474508355414404e-05,
"loss": 2.4958,
"step": 1986
},
{
"epoch": 2.15,
"learning_rate": 4.465961409698525e-05,
"loss": 2.5048,
"step": 1988
},
{
"epoch": 2.16,
"learning_rate": 4.457416042166181e-05,
"loss": 2.4204,
"step": 1990
},
{
"epoch": 2.16,
"learning_rate": 4.448872278070523e-05,
"loss": 2.5445,
"step": 1992
},
{
"epoch": 2.16,
"learning_rate": 4.4403301426599706e-05,
"loss": 2.1568,
"step": 1994
},
{
"epoch": 2.16,
"learning_rate": 4.431789661178121e-05,
"loss": 2.2937,
"step": 1996
},
{
"epoch": 2.16,
"learning_rate": 4.423250858863689e-05,
"loss": 2.336,
"step": 1998
},
{
"epoch": 2.17,
"learning_rate": 4.4147137609504266e-05,
"loss": 2.6334,
"step": 2000
},
{
"epoch": 2.17,
"learning_rate": 4.4061783926670496e-05,
"loss": 2.2973,
"step": 2002
},
{
"epoch": 2.17,
"learning_rate": 4.3976447792371624e-05,
"loss": 2.5251,
"step": 2004
},
{
"epoch": 2.17,
"learning_rate": 4.38911294587918e-05,
"loss": 2.4591,
"step": 2006
},
{
"epoch": 2.18,
"learning_rate": 4.38058291780626e-05,
"loss": 2.4621,
"step": 2008
},
{
"epoch": 2.18,
"learning_rate": 4.372054720226227e-05,
"loss": 2.3918,
"step": 2010
},
{
"epoch": 2.18,
"learning_rate": 4.3635283783414924e-05,
"loss": 2.357,
"step": 2012
},
{
"epoch": 2.18,
"learning_rate": 4.3550039173489845e-05,
"loss": 2.3846,
"step": 2014
},
{
"epoch": 2.18,
"learning_rate": 4.346481362440074e-05,
"loss": 2.421,
"step": 2016
},
{
"epoch": 2.19,
"learning_rate": 4.337960738800498e-05,
"loss": 2.523,
"step": 2018
},
{
"epoch": 2.19,
"learning_rate": 4.3294420716102895e-05,
"loss": 2.3806,
"step": 2020
},
{
"epoch": 2.19,
"learning_rate": 4.320925386043696e-05,
"loss": 2.4476,
"step": 2022
},
{
"epoch": 2.19,
"learning_rate": 4.3124107072691055e-05,
"loss": 2.4976,
"step": 2024
},
{
"epoch": 2.19,
"learning_rate": 4.3038980604489885e-05,
"loss": 2.6172,
"step": 2026
},
{
"epoch": 2.2,
"learning_rate": 4.2953874707397964e-05,
"loss": 2.4828,
"step": 2028
},
{
"epoch": 2.2,
"learning_rate": 4.28687896329191e-05,
"loss": 2.3404,
"step": 2030
},
{
"epoch": 2.2,
"learning_rate": 4.278372563249552e-05,
"loss": 2.4298,
"step": 2032
},
{
"epoch": 2.2,
"learning_rate": 4.269868295750722e-05,
"loss": 2.5339,
"step": 2034
},
{
"epoch": 2.21,
"learning_rate": 4.261366185927114e-05,
"loss": 2.2219,
"step": 2036
},
{
"epoch": 2.21,
"learning_rate": 4.252866258904045e-05,
"loss": 2.3277,
"step": 2038
},
{
"epoch": 2.21,
"learning_rate": 4.2443685398003835e-05,
"loss": 2.3991,
"step": 2040
},
{
"epoch": 2.21,
"learning_rate": 4.235873053728475e-05,
"loss": 2.3344,
"step": 2042
},
{
"epoch": 2.21,
"learning_rate": 4.227379825794063e-05,
"loss": 2.3301,
"step": 2044
},
{
"epoch": 2.22,
"learning_rate": 4.218888881096217e-05,
"loss": 2.4981,
"step": 2046
},
{
"epoch": 2.22,
"learning_rate": 4.21040024472726e-05,
"loss": 2.4976,
"step": 2048
},
{
"epoch": 2.22,
"learning_rate": 4.201913941772696e-05,
"loss": 2.6412,
"step": 2050
},
{
"epoch": 2.22,
"learning_rate": 4.193429997311132e-05,
"loss": 2.3847,
"step": 2052
},
{
"epoch": 2.23,
"learning_rate": 4.184948436414203e-05,
"loss": 2.2447,
"step": 2054
},
{
"epoch": 2.23,
"learning_rate": 4.1764692841464995e-05,
"loss": 2.5925,
"step": 2056
},
{
"epoch": 2.23,
"learning_rate": 4.1679925655655e-05,
"loss": 2.3348,
"step": 2058
},
{
"epoch": 2.23,
"learning_rate": 4.159518305721487e-05,
"loss": 2.5109,
"step": 2060
},
{
"epoch": 2.23,
"learning_rate": 4.151046529657477e-05,
"loss": 2.5121,
"step": 2062
},
{
"epoch": 2.24,
"learning_rate": 4.142577262409144e-05,
"loss": 2.4563,
"step": 2064
},
{
"epoch": 2.24,
"learning_rate": 4.134110529004753e-05,
"loss": 2.4912,
"step": 2066
},
{
"epoch": 2.24,
"learning_rate": 4.1256463544650783e-05,
"loss": 2.4457,
"step": 2068
},
{
"epoch": 2.24,
"learning_rate": 4.117184763803334e-05,
"loss": 2.4428,
"step": 2070
},
{
"epoch": 2.24,
"learning_rate": 4.108725782025092e-05,
"loss": 2.3274,
"step": 2072
},
{
"epoch": 2.25,
"learning_rate": 4.1002694341282276e-05,
"loss": 2.4062,
"step": 2074
},
{
"epoch": 2.25,
"learning_rate": 4.0918157451028185e-05,
"loss": 2.6018,
"step": 2076
},
{
"epoch": 2.25,
"learning_rate": 4.083364739931092e-05,
"loss": 2.4493,
"step": 2078
},
{
"epoch": 2.25,
"learning_rate": 4.0749164435873425e-05,
"loss": 2.5798,
"step": 2080
},
{
"epoch": 2.26,
"learning_rate": 4.0664708810378625e-05,
"loss": 2.2729,
"step": 2082
},
{
"epoch": 2.26,
"learning_rate": 4.05802807724086e-05,
"loss": 2.3844,
"step": 2084
},
{
"epoch": 2.26,
"learning_rate": 4.049588057146394e-05,
"loss": 2.402,
"step": 2086
},
{
"epoch": 2.26,
"learning_rate": 4.041150845696296e-05,
"loss": 2.4163,
"step": 2088
},
{
"epoch": 2.26,
"learning_rate": 4.032716467824099e-05,
"loss": 2.428,
"step": 2090
},
{
"epoch": 2.27,
"learning_rate": 4.0242849484549623e-05,
"loss": 2.4803,
"step": 2092
},
{
"epoch": 2.27,
"learning_rate": 4.015856312505593e-05,
"loss": 2.2398,
"step": 2094
},
{
"epoch": 2.27,
"learning_rate": 4.0074305848841814e-05,
"loss": 2.2521,
"step": 2096
},
{
"epoch": 2.27,
"learning_rate": 3.9990077904903254e-05,
"loss": 2.3918,
"step": 2098
},
{
"epoch": 2.28,
"learning_rate": 3.99058795421495e-05,
"loss": 2.519,
"step": 2100
},
{
"epoch": 2.28,
"learning_rate": 3.982171100940239e-05,
"loss": 2.4067,
"step": 2102
},
{
"epoch": 2.28,
"learning_rate": 3.973757255539562e-05,
"loss": 2.4408,
"step": 2104
},
{
"epoch": 2.28,
"learning_rate": 3.965346442877403e-05,
"loss": 2.4309,
"step": 2106
},
{
"epoch": 2.28,
"learning_rate": 3.9569386878092774e-05,
"loss": 2.2934,
"step": 2108
},
{
"epoch": 2.29,
"learning_rate": 3.94853401518167e-05,
"loss": 2.356,
"step": 2110
},
{
"epoch": 2.29,
"learning_rate": 3.94013244983195e-05,
"loss": 2.3314,
"step": 2112
},
{
"epoch": 2.29,
"learning_rate": 3.9317340165883156e-05,
"loss": 2.4871,
"step": 2114
},
{
"epoch": 2.29,
"learning_rate": 3.923338740269696e-05,
"loss": 2.4932,
"step": 2116
},
{
"epoch": 2.29,
"learning_rate": 3.9149466456857e-05,
"loss": 2.2894,
"step": 2118
},
{
"epoch": 2.3,
"learning_rate": 3.906557757636526e-05,
"loss": 2.4726,
"step": 2120
},
{
"epoch": 2.3,
"learning_rate": 3.898172100912908e-05,
"loss": 2.3281,
"step": 2122
},
{
"epoch": 2.3,
"learning_rate": 3.8897897002960195e-05,
"loss": 2.6179,
"step": 2124
},
{
"epoch": 2.3,
"learning_rate": 3.8814105805574166e-05,
"loss": 2.3778,
"step": 2126
},
{
"epoch": 2.31,
"learning_rate": 3.873034766458959e-05,
"loss": 2.4198,
"step": 2128
},
{
"epoch": 2.31,
"learning_rate": 3.86466228275274e-05,
"loss": 2.318,
"step": 2130
},
{
"epoch": 2.31,
"learning_rate": 3.856293154181009e-05,
"loss": 2.3894,
"step": 2132
},
{
"epoch": 2.31,
"learning_rate": 3.847927405476097e-05,
"loss": 2.388,
"step": 2134
},
{
"epoch": 2.31,
"learning_rate": 3.839565061360352e-05,
"loss": 2.4026,
"step": 2136
},
{
"epoch": 2.32,
"learning_rate": 3.831206146546059e-05,
"loss": 2.3764,
"step": 2138
},
{
"epoch": 2.32,
"learning_rate": 3.82285068573537e-05,
"loss": 2.2921,
"step": 2140
},
{
"epoch": 2.32,
"learning_rate": 3.814498703620226e-05,
"loss": 2.5344,
"step": 2142
},
{
"epoch": 2.32,
"learning_rate": 3.80615022488229e-05,
"loss": 2.3933,
"step": 2144
},
{
"epoch": 2.32,
"learning_rate": 3.797805274192875e-05,
"loss": 2.5373,
"step": 2146
},
{
"epoch": 2.33,
"learning_rate": 3.789463876212863e-05,
"loss": 2.2517,
"step": 2148
},
{
"epoch": 2.33,
"learning_rate": 3.781126055592641e-05,
"loss": 2.5876,
"step": 2150
},
{
"epoch": 2.33,
"learning_rate": 3.772791836972019e-05,
"loss": 2.5212,
"step": 2152
},
{
"epoch": 2.33,
"learning_rate": 3.764461244980169e-05,
"loss": 2.4556,
"step": 2154
},
{
"epoch": 2.34,
"learning_rate": 3.7561343042355415e-05,
"loss": 2.4632,
"step": 2156
},
{
"epoch": 2.34,
"learning_rate": 3.747811039345798e-05,
"loss": 2.333,
"step": 2158
},
{
"epoch": 2.34,
"learning_rate": 3.739491474907735e-05,
"loss": 2.3092,
"step": 2160
},
{
"epoch": 2.34,
"learning_rate": 3.731175635507219e-05,
"loss": 2.4144,
"step": 2162
},
{
"epoch": 2.34,
"learning_rate": 3.722863545719103e-05,
"loss": 2.51,
"step": 2164
},
{
"epoch": 2.35,
"learning_rate": 3.7145552301071594e-05,
"loss": 2.449,
"step": 2166
},
{
"epoch": 2.35,
"learning_rate": 3.706250713224011e-05,
"loss": 2.4989,
"step": 2168
},
{
"epoch": 2.35,
"learning_rate": 3.697950019611054e-05,
"loss": 2.4551,
"step": 2170
},
{
"epoch": 2.35,
"learning_rate": 3.689653173798381e-05,
"loss": 2.4758,
"step": 2172
},
{
"epoch": 2.36,
"learning_rate": 3.681360200304718e-05,
"loss": 2.5718,
"step": 2174
},
{
"epoch": 2.36,
"learning_rate": 3.673071123637347e-05,
"loss": 2.4931,
"step": 2176
},
{
"epoch": 2.36,
"learning_rate": 3.664785968292036e-05,
"loss": 2.5029,
"step": 2178
},
{
"epoch": 2.36,
"learning_rate": 3.656504758752961e-05,
"loss": 2.4353,
"step": 2180
},
{
"epoch": 2.36,
"learning_rate": 3.64822751949264e-05,
"loss": 2.2827,
"step": 2182
},
{
"epoch": 2.37,
"learning_rate": 3.639954274971854e-05,
"loss": 2.4667,
"step": 2184
},
{
"epoch": 2.37,
"learning_rate": 3.631685049639586e-05,
"loss": 2.3599,
"step": 2186
},
{
"epoch": 2.37,
"learning_rate": 3.623419867932937e-05,
"loss": 2.3421,
"step": 2188
},
{
"epoch": 2.37,
"learning_rate": 3.6151587542770567e-05,
"loss": 2.4358,
"step": 2190
},
{
"epoch": 2.37,
"learning_rate": 3.6069017330850754e-05,
"loss": 2.4472,
"step": 2192
},
{
"epoch": 2.38,
"learning_rate": 3.598648828758031e-05,
"loss": 2.4576,
"step": 2194
},
{
"epoch": 2.38,
"learning_rate": 3.590400065684792e-05,
"loss": 2.5057,
"step": 2196
},
{
"epoch": 2.38,
"learning_rate": 3.582155468241993e-05,
"loss": 2.4759,
"step": 2198
},
{
"epoch": 2.38,
"learning_rate": 3.573915060793949e-05,
"loss": 2.1946,
"step": 2200
},
{
"epoch": 2.39,
"learning_rate": 3.5656788676926066e-05,
"loss": 2.562,
"step": 2202
},
{
"epoch": 2.39,
"learning_rate": 3.557446913277448e-05,
"loss": 2.7023,
"step": 2204
},
{
"epoch": 2.39,
"learning_rate": 3.5492192218754326e-05,
"loss": 2.5342,
"step": 2206
},
{
"epoch": 2.39,
"learning_rate": 3.540995817800917e-05,
"loss": 2.4178,
"step": 2208
},
{
"epoch": 2.39,
"learning_rate": 3.532776725355601e-05,
"loss": 2.4072,
"step": 2210
},
{
"epoch": 2.4,
"learning_rate": 3.5245619688284274e-05,
"loss": 2.3832,
"step": 2212
},
{
"epoch": 2.4,
"learning_rate": 3.516351572495535e-05,
"loss": 2.5958,
"step": 2214
},
{
"epoch": 2.4,
"learning_rate": 3.508145560620173e-05,
"loss": 2.3778,
"step": 2216
},
{
"epoch": 2.4,
"learning_rate": 3.499943957452637e-05,
"loss": 2.3753,
"step": 2218
},
{
"epoch": 2.4,
"learning_rate": 3.4917467872301934e-05,
"loss": 2.4079,
"step": 2220
},
{
"epoch": 2.41,
"learning_rate": 3.4835540741770054e-05,
"loss": 2.4669,
"step": 2222
},
{
"epoch": 2.41,
"learning_rate": 3.4753658425040656e-05,
"loss": 2.4278,
"step": 2224
},
{
"epoch": 2.41,
"learning_rate": 3.467182116409127e-05,
"loss": 2.4575,
"step": 2226
},
{
"epoch": 2.41,
"learning_rate": 3.459002920076625e-05,
"loss": 2.5202,
"step": 2228
},
{
"epoch": 2.42,
"learning_rate": 3.450828277677606e-05,
"loss": 2.4366,
"step": 2230
},
{
"epoch": 2.42,
"learning_rate": 3.442658213369662e-05,
"loss": 2.4061,
"step": 2232
},
{
"epoch": 2.42,
"learning_rate": 3.434492751296856e-05,
"loss": 2.393,
"step": 2234
},
{
"epoch": 2.42,
"learning_rate": 3.426331915589651e-05,
"loss": 2.3859,
"step": 2236
},
{
"epoch": 2.42,
"learning_rate": 3.418175730364836e-05,
"loss": 2.516,
"step": 2238
},
{
"epoch": 2.43,
"learning_rate": 3.4100242197254564e-05,
"loss": 2.2877,
"step": 2240
},
{
"epoch": 2.43,
"learning_rate": 3.401877407760747e-05,
"loss": 2.3637,
"step": 2242
},
{
"epoch": 2.43,
"learning_rate": 3.393735318546054e-05,
"loss": 2.4131,
"step": 2244
},
{
"epoch": 2.43,
"learning_rate": 3.38559797614277e-05,
"loss": 2.4939,
"step": 2246
},
{
"epoch": 2.44,
"learning_rate": 3.377465404598253e-05,
"loss": 2.4648,
"step": 2248
},
{
"epoch": 2.44,
"learning_rate": 3.369337627945774e-05,
"loss": 2.504,
"step": 2250
},
{
"epoch": 2.44,
"learning_rate": 3.3612146702044226e-05,
"loss": 2.5889,
"step": 2252
},
{
"epoch": 2.44,
"learning_rate": 3.3530965553790526e-05,
"loss": 2.5468,
"step": 2254
},
{
"epoch": 2.44,
"learning_rate": 3.3449833074602064e-05,
"loss": 2.3507,
"step": 2256
},
{
"epoch": 2.45,
"learning_rate": 3.336874950424046e-05,
"loss": 2.418,
"step": 2258
},
{
"epoch": 2.45,
"learning_rate": 3.328771508232273e-05,
"loss": 2.424,
"step": 2260
},
{
"epoch": 2.45,
"learning_rate": 3.320673004832071e-05,
"loss": 2.4613,
"step": 2262
},
{
"epoch": 2.45,
"learning_rate": 3.312579464156025e-05,
"loss": 2.5197,
"step": 2264
},
{
"epoch": 2.45,
"learning_rate": 3.304490910122058e-05,
"loss": 2.3393,
"step": 2266
},
{
"epoch": 2.46,
"learning_rate": 3.2964073666333536e-05,
"loss": 2.2088,
"step": 2268
},
{
"epoch": 2.46,
"learning_rate": 3.2883288575782875e-05,
"loss": 2.4951,
"step": 2270
},
{
"epoch": 2.46,
"learning_rate": 3.2802554068303596e-05,
"loss": 2.3728,
"step": 2272
},
{
"epoch": 2.46,
"learning_rate": 3.272187038248121e-05,
"loss": 2.2495,
"step": 2274
},
{
"epoch": 2.47,
"learning_rate": 3.264123775675106e-05,
"loss": 2.3507,
"step": 2276
},
{
"epoch": 2.47,
"learning_rate": 3.256065642939756e-05,
"loss": 2.3987,
"step": 2278
},
{
"epoch": 2.47,
"learning_rate": 3.248012663855353e-05,
"loss": 2.4537,
"step": 2280
},
{
"epoch": 2.47,
"learning_rate": 3.239964862219954e-05,
"loss": 2.4883,
"step": 2282
},
{
"epoch": 2.47,
"learning_rate": 3.231922261816311e-05,
"loss": 2.1862,
"step": 2284
},
{
"epoch": 2.48,
"learning_rate": 3.223884886411807e-05,
"loss": 2.4353,
"step": 2286
},
{
"epoch": 2.48,
"learning_rate": 3.215852759758381e-05,
"loss": 2.2626,
"step": 2288
},
{
"epoch": 2.48,
"learning_rate": 3.2078259055924675e-05,
"loss": 2.3032,
"step": 2290
},
{
"epoch": 2.48,
"learning_rate": 3.199804347634915e-05,
"loss": 2.3917,
"step": 2292
},
{
"epoch": 2.49,
"learning_rate": 3.191788109590922e-05,
"loss": 2.4218,
"step": 2294
},
{
"epoch": 2.49,
"learning_rate": 3.183777215149962e-05,
"loss": 2.3915,
"step": 2296
},
{
"epoch": 2.49,
"learning_rate": 3.175771687985726e-05,
"loss": 2.5188,
"step": 2298
},
{
"epoch": 2.49,
"learning_rate": 3.167771551756036e-05,
"loss": 2.4356,
"step": 2300
},
{
"epoch": 2.49,
"learning_rate": 3.159776830102784e-05,
"loss": 2.3713,
"step": 2302
},
{
"epoch": 2.5,
"learning_rate": 3.1517875466518626e-05,
"loss": 2.488,
"step": 2304
},
{
"epoch": 2.5,
"learning_rate": 3.1438037250130944e-05,
"loss": 2.5058,
"step": 2306
},
{
"epoch": 2.5,
"learning_rate": 3.135825388780159e-05,
"loss": 2.5503,
"step": 2308
},
{
"epoch": 2.5,
"learning_rate": 3.127852561530526e-05,
"loss": 2.4376,
"step": 2310
},
{
"epoch": 2.5,
"learning_rate": 3.1198852668253856e-05,
"loss": 2.4054,
"step": 2312
},
{
"epoch": 2.51,
"learning_rate": 3.111923528209577e-05,
"loss": 2.3189,
"step": 2314
},
{
"epoch": 2.51,
"learning_rate": 3.103967369211525e-05,
"loss": 2.3736,
"step": 2316
},
{
"epoch": 2.51,
"learning_rate": 3.096016813343158e-05,
"loss": 2.4304,
"step": 2318
},
{
"epoch": 2.51,
"learning_rate": 3.08807188409985e-05,
"loss": 2.3717,
"step": 2320
},
{
"epoch": 2.52,
"learning_rate": 3.080132604960349e-05,
"loss": 2.3996,
"step": 2322
},
{
"epoch": 2.52,
"learning_rate": 3.072198999386704e-05,
"loss": 2.5024,
"step": 2324
},
{
"epoch": 2.52,
"learning_rate": 3.064271090824197e-05,
"loss": 2.3109,
"step": 2326
},
{
"epoch": 2.52,
"learning_rate": 3.056348902701274e-05,
"loss": 2.376,
"step": 2328
},
{
"epoch": 2.52,
"learning_rate": 3.0484324584294783e-05,
"loss": 2.5198,
"step": 2330
},
{
"epoch": 2.53,
"learning_rate": 3.040521781403377e-05,
"loss": 2.2787,
"step": 2332
},
{
"epoch": 2.53,
"learning_rate": 3.0326168950004964e-05,
"loss": 2.3356,
"step": 2334
},
{
"epoch": 2.53,
"learning_rate": 3.0247178225812435e-05,
"loss": 2.4738,
"step": 2336
},
{
"epoch": 2.53,
"learning_rate": 3.0168245874888557e-05,
"loss": 2.41,
"step": 2338
},
{
"epoch": 2.53,
"learning_rate": 3.00893721304931e-05,
"loss": 2.4233,
"step": 2340
},
{
"epoch": 2.54,
"learning_rate": 3.0010557225712667e-05,
"loss": 2.4596,
"step": 2342
},
{
"epoch": 2.54,
"learning_rate": 2.993180139345999e-05,
"loss": 2.5253,
"step": 2344
},
{
"epoch": 2.54,
"learning_rate": 2.9853104866473246e-05,
"loss": 2.3779,
"step": 2346
},
{
"epoch": 2.54,
"learning_rate": 2.977446787731532e-05,
"loss": 2.397,
"step": 2348
},
{
"epoch": 2.55,
"learning_rate": 2.9695890658373164e-05,
"loss": 2.4016,
"step": 2350
},
{
"epoch": 2.55,
"learning_rate": 2.96173734418571e-05,
"loss": 2.3168,
"step": 2352
},
{
"epoch": 2.55,
"learning_rate": 2.9538916459800136e-05,
"loss": 2.2834,
"step": 2354
},
{
"epoch": 2.55,
"learning_rate": 2.9460519944057284e-05,
"loss": 2.4692,
"step": 2356
},
{
"epoch": 2.55,
"learning_rate": 2.9382184126304834e-05,
"loss": 2.1485,
"step": 2358
},
{
"epoch": 2.56,
"learning_rate": 2.9303909238039718e-05,
"loss": 2.1016,
"step": 2360
},
{
"epoch": 2.56,
"learning_rate": 2.9225695510578843e-05,
"loss": 2.1819,
"step": 2362
},
{
"epoch": 2.56,
"learning_rate": 2.9147543175058335e-05,
"loss": 2.4526,
"step": 2364
},
{
"epoch": 2.56,
"learning_rate": 2.9069452462432883e-05,
"loss": 2.5037,
"step": 2366
},
{
"epoch": 2.57,
"learning_rate": 2.899142360347511e-05,
"loss": 2.223,
"step": 2368
},
{
"epoch": 2.57,
"learning_rate": 2.8913456828774854e-05,
"loss": 2.4296,
"step": 2370
},
{
"epoch": 2.57,
"learning_rate": 2.883555236873845e-05,
"loss": 2.3899,
"step": 2372
},
{
"epoch": 2.57,
"learning_rate": 2.875771045358805e-05,
"loss": 2.4758,
"step": 2374
},
{
"epoch": 2.57,
"learning_rate": 2.8679931313361053e-05,
"loss": 2.4788,
"step": 2376
},
{
"epoch": 2.58,
"learning_rate": 2.860221517790933e-05,
"loss": 2.4869,
"step": 2378
},
{
"epoch": 2.58,
"learning_rate": 2.8524562276898513e-05,
"loss": 2.4313,
"step": 2380
},
{
"epoch": 2.58,
"learning_rate": 2.8446972839807384e-05,
"loss": 2.2432,
"step": 2382
},
{
"epoch": 2.58,
"learning_rate": 2.8369447095927195e-05,
"loss": 2.2748,
"step": 2384
},
{
"epoch": 2.58,
"learning_rate": 2.8291985274360983e-05,
"loss": 2.6419,
"step": 2386
},
{
"epoch": 2.59,
"learning_rate": 2.8214587604022847e-05,
"loss": 2.4079,
"step": 2388
},
{
"epoch": 2.59,
"learning_rate": 2.8137254313637306e-05,
"loss": 2.3603,
"step": 2390
},
{
"epoch": 2.59,
"learning_rate": 2.805998563173866e-05,
"loss": 2.2753,
"step": 2392
},
{
"epoch": 2.59,
"learning_rate": 2.798278178667028e-05,
"loss": 2.3884,
"step": 2394
},
{
"epoch": 2.6,
"learning_rate": 2.790564300658387e-05,
"loss": 2.5817,
"step": 2396
},
{
"epoch": 2.6,
"learning_rate": 2.7828569519438942e-05,
"loss": 2.4844,
"step": 2398
},
{
"epoch": 2.6,
"learning_rate": 2.775156155300197e-05,
"loss": 2.4505,
"step": 2400
},
{
"epoch": 2.6,
"learning_rate": 2.7674619334845876e-05,
"loss": 2.443,
"step": 2402
},
{
"epoch": 2.6,
"learning_rate": 2.7597743092349217e-05,
"loss": 2.3359,
"step": 2404
},
{
"epoch": 2.61,
"learning_rate": 2.752093305269565e-05,
"loss": 2.4345,
"step": 2406
},
{
"epoch": 2.61,
"learning_rate": 2.7444189442873115e-05,
"loss": 2.2828,
"step": 2408
},
{
"epoch": 2.61,
"learning_rate": 2.7367512489673312e-05,
"loss": 2.5291,
"step": 2410
},
{
"epoch": 2.61,
"learning_rate": 2.7290902419690895e-05,
"loss": 2.391,
"step": 2412
},
{
"epoch": 2.62,
"learning_rate": 2.7214359459322924e-05,
"loss": 2.06,
"step": 2414
},
{
"epoch": 2.62,
"learning_rate": 2.7137883834768073e-05,
"loss": 2.6396,
"step": 2416
},
{
"epoch": 2.62,
"learning_rate": 2.7061475772026086e-05,
"loss": 2.4143,
"step": 2418
},
{
"epoch": 2.62,
"learning_rate": 2.698513549689703e-05,
"loss": 2.4331,
"step": 2420
},
{
"epoch": 2.62,
"learning_rate": 2.6908863234980636e-05,
"loss": 2.4801,
"step": 2422
},
{
"epoch": 2.63,
"learning_rate": 2.6832659211675627e-05,
"loss": 2.3912,
"step": 2424
},
{
"epoch": 2.63,
"learning_rate": 2.67565236521791e-05,
"loss": 2.4359,
"step": 2426
},
{
"epoch": 2.63,
"learning_rate": 2.668045678148584e-05,
"loss": 2.5352,
"step": 2428
},
{
"epoch": 2.63,
"learning_rate": 2.6604458824387614e-05,
"loss": 2.4982,
"step": 2430
},
{
"epoch": 2.63,
"learning_rate": 2.6528530005472518e-05,
"loss": 2.4898,
"step": 2432
},
{
"epoch": 2.64,
"learning_rate": 2.6452670549124375e-05,
"loss": 2.5291,
"step": 2434
},
{
"epoch": 2.64,
"learning_rate": 2.637688067952204e-05,
"loss": 2.3178,
"step": 2436
},
{
"epoch": 2.64,
"learning_rate": 2.630116062063867e-05,
"loss": 2.5588,
"step": 2438
},
{
"epoch": 2.64,
"learning_rate": 2.622551059624113e-05,
"loss": 2.3862,
"step": 2440
},
{
"epoch": 2.65,
"learning_rate": 2.614993082988937e-05,
"loss": 2.3327,
"step": 2442
},
{
"epoch": 2.65,
"learning_rate": 2.607442154493568e-05,
"loss": 2.3323,
"step": 2444
},
{
"epoch": 2.65,
"learning_rate": 2.599898296452406e-05,
"loss": 2.237,
"step": 2446
},
{
"epoch": 2.65,
"learning_rate": 2.592361531158952e-05,
"loss": 2.1117,
"step": 2448
},
{
"epoch": 2.65,
"learning_rate": 2.5848318808857606e-05,
"loss": 2.3355,
"step": 2450
},
{
"epoch": 2.66,
"learning_rate": 2.5773093678843473e-05,
"loss": 2.3701,
"step": 2452
},
{
"epoch": 2.66,
"learning_rate": 2.5697940143851375e-05,
"loss": 2.4158,
"step": 2454
},
{
"epoch": 2.66,
"learning_rate": 2.5622858425974018e-05,
"loss": 2.4807,
"step": 2456
},
{
"epoch": 2.66,
"learning_rate": 2.5547848747091897e-05,
"loss": 2.2695,
"step": 2458
},
{
"epoch": 2.66,
"learning_rate": 2.5472911328872574e-05,
"loss": 2.4957,
"step": 2460
},
{
"epoch": 2.67,
"learning_rate": 2.5398046392770054e-05,
"loss": 2.3591,
"step": 2462
},
{
"epoch": 2.67,
"learning_rate": 2.532325416002419e-05,
"loss": 2.6757,
"step": 2464
},
{
"epoch": 2.67,
"learning_rate": 2.524853485166e-05,
"loss": 2.3271,
"step": 2466
},
{
"epoch": 2.67,
"learning_rate": 2.517388868848692e-05,
"loss": 2.3084,
"step": 2468
},
{
"epoch": 2.68,
"learning_rate": 2.5099315891098264e-05,
"loss": 2.3597,
"step": 2470
},
{
"epoch": 2.68,
"learning_rate": 2.5024816679870556e-05,
"loss": 2.5597,
"step": 2472
},
{
"epoch": 2.68,
"learning_rate": 2.495039127496287e-05,
"loss": 2.4667,
"step": 2474
},
{
"epoch": 2.68,
"learning_rate": 2.4876039896316123e-05,
"loss": 2.2991,
"step": 2476
},
{
"epoch": 2.68,
"learning_rate": 2.4801762763652474e-05,
"loss": 2.27,
"step": 2478
},
{
"epoch": 2.69,
"learning_rate": 2.4727560096474706e-05,
"loss": 2.5184,
"step": 2480
},
{
"epoch": 2.69,
"learning_rate": 2.4653432114065544e-05,
"loss": 2.5034,
"step": 2482
},
{
"epoch": 2.69,
"learning_rate": 2.457937903548695e-05,
"loss": 2.3994,
"step": 2484
},
{
"epoch": 2.69,
"learning_rate": 2.450540107957961e-05,
"loss": 2.229,
"step": 2486
},
{
"epoch": 2.7,
"learning_rate": 2.443149846496212e-05,
"loss": 2.4133,
"step": 2488
},
{
"epoch": 2.7,
"learning_rate": 2.4357671410030526e-05,
"loss": 2.5226,
"step": 2490
},
{
"epoch": 2.7,
"learning_rate": 2.4283920132957482e-05,
"loss": 2.3836,
"step": 2492
},
{
"epoch": 2.7,
"learning_rate": 2.42102448516918e-05,
"loss": 2.4287,
"step": 2494
},
{
"epoch": 2.7,
"learning_rate": 2.413664578395761e-05,
"loss": 2.4322,
"step": 2496
},
{
"epoch": 2.71,
"learning_rate": 2.4063123147253923e-05,
"loss": 2.3545,
"step": 2498
},
{
"epoch": 2.71,
"learning_rate": 2.398967715885379e-05,
"loss": 2.3359,
"step": 2500
},
{
"epoch": 2.71,
"learning_rate": 2.391630803580382e-05,
"loss": 2.4889,
"step": 2502
},
{
"epoch": 2.71,
"learning_rate": 2.3843015994923412e-05,
"loss": 2.3731,
"step": 2504
},
{
"epoch": 2.71,
"learning_rate": 2.3769801252804213e-05,
"loss": 2.2901,
"step": 2506
},
{
"epoch": 2.72,
"learning_rate": 2.3696664025809458e-05,
"loss": 2.3341,
"step": 2508
},
{
"epoch": 2.72,
"learning_rate": 2.3623604530073245e-05,
"loss": 2.3624,
"step": 2510
},
{
"epoch": 2.72,
"learning_rate": 2.3550622981499988e-05,
"loss": 2.4377,
"step": 2512
},
{
"epoch": 2.72,
"learning_rate": 2.3477719595763774e-05,
"loss": 2.2931,
"step": 2514
},
{
"epoch": 2.73,
"learning_rate": 2.340489458830772e-05,
"loss": 2.3726,
"step": 2516
},
{
"epoch": 2.73,
"learning_rate": 2.3332148174343254e-05,
"loss": 2.2644,
"step": 2518
},
{
"epoch": 2.73,
"learning_rate": 2.3259480568849586e-05,
"loss": 2.5434,
"step": 2520
},
{
"epoch": 2.73,
"learning_rate": 2.3186891986573035e-05,
"loss": 2.2445,
"step": 2522
},
{
"epoch": 2.73,
"learning_rate": 2.3114382642026404e-05,
"loss": 2.3012,
"step": 2524
},
{
"epoch": 2.74,
"learning_rate": 2.3041952749488304e-05,
"loss": 2.298,
"step": 2526
},
{
"epoch": 2.74,
"learning_rate": 2.2969602523002543e-05,
"loss": 2.3226,
"step": 2528
},
{
"epoch": 2.74,
"learning_rate": 2.2897332176377528e-05,
"loss": 2.4809,
"step": 2530
},
{
"epoch": 2.74,
"learning_rate": 2.2825141923185632e-05,
"loss": 2.3514,
"step": 2532
},
{
"epoch": 2.74,
"learning_rate": 2.275303197676248e-05,
"loss": 2.4344,
"step": 2534
},
{
"epoch": 2.75,
"learning_rate": 2.2681002550206355e-05,
"loss": 2.3313,
"step": 2536
},
{
"epoch": 2.75,
"learning_rate": 2.2609053856377714e-05,
"loss": 2.1924,
"step": 2538
},
{
"epoch": 2.75,
"learning_rate": 2.2537186107898313e-05,
"loss": 2.1984,
"step": 2540
},
{
"epoch": 2.75,
"learning_rate": 2.2465399517150722e-05,
"loss": 2.4612,
"step": 2542
},
{
"epoch": 2.76,
"learning_rate": 2.2393694296277707e-05,
"loss": 2.3225,
"step": 2544
},
{
"epoch": 2.76,
"learning_rate": 2.2322070657181583e-05,
"loss": 2.3635,
"step": 2546
},
{
"epoch": 2.76,
"learning_rate": 2.2250528811523513e-05,
"loss": 2.4144,
"step": 2548
},
{
"epoch": 2.76,
"learning_rate": 2.2179068970722978e-05,
"loss": 2.5847,
"step": 2550
},
{
"epoch": 2.76,
"learning_rate": 2.2107691345957133e-05,
"loss": 2.3221,
"step": 2552
},
{
"epoch": 2.77,
"learning_rate": 2.203639614816017e-05,
"loss": 2.4227,
"step": 2554
},
{
"epoch": 2.77,
"learning_rate": 2.196518358802268e-05,
"loss": 2.4364,
"step": 2556
},
{
"epoch": 2.77,
"learning_rate": 2.1894053875991017e-05,
"loss": 2.4288,
"step": 2558
},
{
"epoch": 2.77,
"learning_rate": 2.182300722226675e-05,
"loss": 2.3931,
"step": 2560
},
{
"epoch": 2.78,
"learning_rate": 2.1752043836806002e-05,
"loss": 2.4772,
"step": 2562
},
{
"epoch": 2.78,
"learning_rate": 2.1681163929318777e-05,
"loss": 2.3936,
"step": 2564
},
{
"epoch": 2.78,
"learning_rate": 2.1610367709268387e-05,
"loss": 2.3759,
"step": 2566
},
{
"epoch": 2.78,
"learning_rate": 2.1539655385870877e-05,
"loss": 2.4427,
"step": 2568
},
{
"epoch": 2.78,
"learning_rate": 2.1469027168094347e-05,
"loss": 2.387,
"step": 2570
},
{
"epoch": 2.79,
"learning_rate": 2.1398483264658313e-05,
"loss": 2.2637,
"step": 2572
},
{
"epoch": 2.79,
"learning_rate": 2.132802388403319e-05,
"loss": 2.3364,
"step": 2574
},
{
"epoch": 2.79,
"learning_rate": 2.125764923443953e-05,
"loss": 2.3348,
"step": 2576
},
{
"epoch": 2.79,
"learning_rate": 2.118735952384757e-05,
"loss": 2.34,
"step": 2578
},
{
"epoch": 2.79,
"learning_rate": 2.1117154959976482e-05,
"loss": 2.2867,
"step": 2580
},
{
"epoch": 2.8,
"learning_rate": 2.104703575029385e-05,
"loss": 2.4191,
"step": 2582
},
{
"epoch": 2.8,
"learning_rate": 2.097700210201497e-05,
"loss": 2.2275,
"step": 2584
},
{
"epoch": 2.8,
"learning_rate": 2.090705422210237e-05,
"loss": 2.6198,
"step": 2586
},
{
"epoch": 2.8,
"learning_rate": 2.0837192317265016e-05,
"loss": 2.364,
"step": 2588
},
{
"epoch": 2.81,
"learning_rate": 2.0767416593957894e-05,
"loss": 2.2663,
"step": 2590
},
{
"epoch": 2.81,
"learning_rate": 2.0697727258381238e-05,
"loss": 2.2649,
"step": 2592
},
{
"epoch": 2.81,
"learning_rate": 2.0628124516480046e-05,
"loss": 2.5761,
"step": 2594
},
{
"epoch": 2.81,
"learning_rate": 2.0558608573943354e-05,
"loss": 2.2132,
"step": 2596
},
{
"epoch": 2.81,
"learning_rate": 2.0489179636203766e-05,
"loss": 2.3719,
"step": 2598
},
{
"epoch": 2.82,
"learning_rate": 2.0419837908436688e-05,
"loss": 2.4978,
"step": 2600
},
{
"epoch": 2.82,
"learning_rate": 2.0350583595559865e-05,
"loss": 2.2988,
"step": 2602
},
{
"epoch": 2.82,
"learning_rate": 2.0281416902232708e-05,
"loss": 2.255,
"step": 2604
},
{
"epoch": 2.82,
"learning_rate": 2.021233803285567e-05,
"loss": 2.2799,
"step": 2606
},
{
"epoch": 2.83,
"learning_rate": 2.014334719156966e-05,
"loss": 2.2972,
"step": 2608
},
{
"epoch": 2.83,
"learning_rate": 2.0074444582255485e-05,
"loss": 2.4158,
"step": 2610
},
{
"epoch": 2.83,
"learning_rate": 2.0005630408533215e-05,
"loss": 2.3353,
"step": 2612
},
{
"epoch": 2.83,
"learning_rate": 1.9936904873761536e-05,
"loss": 2.3829,
"step": 2614
},
{
"epoch": 2.83,
"learning_rate": 1.9868268181037185e-05,
"loss": 2.1709,
"step": 2616
},
{
"epoch": 2.84,
"learning_rate": 1.9799720533194404e-05,
"loss": 2.549,
"step": 2618
},
{
"epoch": 2.84,
"learning_rate": 1.9731262132804274e-05,
"loss": 2.5804,
"step": 2620
},
{
"epoch": 2.84,
"learning_rate": 1.966289318217411e-05,
"loss": 2.5311,
"step": 2622
},
{
"epoch": 2.84,
"learning_rate": 1.959461388334686e-05,
"loss": 2.3825,
"step": 2624
},
{
"epoch": 2.84,
"learning_rate": 1.9526424438100642e-05,
"loss": 2.3505,
"step": 2626
},
{
"epoch": 2.85,
"learning_rate": 1.9458325047947938e-05,
"loss": 2.3793,
"step": 2628
},
{
"epoch": 2.85,
"learning_rate": 1.9390315914135125e-05,
"loss": 2.2617,
"step": 2630
},
{
"epoch": 2.85,
"learning_rate": 1.9322397237641875e-05,
"loss": 2.5081,
"step": 2632
},
{
"epoch": 2.85,
"learning_rate": 1.925456921918055e-05,
"loss": 2.578,
"step": 2634
},
{
"epoch": 2.86,
"learning_rate": 1.918683205919557e-05,
"loss": 2.3566,
"step": 2636
},
{
"epoch": 2.86,
"learning_rate": 1.9119185957862835e-05,
"loss": 2.5683,
"step": 2638
},
{
"epoch": 2.86,
"learning_rate": 1.9051631115089196e-05,
"loss": 2.213,
"step": 2640
},
{
"epoch": 2.86,
"learning_rate": 1.8984167730511825e-05,
"loss": 2.6764,
"step": 2642
},
{
"epoch": 2.86,
"learning_rate": 1.8916796003497572e-05,
"loss": 2.422,
"step": 2644
},
{
"epoch": 2.87,
"learning_rate": 1.8849516133142432e-05,
"loss": 2.1258,
"step": 2646
},
{
"epoch": 2.87,
"learning_rate": 1.8782328318270964e-05,
"loss": 2.505,
"step": 2648
},
{
"epoch": 2.87,
"learning_rate": 1.8715232757435704e-05,
"loss": 2.2427,
"step": 2650
},
{
"epoch": 2.87,
"learning_rate": 1.864822964891651e-05,
"loss": 2.3599,
"step": 2652
},
{
"epoch": 2.87,
"learning_rate": 1.8581319190720035e-05,
"loss": 2.679,
"step": 2654
},
{
"epoch": 2.88,
"learning_rate": 1.851450158057918e-05,
"loss": 2.3708,
"step": 2656
},
{
"epoch": 2.88,
"learning_rate": 1.844777701595244e-05,
"loss": 2.414,
"step": 2658
},
{
"epoch": 2.88,
"learning_rate": 1.83811456940233e-05,
"loss": 2.524,
"step": 2660
},
{
"epoch": 2.88,
"learning_rate": 1.8314607811699762e-05,
"loss": 2.4521,
"step": 2662
},
{
"epoch": 2.89,
"learning_rate": 1.824816356561364e-05,
"loss": 2.3931,
"step": 2664
},
{
"epoch": 2.89,
"learning_rate": 1.8181813152120092e-05,
"loss": 2.3704,
"step": 2666
},
{
"epoch": 2.89,
"learning_rate": 1.8115556767296914e-05,
"loss": 2.5238,
"step": 2668
},
{
"epoch": 2.89,
"learning_rate": 1.804939460694411e-05,
"loss": 2.5418,
"step": 2670
},
{
"epoch": 2.89,
"learning_rate": 1.7983326866583144e-05,
"loss": 2.6173,
"step": 2672
},
{
"epoch": 2.9,
"learning_rate": 1.7917353741456545e-05,
"loss": 2.4272,
"step": 2674
},
{
"epoch": 2.9,
"learning_rate": 1.7851475426527142e-05,
"loss": 2.4986,
"step": 2676
},
{
"epoch": 2.9,
"learning_rate": 1.7785692116477682e-05,
"loss": 2.4365,
"step": 2678
},
{
"epoch": 2.9,
"learning_rate": 1.772000400571005e-05,
"loss": 2.5869,
"step": 2680
},
{
"epoch": 2.91,
"learning_rate": 1.76544112883449e-05,
"loss": 2.4987,
"step": 2682
},
{
"epoch": 2.91,
"learning_rate": 1.7588914158220898e-05,
"loss": 2.4701,
"step": 2684
},
{
"epoch": 2.91,
"learning_rate": 1.7523512808894288e-05,
"loss": 2.5142,
"step": 2686
},
{
"epoch": 2.91,
"learning_rate": 1.7458207433638223e-05,
"loss": 2.518,
"step": 2688
},
{
"epoch": 2.91,
"learning_rate": 1.7392998225442263e-05,
"loss": 2.386,
"step": 2690
},
{
"epoch": 2.92,
"learning_rate": 1.732788537701179e-05,
"loss": 2.2214,
"step": 2692
},
{
"epoch": 2.92,
"learning_rate": 1.726286908076738e-05,
"loss": 2.358,
"step": 2694
},
{
"epoch": 2.92,
"learning_rate": 1.7197949528844286e-05,
"loss": 2.5727,
"step": 2696
},
{
"epoch": 2.92,
"learning_rate": 1.7133126913091903e-05,
"loss": 2.5317,
"step": 2698
},
{
"epoch": 2.92,
"learning_rate": 1.706840142507315e-05,
"loss": 2.2929,
"step": 2700
},
{
"epoch": 2.93,
"learning_rate": 1.700377325606388e-05,
"loss": 2.4207,
"step": 2702
},
{
"epoch": 2.93,
"learning_rate": 1.6939242597052373e-05,
"loss": 2.4398,
"step": 2704
},
{
"epoch": 2.93,
"learning_rate": 1.6874809638738754e-05,
"loss": 2.3671,
"step": 2706
},
{
"epoch": 2.93,
"learning_rate": 1.681047457153444e-05,
"loss": 2.5831,
"step": 2708
},
{
"epoch": 2.94,
"learning_rate": 1.6746237585561524e-05,
"loss": 2.414,
"step": 2710
},
{
"epoch": 2.94,
"learning_rate": 1.6682098870652236e-05,
"loss": 2.2996,
"step": 2712
},
{
"epoch": 2.94,
"learning_rate": 1.6618058616348492e-05,
"loss": 2.4037,
"step": 2714
},
{
"epoch": 2.94,
"learning_rate": 1.655411701190115e-05,
"loss": 2.592,
"step": 2716
},
{
"epoch": 2.94,
"learning_rate": 1.6490274246269533e-05,
"loss": 2.2498,
"step": 2718
},
{
"epoch": 2.95,
"learning_rate": 1.642653050812094e-05,
"loss": 2.2538,
"step": 2720
},
{
"epoch": 2.95,
"learning_rate": 1.636288598583e-05,
"loss": 2.3926,
"step": 2722
},
{
"epoch": 2.95,
"learning_rate": 1.629934086747813e-05,
"loss": 2.5224,
"step": 2724
},
{
"epoch": 2.95,
"learning_rate": 1.6235895340852964e-05,
"loss": 2.3785,
"step": 2726
},
{
"epoch": 2.96,
"learning_rate": 1.6172549593447877e-05,
"loss": 2.4254,
"step": 2728
},
{
"epoch": 2.96,
"learning_rate": 1.6109303812461375e-05,
"loss": 2.2977,
"step": 2730
},
{
"epoch": 2.96,
"learning_rate": 1.60461581847965e-05,
"loss": 2.4296,
"step": 2732
},
{
"epoch": 2.96,
"learning_rate": 1.598311289706033e-05,
"loss": 2.1917,
"step": 2734
},
{
"epoch": 2.96,
"learning_rate": 1.592016813556347e-05,
"loss": 2.361,
"step": 2736
},
{
"epoch": 2.97,
"learning_rate": 1.5857324086319414e-05,
"loss": 2.3198,
"step": 2738
},
{
"epoch": 2.97,
"learning_rate": 1.579458093504403e-05,
"loss": 2.3945,
"step": 2740
},
{
"epoch": 2.97,
"learning_rate": 1.5731938867155e-05,
"loss": 2.2314,
"step": 2742
},
{
"epoch": 2.97,
"learning_rate": 1.5669398067771324e-05,
"loss": 2.4571,
"step": 2744
},
{
"epoch": 2.97,
"learning_rate": 1.560695872171273e-05,
"loss": 2.2473,
"step": 2746
},
{
"epoch": 2.98,
"learning_rate": 1.5544621013499094e-05,
"loss": 2.4553,
"step": 2748
},
{
"epoch": 2.98,
"learning_rate": 1.548238512734998e-05,
"loss": 2.3213,
"step": 2750
},
{
"epoch": 2.98,
"learning_rate": 1.542025124718401e-05,
"loss": 2.3302,
"step": 2752
},
{
"epoch": 2.98,
"learning_rate": 1.535821955661839e-05,
"loss": 2.2468,
"step": 2754
},
{
"epoch": 2.99,
"learning_rate": 1.5296290238968303e-05,
"loss": 2.3087,
"step": 2756
},
{
"epoch": 2.99,
"learning_rate": 1.5234463477246452e-05,
"loss": 2.4679,
"step": 2758
},
{
"epoch": 2.99,
"learning_rate": 1.5172739454162405e-05,
"loss": 2.3439,
"step": 2760
},
{
"epoch": 2.99,
"learning_rate": 1.5111118352122183e-05,
"loss": 2.2882,
"step": 2762
},
{
"epoch": 2.99,
"learning_rate": 1.5049600353227588e-05,
"loss": 2.4456,
"step": 2764
},
{
"epoch": 3.0,
"learning_rate": 1.4988185639275798e-05,
"loss": 2.3367,
"step": 2766
},
{
"epoch": 3.0,
"learning_rate": 1.4926874391758716e-05,
"loss": 2.3341,
"step": 2768
},
{
"epoch": 3.0,
"learning_rate": 1.4865666791862521e-05,
"loss": 2.7886,
"step": 2770
},
{
"epoch": 3.0,
"learning_rate": 1.4804563020467044e-05,
"loss": 2.6835,
"step": 2772
},
{
"epoch": 3.01,
"learning_rate": 1.4743563258145353e-05,
"loss": 2.3864,
"step": 2774
},
{
"epoch": 3.01,
"learning_rate": 1.4682667685163071e-05,
"loss": 2.3261,
"step": 2776
},
{
"epoch": 3.01,
"learning_rate": 1.4621876481477987e-05,
"loss": 2.4467,
"step": 2778
},
{
"epoch": 3.01,
"learning_rate": 1.4561189826739446e-05,
"loss": 2.3331,
"step": 2780
},
{
"epoch": 3.01,
"learning_rate": 1.45006079002878e-05,
"loss": 2.2346,
"step": 2782
},
{
"epoch": 3.02,
"learning_rate": 1.4440130881153917e-05,
"loss": 2.3942,
"step": 2784
},
{
"epoch": 3.02,
"learning_rate": 1.437975894805867e-05,
"loss": 2.441,
"step": 2786
},
{
"epoch": 3.02,
"learning_rate": 1.4319492279412388e-05,
"loss": 2.3998,
"step": 2788
},
{
"epoch": 3.02,
"learning_rate": 1.425933105331429e-05,
"loss": 2.518,
"step": 2790
},
{
"epoch": 3.02,
"learning_rate": 1.419927544755199e-05,
"loss": 2.3147,
"step": 2792
},
{
"epoch": 3.03,
"learning_rate": 1.4139325639601015e-05,
"loss": 2.2925,
"step": 2794
},
{
"epoch": 3.03,
"learning_rate": 1.4079481806624217e-05,
"loss": 2.5182,
"step": 2796
},
{
"epoch": 3.03,
"learning_rate": 1.4019744125471274e-05,
"loss": 2.4969,
"step": 2798
},
{
"epoch": 3.03,
"learning_rate": 1.3960112772678125e-05,
"loss": 2.4316,
"step": 2800
},
{
"epoch": 3.04,
"learning_rate": 1.3900587924466585e-05,
"loss": 2.3239,
"step": 2802
},
{
"epoch": 3.04,
"learning_rate": 1.3841169756743649e-05,
"loss": 2.6349,
"step": 2804
},
{
"epoch": 3.04,
"learning_rate": 1.378185844510107e-05,
"loss": 2.3982,
"step": 2806
},
{
"epoch": 3.04,
"learning_rate": 1.3722654164814796e-05,
"loss": 2.4663,
"step": 2808
},
{
"epoch": 3.04,
"learning_rate": 1.366355709084456e-05,
"loss": 2.4762,
"step": 2810
},
{
"epoch": 3.05,
"learning_rate": 1.3604567397833201e-05,
"loss": 2.4103,
"step": 2812
},
{
"epoch": 3.05,
"learning_rate": 1.354568526010624e-05,
"loss": 2.4714,
"step": 2814
},
{
"epoch": 3.05,
"learning_rate": 1.3486910851671374e-05,
"loss": 2.1582,
"step": 2816
},
{
"epoch": 3.05,
"learning_rate": 1.342824434621795e-05,
"loss": 2.3474,
"step": 2818
},
{
"epoch": 3.06,
"learning_rate": 1.3369685917116408e-05,
"loss": 2.3022,
"step": 2820
},
{
"epoch": 3.06,
"learning_rate": 1.3311235737417793e-05,
"loss": 2.2013,
"step": 2822
},
{
"epoch": 3.06,
"learning_rate": 1.3252893979853304e-05,
"loss": 2.5426,
"step": 2824
},
{
"epoch": 3.06,
"learning_rate": 1.319466081683371e-05,
"loss": 2.3739,
"step": 2826
},
{
"epoch": 3.06,
"learning_rate": 1.3136536420448841e-05,
"loss": 2.3773,
"step": 2828
},
{
"epoch": 3.07,
"learning_rate": 1.307852096246711e-05,
"loss": 2.4481,
"step": 2830
},
{
"epoch": 3.07,
"learning_rate": 1.302061461433502e-05,
"loss": 2.5957,
"step": 2832
},
{
"epoch": 3.07,
"learning_rate": 1.2962817547176625e-05,
"loss": 2.5113,
"step": 2834
},
{
"epoch": 3.07,
"learning_rate": 1.2905129931793009e-05,
"loss": 2.3745,
"step": 2836
},
{
"epoch": 3.07,
"learning_rate": 1.2847551938661839e-05,
"loss": 2.3667,
"step": 2838
},
{
"epoch": 3.08,
"learning_rate": 1.2790083737936798e-05,
"loss": 2.3051,
"step": 2840
},
{
"epoch": 3.08,
"learning_rate": 1.2732725499447146e-05,
"loss": 2.2803,
"step": 2842
},
{
"epoch": 3.08,
"learning_rate": 1.2675477392697139e-05,
"loss": 2.3317,
"step": 2844
},
{
"epoch": 3.08,
"learning_rate": 1.2618339586865625e-05,
"loss": 2.5069,
"step": 2846
},
{
"epoch": 3.09,
"learning_rate": 1.2561312250805435e-05,
"loss": 2.403,
"step": 2848
},
{
"epoch": 3.09,
"learning_rate": 1.2504395553043008e-05,
"loss": 2.3479,
"step": 2850
},
{
"epoch": 3.09,
"learning_rate": 1.2447589661777759e-05,
"loss": 2.2771,
"step": 2852
},
{
"epoch": 3.09,
"learning_rate": 1.239089474488171e-05,
"loss": 2.3368,
"step": 2854
},
{
"epoch": 3.09,
"learning_rate": 1.2334310969898871e-05,
"loss": 2.3194,
"step": 2856
},
{
"epoch": 3.1,
"learning_rate": 1.227783850404487e-05,
"loss": 2.3038,
"step": 2858
},
{
"epoch": 3.1,
"learning_rate": 1.2221477514206337e-05,
"loss": 2.5084,
"step": 2860
},
{
"epoch": 3.1,
"learning_rate": 1.216522816694053e-05,
"loss": 2.2501,
"step": 2862
},
{
"epoch": 3.1,
"learning_rate": 1.2109090628474718e-05,
"loss": 2.2798,
"step": 2864
},
{
"epoch": 3.11,
"learning_rate": 1.2053065064705805e-05,
"loss": 2.2456,
"step": 2866
},
{
"epoch": 3.11,
"learning_rate": 1.1997151641199772e-05,
"loss": 2.4106,
"step": 2868
},
{
"epoch": 3.11,
"learning_rate": 1.1941350523191208e-05,
"loss": 2.4705,
"step": 2870
},
{
"epoch": 3.11,
"learning_rate": 1.1885661875582783e-05,
"loss": 2.5891,
"step": 2872
},
{
"epoch": 3.11,
"learning_rate": 1.183008586294485e-05,
"loss": 2.2367,
"step": 2874
},
{
"epoch": 3.12,
"learning_rate": 1.1774622649514889e-05,
"loss": 2.5675,
"step": 2876
},
{
"epoch": 3.12,
"learning_rate": 1.1719272399197023e-05,
"loss": 2.4596,
"step": 2878
},
{
"epoch": 3.12,
"learning_rate": 1.166403527556153e-05,
"loss": 2.2995,
"step": 2880
},
{
"epoch": 3.12,
"learning_rate": 1.1608911441844429e-05,
"loss": 2.2225,
"step": 2882
},
{
"epoch": 3.12,
"learning_rate": 1.155390106094692e-05,
"loss": 2.2498,
"step": 2884
},
{
"epoch": 3.13,
"learning_rate": 1.1499004295434918e-05,
"loss": 2.3428,
"step": 2886
},
{
"epoch": 3.13,
"learning_rate": 1.1444221307538571e-05,
"loss": 2.3654,
"step": 2888
},
{
"epoch": 3.13,
"learning_rate": 1.1389552259151864e-05,
"loss": 2.3089,
"step": 2890
},
{
"epoch": 3.13,
"learning_rate": 1.1334997311832002e-05,
"loss": 2.3778,
"step": 2892
},
{
"epoch": 3.14,
"learning_rate": 1.1280556626799005e-05,
"loss": 2.3831,
"step": 2894
},
{
"epoch": 3.14,
"learning_rate": 1.1226230364935226e-05,
"loss": 2.4711,
"step": 2896
},
{
"epoch": 3.14,
"learning_rate": 1.1172018686784935e-05,
"loss": 2.5057,
"step": 2898
},
{
"epoch": 3.14,
"learning_rate": 1.1117921752553723e-05,
"loss": 2.3913,
"step": 2900
},
{
"epoch": 3.14,
"learning_rate": 1.106393972210809e-05,
"loss": 2.5023,
"step": 2902
},
{
"epoch": 3.15,
"learning_rate": 1.1010072754975014e-05,
"loss": 2.3522,
"step": 2904
},
{
"epoch": 3.15,
"learning_rate": 1.095632101034143e-05,
"loss": 2.6258,
"step": 2906
},
{
"epoch": 3.15,
"learning_rate": 1.0902684647053735e-05,
"loss": 2.3644,
"step": 2908
},
{
"epoch": 3.15,
"learning_rate": 1.0849163823617375e-05,
"loss": 2.4708,
"step": 2910
},
{
"epoch": 3.15,
"learning_rate": 1.0795758698196368e-05,
"loss": 2.2643,
"step": 2912
},
{
"epoch": 3.16,
"learning_rate": 1.0742469428612816e-05,
"loss": 2.4429,
"step": 2914
},
{
"epoch": 3.16,
"learning_rate": 1.0689296172346431e-05,
"loss": 2.2625,
"step": 2916
},
{
"epoch": 3.16,
"learning_rate": 1.0636239086534072e-05,
"loss": 2.3484,
"step": 2918
},
{
"epoch": 3.16,
"learning_rate": 1.0583298327969338e-05,
"loss": 2.4041,
"step": 2920
},
{
"epoch": 3.17,
"learning_rate": 1.0530474053102034e-05,
"loss": 2.1622,
"step": 2922
},
{
"epoch": 3.17,
"learning_rate": 1.047776641803772e-05,
"loss": 2.417,
"step": 2924
},
{
"epoch": 3.17,
"learning_rate": 1.0425175578537299e-05,
"loss": 2.3336,
"step": 2926
},
{
"epoch": 3.17,
"learning_rate": 1.0372701690016474e-05,
"loss": 2.4013,
"step": 2928
},
{
"epoch": 3.17,
"learning_rate": 1.0320344907545388e-05,
"loss": 2.2072,
"step": 2930
},
{
"epoch": 3.18,
"learning_rate": 1.0268105385848064e-05,
"loss": 2.5827,
"step": 2932
},
{
"epoch": 3.18,
"learning_rate": 1.0215983279302049e-05,
"loss": 2.5077,
"step": 2934
},
{
"epoch": 3.18,
"learning_rate": 1.0163978741937847e-05,
"loss": 2.512,
"step": 2936
},
{
"epoch": 3.18,
"learning_rate": 1.0112091927438583e-05,
"loss": 2.457,
"step": 2938
},
{
"epoch": 3.19,
"learning_rate": 1.0060322989139442e-05,
"loss": 2.5785,
"step": 2940
},
{
"epoch": 3.19,
"learning_rate": 1.0008672080027298e-05,
"loss": 2.4435,
"step": 2942
},
{
"epoch": 3.19,
"learning_rate": 9.957139352740191e-06,
"loss": 2.2668,
"step": 2944
},
{
"epoch": 3.19,
"learning_rate": 9.90572495956696e-06,
"loss": 2.4811,
"step": 2946
},
{
"epoch": 3.19,
"learning_rate": 9.854429052446684e-06,
"loss": 2.4208,
"step": 2948
},
{
"epoch": 3.2,
"learning_rate": 9.803251782968358e-06,
"loss": 2.4254,
"step": 2950
},
{
"epoch": 3.2,
"learning_rate": 9.752193302370315e-06,
"loss": 2.3277,
"step": 2952
},
{
"epoch": 3.2,
"learning_rate": 9.701253761539897e-06,
"loss": 2.2717,
"step": 2954
},
{
"epoch": 3.2,
"learning_rate": 9.650433311012946e-06,
"loss": 2.4127,
"step": 2956
},
{
"epoch": 3.2,
"learning_rate": 9.599732100973357e-06,
"loss": 2.6305,
"step": 2958
},
{
"epoch": 3.21,
"learning_rate": 9.549150281252633e-06,
"loss": 2.3212,
"step": 2960
},
{
"epoch": 3.21,
"learning_rate": 9.498688001329486e-06,
"loss": 2.3556,
"step": 2962
},
{
"epoch": 3.21,
"learning_rate": 9.448345410329379e-06,
"loss": 2.3342,
"step": 2964
},
{
"epoch": 3.21,
"learning_rate": 9.398122657024022e-06,
"loss": 2.454,
"step": 2966
},
{
"epoch": 3.22,
"learning_rate": 9.348019889831006e-06,
"loss": 2.6068,
"step": 2968
},
{
"epoch": 3.22,
"learning_rate": 9.298037256813347e-06,
"loss": 2.6167,
"step": 2970
},
{
"epoch": 3.22,
"learning_rate": 9.248174905679058e-06,
"loss": 2.3684,
"step": 2972
},
{
"epoch": 3.22,
"learning_rate": 9.198432983780658e-06,
"loss": 2.5119,
"step": 2974
},
{
"epoch": 3.22,
"learning_rate": 9.14881163811479e-06,
"loss": 2.3928,
"step": 2976
},
{
"epoch": 3.23,
"learning_rate": 9.099311015321782e-06,
"loss": 2.3656,
"step": 2978
},
{
"epoch": 3.23,
"learning_rate": 9.049931261685207e-06,
"loss": 2.3461,
"step": 2980
},
{
"epoch": 3.23,
"learning_rate": 9.000672523131431e-06,
"loss": 2.3134,
"step": 2982
},
{
"epoch": 3.23,
"learning_rate": 8.951534945229172e-06,
"loss": 2.3456,
"step": 2984
},
{
"epoch": 3.23,
"learning_rate": 8.902518673189192e-06,
"loss": 2.3656,
"step": 2986
},
{
"epoch": 3.24,
"learning_rate": 8.853623851863663e-06,
"loss": 2.3751,
"step": 2988
},
{
"epoch": 3.24,
"learning_rate": 8.804850625745897e-06,
"loss": 2.5872,
"step": 2990
},
{
"epoch": 3.24,
"learning_rate": 8.756199138969866e-06,
"loss": 2.2217,
"step": 2992
},
{
"epoch": 3.24,
"learning_rate": 8.707669535309793e-06,
"loss": 2.0714,
"step": 2994
},
{
"epoch": 3.25,
"learning_rate": 8.659261958179688e-06,
"loss": 2.5951,
"step": 2996
},
{
"epoch": 3.25,
"learning_rate": 8.610976550632943e-06,
"loss": 2.3067,
"step": 2998
},
{
"epoch": 3.25,
"learning_rate": 8.562813455361957e-06,
"loss": 2.3471,
"step": 3000
},
{
"epoch": 3.25,
"learning_rate": 8.514772814697653e-06,
"loss": 2.4585,
"step": 3002
},
{
"epoch": 3.25,
"learning_rate": 8.466854770609062e-06,
"loss": 2.199,
"step": 3004
},
{
"epoch": 3.26,
"learning_rate": 8.419059464702927e-06,
"loss": 2.2591,
"step": 3006
},
{
"epoch": 3.26,
"learning_rate": 8.371387038223289e-06,
"loss": 2.3367,
"step": 3008
},
{
"epoch": 3.26,
"learning_rate": 8.323837632051062e-06,
"loss": 2.5848,
"step": 3010
},
{
"epoch": 3.26,
"learning_rate": 8.27641138670358e-06,
"loss": 2.2525,
"step": 3012
},
{
"epoch": 3.27,
"learning_rate": 8.229108442334255e-06,
"loss": 2.7048,
"step": 3014
},
{
"epoch": 3.27,
"learning_rate": 8.18192893873208e-06,
"loss": 2.4397,
"step": 3016
},
{
"epoch": 3.27,
"learning_rate": 8.134873015321303e-06,
"loss": 2.3919,
"step": 3018
},
{
"epoch": 3.27,
"learning_rate": 8.087940811160916e-06,
"loss": 2.3169,
"step": 3020
},
{
"epoch": 3.27,
"learning_rate": 8.041132464944351e-06,
"loss": 2.4048,
"step": 3022
},
{
"epoch": 3.28,
"learning_rate": 7.994448114998975e-06,
"loss": 2.5458,
"step": 3024
},
{
"epoch": 3.28,
"learning_rate": 7.947887899285761e-06,
"loss": 2.2902,
"step": 3026
},
{
"epoch": 3.28,
"learning_rate": 7.901451955398792e-06,
"loss": 2.4315,
"step": 3028
},
{
"epoch": 3.28,
"learning_rate": 7.855140420564965e-06,
"loss": 2.5107,
"step": 3030
},
{
"epoch": 3.28,
"learning_rate": 7.808953431643467e-06,
"loss": 2.3578,
"step": 3032
},
{
"epoch": 3.29,
"learning_rate": 7.762891125125476e-06,
"loss": 2.3267,
"step": 3034
},
{
"epoch": 3.29,
"learning_rate": 7.716953637133677e-06,
"loss": 2.3038,
"step": 3036
},
{
"epoch": 3.29,
"learning_rate": 7.671141103421919e-06,
"loss": 2.2405,
"step": 3038
},
{
"epoch": 3.29,
"learning_rate": 7.625453659374754e-06,
"loss": 2.2669,
"step": 3040
},
{
"epoch": 3.3,
"learning_rate": 7.579891440007103e-06,
"loss": 2.4222,
"step": 3042
},
{
"epoch": 3.3,
"learning_rate": 7.534454579963829e-06,
"loss": 2.4086,
"step": 3044
},
{
"epoch": 3.3,
"learning_rate": 7.489143213519301e-06,
"loss": 2.3461,
"step": 3046
},
{
"epoch": 3.3,
"learning_rate": 7.44395747457704e-06,
"loss": 2.224,
"step": 3048
},
{
"epoch": 3.3,
"learning_rate": 7.398897496669338e-06,
"loss": 2.316,
"step": 3050
},
{
"epoch": 3.31,
"learning_rate": 7.353963412956838e-06,
"loss": 2.4673,
"step": 3052
},
{
"epoch": 3.31,
"learning_rate": 7.309155356228109e-06,
"loss": 2.3921,
"step": 3054
},
{
"epoch": 3.31,
"learning_rate": 7.264473458899301e-06,
"loss": 2.3709,
"step": 3056
},
{
"epoch": 3.31,
"learning_rate": 7.219917853013764e-06,
"loss": 2.5216,
"step": 3058
},
{
"epoch": 3.32,
"learning_rate": 7.175488670241609e-06,
"loss": 2.4435,
"step": 3060
},
{
"epoch": 3.32,
"learning_rate": 7.131186041879357e-06,
"loss": 2.4123,
"step": 3062
},
{
"epoch": 3.32,
"learning_rate": 7.0870100988495004e-06,
"loss": 2.2985,
"step": 3064
},
{
"epoch": 3.32,
"learning_rate": 7.0429609717002076e-06,
"loss": 2.4648,
"step": 3066
},
{
"epoch": 3.32,
"learning_rate": 6.999038790604856e-06,
"loss": 2.4027,
"step": 3068
},
{
"epoch": 3.33,
"learning_rate": 6.955243685361673e-06,
"loss": 2.5828,
"step": 3070
},
{
"epoch": 3.33,
"learning_rate": 6.911575785393326e-06,
"loss": 2.2331,
"step": 3072
},
{
"epoch": 3.33,
"learning_rate": 6.868035219746638e-06,
"loss": 2.3046,
"step": 3074
},
{
"epoch": 3.33,
"learning_rate": 6.824622117092078e-06,
"loss": 2.3877,
"step": 3076
},
{
"epoch": 3.33,
"learning_rate": 6.781336605723432e-06,
"loss": 2.307,
"step": 3078
},
{
"epoch": 3.34,
"learning_rate": 6.738178813557472e-06,
"loss": 2.4418,
"step": 3080
},
{
"epoch": 3.34,
"learning_rate": 6.695148868133516e-06,
"loss": 2.3749,
"step": 3082
},
{
"epoch": 3.34,
"learning_rate": 6.652246896613068e-06,
"loss": 2.4227,
"step": 3084
},
{
"epoch": 3.34,
"learning_rate": 6.609473025779434e-06,
"loss": 2.5151,
"step": 3086
},
{
"epoch": 3.35,
"learning_rate": 6.566827382037383e-06,
"loss": 2.4882,
"step": 3088
},
{
"epoch": 3.35,
"learning_rate": 6.524310091412739e-06,
"loss": 2.3111,
"step": 3090
},
{
"epoch": 3.35,
"learning_rate": 6.481921279552023e-06,
"loss": 2.3321,
"step": 3092
},
{
"epoch": 3.35,
"learning_rate": 6.439661071722048e-06,
"loss": 2.2051,
"step": 3094
},
{
"epoch": 3.35,
"learning_rate": 6.397529592809614e-06,
"loss": 2.3448,
"step": 3096
},
{
"epoch": 3.36,
"learning_rate": 6.355526967321112e-06,
"loss": 2.4095,
"step": 3098
},
{
"epoch": 3.36,
"learning_rate": 6.313653319382107e-06,
"loss": 2.1535,
"step": 3100
},
{
"epoch": 3.36,
"learning_rate": 6.271908772737017e-06,
"loss": 2.3662,
"step": 3102
},
{
"epoch": 3.36,
"learning_rate": 6.2302934507487755e-06,
"loss": 2.1468,
"step": 3104
},
{
"epoch": 3.36,
"learning_rate": 6.188807476398412e-06,
"loss": 2.4795,
"step": 3106
},
{
"epoch": 3.37,
"learning_rate": 6.147450972284696e-06,
"loss": 2.353,
"step": 3108
},
{
"epoch": 3.37,
"learning_rate": 6.106224060623822e-06,
"loss": 2.3579,
"step": 3110
},
{
"epoch": 3.37,
"learning_rate": 6.065126863248976e-06,
"loss": 2.3739,
"step": 3112
},
{
"epoch": 3.37,
"learning_rate": 6.0241595016100545e-06,
"loss": 2.3866,
"step": 3114
},
{
"epoch": 3.38,
"learning_rate": 5.98332209677322e-06,
"loss": 2.2253,
"step": 3116
},
{
"epoch": 3.38,
"learning_rate": 5.942614769420629e-06,
"loss": 2.3605,
"step": 3118
},
{
"epoch": 3.38,
"learning_rate": 5.902037639850011e-06,
"loss": 2.3848,
"step": 3120
},
{
"epoch": 3.38,
"learning_rate": 5.86159082797435e-06,
"loss": 2.1943,
"step": 3122
},
{
"epoch": 3.38,
"learning_rate": 5.8212744533215016e-06,
"loss": 2.3193,
"step": 3124
},
{
"epoch": 3.39,
"learning_rate": 5.781088635033882e-06,
"loss": 2.5142,
"step": 3126
},
{
"epoch": 3.39,
"learning_rate": 5.741033491868047e-06,
"loss": 2.5701,
"step": 3128
},
{
"epoch": 3.39,
"learning_rate": 5.701109142194422e-06,
"loss": 2.5069,
"step": 3130
},
{
"epoch": 3.39,
"learning_rate": 5.6613157039969055e-06,
"loss": 2.4168,
"step": 3132
},
{
"epoch": 3.4,
"learning_rate": 5.621653294872514e-06,
"loss": 2.4338,
"step": 3134
},
{
"epoch": 3.4,
"learning_rate": 5.582122032031051e-06,
"loss": 2.4563,
"step": 3136
},
{
"epoch": 3.4,
"learning_rate": 5.542722032294761e-06,
"loss": 2.2138,
"step": 3138
},
{
"epoch": 3.4,
"learning_rate": 5.503453412098003e-06,
"loss": 2.6032,
"step": 3140
},
{
"epoch": 3.4,
"learning_rate": 5.464316287486859e-06,
"loss": 2.3332,
"step": 3142
},
{
"epoch": 3.41,
"learning_rate": 5.425310774118802e-06,
"loss": 2.3154,
"step": 3144
},
{
"epoch": 3.41,
"learning_rate": 5.386436987262416e-06,
"loss": 2.6818,
"step": 3146
},
{
"epoch": 3.41,
"learning_rate": 5.347695041796985e-06,
"loss": 2.2799,
"step": 3148
},
{
"epoch": 3.41,
"learning_rate": 5.309085052212165e-06,
"loss": 2.5646,
"step": 3150
},
{
"epoch": 3.41,
"learning_rate": 5.270607132607663e-06,
"loss": 2.3395,
"step": 3152
},
{
"epoch": 3.42,
"learning_rate": 5.232261396692911e-06,
"loss": 2.4606,
"step": 3154
},
{
"epoch": 3.42,
"learning_rate": 5.194047957786713e-06,
"loss": 2.3552,
"step": 3156
},
{
"epoch": 3.42,
"learning_rate": 5.155966928816885e-06,
"loss": 2.5682,
"step": 3158
},
{
"epoch": 3.42,
"learning_rate": 5.118018422319948e-06,
"loss": 2.4571,
"step": 3160
},
{
"epoch": 3.43,
"learning_rate": 5.080202550440849e-06,
"loss": 2.24,
"step": 3162
},
{
"epoch": 3.43,
"learning_rate": 5.042519424932513e-06,
"loss": 2.5308,
"step": 3164
},
{
"epoch": 3.43,
"learning_rate": 5.0049691571555925e-06,
"loss": 2.4177,
"step": 3166
},
{
"epoch": 3.43,
"learning_rate": 4.967551858078129e-06,
"loss": 2.6177,
"step": 3168
},
{
"epoch": 3.43,
"learning_rate": 4.930267638275221e-06,
"loss": 2.505,
"step": 3170
},
{
"epoch": 3.44,
"learning_rate": 4.893116607928677e-06,
"loss": 2.5166,
"step": 3172
},
{
"epoch": 3.44,
"learning_rate": 4.856098876826709e-06,
"loss": 2.0793,
"step": 3174
},
{
"epoch": 3.44,
"learning_rate": 4.819214554363616e-06,
"loss": 2.4421,
"step": 3176
},
{
"epoch": 3.44,
"learning_rate": 4.782463749539446e-06,
"loss": 2.3317,
"step": 3178
},
{
"epoch": 3.45,
"learning_rate": 4.745846570959672e-06,
"loss": 2.4747,
"step": 3180
},
{
"epoch": 3.45,
"learning_rate": 4.70936312683487e-06,
"loss": 2.2323,
"step": 3182
},
{
"epoch": 3.45,
"learning_rate": 4.673013524980424e-06,
"loss": 2.3297,
"step": 3184
},
{
"epoch": 3.45,
"learning_rate": 4.63679787281619e-06,
"loss": 2.5994,
"step": 3186
},
{
"epoch": 3.45,
"learning_rate": 4.6007162773661515e-06,
"loss": 2.2933,
"step": 3188
},
{
"epoch": 3.46,
"learning_rate": 4.564768845258139e-06,
"loss": 2.4649,
"step": 3190
},
{
"epoch": 3.46,
"learning_rate": 4.528955682723529e-06,
"loss": 2.3754,
"step": 3192
},
{
"epoch": 3.46,
"learning_rate": 4.4932768955968876e-06,
"loss": 2.6034,
"step": 3194
},
{
"epoch": 3.46,
"learning_rate": 4.4577325893156715e-06,
"loss": 2.5477,
"step": 3196
},
{
"epoch": 3.46,
"learning_rate": 4.422322868919937e-06,
"loss": 2.3983,
"step": 3198
},
{
"epoch": 3.47,
"learning_rate": 4.3870478390519884e-06,
"loss": 2.3261,
"step": 3200
},
{
"epoch": 3.47,
"learning_rate": 4.3519076039561345e-06,
"loss": 2.4168,
"step": 3202
},
{
"epoch": 3.47,
"learning_rate": 4.316902267478296e-06,
"loss": 2.4235,
"step": 3204
},
{
"epoch": 3.47,
"learning_rate": 4.2820319330657835e-06,
"loss": 2.2992,
"step": 3206
},
{
"epoch": 3.48,
"learning_rate": 4.2472967037669066e-06,
"loss": 2.4394,
"step": 3208
},
{
"epoch": 3.48,
"learning_rate": 4.2126966822307715e-06,
"loss": 2.374,
"step": 3210
},
{
"epoch": 3.48,
"learning_rate": 4.178231970706858e-06,
"loss": 2.4277,
"step": 3212
},
{
"epoch": 3.48,
"learning_rate": 4.1439026710448355e-06,
"loss": 2.4958,
"step": 3214
},
{
"epoch": 3.48,
"learning_rate": 4.109708884694158e-06,
"loss": 2.3339,
"step": 3216
},
{
"epoch": 3.49,
"learning_rate": 4.075650712703849e-06,
"loss": 2.3244,
"step": 3218
},
{
"epoch": 3.49,
"learning_rate": 4.041728255722154e-06,
"loss": 2.4202,
"step": 3220
},
{
"epoch": 3.49,
"learning_rate": 4.0079416139962525e-06,
"loss": 2.4348,
"step": 3222
},
{
"epoch": 3.49,
"learning_rate": 3.974290887371951e-06,
"loss": 2.5305,
"step": 3224
},
{
"epoch": 3.49,
"learning_rate": 3.940776175293431e-06,
"loss": 2.3909,
"step": 3226
},
{
"epoch": 3.5,
"learning_rate": 3.9073975768029124e-06,
"loss": 2.4669,
"step": 3228
},
{
"epoch": 3.5,
"learning_rate": 3.8741551905403735e-06,
"loss": 2.5117,
"step": 3230
},
{
"epoch": 3.5,
"learning_rate": 3.8410491147432395e-06,
"loss": 2.3205,
"step": 3232
},
{
"epoch": 3.5,
"learning_rate": 3.808079447246149e-06,
"loss": 2.5002,
"step": 3234
},
{
"epoch": 3.51,
"learning_rate": 3.7752462854806213e-06,
"loss": 2.5181,
"step": 3236
},
{
"epoch": 3.51,
"learning_rate": 3.7425497264747534e-06,
"loss": 2.5206,
"step": 3238
},
{
"epoch": 3.51,
"learning_rate": 3.7099898668529642e-06,
"loss": 2.4898,
"step": 3240
},
{
"epoch": 3.51,
"learning_rate": 3.677566802835708e-06,
"loss": 2.4225,
"step": 3242
},
{
"epoch": 3.51,
"learning_rate": 3.6452806302392007e-06,
"loss": 2.3201,
"step": 3244
},
{
"epoch": 3.52,
"learning_rate": 3.6131314444750765e-06,
"loss": 2.4289,
"step": 3246
},
{
"epoch": 3.52,
"learning_rate": 3.58111934055016e-06,
"loss": 2.2184,
"step": 3248
},
{
"epoch": 3.52,
"learning_rate": 3.5492444130662108e-06,
"loss": 2.5492,
"step": 3250
},
{
"epoch": 3.52,
"learning_rate": 3.517506756219563e-06,
"loss": 2.5086,
"step": 3252
},
{
"epoch": 3.53,
"learning_rate": 3.4859064638009033e-06,
"loss": 2.4952,
"step": 3254
},
{
"epoch": 3.53,
"learning_rate": 3.4544436291949867e-06,
"loss": 2.469,
"step": 3256
},
{
"epoch": 3.53,
"learning_rate": 3.4231183453803604e-06,
"loss": 2.3824,
"step": 3258
},
{
"epoch": 3.53,
"learning_rate": 3.391930704929064e-06,
"loss": 2.4994,
"step": 3260
},
{
"epoch": 3.53,
"learning_rate": 3.360880800006383e-06,
"loss": 2.545,
"step": 3262
},
{
"epoch": 3.54,
"learning_rate": 3.3299687223705745e-06,
"loss": 2.3291,
"step": 3264
},
{
"epoch": 3.54,
"learning_rate": 3.299194563372604e-06,
"loss": 2.5543,
"step": 3266
},
{
"epoch": 3.54,
"learning_rate": 3.2685584139558243e-06,
"loss": 2.3818,
"step": 3268
},
{
"epoch": 3.54,
"learning_rate": 3.238060364655765e-06,
"loss": 2.6038,
"step": 3270
},
{
"epoch": 3.54,
"learning_rate": 3.2077005055998533e-06,
"loss": 2.4691,
"step": 3272
},
{
"epoch": 3.55,
"learning_rate": 3.177478926507127e-06,
"loss": 2.4399,
"step": 3274
},
{
"epoch": 3.55,
"learning_rate": 3.1473957166879897e-06,
"loss": 2.5692,
"step": 3276
},
{
"epoch": 3.55,
"learning_rate": 3.117450965043911e-06,
"loss": 2.4183,
"step": 3278
},
{
"epoch": 3.55,
"learning_rate": 3.087644760067232e-06,
"loss": 2.4085,
"step": 3280
},
{
"epoch": 3.56,
"learning_rate": 3.0579771898408326e-06,
"loss": 2.2894,
"step": 3282
},
{
"epoch": 3.56,
"learning_rate": 3.0284483420379097e-06,
"loss": 2.2705,
"step": 3284
},
{
"epoch": 3.56,
"learning_rate": 2.9990583039217203e-06,
"loss": 2.2714,
"step": 3286
},
{
"epoch": 3.56,
"learning_rate": 2.9698071623452895e-06,
"loss": 2.366,
"step": 3288
},
{
"epoch": 3.56,
"learning_rate": 2.940695003751198e-06,
"loss": 2.3525,
"step": 3290
},
{
"epoch": 3.57,
"learning_rate": 2.9117219141712947e-06,
"loss": 2.3377,
"step": 3292
},
{
"epoch": 3.57,
"learning_rate": 2.8828879792264675e-06,
"loss": 2.1998,
"step": 3294
},
{
"epoch": 3.57,
"learning_rate": 2.854193284126344e-06,
"loss": 2.3437,
"step": 3296
},
{
"epoch": 3.57,
"learning_rate": 2.825637913669121e-06,
"loss": 2.2963,
"step": 3298
},
{
"epoch": 3.57,
"learning_rate": 2.797221952241219e-06,
"loss": 2.3955,
"step": 3300
},
{
"epoch": 3.58,
"learning_rate": 2.7689454838171147e-06,
"loss": 2.2326,
"step": 3302
},
{
"epoch": 3.58,
"learning_rate": 2.7408085919590264e-06,
"loss": 2.3897,
"step": 3304
},
{
"epoch": 3.58,
"learning_rate": 2.7128113598167137e-06,
"loss": 2.4245,
"step": 3306
},
{
"epoch": 3.58,
"learning_rate": 2.684953870127227e-06,
"loss": 2.488,
"step": 3308
},
{
"epoch": 3.59,
"learning_rate": 2.657236205214625e-06,
"loss": 2.3614,
"step": 3310
},
{
"epoch": 3.59,
"learning_rate": 2.6296584469897743e-06,
"loss": 2.1686,
"step": 3312
},
{
"epoch": 3.59,
"learning_rate": 2.6022206769500845e-06,
"loss": 2.6152,
"step": 3314
},
{
"epoch": 3.59,
"learning_rate": 2.574922976179295e-06,
"loss": 2.3362,
"step": 3316
},
{
"epoch": 3.59,
"learning_rate": 2.547765425347187e-06,
"loss": 2.382,
"step": 3318
},
{
"epoch": 3.6,
"learning_rate": 2.520748104709375e-06,
"loss": 2.4045,
"step": 3320
},
{
"epoch": 3.6,
"learning_rate": 2.493871094107081e-06,
"loss": 2.2771,
"step": 3322
},
{
"epoch": 3.6,
"learning_rate": 2.467134472966892e-06,
"loss": 2.3296,
"step": 3324
},
{
"epoch": 3.6,
"learning_rate": 2.4405383203004894e-06,
"loss": 2.3129,
"step": 3326
},
{
"epoch": 3.61,
"learning_rate": 2.414082714704463e-06,
"loss": 2.2268,
"step": 3328
},
{
"epoch": 3.61,
"learning_rate": 2.3877677343600524e-06,
"loss": 2.476,
"step": 3330
},
{
"epoch": 3.61,
"learning_rate": 2.36159345703294e-06,
"loss": 2.5804,
"step": 3332
},
{
"epoch": 3.61,
"learning_rate": 2.3355599600729915e-06,
"loss": 2.4219,
"step": 3334
},
{
"epoch": 3.61,
"learning_rate": 2.3096673204140108e-06,
"loss": 2.4168,
"step": 3336
},
{
"epoch": 3.62,
"learning_rate": 2.2839156145736174e-06,
"loss": 2.3116,
"step": 3338
},
{
"epoch": 3.62,
"learning_rate": 2.2583049186528704e-06,
"loss": 2.3238,
"step": 3340
},
{
"epoch": 3.62,
"learning_rate": 2.2328353083361562e-06,
"loss": 2.4897,
"step": 3342
},
{
"epoch": 3.62,
"learning_rate": 2.207506858890912e-06,
"loss": 2.307,
"step": 3344
},
{
"epoch": 3.62,
"learning_rate": 2.182319645167441e-06,
"loss": 2.3267,
"step": 3346
},
{
"epoch": 3.63,
"learning_rate": 2.1572737415986422e-06,
"loss": 2.424,
"step": 3348
},
{
"epoch": 3.63,
"learning_rate": 2.1323692221998257e-06,
"loss": 2.4612,
"step": 3350
},
{
"epoch": 3.63,
"learning_rate": 2.1076061605684818e-06,
"loss": 2.4219,
"step": 3352
},
{
"epoch": 3.63,
"learning_rate": 2.0829846298840884e-06,
"loss": 2.4251,
"step": 3354
},
{
"epoch": 3.64,
"learning_rate": 2.058504702907843e-06,
"loss": 2.5063,
"step": 3356
},
{
"epoch": 3.64,
"learning_rate": 2.0341664519824887e-06,
"loss": 2.5947,
"step": 3358
},
{
"epoch": 3.64,
"learning_rate": 2.009969949032098e-06,
"loss": 2.4399,
"step": 3360
},
{
"epoch": 3.64,
"learning_rate": 1.9859152655618498e-06,
"loss": 2.405,
"step": 3362
},
{
"epoch": 3.64,
"learning_rate": 1.962002472657809e-06,
"loss": 2.3689,
"step": 3364
},
{
"epoch": 3.65,
"learning_rate": 1.9382316409867264e-06,
"loss": 2.451,
"step": 3366
},
{
"epoch": 3.65,
"learning_rate": 1.9146028407958484e-06,
"loss": 2.3364,
"step": 3368
},
{
"epoch": 3.65,
"learning_rate": 1.8911161419126854e-06,
"loss": 2.3012,
"step": 3370
},
{
"epoch": 3.65,
"learning_rate": 1.8677716137447954e-06,
"loss": 2.3833,
"step": 3372
},
{
"epoch": 3.66,
"learning_rate": 1.844569325279627e-06,
"loss": 2.5041,
"step": 3374
},
{
"epoch": 3.66,
"learning_rate": 1.8215093450842435e-06,
"loss": 2.2862,
"step": 3376
},
{
"epoch": 3.66,
"learning_rate": 1.7985917413052055e-06,
"loss": 2.3316,
"step": 3378
},
{
"epoch": 3.66,
"learning_rate": 1.7758165816682826e-06,
"loss": 2.2273,
"step": 3380
},
{
"epoch": 3.66,
"learning_rate": 1.7531839334783306e-06,
"loss": 2.2901,
"step": 3382
},
{
"epoch": 3.67,
"learning_rate": 1.7306938636190262e-06,
"loss": 2.5521,
"step": 3384
},
{
"epoch": 3.67,
"learning_rate": 1.7083464385527325e-06,
"loss": 2.3896,
"step": 3386
},
{
"epoch": 3.67,
"learning_rate": 1.686141724320245e-06,
"loss": 2.2818,
"step": 3388
},
{
"epoch": 3.67,
"learning_rate": 1.6640797865406288e-06,
"loss": 2.2733,
"step": 3390
},
{
"epoch": 3.67,
"learning_rate": 1.6421606904110264e-06,
"loss": 2.4238,
"step": 3392
},
{
"epoch": 3.68,
"learning_rate": 1.6203845007064455e-06,
"loss": 2.6201,
"step": 3394
},
{
"epoch": 3.68,
"learning_rate": 1.5987512817795924e-06,
"loss": 2.422,
"step": 3396
},
{
"epoch": 3.68,
"learning_rate": 1.5772610975606561e-06,
"loss": 2.2933,
"step": 3398
},
{
"epoch": 3.68,
"learning_rate": 1.5559140115571246e-06,
"loss": 2.4014,
"step": 3400
},
{
"epoch": 3.69,
"learning_rate": 1.5347100868536246e-06,
"loss": 2.3193,
"step": 3402
},
{
"epoch": 3.69,
"learning_rate": 1.5136493861117097e-06,
"loss": 2.4959,
"step": 3404
},
{
"epoch": 3.69,
"learning_rate": 1.4927319715696607e-06,
"loss": 2.3566,
"step": 3406
},
{
"epoch": 3.69,
"learning_rate": 1.4719579050423427e-06,
"loss": 2.3291,
"step": 3408
},
{
"epoch": 3.69,
"learning_rate": 1.4513272479209917e-06,
"loss": 2.3138,
"step": 3410
},
{
"epoch": 3.7,
"learning_rate": 1.43084006117305e-06,
"loss": 2.2497,
"step": 3412
},
{
"epoch": 3.7,
"learning_rate": 1.41049640534196e-06,
"loss": 2.2461,
"step": 3414
},
{
"epoch": 3.7,
"learning_rate": 1.3902963405470148e-06,
"loss": 2.3886,
"step": 3416
},
{
"epoch": 3.7,
"learning_rate": 1.37023992648318e-06,
"loss": 2.2535,
"step": 3418
},
{
"epoch": 3.7,
"learning_rate": 1.3503272224208884e-06,
"loss": 2.3367,
"step": 3420
},
{
"epoch": 3.71,
"learning_rate": 1.3305582872058963e-06,
"loss": 2.4806,
"step": 3422
},
{
"epoch": 3.71,
"learning_rate": 1.3109331792590773e-06,
"loss": 2.4335,
"step": 3424
},
{
"epoch": 3.71,
"learning_rate": 1.2914519565763062e-06,
"loss": 2.4195,
"step": 3426
},
{
"epoch": 3.71,
"learning_rate": 1.2721146767282033e-06,
"loss": 2.4332,
"step": 3428
},
{
"epoch": 3.72,
"learning_rate": 1.2529213968600406e-06,
"loss": 2.2733,
"step": 3430
},
{
"epoch": 3.72,
"learning_rate": 1.233872173691536e-06,
"loss": 2.3522,
"step": 3432
},
{
"epoch": 3.72,
"learning_rate": 1.2149670635166976e-06,
"loss": 2.5166,
"step": 3434
},
{
"epoch": 3.72,
"learning_rate": 1.196206122203647e-06,
"loss": 2.4279,
"step": 3436
},
{
"epoch": 3.72,
"learning_rate": 1.1775894051944514e-06,
"loss": 2.2575,
"step": 3438
},
{
"epoch": 3.73,
"learning_rate": 1.1591169675049863e-06,
"loss": 2.514,
"step": 3440
},
{
"epoch": 3.73,
"learning_rate": 1.140788863724751e-06,
"loss": 2.4809,
"step": 3442
},
{
"epoch": 3.73,
"learning_rate": 1.1226051480167032e-06,
"loss": 2.444,
"step": 3444
},
{
"epoch": 3.73,
"learning_rate": 1.1045658741171028e-06,
"loss": 2.3813,
"step": 3446
},
{
"epoch": 3.74,
"learning_rate": 1.0866710953353731e-06,
"loss": 2.3445,
"step": 3448
},
{
"epoch": 3.74,
"learning_rate": 1.068920864553924e-06,
"loss": 2.4029,
"step": 3450
},
{
"epoch": 3.74,
"learning_rate": 1.0513152342279842e-06,
"loss": 2.4019,
"step": 3452
},
{
"epoch": 3.74,
"learning_rate": 1.0338542563854748e-06,
"loss": 2.3407,
"step": 3454
},
{
"epoch": 3.74,
"learning_rate": 1.0165379826268417e-06,
"loss": 2.414,
"step": 3456
},
{
"epoch": 3.75,
"learning_rate": 9.993664641249012e-07,
"loss": 2.4748,
"step": 3458
},
{
"epoch": 3.75,
"learning_rate": 9.823397516246834e-07,
"loss": 2.2681,
"step": 3460
},
{
"epoch": 3.75,
"learning_rate": 9.654578954433059e-07,
"loss": 2.3164,
"step": 3462
},
{
"epoch": 3.75,
"learning_rate": 9.487209454697887e-07,
"loss": 2.382,
"step": 3464
},
{
"epoch": 3.75,
"learning_rate": 9.321289511649456e-07,
"loss": 2.3299,
"step": 3466
},
{
"epoch": 3.76,
"learning_rate": 9.156819615612044e-07,
"loss": 2.3526,
"step": 3468
},
{
"epoch": 3.76,
"learning_rate": 8.993800252624862e-07,
"loss": 2.4167,
"step": 3470
},
{
"epoch": 3.76,
"learning_rate": 8.832231904440491e-07,
"loss": 2.5703,
"step": 3472
},
{
"epoch": 3.76,
"learning_rate": 8.672115048523554e-07,
"loss": 2.5794,
"step": 3474
},
{
"epoch": 3.77,
"learning_rate": 8.513450158049108e-07,
"loss": 2.3276,
"step": 3476
},
{
"epoch": 3.77,
"learning_rate": 8.356237701901582e-07,
"loss": 2.3394,
"step": 3478
},
{
"epoch": 3.77,
"learning_rate": 8.200478144672952e-07,
"loss": 2.3505,
"step": 3480
},
{
"epoch": 3.77,
"learning_rate": 8.046171946661796e-07,
"loss": 2.4532,
"step": 3482
},
{
"epoch": 3.77,
"learning_rate": 7.893319563871682e-07,
"loss": 2.513,
"step": 3484
},
{
"epoch": 3.78,
"learning_rate": 7.741921448009837e-07,
"loss": 2.4655,
"step": 3486
},
{
"epoch": 3.78,
"learning_rate": 7.591978046485926e-07,
"loss": 2.605,
"step": 3488
},
{
"epoch": 3.78,
"learning_rate": 7.443489802410663e-07,
"loss": 2.4451,
"step": 3490
},
{
"epoch": 3.78,
"learning_rate": 7.296457154594482e-07,
"loss": 2.5196,
"step": 3492
},
{
"epoch": 3.79,
"learning_rate": 7.150880537546201e-07,
"loss": 2.2368,
"step": 3494
},
{
"epoch": 3.79,
"learning_rate": 7.006760381471856e-07,
"loss": 2.4034,
"step": 3496
},
{
"epoch": 3.79,
"learning_rate": 6.86409711227337e-07,
"loss": 2.5032,
"step": 3498
},
{
"epoch": 3.79,
"learning_rate": 6.722891151547284e-07,
"loss": 2.3998,
"step": 3500
},
{
"epoch": 3.79,
"learning_rate": 6.583142916583574e-07,
"loss": 2.4205,
"step": 3502
},
{
"epoch": 3.8,
"learning_rate": 6.444852820364222e-07,
"loss": 2.3034,
"step": 3504
},
{
"epoch": 3.8,
"learning_rate": 6.30802127156227e-07,
"loss": 2.3639,
"step": 3506
},
{
"epoch": 3.8,
"learning_rate": 6.172648674540426e-07,
"loss": 2.3491,
"step": 3508
},
{
"epoch": 3.8,
"learning_rate": 6.038735429349962e-07,
"loss": 2.4925,
"step": 3510
},
{
"epoch": 3.8,
"learning_rate": 5.90628193172943e-07,
"loss": 2.2371,
"step": 3512
},
{
"epoch": 3.81,
"learning_rate": 5.775288573103666e-07,
"loss": 2.3831,
"step": 3514
},
{
"epoch": 3.81,
"learning_rate": 5.645755740582404e-07,
"loss": 2.6221,
"step": 3516
},
{
"epoch": 3.81,
"learning_rate": 5.517683816959219e-07,
"loss": 2.4171,
"step": 3518
},
{
"epoch": 3.81,
"learning_rate": 5.391073180710638e-07,
"loss": 2.3518,
"step": 3520
},
{
"epoch": 3.82,
"learning_rate": 5.265924205994644e-07,
"loss": 2.4263,
"step": 3522
},
{
"epoch": 3.82,
"learning_rate": 5.14223726264973e-07,
"loss": 2.3707,
"step": 3524
},
{
"epoch": 3.82,
"learning_rate": 5.020012716193901e-07,
"loss": 2.2659,
"step": 3526
},
{
"epoch": 3.82,
"learning_rate": 4.899250927823396e-07,
"loss": 2.3573,
"step": 3528
},
{
"epoch": 3.82,
"learning_rate": 4.779952254411913e-07,
"loss": 2.2359,
"step": 3530
},
{
"epoch": 3.83,
"learning_rate": 4.662117048509218e-07,
"loss": 2.3461,
"step": 3532
},
{
"epoch": 3.83,
"learning_rate": 4.545745658340206e-07,
"loss": 2.4581,
"step": 3534
},
{
"epoch": 3.83,
"learning_rate": 4.4308384278041183e-07,
"loss": 2.4515,
"step": 3536
},
{
"epoch": 3.83,
"learning_rate": 4.317395696473214e-07,
"loss": 2.4953,
"step": 3538
},
{
"epoch": 3.83,
"learning_rate": 4.2054177995919374e-07,
"loss": 2.5276,
"step": 3540
},
{
"epoch": 3.84,
"learning_rate": 4.094905068075694e-07,
"loss": 2.323,
"step": 3542
},
{
"epoch": 3.84,
"learning_rate": 3.985857828510353e-07,
"loss": 2.4943,
"step": 3544
},
{
"epoch": 3.84,
"learning_rate": 3.878276403150749e-07,
"loss": 2.4179,
"step": 3546
},
{
"epoch": 3.84,
"learning_rate": 3.7721611099200693e-07,
"loss": 2.3685,
"step": 3548
},
{
"epoch": 3.85,
"learning_rate": 3.6675122624087454e-07,
"loss": 2.4998,
"step": 3550
},
{
"epoch": 3.85,
"learning_rate": 3.5643301698736196e-07,
"loss": 2.484,
"step": 3552
},
{
"epoch": 3.85,
"learning_rate": 3.462615137237002e-07,
"loss": 2.3272,
"step": 3554
},
{
"epoch": 3.85,
"learning_rate": 3.3623674650857806e-07,
"loss": 2.3971,
"step": 3556
},
{
"epoch": 3.85,
"learning_rate": 3.2635874496705356e-07,
"loss": 2.544,
"step": 3558
},
{
"epoch": 3.86,
"learning_rate": 3.1662753829045375e-07,
"loss": 2.6006,
"step": 3560
},
{
"epoch": 3.86,
"learning_rate": 3.0704315523631953e-07,
"loss": 2.3817,
"step": 3562
},
{
"epoch": 3.86,
"learning_rate": 2.976056241282721e-07,
"loss": 2.4897,
"step": 3564
},
{
"epoch": 3.86,
"learning_rate": 2.8831497285599085e-07,
"loss": 2.3797,
"step": 3566
},
{
"epoch": 3.87,
"learning_rate": 2.7917122887506364e-07,
"loss": 2.4418,
"step": 3568
},
{
"epoch": 3.87,
"learning_rate": 2.701744192069755e-07,
"loss": 2.4906,
"step": 3570
},
{
"epoch": 3.87,
"learning_rate": 2.613245704389644e-07,
"loss": 2.4531,
"step": 3572
},
{
"epoch": 3.87,
"learning_rate": 2.5262170872398796e-07,
"loss": 2.3002,
"step": 3574
},
{
"epoch": 3.87,
"learning_rate": 2.440658597806178e-07,
"loss": 2.5243,
"step": 3576
},
{
"epoch": 3.88,
"learning_rate": 2.3565704889298434e-07,
"loss": 2.4181,
"step": 3578
},
{
"epoch": 3.88,
"learning_rate": 2.2739530091069328e-07,
"loss": 2.2211,
"step": 3580
},
{
"epoch": 3.88,
"learning_rate": 2.1928064024874796e-07,
"loss": 2.0089,
"step": 3582
},
{
"epoch": 3.88,
"learning_rate": 2.113130908874772e-07,
"loss": 2.269,
"step": 3584
},
{
"epoch": 3.88,
"learning_rate": 2.0349267637247982e-07,
"loss": 2.4197,
"step": 3586
},
{
"epoch": 3.89,
"learning_rate": 1.9581941981453579e-07,
"loss": 2.319,
"step": 3588
},
{
"epoch": 3.89,
"learning_rate": 1.8829334388955067e-07,
"loss": 2.2629,
"step": 3590
},
{
"epoch": 3.89,
"learning_rate": 1.80914470838478e-07,
"loss": 2.5493,
"step": 3592
},
{
"epoch": 3.89,
"learning_rate": 1.7368282246726376e-07,
"loss": 2.3166,
"step": 3594
},
{
"epoch": 3.9,
"learning_rate": 1.6659842014677406e-07,
"loss": 2.5148,
"step": 3596
},
{
"epoch": 3.9,
"learning_rate": 1.596612848127399e-07,
"loss": 2.339,
"step": 3598
},
{
"epoch": 3.9,
"learning_rate": 1.5287143696568473e-07,
"loss": 2.308,
"step": 3600
},
{
"epoch": 3.9,
"learning_rate": 1.462288966708858e-07,
"loss": 2.5463,
"step": 3602
},
{
"epoch": 3.9,
"learning_rate": 1.397336835582741e-07,
"loss": 2.6243,
"step": 3604
},
{
"epoch": 3.91,
"learning_rate": 1.333858168224178e-07,
"loss": 2.4714,
"step": 3606
},
{
"epoch": 3.91,
"learning_rate": 1.2718531522244447e-07,
"loss": 2.2877,
"step": 3608
},
{
"epoch": 3.91,
"learning_rate": 1.211321970820023e-07,
"loss": 2.3737,
"step": 3610
},
{
"epoch": 3.91,
"learning_rate": 1.1522648028917116e-07,
"loss": 2.6491,
"step": 3612
},
{
"epoch": 3.91,
"learning_rate": 1.0946818229644607e-07,
"loss": 2.1288,
"step": 3614
},
{
"epoch": 3.92,
"learning_rate": 1.0385732012067607e-07,
"loss": 2.4494,
"step": 3616
},
{
"epoch": 3.92,
"learning_rate": 9.839391034300316e-08,
"loss": 2.5278,
"step": 3618
},
{
"epoch": 3.92,
"learning_rate": 9.307796910881794e-08,
"loss": 2.2621,
"step": 3620
},
{
"epoch": 3.92,
"learning_rate": 8.790951212771514e-08,
"loss": 2.2438,
"step": 3622
},
{
"epoch": 3.93,
"learning_rate": 8.28885546734548e-08,
"loss": 2.5771,
"step": 3624
},
{
"epoch": 3.93,
"learning_rate": 7.801511158390118e-08,
"loss": 2.1801,
"step": 3626
},
{
"epoch": 3.93,
"learning_rate": 7.328919726097838e-08,
"loss": 2.3484,
"step": 3628
},
{
"epoch": 3.93,
"learning_rate": 6.871082567065367e-08,
"loss": 2.5062,
"step": 3630
},
{
"epoch": 3.93,
"learning_rate": 6.42800103428598e-08,
"loss": 2.2535,
"step": 3632
},
{
"epoch": 3.94,
"learning_rate": 5.999676437148938e-08,
"loss": 2.4677,
"step": 3634
},
{
"epoch": 3.94,
"learning_rate": 5.5861100414322796e-08,
"loss": 2.2292,
"step": 3636
},
{
"epoch": 3.94,
"learning_rate": 5.1873030693028177e-08,
"loss": 2.3609,
"step": 3638
},
{
"epoch": 3.94,
"learning_rate": 4.8032566993089225e-08,
"loss": 2.4992,
"step": 3640
},
{
"epoch": 3.95,
"learning_rate": 4.4339720663788555e-08,
"loss": 2.5409,
"step": 3642
},
{
"epoch": 3.95,
"learning_rate": 4.079450261817997e-08,
"loss": 2.4727,
"step": 3644
},
{
"epoch": 3.95,
"learning_rate": 3.739692333304401e-08,
"loss": 2.2859,
"step": 3646
},
{
"epoch": 3.95,
"learning_rate": 3.4146992848854695e-08,
"loss": 2.3062,
"step": 3648
},
{
"epoch": 3.95,
"learning_rate": 3.104472076976839e-08,
"loss": 2.3304,
"step": 3650
},
{
"epoch": 3.96,
"learning_rate": 2.809011626357383e-08,
"loss": 2.4489,
"step": 3652
},
{
"epoch": 3.96,
"learning_rate": 2.528318806168106e-08,
"loss": 2.3078,
"step": 3654
},
{
"epoch": 3.96,
"learning_rate": 2.2623944459082557e-08,
"loss": 2.3173,
"step": 3656
},
{
"epoch": 3.96,
"learning_rate": 2.0112393314336565e-08,
"loss": 2.3973,
"step": 3658
},
{
"epoch": 3.96,
"learning_rate": 1.7748542049550453e-08,
"loss": 2.4975,
"step": 3660
},
{
"epoch": 3.97,
"learning_rate": 1.553239765034187e-08,
"loss": 2.4494,
"step": 3662
},
{
"epoch": 3.97,
"learning_rate": 1.346396666582761e-08,
"loss": 2.4442,
"step": 3664
},
{
"epoch": 3.97,
"learning_rate": 1.1543255208612546e-08,
"loss": 2.4622,
"step": 3666
},
{
"epoch": 3.97,
"learning_rate": 9.770268954756301e-09,
"loss": 2.3676,
"step": 3668
},
{
"epoch": 3.98,
"learning_rate": 8.145013143756597e-09,
"loss": 2.5927,
"step": 3670
},
{
"epoch": 3.98,
"learning_rate": 6.6674925785548125e-09,
"loss": 2.4922,
"step": 3672
},
{
"epoch": 3.98,
"learning_rate": 5.337711625497121e-09,
"loss": 2.1202,
"step": 3674
},
{
"epoch": 3.98,
"learning_rate": 4.155674214328942e-09,
"loss": 2.5643,
"step": 3676
},
{
"epoch": 3.98,
"learning_rate": 3.1213838382004867e-09,
"loss": 2.2801,
"step": 3678
},
{
"epoch": 3.99,
"learning_rate": 2.234843553627908e-09,
"loss": 2.4424,
"step": 3680
},
{
"epoch": 3.99,
"learning_rate": 1.496055980498845e-09,
"loss": 2.6128,
"step": 3682
},
{
"epoch": 3.99,
"learning_rate": 9.050233020779786e-10,
"loss": 2.3174,
"step": 3684
},
{
"epoch": 3.99,
"learning_rate": 4.6174726496817087e-10,
"loss": 2.4364,
"step": 3686
},
{
"epoch": 4.0,
"learning_rate": 1.6622917913267088e-10,
"loss": 2.5562,
"step": 3688
},
{
"epoch": 4.0,
"learning_rate": 1.8469917889563094e-11,
"loss": 2.2758,
"step": 3690
},
{
"epoch": 4.0,
"learning_rate": 1.8469917889563094e-11,
"loss": 2.4287,
"step": 3692
},
{
"epoch": 4.0,
"step": 3692,
"total_flos": 8.7881966778581e+16,
"train_loss": 2.440685138283933,
"train_runtime": 22362.2858,
"train_samples_per_second": 10.572,
"train_steps_per_second": 0.165
}
],
"max_steps": 3692,
"num_train_epochs": 4,
"total_flos": 8.7881966778581e+16,
"trial_name": null,
"trial_params": null
}