kullm-solar / trainer_state.json
heavytail's picture
Upload folder using huggingface_hub
3b54760 verified
raw
history blame
No virus
130 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.9944341372912802,
"eval_steps": 800,
"global_step": 4300,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0,
"loss": 2.4801,
"step": 4
},
{
"epoch": 0.0,
"learning_rate": 0,
"loss": 2.4284,
"step": 8
},
{
"epoch": 0.01,
"learning_rate": 0,
"loss": 2.2651,
"step": 12
},
{
"epoch": 0.01,
"learning_rate": 0,
"loss": 2.411,
"step": 16
},
{
"epoch": 0.01,
"learning_rate": 0,
"loss": 2.8299,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.0,
"loss": 2.2188,
"step": 24
},
{
"epoch": 0.01,
"learning_rate": 1.3082402064781276e-06,
"loss": 1.345,
"step": 28
},
{
"epoch": 0.01,
"learning_rate": 1.9623603097171917e-06,
"loss": 0.5695,
"step": 32
},
{
"epoch": 0.02,
"learning_rate": 2.262883767531511e-06,
"loss": 0.8812,
"step": 36
},
{
"epoch": 0.02,
"learning_rate": 2.5555756797431724e-06,
"loss": 0.8725,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 2.7786547836457785e-06,
"loss": 0.4851,
"step": 44
},
{
"epoch": 0.02,
"learning_rate": 2.9589528137043157e-06,
"loss": 0.8354,
"step": 48
},
{
"epoch": 0.02,
"learning_rate": 3.110267503805303e-06,
"loss": 0.4873,
"step": 52
},
{
"epoch": 0.03,
"learning_rate": 3.2406394020168525e-06,
"loss": 0.6584,
"step": 56
},
{
"epoch": 0.03,
"learning_rate": 3.3551671365864186e-06,
"loss": 0.529,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 3.4572878450621517e-06,
"loss": 0.5812,
"step": 64
},
{
"epoch": 0.03,
"learning_rate": 3.5494288615482305e-06,
"loss": 0.5163,
"step": 68
},
{
"epoch": 0.03,
"learning_rate": 3.6333682331099297e-06,
"loss": 0.6595,
"step": 72
},
{
"epoch": 0.04,
"learning_rate": 3.710447450306277e-06,
"loss": 0.4775,
"step": 76
},
{
"epoch": 0.04,
"learning_rate": 3.7817036126729157e-06,
"loss": 0.682,
"step": 80
},
{
"epoch": 0.04,
"learning_rate": 3.84795507876713e-06,
"loss": 0.7048,
"step": 84
},
{
"epoch": 0.04,
"learning_rate": 3.909858960648549e-06,
"loss": 0.9478,
"step": 88
},
{
"epoch": 0.04,
"learning_rate": 3.9679508875196075e-06,
"loss": 0.5932,
"step": 92
},
{
"epoch": 0.04,
"learning_rate": 4.022673220704539e-06,
"loss": 0.5733,
"step": 96
},
{
"epoch": 0.05,
"learning_rate": 4.074395524884577e-06,
"loss": 0.5817,
"step": 100
},
{
"epoch": 0.05,
"learning_rate": 4.123429713794031e-06,
"loss": 0.6372,
"step": 104
},
{
"epoch": 0.05,
"learning_rate": 4.170041450985754e-06,
"loss": 0.5108,
"step": 108
},
{
"epoch": 0.05,
"learning_rate": 4.214458864668026e-06,
"loss": 0.6262,
"step": 112
},
{
"epoch": 0.05,
"learning_rate": 4.256879301905398e-06,
"loss": 0.8594,
"step": 116
},
{
"epoch": 0.06,
"learning_rate": 4.297474628787183e-06,
"loss": 0.7241,
"step": 120
},
{
"epoch": 0.06,
"learning_rate": 4.336395436735046e-06,
"loss": 0.6471,
"step": 124
},
{
"epoch": 0.06,
"learning_rate": 4.373774415149143e-06,
"loss": 0.6144,
"step": 128
},
{
"epoch": 0.06,
"learning_rate": 4.409729081127459e-06,
"loss": 0.672,
"step": 132
},
{
"epoch": 0.06,
"learning_rate": 4.444364007946065e-06,
"loss": 0.6802,
"step": 136
},
{
"epoch": 0.06,
"learning_rate": 4.4777726588457195e-06,
"loss": 0.514,
"step": 140
},
{
"epoch": 0.07,
"learning_rate": 4.510038907149524e-06,
"loss": 0.715,
"step": 144
},
{
"epoch": 0.07,
"learning_rate": 4.541238304971202e-06,
"loss": 0.5189,
"step": 148
},
{
"epoch": 0.07,
"learning_rate": 4.5714391488166745e-06,
"loss": 0.7188,
"step": 152
},
{
"epoch": 0.07,
"learning_rate": 4.600703379889684e-06,
"loss": 0.5829,
"step": 156
},
{
"epoch": 0.07,
"learning_rate": 4.629087348946707e-06,
"loss": 0.5551,
"step": 160
},
{
"epoch": 0.08,
"learning_rate": 4.656642469442713e-06,
"loss": 0.7016,
"step": 164
},
{
"epoch": 0.08,
"learning_rate": 4.683415777991895e-06,
"loss": 0.5357,
"step": 168
},
{
"epoch": 0.08,
"learning_rate": 4.709450417491796e-06,
"loss": 0.6232,
"step": 172
},
{
"epoch": 0.08,
"learning_rate": 4.734786055373451e-06,
"loss": 0.7218,
"step": 176
},
{
"epoch": 0.08,
"learning_rate": 4.759459247158257e-06,
"loss": 0.51,
"step": 180
},
{
"epoch": 0.09,
"learning_rate": 4.783503753685794e-06,
"loss": 0.8871,
"step": 184
},
{
"epoch": 0.09,
"learning_rate": 4.806950818921448e-06,
"loss": 0.8177,
"step": 188
},
{
"epoch": 0.09,
"learning_rate": 4.8298294140798465e-06,
"loss": 0.5602,
"step": 192
},
{
"epoch": 0.09,
"learning_rate": 4.852166452849314e-06,
"loss": 0.7395,
"step": 196
},
{
"epoch": 0.09,
"learning_rate": 4.8739869817278244e-06,
"loss": 0.7008,
"step": 200
},
{
"epoch": 0.09,
"learning_rate": 4.89531434884623e-06,
"loss": 0.7096,
"step": 204
},
{
"epoch": 0.1,
"learning_rate": 4.916170354132174e-06,
"loss": 0.7617,
"step": 208
},
{
"epoch": 0.1,
"learning_rate": 4.936575383236021e-06,
"loss": 0.637,
"step": 212
},
{
"epoch": 0.1,
"learning_rate": 4.956548527281403e-06,
"loss": 0.7149,
"step": 216
},
{
"epoch": 0.1,
"learning_rate": 4.976107690203556e-06,
"loss": 0.585,
"step": 220
},
{
"epoch": 0.1,
"learning_rate": 4.995269685187989e-06,
"loss": 0.4153,
"step": 224
},
{
"epoch": 0.11,
"learning_rate": 4.998404594767071e-06,
"loss": 0.8215,
"step": 228
},
{
"epoch": 0.11,
"learning_rate": 4.995213784301213e-06,
"loss": 0.6051,
"step": 232
},
{
"epoch": 0.11,
"learning_rate": 4.992022973835355e-06,
"loss": 0.6785,
"step": 236
},
{
"epoch": 0.11,
"learning_rate": 4.988832163369496e-06,
"loss": 0.6774,
"step": 240
},
{
"epoch": 0.11,
"learning_rate": 4.985641352903638e-06,
"loss": 0.5039,
"step": 244
},
{
"epoch": 0.12,
"learning_rate": 4.982450542437779e-06,
"loss": 0.5724,
"step": 248
},
{
"epoch": 0.12,
"learning_rate": 4.979259731971921e-06,
"loss": 0.8469,
"step": 252
},
{
"epoch": 0.12,
"learning_rate": 4.976068921506063e-06,
"loss": 0.9735,
"step": 256
},
{
"epoch": 0.12,
"learning_rate": 4.972878111040205e-06,
"loss": 0.8594,
"step": 260
},
{
"epoch": 0.12,
"learning_rate": 4.969687300574346e-06,
"loss": 0.6523,
"step": 264
},
{
"epoch": 0.12,
"learning_rate": 4.9664964901084875e-06,
"loss": 0.6641,
"step": 268
},
{
"epoch": 0.13,
"learning_rate": 4.96330567964263e-06,
"loss": 0.6586,
"step": 272
},
{
"epoch": 0.13,
"learning_rate": 4.960114869176771e-06,
"loss": 0.6019,
"step": 276
},
{
"epoch": 0.13,
"learning_rate": 4.956924058710913e-06,
"loss": 0.7117,
"step": 280
},
{
"epoch": 0.13,
"learning_rate": 4.953733248245054e-06,
"loss": 0.4632,
"step": 284
},
{
"epoch": 0.13,
"learning_rate": 4.950542437779196e-06,
"loss": 0.733,
"step": 288
},
{
"epoch": 0.14,
"learning_rate": 4.947351627313338e-06,
"loss": 0.7205,
"step": 292
},
{
"epoch": 0.14,
"learning_rate": 4.9441608168474795e-06,
"loss": 0.6121,
"step": 296
},
{
"epoch": 0.14,
"learning_rate": 4.940970006381621e-06,
"loss": 0.7859,
"step": 300
},
{
"epoch": 0.14,
"learning_rate": 4.9377791959157625e-06,
"loss": 0.8041,
"step": 304
},
{
"epoch": 0.14,
"learning_rate": 4.934588385449905e-06,
"loss": 0.7834,
"step": 308
},
{
"epoch": 0.14,
"learning_rate": 4.931397574984046e-06,
"loss": 0.6704,
"step": 312
},
{
"epoch": 0.15,
"learning_rate": 4.928206764518188e-06,
"loss": 0.8402,
"step": 316
},
{
"epoch": 0.15,
"learning_rate": 4.925015954052329e-06,
"loss": 0.7851,
"step": 320
},
{
"epoch": 0.15,
"learning_rate": 4.9218251435864715e-06,
"loss": 0.501,
"step": 324
},
{
"epoch": 0.15,
"learning_rate": 4.918634333120613e-06,
"loss": 0.6039,
"step": 328
},
{
"epoch": 0.15,
"learning_rate": 4.9154435226547544e-06,
"loss": 0.64,
"step": 332
},
{
"epoch": 0.16,
"learning_rate": 4.912252712188896e-06,
"loss": 0.5726,
"step": 336
},
{
"epoch": 0.16,
"learning_rate": 4.909061901723038e-06,
"loss": 0.6605,
"step": 340
},
{
"epoch": 0.16,
"learning_rate": 4.90587109125718e-06,
"loss": 0.8105,
"step": 344
},
{
"epoch": 0.16,
"learning_rate": 4.902680280791321e-06,
"loss": 0.8422,
"step": 348
},
{
"epoch": 0.16,
"learning_rate": 4.8994894703254635e-06,
"loss": 0.5242,
"step": 352
},
{
"epoch": 0.17,
"learning_rate": 4.896298659859605e-06,
"loss": 0.6062,
"step": 356
},
{
"epoch": 0.17,
"learning_rate": 4.8931078493937464e-06,
"loss": 0.7289,
"step": 360
},
{
"epoch": 0.17,
"learning_rate": 4.889917038927888e-06,
"loss": 0.6916,
"step": 364
},
{
"epoch": 0.17,
"learning_rate": 4.88672622846203e-06,
"loss": 0.8526,
"step": 368
},
{
"epoch": 0.17,
"learning_rate": 4.883535417996172e-06,
"loss": 1.0668,
"step": 372
},
{
"epoch": 0.17,
"learning_rate": 4.880344607530313e-06,
"loss": 0.6912,
"step": 376
},
{
"epoch": 0.18,
"learning_rate": 4.877153797064455e-06,
"loss": 0.7383,
"step": 380
},
{
"epoch": 0.18,
"learning_rate": 4.873962986598597e-06,
"loss": 0.77,
"step": 384
},
{
"epoch": 0.18,
"learning_rate": 4.870772176132738e-06,
"loss": 0.8328,
"step": 388
},
{
"epoch": 0.18,
"learning_rate": 4.86758136566688e-06,
"loss": 0.7135,
"step": 392
},
{
"epoch": 0.18,
"learning_rate": 4.864390555201021e-06,
"loss": 0.7976,
"step": 396
},
{
"epoch": 0.19,
"learning_rate": 4.861199744735164e-06,
"loss": 0.5799,
"step": 400
},
{
"epoch": 0.19,
"learning_rate": 4.858008934269305e-06,
"loss": 0.5246,
"step": 404
},
{
"epoch": 0.19,
"learning_rate": 4.854818123803447e-06,
"loss": 0.5895,
"step": 408
},
{
"epoch": 0.19,
"learning_rate": 4.851627313337588e-06,
"loss": 0.7751,
"step": 412
},
{
"epoch": 0.19,
"learning_rate": 4.84843650287173e-06,
"loss": 0.7469,
"step": 416
},
{
"epoch": 0.19,
"learning_rate": 4.845245692405872e-06,
"loss": 0.5013,
"step": 420
},
{
"epoch": 0.2,
"learning_rate": 4.842054881940013e-06,
"loss": 0.5398,
"step": 424
},
{
"epoch": 0.2,
"learning_rate": 4.838864071474155e-06,
"loss": 0.4547,
"step": 428
},
{
"epoch": 0.2,
"learning_rate": 4.835673261008296e-06,
"loss": 0.8732,
"step": 432
},
{
"epoch": 0.2,
"learning_rate": 4.832482450542439e-06,
"loss": 0.7671,
"step": 436
},
{
"epoch": 0.2,
"learning_rate": 4.82929164007658e-06,
"loss": 0.6574,
"step": 440
},
{
"epoch": 0.21,
"learning_rate": 4.8261008296107215e-06,
"loss": 0.7173,
"step": 444
},
{
"epoch": 0.21,
"learning_rate": 4.822910019144863e-06,
"loss": 0.4371,
"step": 448
},
{
"epoch": 0.21,
"learning_rate": 4.819719208679005e-06,
"loss": 0.6992,
"step": 452
},
{
"epoch": 0.21,
"learning_rate": 4.816528398213147e-06,
"loss": 0.6827,
"step": 456
},
{
"epoch": 0.21,
"learning_rate": 4.813337587747288e-06,
"loss": 0.4919,
"step": 460
},
{
"epoch": 0.22,
"learning_rate": 4.81014677728143e-06,
"loss": 0.9571,
"step": 464
},
{
"epoch": 0.22,
"learning_rate": 4.806955966815571e-06,
"loss": 0.5202,
"step": 468
},
{
"epoch": 0.22,
"learning_rate": 4.8037651563497135e-06,
"loss": 0.7919,
"step": 472
},
{
"epoch": 0.22,
"learning_rate": 4.800574345883855e-06,
"loss": 0.5517,
"step": 476
},
{
"epoch": 0.22,
"learning_rate": 4.7973835354179965e-06,
"loss": 0.3889,
"step": 480
},
{
"epoch": 0.22,
"learning_rate": 4.794192724952138e-06,
"loss": 0.5933,
"step": 484
},
{
"epoch": 0.23,
"learning_rate": 4.79100191448628e-06,
"loss": 0.9298,
"step": 488
},
{
"epoch": 0.23,
"learning_rate": 4.787811104020422e-06,
"loss": 0.4758,
"step": 492
},
{
"epoch": 0.23,
"learning_rate": 4.784620293554563e-06,
"loss": 0.5162,
"step": 496
},
{
"epoch": 0.23,
"learning_rate": 4.781429483088705e-06,
"loss": 0.6675,
"step": 500
},
{
"epoch": 0.23,
"learning_rate": 4.778238672622846e-06,
"loss": 0.8493,
"step": 504
},
{
"epoch": 0.24,
"learning_rate": 4.7750478621569885e-06,
"loss": 0.6583,
"step": 508
},
{
"epoch": 0.24,
"learning_rate": 4.77185705169113e-06,
"loss": 0.4897,
"step": 512
},
{
"epoch": 0.24,
"learning_rate": 4.768666241225271e-06,
"loss": 0.6633,
"step": 516
},
{
"epoch": 0.24,
"learning_rate": 4.765475430759413e-06,
"loss": 0.782,
"step": 520
},
{
"epoch": 0.24,
"learning_rate": 4.762284620293555e-06,
"loss": 0.815,
"step": 524
},
{
"epoch": 0.24,
"learning_rate": 4.759093809827697e-06,
"loss": 0.4498,
"step": 528
},
{
"epoch": 0.25,
"learning_rate": 4.755902999361838e-06,
"loss": 0.6006,
"step": 532
},
{
"epoch": 0.25,
"learning_rate": 4.75271218889598e-06,
"loss": 0.9473,
"step": 536
},
{
"epoch": 0.25,
"learning_rate": 4.749521378430121e-06,
"loss": 0.4036,
"step": 540
},
{
"epoch": 0.25,
"learning_rate": 4.746330567964263e-06,
"loss": 0.555,
"step": 544
},
{
"epoch": 0.25,
"learning_rate": 4.743139757498405e-06,
"loss": 0.7843,
"step": 548
},
{
"epoch": 0.26,
"learning_rate": 4.739948947032546e-06,
"loss": 0.8376,
"step": 552
},
{
"epoch": 0.26,
"learning_rate": 4.736758136566688e-06,
"loss": 0.5423,
"step": 556
},
{
"epoch": 0.26,
"learning_rate": 4.73356732610083e-06,
"loss": 0.5533,
"step": 560
},
{
"epoch": 0.26,
"learning_rate": 4.7303765156349716e-06,
"loss": 0.5212,
"step": 564
},
{
"epoch": 0.26,
"learning_rate": 4.727185705169113e-06,
"loss": 0.8054,
"step": 568
},
{
"epoch": 0.27,
"learning_rate": 4.7239948947032545e-06,
"loss": 0.438,
"step": 572
},
{
"epoch": 0.27,
"learning_rate": 4.720804084237397e-06,
"loss": 0.6025,
"step": 576
},
{
"epoch": 0.27,
"learning_rate": 4.717613273771538e-06,
"loss": 0.8118,
"step": 580
},
{
"epoch": 0.27,
"learning_rate": 4.71442246330568e-06,
"loss": 0.6911,
"step": 584
},
{
"epoch": 0.27,
"learning_rate": 4.711231652839821e-06,
"loss": 0.7022,
"step": 588
},
{
"epoch": 0.27,
"learning_rate": 4.7080408423739636e-06,
"loss": 0.5918,
"step": 592
},
{
"epoch": 0.28,
"learning_rate": 4.704850031908105e-06,
"loss": 0.6012,
"step": 596
},
{
"epoch": 0.28,
"learning_rate": 4.7016592214422465e-06,
"loss": 0.8031,
"step": 600
},
{
"epoch": 0.28,
"learning_rate": 4.698468410976389e-06,
"loss": 0.7864,
"step": 604
},
{
"epoch": 0.28,
"learning_rate": 4.69527760051053e-06,
"loss": 0.6361,
"step": 608
},
{
"epoch": 0.28,
"learning_rate": 4.692086790044672e-06,
"loss": 0.6619,
"step": 612
},
{
"epoch": 0.29,
"learning_rate": 4.688895979578813e-06,
"loss": 0.5132,
"step": 616
},
{
"epoch": 0.29,
"learning_rate": 4.6857051691129555e-06,
"loss": 0.6111,
"step": 620
},
{
"epoch": 0.29,
"learning_rate": 4.682514358647097e-06,
"loss": 0.7884,
"step": 624
},
{
"epoch": 0.29,
"learning_rate": 4.6793235481812385e-06,
"loss": 0.4355,
"step": 628
},
{
"epoch": 0.29,
"learning_rate": 4.67613273771538e-06,
"loss": 0.7325,
"step": 632
},
{
"epoch": 0.29,
"learning_rate": 4.672941927249522e-06,
"loss": 0.5633,
"step": 636
},
{
"epoch": 0.3,
"learning_rate": 4.669751116783664e-06,
"loss": 0.6415,
"step": 640
},
{
"epoch": 0.3,
"learning_rate": 4.666560306317805e-06,
"loss": 0.6508,
"step": 644
},
{
"epoch": 0.3,
"learning_rate": 4.663369495851947e-06,
"loss": 0.5909,
"step": 648
},
{
"epoch": 0.3,
"learning_rate": 4.660178685386089e-06,
"loss": 0.5651,
"step": 652
},
{
"epoch": 0.3,
"learning_rate": 4.6569878749202305e-06,
"loss": 0.6729,
"step": 656
},
{
"epoch": 0.31,
"learning_rate": 4.653797064454372e-06,
"loss": 0.842,
"step": 660
},
{
"epoch": 0.31,
"learning_rate": 4.650606253988513e-06,
"loss": 0.5844,
"step": 664
},
{
"epoch": 0.31,
"learning_rate": 4.647415443522656e-06,
"loss": 0.7394,
"step": 668
},
{
"epoch": 0.31,
"learning_rate": 4.644224633056797e-06,
"loss": 0.6725,
"step": 672
},
{
"epoch": 0.31,
"learning_rate": 4.641033822590939e-06,
"loss": 0.6416,
"step": 676
},
{
"epoch": 0.32,
"learning_rate": 4.63784301212508e-06,
"loss": 0.7926,
"step": 680
},
{
"epoch": 0.32,
"learning_rate": 4.634652201659222e-06,
"loss": 0.5941,
"step": 684
},
{
"epoch": 0.32,
"learning_rate": 4.631461391193364e-06,
"loss": 0.9582,
"step": 688
},
{
"epoch": 0.32,
"learning_rate": 4.628270580727505e-06,
"loss": 0.4289,
"step": 692
},
{
"epoch": 0.32,
"learning_rate": 4.625079770261647e-06,
"loss": 0.6518,
"step": 696
},
{
"epoch": 0.32,
"learning_rate": 4.621888959795788e-06,
"loss": 0.8722,
"step": 700
},
{
"epoch": 0.33,
"learning_rate": 4.618698149329931e-06,
"loss": 0.5419,
"step": 704
},
{
"epoch": 0.33,
"learning_rate": 4.615507338864072e-06,
"loss": 0.6891,
"step": 708
},
{
"epoch": 0.33,
"learning_rate": 4.612316528398214e-06,
"loss": 0.5157,
"step": 712
},
{
"epoch": 0.33,
"learning_rate": 4.609125717932355e-06,
"loss": 0.7015,
"step": 716
},
{
"epoch": 0.33,
"learning_rate": 4.6059349074664965e-06,
"loss": 0.546,
"step": 720
},
{
"epoch": 0.34,
"learning_rate": 4.602744097000639e-06,
"loss": 0.6735,
"step": 724
},
{
"epoch": 0.34,
"learning_rate": 4.59955328653478e-06,
"loss": 0.5564,
"step": 728
},
{
"epoch": 0.34,
"learning_rate": 4.596362476068922e-06,
"loss": 0.5182,
"step": 732
},
{
"epoch": 0.34,
"learning_rate": 4.593171665603063e-06,
"loss": 0.4053,
"step": 736
},
{
"epoch": 0.34,
"learning_rate": 4.5899808551372056e-06,
"loss": 0.4039,
"step": 740
},
{
"epoch": 0.35,
"learning_rate": 4.586790044671347e-06,
"loss": 0.6502,
"step": 744
},
{
"epoch": 0.35,
"learning_rate": 4.5835992342054885e-06,
"loss": 0.8062,
"step": 748
},
{
"epoch": 0.35,
"learning_rate": 4.58040842373963e-06,
"loss": 0.4143,
"step": 752
},
{
"epoch": 0.35,
"learning_rate": 4.5772176132737715e-06,
"loss": 0.5539,
"step": 756
},
{
"epoch": 0.35,
"learning_rate": 4.574026802807914e-06,
"loss": 0.5926,
"step": 760
},
{
"epoch": 0.35,
"learning_rate": 4.570835992342055e-06,
"loss": 0.751,
"step": 764
},
{
"epoch": 0.36,
"learning_rate": 4.567645181876197e-06,
"loss": 0.5886,
"step": 768
},
{
"epoch": 0.36,
"learning_rate": 4.564454371410338e-06,
"loss": 0.677,
"step": 772
},
{
"epoch": 0.36,
"learning_rate": 4.5612635609444805e-06,
"loss": 0.7097,
"step": 776
},
{
"epoch": 0.36,
"learning_rate": 4.558072750478622e-06,
"loss": 0.56,
"step": 780
},
{
"epoch": 0.36,
"learning_rate": 4.5548819400127634e-06,
"loss": 0.4481,
"step": 784
},
{
"epoch": 0.37,
"learning_rate": 4.551691129546905e-06,
"loss": 0.4959,
"step": 788
},
{
"epoch": 0.37,
"learning_rate": 4.548500319081046e-06,
"loss": 0.8399,
"step": 792
},
{
"epoch": 0.37,
"learning_rate": 4.545309508615189e-06,
"loss": 0.6904,
"step": 796
},
{
"epoch": 0.37,
"learning_rate": 4.54211869814933e-06,
"loss": 0.8689,
"step": 800
},
{
"epoch": 0.37,
"learning_rate": 4.538927887683472e-06,
"loss": 0.6232,
"step": 804
},
{
"epoch": 0.37,
"learning_rate": 4.535737077217613e-06,
"loss": 0.6428,
"step": 808
},
{
"epoch": 0.38,
"learning_rate": 4.5325462667517554e-06,
"loss": 0.7462,
"step": 812
},
{
"epoch": 0.38,
"learning_rate": 4.529355456285897e-06,
"loss": 0.529,
"step": 816
},
{
"epoch": 0.38,
"learning_rate": 4.526164645820038e-06,
"loss": 0.4875,
"step": 820
},
{
"epoch": 0.38,
"learning_rate": 4.52297383535418e-06,
"loss": 0.6747,
"step": 824
},
{
"epoch": 0.38,
"learning_rate": 4.519783024888322e-06,
"loss": 0.7061,
"step": 828
},
{
"epoch": 0.39,
"learning_rate": 4.516592214422464e-06,
"loss": 0.7865,
"step": 832
},
{
"epoch": 0.39,
"learning_rate": 4.513401403956605e-06,
"loss": 0.5122,
"step": 836
},
{
"epoch": 0.39,
"learning_rate": 4.5102105934907466e-06,
"loss": 0.4014,
"step": 840
},
{
"epoch": 0.39,
"learning_rate": 4.507019783024889e-06,
"loss": 0.7509,
"step": 844
},
{
"epoch": 0.39,
"learning_rate": 4.50382897255903e-06,
"loss": 0.8073,
"step": 848
},
{
"epoch": 0.4,
"learning_rate": 4.500638162093172e-06,
"loss": 0.3459,
"step": 852
},
{
"epoch": 0.4,
"learning_rate": 4.497447351627314e-06,
"loss": 0.6814,
"step": 856
},
{
"epoch": 0.4,
"learning_rate": 4.494256541161456e-06,
"loss": 1.1027,
"step": 860
},
{
"epoch": 0.4,
"learning_rate": 4.491065730695597e-06,
"loss": 0.5254,
"step": 864
},
{
"epoch": 0.4,
"learning_rate": 4.4878749202297385e-06,
"loss": 0.7436,
"step": 868
},
{
"epoch": 0.4,
"learning_rate": 4.484684109763881e-06,
"loss": 0.4877,
"step": 872
},
{
"epoch": 0.41,
"learning_rate": 4.481493299298022e-06,
"loss": 0.657,
"step": 876
},
{
"epoch": 0.41,
"learning_rate": 4.478302488832164e-06,
"loss": 0.7193,
"step": 880
},
{
"epoch": 0.41,
"learning_rate": 4.475111678366305e-06,
"loss": 0.5461,
"step": 884
},
{
"epoch": 0.41,
"learning_rate": 4.471920867900448e-06,
"loss": 0.5707,
"step": 888
},
{
"epoch": 0.41,
"learning_rate": 4.468730057434589e-06,
"loss": 0.9755,
"step": 892
},
{
"epoch": 0.42,
"learning_rate": 4.4655392469687305e-06,
"loss": 0.551,
"step": 896
},
{
"epoch": 0.42,
"learning_rate": 4.462348436502872e-06,
"loss": 0.499,
"step": 900
},
{
"epoch": 0.42,
"learning_rate": 4.459157626037014e-06,
"loss": 0.4268,
"step": 904
},
{
"epoch": 0.42,
"learning_rate": 4.455966815571156e-06,
"loss": 0.6658,
"step": 908
},
{
"epoch": 0.42,
"learning_rate": 4.452776005105297e-06,
"loss": 0.5642,
"step": 912
},
{
"epoch": 0.42,
"learning_rate": 4.449585194639439e-06,
"loss": 0.6943,
"step": 916
},
{
"epoch": 0.43,
"learning_rate": 4.446394384173581e-06,
"loss": 0.5404,
"step": 920
},
{
"epoch": 0.43,
"learning_rate": 4.4432035737077225e-06,
"loss": 0.7934,
"step": 924
},
{
"epoch": 0.43,
"learning_rate": 4.440012763241864e-06,
"loss": 0.7138,
"step": 928
},
{
"epoch": 0.43,
"learning_rate": 4.4368219527760055e-06,
"loss": 0.5249,
"step": 932
},
{
"epoch": 0.43,
"learning_rate": 4.433631142310147e-06,
"loss": 0.9614,
"step": 936
},
{
"epoch": 0.44,
"learning_rate": 4.430440331844289e-06,
"loss": 0.5915,
"step": 940
},
{
"epoch": 0.44,
"learning_rate": 4.427249521378431e-06,
"loss": 0.6766,
"step": 944
},
{
"epoch": 0.44,
"learning_rate": 4.424058710912572e-06,
"loss": 0.6641,
"step": 948
},
{
"epoch": 0.44,
"learning_rate": 4.420867900446714e-06,
"loss": 0.4849,
"step": 952
},
{
"epoch": 0.44,
"learning_rate": 4.417677089980856e-06,
"loss": 0.7182,
"step": 956
},
{
"epoch": 0.45,
"learning_rate": 4.4144862795149974e-06,
"loss": 0.6782,
"step": 960
},
{
"epoch": 0.45,
"learning_rate": 4.411295469049139e-06,
"loss": 0.4837,
"step": 964
},
{
"epoch": 0.45,
"learning_rate": 4.40810465858328e-06,
"loss": 0.7323,
"step": 968
},
{
"epoch": 0.45,
"learning_rate": 4.404913848117422e-06,
"loss": 0.5807,
"step": 972
},
{
"epoch": 0.45,
"learning_rate": 4.401723037651564e-06,
"loss": 0.373,
"step": 976
},
{
"epoch": 0.45,
"learning_rate": 4.398532227185706e-06,
"loss": 0.5072,
"step": 980
},
{
"epoch": 0.46,
"learning_rate": 4.395341416719847e-06,
"loss": 0.5952,
"step": 984
},
{
"epoch": 0.46,
"learning_rate": 4.392150606253989e-06,
"loss": 0.549,
"step": 988
},
{
"epoch": 0.46,
"learning_rate": 4.388959795788131e-06,
"loss": 0.5918,
"step": 992
},
{
"epoch": 0.46,
"learning_rate": 4.385768985322272e-06,
"loss": 0.4411,
"step": 996
},
{
"epoch": 0.46,
"learning_rate": 4.382578174856414e-06,
"loss": 0.7001,
"step": 1000
},
{
"epoch": 0.47,
"learning_rate": 4.379387364390555e-06,
"loss": 0.744,
"step": 1004
},
{
"epoch": 0.47,
"learning_rate": 4.376196553924697e-06,
"loss": 0.4091,
"step": 1008
},
{
"epoch": 0.47,
"learning_rate": 4.373005743458839e-06,
"loss": 0.7464,
"step": 1012
},
{
"epoch": 0.47,
"learning_rate": 4.3698149329929806e-06,
"loss": 0.6164,
"step": 1016
},
{
"epoch": 0.47,
"learning_rate": 4.366624122527122e-06,
"loss": 0.6213,
"step": 1020
},
{
"epoch": 0.47,
"learning_rate": 4.3634333120612635e-06,
"loss": 0.6991,
"step": 1024
},
{
"epoch": 0.48,
"learning_rate": 4.360242501595406e-06,
"loss": 0.5268,
"step": 1028
},
{
"epoch": 0.48,
"learning_rate": 4.357051691129547e-06,
"loss": 0.7768,
"step": 1032
},
{
"epoch": 0.48,
"learning_rate": 4.353860880663689e-06,
"loss": 0.9204,
"step": 1036
},
{
"epoch": 0.48,
"learning_rate": 4.35067007019783e-06,
"loss": 0.5844,
"step": 1040
},
{
"epoch": 0.48,
"learning_rate": 4.347479259731972e-06,
"loss": 0.5198,
"step": 1044
},
{
"epoch": 0.49,
"learning_rate": 4.344288449266114e-06,
"loss": 0.3069,
"step": 1048
},
{
"epoch": 0.49,
"learning_rate": 4.3410976388002555e-06,
"loss": 0.5465,
"step": 1052
},
{
"epoch": 0.49,
"learning_rate": 4.337906828334397e-06,
"loss": 0.4729,
"step": 1056
},
{
"epoch": 0.49,
"learning_rate": 4.3347160178685384e-06,
"loss": 0.6514,
"step": 1060
},
{
"epoch": 0.49,
"learning_rate": 4.331525207402681e-06,
"loss": 0.8142,
"step": 1064
},
{
"epoch": 0.5,
"learning_rate": 4.328334396936822e-06,
"loss": 0.6477,
"step": 1068
},
{
"epoch": 0.5,
"learning_rate": 4.325143586470964e-06,
"loss": 0.4601,
"step": 1072
},
{
"epoch": 0.5,
"learning_rate": 4.321952776005105e-06,
"loss": 0.6687,
"step": 1076
},
{
"epoch": 0.5,
"learning_rate": 4.3187619655392475e-06,
"loss": 0.4565,
"step": 1080
},
{
"epoch": 0.5,
"learning_rate": 4.315571155073389e-06,
"loss": 0.646,
"step": 1084
},
{
"epoch": 0.5,
"learning_rate": 4.31238034460753e-06,
"loss": 0.6145,
"step": 1088
},
{
"epoch": 0.51,
"learning_rate": 4.309189534141672e-06,
"loss": 0.3854,
"step": 1092
},
{
"epoch": 0.51,
"learning_rate": 4.305998723675814e-06,
"loss": 0.6016,
"step": 1096
},
{
"epoch": 0.51,
"learning_rate": 4.302807913209956e-06,
"loss": 0.5223,
"step": 1100
},
{
"epoch": 0.51,
"learning_rate": 4.299617102744097e-06,
"loss": 0.6356,
"step": 1104
},
{
"epoch": 0.51,
"learning_rate": 4.2964262922782395e-06,
"loss": 0.4599,
"step": 1108
},
{
"epoch": 0.52,
"learning_rate": 4.293235481812381e-06,
"loss": 0.6452,
"step": 1112
},
{
"epoch": 0.52,
"learning_rate": 4.290044671346522e-06,
"loss": 0.386,
"step": 1116
},
{
"epoch": 0.52,
"learning_rate": 4.286853860880664e-06,
"loss": 0.6384,
"step": 1120
},
{
"epoch": 0.52,
"learning_rate": 4.283663050414806e-06,
"loss": 0.7654,
"step": 1124
},
{
"epoch": 0.52,
"learning_rate": 4.280472239948948e-06,
"loss": 0.6019,
"step": 1128
},
{
"epoch": 0.53,
"learning_rate": 4.277281429483089e-06,
"loss": 0.6078,
"step": 1132
},
{
"epoch": 0.53,
"learning_rate": 4.274090619017231e-06,
"loss": 0.5181,
"step": 1136
},
{
"epoch": 0.53,
"learning_rate": 4.270899808551373e-06,
"loss": 0.6731,
"step": 1140
},
{
"epoch": 0.53,
"learning_rate": 4.267708998085514e-06,
"loss": 0.4956,
"step": 1144
},
{
"epoch": 0.53,
"learning_rate": 4.264518187619656e-06,
"loss": 0.6115,
"step": 1148
},
{
"epoch": 0.53,
"learning_rate": 4.261327377153797e-06,
"loss": 0.7712,
"step": 1152
},
{
"epoch": 0.54,
"learning_rate": 4.25813656668794e-06,
"loss": 0.5086,
"step": 1156
},
{
"epoch": 0.54,
"learning_rate": 4.254945756222081e-06,
"loss": 0.7241,
"step": 1160
},
{
"epoch": 0.54,
"learning_rate": 4.251754945756223e-06,
"loss": 0.5275,
"step": 1164
},
{
"epoch": 0.54,
"learning_rate": 4.248564135290364e-06,
"loss": 0.7552,
"step": 1168
},
{
"epoch": 0.54,
"learning_rate": 4.245373324824506e-06,
"loss": 0.4292,
"step": 1172
},
{
"epoch": 0.55,
"learning_rate": 4.242182514358648e-06,
"loss": 0.7575,
"step": 1176
},
{
"epoch": 0.55,
"learning_rate": 4.238991703892789e-06,
"loss": 0.5653,
"step": 1180
},
{
"epoch": 0.55,
"learning_rate": 4.235800893426931e-06,
"loss": 0.6882,
"step": 1184
},
{
"epoch": 0.55,
"learning_rate": 4.232610082961072e-06,
"loss": 0.6488,
"step": 1188
},
{
"epoch": 0.55,
"learning_rate": 4.2294192724952146e-06,
"loss": 0.5522,
"step": 1192
},
{
"epoch": 0.55,
"learning_rate": 4.226228462029356e-06,
"loss": 0.578,
"step": 1196
},
{
"epoch": 0.56,
"learning_rate": 4.2230376515634975e-06,
"loss": 0.7412,
"step": 1200
},
{
"epoch": 0.56,
"learning_rate": 4.219846841097639e-06,
"loss": 0.5138,
"step": 1204
},
{
"epoch": 0.56,
"learning_rate": 4.216656030631781e-06,
"loss": 0.6943,
"step": 1208
},
{
"epoch": 0.56,
"learning_rate": 4.213465220165923e-06,
"loss": 0.4599,
"step": 1212
},
{
"epoch": 0.56,
"learning_rate": 4.210274409700064e-06,
"loss": 0.8815,
"step": 1216
},
{
"epoch": 0.57,
"learning_rate": 4.207083599234206e-06,
"loss": 0.6245,
"step": 1220
},
{
"epoch": 0.57,
"learning_rate": 4.203892788768347e-06,
"loss": 0.5513,
"step": 1224
},
{
"epoch": 0.57,
"learning_rate": 4.2007019783024895e-06,
"loss": 0.4635,
"step": 1228
},
{
"epoch": 0.57,
"learning_rate": 4.197511167836631e-06,
"loss": 0.5711,
"step": 1232
},
{
"epoch": 0.57,
"learning_rate": 4.1943203573707724e-06,
"loss": 0.5078,
"step": 1236
},
{
"epoch": 0.58,
"learning_rate": 4.191129546904914e-06,
"loss": 0.4304,
"step": 1240
},
{
"epoch": 0.58,
"learning_rate": 4.187938736439056e-06,
"loss": 0.715,
"step": 1244
},
{
"epoch": 0.58,
"learning_rate": 4.184747925973198e-06,
"loss": 0.6305,
"step": 1248
},
{
"epoch": 0.58,
"learning_rate": 4.181557115507339e-06,
"loss": 0.6243,
"step": 1252
},
{
"epoch": 0.58,
"learning_rate": 4.178366305041481e-06,
"loss": 0.6439,
"step": 1256
},
{
"epoch": 0.58,
"learning_rate": 4.175175494575622e-06,
"loss": 0.4782,
"step": 1260
},
{
"epoch": 0.59,
"learning_rate": 4.171984684109764e-06,
"loss": 0.4523,
"step": 1264
},
{
"epoch": 0.59,
"learning_rate": 4.168793873643906e-06,
"loss": 0.4884,
"step": 1268
},
{
"epoch": 0.59,
"learning_rate": 4.165603063178047e-06,
"loss": 0.3461,
"step": 1272
},
{
"epoch": 0.59,
"learning_rate": 4.162412252712189e-06,
"loss": 0.2459,
"step": 1276
},
{
"epoch": 0.59,
"learning_rate": 4.159221442246331e-06,
"loss": 0.8138,
"step": 1280
},
{
"epoch": 0.6,
"learning_rate": 4.156030631780473e-06,
"loss": 0.6026,
"step": 1284
},
{
"epoch": 0.6,
"learning_rate": 4.152839821314614e-06,
"loss": 0.5463,
"step": 1288
},
{
"epoch": 0.6,
"learning_rate": 4.1496490108487556e-06,
"loss": 0.4317,
"step": 1292
},
{
"epoch": 0.6,
"learning_rate": 4.146458200382897e-06,
"loss": 0.6244,
"step": 1296
},
{
"epoch": 0.6,
"learning_rate": 4.143267389917039e-06,
"loss": 0.554,
"step": 1300
},
{
"epoch": 0.6,
"learning_rate": 4.140076579451181e-06,
"loss": 0.6441,
"step": 1304
},
{
"epoch": 0.61,
"learning_rate": 4.136885768985322e-06,
"loss": 0.6233,
"step": 1308
},
{
"epoch": 0.61,
"learning_rate": 4.133694958519464e-06,
"loss": 0.5561,
"step": 1312
},
{
"epoch": 0.61,
"learning_rate": 4.130504148053606e-06,
"loss": 0.7524,
"step": 1316
},
{
"epoch": 0.61,
"learning_rate": 4.1273133375877475e-06,
"loss": 0.4338,
"step": 1320
},
{
"epoch": 0.61,
"learning_rate": 4.124122527121889e-06,
"loss": 0.4495,
"step": 1324
},
{
"epoch": 0.62,
"learning_rate": 4.1209317166560305e-06,
"loss": 0.5139,
"step": 1328
},
{
"epoch": 0.62,
"learning_rate": 4.117740906190173e-06,
"loss": 0.6545,
"step": 1332
},
{
"epoch": 0.62,
"learning_rate": 4.114550095724314e-06,
"loss": 0.5588,
"step": 1336
},
{
"epoch": 0.62,
"learning_rate": 4.111359285258456e-06,
"loss": 0.609,
"step": 1340
},
{
"epoch": 0.62,
"learning_rate": 4.108168474792597e-06,
"loss": 0.553,
"step": 1344
},
{
"epoch": 0.63,
"learning_rate": 4.1049776643267395e-06,
"loss": 0.5844,
"step": 1348
},
{
"epoch": 0.63,
"learning_rate": 4.101786853860881e-06,
"loss": 0.5779,
"step": 1352
},
{
"epoch": 0.63,
"learning_rate": 4.0985960433950225e-06,
"loss": 0.4207,
"step": 1356
},
{
"epoch": 0.63,
"learning_rate": 4.095405232929165e-06,
"loss": 0.4617,
"step": 1360
},
{
"epoch": 0.63,
"learning_rate": 4.092214422463306e-06,
"loss": 0.6092,
"step": 1364
},
{
"epoch": 0.63,
"learning_rate": 4.089023611997448e-06,
"loss": 0.4607,
"step": 1368
},
{
"epoch": 0.64,
"learning_rate": 4.085832801531589e-06,
"loss": 0.4239,
"step": 1372
},
{
"epoch": 0.64,
"learning_rate": 4.0826419910657315e-06,
"loss": 0.5438,
"step": 1376
},
{
"epoch": 0.64,
"learning_rate": 4.079451180599873e-06,
"loss": 0.5006,
"step": 1380
},
{
"epoch": 0.64,
"learning_rate": 4.0762603701340144e-06,
"loss": 0.6889,
"step": 1384
},
{
"epoch": 0.64,
"learning_rate": 4.073069559668156e-06,
"loss": 0.5742,
"step": 1388
},
{
"epoch": 0.65,
"learning_rate": 4.069878749202298e-06,
"loss": 0.8366,
"step": 1392
},
{
"epoch": 0.65,
"learning_rate": 4.06668793873644e-06,
"loss": 0.5182,
"step": 1396
},
{
"epoch": 0.65,
"learning_rate": 4.063497128270581e-06,
"loss": 0.4807,
"step": 1400
},
{
"epoch": 0.65,
"learning_rate": 4.060306317804723e-06,
"loss": 0.3995,
"step": 1404
},
{
"epoch": 0.65,
"learning_rate": 4.057115507338865e-06,
"loss": 0.5958,
"step": 1408
},
{
"epoch": 0.65,
"learning_rate": 4.0539246968730064e-06,
"loss": 0.4855,
"step": 1412
},
{
"epoch": 0.66,
"learning_rate": 4.050733886407148e-06,
"loss": 0.5908,
"step": 1416
},
{
"epoch": 0.66,
"learning_rate": 4.047543075941289e-06,
"loss": 0.7867,
"step": 1420
},
{
"epoch": 0.66,
"learning_rate": 4.044352265475432e-06,
"loss": 0.7617,
"step": 1424
},
{
"epoch": 0.66,
"learning_rate": 4.041161455009573e-06,
"loss": 0.4752,
"step": 1428
},
{
"epoch": 0.66,
"learning_rate": 4.037970644543715e-06,
"loss": 0.4732,
"step": 1432
},
{
"epoch": 0.67,
"learning_rate": 4.034779834077856e-06,
"loss": 0.635,
"step": 1436
},
{
"epoch": 0.67,
"learning_rate": 4.0315890236119976e-06,
"loss": 0.4924,
"step": 1440
},
{
"epoch": 0.67,
"learning_rate": 4.02839821314614e-06,
"loss": 0.4416,
"step": 1444
},
{
"epoch": 0.67,
"learning_rate": 4.025207402680281e-06,
"loss": 0.4448,
"step": 1448
},
{
"epoch": 0.67,
"learning_rate": 4.022016592214423e-06,
"loss": 0.7631,
"step": 1452
},
{
"epoch": 0.68,
"learning_rate": 4.018825781748564e-06,
"loss": 0.5035,
"step": 1456
},
{
"epoch": 0.68,
"learning_rate": 4.015634971282707e-06,
"loss": 0.3779,
"step": 1460
},
{
"epoch": 0.68,
"learning_rate": 4.012444160816848e-06,
"loss": 0.4924,
"step": 1464
},
{
"epoch": 0.68,
"learning_rate": 4.0092533503509896e-06,
"loss": 0.3932,
"step": 1468
},
{
"epoch": 0.68,
"learning_rate": 4.006062539885131e-06,
"loss": 0.6974,
"step": 1472
},
{
"epoch": 0.68,
"learning_rate": 4.0028717294192725e-06,
"loss": 0.7347,
"step": 1476
},
{
"epoch": 0.69,
"learning_rate": 3.999680918953415e-06,
"loss": 0.5564,
"step": 1480
},
{
"epoch": 0.69,
"learning_rate": 3.996490108487556e-06,
"loss": 0.4424,
"step": 1484
},
{
"epoch": 0.69,
"learning_rate": 3.993299298021698e-06,
"loss": 0.5323,
"step": 1488
},
{
"epoch": 0.69,
"learning_rate": 3.990108487555839e-06,
"loss": 0.6138,
"step": 1492
},
{
"epoch": 0.69,
"learning_rate": 3.9869176770899815e-06,
"loss": 0.5156,
"step": 1496
},
{
"epoch": 0.7,
"learning_rate": 3.983726866624123e-06,
"loss": 0.282,
"step": 1500
},
{
"epoch": 0.7,
"learning_rate": 3.9805360561582645e-06,
"loss": 0.5392,
"step": 1504
},
{
"epoch": 0.7,
"learning_rate": 3.977345245692406e-06,
"loss": 0.5721,
"step": 1508
},
{
"epoch": 0.7,
"learning_rate": 3.974154435226547e-06,
"loss": 0.6967,
"step": 1512
},
{
"epoch": 0.7,
"learning_rate": 3.97096362476069e-06,
"loss": 0.5348,
"step": 1516
},
{
"epoch": 0.71,
"learning_rate": 3.967772814294831e-06,
"loss": 0.6884,
"step": 1520
},
{
"epoch": 0.71,
"learning_rate": 3.964582003828973e-06,
"loss": 0.5065,
"step": 1524
},
{
"epoch": 0.71,
"learning_rate": 3.961391193363114e-06,
"loss": 0.4505,
"step": 1528
},
{
"epoch": 0.71,
"learning_rate": 3.9582003828972565e-06,
"loss": 0.6881,
"step": 1532
},
{
"epoch": 0.71,
"learning_rate": 3.955009572431398e-06,
"loss": 0.5952,
"step": 1536
},
{
"epoch": 0.71,
"learning_rate": 3.951818761965539e-06,
"loss": 0.5656,
"step": 1540
},
{
"epoch": 0.72,
"learning_rate": 3.948627951499681e-06,
"loss": 0.6437,
"step": 1544
},
{
"epoch": 0.72,
"learning_rate": 3.945437141033822e-06,
"loss": 0.5179,
"step": 1548
},
{
"epoch": 0.72,
"learning_rate": 3.942246330567965e-06,
"loss": 0.5278,
"step": 1552
},
{
"epoch": 0.72,
"learning_rate": 3.939055520102106e-06,
"loss": 0.6951,
"step": 1556
},
{
"epoch": 0.72,
"learning_rate": 3.935864709636248e-06,
"loss": 0.5468,
"step": 1560
},
{
"epoch": 0.73,
"learning_rate": 3.932673899170389e-06,
"loss": 0.5132,
"step": 1564
},
{
"epoch": 0.73,
"learning_rate": 3.929483088704531e-06,
"loss": 0.6297,
"step": 1568
},
{
"epoch": 0.73,
"learning_rate": 3.926292278238673e-06,
"loss": 0.5472,
"step": 1572
},
{
"epoch": 0.73,
"learning_rate": 3.923101467772814e-06,
"loss": 0.6623,
"step": 1576
},
{
"epoch": 0.73,
"learning_rate": 3.919910657306956e-06,
"loss": 0.6216,
"step": 1580
},
{
"epoch": 0.73,
"learning_rate": 3.916719846841098e-06,
"loss": 0.5332,
"step": 1584
},
{
"epoch": 0.74,
"learning_rate": 3.91352903637524e-06,
"loss": 0.4792,
"step": 1588
},
{
"epoch": 0.74,
"learning_rate": 3.910338225909381e-06,
"loss": 0.4573,
"step": 1592
},
{
"epoch": 0.74,
"learning_rate": 3.9071474154435225e-06,
"loss": 0.5135,
"step": 1596
},
{
"epoch": 0.74,
"learning_rate": 3.903956604977665e-06,
"loss": 0.7619,
"step": 1600
},
{
"epoch": 0.74,
"learning_rate": 3.900765794511806e-06,
"loss": 0.6681,
"step": 1604
},
{
"epoch": 0.75,
"learning_rate": 3.897574984045948e-06,
"loss": 0.7789,
"step": 1608
},
{
"epoch": 0.75,
"learning_rate": 3.89438417358009e-06,
"loss": 0.6078,
"step": 1612
},
{
"epoch": 0.75,
"learning_rate": 3.8911933631142316e-06,
"loss": 0.4812,
"step": 1616
},
{
"epoch": 0.75,
"learning_rate": 3.888002552648373e-06,
"loss": 0.5893,
"step": 1620
},
{
"epoch": 0.75,
"learning_rate": 3.8848117421825145e-06,
"loss": 0.4775,
"step": 1624
},
{
"epoch": 0.76,
"learning_rate": 3.881620931716657e-06,
"loss": 0.5012,
"step": 1628
},
{
"epoch": 0.76,
"learning_rate": 3.878430121250798e-06,
"loss": 0.4752,
"step": 1632
},
{
"epoch": 0.76,
"learning_rate": 3.87523931078494e-06,
"loss": 0.4365,
"step": 1636
},
{
"epoch": 0.76,
"learning_rate": 3.872048500319081e-06,
"loss": 0.6722,
"step": 1640
},
{
"epoch": 0.76,
"learning_rate": 3.8688576898532236e-06,
"loss": 0.6083,
"step": 1644
},
{
"epoch": 0.76,
"learning_rate": 3.865666879387365e-06,
"loss": 0.4533,
"step": 1648
},
{
"epoch": 0.77,
"learning_rate": 3.8624760689215065e-06,
"loss": 0.5879,
"step": 1652
},
{
"epoch": 0.77,
"learning_rate": 3.859285258455648e-06,
"loss": 0.6564,
"step": 1656
},
{
"epoch": 0.77,
"learning_rate": 3.85609444798979e-06,
"loss": 0.5475,
"step": 1660
},
{
"epoch": 0.77,
"learning_rate": 3.852903637523932e-06,
"loss": 0.5018,
"step": 1664
},
{
"epoch": 0.77,
"learning_rate": 3.849712827058073e-06,
"loss": 0.4544,
"step": 1668
},
{
"epoch": 0.78,
"learning_rate": 3.846522016592215e-06,
"loss": 0.6603,
"step": 1672
},
{
"epoch": 0.78,
"learning_rate": 3.843331206126357e-06,
"loss": 0.6887,
"step": 1676
},
{
"epoch": 0.78,
"learning_rate": 3.8401403956604985e-06,
"loss": 0.7819,
"step": 1680
},
{
"epoch": 0.78,
"learning_rate": 3.83694958519464e-06,
"loss": 0.5052,
"step": 1684
},
{
"epoch": 0.78,
"learning_rate": 3.833758774728781e-06,
"loss": 0.6689,
"step": 1688
},
{
"epoch": 0.78,
"learning_rate": 3.830567964262923e-06,
"loss": 0.5564,
"step": 1692
},
{
"epoch": 0.79,
"learning_rate": 3.827377153797065e-06,
"loss": 0.3658,
"step": 1696
},
{
"epoch": 0.79,
"learning_rate": 3.824186343331207e-06,
"loss": 0.6376,
"step": 1700
},
{
"epoch": 0.79,
"learning_rate": 3.820995532865348e-06,
"loss": 0.5681,
"step": 1704
},
{
"epoch": 0.79,
"learning_rate": 3.81780472239949e-06,
"loss": 0.5974,
"step": 1708
},
{
"epoch": 0.79,
"learning_rate": 3.814613911933632e-06,
"loss": 0.5623,
"step": 1712
},
{
"epoch": 0.8,
"learning_rate": 3.8114231014677734e-06,
"loss": 0.6437,
"step": 1716
},
{
"epoch": 0.8,
"learning_rate": 3.808232291001915e-06,
"loss": 0.6442,
"step": 1720
},
{
"epoch": 0.8,
"learning_rate": 3.8050414805360563e-06,
"loss": 0.4729,
"step": 1724
},
{
"epoch": 0.8,
"learning_rate": 3.801850670070198e-06,
"loss": 0.3677,
"step": 1728
},
{
"epoch": 0.8,
"learning_rate": 3.79865985960434e-06,
"loss": 0.4295,
"step": 1732
},
{
"epoch": 0.81,
"learning_rate": 3.7954690491384816e-06,
"loss": 0.6049,
"step": 1736
},
{
"epoch": 0.81,
"learning_rate": 3.792278238672623e-06,
"loss": 0.6363,
"step": 1740
},
{
"epoch": 0.81,
"learning_rate": 3.7890874282067645e-06,
"loss": 0.5939,
"step": 1744
},
{
"epoch": 0.81,
"learning_rate": 3.785896617740907e-06,
"loss": 0.5011,
"step": 1748
},
{
"epoch": 0.81,
"learning_rate": 3.7827058072750483e-06,
"loss": 0.5177,
"step": 1752
},
{
"epoch": 0.81,
"learning_rate": 3.77951499680919e-06,
"loss": 0.7722,
"step": 1756
},
{
"epoch": 0.82,
"learning_rate": 3.7763241863433313e-06,
"loss": 0.5204,
"step": 1760
},
{
"epoch": 0.82,
"learning_rate": 3.773133375877473e-06,
"loss": 0.455,
"step": 1764
},
{
"epoch": 0.82,
"learning_rate": 3.769942565411615e-06,
"loss": 0.5397,
"step": 1768
},
{
"epoch": 0.82,
"learning_rate": 3.7667517549457565e-06,
"loss": 0.5528,
"step": 1772
},
{
"epoch": 0.82,
"learning_rate": 3.763560944479898e-06,
"loss": 0.5286,
"step": 1776
},
{
"epoch": 0.83,
"learning_rate": 3.76037013401404e-06,
"loss": 0.5475,
"step": 1780
},
{
"epoch": 0.83,
"learning_rate": 3.7571793235481818e-06,
"loss": 0.3887,
"step": 1784
},
{
"epoch": 0.83,
"learning_rate": 3.7539885130823233e-06,
"loss": 0.6288,
"step": 1788
},
{
"epoch": 0.83,
"learning_rate": 3.7507977026164647e-06,
"loss": 0.5563,
"step": 1792
},
{
"epoch": 0.83,
"learning_rate": 3.7476068921506066e-06,
"loss": 0.6103,
"step": 1796
},
{
"epoch": 0.83,
"learning_rate": 3.744416081684748e-06,
"loss": 0.4141,
"step": 1800
},
{
"epoch": 0.84,
"learning_rate": 3.74122527121889e-06,
"loss": 0.4075,
"step": 1804
},
{
"epoch": 0.84,
"learning_rate": 3.738034460753032e-06,
"loss": 0.3594,
"step": 1808
},
{
"epoch": 0.84,
"learning_rate": 3.7348436502871733e-06,
"loss": 0.5157,
"step": 1812
},
{
"epoch": 0.84,
"learning_rate": 3.731652839821315e-06,
"loss": 0.4918,
"step": 1816
},
{
"epoch": 0.84,
"learning_rate": 3.7284620293554563e-06,
"loss": 0.4456,
"step": 1820
},
{
"epoch": 0.85,
"learning_rate": 3.7252712188895986e-06,
"loss": 0.7768,
"step": 1824
},
{
"epoch": 0.85,
"learning_rate": 3.72208040842374e-06,
"loss": 0.7511,
"step": 1828
},
{
"epoch": 0.85,
"learning_rate": 3.7188895979578815e-06,
"loss": 0.4604,
"step": 1832
},
{
"epoch": 0.85,
"learning_rate": 3.715698787492023e-06,
"loss": 0.6048,
"step": 1836
},
{
"epoch": 0.85,
"learning_rate": 3.7125079770261653e-06,
"loss": 0.6261,
"step": 1840
},
{
"epoch": 0.86,
"learning_rate": 3.709317166560307e-06,
"loss": 0.7588,
"step": 1844
},
{
"epoch": 0.86,
"learning_rate": 3.7061263560944483e-06,
"loss": 0.6608,
"step": 1848
},
{
"epoch": 0.86,
"learning_rate": 3.7029355456285897e-06,
"loss": 0.5453,
"step": 1852
},
{
"epoch": 0.86,
"learning_rate": 3.6997447351627312e-06,
"loss": 0.4361,
"step": 1856
},
{
"epoch": 0.86,
"learning_rate": 3.6965539246968735e-06,
"loss": 0.5557,
"step": 1860
},
{
"epoch": 0.86,
"learning_rate": 3.693363114231015e-06,
"loss": 0.6371,
"step": 1864
},
{
"epoch": 0.87,
"learning_rate": 3.6901723037651565e-06,
"loss": 0.4953,
"step": 1868
},
{
"epoch": 0.87,
"learning_rate": 3.686981493299298e-06,
"loss": 0.4157,
"step": 1872
},
{
"epoch": 0.87,
"learning_rate": 3.6837906828334403e-06,
"loss": 0.5469,
"step": 1876
},
{
"epoch": 0.87,
"learning_rate": 3.6805998723675817e-06,
"loss": 0.4933,
"step": 1880
},
{
"epoch": 0.87,
"learning_rate": 3.677409061901723e-06,
"loss": 0.4994,
"step": 1884
},
{
"epoch": 0.88,
"learning_rate": 3.6742182514358647e-06,
"loss": 0.3726,
"step": 1888
},
{
"epoch": 0.88,
"learning_rate": 3.6710274409700066e-06,
"loss": 0.5413,
"step": 1892
},
{
"epoch": 0.88,
"learning_rate": 3.6678366305041485e-06,
"loss": 0.574,
"step": 1896
},
{
"epoch": 0.88,
"learning_rate": 3.66464582003829e-06,
"loss": 0.2569,
"step": 1900
},
{
"epoch": 0.88,
"learning_rate": 3.6614550095724314e-06,
"loss": 0.5012,
"step": 1904
},
{
"epoch": 0.88,
"learning_rate": 3.6582641991065733e-06,
"loss": 0.586,
"step": 1908
},
{
"epoch": 0.89,
"learning_rate": 3.655073388640715e-06,
"loss": 0.4588,
"step": 1912
},
{
"epoch": 0.89,
"learning_rate": 3.6518825781748567e-06,
"loss": 0.3745,
"step": 1916
},
{
"epoch": 0.89,
"learning_rate": 3.6486917677089985e-06,
"loss": 0.5444,
"step": 1920
},
{
"epoch": 0.89,
"learning_rate": 3.64550095724314e-06,
"loss": 0.5545,
"step": 1924
},
{
"epoch": 0.89,
"learning_rate": 3.6423101467772815e-06,
"loss": 0.6965,
"step": 1928
},
{
"epoch": 0.9,
"learning_rate": 3.6391193363114234e-06,
"loss": 0.4442,
"step": 1932
},
{
"epoch": 0.9,
"learning_rate": 3.6359285258455653e-06,
"loss": 0.4866,
"step": 1936
},
{
"epoch": 0.9,
"learning_rate": 3.6327377153797067e-06,
"loss": 0.5114,
"step": 1940
},
{
"epoch": 0.9,
"learning_rate": 3.6295469049138482e-06,
"loss": 0.5922,
"step": 1944
},
{
"epoch": 0.9,
"learning_rate": 3.62635609444799e-06,
"loss": 0.4787,
"step": 1948
},
{
"epoch": 0.91,
"learning_rate": 3.623165283982132e-06,
"loss": 0.6709,
"step": 1952
},
{
"epoch": 0.91,
"learning_rate": 3.6199744735162735e-06,
"loss": 0.5078,
"step": 1956
},
{
"epoch": 0.91,
"learning_rate": 3.616783663050415e-06,
"loss": 0.5999,
"step": 1960
},
{
"epoch": 0.91,
"learning_rate": 3.6135928525845564e-06,
"loss": 0.5051,
"step": 1964
},
{
"epoch": 0.91,
"learning_rate": 3.6111997447351634e-06,
"loss": 0.4373,
"step": 1968
},
{
"epoch": 0.91,
"learning_rate": 3.608008934269305e-06,
"loss": 0.7497,
"step": 1972
},
{
"epoch": 0.92,
"learning_rate": 3.6048181238034463e-06,
"loss": 0.458,
"step": 1976
},
{
"epoch": 0.92,
"learning_rate": 3.6016273133375878e-06,
"loss": 0.3981,
"step": 1980
},
{
"epoch": 0.92,
"learning_rate": 3.59843650287173e-06,
"loss": 0.4995,
"step": 1984
},
{
"epoch": 0.92,
"learning_rate": 3.5952456924058716e-06,
"loss": 0.493,
"step": 1988
},
{
"epoch": 0.92,
"learning_rate": 3.592054881940013e-06,
"loss": 0.462,
"step": 1992
},
{
"epoch": 0.93,
"learning_rate": 3.5888640714741545e-06,
"loss": 0.5239,
"step": 1996
},
{
"epoch": 0.93,
"learning_rate": 3.585673261008296e-06,
"loss": 0.4376,
"step": 2000
},
{
"epoch": 0.93,
"learning_rate": 3.5824824505424383e-06,
"loss": 0.6067,
"step": 2004
},
{
"epoch": 0.93,
"learning_rate": 3.5792916400765798e-06,
"loss": 0.4091,
"step": 2008
},
{
"epoch": 0.93,
"learning_rate": 3.5761008296107212e-06,
"loss": 0.5261,
"step": 2012
},
{
"epoch": 0.94,
"learning_rate": 3.5729100191448627e-06,
"loss": 0.5408,
"step": 2016
},
{
"epoch": 0.94,
"learning_rate": 3.569719208679005e-06,
"loss": 0.5867,
"step": 2020
},
{
"epoch": 0.94,
"learning_rate": 3.5665283982131465e-06,
"loss": 0.636,
"step": 2024
},
{
"epoch": 0.94,
"learning_rate": 3.563337587747288e-06,
"loss": 0.4329,
"step": 2028
},
{
"epoch": 0.94,
"learning_rate": 3.56014677728143e-06,
"loss": 0.7026,
"step": 2032
},
{
"epoch": 0.94,
"learning_rate": 3.5569559668155713e-06,
"loss": 0.5245,
"step": 2036
},
{
"epoch": 0.95,
"learning_rate": 3.553765156349713e-06,
"loss": 0.4929,
"step": 2040
},
{
"epoch": 0.95,
"learning_rate": 3.5505743458838547e-06,
"loss": 0.4876,
"step": 2044
},
{
"epoch": 0.95,
"learning_rate": 3.5473835354179966e-06,
"loss": 0.45,
"step": 2048
},
{
"epoch": 0.95,
"learning_rate": 3.544192724952138e-06,
"loss": 0.5068,
"step": 2052
},
{
"epoch": 0.95,
"learning_rate": 3.54100191448628e-06,
"loss": 0.5647,
"step": 2056
},
{
"epoch": 0.96,
"learning_rate": 3.5378111040204214e-06,
"loss": 0.5048,
"step": 2060
},
{
"epoch": 0.96,
"learning_rate": 3.5346202935545633e-06,
"loss": 0.457,
"step": 2064
},
{
"epoch": 0.96,
"learning_rate": 3.5314294830887048e-06,
"loss": 0.4089,
"step": 2068
},
{
"epoch": 0.96,
"learning_rate": 3.5282386726228462e-06,
"loss": 0.3521,
"step": 2072
},
{
"epoch": 0.96,
"learning_rate": 3.5250478621569886e-06,
"loss": 0.3477,
"step": 2076
},
{
"epoch": 0.96,
"learning_rate": 3.52185705169113e-06,
"loss": 0.6625,
"step": 2080
},
{
"epoch": 0.97,
"learning_rate": 3.5186662412252715e-06,
"loss": 0.3829,
"step": 2084
},
{
"epoch": 0.97,
"learning_rate": 3.515475430759413e-06,
"loss": 0.4733,
"step": 2088
},
{
"epoch": 0.97,
"learning_rate": 3.5122846202935553e-06,
"loss": 0.4024,
"step": 2092
},
{
"epoch": 0.97,
"learning_rate": 3.5090938098276968e-06,
"loss": 0.5733,
"step": 2096
},
{
"epoch": 0.97,
"learning_rate": 3.5059029993618382e-06,
"loss": 0.5788,
"step": 2100
},
{
"epoch": 0.98,
"learning_rate": 3.5027121888959797e-06,
"loss": 0.4806,
"step": 2104
},
{
"epoch": 0.98,
"learning_rate": 3.499521378430121e-06,
"loss": 0.5091,
"step": 2108
},
{
"epoch": 0.98,
"learning_rate": 3.4963305679642635e-06,
"loss": 0.6465,
"step": 2112
},
{
"epoch": 0.98,
"learning_rate": 3.493139757498405e-06,
"loss": 0.4821,
"step": 2116
},
{
"epoch": 0.98,
"learning_rate": 3.4899489470325464e-06,
"loss": 0.3563,
"step": 2120
},
{
"epoch": 0.99,
"learning_rate": 3.486758136566688e-06,
"loss": 0.7174,
"step": 2124
},
{
"epoch": 0.99,
"learning_rate": 3.4835673261008302e-06,
"loss": 0.3833,
"step": 2128
},
{
"epoch": 0.99,
"learning_rate": 3.4803765156349717e-06,
"loss": 0.6688,
"step": 2132
},
{
"epoch": 0.99,
"learning_rate": 3.477185705169113e-06,
"loss": 0.5733,
"step": 2136
},
{
"epoch": 0.99,
"learning_rate": 3.4739948947032546e-06,
"loss": 0.5743,
"step": 2140
},
{
"epoch": 0.99,
"learning_rate": 3.4708040842373965e-06,
"loss": 0.5219,
"step": 2144
},
{
"epoch": 1.0,
"learning_rate": 3.4676132737715384e-06,
"loss": 0.5964,
"step": 2148
},
{
"epoch": 1.0,
"learning_rate": 3.46442246330568e-06,
"loss": 0.5981,
"step": 2152
},
{
"epoch": 1.0,
"learning_rate": 3.4612316528398214e-06,
"loss": 0.4068,
"step": 2156
},
{
"epoch": 1.0,
"learning_rate": 3.4580408423739632e-06,
"loss": 0.3966,
"step": 2160
},
{
"epoch": 1.0,
"learning_rate": 3.454850031908105e-06,
"loss": 0.2291,
"step": 2164
},
{
"epoch": 1.01,
"learning_rate": 3.4516592214422466e-06,
"loss": 0.4695,
"step": 2168
},
{
"epoch": 1.01,
"learning_rate": 3.4484684109763885e-06,
"loss": 0.5594,
"step": 2172
},
{
"epoch": 1.01,
"learning_rate": 3.44527760051053e-06,
"loss": 0.5603,
"step": 2176
},
{
"epoch": 1.01,
"learning_rate": 3.4420867900446714e-06,
"loss": 0.4934,
"step": 2180
},
{
"epoch": 1.01,
"learning_rate": 3.4388959795788133e-06,
"loss": 0.6316,
"step": 2184
},
{
"epoch": 1.01,
"learning_rate": 3.4357051691129552e-06,
"loss": 0.3424,
"step": 2188
},
{
"epoch": 1.02,
"learning_rate": 3.4325143586470967e-06,
"loss": 0.566,
"step": 2192
},
{
"epoch": 1.02,
"learning_rate": 3.429323548181238e-06,
"loss": 0.3565,
"step": 2196
},
{
"epoch": 1.02,
"learning_rate": 3.42613273771538e-06,
"loss": 0.5191,
"step": 2200
},
{
"epoch": 1.02,
"learning_rate": 3.422941927249522e-06,
"loss": 0.3848,
"step": 2204
},
{
"epoch": 1.02,
"learning_rate": 3.4197511167836634e-06,
"loss": 0.6962,
"step": 2208
},
{
"epoch": 1.03,
"learning_rate": 3.416560306317805e-06,
"loss": 0.3646,
"step": 2212
},
{
"epoch": 1.03,
"learning_rate": 3.4133694958519464e-06,
"loss": 0.3756,
"step": 2216
},
{
"epoch": 1.03,
"learning_rate": 3.4101786853860887e-06,
"loss": 0.2853,
"step": 2220
},
{
"epoch": 1.03,
"learning_rate": 3.40698787492023e-06,
"loss": 0.2925,
"step": 2224
},
{
"epoch": 1.03,
"learning_rate": 3.4037970644543716e-06,
"loss": 0.3838,
"step": 2228
},
{
"epoch": 1.04,
"learning_rate": 3.400606253988513e-06,
"loss": 0.4479,
"step": 2232
},
{
"epoch": 1.04,
"learning_rate": 3.3974154435226554e-06,
"loss": 0.5207,
"step": 2236
},
{
"epoch": 1.04,
"learning_rate": 3.394224633056797e-06,
"loss": 0.3813,
"step": 2240
},
{
"epoch": 1.04,
"learning_rate": 3.3910338225909384e-06,
"loss": 0.4028,
"step": 2244
},
{
"epoch": 1.04,
"learning_rate": 3.38784301212508e-06,
"loss": 0.3406,
"step": 2248
},
{
"epoch": 1.04,
"learning_rate": 3.3846522016592213e-06,
"loss": 0.4495,
"step": 2252
},
{
"epoch": 1.05,
"learning_rate": 3.3814613911933636e-06,
"loss": 0.5411,
"step": 2256
},
{
"epoch": 1.05,
"learning_rate": 3.378270580727505e-06,
"loss": 0.3533,
"step": 2260
},
{
"epoch": 1.05,
"learning_rate": 3.3750797702616465e-06,
"loss": 0.5577,
"step": 2264
},
{
"epoch": 1.05,
"learning_rate": 3.371888959795788e-06,
"loss": 0.4198,
"step": 2268
},
{
"epoch": 1.05,
"learning_rate": 3.3686981493299303e-06,
"loss": 0.2956,
"step": 2272
},
{
"epoch": 1.06,
"learning_rate": 3.365507338864072e-06,
"loss": 0.5714,
"step": 2276
},
{
"epoch": 1.06,
"learning_rate": 3.3623165283982133e-06,
"loss": 0.3393,
"step": 2280
},
{
"epoch": 1.06,
"learning_rate": 3.359125717932355e-06,
"loss": 0.3448,
"step": 2284
},
{
"epoch": 1.06,
"learning_rate": 3.3559349074664966e-06,
"loss": 0.4956,
"step": 2288
},
{
"epoch": 1.06,
"learning_rate": 3.3527440970006385e-06,
"loss": 0.4609,
"step": 2292
},
{
"epoch": 1.06,
"learning_rate": 3.34955328653478e-06,
"loss": 0.4499,
"step": 2296
},
{
"epoch": 1.07,
"learning_rate": 3.346362476068922e-06,
"loss": 0.3638,
"step": 2300
},
{
"epoch": 1.07,
"learning_rate": 3.3431716656030634e-06,
"loss": 0.6062,
"step": 2304
},
{
"epoch": 1.07,
"learning_rate": 3.3399808551372053e-06,
"loss": 0.319,
"step": 2308
},
{
"epoch": 1.07,
"learning_rate": 3.3367900446713467e-06,
"loss": 0.3106,
"step": 2312
},
{
"epoch": 1.07,
"learning_rate": 3.3335992342054886e-06,
"loss": 0.6715,
"step": 2316
},
{
"epoch": 1.08,
"learning_rate": 3.33040842373963e-06,
"loss": 0.4007,
"step": 2320
},
{
"epoch": 1.08,
"learning_rate": 3.3272176132737716e-06,
"loss": 0.5854,
"step": 2324
},
{
"epoch": 1.08,
"learning_rate": 3.324026802807914e-06,
"loss": 0.4384,
"step": 2328
},
{
"epoch": 1.08,
"learning_rate": 3.3208359923420554e-06,
"loss": 0.5186,
"step": 2332
},
{
"epoch": 1.08,
"learning_rate": 3.317645181876197e-06,
"loss": 0.2793,
"step": 2336
},
{
"epoch": 1.09,
"learning_rate": 3.3144543714103383e-06,
"loss": 0.3945,
"step": 2340
},
{
"epoch": 1.09,
"learning_rate": 3.3112635609444806e-06,
"loss": 0.4488,
"step": 2344
},
{
"epoch": 1.09,
"learning_rate": 3.308072750478622e-06,
"loss": 0.2692,
"step": 2348
},
{
"epoch": 1.09,
"learning_rate": 3.3048819400127635e-06,
"loss": 0.4689,
"step": 2352
},
{
"epoch": 1.09,
"learning_rate": 3.301691129546905e-06,
"loss": 0.2162,
"step": 2356
},
{
"epoch": 1.09,
"learning_rate": 3.2985003190810465e-06,
"loss": 0.3339,
"step": 2360
},
{
"epoch": 1.1,
"learning_rate": 3.295309508615189e-06,
"loss": 0.5855,
"step": 2364
},
{
"epoch": 1.1,
"learning_rate": 3.2921186981493303e-06,
"loss": 0.4823,
"step": 2368
},
{
"epoch": 1.1,
"learning_rate": 3.2889278876834717e-06,
"loss": 0.3587,
"step": 2372
},
{
"epoch": 1.1,
"learning_rate": 3.2857370772176132e-06,
"loss": 0.3903,
"step": 2376
},
{
"epoch": 1.1,
"learning_rate": 3.2825462667517555e-06,
"loss": 0.5669,
"step": 2380
},
{
"epoch": 1.11,
"learning_rate": 3.279355456285897e-06,
"loss": 0.413,
"step": 2384
},
{
"epoch": 1.11,
"learning_rate": 3.2761646458200385e-06,
"loss": 0.3735,
"step": 2388
},
{
"epoch": 1.11,
"learning_rate": 3.27297383535418e-06,
"loss": 0.5467,
"step": 2392
},
{
"epoch": 1.11,
"learning_rate": 3.269783024888322e-06,
"loss": 0.3738,
"step": 2396
},
{
"epoch": 1.11,
"learning_rate": 3.2665922144224637e-06,
"loss": 0.4619,
"step": 2400
},
{
"epoch": 1.12,
"learning_rate": 3.263401403956605e-06,
"loss": 0.3739,
"step": 2404
},
{
"epoch": 1.12,
"learning_rate": 3.2602105934907467e-06,
"loss": 0.4065,
"step": 2408
},
{
"epoch": 1.12,
"learning_rate": 3.2570197830248886e-06,
"loss": 0.3406,
"step": 2412
},
{
"epoch": 1.12,
"learning_rate": 3.2538289725590305e-06,
"loss": 0.4554,
"step": 2416
},
{
"epoch": 1.12,
"learning_rate": 3.250638162093172e-06,
"loss": 0.799,
"step": 2420
},
{
"epoch": 1.12,
"learning_rate": 3.247447351627314e-06,
"loss": 0.4552,
"step": 2424
},
{
"epoch": 1.13,
"learning_rate": 3.2442565411614553e-06,
"loss": 0.3708,
"step": 2428
},
{
"epoch": 1.13,
"learning_rate": 3.2410657306955968e-06,
"loss": 0.3309,
"step": 2432
},
{
"epoch": 1.13,
"learning_rate": 3.2378749202297387e-06,
"loss": 0.4065,
"step": 2436
},
{
"epoch": 1.13,
"learning_rate": 3.2346841097638806e-06,
"loss": 0.627,
"step": 2440
},
{
"epoch": 1.13,
"learning_rate": 3.231493299298022e-06,
"loss": 0.3551,
"step": 2444
},
{
"epoch": 1.14,
"learning_rate": 3.2283024888321635e-06,
"loss": 0.2465,
"step": 2448
},
{
"epoch": 1.14,
"learning_rate": 3.2251116783663054e-06,
"loss": 0.5104,
"step": 2452
},
{
"epoch": 1.14,
"learning_rate": 3.2219208679004473e-06,
"loss": 0.5923,
"step": 2456
},
{
"epoch": 1.14,
"learning_rate": 3.2187300574345887e-06,
"loss": 0.3455,
"step": 2460
},
{
"epoch": 1.14,
"learning_rate": 3.2155392469687302e-06,
"loss": 0.3767,
"step": 2464
},
{
"epoch": 1.14,
"learning_rate": 3.2123484365028717e-06,
"loss": 0.4728,
"step": 2468
},
{
"epoch": 1.15,
"learning_rate": 3.209157626037014e-06,
"loss": 0.4427,
"step": 2472
},
{
"epoch": 1.15,
"learning_rate": 3.2059668155711555e-06,
"loss": 0.3805,
"step": 2476
},
{
"epoch": 1.15,
"learning_rate": 3.202776005105297e-06,
"loss": 0.2417,
"step": 2480
},
{
"epoch": 1.15,
"learning_rate": 3.1995851946394384e-06,
"loss": 0.4459,
"step": 2484
},
{
"epoch": 1.15,
"learning_rate": 3.1963943841735807e-06,
"loss": 0.5951,
"step": 2488
},
{
"epoch": 1.16,
"learning_rate": 3.193203573707722e-06,
"loss": 0.4512,
"step": 2492
},
{
"epoch": 1.16,
"learning_rate": 3.1900127632418637e-06,
"loss": 0.4038,
"step": 2496
},
{
"epoch": 1.16,
"learning_rate": 3.186821952776005e-06,
"loss": 0.5716,
"step": 2500
},
{
"epoch": 1.16,
"learning_rate": 3.1836311423101466e-06,
"loss": 0.3122,
"step": 2504
},
{
"epoch": 1.16,
"learning_rate": 3.180440331844289e-06,
"loss": 0.6523,
"step": 2508
},
{
"epoch": 1.17,
"learning_rate": 3.1772495213784304e-06,
"loss": 0.4129,
"step": 2512
},
{
"epoch": 1.17,
"learning_rate": 3.174058710912572e-06,
"loss": 0.7674,
"step": 2516
},
{
"epoch": 1.17,
"learning_rate": 3.1708679004467138e-06,
"loss": 0.5227,
"step": 2520
},
{
"epoch": 1.17,
"learning_rate": 3.1676770899808557e-06,
"loss": 0.457,
"step": 2524
},
{
"epoch": 1.17,
"learning_rate": 3.164486279514997e-06,
"loss": 0.3279,
"step": 2528
},
{
"epoch": 1.17,
"learning_rate": 3.1612954690491386e-06,
"loss": 0.4809,
"step": 2532
},
{
"epoch": 1.18,
"learning_rate": 3.1581046585832805e-06,
"loss": 0.3513,
"step": 2536
},
{
"epoch": 1.18,
"learning_rate": 3.154913848117422e-06,
"loss": 0.5097,
"step": 2540
},
{
"epoch": 1.18,
"learning_rate": 3.151723037651564e-06,
"loss": 0.4727,
"step": 2544
},
{
"epoch": 1.18,
"learning_rate": 3.1485322271857053e-06,
"loss": 0.3848,
"step": 2548
},
{
"epoch": 1.18,
"learning_rate": 3.1453414167198472e-06,
"loss": 0.49,
"step": 2552
},
{
"epoch": 1.19,
"learning_rate": 3.1421506062539887e-06,
"loss": 0.4166,
"step": 2556
},
{
"epoch": 1.19,
"learning_rate": 3.1389597957881306e-06,
"loss": 0.4397,
"step": 2560
},
{
"epoch": 1.19,
"learning_rate": 3.135768985322272e-06,
"loss": 0.4295,
"step": 2564
},
{
"epoch": 1.19,
"learning_rate": 3.132578174856414e-06,
"loss": 0.3807,
"step": 2568
},
{
"epoch": 1.19,
"learning_rate": 3.1293873643905554e-06,
"loss": 0.5155,
"step": 2572
},
{
"epoch": 1.19,
"learning_rate": 3.126196553924697e-06,
"loss": 0.4183,
"step": 2576
},
{
"epoch": 1.2,
"learning_rate": 3.123005743458839e-06,
"loss": 0.4173,
"step": 2580
},
{
"epoch": 1.2,
"learning_rate": 3.1198149329929807e-06,
"loss": 0.5842,
"step": 2584
},
{
"epoch": 1.2,
"learning_rate": 3.116624122527122e-06,
"loss": 0.6673,
"step": 2588
},
{
"epoch": 1.2,
"learning_rate": 3.1134333120612636e-06,
"loss": 0.4492,
"step": 2592
},
{
"epoch": 1.2,
"learning_rate": 3.110242501595406e-06,
"loss": 0.4747,
"step": 2596
},
{
"epoch": 1.21,
"learning_rate": 3.1070516911295474e-06,
"loss": 0.5746,
"step": 2600
},
{
"epoch": 1.21,
"learning_rate": 3.103860880663689e-06,
"loss": 0.5708,
"step": 2604
},
{
"epoch": 1.21,
"learning_rate": 3.1006700701978303e-06,
"loss": 0.601,
"step": 2608
},
{
"epoch": 1.21,
"learning_rate": 3.097479259731972e-06,
"loss": 0.7205,
"step": 2612
},
{
"epoch": 1.21,
"learning_rate": 3.094288449266114e-06,
"loss": 0.4023,
"step": 2616
},
{
"epoch": 1.22,
"learning_rate": 3.0910976388002556e-06,
"loss": 0.5012,
"step": 2620
},
{
"epoch": 1.22,
"learning_rate": 3.087906828334397e-06,
"loss": 0.4779,
"step": 2624
},
{
"epoch": 1.22,
"learning_rate": 3.0847160178685385e-06,
"loss": 0.5062,
"step": 2628
},
{
"epoch": 1.22,
"learning_rate": 3.081525207402681e-06,
"loss": 0.2888,
"step": 2632
},
{
"epoch": 1.22,
"learning_rate": 3.0783343969368223e-06,
"loss": 0.4121,
"step": 2636
},
{
"epoch": 1.22,
"learning_rate": 3.075143586470964e-06,
"loss": 0.4721,
"step": 2640
},
{
"epoch": 1.23,
"learning_rate": 3.0719527760051053e-06,
"loss": 0.3422,
"step": 2644
},
{
"epoch": 1.23,
"learning_rate": 3.068761965539247e-06,
"loss": 0.5018,
"step": 2648
},
{
"epoch": 1.23,
"learning_rate": 3.065571155073389e-06,
"loss": 0.5165,
"step": 2652
},
{
"epoch": 1.23,
"learning_rate": 3.0623803446075305e-06,
"loss": 0.3574,
"step": 2656
},
{
"epoch": 1.23,
"learning_rate": 3.059189534141672e-06,
"loss": 0.579,
"step": 2660
},
{
"epoch": 1.24,
"learning_rate": 3.055998723675814e-06,
"loss": 0.4961,
"step": 2664
},
{
"epoch": 1.24,
"learning_rate": 3.0528079132099558e-06,
"loss": 0.3949,
"step": 2668
},
{
"epoch": 1.24,
"learning_rate": 3.0496171027440973e-06,
"loss": 0.3999,
"step": 2672
},
{
"epoch": 1.24,
"learning_rate": 3.046426292278239e-06,
"loss": 0.524,
"step": 2676
},
{
"epoch": 1.24,
"learning_rate": 3.0432354818123806e-06,
"loss": 0.3688,
"step": 2680
},
{
"epoch": 1.24,
"learning_rate": 3.040044671346522e-06,
"loss": 0.403,
"step": 2684
},
{
"epoch": 1.25,
"learning_rate": 3.036853860880664e-06,
"loss": 0.3388,
"step": 2688
},
{
"epoch": 1.25,
"learning_rate": 3.033663050414806e-06,
"loss": 0.4706,
"step": 2692
},
{
"epoch": 1.25,
"learning_rate": 3.0304722399489473e-06,
"loss": 0.6817,
"step": 2696
},
{
"epoch": 1.25,
"learning_rate": 3.027281429483089e-06,
"loss": 0.3896,
"step": 2700
},
{
"epoch": 1.25,
"learning_rate": 3.0240906190172307e-06,
"loss": 0.358,
"step": 2704
},
{
"epoch": 1.26,
"learning_rate": 3.0208998085513726e-06,
"loss": 0.3115,
"step": 2708
},
{
"epoch": 1.26,
"learning_rate": 3.017708998085514e-06,
"loss": 0.5322,
"step": 2712
},
{
"epoch": 1.26,
"learning_rate": 3.0145181876196555e-06,
"loss": 0.4613,
"step": 2716
},
{
"epoch": 1.26,
"learning_rate": 3.011327377153797e-06,
"loss": 0.4374,
"step": 2720
},
{
"epoch": 1.26,
"learning_rate": 3.0081365666879393e-06,
"loss": 0.4775,
"step": 2724
},
{
"epoch": 1.27,
"learning_rate": 3.004945756222081e-06,
"loss": 0.349,
"step": 2728
},
{
"epoch": 1.27,
"learning_rate": 3.0017549457562223e-06,
"loss": 0.5114,
"step": 2732
},
{
"epoch": 1.27,
"learning_rate": 2.9985641352903637e-06,
"loss": 0.3901,
"step": 2736
},
{
"epoch": 1.27,
"learning_rate": 2.995373324824506e-06,
"loss": 0.4756,
"step": 2740
},
{
"epoch": 1.27,
"learning_rate": 2.9921825143586475e-06,
"loss": 0.4669,
"step": 2744
},
{
"epoch": 1.27,
"learning_rate": 2.988991703892789e-06,
"loss": 0.5554,
"step": 2748
},
{
"epoch": 1.28,
"learning_rate": 2.9858008934269305e-06,
"loss": 0.3345,
"step": 2752
},
{
"epoch": 1.28,
"learning_rate": 2.982610082961072e-06,
"loss": 0.3653,
"step": 2756
},
{
"epoch": 1.28,
"learning_rate": 2.9794192724952143e-06,
"loss": 0.4543,
"step": 2760
},
{
"epoch": 1.28,
"learning_rate": 2.9762284620293557e-06,
"loss": 0.382,
"step": 2764
},
{
"epoch": 1.28,
"learning_rate": 2.973037651563497e-06,
"loss": 0.2821,
"step": 2768
},
{
"epoch": 1.29,
"learning_rate": 2.969846841097639e-06,
"loss": 0.4392,
"step": 2772
},
{
"epoch": 1.29,
"learning_rate": 2.966656030631781e-06,
"loss": 0.3785,
"step": 2776
},
{
"epoch": 1.29,
"learning_rate": 2.9634652201659224e-06,
"loss": 0.4799,
"step": 2780
},
{
"epoch": 1.29,
"learning_rate": 2.960274409700064e-06,
"loss": 0.4004,
"step": 2784
},
{
"epoch": 1.29,
"learning_rate": 2.957083599234206e-06,
"loss": 0.4598,
"step": 2788
},
{
"epoch": 1.29,
"learning_rate": 2.9538927887683473e-06,
"loss": 0.6889,
"step": 2792
},
{
"epoch": 1.3,
"learning_rate": 2.950701978302489e-06,
"loss": 0.3401,
"step": 2796
},
{
"epoch": 1.3,
"learning_rate": 2.9475111678366306e-06,
"loss": 0.5162,
"step": 2800
},
{
"epoch": 1.3,
"learning_rate": 2.9443203573707725e-06,
"loss": 0.3811,
"step": 2804
},
{
"epoch": 1.3,
"learning_rate": 2.941129546904914e-06,
"loss": 0.3048,
"step": 2808
},
{
"epoch": 1.3,
"learning_rate": 2.937938736439056e-06,
"loss": 0.5528,
"step": 2812
},
{
"epoch": 1.31,
"learning_rate": 2.9347479259731974e-06,
"loss": 0.3721,
"step": 2816
},
{
"epoch": 1.31,
"learning_rate": 2.9315571155073393e-06,
"loss": 0.4877,
"step": 2820
},
{
"epoch": 1.31,
"learning_rate": 2.9283663050414807e-06,
"loss": 0.3101,
"step": 2824
},
{
"epoch": 1.31,
"learning_rate": 2.925175494575622e-06,
"loss": 0.3458,
"step": 2828
},
{
"epoch": 1.31,
"learning_rate": 2.9219846841097645e-06,
"loss": 0.3741,
"step": 2832
},
{
"epoch": 1.32,
"learning_rate": 2.918793873643906e-06,
"loss": 0.7428,
"step": 2836
},
{
"epoch": 1.32,
"learning_rate": 2.9156030631780475e-06,
"loss": 0.3487,
"step": 2840
},
{
"epoch": 1.32,
"learning_rate": 2.912412252712189e-06,
"loss": 0.3184,
"step": 2844
},
{
"epoch": 1.32,
"learning_rate": 2.9092214422463313e-06,
"loss": 0.3778,
"step": 2848
},
{
"epoch": 1.32,
"learning_rate": 2.9060306317804727e-06,
"loss": 0.4085,
"step": 2852
},
{
"epoch": 1.32,
"learning_rate": 2.902839821314614e-06,
"loss": 0.4859,
"step": 2856
},
{
"epoch": 1.33,
"learning_rate": 2.8996490108487557e-06,
"loss": 0.5783,
"step": 2860
},
{
"epoch": 1.33,
"learning_rate": 2.896458200382897e-06,
"loss": 0.2396,
"step": 2864
},
{
"epoch": 1.33,
"learning_rate": 2.8932673899170395e-06,
"loss": 0.6563,
"step": 2868
},
{
"epoch": 1.33,
"learning_rate": 2.890076579451181e-06,
"loss": 0.3048,
"step": 2872
},
{
"epoch": 1.33,
"learning_rate": 2.8868857689853224e-06,
"loss": 0.3935,
"step": 2876
},
{
"epoch": 1.34,
"learning_rate": 2.883694958519464e-06,
"loss": 0.5063,
"step": 2880
},
{
"epoch": 1.34,
"learning_rate": 2.880504148053606e-06,
"loss": 0.5056,
"step": 2884
},
{
"epoch": 1.34,
"learning_rate": 2.8773133375877476e-06,
"loss": 0.2203,
"step": 2888
},
{
"epoch": 1.34,
"learning_rate": 2.874122527121889e-06,
"loss": 0.3044,
"step": 2892
},
{
"epoch": 1.34,
"learning_rate": 2.8709317166560306e-06,
"loss": 0.4444,
"step": 2896
},
{
"epoch": 1.35,
"learning_rate": 2.8677409061901725e-06,
"loss": 0.3645,
"step": 2900
},
{
"epoch": 1.35,
"learning_rate": 2.8645500957243144e-06,
"loss": 0.4594,
"step": 2904
},
{
"epoch": 1.35,
"learning_rate": 2.861359285258456e-06,
"loss": 0.4897,
"step": 2908
},
{
"epoch": 1.35,
"learning_rate": 2.8581684747925973e-06,
"loss": 0.4772,
"step": 2912
},
{
"epoch": 1.35,
"learning_rate": 2.854977664326739e-06,
"loss": 0.388,
"step": 2916
},
{
"epoch": 1.35,
"learning_rate": 2.851786853860881e-06,
"loss": 0.3869,
"step": 2920
},
{
"epoch": 1.36,
"learning_rate": 2.8485960433950226e-06,
"loss": 0.4853,
"step": 2924
},
{
"epoch": 1.36,
"learning_rate": 2.8454052329291645e-06,
"loss": 0.4467,
"step": 2928
},
{
"epoch": 1.36,
"learning_rate": 2.842214422463306e-06,
"loss": 0.2356,
"step": 2932
},
{
"epoch": 1.36,
"learning_rate": 2.8390236119974474e-06,
"loss": 0.4614,
"step": 2936
},
{
"epoch": 1.36,
"learning_rate": 2.8358328015315893e-06,
"loss": 0.3212,
"step": 2940
},
{
"epoch": 1.37,
"learning_rate": 2.832641991065731e-06,
"loss": 0.5037,
"step": 2944
},
{
"epoch": 1.37,
"learning_rate": 2.8294511805998727e-06,
"loss": 0.4957,
"step": 2948
},
{
"epoch": 1.37,
"learning_rate": 2.826260370134014e-06,
"loss": 0.418,
"step": 2952
},
{
"epoch": 1.37,
"learning_rate": 2.823069559668156e-06,
"loss": 0.2996,
"step": 2956
},
{
"epoch": 1.37,
"learning_rate": 2.819878749202298e-06,
"loss": 0.5421,
"step": 2960
},
{
"epoch": 1.37,
"learning_rate": 2.8166879387364394e-06,
"loss": 0.5049,
"step": 2964
},
{
"epoch": 1.38,
"learning_rate": 2.813497128270581e-06,
"loss": 0.3929,
"step": 2968
},
{
"epoch": 1.38,
"learning_rate": 2.8103063178047223e-06,
"loss": 0.4045,
"step": 2972
},
{
"epoch": 1.38,
"learning_rate": 2.8071155073388646e-06,
"loss": 0.3494,
"step": 2976
},
{
"epoch": 1.38,
"learning_rate": 2.803924696873006e-06,
"loss": 0.3782,
"step": 2980
},
{
"epoch": 1.38,
"learning_rate": 2.8007338864071476e-06,
"loss": 0.2768,
"step": 2984
},
{
"epoch": 1.39,
"learning_rate": 2.797543075941289e-06,
"loss": 0.531,
"step": 2988
},
{
"epoch": 1.39,
"learning_rate": 2.7943522654754314e-06,
"loss": 0.4958,
"step": 2992
},
{
"epoch": 1.39,
"learning_rate": 2.791161455009573e-06,
"loss": 0.6183,
"step": 2996
},
{
"epoch": 1.39,
"learning_rate": 2.7879706445437143e-06,
"loss": 0.3521,
"step": 3000
},
{
"epoch": 1.39,
"learning_rate": 2.7847798340778558e-06,
"loss": 0.4406,
"step": 3004
},
{
"epoch": 1.4,
"learning_rate": 2.7815890236119973e-06,
"loss": 0.4131,
"step": 3008
},
{
"epoch": 1.4,
"learning_rate": 2.7783982131461396e-06,
"loss": 0.5107,
"step": 3012
},
{
"epoch": 1.4,
"learning_rate": 2.775207402680281e-06,
"loss": 0.2735,
"step": 3016
},
{
"epoch": 1.4,
"learning_rate": 2.7720165922144225e-06,
"loss": 0.3788,
"step": 3020
},
{
"epoch": 1.4,
"learning_rate": 2.7696234843650286e-06,
"loss": 0.5599,
"step": 3024
},
{
"epoch": 1.4,
"learning_rate": 2.7664326738991705e-06,
"loss": 0.2355,
"step": 3028
},
{
"epoch": 1.41,
"learning_rate": 2.7632418634333124e-06,
"loss": 0.5358,
"step": 3032
},
{
"epoch": 1.41,
"learning_rate": 2.760051052967454e-06,
"loss": 0.3283,
"step": 3036
},
{
"epoch": 1.41,
"learning_rate": 2.7568602425015958e-06,
"loss": 0.4093,
"step": 3040
},
{
"epoch": 1.41,
"learning_rate": 2.7536694320357372e-06,
"loss": 0.287,
"step": 3044
},
{
"epoch": 1.41,
"learning_rate": 2.750478621569879e-06,
"loss": 0.5271,
"step": 3048
},
{
"epoch": 1.42,
"learning_rate": 2.7472878111040206e-06,
"loss": 0.3372,
"step": 3052
},
{
"epoch": 1.42,
"learning_rate": 2.7440970006381625e-06,
"loss": 0.5649,
"step": 3056
},
{
"epoch": 1.42,
"learning_rate": 2.740906190172304e-06,
"loss": 0.5017,
"step": 3060
},
{
"epoch": 1.42,
"learning_rate": 2.7377153797064454e-06,
"loss": 0.6057,
"step": 3064
},
{
"epoch": 1.42,
"learning_rate": 2.7345245692405873e-06,
"loss": 0.4184,
"step": 3068
},
{
"epoch": 1.42,
"learning_rate": 2.7313337587747292e-06,
"loss": 0.2892,
"step": 3072
},
{
"epoch": 1.43,
"learning_rate": 2.7281429483088707e-06,
"loss": 0.5914,
"step": 3076
},
{
"epoch": 1.43,
"learning_rate": 2.724952137843012e-06,
"loss": 0.472,
"step": 3080
},
{
"epoch": 1.43,
"learning_rate": 2.721761327377154e-06,
"loss": 0.3773,
"step": 3084
},
{
"epoch": 1.43,
"learning_rate": 2.718570516911296e-06,
"loss": 0.2942,
"step": 3088
},
{
"epoch": 1.43,
"learning_rate": 2.7153797064454374e-06,
"loss": 0.3445,
"step": 3092
},
{
"epoch": 1.44,
"learning_rate": 2.712188895979579e-06,
"loss": 0.2773,
"step": 3096
},
{
"epoch": 1.44,
"learning_rate": 2.7089980855137204e-06,
"loss": 0.4007,
"step": 3100
},
{
"epoch": 1.44,
"learning_rate": 2.7058072750478627e-06,
"loss": 0.3083,
"step": 3104
},
{
"epoch": 1.44,
"learning_rate": 2.702616464582004e-06,
"loss": 0.4782,
"step": 3108
},
{
"epoch": 1.44,
"learning_rate": 2.6994256541161456e-06,
"loss": 0.5419,
"step": 3112
},
{
"epoch": 1.45,
"learning_rate": 2.696234843650287e-06,
"loss": 0.5713,
"step": 3116
},
{
"epoch": 1.45,
"learning_rate": 2.6930440331844294e-06,
"loss": 0.3722,
"step": 3120
},
{
"epoch": 1.45,
"learning_rate": 2.689853222718571e-06,
"loss": 0.4663,
"step": 3124
},
{
"epoch": 1.45,
"learning_rate": 2.6866624122527123e-06,
"loss": 0.3208,
"step": 3128
},
{
"epoch": 1.45,
"learning_rate": 2.683471601786854e-06,
"loss": 0.351,
"step": 3132
},
{
"epoch": 1.45,
"learning_rate": 2.6802807913209957e-06,
"loss": 0.513,
"step": 3136
},
{
"epoch": 1.46,
"learning_rate": 2.6770899808551376e-06,
"loss": 0.4409,
"step": 3140
},
{
"epoch": 1.46,
"learning_rate": 2.673899170389279e-06,
"loss": 0.3335,
"step": 3144
},
{
"epoch": 1.46,
"learning_rate": 2.6707083599234205e-06,
"loss": 0.3706,
"step": 3148
},
{
"epoch": 1.46,
"learning_rate": 2.6675175494575624e-06,
"loss": 0.4404,
"step": 3152
},
{
"epoch": 1.46,
"learning_rate": 2.6643267389917043e-06,
"loss": 0.4186,
"step": 3156
},
{
"epoch": 1.47,
"learning_rate": 2.661135928525846e-06,
"loss": 0.3666,
"step": 3160
},
{
"epoch": 1.47,
"learning_rate": 2.6579451180599873e-06,
"loss": 0.3849,
"step": 3164
},
{
"epoch": 1.47,
"learning_rate": 2.654754307594129e-06,
"loss": 0.4564,
"step": 3168
},
{
"epoch": 1.47,
"learning_rate": 2.6515634971282706e-06,
"loss": 0.3534,
"step": 3172
},
{
"epoch": 1.47,
"learning_rate": 2.6483726866624125e-06,
"loss": 0.3735,
"step": 3176
},
{
"epoch": 1.47,
"learning_rate": 2.645181876196554e-06,
"loss": 0.4449,
"step": 3180
},
{
"epoch": 1.48,
"learning_rate": 2.641991065730696e-06,
"loss": 0.5032,
"step": 3184
},
{
"epoch": 1.48,
"learning_rate": 2.6388002552648374e-06,
"loss": 0.3677,
"step": 3188
},
{
"epoch": 1.48,
"learning_rate": 2.6356094447989793e-06,
"loss": 0.5004,
"step": 3192
},
{
"epoch": 1.48,
"learning_rate": 2.632418634333121e-06,
"loss": 0.1972,
"step": 3196
},
{
"epoch": 1.48,
"learning_rate": 2.6292278238672626e-06,
"loss": 0.4606,
"step": 3200
},
{
"epoch": 1.49,
"learning_rate": 2.626037013401404e-06,
"loss": 0.3533,
"step": 3204
},
{
"epoch": 1.49,
"learning_rate": 2.6228462029355456e-06,
"loss": 0.3607,
"step": 3208
},
{
"epoch": 1.49,
"learning_rate": 2.619655392469688e-06,
"loss": 0.5767,
"step": 3212
},
{
"epoch": 1.49,
"learning_rate": 2.6164645820038293e-06,
"loss": 0.5316,
"step": 3216
},
{
"epoch": 1.49,
"learning_rate": 2.613273771537971e-06,
"loss": 0.2474,
"step": 3220
},
{
"epoch": 1.5,
"learning_rate": 2.6100829610721123e-06,
"loss": 0.3168,
"step": 3224
},
{
"epoch": 1.5,
"learning_rate": 2.6068921506062546e-06,
"loss": 0.4029,
"step": 3228
},
{
"epoch": 1.5,
"learning_rate": 2.603701340140396e-06,
"loss": 0.2693,
"step": 3232
},
{
"epoch": 1.5,
"learning_rate": 2.6005105296745375e-06,
"loss": 0.3756,
"step": 3236
},
{
"epoch": 1.5,
"learning_rate": 2.597319719208679e-06,
"loss": 0.3712,
"step": 3240
},
{
"epoch": 1.5,
"learning_rate": 2.5941289087428205e-06,
"loss": 0.366,
"step": 3244
},
{
"epoch": 1.51,
"learning_rate": 2.590938098276963e-06,
"loss": 0.3813,
"step": 3248
},
{
"epoch": 1.51,
"learning_rate": 2.5877472878111043e-06,
"loss": 0.4442,
"step": 3252
},
{
"epoch": 1.51,
"learning_rate": 2.5845564773452457e-06,
"loss": 0.4061,
"step": 3256
},
{
"epoch": 1.51,
"learning_rate": 2.581365666879387e-06,
"loss": 0.3679,
"step": 3260
},
{
"epoch": 1.51,
"learning_rate": 2.5781748564135295e-06,
"loss": 0.2641,
"step": 3264
},
{
"epoch": 1.52,
"learning_rate": 2.574984045947671e-06,
"loss": 0.5656,
"step": 3268
},
{
"epoch": 1.52,
"learning_rate": 2.5717932354818125e-06,
"loss": 0.3672,
"step": 3272
},
{
"epoch": 1.52,
"learning_rate": 2.568602425015954e-06,
"loss": 0.3395,
"step": 3276
},
{
"epoch": 1.52,
"learning_rate": 2.565411614550096e-06,
"loss": 0.5946,
"step": 3280
},
{
"epoch": 1.52,
"learning_rate": 2.5622208040842377e-06,
"loss": 0.3526,
"step": 3284
},
{
"epoch": 1.53,
"learning_rate": 2.559029993618379e-06,
"loss": 0.3365,
"step": 3288
},
{
"epoch": 1.53,
"learning_rate": 2.555839183152521e-06,
"loss": 0.4003,
"step": 3292
},
{
"epoch": 1.53,
"learning_rate": 2.5526483726866626e-06,
"loss": 0.3994,
"step": 3296
},
{
"epoch": 1.53,
"learning_rate": 2.5494575622208045e-06,
"loss": 0.3623,
"step": 3300
},
{
"epoch": 1.53,
"learning_rate": 2.546266751754946e-06,
"loss": 0.5994,
"step": 3304
},
{
"epoch": 1.53,
"learning_rate": 2.543075941289088e-06,
"loss": 0.3717,
"step": 3308
},
{
"epoch": 1.54,
"learning_rate": 2.5398851308232293e-06,
"loss": 0.2424,
"step": 3312
},
{
"epoch": 1.54,
"learning_rate": 2.5366943203573708e-06,
"loss": 0.5083,
"step": 3316
},
{
"epoch": 1.54,
"learning_rate": 2.5335035098915127e-06,
"loss": 0.2865,
"step": 3320
},
{
"epoch": 1.54,
"learning_rate": 2.5303126994256545e-06,
"loss": 0.2184,
"step": 3324
},
{
"epoch": 1.54,
"learning_rate": 2.527121888959796e-06,
"loss": 0.5697,
"step": 3328
},
{
"epoch": 1.55,
"learning_rate": 2.5239310784939375e-06,
"loss": 0.3524,
"step": 3332
},
{
"epoch": 1.55,
"learning_rate": 2.5207402680280794e-06,
"loss": 0.3922,
"step": 3336
},
{
"epoch": 1.55,
"learning_rate": 2.5175494575622213e-06,
"loss": 0.3364,
"step": 3340
},
{
"epoch": 1.55,
"learning_rate": 2.5143586470963627e-06,
"loss": 0.3983,
"step": 3344
},
{
"epoch": 1.55,
"learning_rate": 2.5111678366305042e-06,
"loss": 0.3812,
"step": 3348
},
{
"epoch": 1.55,
"learning_rate": 2.5079770261646457e-06,
"loss": 0.3001,
"step": 3352
},
{
"epoch": 1.56,
"learning_rate": 2.504786215698788e-06,
"loss": 0.3159,
"step": 3356
},
{
"epoch": 1.56,
"learning_rate": 2.5015954052329295e-06,
"loss": 0.316,
"step": 3360
},
{
"epoch": 1.56,
"learning_rate": 2.498404594767071e-06,
"loss": 0.2347,
"step": 3364
},
{
"epoch": 1.56,
"learning_rate": 2.495213784301213e-06,
"loss": 0.4728,
"step": 3368
},
{
"epoch": 1.56,
"learning_rate": 2.4920229738353543e-06,
"loss": 0.4451,
"step": 3372
},
{
"epoch": 1.57,
"learning_rate": 2.488832163369496e-06,
"loss": 0.6876,
"step": 3376
},
{
"epoch": 1.57,
"learning_rate": 2.4856413529036377e-06,
"loss": 0.4799,
"step": 3380
},
{
"epoch": 1.57,
"learning_rate": 2.482450542437779e-06,
"loss": 0.3912,
"step": 3384
},
{
"epoch": 1.57,
"learning_rate": 2.479259731971921e-06,
"loss": 0.2295,
"step": 3388
},
{
"epoch": 1.57,
"learning_rate": 2.4760689215060625e-06,
"loss": 0.2529,
"step": 3392
},
{
"epoch": 1.58,
"learning_rate": 2.4728781110402044e-06,
"loss": 0.454,
"step": 3396
},
{
"epoch": 1.58,
"learning_rate": 2.469687300574346e-06,
"loss": 0.3894,
"step": 3400
},
{
"epoch": 1.58,
"learning_rate": 2.4664964901084878e-06,
"loss": 0.2908,
"step": 3404
},
{
"epoch": 1.58,
"learning_rate": 2.4633056796426292e-06,
"loss": 0.499,
"step": 3408
},
{
"epoch": 1.58,
"learning_rate": 2.460114869176771e-06,
"loss": 0.3336,
"step": 3412
},
{
"epoch": 1.58,
"learning_rate": 2.4569240587109126e-06,
"loss": 0.3016,
"step": 3416
},
{
"epoch": 1.59,
"learning_rate": 2.4537332482450545e-06,
"loss": 0.4519,
"step": 3420
},
{
"epoch": 1.59,
"learning_rate": 2.450542437779196e-06,
"loss": 0.2589,
"step": 3424
},
{
"epoch": 1.59,
"learning_rate": 2.447351627313338e-06,
"loss": 0.404,
"step": 3428
},
{
"epoch": 1.59,
"learning_rate": 2.4441608168474793e-06,
"loss": 0.335,
"step": 3432
},
{
"epoch": 1.59,
"learning_rate": 2.4409700063816212e-06,
"loss": 0.4312,
"step": 3436
},
{
"epoch": 1.6,
"learning_rate": 2.437779195915763e-06,
"loss": 0.2877,
"step": 3440
},
{
"epoch": 1.6,
"learning_rate": 2.4345883854499046e-06,
"loss": 0.3591,
"step": 3444
},
{
"epoch": 1.6,
"learning_rate": 2.4313975749840465e-06,
"loss": 0.3149,
"step": 3448
},
{
"epoch": 1.6,
"learning_rate": 2.428206764518188e-06,
"loss": 0.3785,
"step": 3452
},
{
"epoch": 1.6,
"learning_rate": 2.4250159540523294e-06,
"loss": 0.3654,
"step": 3456
},
{
"epoch": 1.6,
"learning_rate": 2.4218251435864713e-06,
"loss": 0.2894,
"step": 3460
},
{
"epoch": 1.61,
"learning_rate": 2.4186343331206128e-06,
"loss": 0.5198,
"step": 3464
},
{
"epoch": 1.61,
"learning_rate": 2.4154435226547547e-06,
"loss": 0.4666,
"step": 3468
},
{
"epoch": 1.61,
"learning_rate": 2.412252712188896e-06,
"loss": 0.3899,
"step": 3472
},
{
"epoch": 1.61,
"learning_rate": 2.409061901723038e-06,
"loss": 0.4248,
"step": 3476
},
{
"epoch": 1.61,
"learning_rate": 2.4058710912571795e-06,
"loss": 0.3144,
"step": 3480
},
{
"epoch": 1.62,
"learning_rate": 2.4026802807913214e-06,
"loss": 0.3294,
"step": 3484
},
{
"epoch": 1.62,
"learning_rate": 2.399489470325463e-06,
"loss": 0.3395,
"step": 3488
},
{
"epoch": 1.62,
"learning_rate": 2.3962986598596043e-06,
"loss": 0.4384,
"step": 3492
},
{
"epoch": 1.62,
"learning_rate": 2.3931078493937462e-06,
"loss": 0.3029,
"step": 3496
},
{
"epoch": 1.62,
"learning_rate": 2.3899170389278877e-06,
"loss": 0.3868,
"step": 3500
},
{
"epoch": 1.63,
"learning_rate": 2.3867262284620296e-06,
"loss": 0.233,
"step": 3504
},
{
"epoch": 1.63,
"learning_rate": 2.383535417996171e-06,
"loss": 0.4025,
"step": 3508
},
{
"epoch": 1.63,
"learning_rate": 2.380344607530313e-06,
"loss": 0.2714,
"step": 3512
},
{
"epoch": 1.63,
"learning_rate": 2.3771537970644544e-06,
"loss": 0.4694,
"step": 3516
},
{
"epoch": 1.63,
"learning_rate": 2.3739629865985963e-06,
"loss": 0.3092,
"step": 3520
},
{
"epoch": 1.63,
"learning_rate": 2.370772176132738e-06,
"loss": 0.3375,
"step": 3524
},
{
"epoch": 1.64,
"learning_rate": 2.3675813656668793e-06,
"loss": 0.2356,
"step": 3528
},
{
"epoch": 1.64,
"learning_rate": 2.364390555201021e-06,
"loss": 0.4403,
"step": 3532
},
{
"epoch": 1.64,
"learning_rate": 2.3611997447351626e-06,
"loss": 0.4015,
"step": 3536
},
{
"epoch": 1.64,
"learning_rate": 2.3580089342693045e-06,
"loss": 0.5201,
"step": 3540
},
{
"epoch": 1.64,
"learning_rate": 2.3548181238034464e-06,
"loss": 0.4203,
"step": 3544
},
{
"epoch": 1.65,
"learning_rate": 2.351627313337588e-06,
"loss": 0.4869,
"step": 3548
},
{
"epoch": 1.65,
"learning_rate": 2.3484365028717298e-06,
"loss": 0.3923,
"step": 3552
},
{
"epoch": 1.65,
"learning_rate": 2.3452456924058712e-06,
"loss": 0.6743,
"step": 3556
},
{
"epoch": 1.65,
"learning_rate": 2.342054881940013e-06,
"loss": 0.2588,
"step": 3560
},
{
"epoch": 1.65,
"learning_rate": 2.3388640714741546e-06,
"loss": 0.323,
"step": 3564
},
{
"epoch": 1.65,
"learning_rate": 2.3356732610082965e-06,
"loss": 0.2859,
"step": 3568
},
{
"epoch": 1.66,
"learning_rate": 2.332482450542438e-06,
"loss": 0.2747,
"step": 3572
},
{
"epoch": 1.66,
"learning_rate": 2.32929164007658e-06,
"loss": 0.2221,
"step": 3576
},
{
"epoch": 1.66,
"learning_rate": 2.3261008296107213e-06,
"loss": 0.3744,
"step": 3580
},
{
"epoch": 1.66,
"learning_rate": 2.3229100191448632e-06,
"loss": 0.3965,
"step": 3584
},
{
"epoch": 1.66,
"learning_rate": 2.3197192086790047e-06,
"loss": 0.4889,
"step": 3588
},
{
"epoch": 1.67,
"learning_rate": 2.3165283982131466e-06,
"loss": 0.4218,
"step": 3592
},
{
"epoch": 1.67,
"learning_rate": 2.313337587747288e-06,
"loss": 0.3016,
"step": 3596
},
{
"epoch": 1.67,
"learning_rate": 2.3101467772814295e-06,
"loss": 0.3408,
"step": 3600
},
{
"epoch": 1.67,
"learning_rate": 2.3069559668155714e-06,
"loss": 0.387,
"step": 3604
},
{
"epoch": 1.67,
"learning_rate": 2.303765156349713e-06,
"loss": 0.3845,
"step": 3608
},
{
"epoch": 1.68,
"learning_rate": 2.300574345883855e-06,
"loss": 0.2885,
"step": 3612
},
{
"epoch": 1.68,
"learning_rate": 2.2973835354179963e-06,
"loss": 0.1871,
"step": 3616
},
{
"epoch": 1.68,
"learning_rate": 2.294192724952138e-06,
"loss": 0.3516,
"step": 3620
},
{
"epoch": 1.68,
"learning_rate": 2.2910019144862796e-06,
"loss": 0.4165,
"step": 3624
},
{
"epoch": 1.68,
"learning_rate": 2.2878111040204215e-06,
"loss": 0.2891,
"step": 3628
},
{
"epoch": 1.68,
"learning_rate": 2.284620293554563e-06,
"loss": 0.3616,
"step": 3632
},
{
"epoch": 1.69,
"learning_rate": 2.2814294830887045e-06,
"loss": 0.4057,
"step": 3636
},
{
"epoch": 1.69,
"learning_rate": 2.2782386726228464e-06,
"loss": 0.5166,
"step": 3640
},
{
"epoch": 1.69,
"learning_rate": 2.275047862156988e-06,
"loss": 0.3279,
"step": 3644
},
{
"epoch": 1.69,
"learning_rate": 2.2718570516911297e-06,
"loss": 0.3537,
"step": 3648
},
{
"epoch": 1.69,
"learning_rate": 2.268666241225271e-06,
"loss": 0.3187,
"step": 3652
},
{
"epoch": 1.7,
"learning_rate": 2.265475430759413e-06,
"loss": 0.4043,
"step": 3656
},
{
"epoch": 1.7,
"learning_rate": 2.2622846202935546e-06,
"loss": 0.2799,
"step": 3660
},
{
"epoch": 1.7,
"learning_rate": 2.2590938098276964e-06,
"loss": 0.3363,
"step": 3664
},
{
"epoch": 1.7,
"learning_rate": 2.255902999361838e-06,
"loss": 0.6477,
"step": 3668
},
{
"epoch": 1.7,
"learning_rate": 2.25271218889598e-06,
"loss": 0.4967,
"step": 3672
},
{
"epoch": 1.71,
"learning_rate": 2.2495213784301213e-06,
"loss": 0.4474,
"step": 3676
},
{
"epoch": 1.71,
"learning_rate": 2.246330567964263e-06,
"loss": 0.2501,
"step": 3680
},
{
"epoch": 1.71,
"learning_rate": 2.2431397574984046e-06,
"loss": 0.3448,
"step": 3684
},
{
"epoch": 1.71,
"learning_rate": 2.2399489470325465e-06,
"loss": 0.3084,
"step": 3688
},
{
"epoch": 1.71,
"learning_rate": 2.2367581365666884e-06,
"loss": 0.3165,
"step": 3692
},
{
"epoch": 1.71,
"learning_rate": 2.23356732610083e-06,
"loss": 0.405,
"step": 3696
},
{
"epoch": 1.72,
"learning_rate": 2.230376515634972e-06,
"loss": 0.3648,
"step": 3700
},
{
"epoch": 1.72,
"learning_rate": 2.2271857051691133e-06,
"loss": 0.2938,
"step": 3704
},
{
"epoch": 1.72,
"learning_rate": 2.2239948947032547e-06,
"loss": 0.336,
"step": 3708
},
{
"epoch": 1.72,
"learning_rate": 2.2208040842373966e-06,
"loss": 0.4741,
"step": 3712
},
{
"epoch": 1.72,
"learning_rate": 2.217613273771538e-06,
"loss": 0.4006,
"step": 3716
},
{
"epoch": 1.73,
"learning_rate": 2.21442246330568e-06,
"loss": 0.3443,
"step": 3720
},
{
"epoch": 1.73,
"learning_rate": 2.2112316528398215e-06,
"loss": 0.2771,
"step": 3724
},
{
"epoch": 1.73,
"learning_rate": 2.2080408423739634e-06,
"loss": 0.2515,
"step": 3728
},
{
"epoch": 1.73,
"learning_rate": 2.204850031908105e-06,
"loss": 0.3897,
"step": 3732
},
{
"epoch": 1.73,
"learning_rate": 2.2016592214422467e-06,
"loss": 0.182,
"step": 3736
},
{
"epoch": 1.73,
"learning_rate": 2.198468410976388e-06,
"loss": 0.3575,
"step": 3740
},
{
"epoch": 1.74,
"learning_rate": 2.1952776005105297e-06,
"loss": 0.3662,
"step": 3744
},
{
"epoch": 1.74,
"learning_rate": 2.1920867900446716e-06,
"loss": 0.4394,
"step": 3748
},
{
"epoch": 1.74,
"learning_rate": 2.188895979578813e-06,
"loss": 0.3541,
"step": 3752
},
{
"epoch": 1.74,
"learning_rate": 2.185705169112955e-06,
"loss": 0.3837,
"step": 3756
},
{
"epoch": 1.74,
"learning_rate": 2.1825143586470964e-06,
"loss": 0.2765,
"step": 3760
},
{
"epoch": 1.75,
"learning_rate": 2.1793235481812383e-06,
"loss": 0.3349,
"step": 3764
},
{
"epoch": 1.75,
"learning_rate": 2.1761327377153797e-06,
"loss": 0.3141,
"step": 3768
},
{
"epoch": 1.75,
"learning_rate": 2.1729419272495216e-06,
"loss": 0.3836,
"step": 3772
},
{
"epoch": 1.75,
"learning_rate": 2.169751116783663e-06,
"loss": 0.417,
"step": 3776
},
{
"epoch": 1.75,
"learning_rate": 2.1665603063178046e-06,
"loss": 0.339,
"step": 3780
},
{
"epoch": 1.76,
"learning_rate": 2.1633694958519465e-06,
"loss": 0.4287,
"step": 3784
},
{
"epoch": 1.76,
"learning_rate": 2.1601786853860884e-06,
"loss": 0.3423,
"step": 3788
},
{
"epoch": 1.76,
"learning_rate": 2.15698787492023e-06,
"loss": 0.3367,
"step": 3792
},
{
"epoch": 1.76,
"learning_rate": 2.1537970644543717e-06,
"loss": 0.2519,
"step": 3796
},
{
"epoch": 1.76,
"learning_rate": 2.150606253988513e-06,
"loss": 0.3884,
"step": 3800
},
{
"epoch": 1.76,
"learning_rate": 2.147415443522655e-06,
"loss": 0.2767,
"step": 3804
},
{
"epoch": 1.77,
"learning_rate": 2.1442246330567966e-06,
"loss": 0.3162,
"step": 3808
},
{
"epoch": 1.77,
"learning_rate": 2.1410338225909385e-06,
"loss": 0.3722,
"step": 3812
},
{
"epoch": 1.77,
"learning_rate": 2.13784301212508e-06,
"loss": 0.462,
"step": 3816
},
{
"epoch": 1.77,
"learning_rate": 2.134652201659222e-06,
"loss": 0.4508,
"step": 3820
},
{
"epoch": 1.77,
"learning_rate": 2.1314613911933633e-06,
"loss": 0.309,
"step": 3824
},
{
"epoch": 1.78,
"learning_rate": 2.128270580727505e-06,
"loss": 0.4566,
"step": 3828
},
{
"epoch": 1.78,
"learning_rate": 2.1250797702616467e-06,
"loss": 0.3216,
"step": 3832
},
{
"epoch": 1.78,
"learning_rate": 2.1218889597957886e-06,
"loss": 0.4669,
"step": 3836
},
{
"epoch": 1.78,
"learning_rate": 2.11869814932993e-06,
"loss": 0.4764,
"step": 3840
},
{
"epoch": 1.78,
"learning_rate": 2.115507338864072e-06,
"loss": 0.3011,
"step": 3844
},
{
"epoch": 1.78,
"learning_rate": 2.1123165283982134e-06,
"loss": 0.3308,
"step": 3848
},
{
"epoch": 1.79,
"learning_rate": 2.109125717932355e-06,
"loss": 0.4038,
"step": 3852
},
{
"epoch": 1.79,
"learning_rate": 2.1059349074664967e-06,
"loss": 0.2768,
"step": 3856
},
{
"epoch": 1.79,
"learning_rate": 2.1027440970006382e-06,
"loss": 0.374,
"step": 3860
},
{
"epoch": 1.79,
"learning_rate": 2.09955328653478e-06,
"loss": 0.3393,
"step": 3864
},
{
"epoch": 1.79,
"learning_rate": 2.0963624760689216e-06,
"loss": 0.3846,
"step": 3868
},
{
"epoch": 1.8,
"learning_rate": 2.0931716656030635e-06,
"loss": 0.308,
"step": 3872
},
{
"epoch": 1.8,
"learning_rate": 2.089980855137205e-06,
"loss": 0.4816,
"step": 3876
},
{
"epoch": 1.8,
"learning_rate": 2.086790044671347e-06,
"loss": 0.2121,
"step": 3880
},
{
"epoch": 1.8,
"learning_rate": 2.0835992342054883e-06,
"loss": 0.3698,
"step": 3884
},
{
"epoch": 1.8,
"learning_rate": 2.0804084237396298e-06,
"loss": 0.3615,
"step": 3888
},
{
"epoch": 1.81,
"learning_rate": 2.0772176132737717e-06,
"loss": 0.2294,
"step": 3892
},
{
"epoch": 1.81,
"learning_rate": 2.074026802807913e-06,
"loss": 0.2515,
"step": 3896
},
{
"epoch": 1.81,
"learning_rate": 2.070835992342055e-06,
"loss": 0.3559,
"step": 3900
},
{
"epoch": 1.81,
"learning_rate": 2.0676451818761965e-06,
"loss": 0.4243,
"step": 3904
},
{
"epoch": 1.81,
"learning_rate": 2.0644543714103384e-06,
"loss": 0.3622,
"step": 3908
},
{
"epoch": 1.81,
"learning_rate": 2.06126356094448e-06,
"loss": 0.5588,
"step": 3912
},
{
"epoch": 1.82,
"learning_rate": 2.0580727504786218e-06,
"loss": 0.2169,
"step": 3916
},
{
"epoch": 1.82,
"learning_rate": 2.0548819400127632e-06,
"loss": 0.4732,
"step": 3920
},
{
"epoch": 1.82,
"learning_rate": 2.051691129546905e-06,
"loss": 0.2331,
"step": 3924
},
{
"epoch": 1.82,
"learning_rate": 2.0485003190810466e-06,
"loss": 0.3388,
"step": 3928
},
{
"epoch": 1.82,
"learning_rate": 2.0453095086151885e-06,
"loss": 0.4545,
"step": 3932
},
{
"epoch": 1.83,
"learning_rate": 2.04211869814933e-06,
"loss": 0.3886,
"step": 3936
},
{
"epoch": 1.83,
"learning_rate": 2.038927887683472e-06,
"loss": 0.2233,
"step": 3940
},
{
"epoch": 1.83,
"learning_rate": 2.0357370772176138e-06,
"loss": 0.3658,
"step": 3944
},
{
"epoch": 1.83,
"learning_rate": 2.0325462667517552e-06,
"loss": 0.3229,
"step": 3948
},
{
"epoch": 1.83,
"learning_rate": 2.029355456285897e-06,
"loss": 0.1759,
"step": 3952
},
{
"epoch": 1.83,
"learning_rate": 2.0261646458200386e-06,
"loss": 0.3737,
"step": 3956
},
{
"epoch": 1.84,
"learning_rate": 2.02297383535418e-06,
"loss": 0.3362,
"step": 3960
},
{
"epoch": 1.84,
"learning_rate": 2.019783024888322e-06,
"loss": 0.2873,
"step": 3964
},
{
"epoch": 1.84,
"learning_rate": 2.0165922144224634e-06,
"loss": 0.3454,
"step": 3968
},
{
"epoch": 1.84,
"learning_rate": 2.0134014039566053e-06,
"loss": 0.3428,
"step": 3972
},
{
"epoch": 1.84,
"learning_rate": 2.0102105934907468e-06,
"loss": 0.4089,
"step": 3976
},
{
"epoch": 1.85,
"learning_rate": 2.0070197830248887e-06,
"loss": 0.3472,
"step": 3980
},
{
"epoch": 1.85,
"learning_rate": 2.00382897255903e-06,
"loss": 0.2868,
"step": 3984
},
{
"epoch": 1.85,
"learning_rate": 2.000638162093172e-06,
"loss": 0.3088,
"step": 3988
},
{
"epoch": 1.85,
"learning_rate": 1.9974473516273135e-06,
"loss": 0.2471,
"step": 3992
},
{
"epoch": 1.85,
"learning_rate": 1.994256541161455e-06,
"loss": 0.2816,
"step": 3996
},
{
"epoch": 1.86,
"learning_rate": 1.991065730695597e-06,
"loss": 0.3135,
"step": 4000
},
{
"epoch": 1.86,
"learning_rate": 1.9878749202297383e-06,
"loss": 0.379,
"step": 4004
},
{
"epoch": 1.86,
"learning_rate": 1.9846841097638802e-06,
"loss": 0.5225,
"step": 4008
},
{
"epoch": 1.86,
"learning_rate": 1.9814932992980217e-06,
"loss": 0.3229,
"step": 4012
},
{
"epoch": 1.86,
"learning_rate": 1.9783024888321636e-06,
"loss": 0.3573,
"step": 4016
},
{
"epoch": 1.86,
"learning_rate": 1.975111678366305e-06,
"loss": 0.2219,
"step": 4020
},
{
"epoch": 1.87,
"learning_rate": 1.971920867900447e-06,
"loss": 0.2133,
"step": 4024
},
{
"epoch": 1.87,
"learning_rate": 1.9687300574345884e-06,
"loss": 0.4303,
"step": 4028
},
{
"epoch": 1.87,
"learning_rate": 1.96553924696873e-06,
"loss": 0.4735,
"step": 4032
},
{
"epoch": 1.87,
"learning_rate": 1.9631461391193364e-06,
"loss": 0.3223,
"step": 4036
},
{
"epoch": 1.87,
"learning_rate": 1.959955328653478e-06,
"loss": 0.3124,
"step": 4040
},
{
"epoch": 1.88,
"learning_rate": 1.95676451818762e-06,
"loss": 0.4547,
"step": 4044
},
{
"epoch": 1.88,
"learning_rate": 1.9535737077217613e-06,
"loss": 0.3089,
"step": 4048
},
{
"epoch": 1.88,
"learning_rate": 1.950382897255903e-06,
"loss": 0.344,
"step": 4052
},
{
"epoch": 1.88,
"learning_rate": 1.947192086790045e-06,
"loss": 0.1488,
"step": 4056
},
{
"epoch": 1.88,
"learning_rate": 1.9440012763241865e-06,
"loss": 0.4715,
"step": 4060
},
{
"epoch": 1.88,
"learning_rate": 1.9408104658583284e-06,
"loss": 0.2866,
"step": 4064
},
{
"epoch": 1.89,
"learning_rate": 1.93761965539247e-06,
"loss": 0.3207,
"step": 4068
},
{
"epoch": 1.89,
"learning_rate": 1.9344288449266118e-06,
"loss": 0.3532,
"step": 4072
},
{
"epoch": 1.89,
"learning_rate": 1.9312380344607532e-06,
"loss": 0.3416,
"step": 4076
},
{
"epoch": 1.89,
"learning_rate": 1.928047223994895e-06,
"loss": 0.6239,
"step": 4080
},
{
"epoch": 1.89,
"learning_rate": 1.9248564135290366e-06,
"loss": 0.1806,
"step": 4084
},
{
"epoch": 1.9,
"learning_rate": 1.9216656030631785e-06,
"loss": 0.3065,
"step": 4088
},
{
"epoch": 1.9,
"learning_rate": 1.91847479259732e-06,
"loss": 0.2393,
"step": 4092
},
{
"epoch": 1.9,
"learning_rate": 1.9152839821314614e-06,
"loss": 0.4581,
"step": 4096
},
{
"epoch": 1.9,
"learning_rate": 1.9120931716656033e-06,
"loss": 0.2407,
"step": 4100
},
{
"epoch": 1.9,
"learning_rate": 1.908902361199745e-06,
"loss": 0.3328,
"step": 4104
},
{
"epoch": 1.91,
"learning_rate": 1.9057115507338867e-06,
"loss": 0.2898,
"step": 4108
},
{
"epoch": 1.91,
"learning_rate": 1.9025207402680282e-06,
"loss": 0.5888,
"step": 4112
},
{
"epoch": 1.91,
"learning_rate": 1.89932992980217e-06,
"loss": 0.3909,
"step": 4116
},
{
"epoch": 1.91,
"learning_rate": 1.8961391193363115e-06,
"loss": 0.2613,
"step": 4120
},
{
"epoch": 1.91,
"learning_rate": 1.8929483088704534e-06,
"loss": 0.2594,
"step": 4124
},
{
"epoch": 1.91,
"learning_rate": 1.889757498404595e-06,
"loss": 0.3601,
"step": 4128
},
{
"epoch": 1.92,
"learning_rate": 1.8865666879387366e-06,
"loss": 0.1791,
"step": 4132
},
{
"epoch": 1.92,
"learning_rate": 1.8833758774728783e-06,
"loss": 0.3714,
"step": 4136
},
{
"epoch": 1.92,
"learning_rate": 1.88018506700702e-06,
"loss": 0.3601,
"step": 4140
},
{
"epoch": 1.92,
"learning_rate": 1.8769942565411616e-06,
"loss": 0.4697,
"step": 4144
},
{
"epoch": 1.92,
"learning_rate": 1.8738034460753033e-06,
"loss": 0.4277,
"step": 4148
},
{
"epoch": 1.93,
"learning_rate": 1.870612635609445e-06,
"loss": 0.4183,
"step": 4152
},
{
"epoch": 1.93,
"learning_rate": 1.8674218251435867e-06,
"loss": 0.2764,
"step": 4156
},
{
"epoch": 1.93,
"learning_rate": 1.8642310146777281e-06,
"loss": 0.3209,
"step": 4160
},
{
"epoch": 1.93,
"learning_rate": 1.86104020421187e-06,
"loss": 0.328,
"step": 4164
},
{
"epoch": 1.93,
"learning_rate": 1.8578493937460115e-06,
"loss": 0.3673,
"step": 4168
},
{
"epoch": 1.94,
"learning_rate": 1.8546585832801534e-06,
"loss": 0.2856,
"step": 4172
},
{
"epoch": 1.94,
"learning_rate": 1.8514677728142949e-06,
"loss": 0.4248,
"step": 4176
},
{
"epoch": 1.94,
"learning_rate": 1.8482769623484368e-06,
"loss": 0.419,
"step": 4180
},
{
"epoch": 1.94,
"learning_rate": 1.8450861518825782e-06,
"loss": 0.3315,
"step": 4184
},
{
"epoch": 1.94,
"learning_rate": 1.8418953414167201e-06,
"loss": 0.3508,
"step": 4188
},
{
"epoch": 1.94,
"learning_rate": 1.8387045309508616e-06,
"loss": 0.2016,
"step": 4192
},
{
"epoch": 1.95,
"learning_rate": 1.8355137204850033e-06,
"loss": 0.2352,
"step": 4196
},
{
"epoch": 1.95,
"learning_rate": 1.832322910019145e-06,
"loss": 0.4638,
"step": 4200
},
{
"epoch": 1.95,
"learning_rate": 1.8291320995532866e-06,
"loss": 0.4352,
"step": 4204
},
{
"epoch": 1.95,
"learning_rate": 1.8259412890874283e-06,
"loss": 0.4832,
"step": 4208
},
{
"epoch": 1.95,
"learning_rate": 1.82275047862157e-06,
"loss": 0.295,
"step": 4212
},
{
"epoch": 1.96,
"learning_rate": 1.8195596681557117e-06,
"loss": 0.3176,
"step": 4216
},
{
"epoch": 1.96,
"learning_rate": 1.8163688576898534e-06,
"loss": 0.0922,
"step": 4220
},
{
"epoch": 1.96,
"learning_rate": 1.813178047223995e-06,
"loss": 0.2375,
"step": 4224
},
{
"epoch": 1.96,
"learning_rate": 1.8099872367581367e-06,
"loss": 0.3374,
"step": 4228
},
{
"epoch": 1.96,
"learning_rate": 1.8067964262922782e-06,
"loss": 0.2551,
"step": 4232
},
{
"epoch": 1.96,
"learning_rate": 1.80360561582642e-06,
"loss": 0.3228,
"step": 4236
},
{
"epoch": 1.97,
"learning_rate": 1.8004148053605616e-06,
"loss": 0.3102,
"step": 4240
},
{
"epoch": 1.97,
"learning_rate": 1.7972239948947035e-06,
"loss": 0.2471,
"step": 4244
},
{
"epoch": 1.97,
"learning_rate": 1.794033184428845e-06,
"loss": 0.285,
"step": 4248
},
{
"epoch": 1.97,
"learning_rate": 1.7908423739629868e-06,
"loss": 0.3468,
"step": 4252
},
{
"epoch": 1.97,
"learning_rate": 1.7876515634971283e-06,
"loss": 0.2877,
"step": 4256
},
{
"epoch": 1.98,
"learning_rate": 1.7844607530312702e-06,
"loss": 0.4362,
"step": 4260
},
{
"epoch": 1.98,
"learning_rate": 1.7812699425654117e-06,
"loss": 0.1789,
"step": 4264
},
{
"epoch": 1.98,
"learning_rate": 1.7780791320995533e-06,
"loss": 0.3056,
"step": 4268
},
{
"epoch": 1.98,
"learning_rate": 1.774888321633695e-06,
"loss": 0.478,
"step": 4272
},
{
"epoch": 1.98,
"learning_rate": 1.7716975111678367e-06,
"loss": 0.3405,
"step": 4276
},
{
"epoch": 1.99,
"learning_rate": 1.7685067007019786e-06,
"loss": 0.2038,
"step": 4280
},
{
"epoch": 1.99,
"learning_rate": 1.76531589023612e-06,
"loss": 0.2301,
"step": 4284
},
{
"epoch": 1.99,
"learning_rate": 1.762125079770262e-06,
"loss": 0.3283,
"step": 4288
},
{
"epoch": 1.99,
"learning_rate": 1.7589342693044034e-06,
"loss": 0.1711,
"step": 4292
},
{
"epoch": 1.99,
"learning_rate": 1.7557434588385453e-06,
"loss": 0.241,
"step": 4296
},
{
"epoch": 1.99,
"learning_rate": 1.7525526483726868e-06,
"loss": 0.2408,
"step": 4300
}
],
"logging_steps": 4,
"max_steps": 6468,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"total_flos": 43550404509696.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}