diff --git "a/trainer_state.json" "b/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/trainer_state.json"
@@ -0,0 +1,19224 @@
+{
+  "best_metric": 1.806269645690918,
+  "best_model_checkpoint": "gpt_alpaca_gpt4/checkpoint-31880",
+  "epoch": 10.0,
+  "global_step": 31880,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "learning_rate": 9.996863237139272e-06,
+      "loss": 72.7405,
+      "step": 10
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 9.993726474278545e-06,
+      "loss": 18.2176,
+      "step": 20
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 9.990589711417818e-06,
+      "loss": 5.3223,
+      "step": 30
+    },
+    {
+      "epoch": 0.01,
+      "learning_rate": 9.98745294855709e-06,
+      "loss": 4.4019,
+      "step": 40
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 9.984316185696362e-06,
+      "loss": 3.6758,
+      "step": 50
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 9.981179422835635e-06,
+      "loss": 3.4823,
+      "step": 60
+    },
+    {
+      "epoch": 0.02,
+      "learning_rate": 9.978042659974908e-06,
+      "loss": 3.024,
+      "step": 70
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 9.974905897114179e-06,
+      "loss": 2.9245,
+      "step": 80
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 9.971769134253452e-06,
+      "loss": 2.7949,
+      "step": 90
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 9.968632371392723e-06,
+      "loss": 2.7182,
+      "step": 100
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 9.965495608531996e-06,
+      "loss": 2.7099,
+      "step": 110
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 9.962358845671269e-06,
+      "loss": 2.565,
+      "step": 120
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 9.95922208281054e-06,
+      "loss": 2.5437,
+      "step": 130
+    },
+    {
+      "epoch": 0.04,
+      "learning_rate": 9.956085319949813e-06,
+      "loss": 2.4238,
+      "step": 140
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 9.952948557089086e-06,
+      "loss": 2.538,
+      "step": 150
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 9.949811794228357e-06,
+      "loss": 2.4926,
+      "step": 160
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 9.94667503136763e-06,
+      "loss": 2.4414,
+      "step": 170
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 9.943538268506901e-06,
+      "loss": 2.4165,
+      "step": 180
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 9.940401505646174e-06,
+      "loss": 2.4311,
+      "step": 190
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 9.937264742785447e-06,
+      "loss": 2.461,
+      "step": 200
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 9.934127979924718e-06,
+      "loss": 2.4626,
+      "step": 210
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 9.930991217063991e-06,
+      "loss": 2.4015,
+      "step": 220
+    },
+    {
+      "epoch": 0.07,
+      "learning_rate": 9.927854454203262e-06,
+      "loss": 2.4421,
+      "step": 230
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 9.924717691342535e-06,
+      "loss": 2.3519,
+      "step": 240
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 9.921580928481808e-06,
+      "loss": 2.4021,
+      "step": 250
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 9.918444165621079e-06,
+      "loss": 2.363,
+      "step": 260
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 9.915307402760352e-06,
+      "loss": 2.3165,
+      "step": 270
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 9.912170639899625e-06,
+      "loss": 2.3224,
+      "step": 280
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 9.909033877038898e-06,
+      "loss": 2.4254,
+      "step": 290
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 9.905897114178169e-06,
+      "loss": 2.3419,
+      "step": 300
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 9.90276035131744e-06,
+      "loss": 2.3501,
+      "step": 310
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 9.899623588456713e-06,
+      "loss": 2.3411,
+      "step": 320
+    },
+    {
+      "epoch": 0.1,
+      "learning_rate": 9.896486825595986e-06,
+      "loss": 2.3303,
+      "step": 330
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 9.893350062735259e-06,
+      "loss": 2.342,
+      "step": 340
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 9.89021329987453e-06,
+      "loss": 2.3027,
+      "step": 350
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 9.887076537013803e-06,
+      "loss": 2.3274,
+      "step": 360
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 9.883939774153076e-06,
+      "loss": 2.3229,
+      "step": 370
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 9.880803011292347e-06,
+      "loss": 2.273,
+      "step": 380
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 9.87766624843162e-06,
+      "loss": 2.3415,
+      "step": 390
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 9.874529485570891e-06,
+      "loss": 2.2101,
+      "step": 400
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 9.871392722710164e-06,
+      "loss": 2.3486,
+      "step": 410
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 9.868255959849437e-06,
+      "loss": 2.2376,
+      "step": 420
+    },
+    {
+      "epoch": 0.13,
+      "learning_rate": 9.865119196988708e-06,
+      "loss": 2.1976,
+      "step": 430
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 9.861982434127981e-06,
+      "loss": 2.2299,
+      "step": 440
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 9.858845671267254e-06,
+      "loss": 2.2861,
+      "step": 450
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 9.855708908406525e-06,
+      "loss": 2.2025,
+      "step": 460
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 9.852572145545798e-06,
+      "loss": 2.2542,
+      "step": 470
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 9.849435382685069e-06,
+      "loss": 2.3063,
+      "step": 480
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 9.846298619824342e-06,
+      "loss": 2.2572,
+      "step": 490
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 9.843161856963615e-06,
+      "loss": 2.1989,
+      "step": 500
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 9.840025094102886e-06,
+      "loss": 2.2597,
+      "step": 510
+    },
+    {
+      "epoch": 0.16,
+      "learning_rate": 9.836888331242159e-06,
+      "loss": 2.2688,
+      "step": 520
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 9.833751568381432e-06,
+      "loss": 2.2682,
+      "step": 530
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 9.830614805520703e-06,
+      "loss": 2.2296,
+      "step": 540
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 9.827478042659976e-06,
+      "loss": 2.3093,
+      "step": 550
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 9.824341279799247e-06,
+      "loss": 2.2688,
+      "step": 560
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 9.82120451693852e-06,
+      "loss": 2.195,
+      "step": 570
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 9.818067754077793e-06,
+      "loss": 2.1817,
+      "step": 580
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 9.814930991217064e-06,
+      "loss": 2.3147,
+      "step": 590
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 9.811794228356337e-06,
+      "loss": 2.2445,
+      "step": 600
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 9.808657465495608e-06,
+      "loss": 2.2774,
+      "step": 610
+    },
+    {
+      "epoch": 0.19,
+      "learning_rate": 9.805520702634881e-06,
+      "loss": 2.107,
+      "step": 620
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 9.802383939774154e-06,
+      "loss": 2.284,
+      "step": 630
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 9.799247176913425e-06,
+      "loss": 2.2508,
+      "step": 640
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 9.796110414052698e-06,
+      "loss": 2.2765,
+      "step": 650
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 9.792973651191971e-06,
+      "loss": 2.3185,
+      "step": 660
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 9.789836888331244e-06,
+      "loss": 2.2105,
+      "step": 670
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 9.786700125470515e-06,
+      "loss": 2.1659,
+      "step": 680
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 9.783563362609786e-06,
+      "loss": 2.189,
+      "step": 690
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 9.78042659974906e-06,
+      "loss": 2.223,
+      "step": 700
+    },
+    {
+      "epoch": 0.22,
+      "learning_rate": 9.777289836888332e-06,
+      "loss": 2.2449,
+      "step": 710
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 9.774153074027605e-06,
+      "loss": 2.2252,
+      "step": 720
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 9.771016311166876e-06,
+      "loss": 2.1512,
+      "step": 730
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 9.767879548306149e-06,
+      "loss": 2.1801,
+      "step": 740
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 9.764742785445422e-06,
+      "loss": 2.2061,
+      "step": 750
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 9.761606022584693e-06,
+      "loss": 2.2141,
+      "step": 760
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 9.758469259723966e-06,
+      "loss": 2.1549,
+      "step": 770
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 9.755332496863237e-06,
+      "loss": 2.2603,
+      "step": 780
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 9.75219573400251e-06,
+      "loss": 2.2523,
+      "step": 790
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 9.749058971141783e-06,
+      "loss": 2.1641,
+      "step": 800
+    },
+    {
+      "epoch": 0.25,
+      "learning_rate": 9.745922208281054e-06,
+      "loss": 2.1834,
+      "step": 810
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 9.742785445420327e-06,
+      "loss": 2.2233,
+      "step": 820
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 9.7396486825596e-06,
+      "loss": 2.1291,
+      "step": 830
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 9.736511919698871e-06,
+      "loss": 2.1742,
+      "step": 840
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 9.733375156838144e-06,
+      "loss": 2.2465,
+      "step": 850
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 9.730238393977415e-06,
+      "loss": 2.1936,
+      "step": 860
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 9.727101631116688e-06,
+      "loss": 2.1433,
+      "step": 870
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 9.723964868255961e-06,
+      "loss": 2.1827,
+      "step": 880
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 9.720828105395232e-06,
+      "loss": 2.2212,
+      "step": 890
+    },
+    {
+      "epoch": 0.28,
+      "learning_rate": 9.717691342534505e-06,
+      "loss": 2.1738,
+      "step": 900
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 9.714554579673778e-06,
+      "loss": 2.13,
+      "step": 910
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 9.711417816813051e-06,
+      "loss": 2.2494,
+      "step": 920
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 9.708281053952322e-06,
+      "loss": 2.1904,
+      "step": 930
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 9.705144291091593e-06,
+      "loss": 2.2029,
+      "step": 940
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 9.702007528230866e-06,
+      "loss": 2.1897,
+      "step": 950
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 9.69887076537014e-06,
+      "loss": 2.2508,
+      "step": 960
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 9.695734002509412e-06,
+      "loss": 2.1216,
+      "step": 970
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 9.692597239648683e-06,
+      "loss": 2.2157,
+      "step": 980
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 9.689460476787954e-06,
+      "loss": 2.2398,
+      "step": 990
+    },
+    {
+      "epoch": 0.31,
+      "learning_rate": 9.686323713927227e-06,
+      "loss": 2.2389,
+      "step": 1000
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 9.6831869510665e-06,
+      "loss": 2.1701,
+      "step": 1010
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 9.680050188205773e-06,
+      "loss": 2.0708,
+      "step": 1020
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 9.676913425345044e-06,
+      "loss": 2.1543,
+      "step": 1030
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 9.673776662484317e-06,
+      "loss": 2.1717,
+      "step": 1040
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 9.67063989962359e-06,
+      "loss": 2.2078,
+      "step": 1050
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 9.667503136762861e-06,
+      "loss": 2.2218,
+      "step": 1060
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 9.664366373902134e-06,
+      "loss": 2.177,
+      "step": 1070
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 9.661229611041405e-06,
+      "loss": 2.1876,
+      "step": 1080
+    },
+    {
+      "epoch": 0.34,
+      "learning_rate": 9.658092848180678e-06,
+      "loss": 2.2374,
+      "step": 1090
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 9.654956085319951e-06,
+      "loss": 2.1146,
+      "step": 1100
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 9.651819322459222e-06,
+      "loss": 2.2132,
+      "step": 1110
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 9.648682559598495e-06,
+      "loss": 2.1513,
+      "step": 1120
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 9.645545796737768e-06,
+      "loss": 2.2177,
+      "step": 1130
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 9.64240903387704e-06,
+      "loss": 2.1884,
+      "step": 1140
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 9.639272271016312e-06,
+      "loss": 2.1925,
+      "step": 1150
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 9.636135508155583e-06,
+      "loss": 2.2103,
+      "step": 1160
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 9.632998745294856e-06,
+      "loss": 2.2355,
+      "step": 1170
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 9.62986198243413e-06,
+      "loss": 2.1594,
+      "step": 1180
+    },
+    {
+      "epoch": 0.37,
+      "learning_rate": 9.6267252195734e-06,
+      "loss": 2.1985,
+      "step": 1190
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 9.623588456712673e-06,
+      "loss": 2.1001,
+      "step": 1200
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 9.620451693851946e-06,
+      "loss": 2.1757,
+      "step": 1210
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 9.617314930991219e-06,
+      "loss": 2.2362,
+      "step": 1220
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 9.61417816813049e-06,
+      "loss": 2.1813,
+      "step": 1230
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 9.611041405269761e-06,
+      "loss": 2.1573,
+      "step": 1240
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 9.607904642409034e-06,
+      "loss": 2.2222,
+      "step": 1250
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 9.604767879548307e-06,
+      "loss": 2.1565,
+      "step": 1260
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 9.60163111668758e-06,
+      "loss": 2.1788,
+      "step": 1270
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 9.598494353826851e-06,
+      "loss": 2.1476,
+      "step": 1280
+    },
+    {
+      "epoch": 0.4,
+      "learning_rate": 9.595357590966123e-06,
+      "loss": 2.1585,
+      "step": 1290
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 9.592220828105397e-06,
+      "loss": 2.1576,
+      "step": 1300
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 9.589084065244668e-06,
+      "loss": 2.1962,
+      "step": 1310
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 9.585947302383941e-06,
+      "loss": 2.1579,
+      "step": 1320
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 9.582810539523212e-06,
+      "loss": 2.1679,
+      "step": 1330
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 9.579673776662485e-06,
+      "loss": 2.0721,
+      "step": 1340
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 9.576537013801758e-06,
+      "loss": 2.1763,
+      "step": 1350
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 9.57340025094103e-06,
+      "loss": 2.1384,
+      "step": 1360
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 9.5702634880803e-06,
+      "loss": 2.1536,
+      "step": 1370
+    },
+    {
+      "epoch": 0.43,
+      "learning_rate": 9.567126725219574e-06,
+      "loss": 2.1527,
+      "step": 1380
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 9.563989962358846e-06,
+      "loss": 2.1347,
+      "step": 1390
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 9.56085319949812e-06,
+      "loss": 2.0246,
+      "step": 1400
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 9.55771643663739e-06,
+      "loss": 2.1478,
+      "step": 1410
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 9.554579673776663e-06,
+      "loss": 2.1347,
+      "step": 1420
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 9.551442910915936e-06,
+      "loss": 2.109,
+      "step": 1430
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 9.548306148055207e-06,
+      "loss": 2.1055,
+      "step": 1440
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 9.54516938519448e-06,
+      "loss": 2.1315,
+      "step": 1450
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 9.542032622333752e-06,
+      "loss": 2.0917,
+      "step": 1460
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 9.538895859473024e-06,
+      "loss": 2.0579,
+      "step": 1470
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 9.535759096612297e-06,
+      "loss": 2.0573,
+      "step": 1480
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 9.532622333751569e-06,
+      "loss": 2.1078,
+      "step": 1490
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 9.529485570890841e-06,
+      "loss": 2.1503,
+      "step": 1500
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 9.526348808030114e-06,
+      "loss": 2.0821,
+      "step": 1510
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 9.523212045169386e-06,
+      "loss": 2.1639,
+      "step": 1520
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 9.520075282308658e-06,
+      "loss": 2.1036,
+      "step": 1530
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 9.51693851944793e-06,
+      "loss": 2.0536,
+      "step": 1540
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 9.513801756587203e-06,
+      "loss": 2.1113,
+      "step": 1550
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 9.510664993726475e-06,
+      "loss": 2.1774,
+      "step": 1560
+    },
+    {
+      "epoch": 0.49,
+      "learning_rate": 9.507528230865747e-06,
+      "loss": 2.0827,
+      "step": 1570
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 9.50439146800502e-06,
+      "loss": 2.1706,
+      "step": 1580
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 9.501254705144292e-06,
+      "loss": 2.1416,
+      "step": 1590
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 9.498117942283565e-06,
+      "loss": 2.0689,
+      "step": 1600
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 9.494981179422836e-06,
+      "loss": 2.1223,
+      "step": 1610
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 9.491844416562108e-06,
+      "loss": 2.1743,
+      "step": 1620
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 9.48870765370138e-06,
+      "loss": 2.1426,
+      "step": 1630
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 9.485570890840653e-06,
+      "loss": 2.0827,
+      "step": 1640
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 9.482434127979926e-06,
+      "loss": 2.0918,
+      "step": 1650
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 9.479297365119198e-06,
+      "loss": 2.1753,
+      "step": 1660
+    },
+    {
+      "epoch": 0.52,
+      "learning_rate": 9.476160602258469e-06,
+      "loss": 2.1543,
+      "step": 1670
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 9.473023839397743e-06,
+      "loss": 2.1697,
+      "step": 1680
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 9.469887076537015e-06,
+      "loss": 2.1075,
+      "step": 1690
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 9.466750313676287e-06,
+      "loss": 2.1636,
+      "step": 1700
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 9.463613550815559e-06,
+      "loss": 2.219,
+      "step": 1710
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 9.460476787954832e-06,
+      "loss": 2.1835,
+      "step": 1720
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 9.457340025094104e-06,
+      "loss": 2.1288,
+      "step": 1730
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 9.454203262233376e-06,
+      "loss": 2.218,
+      "step": 1740
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 9.451066499372648e-06,
+      "loss": 2.19,
+      "step": 1750
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 9.44792973651192e-06,
+      "loss": 2.0468,
+      "step": 1760
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 9.444792973651193e-06,
+      "loss": 2.0883,
+      "step": 1770
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 9.441656210790465e-06,
+      "loss": 2.1965,
+      "step": 1780
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 9.438519447929737e-06,
+      "loss": 2.1823,
+      "step": 1790
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 9.43538268506901e-06,
+      "loss": 2.1347,
+      "step": 1800
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 9.432245922208282e-06,
+      "loss": 2.1487,
+      "step": 1810
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 9.429109159347554e-06,
+      "loss": 2.1439,
+      "step": 1820
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 9.425972396486827e-06,
+      "loss": 2.0782,
+      "step": 1830
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 9.422835633626098e-06,
+      "loss": 2.1454,
+      "step": 1840
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 9.41969887076537e-06,
+      "loss": 2.1088,
+      "step": 1850
+    },
+    {
+      "epoch": 0.58,
+      "learning_rate": 9.416562107904644e-06,
+      "loss": 2.1606,
+      "step": 1860
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 9.413425345043915e-06,
+      "loss": 2.1161,
+      "step": 1870
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 9.410288582183188e-06,
+      "loss": 2.0404,
+      "step": 1880
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 9.40715181932246e-06,
+      "loss": 2.0949,
+      "step": 1890
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 9.404015056461733e-06,
+      "loss": 2.1427,
+      "step": 1900
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 9.400878293601005e-06,
+      "loss": 2.2431,
+      "step": 1910
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 9.397741530740276e-06,
+      "loss": 2.1508,
+      "step": 1920
+    },
+    {
+      "epoch": 0.61,
+      "learning_rate": 9.394604767879549e-06,
+      "loss": 2.1498,
+      "step": 1930
+    },
+    {
+      "epoch": 0.61,
+      "learning_rate": 9.391468005018822e-06,
+      "loss": 2.1357,
+      "step": 1940
+    },
+    {
+      "epoch": 0.61,
+      "learning_rate": 9.388331242158094e-06,
+      "loss": 2.122,
+      "step": 1950
+    },
+    {
+      "epoch": 0.61,
+      "learning_rate": 9.385194479297366e-06,
+      "loss": 2.1007,
+      "step": 1960
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 9.382057716436639e-06,
+      "loss": 2.1127,
+      "step": 1970
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 9.378920953575911e-06,
+      "loss": 2.061,
+      "step": 1980
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 9.375784190715183e-06,
+      "loss": 2.1258,
+      "step": 1990
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 9.372647427854456e-06,
+      "loss": 2.1015,
+      "step": 2000
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 9.369510664993727e-06,
+      "loss": 2.103,
+      "step": 2010
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 9.366373902133e-06,
+      "loss": 2.1019,
+      "step": 2020
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 9.363237139272273e-06,
+      "loss": 2.0751,
+      "step": 2030
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 9.360100376411544e-06,
+      "loss": 2.1114,
+      "step": 2040
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 9.356963613550817e-06,
+      "loss": 2.1199,
+      "step": 2050
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 9.35382685069009e-06,
+      "loss": 2.1365,
+      "step": 2060
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 9.35069008782936e-06,
+      "loss": 2.1187,
+      "step": 2070
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 9.347553324968634e-06,
+      "loss": 2.0958,
+      "step": 2080
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 9.344416562107905e-06,
+      "loss": 2.0829,
+      "step": 2090
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 9.341279799247178e-06,
+      "loss": 2.1963,
+      "step": 2100
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 9.33814303638645e-06,
+      "loss": 2.0786,
+      "step": 2110
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 9.335006273525722e-06,
+      "loss": 2.1495,
+      "step": 2120
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 9.331869510664995e-06,
+      "loss": 2.0716,
+      "step": 2130
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 9.328732747804266e-06,
+      "loss": 2.1562,
+      "step": 2140
+    },
+    {
+      "epoch": 0.67,
+      "learning_rate": 9.325595984943539e-06,
+      "loss": 2.148,
+      "step": 2150
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 9.322459222082812e-06,
+      "loss": 2.0868,
+      "step": 2160
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 9.319322459222083e-06,
+      "loss": 2.0862,
+      "step": 2170
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 9.316185696361356e-06,
+      "loss": 2.019,
+      "step": 2180
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 9.313048933500629e-06,
+      "loss": 2.1371,
+      "step": 2190
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 9.309912170639902e-06,
+      "loss": 2.1355,
+      "step": 2200
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 9.306775407779173e-06,
+      "loss": 2.0676,
+      "step": 2210
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 9.303638644918444e-06,
+      "loss": 2.0913,
+      "step": 2220
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 9.300501882057717e-06,
+      "loss": 2.0628,
+      "step": 2230
+    },
+    {
+      "epoch": 0.7,
+      "learning_rate": 9.29736511919699e-06,
+      "loss": 2.1189,
+      "step": 2240
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 9.294228356336261e-06,
+      "loss": 2.0403,
+      "step": 2250
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 9.291091593475534e-06,
+      "loss": 2.1222,
+      "step": 2260
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 9.287954830614807e-06,
+      "loss": 2.1159,
+      "step": 2270
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 9.28481806775408e-06,
+      "loss": 2.0344,
+      "step": 2280
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 9.28168130489335e-06,
+      "loss": 2.079,
+      "step": 2290
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 9.278544542032622e-06,
+      "loss": 2.1272,
+      "step": 2300
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 9.275407779171895e-06,
+      "loss": 2.1058,
+      "step": 2310
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 9.272271016311168e-06,
+      "loss": 2.1079,
+      "step": 2320
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 9.26913425345044e-06,
+      "loss": 2.1314,
+      "step": 2330
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 9.265997490589712e-06,
+      "loss": 2.0434,
+      "step": 2340
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 9.262860727728985e-06,
+      "loss": 2.1497,
+      "step": 2350
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 9.259723964868258e-06,
+      "loss": 2.0986,
+      "step": 2360
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 9.256587202007529e-06,
+      "loss": 2.1581,
+      "step": 2370
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 9.253450439146802e-06,
+      "loss": 2.0301,
+      "step": 2380
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 9.250313676286073e-06,
+      "loss": 2.0729,
+      "step": 2390
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 9.247176913425346e-06,
+      "loss": 2.0853,
+      "step": 2400
+    },
+    {
+      "epoch": 0.76,
+      "learning_rate": 9.244040150564619e-06,
+      "loss": 2.0289,
+      "step": 2410
+    },
+    {
+      "epoch": 0.76,
+      "learning_rate": 9.24090338770389e-06,
+      "loss": 2.0674,
+      "step": 2420
+    },
+    {
+      "epoch": 0.76,
+      "learning_rate": 9.237766624843163e-06,
+      "loss": 2.1135,
+      "step": 2430
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 9.234629861982434e-06,
+      "loss": 2.0942,
+      "step": 2440
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 9.231493099121707e-06,
+      "loss": 2.0733,
+      "step": 2450
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 9.22835633626098e-06,
+      "loss": 2.0782,
+      "step": 2460
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 9.225219573400251e-06,
+      "loss": 2.0589,
+      "step": 2470
+    },
+    {
+      "epoch": 0.78,
+      "learning_rate": 9.222082810539524e-06,
+      "loss": 2.114,
+      "step": 2480
+    },
+    {
+      "epoch": 0.78,
+      "learning_rate": 9.218946047678797e-06,
+      "loss": 2.1313,
+      "step": 2490
+    },
+    {
+      "epoch": 0.78,
+      "learning_rate": 9.215809284818068e-06,
+      "loss": 2.1386,
+      "step": 2500
+    },
+    {
+      "epoch": 0.79,
+      "learning_rate": 9.21267252195734e-06,
+      "loss": 2.0737,
+      "step": 2510
+    },
+    {
+      "epoch": 0.79,
+      "learning_rate": 9.209535759096612e-06,
+      "loss": 2.0333,
+      "step": 2520
+    },
+    {
+      "epoch": 0.79,
+      "learning_rate": 9.206398996235885e-06,
+      "loss": 2.1316,
+      "step": 2530
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 9.203262233375158e-06,
+      "loss": 2.1812,
+      "step": 2540
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 9.200125470514429e-06,
+      "loss": 2.1846,
+      "step": 2550
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 9.196988707653702e-06,
+      "loss": 1.9971,
+      "step": 2560
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.193851944792975e-06,
+      "loss": 2.0572,
+      "step": 2570
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.190715181932248e-06,
+      "loss": 2.0798,
+      "step": 2580
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 9.187578419071519e-06,
+      "loss": 2.1017,
+      "step": 2590
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 9.18444165621079e-06,
+      "loss": 2.062,
+      "step": 2600
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 9.181304893350063e-06,
+      "loss": 2.0854,
+      "step": 2610
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 9.178168130489336e-06,
+      "loss": 2.0734,
+      "step": 2620
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 9.175031367628609e-06,
+      "loss": 2.1468,
+      "step": 2630
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 9.17189460476788e-06,
+      "loss": 2.0528,
+      "step": 2640
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 9.168757841907153e-06,
+      "loss": 2.0246,
+      "step": 2650
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 9.165621079046426e-06,
+      "loss": 2.0171,
+      "step": 2660
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 9.162484316185697e-06,
+      "loss": 2.0079,
+      "step": 2670
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 9.15934755332497e-06,
+      "loss": 2.0205,
+      "step": 2680
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 9.156210790464241e-06,
+      "loss": 2.021,
+      "step": 2690
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 9.153074027603514e-06,
+      "loss": 2.1194,
+      "step": 2700
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 9.149937264742787e-06,
+      "loss": 2.1149,
+      "step": 2710
+    },
+    {
+      "epoch": 0.85,
+      "learning_rate": 9.146800501882058e-06,
+      "loss": 2.0479,
+      "step": 2720
+    },
+    {
+      "epoch": 0.86,
+      "learning_rate": 9.143663739021331e-06,
+      "loss": 2.0321,
+      "step": 2730
+    },
+    {
+      "epoch": 0.86,
+      "learning_rate": 9.140526976160604e-06,
+      "loss": 2.0622,
+      "step": 2740
+    },
+    {
+      "epoch": 0.86,
+      "learning_rate": 9.137390213299875e-06,
+      "loss": 2.0104,
+      "step": 2750
+    },
+    {
+      "epoch": 0.87,
+      "learning_rate": 9.134253450439148e-06,
+      "loss": 2.0323,
+      "step": 2760
+    },
+    {
+      "epoch": 0.87,
+      "learning_rate": 9.131116687578419e-06,
+      "loss": 2.0314,
+      "step": 2770
+    },
+    {
+      "epoch": 0.87,
+      "learning_rate": 9.127979924717692e-06,
+      "loss": 2.0728,
+      "step": 2780
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 9.124843161856965e-06,
+      "loss": 1.9149,
+      "step": 2790
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 9.121706398996236e-06,
+      "loss": 2.1232,
+      "step": 2800
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 9.118569636135509e-06,
+      "loss": 2.1028,
+      "step": 2810
+    },
+    {
+      "epoch": 0.88,
+      "learning_rate": 9.11543287327478e-06,
+      "loss": 2.1481,
+      "step": 2820
+    },
+    {
+      "epoch": 0.89,
+      "learning_rate": 9.112296110414055e-06,
+      "loss": 2.0296,
+      "step": 2830
+    },
+    {
+      "epoch": 0.89,
+      "learning_rate": 9.109159347553326e-06,
+      "loss": 2.108,
+      "step": 2840
+    },
+    {
+      "epoch": 0.89,
+      "learning_rate": 9.106022584692597e-06,
+      "loss": 2.0785,
+      "step": 2850
+    },
+    {
+      "epoch": 0.9,
+      "learning_rate": 9.10288582183187e-06,
+      "loss": 2.0647,
+      "step": 2860
+    },
+    {
+      "epoch": 0.9,
+      "learning_rate": 9.099749058971143e-06,
+      "loss": 2.0573,
+      "step": 2870
+    },
+    {
+      "epoch": 0.9,
+      "learning_rate": 9.096612296110416e-06,
+      "loss": 2.008,
+      "step": 2880
+    },
+    {
+      "epoch": 0.91,
+      "learning_rate": 9.093475533249687e-06,
+      "loss": 2.1762,
+      "step": 2890
+    },
+    {
+      "epoch": 0.91,
+      "learning_rate": 9.090338770388958e-06,
+      "loss": 2.0266,
+      "step": 2900
+    },
+    {
+      "epoch": 0.91,
+      "learning_rate": 9.087202007528231e-06,
+      "loss": 1.9967,
+      "step": 2910
+    },
+    {
+      "epoch": 0.92,
+      "learning_rate": 9.084065244667504e-06,
+      "loss": 2.1213,
+      "step": 2920
+    },
+    {
+      "epoch": 0.92,
+      "learning_rate": 9.080928481806777e-06,
+      "loss": 2.0734,
+      "step": 2930
+    },
+    {
+      "epoch": 0.92,
+      "learning_rate": 9.077791718946048e-06,
+      "loss": 2.0931,
+      "step": 2940
+    },
+    {
+      "epoch": 0.93,
+      "learning_rate": 9.074654956085321e-06,
+      "loss": 2.1039,
+      "step": 2950
+    },
+    {
+      "epoch": 0.93,
+      "learning_rate": 9.071518193224594e-06,
+      "loss": 2.031,
+      "step": 2960
+    },
+    {
+      "epoch": 0.93,
+      "learning_rate": 9.068381430363865e-06,
+      "loss": 2.1349,
+      "step": 2970
+    },
+    {
+      "epoch": 0.93,
+      "learning_rate": 9.065244667503138e-06,
+      "loss": 2.1053,
+      "step": 2980
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 9.06210790464241e-06,
+      "loss": 2.1441,
+      "step": 2990
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 9.058971141781682e-06,
+      "loss": 2.1022,
+      "step": 3000
+    },
+    {
+      "epoch": 0.94,
+      "learning_rate": 9.055834378920955e-06,
+      "loss": 2.0802,
+      "step": 3010
+    },
+    {
+      "epoch": 0.95,
+      "learning_rate": 9.052697616060226e-06,
+      "loss": 2.1117,
+      "step": 3020
+    },
+    {
+      "epoch": 0.95,
+      "learning_rate": 9.049560853199499e-06,
+      "loss": 2.1069,
+      "step": 3030
+    },
+    {
+      "epoch": 0.95,
+      "learning_rate": 9.046424090338772e-06,
+      "loss": 2.0744,
+      "step": 3040
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 9.043287327478043e-06,
+      "loss": 2.0427,
+      "step": 3050
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 9.040150564617316e-06,
+      "loss": 2.0614,
+      "step": 3060
+    },
+    {
+      "epoch": 0.96,
+      "learning_rate": 9.037013801756587e-06,
+      "loss": 2.1204,
+      "step": 3070
+    },
+    {
+      "epoch": 0.97,
+      "learning_rate": 9.03387703889586e-06,
+      "loss": 2.0268,
+      "step": 3080
+    },
+    {
+      "epoch": 0.97,
+      "learning_rate": 9.030740276035133e-06,
+      "loss": 2.0976,
+      "step": 3090
+    },
+    {
+      "epoch": 0.97,
+      "learning_rate": 9.027603513174404e-06,
+      "loss": 2.0615,
+      "step": 3100
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 9.024466750313677e-06,
+      "loss": 2.0721,
+      "step": 3110
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 9.02132998745295e-06,
+      "loss": 2.0986,
+      "step": 3120
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 9.018193224592221e-06,
+      "loss": 2.0805,
+      "step": 3130
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 9.015056461731494e-06,
+      "loss": 2.0576,
+      "step": 3140
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 9.011919698870765e-06,
+      "loss": 2.0693,
+      "step": 3150
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 9.008782936010038e-06,
+      "loss": 2.0951,
+      "step": 3160
+    },
+    {
+      "epoch": 0.99,
+      "learning_rate": 9.005646173149311e-06,
+      "loss": 2.067,
+      "step": 3170
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 9.002509410288582e-06,
+      "loss": 2.0476,
+      "step": 3180
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.9344778060913086,
+      "eval_runtime": 13.6121,
+      "eval_samples_per_second": 73.464,
+      "eval_steps_per_second": 4.628,
+      "step": 3188
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 8.999372647427855e-06,
+      "loss": 2.0991,
+      "step": 3190
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 8.996235884567126e-06,
+      "loss": 2.1577,
+      "step": 3200
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 8.993099121706401e-06,
+      "loss": 2.0382,
+      "step": 3210
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 8.989962358845672e-06,
+      "loss": 2.0758,
+      "step": 3220
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 8.986825595984943e-06,
+      "loss": 2.0445,
+      "step": 3230
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 8.983688833124216e-06,
+      "loss": 1.9499,
+      "step": 3240
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 8.980552070263489e-06,
+      "loss": 2.0883,
+      "step": 3250
+    },
+    {
+      "epoch": 1.02,
+      "learning_rate": 8.977415307402762e-06,
+      "loss": 2.1311,
+      "step": 3260
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 8.974278544542033e-06,
+      "loss": 2.0249,
+      "step": 3270
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 8.971141781681304e-06,
+      "loss": 2.0753,
+      "step": 3280
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 8.968005018820577e-06,
+      "loss": 2.0507,
+      "step": 3290
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.96486825595985e-06,
+      "loss": 2.0353,
+      "step": 3300
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.961731493099123e-06,
+      "loss": 2.0154,
+      "step": 3310
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.958594730238394e-06,
+      "loss": 2.035,
+      "step": 3320
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 8.955457967377667e-06,
+      "loss": 2.0578,
+      "step": 3330
+    },
+    {
+      "epoch": 1.05,
+      "learning_rate": 8.95232120451694e-06,
+      "loss": 1.9501,
+      "step": 3340
+    },
+    {
+      "epoch": 1.05,
+      "learning_rate": 8.949184441656211e-06,
+      "loss": 1.9773,
+      "step": 3350
+    },
+    {
+      "epoch": 1.05,
+      "learning_rate": 8.946047678795484e-06,
+      "loss": 2.0441,
+      "step": 3360
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 8.942910915934755e-06,
+      "loss": 2.1017,
+      "step": 3370
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 8.939774153074028e-06,
+      "loss": 2.0324,
+      "step": 3380
+    },
+    {
+      "epoch": 1.06,
+      "learning_rate": 8.936637390213301e-06,
+      "loss": 2.1506,
+      "step": 3390
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 8.933500627352572e-06,
+      "loss": 2.0552,
+      "step": 3400
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 8.930363864491845e-06,
+      "loss": 2.031,
+      "step": 3410
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 8.927227101631118e-06,
+      "loss": 1.9732,
+      "step": 3420
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 8.92409033877039e-06,
+      "loss": 2.02,
+      "step": 3430
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 8.920953575909662e-06,
+      "loss": 2.0123,
+      "step": 3440
+    },
+    {
+      "epoch": 1.08,
+      "learning_rate": 8.917816813048933e-06,
+      "loss": 2.0242,
+      "step": 3450
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 8.914680050188206e-06,
+      "loss": 2.0093,
+      "step": 3460
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 8.91154328732748e-06,
+      "loss": 2.0639,
+      "step": 3470
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 8.90840652446675e-06,
+      "loss": 2.0396,
+      "step": 3480
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 8.905269761606023e-06,
+      "loss": 2.0431,
+      "step": 3490
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 8.902132998745296e-06,
+      "loss": 1.9602,
+      "step": 3500
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 8.898996235884569e-06,
+      "loss": 2.0483,
+      "step": 3510
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 8.89585947302384e-06,
+      "loss": 2.0904,
+      "step": 3520
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 8.892722710163111e-06,
+      "loss": 2.0676,
+      "step": 3530
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 8.889585947302384e-06,
+      "loss": 2.095,
+      "step": 3540
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 8.886449184441657e-06,
+      "loss": 2.0368,
+      "step": 3550
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 8.88331242158093e-06,
+      "loss": 1.9903,
+      "step": 3560
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 8.880175658720201e-06,
+      "loss": 2.0505,
+      "step": 3570
+    },
+    {
+      "epoch": 1.12,
+      "learning_rate": 8.877038895859473e-06,
+      "loss": 2.0756,
+      "step": 3580
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 8.873902132998745e-06,
+      "loss": 2.0298,
+      "step": 3590
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 8.870765370138018e-06,
+      "loss": 2.1276,
+      "step": 3600
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 8.867628607277291e-06,
+      "loss": 1.9594,
+      "step": 3610
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 8.864491844416562e-06,
+      "loss": 2.047,
+      "step": 3620
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 8.861355081555835e-06,
+      "loss": 2.056,
+      "step": 3630
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 8.858218318695108e-06,
+      "loss": 2.0638,
+      "step": 3640
+    },
+    {
+      "epoch": 1.14,
+      "learning_rate": 8.85508155583438e-06,
+      "loss": 2.0316,
+      "step": 3650
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 8.851944792973652e-06,
+      "loss": 2.0345,
+      "step": 3660
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 8.848808030112923e-06,
+      "loss": 2.0186,
+      "step": 3670
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 8.845671267252196e-06,
+      "loss": 2.0418,
+      "step": 3680
+    },
+    {
+      "epoch": 1.16,
+      "learning_rate": 8.84253450439147e-06,
+      "loss": 1.9689,
+      "step": 3690
+    },
+    {
+      "epoch": 1.16,
+      "learning_rate": 8.83939774153074e-06,
+      "loss": 2.0088,
+      "step": 3700
+    },
+    {
+      "epoch": 1.16,
+      "learning_rate": 8.836260978670013e-06,
+      "loss": 2.0619,
+      "step": 3710
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 8.833124215809286e-06,
+      "loss": 2.0637,
+      "step": 3720
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 8.829987452948557e-06,
+      "loss": 1.9639,
+      "step": 3730
+    },
+    {
+      "epoch": 1.17,
+      "learning_rate": 8.82685069008783e-06,
+      "loss": 2.02,
+      "step": 3740
+    },
+    {
+      "epoch": 1.18,
+      "learning_rate": 8.823713927227102e-06,
+      "loss": 1.9772,
+      "step": 3750
+    },
+    {
+      "epoch": 1.18,
+      "learning_rate": 8.820577164366374e-06,
+      "loss": 1.9963,
+      "step": 3760
+    },
+    {
+      "epoch": 1.18,
+      "learning_rate": 8.817440401505647e-06,
+      "loss": 2.0384,
+      "step": 3770
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 8.814303638644919e-06,
+      "loss": 2.1394,
+      "step": 3780
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 8.811166875784191e-06,
+      "loss": 2.0293,
+      "step": 3790
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 8.808030112923464e-06,
+      "loss": 2.0535,
+      "step": 3800
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 8.804893350062737e-06,
+      "loss": 2.0553,
+      "step": 3810
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 8.801756587202008e-06,
+      "loss": 1.9466,
+      "step": 3820
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 8.79861982434128e-06,
+      "loss": 2.099,
+      "step": 3830
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 8.795483061480552e-06,
+      "loss": 2.0236,
+      "step": 3840
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.792346298619825e-06,
+      "loss": 1.9915,
+      "step": 3850
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.789209535759098e-06,
+      "loss": 2.07,
+      "step": 3860
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.78607277289837e-06,
+      "loss": 2.0757,
+      "step": 3870
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 8.782936010037642e-06,
+      "loss": 2.0571,
+      "step": 3880
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 8.779799247176915e-06,
+      "loss": 2.0173,
+      "step": 3890
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 8.776662484316186e-06,
+      "loss": 2.014,
+      "step": 3900
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 8.77352572145546e-06,
+      "loss": 2.0263,
+      "step": 3910
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 8.77038895859473e-06,
+      "loss": 2.024,
+      "step": 3920
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 8.767252195734003e-06,
+      "loss": 2.0025,
+      "step": 3930
+    },
+    {
+      "epoch": 1.24,
+      "learning_rate": 8.764115432873276e-06,
+      "loss": 2.0337,
+      "step": 3940
+    },
+    {
+      "epoch": 1.24,
+      "learning_rate": 8.760978670012547e-06,
+      "loss": 2.0432,
+      "step": 3950
+    },
+    {
+      "epoch": 1.24,
+      "learning_rate": 8.75784190715182e-06,
+      "loss": 2.0928,
+      "step": 3960
+    },
+    {
+      "epoch": 1.25,
+      "learning_rate": 8.754705144291092e-06,
+      "loss": 2.0108,
+      "step": 3970
+    },
+    {
+      "epoch": 1.25,
+      "learning_rate": 8.751568381430364e-06,
+      "loss": 2.0603,
+      "step": 3980
+    },
+    {
+      "epoch": 1.25,
+      "learning_rate": 8.748431618569637e-06,
+      "loss": 2.1245,
+      "step": 3990
+    },
+    {
+      "epoch": 1.25,
+      "learning_rate": 8.745294855708909e-06,
+      "loss": 2.0799,
+      "step": 4000
+    },
+    {
+      "epoch": 1.26,
+      "learning_rate": 8.742158092848181e-06,
+      "loss": 2.083,
+      "step": 4010
+    },
+    {
+      "epoch": 1.26,
+      "learning_rate": 8.739021329987454e-06,
+      "loss": 1.9249,
+      "step": 4020
+    },
+    {
+      "epoch": 1.26,
+      "learning_rate": 8.735884567126726e-06,
+      "loss": 2.0568,
+      "step": 4030
+    },
+    {
+      "epoch": 1.27,
+      "learning_rate": 8.732747804265998e-06,
+      "loss": 2.037,
+      "step": 4040
+    },
+    {
+      "epoch": 1.27,
+      "learning_rate": 8.72961104140527e-06,
+      "loss": 2.0338,
+      "step": 4050
+    },
+    {
+      "epoch": 1.27,
+      "learning_rate": 8.726474278544543e-06,
+      "loss": 1.9625,
+      "step": 4060
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 8.723337515683815e-06,
+      "loss": 2.0356,
+      "step": 4070
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 8.720200752823087e-06,
+      "loss": 2.0143,
+      "step": 4080
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 8.71706398996236e-06,
+      "loss": 2.0188,
+      "step": 4090
+    },
+    {
+      "epoch": 1.29,
+      "learning_rate": 8.713927227101632e-06,
+      "loss": 1.9693,
+      "step": 4100
+    },
+    {
+      "epoch": 1.29,
+      "learning_rate": 8.710790464240904e-06,
+      "loss": 1.9619,
+      "step": 4110
+    },
+    {
+      "epoch": 1.29,
+      "learning_rate": 8.707653701380176e-06,
+      "loss": 2.012,
+      "step": 4120
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 8.704516938519448e-06,
+      "loss": 2.0252,
+      "step": 4130
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 8.70138017565872e-06,
+      "loss": 2.0667,
+      "step": 4140
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 8.698243412797993e-06,
+      "loss": 1.9636,
+      "step": 4150
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 8.695106649937265e-06,
+      "loss": 2.0088,
+      "step": 4160
+    },
+    {
+      "epoch": 1.31,
+      "learning_rate": 8.691969887076538e-06,
+      "loss": 2.0431,
+      "step": 4170
+    },
+    {
+      "epoch": 1.31,
+      "learning_rate": 8.68883312421581e-06,
+      "loss": 1.9949,
+      "step": 4180
+    },
+    {
+      "epoch": 1.31,
+      "learning_rate": 8.685696361355083e-06,
+      "loss": 2.0387,
+      "step": 4190
+    },
+    {
+      "epoch": 1.32,
+      "learning_rate": 8.682559598494355e-06,
+      "loss": 2.0097,
+      "step": 4200
+    },
+    {
+      "epoch": 1.32,
+      "learning_rate": 8.679422835633626e-06,
+      "loss": 1.9744,
+      "step": 4210
+    },
+    {
+      "epoch": 1.32,
+      "learning_rate": 8.676286072772899e-06,
+      "loss": 2.0095,
+      "step": 4220
+    },
+    {
+      "epoch": 1.33,
+      "learning_rate": 8.673149309912172e-06,
+      "loss": 2.0816,
+      "step": 4230
+    },
+    {
+      "epoch": 1.33,
+      "learning_rate": 8.670012547051444e-06,
+      "loss": 2.0112,
+      "step": 4240
+    },
+    {
+      "epoch": 1.33,
+      "learning_rate": 8.666875784190716e-06,
+      "loss": 1.9396,
+      "step": 4250
+    },
+    {
+      "epoch": 1.34,
+      "learning_rate": 8.663739021329989e-06,
+      "loss": 2.0894,
+      "step": 4260
+    },
+    {
+      "epoch": 1.34,
+      "learning_rate": 8.660602258469261e-06,
+      "loss": 2.0172,
+      "step": 4270
+    },
+    {
+      "epoch": 1.34,
+      "learning_rate": 8.657465495608533e-06,
+      "loss": 2.0465,
+      "step": 4280
+    },
+    {
+      "epoch": 1.35,
+      "learning_rate": 8.654328732747805e-06,
+      "loss": 1.9854,
+      "step": 4290
+    },
+    {
+      "epoch": 1.35,
+      "learning_rate": 8.651191969887077e-06,
+      "loss": 2.0557,
+      "step": 4300
+    },
+    {
+      "epoch": 1.35,
+      "learning_rate": 8.64805520702635e-06,
+      "loss": 2.0323,
+      "step": 4310
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 8.644918444165622e-06,
+      "loss": 2.0219,
+      "step": 4320
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 8.641781681304894e-06,
+      "loss": 2.0503,
+      "step": 4330
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 8.638644918444167e-06,
+      "loss": 2.0392,
+      "step": 4340
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 8.635508155583438e-06,
+      "loss": 2.0205,
+      "step": 4350
+    },
+    {
+      "epoch": 1.37,
+      "learning_rate": 8.63237139272271e-06,
+      "loss": 2.0776,
+      "step": 4360
+    },
+    {
+      "epoch": 1.37,
+      "learning_rate": 8.629234629861984e-06,
+      "loss": 2.0107,
+      "step": 4370
+    },
+    {
+      "epoch": 1.37,
+      "learning_rate": 8.626097867001255e-06,
+      "loss": 1.9607,
+      "step": 4380
+    },
+    {
+      "epoch": 1.38,
+      "learning_rate": 8.622961104140528e-06,
+      "loss": 2.0339,
+      "step": 4390
+    },
+    {
+      "epoch": 1.38,
+      "learning_rate": 8.6198243412798e-06,
+      "loss": 1.9847,
+      "step": 4400
+    },
+    {
+      "epoch": 1.38,
+      "learning_rate": 8.616687578419072e-06,
+      "loss": 1.9772,
+      "step": 4410
+    },
+    {
+      "epoch": 1.39,
+      "learning_rate": 8.613550815558345e-06,
+      "loss": 2.0,
+      "step": 4420
+    },
+    {
+      "epoch": 1.39,
+      "learning_rate": 8.610414052697616e-06,
+      "loss": 1.9789,
+      "step": 4430
+    },
+    {
+      "epoch": 1.39,
+      "learning_rate": 8.607277289836889e-06,
+      "loss": 2.021,
+      "step": 4440
+    },
+    {
+      "epoch": 1.4,
+      "learning_rate": 8.604140526976162e-06,
+      "loss": 2.0075,
+      "step": 4450
+    },
+    {
+      "epoch": 1.4,
+      "learning_rate": 8.601003764115433e-06,
+      "loss": 1.948,
+      "step": 4460
+    },
+    {
+      "epoch": 1.4,
+      "learning_rate": 8.597867001254706e-06,
+      "loss": 1.9574,
+      "step": 4470
+    },
+    {
+      "epoch": 1.41,
+      "learning_rate": 8.594730238393979e-06,
+      "loss": 1.9524,
+      "step": 4480
+    },
+    {
+      "epoch": 1.41,
+      "learning_rate": 8.591593475533251e-06,
+      "loss": 2.0068,
+      "step": 4490
+    },
+    {
+      "epoch": 1.41,
+      "learning_rate": 8.588456712672523e-06,
+      "loss": 2.0517,
+      "step": 4500
+    },
+    {
+      "epoch": 1.41,
+      "learning_rate": 8.585319949811794e-06,
+      "loss": 2.0514,
+      "step": 4510
+    },
+    {
+      "epoch": 1.42,
+      "learning_rate": 8.582183186951067e-06,
+      "loss": 2.0359,
+      "step": 4520
+    },
+    {
+      "epoch": 1.42,
+      "learning_rate": 8.57904642409034e-06,
+      "loss": 2.0724,
+      "step": 4530
+    },
+    {
+      "epoch": 1.42,
+      "learning_rate": 8.575909661229613e-06,
+      "loss": 2.0501,
+      "step": 4540
+    },
+    {
+      "epoch": 1.43,
+      "learning_rate": 8.572772898368884e-06,
+      "loss": 1.9899,
+      "step": 4550
+    },
+    {
+      "epoch": 1.43,
+      "learning_rate": 8.569636135508157e-06,
+      "loss": 2.0427,
+      "step": 4560
+    },
+    {
+      "epoch": 1.43,
+      "learning_rate": 8.56649937264743e-06,
+      "loss": 2.0431,
+      "step": 4570
+    },
+    {
+      "epoch": 1.44,
+      "learning_rate": 8.5633626097867e-06,
+      "loss": 2.0224,
+      "step": 4580
+    },
+    {
+      "epoch": 1.44,
+      "learning_rate": 8.560225846925974e-06,
+      "loss": 2.0217,
+      "step": 4590
+    },
+    {
+      "epoch": 1.44,
+      "learning_rate": 8.557089084065245e-06,
+      "loss": 2.0551,
+      "step": 4600
+    },
+    {
+      "epoch": 1.45,
+      "learning_rate": 8.553952321204518e-06,
+      "loss": 2.0365,
+      "step": 4610
+    },
+    {
+      "epoch": 1.45,
+      "learning_rate": 8.55081555834379e-06,
+      "loss": 2.0334,
+      "step": 4620
+    },
+    {
+      "epoch": 1.45,
+      "learning_rate": 8.547678795483062e-06,
+      "loss": 2.0267,
+      "step": 4630
+    },
+    {
+      "epoch": 1.46,
+      "learning_rate": 8.544542032622335e-06,
+      "loss": 2.0201,
+      "step": 4640
+    },
+    {
+      "epoch": 1.46,
+      "learning_rate": 8.541405269761608e-06,
+      "loss": 1.9535,
+      "step": 4650
+    },
+    {
+      "epoch": 1.46,
+      "learning_rate": 8.538268506900879e-06,
+      "loss": 2.0361,
+      "step": 4660
+    },
+    {
+      "epoch": 1.46,
+      "learning_rate": 8.535131744040152e-06,
+      "loss": 2.0938,
+      "step": 4670
+    },
+    {
+      "epoch": 1.47,
+      "learning_rate": 8.531994981179423e-06,
+      "loss": 1.9426,
+      "step": 4680
+    },
+    {
+      "epoch": 1.47,
+      "learning_rate": 8.528858218318696e-06,
+      "loss": 2.0573,
+      "step": 4690
+    },
+    {
+      "epoch": 1.47,
+      "learning_rate": 8.525721455457969e-06,
+      "loss": 1.96,
+      "step": 4700
+    },
+    {
+      "epoch": 1.48,
+      "learning_rate": 8.52258469259724e-06,
+      "loss": 2.039,
+      "step": 4710
+    },
+    {
+      "epoch": 1.48,
+      "learning_rate": 8.519447929736513e-06,
+      "loss": 2.0648,
+      "step": 4720
+    },
+    {
+      "epoch": 1.48,
+      "learning_rate": 8.516311166875784e-06,
+      "loss": 1.9469,
+      "step": 4730
+    },
+    {
+      "epoch": 1.49,
+      "learning_rate": 8.513174404015057e-06,
+      "loss": 2.0733,
+      "step": 4740
+    },
+    {
+      "epoch": 1.49,
+      "learning_rate": 8.51003764115433e-06,
+      "loss": 2.0004,
+      "step": 4750
+    },
+    {
+      "epoch": 1.49,
+      "learning_rate": 8.506900878293601e-06,
+      "loss": 2.014,
+      "step": 4760
+    },
+    {
+      "epoch": 1.5,
+      "learning_rate": 8.503764115432874e-06,
+      "loss": 1.9856,
+      "step": 4770
+    },
+    {
+      "epoch": 1.5,
+      "learning_rate": 8.500627352572147e-06,
+      "loss": 2.0515,
+      "step": 4780
+    },
+    {
+      "epoch": 1.5,
+      "learning_rate": 8.49749058971142e-06,
+      "loss": 2.0236,
+      "step": 4790
+    },
+    {
+      "epoch": 1.51,
+      "learning_rate": 8.49435382685069e-06,
+      "loss": 2.0687,
+      "step": 4800
+    },
+    {
+      "epoch": 1.51,
+      "learning_rate": 8.491217063989962e-06,
+      "loss": 1.9949,
+      "step": 4810
+    },
+    {
+      "epoch": 1.51,
+      "learning_rate": 8.488080301129235e-06,
+      "loss": 2.0212,
+      "step": 4820
+    },
+    {
+      "epoch": 1.52,
+      "learning_rate": 8.484943538268508e-06,
+      "loss": 2.0332,
+      "step": 4830
+    },
+    {
+      "epoch": 1.52,
+      "learning_rate": 8.48180677540778e-06,
+      "loss": 2.0289,
+      "step": 4840
+    },
+    {
+      "epoch": 1.52,
+      "learning_rate": 8.478670012547052e-06,
+      "loss": 2.0191,
+      "step": 4850
+    },
+    {
+      "epoch": 1.52,
+      "learning_rate": 8.475533249686325e-06,
+      "loss": 1.9577,
+      "step": 4860
+    },
+    {
+      "epoch": 1.53,
+      "learning_rate": 8.472396486825598e-06,
+      "loss": 2.0939,
+      "step": 4870
+    },
+    {
+      "epoch": 1.53,
+      "learning_rate": 8.469259723964869e-06,
+      "loss": 1.9506,
+      "step": 4880
+    },
+    {
+      "epoch": 1.53,
+      "learning_rate": 8.46612296110414e-06,
+      "loss": 1.8801,
+      "step": 4890
+    },
+    {
+      "epoch": 1.54,
+      "learning_rate": 8.462986198243413e-06,
+      "loss": 2.0006,
+      "step": 4900
+    },
+    {
+      "epoch": 1.54,
+      "learning_rate": 8.459849435382686e-06,
+      "loss": 2.0214,
+      "step": 4910
+    },
+    {
+      "epoch": 1.54,
+      "learning_rate": 8.456712672521959e-06,
+      "loss": 2.0355,
+      "step": 4920
+    },
+    {
+      "epoch": 1.55,
+      "learning_rate": 8.45357590966123e-06,
+      "loss": 2.0364,
+      "step": 4930
+    },
+    {
+      "epoch": 1.55,
+      "learning_rate": 8.450439146800503e-06,
+      "loss": 1.9857,
+      "step": 4940
+    },
+    {
+      "epoch": 1.55,
+      "learning_rate": 8.447302383939776e-06,
+      "loss": 1.9923,
+      "step": 4950
+    },
+    {
+      "epoch": 1.56,
+      "learning_rate": 8.444165621079047e-06,
+      "loss": 2.0392,
+      "step": 4960
+    },
+    {
+      "epoch": 1.56,
+      "learning_rate": 8.44102885821832e-06,
+      "loss": 2.041,
+      "step": 4970
+    },
+    {
+      "epoch": 1.56,
+      "learning_rate": 8.437892095357591e-06,
+      "loss": 2.0334,
+      "step": 4980
+    },
+    {
+      "epoch": 1.57,
+      "learning_rate": 8.434755332496864e-06,
+      "loss": 1.9577,
+      "step": 4990
+    },
+    {
+      "epoch": 1.57,
+      "learning_rate": 8.431618569636137e-06,
+      "loss": 2.0012,
+      "step": 5000
+    },
+    {
+      "epoch": 1.57,
+      "learning_rate": 8.428481806775408e-06,
+      "loss": 2.0923,
+      "step": 5010
+    },
+    {
+      "epoch": 1.57,
+      "learning_rate": 8.425345043914681e-06,
+      "loss": 1.9933,
+      "step": 5020
+    },
+    {
+      "epoch": 1.58,
+      "learning_rate": 8.422208281053954e-06,
+      "loss": 2.0297,
+      "step": 5030
+    },
+    {
+      "epoch": 1.58,
+      "learning_rate": 8.419071518193225e-06,
+      "loss": 1.9755,
+      "step": 5040
+    },
+    {
+      "epoch": 1.58,
+      "learning_rate": 8.415934755332498e-06,
+      "loss": 2.0193,
+      "step": 5050
+    },
+    {
+      "epoch": 1.59,
+      "learning_rate": 8.412797992471769e-06,
+      "loss": 2.048,
+      "step": 5060
+    },
+    {
+      "epoch": 1.59,
+      "learning_rate": 8.409661229611042e-06,
+      "loss": 1.9316,
+      "step": 5070
+    },
+    {
+      "epoch": 1.59,
+      "learning_rate": 8.406524466750315e-06,
+      "loss": 2.0211,
+      "step": 5080
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 8.403387703889586e-06,
+      "loss": 2.0684,
+      "step": 5090
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 8.400250941028859e-06,
+      "loss": 1.9619,
+      "step": 5100
+    },
+    {
+      "epoch": 1.6,
+      "learning_rate": 8.39711417816813e-06,
+      "loss": 1.9671,
+      "step": 5110
+    },
+    {
+      "epoch": 1.61,
+      "learning_rate": 8.393977415307403e-06,
+      "loss": 1.9211,
+      "step": 5120
+    },
+    {
+      "epoch": 1.61,
+      "learning_rate": 8.390840652446676e-06,
+      "loss": 1.9531,
+      "step": 5130
+    },
+    {
+      "epoch": 1.61,
+      "learning_rate": 8.387703889585947e-06,
+      "loss": 2.0234,
+      "step": 5140
+    },
+    {
+      "epoch": 1.62,
+      "learning_rate": 8.38456712672522e-06,
+      "loss": 2.0263,
+      "step": 5150
+    },
+    {
+      "epoch": 1.62,
+      "learning_rate": 8.381430363864493e-06,
+      "loss": 2.038,
+      "step": 5160
+    },
+    {
+      "epoch": 1.62,
+      "learning_rate": 8.378293601003766e-06,
+      "loss": 2.0003,
+      "step": 5170
+    },
+    {
+      "epoch": 1.62,
+      "learning_rate": 8.375156838143037e-06,
+      "loss": 2.003,
+      "step": 5180
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 8.372020075282308e-06,
+      "loss": 2.0148,
+      "step": 5190
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 8.368883312421581e-06,
+      "loss": 2.0672,
+      "step": 5200
+    },
+    {
+      "epoch": 1.63,
+      "learning_rate": 8.365746549560854e-06,
+      "loss": 2.0141,
+      "step": 5210
+    },
+    {
+      "epoch": 1.64,
+      "learning_rate": 8.362609786700127e-06,
+      "loss": 1.9545,
+      "step": 5220
+    },
+    {
+      "epoch": 1.64,
+      "learning_rate": 8.359473023839398e-06,
+      "loss": 1.87,
+      "step": 5230
+    },
+    {
+      "epoch": 1.64,
+      "learning_rate": 8.356336260978671e-06,
+      "loss": 1.9519,
+      "step": 5240
+    },
+    {
+      "epoch": 1.65,
+      "learning_rate": 8.353199498117944e-06,
+      "loss": 2.0665,
+      "step": 5250
+    },
+    {
+      "epoch": 1.65,
+      "learning_rate": 8.350062735257215e-06,
+      "loss": 1.9662,
+      "step": 5260
+    },
+    {
+      "epoch": 1.65,
+      "learning_rate": 8.346925972396488e-06,
+      "loss": 1.9447,
+      "step": 5270
+    },
+    {
+      "epoch": 1.66,
+      "learning_rate": 8.343789209535759e-06,
+      "loss": 2.0348,
+      "step": 5280
+    },
+    {
+      "epoch": 1.66,
+      "learning_rate": 8.340652446675032e-06,
+      "loss": 2.1091,
+      "step": 5290
+    },
+    {
+      "epoch": 1.66,
+      "learning_rate": 8.337515683814305e-06,
+      "loss": 2.0061,
+      "step": 5300
+    },
+    {
+      "epoch": 1.67,
+      "learning_rate": 8.334378920953576e-06,
+      "loss": 1.9448,
+      "step": 5310
+    },
+    {
+      "epoch": 1.67,
+      "learning_rate": 8.331242158092849e-06,
+      "loss": 1.9846,
+      "step": 5320
+    },
+    {
+      "epoch": 1.67,
+      "learning_rate": 8.328105395232122e-06,
+      "loss": 2.0324,
+      "step": 5330
+    },
+    {
+      "epoch": 1.68,
+      "learning_rate": 8.324968632371393e-06,
+      "loss": 2.0742,
+      "step": 5340
+    },
+    {
+      "epoch": 1.68,
+      "learning_rate": 8.321831869510666e-06,
+      "loss": 2.0402,
+      "step": 5350
+    },
+    {
+      "epoch": 1.68,
+      "learning_rate": 8.318695106649937e-06,
+      "loss": 2.0432,
+      "step": 5360
+    },
+    {
+      "epoch": 1.68,
+      "learning_rate": 8.31555834378921e-06,
+      "loss": 2.0438,
+      "step": 5370
+    },
+    {
+      "epoch": 1.69,
+      "learning_rate": 8.312421580928483e-06,
+      "loss": 2.0219,
+      "step": 5380
+    },
+    {
+      "epoch": 1.69,
+      "learning_rate": 8.309284818067754e-06,
+      "loss": 1.9956,
+      "step": 5390
+    },
+    {
+      "epoch": 1.69,
+      "learning_rate": 8.306148055207027e-06,
+      "loss": 2.099,
+      "step": 5400
+    },
+    {
+      "epoch": 1.7,
+      "learning_rate": 8.3030112923463e-06,
+      "loss": 2.0049,
+      "step": 5410
+    },
+    {
+      "epoch": 1.7,
+      "learning_rate": 8.299874529485573e-06,
+      "loss": 2.0073,
+      "step": 5420
+    },
+    {
+      "epoch": 1.7,
+      "learning_rate": 8.296737766624844e-06,
+      "loss": 1.922,
+      "step": 5430
+    },
+    {
+      "epoch": 1.71,
+      "learning_rate": 8.293601003764115e-06,
+      "loss": 2.0012,
+      "step": 5440
+    },
+    {
+      "epoch": 1.71,
+      "learning_rate": 8.290464240903388e-06,
+      "loss": 1.9273,
+      "step": 5450
+    },
+    {
+      "epoch": 1.71,
+      "learning_rate": 8.287327478042661e-06,
+      "loss": 1.9773,
+      "step": 5460
+    },
+    {
+      "epoch": 1.72,
+      "learning_rate": 8.284190715181934e-06,
+      "loss": 1.9979,
+      "step": 5470
+    },
+    {
+      "epoch": 1.72,
+      "learning_rate": 8.281053952321205e-06,
+      "loss": 2.0172,
+      "step": 5480
+    },
+    {
+      "epoch": 1.72,
+      "learning_rate": 8.277917189460476e-06,
+      "loss": 2.0063,
+      "step": 5490
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 8.27478042659975e-06,
+      "loss": 1.9983,
+      "step": 5500
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 8.271643663739022e-06,
+      "loss": 1.9818,
+      "step": 5510
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 8.268506900878295e-06,
+      "loss": 1.9305,
+      "step": 5520
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 8.265370138017566e-06,
+      "loss": 2.0492,
+      "step": 5530
+    },
+    {
+      "epoch": 1.74,
+      "learning_rate": 8.262233375156839e-06,
+      "loss": 2.0331,
+      "step": 5540
+    },
+    {
+      "epoch": 1.74,
+      "learning_rate": 8.259096612296112e-06,
+      "loss": 2.0205,
+      "step": 5550
+    },
+    {
+      "epoch": 1.74,
+      "learning_rate": 8.255959849435383e-06,
+      "loss": 1.9663,
+      "step": 5560
+    },
+    {
+      "epoch": 1.75,
+      "learning_rate": 8.252823086574656e-06,
+      "loss": 2.0373,
+      "step": 5570
+    },
+    {
+      "epoch": 1.75,
+      "learning_rate": 8.249686323713927e-06,
+      "loss": 2.0176,
+      "step": 5580
+    },
+    {
+      "epoch": 1.75,
+      "learning_rate": 8.2465495608532e-06,
+      "loss": 1.9972,
+      "step": 5590
+    },
+    {
+      "epoch": 1.76,
+      "learning_rate": 8.243412797992473e-06,
+      "loss": 2.0384,
+      "step": 5600
+    },
+    {
+      "epoch": 1.76,
+      "learning_rate": 8.240276035131744e-06,
+      "loss": 2.0283,
+      "step": 5610
+    },
+    {
+      "epoch": 1.76,
+      "learning_rate": 8.237139272271017e-06,
+      "loss": 1.8957,
+      "step": 5620
+    },
+    {
+      "epoch": 1.77,
+      "learning_rate": 8.23400250941029e-06,
+      "loss": 2.0583,
+      "step": 5630
+    },
+    {
+      "epoch": 1.77,
+      "learning_rate": 8.230865746549561e-06,
+      "loss": 1.9984,
+      "step": 5640
+    },
+    {
+      "epoch": 1.77,
+      "learning_rate": 8.227728983688834e-06,
+      "loss": 2.0165,
+      "step": 5650
+    },
+    {
+      "epoch": 1.78,
+      "learning_rate": 8.224592220828105e-06,
+      "loss": 2.0825,
+      "step": 5660
+    },
+    {
+      "epoch": 1.78,
+      "learning_rate": 8.221455457967378e-06,
+      "loss": 1.9821,
+      "step": 5670
+    },
+    {
+      "epoch": 1.78,
+      "learning_rate": 8.218318695106651e-06,
+      "loss": 1.9673,
+      "step": 5680
+    },
+    {
+      "epoch": 1.78,
+      "learning_rate": 8.215181932245922e-06,
+      "loss": 1.9948,
+      "step": 5690
+    },
+    {
+      "epoch": 1.79,
+      "learning_rate": 8.212045169385195e-06,
+      "loss": 2.0304,
+      "step": 5700
+    },
+    {
+      "epoch": 1.79,
+      "learning_rate": 8.208908406524468e-06,
+      "loss": 2.0357,
+      "step": 5710
+    },
+    {
+      "epoch": 1.79,
+      "learning_rate": 8.205771643663741e-06,
+      "loss": 2.0254,
+      "step": 5720
+    },
+    {
+      "epoch": 1.8,
+      "learning_rate": 8.202634880803012e-06,
+      "loss": 2.0155,
+      "step": 5730
+    },
+    {
+      "epoch": 1.8,
+      "learning_rate": 8.199498117942283e-06,
+      "loss": 2.0461,
+      "step": 5740
+    },
+    {
+      "epoch": 1.8,
+      "learning_rate": 8.196361355081556e-06,
+      "loss": 1.9988,
+      "step": 5750
+    },
+    {
+      "epoch": 1.81,
+      "learning_rate": 8.193224592220829e-06,
+      "loss": 2.0348,
+      "step": 5760
+    },
+    {
+      "epoch": 1.81,
+      "learning_rate": 8.1900878293601e-06,
+      "loss": 1.979,
+      "step": 5770
+    },
+    {
+      "epoch": 1.81,
+      "learning_rate": 8.186951066499373e-06,
+      "loss": 2.0056,
+      "step": 5780
+    },
+    {
+      "epoch": 1.82,
+      "learning_rate": 8.183814303638644e-06,
+      "loss": 2.0097,
+      "step": 5790
+    },
+    {
+      "epoch": 1.82,
+      "learning_rate": 8.180677540777919e-06,
+      "loss": 2.0079,
+      "step": 5800
+    },
+    {
+      "epoch": 1.82,
+      "learning_rate": 8.17754077791719e-06,
+      "loss": 1.9262,
+      "step": 5810
+    },
+    {
+      "epoch": 1.83,
+      "learning_rate": 8.174404015056461e-06,
+      "loss": 1.9064,
+      "step": 5820
+    },
+    {
+      "epoch": 1.83,
+      "learning_rate": 8.171267252195734e-06,
+      "loss": 2.0339,
+      "step": 5830
+    },
+    {
+      "epoch": 1.83,
+      "learning_rate": 8.168130489335007e-06,
+      "loss": 1.9952,
+      "step": 5840
+    },
+    {
+      "epoch": 1.84,
+      "learning_rate": 8.16499372647428e-06,
+      "loss": 1.9242,
+      "step": 5850
+    },
+    {
+      "epoch": 1.84,
+      "learning_rate": 8.161856963613551e-06,
+      "loss": 1.9661,
+      "step": 5860
+    },
+    {
+      "epoch": 1.84,
+      "learning_rate": 8.158720200752822e-06,
+      "loss": 1.9482,
+      "step": 5870
+    },
+    {
+      "epoch": 1.84,
+      "learning_rate": 8.155583437892095e-06,
+      "loss": 1.927,
+      "step": 5880
+    },
+    {
+      "epoch": 1.85,
+      "learning_rate": 8.152446675031368e-06,
+      "loss": 1.9963,
+      "step": 5890
+    },
+    {
+      "epoch": 1.85,
+      "learning_rate": 8.149309912170641e-06,
+      "loss": 2.0093,
+      "step": 5900
+    },
+    {
+      "epoch": 1.85,
+      "learning_rate": 8.146173149309912e-06,
+      "loss": 2.0204,
+      "step": 5910
+    },
+    {
+      "epoch": 1.86,
+      "learning_rate": 8.143036386449185e-06,
+      "loss": 2.0233,
+      "step": 5920
+    },
+    {
+      "epoch": 1.86,
+      "learning_rate": 8.139899623588458e-06,
+      "loss": 2.0393,
+      "step": 5930
+    },
+    {
+      "epoch": 1.86,
+      "learning_rate": 8.13676286072773e-06,
+      "loss": 2.0474,
+      "step": 5940
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 8.133626097867002e-06,
+      "loss": 1.9908,
+      "step": 5950
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 8.130489335006273e-06,
+      "loss": 1.9855,
+      "step": 5960
+    },
+    {
+      "epoch": 1.87,
+      "learning_rate": 8.127352572145546e-06,
+      "loss": 1.9181,
+      "step": 5970
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 8.12421580928482e-06,
+      "loss": 1.9918,
+      "step": 5980
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 8.12107904642409e-06,
+      "loss": 1.9965,
+      "step": 5990
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 8.117942283563363e-06,
+      "loss": 2.0341,
+      "step": 6000
+    },
+    {
+      "epoch": 1.89,
+      "learning_rate": 8.114805520702636e-06,
+      "loss": 2.0354,
+      "step": 6010
+    },
+    {
+      "epoch": 1.89,
+      "learning_rate": 8.111668757841907e-06,
+      "loss": 1.9341,
+      "step": 6020
+    },
+    {
+      "epoch": 1.89,
+      "learning_rate": 8.10853199498118e-06,
+      "loss": 2.0183,
+      "step": 6030
+    },
+    {
+      "epoch": 1.89,
+      "learning_rate": 8.105395232120451e-06,
+      "loss": 2.0165,
+      "step": 6040
+    },
+    {
+      "epoch": 1.9,
+      "learning_rate": 8.102258469259724e-06,
+      "loss": 1.9712,
+      "step": 6050
+    },
+    {
+      "epoch": 1.9,
+      "learning_rate": 8.099121706398997e-06,
+      "loss": 1.8994,
+      "step": 6060
+    },
+    {
+      "epoch": 1.9,
+      "learning_rate": 8.095984943538268e-06,
+      "loss": 1.9247,
+      "step": 6070
+    },
+    {
+      "epoch": 1.91,
+      "learning_rate": 8.092848180677541e-06,
+      "loss": 2.0145,
+      "step": 6080
+    },
+    {
+      "epoch": 1.91,
+      "learning_rate": 8.089711417816814e-06,
+      "loss": 2.0431,
+      "step": 6090
+    },
+    {
+      "epoch": 1.91,
+      "learning_rate": 8.086574654956087e-06,
+      "loss": 1.9666,
+      "step": 6100
+    },
+    {
+      "epoch": 1.92,
+      "learning_rate": 8.083437892095358e-06,
+      "loss": 1.9638,
+      "step": 6110
+    },
+    {
+      "epoch": 1.92,
+      "learning_rate": 8.08030112923463e-06,
+      "loss": 1.994,
+      "step": 6120
+    },
+    {
+      "epoch": 1.92,
+      "learning_rate": 8.077164366373902e-06,
+      "loss": 1.9206,
+      "step": 6130
+    },
+    {
+      "epoch": 1.93,
+      "learning_rate": 8.074027603513175e-06,
+      "loss": 1.9814,
+      "step": 6140
+    },
+    {
+      "epoch": 1.93,
+      "learning_rate": 8.070890840652448e-06,
+      "loss": 2.0413,
+      "step": 6150
+    },
+    {
+      "epoch": 1.93,
+      "learning_rate": 8.06775407779172e-06,
+      "loss": 2.0384,
+      "step": 6160
+    },
+    {
+      "epoch": 1.94,
+      "learning_rate": 8.06461731493099e-06,
+      "loss": 2.0274,
+      "step": 6170
+    },
+    {
+      "epoch": 1.94,
+      "learning_rate": 8.061480552070265e-06,
+      "loss": 1.9409,
+      "step": 6180
+    },
+    {
+      "epoch": 1.94,
+      "learning_rate": 8.058343789209536e-06,
+      "loss": 1.9933,
+      "step": 6190
+    },
+    {
+      "epoch": 1.94,
+      "learning_rate": 8.05520702634881e-06,
+      "loss": 2.0091,
+      "step": 6200
+    },
+    {
+      "epoch": 1.95,
+      "learning_rate": 8.05207026348808e-06,
+      "loss": 2.0404,
+      "step": 6210
+    },
+    {
+      "epoch": 1.95,
+      "learning_rate": 8.048933500627353e-06,
+      "loss": 1.9856,
+      "step": 6220
+    },
+    {
+      "epoch": 1.95,
+      "learning_rate": 8.045796737766626e-06,
+      "loss": 2.0037,
+      "step": 6230
+    },
+    {
+      "epoch": 1.96,
+      "learning_rate": 8.042659974905897e-06,
+      "loss": 1.9664,
+      "step": 6240
+    },
+    {
+      "epoch": 1.96,
+      "learning_rate": 8.03952321204517e-06,
+      "loss": 2.0269,
+      "step": 6250
+    },
+    {
+      "epoch": 1.96,
+      "learning_rate": 8.036386449184442e-06,
+      "loss": 2.0265,
+      "step": 6260
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 8.033249686323714e-06,
+      "loss": 1.9277,
+      "step": 6270
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 8.030112923462987e-06,
+      "loss": 1.9276,
+      "step": 6280
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 8.026976160602259e-06,
+      "loss": 2.0135,
+      "step": 6290
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 8.023839397741531e-06,
+      "loss": 2.0078,
+      "step": 6300
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 8.020702634880804e-06,
+      "loss": 2.0469,
+      "step": 6310
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 8.017565872020076e-06,
+      "loss": 2.0029,
+      "step": 6320
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 8.014429109159348e-06,
+      "loss": 1.9948,
+      "step": 6330
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 8.01129234629862e-06,
+      "loss": 1.9673,
+      "step": 6340
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 8.008155583437892e-06,
+      "loss": 1.9532,
+      "step": 6350
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 8.005018820577165e-06,
+      "loss": 1.9704,
+      "step": 6360
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 8.001882057716437e-06,
+      "loss": 1.9536,
+      "step": 6370
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 1.885330319404602,
+      "eval_runtime": 13.6115,
+      "eval_samples_per_second": 73.467,
+      "eval_steps_per_second": 4.628,
+      "step": 6376
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 7.99874529485571e-06,
+      "loss": 1.9421,
+      "step": 6380
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 7.995608531994982e-06,
+      "loss": 1.9968,
+      "step": 6390
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 7.992471769134255e-06,
+      "loss": 1.9777,
+      "step": 6400
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 7.989335006273526e-06,
+      "loss": 1.9875,
+      "step": 6410
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 7.986198243412798e-06,
+      "loss": 2.008,
+      "step": 6420
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 7.98306148055207e-06,
+      "loss": 1.9478,
+      "step": 6430
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 7.979924717691343e-06,
+      "loss": 1.946,
+      "step": 6440
+    },
+    {
+      "epoch": 2.02,
+      "learning_rate": 7.976787954830616e-06,
+      "loss": 2.0113,
+      "step": 6450
+    },
+    {
+      "epoch": 2.03,
+      "learning_rate": 7.973651191969888e-06,
+      "loss": 1.9268,
+      "step": 6460
+    },
+    {
+      "epoch": 2.03,
+      "learning_rate": 7.97051442910916e-06,
+      "loss": 1.974,
+      "step": 6470
+    },
+    {
+      "epoch": 2.03,
+      "learning_rate": 7.967377666248433e-06,
+      "loss": 1.942,
+      "step": 6480
+    },
+    {
+      "epoch": 2.04,
+      "learning_rate": 7.964240903387705e-06,
+      "loss": 2.0326,
+      "step": 6490
+    },
+    {
+      "epoch": 2.04,
+      "learning_rate": 7.961104140526977e-06,
+      "loss": 1.9315,
+      "step": 6500
+    },
+    {
+      "epoch": 2.04,
+      "learning_rate": 7.957967377666249e-06,
+      "loss": 1.9985,
+      "step": 6510
+    },
+    {
+      "epoch": 2.05,
+      "learning_rate": 7.954830614805521e-06,
+      "loss": 1.9155,
+      "step": 6520
+    },
+    {
+      "epoch": 2.05,
+      "learning_rate": 7.951693851944794e-06,
+      "loss": 2.0259,
+      "step": 6530
+    },
+    {
+      "epoch": 2.05,
+      "learning_rate": 7.948557089084066e-06,
+      "loss": 1.9476,
+      "step": 6540
+    },
+    {
+      "epoch": 2.05,
+      "learning_rate": 7.945420326223338e-06,
+      "loss": 1.971,
+      "step": 6550
+    },
+    {
+      "epoch": 2.06,
+      "learning_rate": 7.942283563362611e-06,
+      "loss": 1.9633,
+      "step": 6560
+    },
+    {
+      "epoch": 2.06,
+      "learning_rate": 7.939146800501883e-06,
+      "loss": 1.9443,
+      "step": 6570
+    },
+    {
+      "epoch": 2.06,
+      "learning_rate": 7.936010037641155e-06,
+      "loss": 2.0084,
+      "step": 6580
+    },
+    {
+      "epoch": 2.07,
+      "learning_rate": 7.932873274780427e-06,
+      "loss": 1.9961,
+      "step": 6590
+    },
+    {
+      "epoch": 2.07,
+      "learning_rate": 7.9297365119197e-06,
+      "loss": 2.0168,
+      "step": 6600
+    },
+    {
+      "epoch": 2.07,
+      "learning_rate": 7.926599749058972e-06,
+      "loss": 1.9562,
+      "step": 6610
+    },
+    {
+      "epoch": 2.08,
+      "learning_rate": 7.923462986198244e-06,
+      "loss": 1.9232,
+      "step": 6620
+    },
+    {
+      "epoch": 2.08,
+      "learning_rate": 7.920326223337517e-06,
+      "loss": 1.9462,
+      "step": 6630
+    },
+    {
+      "epoch": 2.08,
+      "learning_rate": 7.917189460476788e-06,
+      "loss": 1.9134,
+      "step": 6640
+    },
+    {
+      "epoch": 2.09,
+      "learning_rate": 7.91405269761606e-06,
+      "loss": 1.9443,
+      "step": 6650
+    },
+    {
+      "epoch": 2.09,
+      "learning_rate": 7.910915934755334e-06,
+      "loss": 1.9693,
+      "step": 6660
+    },
+    {
+      "epoch": 2.09,
+      "learning_rate": 7.907779171894605e-06,
+      "loss": 1.9401,
+      "step": 6670
+    },
+    {
+      "epoch": 2.1,
+      "learning_rate": 7.904642409033878e-06,
+      "loss": 1.9176,
+      "step": 6680
+    },
+    {
+      "epoch": 2.1,
+      "learning_rate": 7.90150564617315e-06,
+      "loss": 2.0308,
+      "step": 6690
+    },
+    {
+      "epoch": 2.1,
+      "learning_rate": 7.898368883312422e-06,
+      "loss": 2.0137,
+      "step": 6700
+    },
+    {
+      "epoch": 2.1,
+      "learning_rate": 7.895232120451695e-06,
+      "loss": 1.9504,
+      "step": 6710
+    },
+    {
+      "epoch": 2.11,
+      "learning_rate": 7.892095357590966e-06,
+      "loss": 1.9489,
+      "step": 6720
+    },
+    {
+      "epoch": 2.11,
+      "learning_rate": 7.888958594730239e-06,
+      "loss": 1.9366,
+      "step": 6730
+    },
+    {
+      "epoch": 2.11,
+      "learning_rate": 7.885821831869512e-06,
+      "loss": 1.9421,
+      "step": 6740
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 7.882685069008783e-06,
+      "loss": 1.9611,
+      "step": 6750
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 7.879548306148056e-06,
+      "loss": 1.9346,
+      "step": 6760
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 7.876411543287329e-06,
+      "loss": 1.9328,
+      "step": 6770
+    },
+    {
+      "epoch": 2.13,
+      "learning_rate": 7.873274780426601e-06,
+      "loss": 2.0131,
+      "step": 6780
+    },
+    {
+      "epoch": 2.13,
+      "learning_rate": 7.870138017565873e-06,
+      "loss": 1.9271,
+      "step": 6790
+    },
+    {
+      "epoch": 2.13,
+      "learning_rate": 7.867001254705144e-06,
+      "loss": 1.9835,
+      "step": 6800
+    },
+    {
+      "epoch": 2.14,
+      "learning_rate": 7.863864491844417e-06,
+      "loss": 2.045,
+      "step": 6810
+    },
+    {
+      "epoch": 2.14,
+      "learning_rate": 7.86072772898369e-06,
+      "loss": 1.975,
+      "step": 6820
+    },
+    {
+      "epoch": 2.14,
+      "learning_rate": 7.857590966122963e-06,
+      "loss": 1.9628,
+      "step": 6830
+    },
+    {
+      "epoch": 2.15,
+      "learning_rate": 7.854454203262234e-06,
+      "loss": 2.0207,
+      "step": 6840
+    },
+    {
+      "epoch": 2.15,
+      "learning_rate": 7.851317440401507e-06,
+      "loss": 2.0245,
+      "step": 6850
+    },
+    {
+      "epoch": 2.15,
+      "learning_rate": 7.84818067754078e-06,
+      "loss": 1.9582,
+      "step": 6860
+    },
+    {
+      "epoch": 2.15,
+      "learning_rate": 7.84504391468005e-06,
+      "loss": 1.9514,
+      "step": 6870
+    },
+    {
+      "epoch": 2.16,
+      "learning_rate": 7.841907151819324e-06,
+      "loss": 2.0367,
+      "step": 6880
+    },
+    {
+      "epoch": 2.16,
+      "learning_rate": 7.838770388958595e-06,
+      "loss": 1.9545,
+      "step": 6890
+    },
+    {
+      "epoch": 2.16,
+      "learning_rate": 7.835633626097868e-06,
+      "loss": 2.0572,
+      "step": 6900
+    },
+    {
+      "epoch": 2.17,
+      "learning_rate": 7.83249686323714e-06,
+      "loss": 1.9569,
+      "step": 6910
+    },
+    {
+      "epoch": 2.17,
+      "learning_rate": 7.829360100376412e-06,
+      "loss": 1.9457,
+      "step": 6920
+    },
+    {
+      "epoch": 2.17,
+      "learning_rate": 7.826223337515685e-06,
+      "loss": 1.9065,
+      "step": 6930
+    },
+    {
+      "epoch": 2.18,
+      "learning_rate": 7.823086574654956e-06,
+      "loss": 1.9493,
+      "step": 6940
+    },
+    {
+      "epoch": 2.18,
+      "learning_rate": 7.819949811794229e-06,
+      "loss": 1.9474,
+      "step": 6950
+    },
+    {
+      "epoch": 2.18,
+      "learning_rate": 7.816813048933502e-06,
+      "loss": 1.9068,
+      "step": 6960
+    },
+    {
+      "epoch": 2.19,
+      "learning_rate": 7.813676286072773e-06,
+      "loss": 1.9634,
+      "step": 6970
+    },
+    {
+      "epoch": 2.19,
+      "learning_rate": 7.810539523212046e-06,
+      "loss": 1.8905,
+      "step": 6980
+    },
+    {
+      "epoch": 2.19,
+      "learning_rate": 7.807402760351319e-06,
+      "loss": 1.9783,
+      "step": 6990
+    },
+    {
+      "epoch": 2.2,
+      "learning_rate": 7.80426599749059e-06,
+      "loss": 1.9273,
+      "step": 7000
+    },
+    {
+      "epoch": 2.2,
+      "learning_rate": 7.801129234629863e-06,
+      "loss": 1.9219,
+      "step": 7010
+    },
+    {
+      "epoch": 2.2,
+      "learning_rate": 7.797992471769134e-06,
+      "loss": 1.847,
+      "step": 7020
+    },
+    {
+      "epoch": 2.21,
+      "learning_rate": 7.794855708908407e-06,
+      "loss": 1.9704,
+      "step": 7030
+    },
+    {
+      "epoch": 2.21,
+      "learning_rate": 7.79171894604768e-06,
+      "loss": 1.9821,
+      "step": 7040
+    },
+    {
+      "epoch": 2.21,
+      "learning_rate": 7.788582183186951e-06,
+      "loss": 1.9342,
+      "step": 7050
+    },
+    {
+      "epoch": 2.21,
+      "learning_rate": 7.785445420326224e-06,
+      "loss": 2.0013,
+      "step": 7060
+    },
+    {
+      "epoch": 2.22,
+      "learning_rate": 7.782308657465497e-06,
+      "loss": 1.9475,
+      "step": 7070
+    },
+    {
+      "epoch": 2.22,
+      "learning_rate": 7.77917189460477e-06,
+      "loss": 1.9484,
+      "step": 7080
+    },
+    {
+      "epoch": 2.22,
+      "learning_rate": 7.77603513174404e-06,
+      "loss": 1.9431,
+      "step": 7090
+    },
+    {
+      "epoch": 2.23,
+      "learning_rate": 7.772898368883312e-06,
+      "loss": 1.9094,
+      "step": 7100
+    },
+    {
+      "epoch": 2.23,
+      "learning_rate": 7.769761606022585e-06,
+      "loss": 1.9403,
+      "step": 7110
+    },
+    {
+      "epoch": 2.23,
+      "learning_rate": 7.766624843161858e-06,
+      "loss": 2.0046,
+      "step": 7120
+    },
+    {
+      "epoch": 2.24,
+      "learning_rate": 7.76348808030113e-06,
+      "loss": 1.9858,
+      "step": 7130
+    },
+    {
+      "epoch": 2.24,
+      "learning_rate": 7.760351317440402e-06,
+      "loss": 2.0215,
+      "step": 7140
+    },
+    {
+      "epoch": 2.24,
+      "learning_rate": 7.757214554579675e-06,
+      "loss": 1.951,
+      "step": 7150
+    },
+    {
+      "epoch": 2.25,
+      "learning_rate": 7.754077791718948e-06,
+      "loss": 1.9801,
+      "step": 7160
+    },
+    {
+      "epoch": 2.25,
+      "learning_rate": 7.750941028858219e-06,
+      "loss": 1.9526,
+      "step": 7170
+    },
+    {
+      "epoch": 2.25,
+      "learning_rate": 7.747804265997492e-06,
+      "loss": 1.971,
+      "step": 7180
+    },
+    {
+      "epoch": 2.26,
+      "learning_rate": 7.744667503136763e-06,
+      "loss": 1.8804,
+      "step": 7190
+    },
+    {
+      "epoch": 2.26,
+      "learning_rate": 7.741530740276036e-06,
+      "loss": 2.0343,
+      "step": 7200
+    },
+    {
+      "epoch": 2.26,
+      "learning_rate": 7.738393977415309e-06,
+      "loss": 1.9925,
+      "step": 7210
+    },
+    {
+      "epoch": 2.26,
+      "learning_rate": 7.73525721455458e-06,
+      "loss": 1.9465,
+      "step": 7220
+    },
+    {
+      "epoch": 2.27,
+      "learning_rate": 7.732120451693853e-06,
+      "loss": 1.8588,
+      "step": 7230
+    },
+    {
+      "epoch": 2.27,
+      "learning_rate": 7.728983688833126e-06,
+      "loss": 1.9369,
+      "step": 7240
+    },
+    {
+      "epoch": 2.27,
+      "learning_rate": 7.725846925972397e-06,
+      "loss": 1.9037,
+      "step": 7250
+    },
+    {
+      "epoch": 2.28,
+      "learning_rate": 7.72271016311167e-06,
+      "loss": 1.9352,
+      "step": 7260
+    },
+    {
+      "epoch": 2.28,
+      "learning_rate": 7.719573400250941e-06,
+      "loss": 1.9501,
+      "step": 7270
+    },
+    {
+      "epoch": 2.28,
+      "learning_rate": 7.716436637390214e-06,
+      "loss": 1.9301,
+      "step": 7280
+    },
+    {
+      "epoch": 2.29,
+      "learning_rate": 7.713299874529487e-06,
+      "loss": 1.9778,
+      "step": 7290
+    },
+    {
+      "epoch": 2.29,
+      "learning_rate": 7.710163111668758e-06,
+      "loss": 1.9291,
+      "step": 7300
+    },
+    {
+      "epoch": 2.29,
+      "learning_rate": 7.70702634880803e-06,
+      "loss": 2.0028,
+      "step": 7310
+    },
+    {
+      "epoch": 2.3,
+      "learning_rate": 7.703889585947302e-06,
+      "loss": 1.9809,
+      "step": 7320
+    },
+    {
+      "epoch": 2.3,
+      "learning_rate": 7.700752823086577e-06,
+      "loss": 1.9964,
+      "step": 7330
+    },
+    {
+      "epoch": 2.3,
+      "learning_rate": 7.697616060225848e-06,
+      "loss": 1.8848,
+      "step": 7340
+    },
+    {
+      "epoch": 2.31,
+      "learning_rate": 7.694479297365119e-06,
+      "loss": 1.901,
+      "step": 7350
+    },
+    {
+      "epoch": 2.31,
+      "learning_rate": 7.691342534504392e-06,
+      "loss": 1.9539,
+      "step": 7360
+    },
+    {
+      "epoch": 2.31,
+      "learning_rate": 7.688205771643665e-06,
+      "loss": 1.9882,
+      "step": 7370
+    },
+    {
+      "epoch": 2.31,
+      "learning_rate": 7.685069008782938e-06,
+      "loss": 1.9577,
+      "step": 7380
+    },
+    {
+      "epoch": 2.32,
+      "learning_rate": 7.681932245922209e-06,
+      "loss": 1.9839,
+      "step": 7390
+    },
+    {
+      "epoch": 2.32,
+      "learning_rate": 7.67879548306148e-06,
+      "loss": 1.9388,
+      "step": 7400
+    },
+    {
+      "epoch": 2.32,
+      "learning_rate": 7.675658720200753e-06,
+      "loss": 1.9851,
+      "step": 7410
+    },
+    {
+      "epoch": 2.33,
+      "learning_rate": 7.672521957340026e-06,
+      "loss": 2.0086,
+      "step": 7420
+    },
+    {
+      "epoch": 2.33,
+      "learning_rate": 7.669385194479299e-06,
+      "loss": 1.9862,
+      "step": 7430
+    },
+    {
+      "epoch": 2.33,
+      "learning_rate": 7.66624843161857e-06,
+      "loss": 1.9839,
+      "step": 7440
+    },
+    {
+      "epoch": 2.34,
+      "learning_rate": 7.663111668757843e-06,
+      "loss": 1.9765,
+      "step": 7450
+    },
+    {
+      "epoch": 2.34,
+      "learning_rate": 7.659974905897116e-06,
+      "loss": 1.9471,
+      "step": 7460
+    },
+    {
+      "epoch": 2.34,
+      "learning_rate": 7.656838143036387e-06,
+      "loss": 1.9762,
+      "step": 7470
+    },
+    {
+      "epoch": 2.35,
+      "learning_rate": 7.65370138017566e-06,
+      "loss": 1.9488,
+      "step": 7480
+    },
+    {
+      "epoch": 2.35,
+      "learning_rate": 7.650564617314931e-06,
+      "loss": 1.8924,
+      "step": 7490
+    },
+    {
+      "epoch": 2.35,
+      "learning_rate": 7.647427854454204e-06,
+      "loss": 1.978,
+      "step": 7500
+    },
+    {
+      "epoch": 2.36,
+      "learning_rate": 7.644291091593477e-06,
+      "loss": 1.9168,
+      "step": 7510
+    },
+    {
+      "epoch": 2.36,
+      "learning_rate": 7.641154328732748e-06,
+      "loss": 1.9963,
+      "step": 7520
+    },
+    {
+      "epoch": 2.36,
+      "learning_rate": 7.638017565872021e-06,
+      "loss": 1.9229,
+      "step": 7530
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 7.634880803011294e-06,
+      "loss": 1.9212,
+      "step": 7540
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 7.631744040150565e-06,
+      "loss": 1.95,
+      "step": 7550
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 7.628607277289838e-06,
+      "loss": 1.9406,
+      "step": 7560
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 7.62547051442911e-06,
+      "loss": 2.0137,
+      "step": 7570
+    },
+    {
+      "epoch": 2.38,
+      "learning_rate": 7.622333751568381e-06,
+      "loss": 2.01,
+      "step": 7580
+    },
+    {
+      "epoch": 2.38,
+      "learning_rate": 7.619196988707655e-06,
+      "loss": 1.9069,
+      "step": 7590
+    },
+    {
+      "epoch": 2.38,
+      "learning_rate": 7.616060225846926e-06,
+      "loss": 1.988,
+      "step": 7600
+    },
+    {
+      "epoch": 2.39,
+      "learning_rate": 7.612923462986199e-06,
+      "loss": 1.9774,
+      "step": 7610
+    },
+    {
+      "epoch": 2.39,
+      "learning_rate": 7.609786700125471e-06,
+      "loss": 1.9948,
+      "step": 7620
+    },
+    {
+      "epoch": 2.39,
+      "learning_rate": 7.606649937264743e-06,
+      "loss": 1.9218,
+      "step": 7630
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 7.603513174404016e-06,
+      "loss": 1.9851,
+      "step": 7640
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 7.600376411543288e-06,
+      "loss": 1.9778,
+      "step": 7650
+    },
+    {
+      "epoch": 2.4,
+      "learning_rate": 7.597239648682561e-06,
+      "loss": 1.9702,
+      "step": 7660
+    },
+    {
+      "epoch": 2.41,
+      "learning_rate": 7.594102885821832e-06,
+      "loss": 1.8713,
+      "step": 7670
+    },
+    {
+      "epoch": 2.41,
+      "learning_rate": 7.590966122961104e-06,
+      "loss": 1.9425,
+      "step": 7680
+    },
+    {
+      "epoch": 2.41,
+      "learning_rate": 7.587829360100377e-06,
+      "loss": 1.9917,
+      "step": 7690
+    },
+    {
+      "epoch": 2.42,
+      "learning_rate": 7.584692597239649e-06,
+      "loss": 1.9039,
+      "step": 7700
+    },
+    {
+      "epoch": 2.42,
+      "learning_rate": 7.581555834378922e-06,
+      "loss": 1.939,
+      "step": 7710
+    },
+    {
+      "epoch": 2.42,
+      "learning_rate": 7.578419071518194e-06,
+      "loss": 1.9926,
+      "step": 7720
+    },
+    {
+      "epoch": 2.42,
+      "learning_rate": 7.575282308657466e-06,
+      "loss": 1.9994,
+      "step": 7730
+    },
+    {
+      "epoch": 2.43,
+      "learning_rate": 7.572145545796739e-06,
+      "loss": 1.9109,
+      "step": 7740
+    },
+    {
+      "epoch": 2.43,
+      "learning_rate": 7.56900878293601e-06,
+      "loss": 2.0109,
+      "step": 7750
+    },
+    {
+      "epoch": 2.43,
+      "learning_rate": 7.565872020075283e-06,
+      "loss": 1.9353,
+      "step": 7760
+    },
+    {
+      "epoch": 2.44,
+      "learning_rate": 7.562735257214555e-06,
+      "loss": 1.9251,
+      "step": 7770
+    },
+    {
+      "epoch": 2.44,
+      "learning_rate": 7.559598494353827e-06,
+      "loss": 1.9734,
+      "step": 7780
+    },
+    {
+      "epoch": 2.44,
+      "learning_rate": 7.5564617314931e-06,
+      "loss": 1.9337,
+      "step": 7790
+    },
+    {
+      "epoch": 2.45,
+      "learning_rate": 7.553324968632372e-06,
+      "loss": 1.9499,
+      "step": 7800
+    },
+    {
+      "epoch": 2.45,
+      "learning_rate": 7.550188205771645e-06,
+      "loss": 1.9695,
+      "step": 7810
+    },
+    {
+      "epoch": 2.45,
+      "learning_rate": 7.547051442910916e-06,
+      "loss": 1.9732,
+      "step": 7820
+    },
+    {
+      "epoch": 2.46,
+      "learning_rate": 7.543914680050188e-06,
+      "loss": 1.9596,
+      "step": 7830
+    },
+    {
+      "epoch": 2.46,
+      "learning_rate": 7.540777917189461e-06,
+      "loss": 1.9799,
+      "step": 7840
+    },
+    {
+      "epoch": 2.46,
+      "learning_rate": 7.537641154328733e-06,
+      "loss": 2.0665,
+      "step": 7850
+    },
+    {
+      "epoch": 2.47,
+      "learning_rate": 7.534504391468006e-06,
+      "loss": 2.0205,
+      "step": 7860
+    },
+    {
+      "epoch": 2.47,
+      "learning_rate": 7.531367628607278e-06,
+      "loss": 1.9653,
+      "step": 7870
+    },
+    {
+      "epoch": 2.47,
+      "learning_rate": 7.52823086574655e-06,
+      "loss": 1.9521,
+      "step": 7880
+    },
+    {
+      "epoch": 2.47,
+      "learning_rate": 7.525094102885823e-06,
+      "loss": 1.9296,
+      "step": 7890
+    },
+    {
+      "epoch": 2.48,
+      "learning_rate": 7.521957340025094e-06,
+      "loss": 1.9706,
+      "step": 7900
+    },
+    {
+      "epoch": 2.48,
+      "learning_rate": 7.518820577164367e-06,
+      "loss": 1.9166,
+      "step": 7910
+    },
+    {
+      "epoch": 2.48,
+      "learning_rate": 7.515683814303639e-06,
+      "loss": 1.9223,
+      "step": 7920
+    },
+    {
+      "epoch": 2.49,
+      "learning_rate": 7.512547051442911e-06,
+      "loss": 1.9756,
+      "step": 7930
+    },
+    {
+      "epoch": 2.49,
+      "learning_rate": 7.509410288582184e-06,
+      "loss": 1.9335,
+      "step": 7940
+    },
+    {
+      "epoch": 2.49,
+      "learning_rate": 7.506273525721456e-06,
+      "loss": 1.94,
+      "step": 7950
+    },
+    {
+      "epoch": 2.5,
+      "learning_rate": 7.503136762860729e-06,
+      "loss": 1.9802,
+      "step": 7960
+    },
+    {
+      "epoch": 2.5,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 1.9913,
+      "step": 7970
+    },
+    {
+      "epoch": 2.5,
+      "learning_rate": 7.496863237139272e-06,
+      "loss": 1.9385,
+      "step": 7980
+    },
+    {
+      "epoch": 2.51,
+      "learning_rate": 7.493726474278545e-06,
+      "loss": 1.9581,
+      "step": 7990
+    },
+    {
+      "epoch": 2.51,
+      "learning_rate": 7.490589711417817e-06,
+      "loss": 1.9955,
+      "step": 8000
+    },
+    {
+      "epoch": 2.51,
+      "learning_rate": 7.48745294855709e-06,
+      "loss": 1.9008,
+      "step": 8010
+    },
+    {
+      "epoch": 2.52,
+      "learning_rate": 7.484316185696362e-06,
+      "loss": 1.9286,
+      "step": 8020
+    },
+    {
+      "epoch": 2.52,
+      "learning_rate": 7.481179422835634e-06,
+      "loss": 1.9209,
+      "step": 8030
+    },
+    {
+      "epoch": 2.52,
+      "learning_rate": 7.478042659974907e-06,
+      "loss": 1.9612,
+      "step": 8040
+    },
+    {
+      "epoch": 2.53,
+      "learning_rate": 7.474905897114178e-06,
+      "loss": 2.0569,
+      "step": 8050
+    },
+    {
+      "epoch": 2.53,
+      "learning_rate": 7.471769134253451e-06,
+      "loss": 1.9281,
+      "step": 8060
+    },
+    {
+      "epoch": 2.53,
+      "learning_rate": 7.468632371392723e-06,
+      "loss": 1.8916,
+      "step": 8070
+    },
+    {
+      "epoch": 2.53,
+      "learning_rate": 7.465495608531995e-06,
+      "loss": 1.9725,
+      "step": 8080
+    },
+    {
+      "epoch": 2.54,
+      "learning_rate": 7.462358845671268e-06,
+      "loss": 1.9004,
+      "step": 8090
+    },
+    {
+      "epoch": 2.54,
+      "learning_rate": 7.45922208281054e-06,
+      "loss": 1.97,
+      "step": 8100
+    },
+    {
+      "epoch": 2.54,
+      "learning_rate": 7.456085319949813e-06,
+      "loss": 1.9328,
+      "step": 8110
+    },
+    {
+      "epoch": 2.55,
+      "learning_rate": 7.452948557089085e-06,
+      "loss": 1.9547,
+      "step": 8120
+    },
+    {
+      "epoch": 2.55,
+      "learning_rate": 7.449811794228356e-06,
+      "loss": 1.9816,
+      "step": 8130
+    },
+    {
+      "epoch": 2.55,
+      "learning_rate": 7.446675031367629e-06,
+      "loss": 2.0046,
+      "step": 8140
+    },
+    {
+      "epoch": 2.56,
+      "learning_rate": 7.443538268506901e-06,
+      "loss": 1.958,
+      "step": 8150
+    },
+    {
+      "epoch": 2.56,
+      "learning_rate": 7.440401505646174e-06,
+      "loss": 1.9852,
+      "step": 8160
+    },
+    {
+      "epoch": 2.56,
+      "learning_rate": 7.437264742785446e-06,
+      "loss": 1.9922,
+      "step": 8170
+    },
+    {
+      "epoch": 2.57,
+      "learning_rate": 7.434127979924718e-06,
+      "loss": 1.9294,
+      "step": 8180
+    },
+    {
+      "epoch": 2.57,
+      "learning_rate": 7.430991217063991e-06,
+      "loss": 1.9844,
+      "step": 8190
+    },
+    {
+      "epoch": 2.57,
+      "learning_rate": 7.427854454203262e-06,
+      "loss": 1.9652,
+      "step": 8200
+    },
+    {
+      "epoch": 2.58,
+      "learning_rate": 7.424717691342535e-06,
+      "loss": 1.9633,
+      "step": 8210
+    },
+    {
+      "epoch": 2.58,
+      "learning_rate": 7.421580928481807e-06,
+      "loss": 1.9338,
+      "step": 8220
+    },
+    {
+      "epoch": 2.58,
+      "learning_rate": 7.418444165621079e-06,
+      "loss": 1.9838,
+      "step": 8230
+    },
+    {
+      "epoch": 2.58,
+      "learning_rate": 7.415307402760352e-06,
+      "loss": 1.9171,
+      "step": 8240
+    },
+    {
+      "epoch": 2.59,
+      "learning_rate": 7.412170639899624e-06,
+      "loss": 2.0102,
+      "step": 8250
+    },
+    {
+      "epoch": 2.59,
+      "learning_rate": 7.409033877038897e-06,
+      "loss": 1.959,
+      "step": 8260
+    },
+    {
+      "epoch": 2.59,
+      "learning_rate": 7.405897114178169e-06,
+      "loss": 1.9088,
+      "step": 8270
+    },
+    {
+      "epoch": 2.6,
+      "learning_rate": 7.40276035131744e-06,
+      "loss": 1.9144,
+      "step": 8280
+    },
+    {
+      "epoch": 2.6,
+      "learning_rate": 7.399623588456713e-06,
+      "loss": 1.8979,
+      "step": 8290
+    },
+    {
+      "epoch": 2.6,
+      "learning_rate": 7.396486825595985e-06,
+      "loss": 2.0027,
+      "step": 8300
+    },
+    {
+      "epoch": 2.61,
+      "learning_rate": 7.393350062735258e-06,
+      "loss": 1.8945,
+      "step": 8310
+    },
+    {
+      "epoch": 2.61,
+      "learning_rate": 7.39021329987453e-06,
+      "loss": 2.0055,
+      "step": 8320
+    },
+    {
+      "epoch": 2.61,
+      "learning_rate": 7.387076537013802e-06,
+      "loss": 1.9817,
+      "step": 8330
+    },
+    {
+      "epoch": 2.62,
+      "learning_rate": 7.383939774153075e-06,
+      "loss": 1.9699,
+      "step": 8340
+    },
+    {
+      "epoch": 2.62,
+      "learning_rate": 7.380803011292346e-06,
+      "loss": 1.9523,
+      "step": 8350
+    },
+    {
+      "epoch": 2.62,
+      "learning_rate": 7.37766624843162e-06,
+      "loss": 1.9713,
+      "step": 8360
+    },
+    {
+      "epoch": 2.63,
+      "learning_rate": 7.374529485570891e-06,
+      "loss": 2.0491,
+      "step": 8370
+    },
+    {
+      "epoch": 2.63,
+      "learning_rate": 7.371392722710163e-06,
+      "loss": 1.9829,
+      "step": 8380
+    },
+    {
+      "epoch": 2.63,
+      "learning_rate": 7.368255959849436e-06,
+      "loss": 1.9455,
+      "step": 8390
+    },
+    {
+      "epoch": 2.63,
+      "learning_rate": 7.365119196988708e-06,
+      "loss": 2.0571,
+      "step": 8400
+    },
+    {
+      "epoch": 2.64,
+      "learning_rate": 7.361982434127981e-06,
+      "loss": 1.9815,
+      "step": 8410
+    },
+    {
+      "epoch": 2.64,
+      "learning_rate": 7.358845671267253e-06,
+      "loss": 1.9506,
+      "step": 8420
+    },
+    {
+      "epoch": 2.64,
+      "learning_rate": 7.355708908406524e-06,
+      "loss": 1.9344,
+      "step": 8430
+    },
+    {
+      "epoch": 2.65,
+      "learning_rate": 7.352572145545797e-06,
+      "loss": 2.0732,
+      "step": 8440
+    },
+    {
+      "epoch": 2.65,
+      "learning_rate": 7.349435382685069e-06,
+      "loss": 1.9703,
+      "step": 8450
+    },
+    {
+      "epoch": 2.65,
+      "learning_rate": 7.346298619824341e-06,
+      "loss": 1.9605,
+      "step": 8460
+    },
+    {
+      "epoch": 2.66,
+      "learning_rate": 7.343161856963614e-06,
+      "loss": 1.9911,
+      "step": 8470
+    },
+    {
+      "epoch": 2.66,
+      "learning_rate": 7.340025094102886e-06,
+      "loss": 1.8882,
+      "step": 8480
+    },
+    {
+      "epoch": 2.66,
+      "learning_rate": 7.336888331242159e-06,
+      "loss": 2.0295,
+      "step": 8490
+    },
+    {
+      "epoch": 2.67,
+      "learning_rate": 7.333751568381431e-06,
+      "loss": 1.9635,
+      "step": 8500
+    },
+    {
+      "epoch": 2.67,
+      "learning_rate": 7.3306148055207025e-06,
+      "loss": 1.9396,
+      "step": 8510
+    },
+    {
+      "epoch": 2.67,
+      "learning_rate": 7.327478042659975e-06,
+      "loss": 1.9473,
+      "step": 8520
+    },
+    {
+      "epoch": 2.68,
+      "learning_rate": 7.324341279799247e-06,
+      "loss": 1.9323,
+      "step": 8530
+    },
+    {
+      "epoch": 2.68,
+      "learning_rate": 7.32120451693852e-06,
+      "loss": 1.9396,
+      "step": 8540
+    },
+    {
+      "epoch": 2.68,
+      "learning_rate": 7.318067754077792e-06,
+      "loss": 1.9011,
+      "step": 8550
+    },
+    {
+      "epoch": 2.69,
+      "learning_rate": 7.314930991217064e-06,
+      "loss": 1.9256,
+      "step": 8560
+    },
+    {
+      "epoch": 2.69,
+      "learning_rate": 7.311794228356337e-06,
+      "loss": 1.9724,
+      "step": 8570
+    },
+    {
+      "epoch": 2.69,
+      "learning_rate": 7.3086574654956085e-06,
+      "loss": 1.9135,
+      "step": 8580
+    },
+    {
+      "epoch": 2.69,
+      "learning_rate": 7.305520702634881e-06,
+      "loss": 1.9444,
+      "step": 8590
+    },
+    {
+      "epoch": 2.7,
+      "learning_rate": 7.302383939774153e-06,
+      "loss": 1.8843,
+      "step": 8600
+    },
+    {
+      "epoch": 2.7,
+      "learning_rate": 7.2992471769134255e-06,
+      "loss": 1.9864,
+      "step": 8610
+    },
+    {
+      "epoch": 2.7,
+      "learning_rate": 7.296110414052698e-06,
+      "loss": 1.9368,
+      "step": 8620
+    },
+    {
+      "epoch": 2.71,
+      "learning_rate": 7.29297365119197e-06,
+      "loss": 1.9264,
+      "step": 8630
+    },
+    {
+      "epoch": 2.71,
+      "learning_rate": 7.289836888331243e-06,
+      "loss": 1.9519,
+      "step": 8640
+    },
+    {
+      "epoch": 2.71,
+      "learning_rate": 7.286700125470515e-06,
+      "loss": 1.9394,
+      "step": 8650
+    },
+    {
+      "epoch": 2.72,
+      "learning_rate": 7.2835633626097865e-06,
+      "loss": 1.9725,
+      "step": 8660
+    },
+    {
+      "epoch": 2.72,
+      "learning_rate": 7.2804265997490594e-06,
+      "loss": 1.945,
+      "step": 8670
+    },
+    {
+      "epoch": 2.72,
+      "learning_rate": 7.2772898368883315e-06,
+      "loss": 1.9679,
+      "step": 8680
+    },
+    {
+      "epoch": 2.73,
+      "learning_rate": 7.274153074027604e-06,
+      "loss": 1.9632,
+      "step": 8690
+    },
+    {
+      "epoch": 2.73,
+      "learning_rate": 7.271016311166876e-06,
+      "loss": 1.9299,
+      "step": 8700
+    },
+    {
+      "epoch": 2.73,
+      "learning_rate": 7.2678795483061485e-06,
+      "loss": 1.8859,
+      "step": 8710
+    },
+    {
+      "epoch": 2.74,
+      "learning_rate": 7.264742785445421e-06,
+      "loss": 1.9887,
+      "step": 8720
+    },
+    {
+      "epoch": 2.74,
+      "learning_rate": 7.2616060225846925e-06,
+      "loss": 1.9067,
+      "step": 8730
+    },
+    {
+      "epoch": 2.74,
+      "learning_rate": 7.258469259723966e-06,
+      "loss": 1.9445,
+      "step": 8740
+    },
+    {
+      "epoch": 2.74,
+      "learning_rate": 7.2553324968632375e-06,
+      "loss": 1.9635,
+      "step": 8750
+    },
+    {
+      "epoch": 2.75,
+      "learning_rate": 7.2521957340025095e-06,
+      "loss": 1.9772,
+      "step": 8760
+    },
+    {
+      "epoch": 2.75,
+      "learning_rate": 7.249058971141782e-06,
+      "loss": 1.9778,
+      "step": 8770
+    },
+    {
+      "epoch": 2.75,
+      "learning_rate": 7.2459222082810545e-06,
+      "loss": 1.9077,
+      "step": 8780
+    },
+    {
+      "epoch": 2.76,
+      "learning_rate": 7.242785445420327e-06,
+      "loss": 2.0154,
+      "step": 8790
+    },
+    {
+      "epoch": 2.76,
+      "learning_rate": 7.239648682559599e-06,
+      "loss": 1.9579,
+      "step": 8800
+    },
+    {
+      "epoch": 2.76,
+      "learning_rate": 7.236511919698871e-06,
+      "loss": 1.9892,
+      "step": 8810
+    },
+    {
+      "epoch": 2.77,
+      "learning_rate": 7.2333751568381435e-06,
+      "loss": 1.927,
+      "step": 8820
+    },
+    {
+      "epoch": 2.77,
+      "learning_rate": 7.2302383939774155e-06,
+      "loss": 1.964,
+      "step": 8830
+    },
+    {
+      "epoch": 2.77,
+      "learning_rate": 7.2271016311166884e-06,
+      "loss": 1.9346,
+      "step": 8840
+    },
+    {
+      "epoch": 2.78,
+      "learning_rate": 7.2239648682559605e-06,
+      "loss": 1.8882,
+      "step": 8850
+    },
+    {
+      "epoch": 2.78,
+      "learning_rate": 7.2208281053952325e-06,
+      "loss": 1.9195,
+      "step": 8860
+    },
+    {
+      "epoch": 2.78,
+      "learning_rate": 7.217691342534505e-06,
+      "loss": 1.949,
+      "step": 8870
+    },
+    {
+      "epoch": 2.79,
+      "learning_rate": 7.2145545796737775e-06,
+      "loss": 1.9805,
+      "step": 8880
+    },
+    {
+      "epoch": 2.79,
+      "learning_rate": 7.21141781681305e-06,
+      "loss": 1.962,
+      "step": 8890
+    },
+    {
+      "epoch": 2.79,
+      "learning_rate": 7.2082810539523215e-06,
+      "loss": 1.9169,
+      "step": 8900
+    },
+    {
+      "epoch": 2.79,
+      "learning_rate": 7.205144291091594e-06,
+      "loss": 2.0492,
+      "step": 8910
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 7.2020075282308665e-06,
+      "loss": 2.0174,
+      "step": 8920
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 7.1988707653701385e-06,
+      "loss": 1.969,
+      "step": 8930
+    },
+    {
+      "epoch": 2.8,
+      "learning_rate": 7.195734002509411e-06,
+      "loss": 1.9197,
+      "step": 8940
+    },
+    {
+      "epoch": 2.81,
+      "learning_rate": 7.1925972396486835e-06,
+      "loss": 2.0031,
+      "step": 8950
+    },
+    {
+      "epoch": 2.81,
+      "learning_rate": 7.189460476787955e-06,
+      "loss": 1.9558,
+      "step": 8960
+    },
+    {
+      "epoch": 2.81,
+      "learning_rate": 7.1863237139272276e-06,
+      "loss": 1.94,
+      "step": 8970
+    },
+    {
+      "epoch": 2.82,
+      "learning_rate": 7.1831869510665e-06,
+      "loss": 1.9337,
+      "step": 8980
+    },
+    {
+      "epoch": 2.82,
+      "learning_rate": 7.1800501882057725e-06,
+      "loss": 1.9076,
+      "step": 8990
+    },
+    {
+      "epoch": 2.82,
+      "learning_rate": 7.1769134253450445e-06,
+      "loss": 1.9973,
+      "step": 9000
+    },
+    {
+      "epoch": 2.83,
+      "learning_rate": 7.173776662484317e-06,
+      "loss": 2.0062,
+      "step": 9010
+    },
+    {
+      "epoch": 2.83,
+      "learning_rate": 7.1706398996235895e-06,
+      "loss": 1.9347,
+      "step": 9020
+    },
+    {
+      "epoch": 2.83,
+      "learning_rate": 7.1675031367628615e-06,
+      "loss": 1.9871,
+      "step": 9030
+    },
+    {
+      "epoch": 2.84,
+      "learning_rate": 7.164366373902134e-06,
+      "loss": 1.9448,
+      "step": 9040
+    },
+    {
+      "epoch": 2.84,
+      "learning_rate": 7.161229611041406e-06,
+      "loss": 1.9392,
+      "step": 9050
+    },
+    {
+      "epoch": 2.84,
+      "learning_rate": 7.158092848180678e-06,
+      "loss": 1.9694,
+      "step": 9060
+    },
+    {
+      "epoch": 2.85,
+      "learning_rate": 7.1549560853199505e-06,
+      "loss": 1.8943,
+      "step": 9070
+    },
+    {
+      "epoch": 2.85,
+      "learning_rate": 7.151819322459223e-06,
+      "loss": 1.9112,
+      "step": 9080
+    },
+    {
+      "epoch": 2.85,
+      "learning_rate": 7.1486825595984955e-06,
+      "loss": 1.971,
+      "step": 9090
+    },
+    {
+      "epoch": 2.85,
+      "learning_rate": 7.1455457967377675e-06,
+      "loss": 2.0105,
+      "step": 9100
+    },
+    {
+      "epoch": 2.86,
+      "learning_rate": 7.142409033877039e-06,
+      "loss": 1.9241,
+      "step": 9110
+    },
+    {
+      "epoch": 2.86,
+      "learning_rate": 7.1392722710163125e-06,
+      "loss": 2.0026,
+      "step": 9120
+    },
+    {
+      "epoch": 2.86,
+      "learning_rate": 7.136135508155584e-06,
+      "loss": 1.9557,
+      "step": 9130
+    },
+    {
+      "epoch": 2.87,
+      "learning_rate": 7.1329987452948566e-06,
+      "loss": 1.9448,
+      "step": 9140
+    },
+    {
+      "epoch": 2.87,
+      "learning_rate": 7.129861982434129e-06,
+      "loss": 1.8889,
+      "step": 9150
+    },
+    {
+      "epoch": 2.87,
+      "learning_rate": 7.126725219573401e-06,
+      "loss": 1.9239,
+      "step": 9160
+    },
+    {
+      "epoch": 2.88,
+      "learning_rate": 7.1235884567126735e-06,
+      "loss": 2.022,
+      "step": 9170
+    },
+    {
+      "epoch": 2.88,
+      "learning_rate": 7.120451693851946e-06,
+      "loss": 1.9578,
+      "step": 9180
+    },
+    {
+      "epoch": 2.88,
+      "learning_rate": 7.1173149309912185e-06,
+      "loss": 1.9896,
+      "step": 9190
+    },
+    {
+      "epoch": 2.89,
+      "learning_rate": 7.11417816813049e-06,
+      "loss": 1.9313,
+      "step": 9200
+    },
+    {
+      "epoch": 2.89,
+      "learning_rate": 7.111041405269762e-06,
+      "loss": 1.8818,
+      "step": 9210
+    },
+    {
+      "epoch": 2.89,
+      "learning_rate": 7.107904642409035e-06,
+      "loss": 2.0096,
+      "step": 9220
+    },
+    {
+      "epoch": 2.9,
+      "learning_rate": 7.104767879548307e-06,
+      "loss": 1.9768,
+      "step": 9230
+    },
+    {
+      "epoch": 2.9,
+      "learning_rate": 7.1016311166875795e-06,
+      "loss": 1.9801,
+      "step": 9240
+    },
+    {
+      "epoch": 2.9,
+      "learning_rate": 7.098494353826852e-06,
+      "loss": 1.8505,
+      "step": 9250
+    },
+    {
+      "epoch": 2.9,
+      "learning_rate": 7.095357590966123e-06,
+      "loss": 1.9086,
+      "step": 9260
+    },
+    {
+      "epoch": 2.91,
+      "learning_rate": 7.0922208281053965e-06,
+      "loss": 1.9544,
+      "step": 9270
+    },
+    {
+      "epoch": 2.91,
+      "learning_rate": 7.089084065244668e-06,
+      "loss": 1.8913,
+      "step": 9280
+    },
+    {
+      "epoch": 2.91,
+      "learning_rate": 7.085947302383941e-06,
+      "loss": 1.9611,
+      "step": 9290
+    },
+    {
+      "epoch": 2.92,
+      "learning_rate": 7.082810539523213e-06,
+      "loss": 1.9619,
+      "step": 9300
+    },
+    {
+      "epoch": 2.92,
+      "learning_rate": 7.079673776662485e-06,
+      "loss": 1.9245,
+      "step": 9310
+    },
+    {
+      "epoch": 2.92,
+      "learning_rate": 7.076537013801758e-06,
+      "loss": 1.9268,
+      "step": 9320
+    },
+    {
+      "epoch": 2.93,
+      "learning_rate": 7.07340025094103e-06,
+      "loss": 1.9609,
+      "step": 9330
+    },
+    {
+      "epoch": 2.93,
+      "learning_rate": 7.070263488080301e-06,
+      "loss": 1.9626,
+      "step": 9340
+    },
+    {
+      "epoch": 2.93,
+      "learning_rate": 7.067126725219574e-06,
+      "loss": 1.9686,
+      "step": 9350
+    },
+    {
+      "epoch": 2.94,
+      "learning_rate": 7.063989962358846e-06,
+      "loss": 1.9401,
+      "step": 9360
+    },
+    {
+      "epoch": 2.94,
+      "learning_rate": 7.060853199498119e-06,
+      "loss": 2.0358,
+      "step": 9370
+    },
+    {
+      "epoch": 2.94,
+      "learning_rate": 7.057716436637391e-06,
+      "loss": 1.9737,
+      "step": 9380
+    },
+    {
+      "epoch": 2.95,
+      "learning_rate": 7.054579673776663e-06,
+      "loss": 1.8853,
+      "step": 9390
+    },
+    {
+      "epoch": 2.95,
+      "learning_rate": 7.051442910915936e-06,
+      "loss": 1.899,
+      "step": 9400
+    },
+    {
+      "epoch": 2.95,
+      "learning_rate": 7.048306148055208e-06,
+      "loss": 1.9412,
+      "step": 9410
+    },
+    {
+      "epoch": 2.95,
+      "learning_rate": 7.045169385194481e-06,
+      "loss": 1.9403,
+      "step": 9420
+    },
+    {
+      "epoch": 2.96,
+      "learning_rate": 7.042032622333752e-06,
+      "loss": 1.9022,
+      "step": 9430
+    },
+    {
+      "epoch": 2.96,
+      "learning_rate": 7.038895859473024e-06,
+      "loss": 1.9292,
+      "step": 9440
+    },
+    {
+      "epoch": 2.96,
+      "learning_rate": 7.035759096612297e-06,
+      "loss": 1.8933,
+      "step": 9450
+    },
+    {
+      "epoch": 2.97,
+      "learning_rate": 7.032622333751569e-06,
+      "loss": 1.9615,
+      "step": 9460
+    },
+    {
+      "epoch": 2.97,
+      "learning_rate": 7.029485570890842e-06,
+      "loss": 1.9655,
+      "step": 9470
+    },
+    {
+      "epoch": 2.97,
+      "learning_rate": 7.026348808030114e-06,
+      "loss": 1.9946,
+      "step": 9480
+    },
+    {
+      "epoch": 2.98,
+      "learning_rate": 7.023212045169385e-06,
+      "loss": 2.0278,
+      "step": 9490
+    },
+    {
+      "epoch": 2.98,
+      "learning_rate": 7.020075282308658e-06,
+      "loss": 1.9942,
+      "step": 9500
+    },
+    {
+      "epoch": 2.98,
+      "learning_rate": 7.01693851944793e-06,
+      "loss": 1.8639,
+      "step": 9510
+    },
+    {
+      "epoch": 2.99,
+      "learning_rate": 7.013801756587203e-06,
+      "loss": 1.9608,
+      "step": 9520
+    },
+    {
+      "epoch": 2.99,
+      "learning_rate": 7.010664993726475e-06,
+      "loss": 1.976,
+      "step": 9530
+    },
+    {
+      "epoch": 2.99,
+      "learning_rate": 7.007528230865747e-06,
+      "loss": 1.9355,
+      "step": 9540
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 7.00439146800502e-06,
+      "loss": 1.9438,
+      "step": 9550
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 7.001254705144292e-06,
+      "loss": 1.9831,
+      "step": 9560
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 1.8584038019180298,
+      "eval_runtime": 13.6198,
+      "eval_samples_per_second": 73.423,
+      "eval_steps_per_second": 4.626,
+      "step": 9564
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 6.998117942283565e-06,
+      "loss": 1.9902,
+      "step": 9570
+    },
+    {
+      "epoch": 3.01,
+      "learning_rate": 6.994981179422836e-06,
+      "loss": 1.9514,
+      "step": 9580
+    },
+    {
+      "epoch": 3.01,
+      "learning_rate": 6.991844416562108e-06,
+      "loss": 1.9013,
+      "step": 9590
+    },
+    {
+      "epoch": 3.01,
+      "learning_rate": 6.988707653701381e-06,
+      "loss": 1.9126,
+      "step": 9600
+    },
+    {
+      "epoch": 3.01,
+      "learning_rate": 6.985570890840653e-06,
+      "loss": 1.8983,
+      "step": 9610
+    },
+    {
+      "epoch": 3.02,
+      "learning_rate": 6.982434127979926e-06,
+      "loss": 1.9678,
+      "step": 9620
+    },
+    {
+      "epoch": 3.02,
+      "learning_rate": 6.979297365119198e-06,
+      "loss": 1.9002,
+      "step": 9630
+    },
+    {
+      "epoch": 3.02,
+      "learning_rate": 6.976160602258469e-06,
+      "loss": 1.9332,
+      "step": 9640
+    },
+    {
+      "epoch": 3.03,
+      "learning_rate": 6.973023839397743e-06,
+      "loss": 1.936,
+      "step": 9650
+    },
+    {
+      "epoch": 3.03,
+      "learning_rate": 6.969887076537014e-06,
+      "loss": 1.8892,
+      "step": 9660
+    },
+    {
+      "epoch": 3.03,
+      "learning_rate": 6.966750313676287e-06,
+      "loss": 1.9125,
+      "step": 9670
+    },
+    {
+      "epoch": 3.04,
+      "learning_rate": 6.963613550815559e-06,
+      "loss": 1.954,
+      "step": 9680
+    },
+    {
+      "epoch": 3.04,
+      "learning_rate": 6.960476787954831e-06,
+      "loss": 1.8804,
+      "step": 9690
+    },
+    {
+      "epoch": 3.04,
+      "learning_rate": 6.957340025094104e-06,
+      "loss": 1.9061,
+      "step": 9700
+    },
+    {
+      "epoch": 3.05,
+      "learning_rate": 6.954203262233376e-06,
+      "loss": 1.9739,
+      "step": 9710
+    },
+    {
+      "epoch": 3.05,
+      "learning_rate": 6.951066499372649e-06,
+      "loss": 1.8721,
+      "step": 9720
+    },
+    {
+      "epoch": 3.05,
+      "learning_rate": 6.94792973651192e-06,
+      "loss": 1.893,
+      "step": 9730
+    },
+    {
+      "epoch": 3.06,
+      "learning_rate": 6.944792973651192e-06,
+      "loss": 1.9453,
+      "step": 9740
+    },
+    {
+      "epoch": 3.06,
+      "learning_rate": 6.941656210790465e-06,
+      "loss": 1.9334,
+      "step": 9750
+    },
+    {
+      "epoch": 3.06,
+      "learning_rate": 6.938519447929737e-06,
+      "loss": 1.9699,
+      "step": 9760
+    },
+    {
+      "epoch": 3.06,
+      "learning_rate": 6.93538268506901e-06,
+      "loss": 1.9684,
+      "step": 9770
+    },
+    {
+      "epoch": 3.07,
+      "learning_rate": 6.932245922208282e-06,
+      "loss": 1.9842,
+      "step": 9780
+    },
+    {
+      "epoch": 3.07,
+      "learning_rate": 6.929109159347554e-06,
+      "loss": 1.9021,
+      "step": 9790
+    },
+    {
+      "epoch": 3.07,
+      "learning_rate": 6.925972396486827e-06,
+      "loss": 1.9448,
+      "step": 9800
+    },
+    {
+      "epoch": 3.08,
+      "learning_rate": 6.922835633626098e-06,
+      "loss": 1.9272,
+      "step": 9810
+    },
+    {
+      "epoch": 3.08,
+      "learning_rate": 6.919698870765371e-06,
+      "loss": 1.9218,
+      "step": 9820
+    },
+    {
+      "epoch": 3.08,
+      "learning_rate": 6.916562107904643e-06,
+      "loss": 1.9197,
+      "step": 9830
+    },
+    {
+      "epoch": 3.09,
+      "learning_rate": 6.913425345043915e-06,
+      "loss": 1.9336,
+      "step": 9840
+    },
+    {
+      "epoch": 3.09,
+      "learning_rate": 6.910288582183188e-06,
+      "loss": 1.9579,
+      "step": 9850
+    },
+    {
+      "epoch": 3.09,
+      "learning_rate": 6.90715181932246e-06,
+      "loss": 1.8816,
+      "step": 9860
+    },
+    {
+      "epoch": 3.1,
+      "learning_rate": 6.904015056461733e-06,
+      "loss": 1.9185,
+      "step": 9870
+    },
+    {
+      "epoch": 3.1,
+      "learning_rate": 6.900878293601004e-06,
+      "loss": 2.0019,
+      "step": 9880
+    },
+    {
+      "epoch": 3.1,
+      "learning_rate": 6.897741530740276e-06,
+      "loss": 1.9087,
+      "step": 9890
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.894604767879549e-06,
+      "loss": 1.9677,
+      "step": 9900
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.891468005018821e-06,
+      "loss": 1.9059,
+      "step": 9910
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.888331242158094e-06,
+      "loss": 1.8861,
+      "step": 9920
+    },
+    {
+      "epoch": 3.11,
+      "learning_rate": 6.885194479297366e-06,
+      "loss": 1.8885,
+      "step": 9930
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 6.882057716436638e-06,
+      "loss": 1.9043,
+      "step": 9940
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 6.878920953575911e-06,
+      "loss": 1.9341,
+      "step": 9950
+    },
+    {
+      "epoch": 3.12,
+      "learning_rate": 6.875784190715182e-06,
+      "loss": 1.8998,
+      "step": 9960
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 6.872647427854455e-06,
+      "loss": 1.8727,
+      "step": 9970
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 6.869510664993727e-06,
+      "loss": 1.8693,
+      "step": 9980
+    },
+    {
+      "epoch": 3.13,
+      "learning_rate": 6.866373902132999e-06,
+      "loss": 1.9166,
+      "step": 9990
+    },
+    {
+      "epoch": 3.14,
+      "learning_rate": 6.863237139272272e-06,
+      "loss": 1.8551,
+      "step": 10000
+    },
+    {
+      "epoch": 3.14,
+      "learning_rate": 6.860100376411544e-06,
+      "loss": 1.9196,
+      "step": 10010
+    },
+    {
+      "epoch": 3.14,
+      "learning_rate": 6.856963613550817e-06,
+      "loss": 1.97,
+      "step": 10020
+    },
+    {
+      "epoch": 3.15,
+      "learning_rate": 6.853826850690089e-06,
+      "loss": 1.9672,
+      "step": 10030
+    },
+    {
+      "epoch": 3.15,
+      "learning_rate": 6.85069008782936e-06,
+      "loss": 1.8929,
+      "step": 10040
+    },
+    {
+      "epoch": 3.15,
+      "learning_rate": 6.847553324968633e-06,
+      "loss": 1.9713,
+      "step": 10050
+    },
+    {
+      "epoch": 3.16,
+      "learning_rate": 6.844416562107905e-06,
+      "loss": 1.9029,
+      "step": 10060
+    },
+    {
+      "epoch": 3.16,
+      "learning_rate": 6.841279799247178e-06,
+      "loss": 1.9074,
+      "step": 10070
+    },
+    {
+      "epoch": 3.16,
+      "learning_rate": 6.83814303638645e-06,
+      "loss": 1.9178,
+      "step": 10080
+    },
+    {
+      "epoch": 3.16,
+      "learning_rate": 6.835006273525722e-06,
+      "loss": 1.9428,
+      "step": 10090
+    },
+    {
+      "epoch": 3.17,
+      "learning_rate": 6.831869510664995e-06,
+      "loss": 1.9178,
+      "step": 10100
+    },
+    {
+      "epoch": 3.17,
+      "learning_rate": 6.828732747804266e-06,
+      "loss": 1.8657,
+      "step": 10110
+    },
+    {
+      "epoch": 3.17,
+      "learning_rate": 6.825595984943539e-06,
+      "loss": 1.9652,
+      "step": 10120
+    },
+    {
+      "epoch": 3.18,
+      "learning_rate": 6.822459222082811e-06,
+      "loss": 1.8956,
+      "step": 10130
+    },
+    {
+      "epoch": 3.18,
+      "learning_rate": 6.819322459222083e-06,
+      "loss": 1.945,
+      "step": 10140
+    },
+    {
+      "epoch": 3.18,
+      "learning_rate": 6.816185696361356e-06,
+      "loss": 1.9497,
+      "step": 10150
+    },
+    {
+      "epoch": 3.19,
+      "learning_rate": 6.813048933500628e-06,
+      "loss": 1.9494,
+      "step": 10160
+    },
+    {
+      "epoch": 3.19,
+      "learning_rate": 6.809912170639901e-06,
+      "loss": 2.0337,
+      "step": 10170
+    },
+    {
+      "epoch": 3.19,
+      "learning_rate": 6.806775407779173e-06,
+      "loss": 1.8585,
+      "step": 10180
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 6.803638644918444e-06,
+      "loss": 1.9345,
+      "step": 10190
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 6.800501882057717e-06,
+      "loss": 1.867,
+      "step": 10200
+    },
+    {
+      "epoch": 3.2,
+      "learning_rate": 6.797365119196989e-06,
+      "loss": 1.9511,
+      "step": 10210
+    },
+    {
+      "epoch": 3.21,
+      "learning_rate": 6.794228356336261e-06,
+      "loss": 1.9295,
+      "step": 10220
+    },
+    {
+      "epoch": 3.21,
+      "learning_rate": 6.791091593475534e-06,
+      "loss": 1.8682,
+      "step": 10230
+    },
+    {
+      "epoch": 3.21,
+      "learning_rate": 6.787954830614806e-06,
+      "loss": 1.9623,
+      "step": 10240
+    },
+    {
+      "epoch": 3.22,
+      "learning_rate": 6.784818067754079e-06,
+      "loss": 1.8572,
+      "step": 10250
+    },
+    {
+      "epoch": 3.22,
+      "learning_rate": 6.78168130489335e-06,
+      "loss": 1.913,
+      "step": 10260
+    },
+    {
+      "epoch": 3.22,
+      "learning_rate": 6.778544542032622e-06,
+      "loss": 1.8812,
+      "step": 10270
+    },
+    {
+      "epoch": 3.22,
+      "learning_rate": 6.775407779171895e-06,
+      "loss": 1.9372,
+      "step": 10280
+    },
+    {
+      "epoch": 3.23,
+      "learning_rate": 6.772271016311167e-06,
+      "loss": 1.9093,
+      "step": 10290
+    },
+    {
+      "epoch": 3.23,
+      "learning_rate": 6.76913425345044e-06,
+      "loss": 1.9351,
+      "step": 10300
+    },
+    {
+      "epoch": 3.23,
+      "learning_rate": 6.765997490589712e-06,
+      "loss": 1.9689,
+      "step": 10310
+    },
+    {
+      "epoch": 3.24,
+      "learning_rate": 6.762860727728984e-06,
+      "loss": 1.9457,
+      "step": 10320
+    },
+    {
+      "epoch": 3.24,
+      "learning_rate": 6.759723964868257e-06,
+      "loss": 1.9288,
+      "step": 10330
+    },
+    {
+      "epoch": 3.24,
+      "learning_rate": 6.756587202007528e-06,
+      "loss": 1.9382,
+      "step": 10340
+    },
+    {
+      "epoch": 3.25,
+      "learning_rate": 6.753450439146801e-06,
+      "loss": 1.868,
+      "step": 10350
+    },
+    {
+      "epoch": 3.25,
+      "learning_rate": 6.750313676286073e-06,
+      "loss": 1.9053,
+      "step": 10360
+    },
+    {
+      "epoch": 3.25,
+      "learning_rate": 6.747176913425345e-06,
+      "loss": 1.9358,
+      "step": 10370
+    },
+    {
+      "epoch": 3.26,
+      "learning_rate": 6.744040150564618e-06,
+      "loss": 1.9296,
+      "step": 10380
+    },
+    {
+      "epoch": 3.26,
+      "learning_rate": 6.74090338770389e-06,
+      "loss": 1.9294,
+      "step": 10390
+    },
+    {
+      "epoch": 3.26,
+      "learning_rate": 6.737766624843163e-06,
+      "loss": 1.9203,
+      "step": 10400
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 6.734629861982434e-06,
+      "loss": 1.9498,
+      "step": 10410
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 6.731493099121706e-06,
+      "loss": 1.8721,
+      "step": 10420
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 6.728356336260979e-06,
+      "loss": 1.9807,
+      "step": 10430
+    },
+    {
+      "epoch": 3.27,
+      "learning_rate": 6.725219573400251e-06,
+      "loss": 1.9952,
+      "step": 10440
+    },
+    {
+      "epoch": 3.28,
+      "learning_rate": 6.722082810539524e-06,
+      "loss": 1.8955,
+      "step": 10450
+    },
+    {
+      "epoch": 3.28,
+      "learning_rate": 6.718946047678796e-06,
+      "loss": 1.954,
+      "step": 10460
+    },
+    {
+      "epoch": 3.28,
+      "learning_rate": 6.715809284818068e-06,
+      "loss": 1.9439,
+      "step": 10470
+    },
+    {
+      "epoch": 3.29,
+      "learning_rate": 6.712672521957341e-06,
+      "loss": 1.9301,
+      "step": 10480
+    },
+    {
+      "epoch": 3.29,
+      "learning_rate": 6.709535759096612e-06,
+      "loss": 1.9255,
+      "step": 10490
+    },
+    {
+      "epoch": 3.29,
+      "learning_rate": 6.706398996235885e-06,
+      "loss": 1.8499,
+      "step": 10500
+    },
+    {
+      "epoch": 3.3,
+      "learning_rate": 6.703262233375157e-06,
+      "loss": 1.8126,
+      "step": 10510
+    },
+    {
+      "epoch": 3.3,
+      "learning_rate": 6.700125470514429e-06,
+      "loss": 1.9344,
+      "step": 10520
+    },
+    {
+      "epoch": 3.3,
+      "learning_rate": 6.696988707653702e-06,
+      "loss": 1.9274,
+      "step": 10530
+    },
+    {
+      "epoch": 3.31,
+      "learning_rate": 6.693851944792974e-06,
+      "loss": 1.8945,
+      "step": 10540
+    },
+    {
+      "epoch": 3.31,
+      "learning_rate": 6.690715181932247e-06,
+      "loss": 1.8958,
+      "step": 10550
+    },
+    {
+      "epoch": 3.31,
+      "learning_rate": 6.687578419071519e-06,
+      "loss": 1.9337,
+      "step": 10560
+    },
+    {
+      "epoch": 3.32,
+      "learning_rate": 6.68444165621079e-06,
+      "loss": 1.9253,
+      "step": 10570
+    },
+    {
+      "epoch": 3.32,
+      "learning_rate": 6.681304893350063e-06,
+      "loss": 1.9544,
+      "step": 10580
+    },
+    {
+      "epoch": 3.32,
+      "learning_rate": 6.678168130489335e-06,
+      "loss": 1.8968,
+      "step": 10590
+    },
+    {
+      "epoch": 3.32,
+      "learning_rate": 6.675031367628608e-06,
+      "loss": 1.9505,
+      "step": 10600
+    },
+    {
+      "epoch": 3.33,
+      "learning_rate": 6.67189460476788e-06,
+      "loss": 1.9339,
+      "step": 10610
+    },
+    {
+      "epoch": 3.33,
+      "learning_rate": 6.668757841907152e-06,
+      "loss": 2.0069,
+      "step": 10620
+    },
+    {
+      "epoch": 3.33,
+      "learning_rate": 6.665621079046425e-06,
+      "loss": 1.8384,
+      "step": 10630
+    },
+    {
+      "epoch": 3.34,
+      "learning_rate": 6.662484316185696e-06,
+      "loss": 1.981,
+      "step": 10640
+    },
+    {
+      "epoch": 3.34,
+      "learning_rate": 6.659347553324969e-06,
+      "loss": 1.9825,
+      "step": 10650
+    },
+    {
+      "epoch": 3.34,
+      "learning_rate": 6.656210790464241e-06,
+      "loss": 1.8938,
+      "step": 10660
+    },
+    {
+      "epoch": 3.35,
+      "learning_rate": 6.653074027603513e-06,
+      "loss": 1.89,
+      "step": 10670
+    },
+    {
+      "epoch": 3.35,
+      "learning_rate": 6.649937264742786e-06,
+      "loss": 1.9083,
+      "step": 10680
+    },
+    {
+      "epoch": 3.35,
+      "learning_rate": 6.646800501882058e-06,
+      "loss": 1.8732,
+      "step": 10690
+    },
+    {
+      "epoch": 3.36,
+      "learning_rate": 6.643663739021331e-06,
+      "loss": 1.89,
+      "step": 10700
+    },
+    {
+      "epoch": 3.36,
+      "learning_rate": 6.640526976160603e-06,
+      "loss": 1.9387,
+      "step": 10710
+    },
+    {
+      "epoch": 3.36,
+      "learning_rate": 6.637390213299874e-06,
+      "loss": 1.9319,
+      "step": 10720
+    },
+    {
+      "epoch": 3.37,
+      "learning_rate": 6.634253450439147e-06,
+      "loss": 1.9586,
+      "step": 10730
+    },
+    {
+      "epoch": 3.37,
+      "learning_rate": 6.631116687578419e-06,
+      "loss": 1.9244,
+      "step": 10740
+    },
+    {
+      "epoch": 3.37,
+      "learning_rate": 6.627979924717692e-06,
+      "loss": 1.9045,
+      "step": 10750
+    },
+    {
+      "epoch": 3.38,
+      "learning_rate": 6.624843161856964e-06,
+      "loss": 1.8757,
+      "step": 10760
+    },
+    {
+      "epoch": 3.38,
+      "learning_rate": 6.621706398996236e-06,
+      "loss": 1.8907,
+      "step": 10770
+    },
+    {
+      "epoch": 3.38,
+      "learning_rate": 6.618569636135509e-06,
+      "loss": 1.9341,
+      "step": 10780
+    },
+    {
+      "epoch": 3.38,
+      "learning_rate": 6.61543287327478e-06,
+      "loss": 1.9399,
+      "step": 10790
+    },
+    {
+      "epoch": 3.39,
+      "learning_rate": 6.612296110414054e-06,
+      "loss": 1.9653,
+      "step": 10800
+    },
+    {
+      "epoch": 3.39,
+      "learning_rate": 6.609159347553325e-06,
+      "loss": 1.9733,
+      "step": 10810
+    },
+    {
+      "epoch": 3.39,
+      "learning_rate": 6.606022584692597e-06,
+      "loss": 1.924,
+      "step": 10820
+    },
+    {
+      "epoch": 3.4,
+      "learning_rate": 6.60288582183187e-06,
+      "loss": 1.9161,
+      "step": 10830
+    },
+    {
+      "epoch": 3.4,
+      "learning_rate": 6.599749058971142e-06,
+      "loss": 1.9374,
+      "step": 10840
+    },
+    {
+      "epoch": 3.4,
+      "learning_rate": 6.596612296110415e-06,
+      "loss": 2.0109,
+      "step": 10850
+    },
+    {
+      "epoch": 3.41,
+      "learning_rate": 6.593475533249687e-06,
+      "loss": 1.8972,
+      "step": 10860
+    },
+    {
+      "epoch": 3.41,
+      "learning_rate": 6.5903387703889584e-06,
+      "loss": 1.8119,
+      "step": 10870
+    },
+    {
+      "epoch": 3.41,
+      "learning_rate": 6.587202007528231e-06,
+      "loss": 1.8996,
+      "step": 10880
+    },
+    {
+      "epoch": 3.42,
+      "learning_rate": 6.584065244667503e-06,
+      "loss": 2.019,
+      "step": 10890
+    },
+    {
+      "epoch": 3.42,
+      "learning_rate": 6.580928481806776e-06,
+      "loss": 1.9472,
+      "step": 10900
+    },
+    {
+      "epoch": 3.42,
+      "learning_rate": 6.577791718946048e-06,
+      "loss": 1.9423,
+      "step": 10910
+    },
+    {
+      "epoch": 3.43,
+      "learning_rate": 6.57465495608532e-06,
+      "loss": 1.9815,
+      "step": 10920
+    },
+    {
+      "epoch": 3.43,
+      "learning_rate": 6.571518193224593e-06,
+      "loss": 1.8826,
+      "step": 10930
+    },
+    {
+      "epoch": 3.43,
+      "learning_rate": 6.568381430363865e-06,
+      "loss": 1.8968,
+      "step": 10940
+    },
+    {
+      "epoch": 3.43,
+      "learning_rate": 6.565244667503138e-06,
+      "loss": 1.9269,
+      "step": 10950
+    },
+    {
+      "epoch": 3.44,
+      "learning_rate": 6.562107904642409e-06,
+      "loss": 1.8981,
+      "step": 10960
+    },
+    {
+      "epoch": 3.44,
+      "learning_rate": 6.5589711417816814e-06,
+      "loss": 1.9447,
+      "step": 10970
+    },
+    {
+      "epoch": 3.44,
+      "learning_rate": 6.555834378920954e-06,
+      "loss": 1.9025,
+      "step": 10980
+    },
+    {
+      "epoch": 3.45,
+      "learning_rate": 6.552697616060226e-06,
+      "loss": 1.8283,
+      "step": 10990
+    },
+    {
+      "epoch": 3.45,
+      "learning_rate": 6.549560853199499e-06,
+      "loss": 1.8838,
+      "step": 11000
+    },
+    {
+      "epoch": 3.45,
+      "learning_rate": 6.546424090338771e-06,
+      "loss": 1.9003,
+      "step": 11010
+    },
+    {
+      "epoch": 3.46,
+      "learning_rate": 6.5432873274780425e-06,
+      "loss": 1.9028,
+      "step": 11020
+    },
+    {
+      "epoch": 3.46,
+      "learning_rate": 6.540150564617315e-06,
+      "loss": 1.9489,
+      "step": 11030
+    },
+    {
+      "epoch": 3.46,
+      "learning_rate": 6.5370138017565874e-06,
+      "loss": 1.9402,
+      "step": 11040
+    },
+    {
+      "epoch": 3.47,
+      "learning_rate": 6.53387703889586e-06,
+      "loss": 1.8734,
+      "step": 11050
+    },
+    {
+      "epoch": 3.47,
+      "learning_rate": 6.530740276035132e-06,
+      "loss": 1.9259,
+      "step": 11060
+    },
+    {
+      "epoch": 3.47,
+      "learning_rate": 6.527603513174404e-06,
+      "loss": 1.8362,
+      "step": 11070
+    },
+    {
+      "epoch": 3.48,
+      "learning_rate": 6.524466750313677e-06,
+      "loss": 1.9451,
+      "step": 11080
+    },
+    {
+      "epoch": 3.48,
+      "learning_rate": 6.521329987452949e-06,
+      "loss": 1.8961,
+      "step": 11090
+    },
+    {
+      "epoch": 3.48,
+      "learning_rate": 6.5181932245922206e-06,
+      "loss": 1.9401,
+      "step": 11100
+    },
+    {
+      "epoch": 3.48,
+      "learning_rate": 6.5150564617314934e-06,
+      "loss": 1.9333,
+      "step": 11110
+    },
+    {
+      "epoch": 3.49,
+      "learning_rate": 6.5119196988707655e-06,
+      "loss": 1.8602,
+      "step": 11120
+    },
+    {
+      "epoch": 3.49,
+      "learning_rate": 6.508782936010038e-06,
+      "loss": 1.9425,
+      "step": 11130
+    },
+    {
+      "epoch": 3.49,
+      "learning_rate": 6.5056461731493104e-06,
+      "loss": 1.9593,
+      "step": 11140
+    },
+    {
+      "epoch": 3.5,
+      "learning_rate": 6.5025094102885825e-06,
+      "loss": 1.8614,
+      "step": 11150
+    },
+    {
+      "epoch": 3.5,
+      "learning_rate": 6.499372647427855e-06,
+      "loss": 1.8861,
+      "step": 11160
+    },
+    {
+      "epoch": 3.5,
+      "learning_rate": 6.4962358845671266e-06,
+      "loss": 1.9378,
+      "step": 11170
+    },
+    {
+      "epoch": 3.51,
+      "learning_rate": 6.4930991217064e-06,
+      "loss": 1.9192,
+      "step": 11180
+    },
+    {
+      "epoch": 3.51,
+      "learning_rate": 6.4899623588456715e-06,
+      "loss": 1.8886,
+      "step": 11190
+    },
+    {
+      "epoch": 3.51,
+      "learning_rate": 6.4868255959849435e-06,
+      "loss": 1.8916,
+      "step": 11200
+    },
+    {
+      "epoch": 3.52,
+      "learning_rate": 6.4836888331242164e-06,
+      "loss": 1.9126,
+      "step": 11210
+    },
+    {
+      "epoch": 3.52,
+      "learning_rate": 6.4805520702634885e-06,
+      "loss": 1.9063,
+      "step": 11220
+    },
+    {
+      "epoch": 3.52,
+      "learning_rate": 6.477415307402761e-06,
+      "loss": 1.9309,
+      "step": 11230
+    },
+    {
+      "epoch": 3.53,
+      "learning_rate": 6.474278544542033e-06,
+      "loss": 1.949,
+      "step": 11240
+    },
+    {
+      "epoch": 3.53,
+      "learning_rate": 6.471141781681305e-06,
+      "loss": 1.9189,
+      "step": 11250
+    },
+    {
+      "epoch": 3.53,
+      "learning_rate": 6.4680050188205775e-06,
+      "loss": 1.9416,
+      "step": 11260
+    },
+    {
+      "epoch": 3.54,
+      "learning_rate": 6.4648682559598496e-06,
+      "loss": 1.9865,
+      "step": 11270
+    },
+    {
+      "epoch": 3.54,
+      "learning_rate": 6.4617314930991224e-06,
+      "loss": 1.8799,
+      "step": 11280
+    },
+    {
+      "epoch": 3.54,
+      "learning_rate": 6.4585947302383945e-06,
+      "loss": 1.9771,
+      "step": 11290
+    },
+    {
+      "epoch": 3.54,
+      "learning_rate": 6.4554579673776665e-06,
+      "loss": 1.933,
+      "step": 11300
+    },
+    {
+      "epoch": 3.55,
+      "learning_rate": 6.4523212045169394e-06,
+      "loss": 1.889,
+      "step": 11310
+    },
+    {
+      "epoch": 3.55,
+      "learning_rate": 6.4491844416562115e-06,
+      "loss": 1.9768,
+      "step": 11320
+    },
+    {
+      "epoch": 3.55,
+      "learning_rate": 6.446047678795484e-06,
+      "loss": 1.8904,
+      "step": 11330
+    },
+    {
+      "epoch": 3.56,
+      "learning_rate": 6.4429109159347556e-06,
+      "loss": 1.988,
+      "step": 11340
+    },
+    {
+      "epoch": 3.56,
+      "learning_rate": 6.439774153074028e-06,
+      "loss": 1.9245,
+      "step": 11350
+    },
+    {
+      "epoch": 3.56,
+      "learning_rate": 6.4366373902133005e-06,
+      "loss": 2.0271,
+      "step": 11360
+    },
+    {
+      "epoch": 3.57,
+      "learning_rate": 6.4335006273525725e-06,
+      "loss": 1.9249,
+      "step": 11370
+    },
+    {
+      "epoch": 3.57,
+      "learning_rate": 6.4303638644918454e-06,
+      "loss": 1.9529,
+      "step": 11380
+    },
+    {
+      "epoch": 3.57,
+      "learning_rate": 6.4272271016311175e-06,
+      "loss": 1.9427,
+      "step": 11390
+    },
+    {
+      "epoch": 3.58,
+      "learning_rate": 6.424090338770389e-06,
+      "loss": 1.8737,
+      "step": 11400
+    },
+    {
+      "epoch": 3.58,
+      "learning_rate": 6.4209535759096616e-06,
+      "loss": 1.934,
+      "step": 11410
+    },
+    {
+      "epoch": 3.58,
+      "learning_rate": 6.417816813048934e-06,
+      "loss": 1.97,
+      "step": 11420
+    },
+    {
+      "epoch": 3.59,
+      "learning_rate": 6.4146800501882065e-06,
+      "loss": 1.8813,
+      "step": 11430
+    },
+    {
+      "epoch": 3.59,
+      "learning_rate": 6.4115432873274786e-06,
+      "loss": 1.9036,
+      "step": 11440
+    },
+    {
+      "epoch": 3.59,
+      "learning_rate": 6.408406524466751e-06,
+      "loss": 1.9235,
+      "step": 11450
+    },
+    {
+      "epoch": 3.59,
+      "learning_rate": 6.4052697616060235e-06,
+      "loss": 1.9351,
+      "step": 11460
+    },
+    {
+      "epoch": 3.6,
+      "learning_rate": 6.4021329987452955e-06,
+      "loss": 1.8937,
+      "step": 11470
+    },
+    {
+      "epoch": 3.6,
+      "learning_rate": 6.3989962358845684e-06,
+      "loss": 1.8795,
+      "step": 11480
+    },
+    {
+      "epoch": 3.6,
+      "learning_rate": 6.39585947302384e-06,
+      "loss": 1.9792,
+      "step": 11490
+    },
+    {
+      "epoch": 3.61,
+      "learning_rate": 6.392722710163112e-06,
+      "loss": 1.9934,
+      "step": 11500
+    },
+    {
+      "epoch": 3.61,
+      "learning_rate": 6.3895859473023846e-06,
+      "loss": 1.955,
+      "step": 11510
+    },
+    {
+      "epoch": 3.61,
+      "learning_rate": 6.386449184441657e-06,
+      "loss": 1.9538,
+      "step": 11520
+    },
+    {
+      "epoch": 3.62,
+      "learning_rate": 6.3833124215809295e-06,
+      "loss": 1.9228,
+      "step": 11530
+    },
+    {
+      "epoch": 3.62,
+      "learning_rate": 6.3801756587202015e-06,
+      "loss": 1.8933,
+      "step": 11540
+    },
+    {
+      "epoch": 3.62,
+      "learning_rate": 6.377038895859473e-06,
+      "loss": 1.8376,
+      "step": 11550
+    },
+    {
+      "epoch": 3.63,
+      "learning_rate": 6.373902132998746e-06,
+      "loss": 1.8814,
+      "step": 11560
+    },
+    {
+      "epoch": 3.63,
+      "learning_rate": 6.370765370138018e-06,
+      "loss": 1.9261,
+      "step": 11570
+    },
+    {
+      "epoch": 3.63,
+      "learning_rate": 6.3676286072772906e-06,
+      "loss": 1.9451,
+      "step": 11580
+    },
+    {
+      "epoch": 3.64,
+      "learning_rate": 6.364491844416563e-06,
+      "loss": 1.9204,
+      "step": 11590
+    },
+    {
+      "epoch": 3.64,
+      "learning_rate": 6.361355081555835e-06,
+      "loss": 1.9825,
+      "step": 11600
+    },
+    {
+      "epoch": 3.64,
+      "learning_rate": 6.3582183186951076e-06,
+      "loss": 1.9243,
+      "step": 11610
+    },
+    {
+      "epoch": 3.64,
+      "learning_rate": 6.35508155583438e-06,
+      "loss": 1.9351,
+      "step": 11620
+    },
+    {
+      "epoch": 3.65,
+      "learning_rate": 6.3519447929736525e-06,
+      "loss": 1.8751,
+      "step": 11630
+    },
+    {
+      "epoch": 3.65,
+      "learning_rate": 6.348808030112924e-06,
+      "loss": 1.8572,
+      "step": 11640
+    },
+    {
+      "epoch": 3.65,
+      "learning_rate": 6.345671267252196e-06,
+      "loss": 1.8854,
+      "step": 11650
+    },
+    {
+      "epoch": 3.66,
+      "learning_rate": 6.342534504391469e-06,
+      "loss": 1.8833,
+      "step": 11660
+    },
+    {
+      "epoch": 3.66,
+      "learning_rate": 6.339397741530741e-06,
+      "loss": 1.8723,
+      "step": 11670
+    },
+    {
+      "epoch": 3.66,
+      "learning_rate": 6.3362609786700136e-06,
+      "loss": 1.8794,
+      "step": 11680
+    },
+    {
+      "epoch": 3.67,
+      "learning_rate": 6.333124215809286e-06,
+      "loss": 1.9154,
+      "step": 11690
+    },
+    {
+      "epoch": 3.67,
+      "learning_rate": 6.329987452948557e-06,
+      "loss": 1.8708,
+      "step": 11700
+    },
+    {
+      "epoch": 3.67,
+      "learning_rate": 6.3268506900878305e-06,
+      "loss": 1.8722,
+      "step": 11710
+    },
+    {
+      "epoch": 3.68,
+      "learning_rate": 6.323713927227102e-06,
+      "loss": 1.9407,
+      "step": 11720
+    },
+    {
+      "epoch": 3.68,
+      "learning_rate": 6.320577164366375e-06,
+      "loss": 1.9523,
+      "step": 11730
+    },
+    {
+      "epoch": 3.68,
+      "learning_rate": 6.317440401505647e-06,
+      "loss": 1.9717,
+      "step": 11740
+    },
+    {
+      "epoch": 3.69,
+      "learning_rate": 6.314303638644919e-06,
+      "loss": 1.921,
+      "step": 11750
+    },
+    {
+      "epoch": 3.69,
+      "learning_rate": 6.311166875784192e-06,
+      "loss": 1.9393,
+      "step": 11760
+    },
+    {
+      "epoch": 3.69,
+      "learning_rate": 6.308030112923464e-06,
+      "loss": 1.9194,
+      "step": 11770
+    },
+    {
+      "epoch": 3.7,
+      "learning_rate": 6.3048933500627365e-06,
+      "loss": 1.9261,
+      "step": 11780
+    },
+    {
+      "epoch": 3.7,
+      "learning_rate": 6.301756587202008e-06,
+      "loss": 1.9923,
+      "step": 11790
+    },
+    {
+      "epoch": 3.7,
+      "learning_rate": 6.29861982434128e-06,
+      "loss": 1.9222,
+      "step": 11800
+    },
+    {
+      "epoch": 3.7,
+      "learning_rate": 6.295483061480553e-06,
+      "loss": 1.8534,
+      "step": 11810
+    },
+    {
+      "epoch": 3.71,
+      "learning_rate": 6.292346298619825e-06,
+      "loss": 1.845,
+      "step": 11820
+    },
+    {
+      "epoch": 3.71,
+      "learning_rate": 6.289209535759098e-06,
+      "loss": 1.9502,
+      "step": 11830
+    },
+    {
+      "epoch": 3.71,
+      "learning_rate": 6.28607277289837e-06,
+      "loss": 1.8621,
+      "step": 11840
+    },
+    {
+      "epoch": 3.72,
+      "learning_rate": 6.282936010037642e-06,
+      "loss": 1.9693,
+      "step": 11850
+    },
+    {
+      "epoch": 3.72,
+      "learning_rate": 6.279799247176915e-06,
+      "loss": 1.9975,
+      "step": 11860
+    },
+    {
+      "epoch": 3.72,
+      "learning_rate": 6.276662484316186e-06,
+      "loss": 1.9713,
+      "step": 11870
+    },
+    {
+      "epoch": 3.73,
+      "learning_rate": 6.273525721455459e-06,
+      "loss": 1.8751,
+      "step": 11880
+    },
+    {
+      "epoch": 3.73,
+      "learning_rate": 6.270388958594731e-06,
+      "loss": 1.9712,
+      "step": 11890
+    },
+    {
+      "epoch": 3.73,
+      "learning_rate": 6.267252195734003e-06,
+      "loss": 1.8879,
+      "step": 11900
+    },
+    {
+      "epoch": 3.74,
+      "learning_rate": 6.264115432873276e-06,
+      "loss": 1.9103,
+      "step": 11910
+    },
+    {
+      "epoch": 3.74,
+      "learning_rate": 6.260978670012548e-06,
+      "loss": 1.9438,
+      "step": 11920
+    },
+    {
+      "epoch": 3.74,
+      "learning_rate": 6.257841907151821e-06,
+      "loss": 1.8902,
+      "step": 11930
+    },
+    {
+      "epoch": 3.75,
+      "learning_rate": 6.254705144291092e-06,
+      "loss": 1.9667,
+      "step": 11940
+    },
+    {
+      "epoch": 3.75,
+      "learning_rate": 6.251568381430364e-06,
+      "loss": 1.9765,
+      "step": 11950
+    },
+    {
+      "epoch": 3.75,
+      "learning_rate": 6.248431618569637e-06,
+      "loss": 1.8914,
+      "step": 11960
+    },
+    {
+      "epoch": 3.75,
+      "learning_rate": 6.245294855708909e-06,
+      "loss": 1.9268,
+      "step": 11970
+    },
+    {
+      "epoch": 3.76,
+      "learning_rate": 6.242158092848181e-06,
+      "loss": 1.9264,
+      "step": 11980
+    },
+    {
+      "epoch": 3.76,
+      "learning_rate": 6.239021329987454e-06,
+      "loss": 1.8823,
+      "step": 11990
+    },
+    {
+      "epoch": 3.76,
+      "learning_rate": 6.235884567126726e-06,
+      "loss": 1.9149,
+      "step": 12000
+    },
+    {
+      "epoch": 3.77,
+      "learning_rate": 6.232747804265999e-06,
+      "loss": 1.8905,
+      "step": 12010
+    },
+    {
+      "epoch": 3.77,
+      "learning_rate": 6.22961104140527e-06,
+      "loss": 1.8442,
+      "step": 12020
+    },
+    {
+      "epoch": 3.77,
+      "learning_rate": 6.226474278544542e-06,
+      "loss": 1.9242,
+      "step": 12030
+    },
+    {
+      "epoch": 3.78,
+      "learning_rate": 6.223337515683815e-06,
+      "loss": 1.8998,
+      "step": 12040
+    },
+    {
+      "epoch": 3.78,
+      "learning_rate": 6.220200752823087e-06,
+      "loss": 1.8563,
+      "step": 12050
+    },
+    {
+      "epoch": 3.78,
+      "learning_rate": 6.21706398996236e-06,
+      "loss": 1.9342,
+      "step": 12060
+    },
+    {
+      "epoch": 3.79,
+      "learning_rate": 6.213927227101632e-06,
+      "loss": 1.9558,
+      "step": 12070
+    },
+    {
+      "epoch": 3.79,
+      "learning_rate": 6.210790464240903e-06,
+      "loss": 1.9036,
+      "step": 12080
+    },
+    {
+      "epoch": 3.79,
+      "learning_rate": 6.207653701380177e-06,
+      "loss": 2.0094,
+      "step": 12090
+    },
+    {
+      "epoch": 3.8,
+      "learning_rate": 6.204516938519448e-06,
+      "loss": 1.9639,
+      "step": 12100
+    },
+    {
+      "epoch": 3.8,
+      "learning_rate": 6.201380175658721e-06,
+      "loss": 1.9426,
+      "step": 12110
+    },
+    {
+      "epoch": 3.8,
+      "learning_rate": 6.198243412797993e-06,
+      "loss": 1.8982,
+      "step": 12120
+    },
+    {
+      "epoch": 3.8,
+      "learning_rate": 6.195106649937265e-06,
+      "loss": 1.9271,
+      "step": 12130
+    },
+    {
+      "epoch": 3.81,
+      "learning_rate": 6.191969887076538e-06,
+      "loss": 1.9468,
+      "step": 12140
+    },
+    {
+      "epoch": 3.81,
+      "learning_rate": 6.18883312421581e-06,
+      "loss": 1.7622,
+      "step": 12150
+    },
+    {
+      "epoch": 3.81,
+      "learning_rate": 6.185696361355083e-06,
+      "loss": 1.8988,
+      "step": 12160
+    },
+    {
+      "epoch": 3.82,
+      "learning_rate": 6.182559598494354e-06,
+      "loss": 1.8992,
+      "step": 12170
+    },
+    {
+      "epoch": 3.82,
+      "learning_rate": 6.179422835633626e-06,
+      "loss": 1.8374,
+      "step": 12180
+    },
+    {
+      "epoch": 3.82,
+      "learning_rate": 6.176286072772899e-06,
+      "loss": 1.9129,
+      "step": 12190
+    },
+    {
+      "epoch": 3.83,
+      "learning_rate": 6.173149309912171e-06,
+      "loss": 1.9074,
+      "step": 12200
+    },
+    {
+      "epoch": 3.83,
+      "learning_rate": 6.170012547051444e-06,
+      "loss": 1.8697,
+      "step": 12210
+    },
+    {
+      "epoch": 3.83,
+      "learning_rate": 6.166875784190716e-06,
+      "loss": 1.9257,
+      "step": 12220
+    },
+    {
+      "epoch": 3.84,
+      "learning_rate": 6.163739021329988e-06,
+      "loss": 1.9101,
+      "step": 12230
+    },
+    {
+      "epoch": 3.84,
+      "learning_rate": 6.160602258469261e-06,
+      "loss": 1.9156,
+      "step": 12240
+    },
+    {
+      "epoch": 3.84,
+      "learning_rate": 6.157465495608532e-06,
+      "loss": 1.9763,
+      "step": 12250
+    },
+    {
+      "epoch": 3.85,
+      "learning_rate": 6.154328732747805e-06,
+      "loss": 1.9401,
+      "step": 12260
+    },
+    {
+      "epoch": 3.85,
+      "learning_rate": 6.151191969887077e-06,
+      "loss": 1.8985,
+      "step": 12270
+    },
+    {
+      "epoch": 3.85,
+      "learning_rate": 6.148055207026349e-06,
+      "loss": 1.9099,
+      "step": 12280
+    },
+    {
+      "epoch": 3.86,
+      "learning_rate": 6.144918444165622e-06,
+      "loss": 1.9538,
+      "step": 12290
+    },
+    {
+      "epoch": 3.86,
+      "learning_rate": 6.141781681304894e-06,
+      "loss": 1.8808,
+      "step": 12300
+    },
+    {
+      "epoch": 3.86,
+      "learning_rate": 6.138644918444167e-06,
+      "loss": 1.9,
+      "step": 12310
+    },
+    {
+      "epoch": 3.86,
+      "learning_rate": 6.135508155583438e-06,
+      "loss": 1.8517,
+      "step": 12320
+    },
+    {
+      "epoch": 3.87,
+      "learning_rate": 6.13237139272271e-06,
+      "loss": 1.9548,
+      "step": 12330
+    },
+    {
+      "epoch": 3.87,
+      "learning_rate": 6.129234629861983e-06,
+      "loss": 1.9095,
+      "step": 12340
+    },
+    {
+      "epoch": 3.87,
+      "learning_rate": 6.126097867001255e-06,
+      "loss": 1.9689,
+      "step": 12350
+    },
+    {
+      "epoch": 3.88,
+      "learning_rate": 6.122961104140528e-06,
+      "loss": 1.9209,
+      "step": 12360
+    },
+    {
+      "epoch": 3.88,
+      "learning_rate": 6.1198243412798e-06,
+      "loss": 1.859,
+      "step": 12370
+    },
+    {
+      "epoch": 3.88,
+      "learning_rate": 6.116687578419072e-06,
+      "loss": 1.8979,
+      "step": 12380
+    },
+    {
+      "epoch": 3.89,
+      "learning_rate": 6.113550815558345e-06,
+      "loss": 1.929,
+      "step": 12390
+    },
+    {
+      "epoch": 3.89,
+      "learning_rate": 6.110414052697616e-06,
+      "loss": 1.9601,
+      "step": 12400
+    },
+    {
+      "epoch": 3.89,
+      "learning_rate": 6.107277289836889e-06,
+      "loss": 1.928,
+      "step": 12410
+    },
+    {
+      "epoch": 3.9,
+      "learning_rate": 6.104140526976161e-06,
+      "loss": 1.9434,
+      "step": 12420
+    },
+    {
+      "epoch": 3.9,
+      "learning_rate": 6.101003764115433e-06,
+      "loss": 1.8637,
+      "step": 12430
+    },
+    {
+      "epoch": 3.9,
+      "learning_rate": 6.097867001254706e-06,
+      "loss": 1.9568,
+      "step": 12440
+    },
+    {
+      "epoch": 3.91,
+      "learning_rate": 6.094730238393978e-06,
+      "loss": 1.9046,
+      "step": 12450
+    },
+    {
+      "epoch": 3.91,
+      "learning_rate": 6.091593475533251e-06,
+      "loss": 1.8826,
+      "step": 12460
+    },
+    {
+      "epoch": 3.91,
+      "learning_rate": 6.088456712672523e-06,
+      "loss": 1.8399,
+      "step": 12470
+    },
+    {
+      "epoch": 3.91,
+      "learning_rate": 6.085319949811794e-06,
+      "loss": 1.9354,
+      "step": 12480
+    },
+    {
+      "epoch": 3.92,
+      "learning_rate": 6.082183186951067e-06,
+      "loss": 1.9155,
+      "step": 12490
+    },
+    {
+      "epoch": 3.92,
+      "learning_rate": 6.079046424090339e-06,
+      "loss": 1.8871,
+      "step": 12500
+    },
+    {
+      "epoch": 3.92,
+      "learning_rate": 6.075909661229612e-06,
+      "loss": 1.8971,
+      "step": 12510
+    },
+    {
+      "epoch": 3.93,
+      "learning_rate": 6.072772898368884e-06,
+      "loss": 1.9115,
+      "step": 12520
+    },
+    {
+      "epoch": 3.93,
+      "learning_rate": 6.069636135508156e-06,
+      "loss": 1.8963,
+      "step": 12530
+    },
+    {
+      "epoch": 3.93,
+      "learning_rate": 6.066499372647429e-06,
+      "loss": 1.9291,
+      "step": 12540
+    },
+    {
+      "epoch": 3.94,
+      "learning_rate": 6.0633626097867e-06,
+      "loss": 1.9453,
+      "step": 12550
+    },
+    {
+      "epoch": 3.94,
+      "learning_rate": 6.060225846925973e-06,
+      "loss": 1.9084,
+      "step": 12560
+    },
+    {
+      "epoch": 3.94,
+      "learning_rate": 6.057089084065245e-06,
+      "loss": 1.9131,
+      "step": 12570
+    },
+    {
+      "epoch": 3.95,
+      "learning_rate": 6.053952321204517e-06,
+      "loss": 1.9279,
+      "step": 12580
+    },
+    {
+      "epoch": 3.95,
+      "learning_rate": 6.05081555834379e-06,
+      "loss": 1.8515,
+      "step": 12590
+    },
+    {
+      "epoch": 3.95,
+      "learning_rate": 6.047678795483062e-06,
+      "loss": 1.7713,
+      "step": 12600
+    },
+    {
+      "epoch": 3.96,
+      "learning_rate": 6.044542032622335e-06,
+      "loss": 1.9936,
+      "step": 12610
+    },
+    {
+      "epoch": 3.96,
+      "learning_rate": 6.041405269761607e-06,
+      "loss": 1.9475,
+      "step": 12620
+    },
+    {
+      "epoch": 3.96,
+      "learning_rate": 6.038268506900878e-06,
+      "loss": 1.8708,
+      "step": 12630
+    },
+    {
+      "epoch": 3.96,
+      "learning_rate": 6.035131744040151e-06,
+      "loss": 1.9751,
+      "step": 12640
+    },
+    {
+      "epoch": 3.97,
+      "learning_rate": 6.031994981179423e-06,
+      "loss": 1.908,
+      "step": 12650
+    },
+    {
+      "epoch": 3.97,
+      "learning_rate": 6.028858218318696e-06,
+      "loss": 1.8999,
+      "step": 12660
+    },
+    {
+      "epoch": 3.97,
+      "learning_rate": 6.025721455457968e-06,
+      "loss": 1.8607,
+      "step": 12670
+    },
+    {
+      "epoch": 3.98,
+      "learning_rate": 6.02258469259724e-06,
+      "loss": 1.921,
+      "step": 12680
+    },
+    {
+      "epoch": 3.98,
+      "learning_rate": 6.019447929736513e-06,
+      "loss": 1.8849,
+      "step": 12690
+    },
+    {
+      "epoch": 3.98,
+      "learning_rate": 6.016311166875784e-06,
+      "loss": 1.9265,
+      "step": 12700
+    },
+    {
+      "epoch": 3.99,
+      "learning_rate": 6.013174404015057e-06,
+      "loss": 1.9314,
+      "step": 12710
+    },
+    {
+      "epoch": 3.99,
+      "learning_rate": 6.010037641154329e-06,
+      "loss": 1.9201,
+      "step": 12720
+    },
+    {
+      "epoch": 3.99,
+      "learning_rate": 6.006900878293601e-06,
+      "loss": 1.9461,
+      "step": 12730
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 6.003764115432874e-06,
+      "loss": 1.9463,
+      "step": 12740
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 6.000627352572146e-06,
+      "loss": 1.8387,
+      "step": 12750
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 1.8398990631103516,
+      "eval_runtime": 13.6302,
+      "eval_samples_per_second": 73.366,
+      "eval_steps_per_second": 4.622,
+      "step": 12752
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 5.997490589711419e-06,
+      "loss": 1.8553,
+      "step": 12760
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.994353826850691e-06,
+      "loss": 1.8496,
+      "step": 12770
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.991217063989962e-06,
+      "loss": 1.9154,
+      "step": 12780
+    },
+    {
+      "epoch": 4.01,
+      "learning_rate": 5.988080301129235e-06,
+      "loss": 1.8631,
+      "step": 12790
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.984943538268507e-06,
+      "loss": 1.9562,
+      "step": 12800
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.98180677540778e-06,
+      "loss": 1.9106,
+      "step": 12810
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.978670012547052e-06,
+      "loss": 1.9179,
+      "step": 12820
+    },
+    {
+      "epoch": 4.02,
+      "learning_rate": 5.975533249686324e-06,
+      "loss": 1.8616,
+      "step": 12830
+    },
+    {
+      "epoch": 4.03,
+      "learning_rate": 5.972396486825597e-06,
+      "loss": 1.903,
+      "step": 12840
+    },
+    {
+      "epoch": 4.03,
+      "learning_rate": 5.969259723964868e-06,
+      "loss": 1.959,
+      "step": 12850
+    },
+    {
+      "epoch": 4.03,
+      "learning_rate": 5.96612296110414e-06,
+      "loss": 1.8867,
+      "step": 12860
+    },
+    {
+      "epoch": 4.04,
+      "learning_rate": 5.962986198243413e-06,
+      "loss": 1.889,
+      "step": 12870
+    },
+    {
+      "epoch": 4.04,
+      "learning_rate": 5.959849435382685e-06,
+      "loss": 1.9415,
+      "step": 12880
+    },
+    {
+      "epoch": 4.04,
+      "learning_rate": 5.956712672521958e-06,
+      "loss": 1.9152,
+      "step": 12890
+    },
+    {
+      "epoch": 4.05,
+      "learning_rate": 5.95357590966123e-06,
+      "loss": 1.8811,
+      "step": 12900
+    },
+    {
+      "epoch": 4.05,
+      "learning_rate": 5.950439146800502e-06,
+      "loss": 1.8701,
+      "step": 12910
+    },
+    {
+      "epoch": 4.05,
+      "learning_rate": 5.947302383939775e-06,
+      "loss": 1.9534,
+      "step": 12920
+    },
+    {
+      "epoch": 4.06,
+      "learning_rate": 5.944165621079046e-06,
+      "loss": 1.8416,
+      "step": 12930
+    },
+    {
+      "epoch": 4.06,
+      "learning_rate": 5.941028858218319e-06,
+      "loss": 1.9234,
+      "step": 12940
+    },
+    {
+      "epoch": 4.06,
+      "learning_rate": 5.937892095357591e-06,
+      "loss": 1.953,
+      "step": 12950
+    },
+    {
+      "epoch": 4.07,
+      "learning_rate": 5.934755332496863e-06,
+      "loss": 1.8865,
+      "step": 12960
+    },
+    {
+      "epoch": 4.07,
+      "learning_rate": 5.931618569636136e-06,
+      "loss": 1.9868,
+      "step": 12970
+    },
+    {
+      "epoch": 4.07,
+      "learning_rate": 5.928481806775408e-06,
+      "loss": 1.9212,
+      "step": 12980
+    },
+    {
+      "epoch": 4.07,
+      "learning_rate": 5.925345043914681e-06,
+      "loss": 1.8667,
+      "step": 12990
+    },
+    {
+      "epoch": 4.08,
+      "learning_rate": 5.922208281053953e-06,
+      "loss": 1.9371,
+      "step": 13000
+    },
+    {
+      "epoch": 4.08,
+      "learning_rate": 5.919071518193224e-06,
+      "loss": 1.9271,
+      "step": 13010
+    },
+    {
+      "epoch": 4.08,
+      "learning_rate": 5.915934755332497e-06,
+      "loss": 1.9149,
+      "step": 13020
+    },
+    {
+      "epoch": 4.09,
+      "learning_rate": 5.912797992471769e-06,
+      "loss": 1.9657,
+      "step": 13030
+    },
+    {
+      "epoch": 4.09,
+      "learning_rate": 5.909661229611042e-06,
+      "loss": 1.8185,
+      "step": 13040
+    },
+    {
+      "epoch": 4.09,
+      "learning_rate": 5.906524466750314e-06,
+      "loss": 1.943,
+      "step": 13050
+    },
+    {
+      "epoch": 4.1,
+      "learning_rate": 5.903387703889586e-06,
+      "loss": 1.8899,
+      "step": 13060
+    },
+    {
+      "epoch": 4.1,
+      "learning_rate": 5.900250941028859e-06,
+      "loss": 1.8154,
+      "step": 13070
+    },
+    {
+      "epoch": 4.1,
+      "learning_rate": 5.89711417816813e-06,
+      "loss": 1.884,
+      "step": 13080
+    },
+    {
+      "epoch": 4.11,
+      "learning_rate": 5.893977415307403e-06,
+      "loss": 1.8621,
+      "step": 13090
+    },
+    {
+      "epoch": 4.11,
+      "learning_rate": 5.890840652446675e-06,
+      "loss": 1.8992,
+      "step": 13100
+    },
+    {
+      "epoch": 4.11,
+      "learning_rate": 5.887703889585947e-06,
+      "loss": 1.8549,
+      "step": 13110
+    },
+    {
+      "epoch": 4.12,
+      "learning_rate": 5.88456712672522e-06,
+      "loss": 1.8813,
+      "step": 13120
+    },
+    {
+      "epoch": 4.12,
+      "learning_rate": 5.881430363864492e-06,
+      "loss": 1.887,
+      "step": 13130
+    },
+    {
+      "epoch": 4.12,
+      "learning_rate": 5.878293601003765e-06,
+      "loss": 1.8874,
+      "step": 13140
+    },
+    {
+      "epoch": 4.12,
+      "learning_rate": 5.875156838143037e-06,
+      "loss": 1.9424,
+      "step": 13150
+    },
+    {
+      "epoch": 4.13,
+      "learning_rate": 5.872020075282308e-06,
+      "loss": 1.841,
+      "step": 13160
+    },
+    {
+      "epoch": 4.13,
+      "learning_rate": 5.868883312421581e-06,
+      "loss": 1.9826,
+      "step": 13170
+    },
+    {
+      "epoch": 4.13,
+      "learning_rate": 5.865746549560853e-06,
+      "loss": 1.9082,
+      "step": 13180
+    },
+    {
+      "epoch": 4.14,
+      "learning_rate": 5.862609786700126e-06,
+      "loss": 1.9529,
+      "step": 13190
+    },
+    {
+      "epoch": 4.14,
+      "learning_rate": 5.859473023839398e-06,
+      "loss": 1.8686,
+      "step": 13200
+    },
+    {
+      "epoch": 4.14,
+      "learning_rate": 5.85633626097867e-06,
+      "loss": 1.8908,
+      "step": 13210
+    },
+    {
+      "epoch": 4.15,
+      "learning_rate": 5.853199498117943e-06,
+      "loss": 1.8777,
+      "step": 13220
+    },
+    {
+      "epoch": 4.15,
+      "learning_rate": 5.850062735257214e-06,
+      "loss": 1.9514,
+      "step": 13230
+    },
+    {
+      "epoch": 4.15,
+      "learning_rate": 5.846925972396488e-06,
+      "loss": 1.8951,
+      "step": 13240
+    },
+    {
+      "epoch": 4.16,
+      "learning_rate": 5.843789209535759e-06,
+      "loss": 1.9109,
+      "step": 13250
+    },
+    {
+      "epoch": 4.16,
+      "learning_rate": 5.840652446675031e-06,
+      "loss": 1.9708,
+      "step": 13260
+    },
+    {
+      "epoch": 4.16,
+      "learning_rate": 5.837515683814304e-06,
+      "loss": 1.9357,
+      "step": 13270
+    },
+    {
+      "epoch": 4.17,
+      "learning_rate": 5.834378920953576e-06,
+      "loss": 1.9021,
+      "step": 13280
+    },
+    {
+      "epoch": 4.17,
+      "learning_rate": 5.831242158092849e-06,
+      "loss": 1.9638,
+      "step": 13290
+    },
+    {
+      "epoch": 4.17,
+      "learning_rate": 5.828105395232121e-06,
+      "loss": 1.8816,
+      "step": 13300
+    },
+    {
+      "epoch": 4.18,
+      "learning_rate": 5.8249686323713925e-06,
+      "loss": 1.8564,
+      "step": 13310
+    },
+    {
+      "epoch": 4.18,
+      "learning_rate": 5.821831869510665e-06,
+      "loss": 1.9092,
+      "step": 13320
+    },
+    {
+      "epoch": 4.18,
+      "learning_rate": 5.818695106649937e-06,
+      "loss": 1.9668,
+      "step": 13330
+    },
+    {
+      "epoch": 4.18,
+      "learning_rate": 5.81555834378921e-06,
+      "loss": 1.8914,
+      "step": 13340
+    },
+    {
+      "epoch": 4.19,
+      "learning_rate": 5.812421580928482e-06,
+      "loss": 1.924,
+      "step": 13350
+    },
+    {
+      "epoch": 4.19,
+      "learning_rate": 5.809284818067754e-06,
+      "loss": 1.8328,
+      "step": 13360
+    },
+    {
+      "epoch": 4.19,
+      "learning_rate": 5.806148055207027e-06,
+      "loss": 1.9482,
+      "step": 13370
+    },
+    {
+      "epoch": 4.2,
+      "learning_rate": 5.803011292346299e-06,
+      "loss": 1.8671,
+      "step": 13380
+    },
+    {
+      "epoch": 4.2,
+      "learning_rate": 5.799874529485572e-06,
+      "loss": 1.9224,
+      "step": 13390
+    },
+    {
+      "epoch": 4.2,
+      "learning_rate": 5.796737766624843e-06,
+      "loss": 1.927,
+      "step": 13400
+    },
+    {
+      "epoch": 4.21,
+      "learning_rate": 5.7936010037641154e-06,
+      "loss": 1.8817,
+      "step": 13410
+    },
+    {
+      "epoch": 4.21,
+      "learning_rate": 5.790464240903388e-06,
+      "loss": 1.8894,
+      "step": 13420
+    },
+    {
+      "epoch": 4.21,
+      "learning_rate": 5.78732747804266e-06,
+      "loss": 1.8203,
+      "step": 13430
+    },
+    {
+      "epoch": 4.22,
+      "learning_rate": 5.784190715181933e-06,
+      "loss": 1.8829,
+      "step": 13440
+    },
+    {
+      "epoch": 4.22,
+      "learning_rate": 5.781053952321205e-06,
+      "loss": 1.9016,
+      "step": 13450
+    },
+    {
+      "epoch": 4.22,
+      "learning_rate": 5.7779171894604765e-06,
+      "loss": 1.8105,
+      "step": 13460
+    },
+    {
+      "epoch": 4.23,
+      "learning_rate": 5.774780426599749e-06,
+      "loss": 1.9625,
+      "step": 13470
+    },
+    {
+      "epoch": 4.23,
+      "learning_rate": 5.7716436637390215e-06,
+      "loss": 1.9637,
+      "step": 13480
+    },
+    {
+      "epoch": 4.23,
+      "learning_rate": 5.768506900878294e-06,
+      "loss": 2.0088,
+      "step": 13490
+    },
+    {
+      "epoch": 4.23,
+      "learning_rate": 5.765370138017566e-06,
+      "loss": 1.8245,
+      "step": 13500
+    },
+    {
+      "epoch": 4.24,
+      "learning_rate": 5.7622333751568384e-06,
+      "loss": 1.9708,
+      "step": 13510
+    },
+    {
+      "epoch": 4.24,
+      "learning_rate": 5.759096612296111e-06,
+      "loss": 1.9308,
+      "step": 13520
+    },
+    {
+      "epoch": 4.24,
+      "learning_rate": 5.755959849435383e-06,
+      "loss": 1.8563,
+      "step": 13530
+    },
+    {
+      "epoch": 4.25,
+      "learning_rate": 5.752823086574656e-06,
+      "loss": 1.8595,
+      "step": 13540
+    },
+    {
+      "epoch": 4.25,
+      "learning_rate": 5.7496863237139275e-06,
+      "loss": 1.9206,
+      "step": 13550
+    },
+    {
+      "epoch": 4.25,
+      "learning_rate": 5.7465495608531995e-06,
+      "loss": 1.8673,
+      "step": 13560
+    },
+    {
+      "epoch": 4.26,
+      "learning_rate": 5.743412797992472e-06,
+      "loss": 1.9263,
+      "step": 13570
+    },
+    {
+      "epoch": 4.26,
+      "learning_rate": 5.7402760351317444e-06,
+      "loss": 1.9281,
+      "step": 13580
+    },
+    {
+      "epoch": 4.26,
+      "learning_rate": 5.737139272271017e-06,
+      "loss": 1.8446,
+      "step": 13590
+    },
+    {
+      "epoch": 4.27,
+      "learning_rate": 5.734002509410289e-06,
+      "loss": 1.9267,
+      "step": 13600
+    },
+    {
+      "epoch": 4.27,
+      "learning_rate": 5.730865746549561e-06,
+      "loss": 1.9336,
+      "step": 13610
+    },
+    {
+      "epoch": 4.27,
+      "learning_rate": 5.727728983688834e-06,
+      "loss": 1.765,
+      "step": 13620
+    },
+    {
+      "epoch": 4.28,
+      "learning_rate": 5.7245922208281055e-06,
+      "loss": 1.9527,
+      "step": 13630
+    },
+    {
+      "epoch": 4.28,
+      "learning_rate": 5.721455457967378e-06,
+      "loss": 1.9254,
+      "step": 13640
+    },
+    {
+      "epoch": 4.28,
+      "learning_rate": 5.7183186951066505e-06,
+      "loss": 1.8996,
+      "step": 13650
+    },
+    {
+      "epoch": 4.28,
+      "learning_rate": 5.7151819322459225e-06,
+      "loss": 1.8477,
+      "step": 13660
+    },
+    {
+      "epoch": 4.29,
+      "learning_rate": 5.712045169385195e-06,
+      "loss": 1.8041,
+      "step": 13670
+    },
+    {
+      "epoch": 4.29,
+      "learning_rate": 5.7089084065244674e-06,
+      "loss": 1.8327,
+      "step": 13680
+    },
+    {
+      "epoch": 4.29,
+      "learning_rate": 5.70577164366374e-06,
+      "loss": 1.8609,
+      "step": 13690
+    },
+    {
+      "epoch": 4.3,
+      "learning_rate": 5.7026348808030115e-06,
+      "loss": 1.9412,
+      "step": 13700
+    },
+    {
+      "epoch": 4.3,
+      "learning_rate": 5.6994981179422836e-06,
+      "loss": 1.8599,
+      "step": 13710
+    },
+    {
+      "epoch": 4.3,
+      "learning_rate": 5.6963613550815565e-06,
+      "loss": 1.9285,
+      "step": 13720
+    },
+    {
+      "epoch": 4.31,
+      "learning_rate": 5.6932245922208285e-06,
+      "loss": 1.8325,
+      "step": 13730
+    },
+    {
+      "epoch": 4.31,
+      "learning_rate": 5.6900878293601006e-06,
+      "loss": 1.9623,
+      "step": 13740
+    },
+    {
+      "epoch": 4.31,
+      "learning_rate": 5.6869510664993734e-06,
+      "loss": 1.8725,
+      "step": 13750
+    },
+    {
+      "epoch": 4.32,
+      "learning_rate": 5.683814303638645e-06,
+      "loss": 1.7946,
+      "step": 13760
+    },
+    {
+      "epoch": 4.32,
+      "learning_rate": 5.680677540777918e-06,
+      "loss": 2.0362,
+      "step": 13770
+    },
+    {
+      "epoch": 4.32,
+      "learning_rate": 5.67754077791719e-06,
+      "loss": 1.8396,
+      "step": 13780
+    },
+    {
+      "epoch": 4.33,
+      "learning_rate": 5.674404015056462e-06,
+      "loss": 1.9147,
+      "step": 13790
+    },
+    {
+      "epoch": 4.33,
+      "learning_rate": 5.6712672521957345e-06,
+      "loss": 1.9386,
+      "step": 13800
+    },
+    {
+      "epoch": 4.33,
+      "learning_rate": 5.6681304893350066e-06,
+      "loss": 1.8763,
+      "step": 13810
+    },
+    {
+      "epoch": 4.34,
+      "learning_rate": 5.6649937264742795e-06,
+      "loss": 1.8439,
+      "step": 13820
+    },
+    {
+      "epoch": 4.34,
+      "learning_rate": 5.6618569636135515e-06,
+      "loss": 1.9102,
+      "step": 13830
+    },
+    {
+      "epoch": 4.34,
+      "learning_rate": 5.658720200752823e-06,
+      "loss": 1.9619,
+      "step": 13840
+    },
+    {
+      "epoch": 4.34,
+      "learning_rate": 5.655583437892096e-06,
+      "loss": 1.879,
+      "step": 13850
+    },
+    {
+      "epoch": 4.35,
+      "learning_rate": 5.652446675031368e-06,
+      "loss": 1.9166,
+      "step": 13860
+    },
+    {
+      "epoch": 4.35,
+      "learning_rate": 5.6493099121706405e-06,
+      "loss": 1.809,
+      "step": 13870
+    },
+    {
+      "epoch": 4.35,
+      "learning_rate": 5.6461731493099126e-06,
+      "loss": 1.8136,
+      "step": 13880
+    },
+    {
+      "epoch": 4.36,
+      "learning_rate": 5.643036386449185e-06,
+      "loss": 1.8419,
+      "step": 13890
+    },
+    {
+      "epoch": 4.36,
+      "learning_rate": 5.6398996235884575e-06,
+      "loss": 1.9239,
+      "step": 13900
+    },
+    {
+      "epoch": 4.36,
+      "learning_rate": 5.6367628607277296e-06,
+      "loss": 1.9159,
+      "step": 13910
+    },
+    {
+      "epoch": 4.37,
+      "learning_rate": 5.6336260978670024e-06,
+      "loss": 1.8297,
+      "step": 13920
+    },
+    {
+      "epoch": 4.37,
+      "learning_rate": 5.630489335006274e-06,
+      "loss": 1.9141,
+      "step": 13930
+    },
+    {
+      "epoch": 4.37,
+      "learning_rate": 5.627352572145546e-06,
+      "loss": 1.8885,
+      "step": 13940
+    },
+    {
+      "epoch": 4.38,
+      "learning_rate": 5.624215809284819e-06,
+      "loss": 1.9794,
+      "step": 13950
+    },
+    {
+      "epoch": 4.38,
+      "learning_rate": 5.621079046424091e-06,
+      "loss": 1.9734,
+      "step": 13960
+    },
+    {
+      "epoch": 4.38,
+      "learning_rate": 5.6179422835633635e-06,
+      "loss": 1.8771,
+      "step": 13970
+    },
+    {
+      "epoch": 4.39,
+      "learning_rate": 5.6148055207026356e-06,
+      "loss": 1.873,
+      "step": 13980
+    },
+    {
+      "epoch": 4.39,
+      "learning_rate": 5.611668757841907e-06,
+      "loss": 1.9766,
+      "step": 13990
+    },
+    {
+      "epoch": 4.39,
+      "learning_rate": 5.60853199498118e-06,
+      "loss": 1.8123,
+      "step": 14000
+    },
+    {
+      "epoch": 4.39,
+      "learning_rate": 5.605395232120452e-06,
+      "loss": 1.873,
+      "step": 14010
+    },
+    {
+      "epoch": 4.4,
+      "learning_rate": 5.602258469259725e-06,
+      "loss": 1.8561,
+      "step": 14020
+    },
+    {
+      "epoch": 4.4,
+      "learning_rate": 5.599121706398997e-06,
+      "loss": 1.9332,
+      "step": 14030
+    },
+    {
+      "epoch": 4.4,
+      "learning_rate": 5.595984943538269e-06,
+      "loss": 1.897,
+      "step": 14040
+    },
+    {
+      "epoch": 4.41,
+      "learning_rate": 5.5928481806775416e-06,
+      "loss": 1.887,
+      "step": 14050
+    },
+    {
+      "epoch": 4.41,
+      "learning_rate": 5.589711417816814e-06,
+      "loss": 1.956,
+      "step": 14060
+    },
+    {
+      "epoch": 4.41,
+      "learning_rate": 5.5865746549560865e-06,
+      "loss": 1.9316,
+      "step": 14070
+    },
+    {
+      "epoch": 4.42,
+      "learning_rate": 5.583437892095358e-06,
+      "loss": 1.8974,
+      "step": 14080
+    },
+    {
+      "epoch": 4.42,
+      "learning_rate": 5.58030112923463e-06,
+      "loss": 1.9333,
+      "step": 14090
+    },
+    {
+      "epoch": 4.42,
+      "learning_rate": 5.577164366373903e-06,
+      "loss": 1.896,
+      "step": 14100
+    },
+    {
+      "epoch": 4.43,
+      "learning_rate": 5.574027603513175e-06,
+      "loss": 1.8135,
+      "step": 14110
+    },
+    {
+      "epoch": 4.43,
+      "learning_rate": 5.5708908406524476e-06,
+      "loss": 1.8435,
+      "step": 14120
+    },
+    {
+      "epoch": 4.43,
+      "learning_rate": 5.56775407779172e-06,
+      "loss": 1.8622,
+      "step": 14130
+    },
+    {
+      "epoch": 4.44,
+      "learning_rate": 5.564617314930991e-06,
+      "loss": 1.9146,
+      "step": 14140
+    },
+    {
+      "epoch": 4.44,
+      "learning_rate": 5.5614805520702646e-06,
+      "loss": 1.8822,
+      "step": 14150
+    },
+    {
+      "epoch": 4.44,
+      "learning_rate": 5.558343789209536e-06,
+      "loss": 1.9138,
+      "step": 14160
+    },
+    {
+      "epoch": 4.44,
+      "learning_rate": 5.555207026348809e-06,
+      "loss": 1.8841,
+      "step": 14170
+    },
+    {
+      "epoch": 4.45,
+      "learning_rate": 5.552070263488081e-06,
+      "loss": 1.9401,
+      "step": 14180
+    },
+    {
+      "epoch": 4.45,
+      "learning_rate": 5.548933500627353e-06,
+      "loss": 1.8915,
+      "step": 14190
+    },
+    {
+      "epoch": 4.45,
+      "learning_rate": 5.545796737766626e-06,
+      "loss": 1.8691,
+      "step": 14200
+    },
+    {
+      "epoch": 4.46,
+      "learning_rate": 5.542659974905898e-06,
+      "loss": 1.8973,
+      "step": 14210
+    },
+    {
+      "epoch": 4.46,
+      "learning_rate": 5.5395232120451706e-06,
+      "loss": 1.9184,
+      "step": 14220
+    },
+    {
+      "epoch": 4.46,
+      "learning_rate": 5.536386449184442e-06,
+      "loss": 1.913,
+      "step": 14230
+    },
+    {
+      "epoch": 4.47,
+      "learning_rate": 5.533249686323714e-06,
+      "loss": 1.8781,
+      "step": 14240
+    },
+    {
+      "epoch": 4.47,
+      "learning_rate": 5.530112923462987e-06,
+      "loss": 1.9734,
+      "step": 14250
+    },
+    {
+      "epoch": 4.47,
+      "learning_rate": 5.526976160602259e-06,
+      "loss": 1.8681,
+      "step": 14260
+    },
+    {
+      "epoch": 4.48,
+      "learning_rate": 5.523839397741532e-06,
+      "loss": 1.848,
+      "step": 14270
+    },
+    {
+      "epoch": 4.48,
+      "learning_rate": 5.520702634880804e-06,
+      "loss": 1.8625,
+      "step": 14280
+    },
+    {
+      "epoch": 4.48,
+      "learning_rate": 5.517565872020076e-06,
+      "loss": 1.9166,
+      "step": 14290
+    },
+    {
+      "epoch": 4.49,
+      "learning_rate": 5.514429109159349e-06,
+      "loss": 1.8398,
+      "step": 14300
+    },
+    {
+      "epoch": 4.49,
+      "learning_rate": 5.51129234629862e-06,
+      "loss": 1.867,
+      "step": 14310
+    },
+    {
+      "epoch": 4.49,
+      "learning_rate": 5.508155583437893e-06,
+      "loss": 1.9392,
+      "step": 14320
+    },
+    {
+      "epoch": 4.49,
+      "learning_rate": 5.505018820577165e-06,
+      "loss": 1.9181,
+      "step": 14330
+    },
+    {
+      "epoch": 4.5,
+      "learning_rate": 5.501882057716437e-06,
+      "loss": 1.904,
+      "step": 14340
+    },
+    {
+      "epoch": 4.5,
+      "learning_rate": 5.49874529485571e-06,
+      "loss": 1.8688,
+      "step": 14350
+    },
+    {
+      "epoch": 4.5,
+      "learning_rate": 5.495608531994982e-06,
+      "loss": 1.8458,
+      "step": 14360
+    },
+    {
+      "epoch": 4.51,
+      "learning_rate": 5.492471769134255e-06,
+      "loss": 1.8275,
+      "step": 14370
+    },
+    {
+      "epoch": 4.51,
+      "learning_rate": 5.489335006273526e-06,
+      "loss": 1.9024,
+      "step": 14380
+    },
+    {
+      "epoch": 4.51,
+      "learning_rate": 5.486198243412798e-06,
+      "loss": 1.8987,
+      "step": 14390
+    },
+    {
+      "epoch": 4.52,
+      "learning_rate": 5.483061480552071e-06,
+      "loss": 1.8981,
+      "step": 14400
+    },
+    {
+      "epoch": 4.52,
+      "learning_rate": 5.479924717691343e-06,
+      "loss": 1.9143,
+      "step": 14410
+    },
+    {
+      "epoch": 4.52,
+      "learning_rate": 5.476787954830616e-06,
+      "loss": 1.8899,
+      "step": 14420
+    },
+    {
+      "epoch": 4.53,
+      "learning_rate": 5.473651191969888e-06,
+      "loss": 1.8701,
+      "step": 14430
+    },
+    {
+      "epoch": 4.53,
+      "learning_rate": 5.47051442910916e-06,
+      "loss": 1.927,
+      "step": 14440
+    },
+    {
+      "epoch": 4.53,
+      "learning_rate": 5.467377666248433e-06,
+      "loss": 1.8568,
+      "step": 14450
+    },
+    {
+      "epoch": 4.54,
+      "learning_rate": 5.464240903387704e-06,
+      "loss": 1.9297,
+      "step": 14460
+    },
+    {
+      "epoch": 4.54,
+      "learning_rate": 5.461104140526977e-06,
+      "loss": 1.9036,
+      "step": 14470
+    },
+    {
+      "epoch": 4.54,
+      "learning_rate": 5.457967377666249e-06,
+      "loss": 1.8501,
+      "step": 14480
+    },
+    {
+      "epoch": 4.55,
+      "learning_rate": 5.454830614805521e-06,
+      "loss": 1.8453,
+      "step": 14490
+    },
+    {
+      "epoch": 4.55,
+      "learning_rate": 5.451693851944794e-06,
+      "loss": 1.8577,
+      "step": 14500
+    },
+    {
+      "epoch": 4.55,
+      "learning_rate": 5.448557089084066e-06,
+      "loss": 1.8766,
+      "step": 14510
+    },
+    {
+      "epoch": 4.55,
+      "learning_rate": 5.445420326223339e-06,
+      "loss": 1.9118,
+      "step": 14520
+    },
+    {
+      "epoch": 4.56,
+      "learning_rate": 5.442283563362611e-06,
+      "loss": 1.9961,
+      "step": 14530
+    },
+    {
+      "epoch": 4.56,
+      "learning_rate": 5.439146800501882e-06,
+      "loss": 1.8682,
+      "step": 14540
+    },
+    {
+      "epoch": 4.56,
+      "learning_rate": 5.436010037641155e-06,
+      "loss": 1.8594,
+      "step": 14550
+    },
+    {
+      "epoch": 4.57,
+      "learning_rate": 5.432873274780427e-06,
+      "loss": 1.861,
+      "step": 14560
+    },
+    {
+      "epoch": 4.57,
+      "learning_rate": 5.4297365119197e-06,
+      "loss": 1.9484,
+      "step": 14570
+    },
+    {
+      "epoch": 4.57,
+      "learning_rate": 5.426599749058972e-06,
+      "loss": 1.8979,
+      "step": 14580
+    },
+    {
+      "epoch": 4.58,
+      "learning_rate": 5.423462986198244e-06,
+      "loss": 1.8477,
+      "step": 14590
+    },
+    {
+      "epoch": 4.58,
+      "learning_rate": 5.420326223337517e-06,
+      "loss": 1.926,
+      "step": 14600
+    },
+    {
+      "epoch": 4.58,
+      "learning_rate": 5.417189460476788e-06,
+      "loss": 1.9283,
+      "step": 14610
+    },
+    {
+      "epoch": 4.59,
+      "learning_rate": 5.41405269761606e-06,
+      "loss": 1.8808,
+      "step": 14620
+    },
+    {
+      "epoch": 4.59,
+      "learning_rate": 5.410915934755333e-06,
+      "loss": 1.9147,
+      "step": 14630
+    },
+    {
+      "epoch": 4.59,
+      "learning_rate": 5.407779171894605e-06,
+      "loss": 1.8446,
+      "step": 14640
+    },
+    {
+      "epoch": 4.6,
+      "learning_rate": 5.404642409033878e-06,
+      "loss": 1.8838,
+      "step": 14650
+    },
+    {
+      "epoch": 4.6,
+      "learning_rate": 5.40150564617315e-06,
+      "loss": 1.9644,
+      "step": 14660
+    },
+    {
+      "epoch": 4.6,
+      "learning_rate": 5.398368883312422e-06,
+      "loss": 1.96,
+      "step": 14670
+    },
+    {
+      "epoch": 4.6,
+      "learning_rate": 5.395232120451695e-06,
+      "loss": 1.8574,
+      "step": 14680
+    },
+    {
+      "epoch": 4.61,
+      "learning_rate": 5.392095357590966e-06,
+      "loss": 1.8522,
+      "step": 14690
+    },
+    {
+      "epoch": 4.61,
+      "learning_rate": 5.388958594730239e-06,
+      "loss": 1.902,
+      "step": 14700
+    },
+    {
+      "epoch": 4.61,
+      "learning_rate": 5.385821831869511e-06,
+      "loss": 1.8718,
+      "step": 14710
+    },
+    {
+      "epoch": 4.62,
+      "learning_rate": 5.382685069008783e-06,
+      "loss": 1.8183,
+      "step": 14720
+    },
+    {
+      "epoch": 4.62,
+      "learning_rate": 5.379548306148056e-06,
+      "loss": 1.91,
+      "step": 14730
+    },
+    {
+      "epoch": 4.62,
+      "learning_rate": 5.376411543287328e-06,
+      "loss": 1.8759,
+      "step": 14740
+    },
+    {
+      "epoch": 4.63,
+      "learning_rate": 5.373274780426601e-06,
+      "loss": 1.9229,
+      "step": 14750
+    },
+    {
+      "epoch": 4.63,
+      "learning_rate": 5.370138017565872e-06,
+      "loss": 1.8429,
+      "step": 14760
+    },
+    {
+      "epoch": 4.63,
+      "learning_rate": 5.367001254705144e-06,
+      "loss": 1.9231,
+      "step": 14770
+    },
+    {
+      "epoch": 4.64,
+      "learning_rate": 5.363864491844417e-06,
+      "loss": 1.8604,
+      "step": 14780
+    },
+    {
+      "epoch": 4.64,
+      "learning_rate": 5.360727728983689e-06,
+      "loss": 1.8929,
+      "step": 14790
+    },
+    {
+      "epoch": 4.64,
+      "learning_rate": 5.357590966122962e-06,
+      "loss": 1.8052,
+      "step": 14800
+    },
+    {
+      "epoch": 4.65,
+      "learning_rate": 5.354454203262234e-06,
+      "loss": 1.9085,
+      "step": 14810
+    },
+    {
+      "epoch": 4.65,
+      "learning_rate": 5.351317440401506e-06,
+      "loss": 1.902,
+      "step": 14820
+    },
+    {
+      "epoch": 4.65,
+      "learning_rate": 5.348180677540779e-06,
+      "loss": 1.8918,
+      "step": 14830
+    },
+    {
+      "epoch": 4.65,
+      "learning_rate": 5.34504391468005e-06,
+      "loss": 1.883,
+      "step": 14840
+    },
+    {
+      "epoch": 4.66,
+      "learning_rate": 5.341907151819323e-06,
+      "loss": 1.8666,
+      "step": 14850
+    },
+    {
+      "epoch": 4.66,
+      "learning_rate": 5.338770388958595e-06,
+      "loss": 1.8527,
+      "step": 14860
+    },
+    {
+      "epoch": 4.66,
+      "learning_rate": 5.335633626097867e-06,
+      "loss": 1.8875,
+      "step": 14870
+    },
+    {
+      "epoch": 4.67,
+      "learning_rate": 5.33249686323714e-06,
+      "loss": 1.9765,
+      "step": 14880
+    },
+    {
+      "epoch": 4.67,
+      "learning_rate": 5.329360100376412e-06,
+      "loss": 1.7983,
+      "step": 14890
+    },
+    {
+      "epoch": 4.67,
+      "learning_rate": 5.326223337515685e-06,
+      "loss": 1.895,
+      "step": 14900
+    },
+    {
+      "epoch": 4.68,
+      "learning_rate": 5.323086574654956e-06,
+      "loss": 1.9591,
+      "step": 14910
+    },
+    {
+      "epoch": 4.68,
+      "learning_rate": 5.319949811794228e-06,
+      "loss": 1.9159,
+      "step": 14920
+    },
+    {
+      "epoch": 4.68,
+      "learning_rate": 5.316813048933501e-06,
+      "loss": 1.8443,
+      "step": 14930
+    },
+    {
+      "epoch": 4.69,
+      "learning_rate": 5.313676286072773e-06,
+      "loss": 1.9166,
+      "step": 14940
+    },
+    {
+      "epoch": 4.69,
+      "learning_rate": 5.310539523212046e-06,
+      "loss": 1.8883,
+      "step": 14950
+    },
+    {
+      "epoch": 4.69,
+      "learning_rate": 5.307402760351318e-06,
+      "loss": 1.9,
+      "step": 14960
+    },
+    {
+      "epoch": 4.7,
+      "learning_rate": 5.30426599749059e-06,
+      "loss": 1.7444,
+      "step": 14970
+    },
+    {
+      "epoch": 4.7,
+      "learning_rate": 5.301129234629863e-06,
+      "loss": 1.8834,
+      "step": 14980
+    },
+    {
+      "epoch": 4.7,
+      "learning_rate": 5.297992471769134e-06,
+      "loss": 1.8931,
+      "step": 14990
+    },
+    {
+      "epoch": 4.71,
+      "learning_rate": 5.294855708908407e-06,
+      "loss": 1.8867,
+      "step": 15000
+    },
+    {
+      "epoch": 4.71,
+      "learning_rate": 5.291718946047679e-06,
+      "loss": 1.8816,
+      "step": 15010
+    },
+    {
+      "epoch": 4.71,
+      "learning_rate": 5.288582183186951e-06,
+      "loss": 1.8777,
+      "step": 15020
+    },
+    {
+      "epoch": 4.71,
+      "learning_rate": 5.285445420326224e-06,
+      "loss": 1.9305,
+      "step": 15030
+    },
+    {
+      "epoch": 4.72,
+      "learning_rate": 5.282308657465496e-06,
+      "loss": 1.9274,
+      "step": 15040
+    },
+    {
+      "epoch": 4.72,
+      "learning_rate": 5.279171894604769e-06,
+      "loss": 1.8658,
+      "step": 15050
+    },
+    {
+      "epoch": 4.72,
+      "learning_rate": 5.276035131744041e-06,
+      "loss": 1.9291,
+      "step": 15060
+    },
+    {
+      "epoch": 4.73,
+      "learning_rate": 5.272898368883312e-06,
+      "loss": 1.8839,
+      "step": 15070
+    },
+    {
+      "epoch": 4.73,
+      "learning_rate": 5.269761606022585e-06,
+      "loss": 1.8327,
+      "step": 15080
+    },
+    {
+      "epoch": 4.73,
+      "learning_rate": 5.266624843161857e-06,
+      "loss": 1.8446,
+      "step": 15090
+    },
+    {
+      "epoch": 4.74,
+      "learning_rate": 5.26348808030113e-06,
+      "loss": 1.8522,
+      "step": 15100
+    },
+    {
+      "epoch": 4.74,
+      "learning_rate": 5.260351317440402e-06,
+      "loss": 1.7783,
+      "step": 15110
+    },
+    {
+      "epoch": 4.74,
+      "learning_rate": 5.257214554579674e-06,
+      "loss": 1.8502,
+      "step": 15120
+    },
+    {
+      "epoch": 4.75,
+      "learning_rate": 5.254077791718947e-06,
+      "loss": 1.8753,
+      "step": 15130
+    },
+    {
+      "epoch": 4.75,
+      "learning_rate": 5.250941028858218e-06,
+      "loss": 1.9161,
+      "step": 15140
+    },
+    {
+      "epoch": 4.75,
+      "learning_rate": 5.247804265997491e-06,
+      "loss": 1.9177,
+      "step": 15150
+    },
+    {
+      "epoch": 4.76,
+      "learning_rate": 5.244667503136763e-06,
+      "loss": 1.8746,
+      "step": 15160
+    },
+    {
+      "epoch": 4.76,
+      "learning_rate": 5.241530740276035e-06,
+      "loss": 1.9254,
+      "step": 15170
+    },
+    {
+      "epoch": 4.76,
+      "learning_rate": 5.238393977415308e-06,
+      "loss": 1.8629,
+      "step": 15180
+    },
+    {
+      "epoch": 4.76,
+      "learning_rate": 5.23525721455458e-06,
+      "loss": 1.9017,
+      "step": 15190
+    },
+    {
+      "epoch": 4.77,
+      "learning_rate": 5.232120451693853e-06,
+      "loss": 1.8562,
+      "step": 15200
+    },
+    {
+      "epoch": 4.77,
+      "learning_rate": 5.228983688833125e-06,
+      "loss": 1.9271,
+      "step": 15210
+    },
+    {
+      "epoch": 4.77,
+      "learning_rate": 5.225846925972396e-06,
+      "loss": 1.8436,
+      "step": 15220
+    },
+    {
+      "epoch": 4.78,
+      "learning_rate": 5.222710163111669e-06,
+      "loss": 1.9025,
+      "step": 15230
+    },
+    {
+      "epoch": 4.78,
+      "learning_rate": 5.219573400250941e-06,
+      "loss": 1.9184,
+      "step": 15240
+    },
+    {
+      "epoch": 4.78,
+      "learning_rate": 5.216436637390214e-06,
+      "loss": 1.9348,
+      "step": 15250
+    },
+    {
+      "epoch": 4.79,
+      "learning_rate": 5.213299874529486e-06,
+      "loss": 1.8559,
+      "step": 15260
+    },
+    {
+      "epoch": 4.79,
+      "learning_rate": 5.210163111668758e-06,
+      "loss": 1.833,
+      "step": 15270
+    },
+    {
+      "epoch": 4.79,
+      "learning_rate": 5.207026348808031e-06,
+      "loss": 1.8749,
+      "step": 15280
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 5.203889585947302e-06,
+      "loss": 1.8664,
+      "step": 15290
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 5.200752823086576e-06,
+      "loss": 1.9232,
+      "step": 15300
+    },
+    {
+      "epoch": 4.8,
+      "learning_rate": 5.197616060225847e-06,
+      "loss": 1.9121,
+      "step": 15310
+    },
+    {
+      "epoch": 4.81,
+      "learning_rate": 5.194479297365119e-06,
+      "loss": 1.9184,
+      "step": 15320
+    },
+    {
+      "epoch": 4.81,
+      "learning_rate": 5.191342534504392e-06,
+      "loss": 1.865,
+      "step": 15330
+    },
+    {
+      "epoch": 4.81,
+      "learning_rate": 5.188205771643664e-06,
+      "loss": 1.9039,
+      "step": 15340
+    },
+    {
+      "epoch": 4.81,
+      "learning_rate": 5.185069008782937e-06,
+      "loss": 1.8667,
+      "step": 15350
+    },
+    {
+      "epoch": 4.82,
+      "learning_rate": 5.181932245922209e-06,
+      "loss": 1.8866,
+      "step": 15360
+    },
+    {
+      "epoch": 4.82,
+      "learning_rate": 5.17879548306148e-06,
+      "loss": 1.8476,
+      "step": 15370
+    },
+    {
+      "epoch": 4.82,
+      "learning_rate": 5.175658720200753e-06,
+      "loss": 1.8192,
+      "step": 15380
+    },
+    {
+      "epoch": 4.83,
+      "learning_rate": 5.172521957340025e-06,
+      "loss": 1.8782,
+      "step": 15390
+    },
+    {
+      "epoch": 4.83,
+      "learning_rate": 5.169385194479298e-06,
+      "loss": 1.9038,
+      "step": 15400
+    },
+    {
+      "epoch": 4.83,
+      "learning_rate": 5.16624843161857e-06,
+      "loss": 1.9254,
+      "step": 15410
+    },
+    {
+      "epoch": 4.84,
+      "learning_rate": 5.163111668757842e-06,
+      "loss": 1.9146,
+      "step": 15420
+    },
+    {
+      "epoch": 4.84,
+      "learning_rate": 5.159974905897115e-06,
+      "loss": 1.8519,
+      "step": 15430
+    },
+    {
+      "epoch": 4.84,
+      "learning_rate": 5.156838143036387e-06,
+      "loss": 1.8355,
+      "step": 15440
+    },
+    {
+      "epoch": 4.85,
+      "learning_rate": 5.15370138017566e-06,
+      "loss": 1.8274,
+      "step": 15450
+    },
+    {
+      "epoch": 4.85,
+      "learning_rate": 5.150564617314931e-06,
+      "loss": 1.9244,
+      "step": 15460
+    },
+    {
+      "epoch": 4.85,
+      "learning_rate": 5.147427854454203e-06,
+      "loss": 1.912,
+      "step": 15470
+    },
+    {
+      "epoch": 4.86,
+      "learning_rate": 5.144291091593476e-06,
+      "loss": 1.9544,
+      "step": 15480
+    },
+    {
+      "epoch": 4.86,
+      "learning_rate": 5.141154328732748e-06,
+      "loss": 1.8746,
+      "step": 15490
+    },
+    {
+      "epoch": 4.86,
+      "learning_rate": 5.13801756587202e-06,
+      "loss": 1.906,
+      "step": 15500
+    },
+    {
+      "epoch": 4.87,
+      "learning_rate": 5.134880803011293e-06,
+      "loss": 1.9023,
+      "step": 15510
+    },
+    {
+      "epoch": 4.87,
+      "learning_rate": 5.131744040150564e-06,
+      "loss": 1.9178,
+      "step": 15520
+    },
+    {
+      "epoch": 4.87,
+      "learning_rate": 5.128607277289837e-06,
+      "loss": 1.8647,
+      "step": 15530
+    },
+    {
+      "epoch": 4.87,
+      "learning_rate": 5.125470514429109e-06,
+      "loss": 1.9327,
+      "step": 15540
+    },
+    {
+      "epoch": 4.88,
+      "learning_rate": 5.122333751568381e-06,
+      "loss": 1.8794,
+      "step": 15550
+    },
+    {
+      "epoch": 4.88,
+      "learning_rate": 5.119196988707654e-06,
+      "loss": 1.975,
+      "step": 15560
+    },
+    {
+      "epoch": 4.88,
+      "learning_rate": 5.116060225846926e-06,
+      "loss": 1.9128,
+      "step": 15570
+    },
+    {
+      "epoch": 4.89,
+      "learning_rate": 5.112923462986199e-06,
+      "loss": 1.9147,
+      "step": 15580
+    },
+    {
+      "epoch": 4.89,
+      "learning_rate": 5.109786700125471e-06,
+      "loss": 1.9159,
+      "step": 15590
+    },
+    {
+      "epoch": 4.89,
+      "learning_rate": 5.106649937264742e-06,
+      "loss": 1.9288,
+      "step": 15600
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 5.103513174404015e-06,
+      "loss": 1.8773,
+      "step": 15610
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 5.100376411543287e-06,
+      "loss": 1.8637,
+      "step": 15620
+    },
+    {
+      "epoch": 4.9,
+      "learning_rate": 5.09723964868256e-06,
+      "loss": 1.9168,
+      "step": 15630
+    },
+    {
+      "epoch": 4.91,
+      "learning_rate": 5.094102885821832e-06,
+      "loss": 1.8844,
+      "step": 15640
+    },
+    {
+      "epoch": 4.91,
+      "learning_rate": 5.090966122961104e-06,
+      "loss": 1.9208,
+      "step": 15650
+    },
+    {
+      "epoch": 4.91,
+      "learning_rate": 5.087829360100377e-06,
+      "loss": 1.8717,
+      "step": 15660
+    },
+    {
+      "epoch": 4.92,
+      "learning_rate": 5.084692597239648e-06,
+      "loss": 1.9348,
+      "step": 15670
+    },
+    {
+      "epoch": 4.92,
+      "learning_rate": 5.081555834378922e-06,
+      "loss": 1.8565,
+      "step": 15680
+    },
+    {
+      "epoch": 4.92,
+      "learning_rate": 5.078419071518193e-06,
+      "loss": 1.8659,
+      "step": 15690
+    },
+    {
+      "epoch": 4.92,
+      "learning_rate": 5.075282308657465e-06,
+      "loss": 1.8334,
+      "step": 15700
+    },
+    {
+      "epoch": 4.93,
+      "learning_rate": 5.072145545796738e-06,
+      "loss": 1.8489,
+      "step": 15710
+    },
+    {
+      "epoch": 4.93,
+      "learning_rate": 5.06900878293601e-06,
+      "loss": 1.8754,
+      "step": 15720
+    },
+    {
+      "epoch": 4.93,
+      "learning_rate": 5.065872020075283e-06,
+      "loss": 1.901,
+      "step": 15730
+    },
+    {
+      "epoch": 4.94,
+      "learning_rate": 5.062735257214555e-06,
+      "loss": 1.8595,
+      "step": 15740
+    },
+    {
+      "epoch": 4.94,
+      "learning_rate": 5.0595984943538265e-06,
+      "loss": 1.7944,
+      "step": 15750
+    },
+    {
+      "epoch": 4.94,
+      "learning_rate": 5.056461731493099e-06,
+      "loss": 1.8644,
+      "step": 15760
+    },
+    {
+      "epoch": 4.95,
+      "learning_rate": 5.053324968632371e-06,
+      "loss": 1.8543,
+      "step": 15770
+    },
+    {
+      "epoch": 4.95,
+      "learning_rate": 5.050188205771644e-06,
+      "loss": 1.8879,
+      "step": 15780
+    },
+    {
+      "epoch": 4.95,
+      "learning_rate": 5.047051442910916e-06,
+      "loss": 1.9381,
+      "step": 15790
+    },
+    {
+      "epoch": 4.96,
+      "learning_rate": 5.043914680050188e-06,
+      "loss": 1.9313,
+      "step": 15800
+    },
+    {
+      "epoch": 4.96,
+      "learning_rate": 5.040777917189461e-06,
+      "loss": 1.8585,
+      "step": 15810
+    },
+    {
+      "epoch": 4.96,
+      "learning_rate": 5.037641154328733e-06,
+      "loss": 1.9605,
+      "step": 15820
+    },
+    {
+      "epoch": 4.97,
+      "learning_rate": 5.034504391468006e-06,
+      "loss": 1.8209,
+      "step": 15830
+    },
+    {
+      "epoch": 4.97,
+      "learning_rate": 5.031367628607277e-06,
+      "loss": 1.7861,
+      "step": 15840
+    },
+    {
+      "epoch": 4.97,
+      "learning_rate": 5.0282308657465495e-06,
+      "loss": 1.8689,
+      "step": 15850
+    },
+    {
+      "epoch": 4.97,
+      "learning_rate": 5.025094102885822e-06,
+      "loss": 1.8678,
+      "step": 15860
+    },
+    {
+      "epoch": 4.98,
+      "learning_rate": 5.021957340025094e-06,
+      "loss": 1.981,
+      "step": 15870
+    },
+    {
+      "epoch": 4.98,
+      "learning_rate": 5.018820577164367e-06,
+      "loss": 1.769,
+      "step": 15880
+    },
+    {
+      "epoch": 4.98,
+      "learning_rate": 5.015683814303639e-06,
+      "loss": 1.8633,
+      "step": 15890
+    },
+    {
+      "epoch": 4.99,
+      "learning_rate": 5.0125470514429105e-06,
+      "loss": 2.0299,
+      "step": 15900
+    },
+    {
+      "epoch": 4.99,
+      "learning_rate": 5.0094102885821834e-06,
+      "loss": 1.9424,
+      "step": 15910
+    },
+    {
+      "epoch": 4.99,
+      "learning_rate": 5.0062735257214555e-06,
+      "loss": 1.9456,
+      "step": 15920
+    },
+    {
+      "epoch": 5.0,
+      "learning_rate": 5.003136762860728e-06,
+      "loss": 1.8269,
+      "step": 15930
+    },
+    {
+      "epoch": 5.0,
+      "learning_rate": 5e-06,
+      "loss": 1.8382,
+      "step": 15940
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 1.828749418258667,
+      "eval_runtime": 13.6137,
+      "eval_samples_per_second": 73.455,
+      "eval_steps_per_second": 4.628,
+      "step": 15940
+    },
+    {
+      "epoch": 5.0,
+      "learning_rate": 4.9968632371392725e-06,
+      "loss": 1.9341,
+      "step": 15950
+    },
+    {
+      "epoch": 5.01,
+      "learning_rate": 4.993726474278545e-06,
+      "loss": 1.8757,
+      "step": 15960
+    },
+    {
+      "epoch": 5.01,
+      "learning_rate": 4.990589711417817e-06,
+      "loss": 1.8885,
+      "step": 15970
+    },
+    {
+      "epoch": 5.01,
+      "learning_rate": 4.9874529485570894e-06,
+      "loss": 1.806,
+      "step": 15980
+    },
+    {
+      "epoch": 5.02,
+      "learning_rate": 4.9843161856963615e-06,
+      "loss": 1.8617,
+      "step": 15990
+    },
+    {
+      "epoch": 5.02,
+      "learning_rate": 4.981179422835634e-06,
+      "loss": 1.8927,
+      "step": 16000
+    },
+    {
+      "epoch": 5.02,
+      "learning_rate": 4.978042659974906e-06,
+      "loss": 1.8202,
+      "step": 16010
+    },
+    {
+      "epoch": 5.03,
+      "learning_rate": 4.9749058971141785e-06,
+      "loss": 1.898,
+      "step": 16020
+    },
+    {
+      "epoch": 5.03,
+      "learning_rate": 4.9717691342534505e-06,
+      "loss": 1.8298,
+      "step": 16030
+    },
+    {
+      "epoch": 5.03,
+      "learning_rate": 4.968632371392723e-06,
+      "loss": 1.8598,
+      "step": 16040
+    },
+    {
+      "epoch": 5.03,
+      "learning_rate": 4.9654956085319954e-06,
+      "loss": 1.9222,
+      "step": 16050
+    },
+    {
+      "epoch": 5.04,
+      "learning_rate": 4.9623588456712675e-06,
+      "loss": 1.8679,
+      "step": 16060
+    },
+    {
+      "epoch": 5.04,
+      "learning_rate": 4.9592220828105395e-06,
+      "loss": 1.8874,
+      "step": 16070
+    },
+    {
+      "epoch": 5.04,
+      "learning_rate": 4.9560853199498124e-06,
+      "loss": 1.8677,
+      "step": 16080
+    },
+    {
+      "epoch": 5.05,
+      "learning_rate": 4.9529485570890845e-06,
+      "loss": 1.753,
+      "step": 16090
+    },
+    {
+      "epoch": 5.05,
+      "learning_rate": 4.9498117942283565e-06,
+      "loss": 1.759,
+      "step": 16100
+    },
+    {
+      "epoch": 5.05,
+      "learning_rate": 4.946675031367629e-06,
+      "loss": 1.8249,
+      "step": 16110
+    },
+    {
+      "epoch": 5.06,
+      "learning_rate": 4.9435382685069015e-06,
+      "loss": 1.9584,
+      "step": 16120
+    },
+    {
+      "epoch": 5.06,
+      "learning_rate": 4.9404015056461735e-06,
+      "loss": 1.9016,
+      "step": 16130
+    },
+    {
+      "epoch": 5.06,
+      "learning_rate": 4.9372647427854455e-06,
+      "loss": 1.7952,
+      "step": 16140
+    },
+    {
+      "epoch": 5.07,
+      "learning_rate": 4.9341279799247184e-06,
+      "loss": 1.8447,
+      "step": 16150
+    },
+    {
+      "epoch": 5.07,
+      "learning_rate": 4.9309912170639905e-06,
+      "loss": 1.8471,
+      "step": 16160
+    },
+    {
+      "epoch": 5.07,
+      "learning_rate": 4.9278544542032625e-06,
+      "loss": 1.7276,
+      "step": 16170
+    },
+    {
+      "epoch": 5.08,
+      "learning_rate": 4.9247176913425346e-06,
+      "loss": 1.7841,
+      "step": 16180
+    },
+    {
+      "epoch": 5.08,
+      "learning_rate": 4.9215809284818075e-06,
+      "loss": 1.8478,
+      "step": 16190
+    },
+    {
+      "epoch": 5.08,
+      "learning_rate": 4.9184441656210795e-06,
+      "loss": 1.9077,
+      "step": 16200
+    },
+    {
+      "epoch": 5.08,
+      "learning_rate": 4.9153074027603516e-06,
+      "loss": 1.9702,
+      "step": 16210
+    },
+    {
+      "epoch": 5.09,
+      "learning_rate": 4.912170639899624e-06,
+      "loss": 1.8798,
+      "step": 16220
+    },
+    {
+      "epoch": 5.09,
+      "learning_rate": 4.9090338770388965e-06,
+      "loss": 1.8828,
+      "step": 16230
+    },
+    {
+      "epoch": 5.09,
+      "learning_rate": 4.9058971141781685e-06,
+      "loss": 1.858,
+      "step": 16240
+    },
+    {
+      "epoch": 5.1,
+      "learning_rate": 4.902760351317441e-06,
+      "loss": 1.9143,
+      "step": 16250
+    },
+    {
+      "epoch": 5.1,
+      "learning_rate": 4.899623588456713e-06,
+      "loss": 1.845,
+      "step": 16260
+    },
+    {
+      "epoch": 5.1,
+      "learning_rate": 4.8964868255959855e-06,
+      "loss": 1.9097,
+      "step": 16270
+    },
+    {
+      "epoch": 5.11,
+      "learning_rate": 4.8933500627352576e-06,
+      "loss": 1.8443,
+      "step": 16280
+    },
+    {
+      "epoch": 5.11,
+      "learning_rate": 4.89021329987453e-06,
+      "loss": 1.8654,
+      "step": 16290
+    },
+    {
+      "epoch": 5.11,
+      "learning_rate": 4.8870765370138025e-06,
+      "loss": 1.9197,
+      "step": 16300
+    },
+    {
+      "epoch": 5.12,
+      "learning_rate": 4.8839397741530745e-06,
+      "loss": 1.8181,
+      "step": 16310
+    },
+    {
+      "epoch": 5.12,
+      "learning_rate": 4.880803011292347e-06,
+      "loss": 1.8374,
+      "step": 16320
+    },
+    {
+      "epoch": 5.12,
+      "learning_rate": 4.877666248431619e-06,
+      "loss": 1.7974,
+      "step": 16330
+    },
+    {
+      "epoch": 5.13,
+      "learning_rate": 4.8745294855708915e-06,
+      "loss": 1.8759,
+      "step": 16340
+    },
+    {
+      "epoch": 5.13,
+      "learning_rate": 4.8713927227101636e-06,
+      "loss": 1.8612,
+      "step": 16350
+    },
+    {
+      "epoch": 5.13,
+      "learning_rate": 4.868255959849436e-06,
+      "loss": 1.8389,
+      "step": 16360
+    },
+    {
+      "epoch": 5.13,
+      "learning_rate": 4.865119196988708e-06,
+      "loss": 1.8966,
+      "step": 16370
+    },
+    {
+      "epoch": 5.14,
+      "learning_rate": 4.8619824341279805e-06,
+      "loss": 1.9176,
+      "step": 16380
+    },
+    {
+      "epoch": 5.14,
+      "learning_rate": 4.858845671267253e-06,
+      "loss": 1.8387,
+      "step": 16390
+    },
+    {
+      "epoch": 5.14,
+      "learning_rate": 4.8557089084065255e-06,
+      "loss": 1.8397,
+      "step": 16400
+    },
+    {
+      "epoch": 5.15,
+      "learning_rate": 4.852572145545797e-06,
+      "loss": 1.8909,
+      "step": 16410
+    },
+    {
+      "epoch": 5.15,
+      "learning_rate": 4.84943538268507e-06,
+      "loss": 1.8981,
+      "step": 16420
+    },
+    {
+      "epoch": 5.15,
+      "learning_rate": 4.846298619824342e-06,
+      "loss": 1.8632,
+      "step": 16430
+    },
+    {
+      "epoch": 5.16,
+      "learning_rate": 4.843161856963614e-06,
+      "loss": 1.8874,
+      "step": 16440
+    },
+    {
+      "epoch": 5.16,
+      "learning_rate": 4.8400250941028866e-06,
+      "loss": 1.8557,
+      "step": 16450
+    },
+    {
+      "epoch": 5.16,
+      "learning_rate": 4.836888331242159e-06,
+      "loss": 1.8918,
+      "step": 16460
+    },
+    {
+      "epoch": 5.17,
+      "learning_rate": 4.833751568381431e-06,
+      "loss": 1.8779,
+      "step": 16470
+    },
+    {
+      "epoch": 5.17,
+      "learning_rate": 4.830614805520703e-06,
+      "loss": 1.8792,
+      "step": 16480
+    },
+    {
+      "epoch": 5.17,
+      "learning_rate": 4.827478042659976e-06,
+      "loss": 1.9252,
+      "step": 16490
+    },
+    {
+      "epoch": 5.18,
+      "learning_rate": 4.824341279799248e-06,
+      "loss": 1.7958,
+      "step": 16500
+    },
+    {
+      "epoch": 5.18,
+      "learning_rate": 4.82120451693852e-06,
+      "loss": 1.9067,
+      "step": 16510
+    },
+    {
+      "epoch": 5.18,
+      "learning_rate": 4.818067754077792e-06,
+      "loss": 1.7881,
+      "step": 16520
+    },
+    {
+      "epoch": 5.19,
+      "learning_rate": 4.814930991217065e-06,
+      "loss": 1.9106,
+      "step": 16530
+    },
+    {
+      "epoch": 5.19,
+      "learning_rate": 4.811794228356337e-06,
+      "loss": 1.8793,
+      "step": 16540
+    },
+    {
+      "epoch": 5.19,
+      "learning_rate": 4.8086574654956095e-06,
+      "loss": 1.9255,
+      "step": 16550
+    },
+    {
+      "epoch": 5.19,
+      "learning_rate": 4.805520702634881e-06,
+      "loss": 1.9354,
+      "step": 16560
+    },
+    {
+      "epoch": 5.2,
+      "learning_rate": 4.802383939774154e-06,
+      "loss": 1.9018,
+      "step": 16570
+    },
+    {
+      "epoch": 5.2,
+      "learning_rate": 4.799247176913426e-06,
+      "loss": 1.8821,
+      "step": 16580
+    },
+    {
+      "epoch": 5.2,
+      "learning_rate": 4.7961104140526986e-06,
+      "loss": 1.8658,
+      "step": 16590
+    },
+    {
+      "epoch": 5.21,
+      "learning_rate": 4.792973651191971e-06,
+      "loss": 1.8904,
+      "step": 16600
+    },
+    {
+      "epoch": 5.21,
+      "learning_rate": 4.789836888331243e-06,
+      "loss": 1.8478,
+      "step": 16610
+    },
+    {
+      "epoch": 5.21,
+      "learning_rate": 4.786700125470515e-06,
+      "loss": 1.8945,
+      "step": 16620
+    },
+    {
+      "epoch": 5.22,
+      "learning_rate": 4.783563362609787e-06,
+      "loss": 1.8028,
+      "step": 16630
+    },
+    {
+      "epoch": 5.22,
+      "learning_rate": 4.78042659974906e-06,
+      "loss": 1.8476,
+      "step": 16640
+    },
+    {
+      "epoch": 5.22,
+      "learning_rate": 4.777289836888332e-06,
+      "loss": 1.8735,
+      "step": 16650
+    },
+    {
+      "epoch": 5.23,
+      "learning_rate": 4.774153074027604e-06,
+      "loss": 1.9773,
+      "step": 16660
+    },
+    {
+      "epoch": 5.23,
+      "learning_rate": 4.771016311166876e-06,
+      "loss": 1.8794,
+      "step": 16670
+    },
+    {
+      "epoch": 5.23,
+      "learning_rate": 4.767879548306149e-06,
+      "loss": 1.8646,
+      "step": 16680
+    },
+    {
+      "epoch": 5.24,
+      "learning_rate": 4.764742785445421e-06,
+      "loss": 1.8607,
+      "step": 16690
+    },
+    {
+      "epoch": 5.24,
+      "learning_rate": 4.761606022584693e-06,
+      "loss": 1.9471,
+      "step": 16700
+    },
+    {
+      "epoch": 5.24,
+      "learning_rate": 4.758469259723965e-06,
+      "loss": 1.8419,
+      "step": 16710
+    },
+    {
+      "epoch": 5.24,
+      "learning_rate": 4.755332496863238e-06,
+      "loss": 1.8559,
+      "step": 16720
+    },
+    {
+      "epoch": 5.25,
+      "learning_rate": 4.75219573400251e-06,
+      "loss": 1.8327,
+      "step": 16730
+    },
+    {
+      "epoch": 5.25,
+      "learning_rate": 4.749058971141783e-06,
+      "loss": 1.9288,
+      "step": 16740
+    },
+    {
+      "epoch": 5.25,
+      "learning_rate": 4.745922208281054e-06,
+      "loss": 1.8511,
+      "step": 16750
+    },
+    {
+      "epoch": 5.26,
+      "learning_rate": 4.742785445420327e-06,
+      "loss": 1.9087,
+      "step": 16760
+    },
+    {
+      "epoch": 5.26,
+      "learning_rate": 4.739648682559599e-06,
+      "loss": 1.8333,
+      "step": 16770
+    },
+    {
+      "epoch": 5.26,
+      "learning_rate": 4.736511919698872e-06,
+      "loss": 1.8701,
+      "step": 16780
+    },
+    {
+      "epoch": 5.27,
+      "learning_rate": 4.733375156838144e-06,
+      "loss": 1.9376,
+      "step": 16790
+    },
+    {
+      "epoch": 5.27,
+      "learning_rate": 4.730238393977416e-06,
+      "loss": 1.848,
+      "step": 16800
+    },
+    {
+      "epoch": 5.27,
+      "learning_rate": 4.727101631116688e-06,
+      "loss": 1.7526,
+      "step": 16810
+    },
+    {
+      "epoch": 5.28,
+      "learning_rate": 4.72396486825596e-06,
+      "loss": 1.8326,
+      "step": 16820
+    },
+    {
+      "epoch": 5.28,
+      "learning_rate": 4.720828105395233e-06,
+      "loss": 1.8106,
+      "step": 16830
+    },
+    {
+      "epoch": 5.28,
+      "learning_rate": 4.717691342534505e-06,
+      "loss": 1.8723,
+      "step": 16840
+    },
+    {
+      "epoch": 5.29,
+      "learning_rate": 4.714554579673777e-06,
+      "loss": 1.8554,
+      "step": 16850
+    },
+    {
+      "epoch": 5.29,
+      "learning_rate": 4.711417816813049e-06,
+      "loss": 1.847,
+      "step": 16860
+    },
+    {
+      "epoch": 5.29,
+      "learning_rate": 4.708281053952322e-06,
+      "loss": 1.9274,
+      "step": 16870
+    },
+    {
+      "epoch": 5.29,
+      "learning_rate": 4.705144291091594e-06,
+      "loss": 1.8982,
+      "step": 16880
+    },
+    {
+      "epoch": 5.3,
+      "learning_rate": 4.702007528230867e-06,
+      "loss": 1.8529,
+      "step": 16890
+    },
+    {
+      "epoch": 5.3,
+      "learning_rate": 4.698870765370138e-06,
+      "loss": 1.853,
+      "step": 16900
+    },
+    {
+      "epoch": 5.3,
+      "learning_rate": 4.695734002509411e-06,
+      "loss": 1.8639,
+      "step": 16910
+    },
+    {
+      "epoch": 5.31,
+      "learning_rate": 4.692597239648683e-06,
+      "loss": 1.9326,
+      "step": 16920
+    },
+    {
+      "epoch": 5.31,
+      "learning_rate": 4.689460476787956e-06,
+      "loss": 1.9268,
+      "step": 16930
+    },
+    {
+      "epoch": 5.31,
+      "learning_rate": 4.686323713927228e-06,
+      "loss": 1.9351,
+      "step": 16940
+    },
+    {
+      "epoch": 5.32,
+      "learning_rate": 4.6831869510665e-06,
+      "loss": 1.9076,
+      "step": 16950
+    },
+    {
+      "epoch": 5.32,
+      "learning_rate": 4.680050188205772e-06,
+      "loss": 1.8779,
+      "step": 16960
+    },
+    {
+      "epoch": 5.32,
+      "learning_rate": 4.676913425345045e-06,
+      "loss": 1.8081,
+      "step": 16970
+    },
+    {
+      "epoch": 5.33,
+      "learning_rate": 4.673776662484317e-06,
+      "loss": 1.908,
+      "step": 16980
+    },
+    {
+      "epoch": 5.33,
+      "learning_rate": 4.670639899623589e-06,
+      "loss": 1.8247,
+      "step": 16990
+    },
+    {
+      "epoch": 5.33,
+      "learning_rate": 4.667503136762861e-06,
+      "loss": 1.8314,
+      "step": 17000
+    },
+    {
+      "epoch": 5.34,
+      "learning_rate": 4.664366373902133e-06,
+      "loss": 1.8752,
+      "step": 17010
+    },
+    {
+      "epoch": 5.34,
+      "learning_rate": 4.661229611041406e-06,
+      "loss": 1.8456,
+      "step": 17020
+    },
+    {
+      "epoch": 5.34,
+      "learning_rate": 4.658092848180678e-06,
+      "loss": 1.8795,
+      "step": 17030
+    },
+    {
+      "epoch": 5.35,
+      "learning_rate": 4.654956085319951e-06,
+      "loss": 1.8288,
+      "step": 17040
+    },
+    {
+      "epoch": 5.35,
+      "learning_rate": 4.651819322459222e-06,
+      "loss": 1.8917,
+      "step": 17050
+    },
+    {
+      "epoch": 5.35,
+      "learning_rate": 4.648682559598495e-06,
+      "loss": 1.8626,
+      "step": 17060
+    },
+    {
+      "epoch": 5.35,
+      "learning_rate": 4.645545796737767e-06,
+      "loss": 1.8603,
+      "step": 17070
+    },
+    {
+      "epoch": 5.36,
+      "learning_rate": 4.64240903387704e-06,
+      "loss": 1.8863,
+      "step": 17080
+    },
+    {
+      "epoch": 5.36,
+      "learning_rate": 4.639272271016311e-06,
+      "loss": 1.8566,
+      "step": 17090
+    },
+    {
+      "epoch": 5.36,
+      "learning_rate": 4.636135508155584e-06,
+      "loss": 1.9462,
+      "step": 17100
+    },
+    {
+      "epoch": 5.37,
+      "learning_rate": 4.632998745294856e-06,
+      "loss": 1.8992,
+      "step": 17110
+    },
+    {
+      "epoch": 5.37,
+      "learning_rate": 4.629861982434129e-06,
+      "loss": 1.8613,
+      "step": 17120
+    },
+    {
+      "epoch": 5.37,
+      "learning_rate": 4.626725219573401e-06,
+      "loss": 1.8436,
+      "step": 17130
+    },
+    {
+      "epoch": 5.38,
+      "learning_rate": 4.623588456712673e-06,
+      "loss": 1.9199,
+      "step": 17140
+    },
+    {
+      "epoch": 5.38,
+      "learning_rate": 4.620451693851945e-06,
+      "loss": 1.9195,
+      "step": 17150
+    },
+    {
+      "epoch": 5.38,
+      "learning_rate": 4.617314930991217e-06,
+      "loss": 1.8925,
+      "step": 17160
+    },
+    {
+      "epoch": 5.39,
+      "learning_rate": 4.61417816813049e-06,
+      "loss": 1.9009,
+      "step": 17170
+    },
+    {
+      "epoch": 5.39,
+      "learning_rate": 4.611041405269762e-06,
+      "loss": 1.8888,
+      "step": 17180
+    },
+    {
+      "epoch": 5.39,
+      "learning_rate": 4.607904642409034e-06,
+      "loss": 1.8742,
+      "step": 17190
+    },
+    {
+      "epoch": 5.4,
+      "learning_rate": 4.604767879548306e-06,
+      "loss": 1.8256,
+      "step": 17200
+    },
+    {
+      "epoch": 5.4,
+      "learning_rate": 4.601631116687579e-06,
+      "loss": 1.8244,
+      "step": 17210
+    },
+    {
+      "epoch": 5.4,
+      "learning_rate": 4.598494353826851e-06,
+      "loss": 1.8462,
+      "step": 17220
+    },
+    {
+      "epoch": 5.4,
+      "learning_rate": 4.595357590966124e-06,
+      "loss": 1.9451,
+      "step": 17230
+    },
+    {
+      "epoch": 5.41,
+      "learning_rate": 4.592220828105395e-06,
+      "loss": 1.8587,
+      "step": 17240
+    },
+    {
+      "epoch": 5.41,
+      "learning_rate": 4.589084065244668e-06,
+      "loss": 1.9033,
+      "step": 17250
+    },
+    {
+      "epoch": 5.41,
+      "learning_rate": 4.58594730238394e-06,
+      "loss": 1.8973,
+      "step": 17260
+    },
+    {
+      "epoch": 5.42,
+      "learning_rate": 4.582810539523213e-06,
+      "loss": 1.8485,
+      "step": 17270
+    },
+    {
+      "epoch": 5.42,
+      "learning_rate": 4.579673776662485e-06,
+      "loss": 1.8579,
+      "step": 17280
+    },
+    {
+      "epoch": 5.42,
+      "learning_rate": 4.576537013801757e-06,
+      "loss": 1.9269,
+      "step": 17290
+    },
+    {
+      "epoch": 5.43,
+      "learning_rate": 4.573400250941029e-06,
+      "loss": 1.8768,
+      "step": 17300
+    },
+    {
+      "epoch": 5.43,
+      "learning_rate": 4.570263488080302e-06,
+      "loss": 1.9166,
+      "step": 17310
+    },
+    {
+      "epoch": 5.43,
+      "learning_rate": 4.567126725219574e-06,
+      "loss": 1.871,
+      "step": 17320
+    },
+    {
+      "epoch": 5.44,
+      "learning_rate": 4.563989962358846e-06,
+      "loss": 1.8885,
+      "step": 17330
+    },
+    {
+      "epoch": 5.44,
+      "learning_rate": 4.560853199498118e-06,
+      "loss": 1.8639,
+      "step": 17340
+    },
+    {
+      "epoch": 5.44,
+      "learning_rate": 4.55771643663739e-06,
+      "loss": 1.8968,
+      "step": 17350
+    },
+    {
+      "epoch": 5.45,
+      "learning_rate": 4.554579673776663e-06,
+      "loss": 1.9012,
+      "step": 17360
+    },
+    {
+      "epoch": 5.45,
+      "learning_rate": 4.551442910915935e-06,
+      "loss": 1.8502,
+      "step": 17370
+    },
+    {
+      "epoch": 5.45,
+      "learning_rate": 4.548306148055208e-06,
+      "loss": 1.8242,
+      "step": 17380
+    },
+    {
+      "epoch": 5.45,
+      "learning_rate": 4.545169385194479e-06,
+      "loss": 1.9132,
+      "step": 17390
+    },
+    {
+      "epoch": 5.46,
+      "learning_rate": 4.542032622333752e-06,
+      "loss": 1.8413,
+      "step": 17400
+    },
+    {
+      "epoch": 5.46,
+      "learning_rate": 4.538895859473024e-06,
+      "loss": 1.9127,
+      "step": 17410
+    },
+    {
+      "epoch": 5.46,
+      "learning_rate": 4.535759096612297e-06,
+      "loss": 1.9078,
+      "step": 17420
+    },
+    {
+      "epoch": 5.47,
+      "learning_rate": 4.532622333751569e-06,
+      "loss": 1.9186,
+      "step": 17430
+    },
+    {
+      "epoch": 5.47,
+      "learning_rate": 4.529485570890841e-06,
+      "loss": 1.8833,
+      "step": 17440
+    },
+    {
+      "epoch": 5.47,
+      "learning_rate": 4.526348808030113e-06,
+      "loss": 1.8754,
+      "step": 17450
+    },
+    {
+      "epoch": 5.48,
+      "learning_rate": 4.523212045169386e-06,
+      "loss": 1.8861,
+      "step": 17460
+    },
+    {
+      "epoch": 5.48,
+      "learning_rate": 4.520075282308658e-06,
+      "loss": 1.7951,
+      "step": 17470
+    },
+    {
+      "epoch": 5.48,
+      "learning_rate": 4.51693851944793e-06,
+      "loss": 1.9778,
+      "step": 17480
+    },
+    {
+      "epoch": 5.49,
+      "learning_rate": 4.513801756587202e-06,
+      "loss": 1.8056,
+      "step": 17490
+    },
+    {
+      "epoch": 5.49,
+      "learning_rate": 4.510664993726475e-06,
+      "loss": 1.8797,
+      "step": 17500
+    },
+    {
+      "epoch": 5.49,
+      "learning_rate": 4.507528230865747e-06,
+      "loss": 1.8466,
+      "step": 17510
+    },
+    {
+      "epoch": 5.5,
+      "learning_rate": 4.504391468005019e-06,
+      "loss": 1.9108,
+      "step": 17520
+    },
+    {
+      "epoch": 5.5,
+      "learning_rate": 4.501254705144291e-06,
+      "loss": 1.9139,
+      "step": 17530
+    },
+    {
+      "epoch": 5.5,
+      "learning_rate": 4.498117942283563e-06,
+      "loss": 1.8835,
+      "step": 17540
+    },
+    {
+      "epoch": 5.51,
+      "learning_rate": 4.494981179422836e-06,
+      "loss": 1.8341,
+      "step": 17550
+    },
+    {
+      "epoch": 5.51,
+      "learning_rate": 4.491844416562108e-06,
+      "loss": 1.8345,
+      "step": 17560
+    },
+    {
+      "epoch": 5.51,
+      "learning_rate": 4.488707653701381e-06,
+      "loss": 1.8053,
+      "step": 17570
+    },
+    {
+      "epoch": 5.51,
+      "learning_rate": 4.485570890840652e-06,
+      "loss": 1.8901,
+      "step": 17580
+    },
+    {
+      "epoch": 5.52,
+      "learning_rate": 4.482434127979925e-06,
+      "loss": 1.882,
+      "step": 17590
+    },
+    {
+      "epoch": 5.52,
+      "learning_rate": 4.479297365119197e-06,
+      "loss": 1.8712,
+      "step": 17600
+    },
+    {
+      "epoch": 5.52,
+      "learning_rate": 4.47616060225847e-06,
+      "loss": 1.8634,
+      "step": 17610
+    },
+    {
+      "epoch": 5.53,
+      "learning_rate": 4.473023839397742e-06,
+      "loss": 1.8308,
+      "step": 17620
+    },
+    {
+      "epoch": 5.53,
+      "learning_rate": 4.469887076537014e-06,
+      "loss": 1.8628,
+      "step": 17630
+    },
+    {
+      "epoch": 5.53,
+      "learning_rate": 4.466750313676286e-06,
+      "loss": 1.87,
+      "step": 17640
+    },
+    {
+      "epoch": 5.54,
+      "learning_rate": 4.463613550815559e-06,
+      "loss": 1.8509,
+      "step": 17650
+    },
+    {
+      "epoch": 5.54,
+      "learning_rate": 4.460476787954831e-06,
+      "loss": 1.861,
+      "step": 17660
+    },
+    {
+      "epoch": 5.54,
+      "learning_rate": 4.457340025094103e-06,
+      "loss": 1.8706,
+      "step": 17670
+    },
+    {
+      "epoch": 5.55,
+      "learning_rate": 4.454203262233375e-06,
+      "loss": 1.9479,
+      "step": 17680
+    },
+    {
+      "epoch": 5.55,
+      "learning_rate": 4.451066499372648e-06,
+      "loss": 1.8876,
+      "step": 17690
+    },
+    {
+      "epoch": 5.55,
+      "learning_rate": 4.44792973651192e-06,
+      "loss": 1.8264,
+      "step": 17700
+    },
+    {
+      "epoch": 5.56,
+      "learning_rate": 4.444792973651192e-06,
+      "loss": 1.7968,
+      "step": 17710
+    },
+    {
+      "epoch": 5.56,
+      "learning_rate": 4.441656210790465e-06,
+      "loss": 1.8326,
+      "step": 17720
+    },
+    {
+      "epoch": 5.56,
+      "learning_rate": 4.438519447929736e-06,
+      "loss": 1.7976,
+      "step": 17730
+    },
+    {
+      "epoch": 5.56,
+      "learning_rate": 4.435382685069009e-06,
+      "loss": 1.8688,
+      "step": 17740
+    },
+    {
+      "epoch": 5.57,
+      "learning_rate": 4.432245922208281e-06,
+      "loss": 1.8763,
+      "step": 17750
+    },
+    {
+      "epoch": 5.57,
+      "learning_rate": 4.429109159347554e-06,
+      "loss": 1.872,
+      "step": 17760
+    },
+    {
+      "epoch": 5.57,
+      "learning_rate": 4.425972396486826e-06,
+      "loss": 1.8453,
+      "step": 17770
+    },
+    {
+      "epoch": 5.58,
+      "learning_rate": 4.422835633626098e-06,
+      "loss": 1.911,
+      "step": 17780
+    },
+    {
+      "epoch": 5.58,
+      "learning_rate": 4.41969887076537e-06,
+      "loss": 1.831,
+      "step": 17790
+    },
+    {
+      "epoch": 5.58,
+      "learning_rate": 4.416562107904643e-06,
+      "loss": 1.8414,
+      "step": 17800
+    },
+    {
+      "epoch": 5.59,
+      "learning_rate": 4.413425345043915e-06,
+      "loss": 1.8333,
+      "step": 17810
+    },
+    {
+      "epoch": 5.59,
+      "learning_rate": 4.410288582183187e-06,
+      "loss": 1.8801,
+      "step": 17820
+    },
+    {
+      "epoch": 5.59,
+      "learning_rate": 4.407151819322459e-06,
+      "loss": 1.9353,
+      "step": 17830
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 4.404015056461732e-06,
+      "loss": 1.8287,
+      "step": 17840
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 4.400878293601004e-06,
+      "loss": 1.8044,
+      "step": 17850
+    },
+    {
+      "epoch": 5.6,
+      "learning_rate": 4.397741530740276e-06,
+      "loss": 1.8164,
+      "step": 17860
+    },
+    {
+      "epoch": 5.61,
+      "learning_rate": 4.394604767879549e-06,
+      "loss": 1.8935,
+      "step": 17870
+    },
+    {
+      "epoch": 5.61,
+      "learning_rate": 4.391468005018821e-06,
+      "loss": 1.8232,
+      "step": 17880
+    },
+    {
+      "epoch": 5.61,
+      "learning_rate": 4.388331242158093e-06,
+      "loss": 1.8721,
+      "step": 17890
+    },
+    {
+      "epoch": 5.61,
+      "learning_rate": 4.385194479297365e-06,
+      "loss": 1.9139,
+      "step": 17900
+    },
+    {
+      "epoch": 5.62,
+      "learning_rate": 4.382057716436638e-06,
+      "loss": 1.9591,
+      "step": 17910
+    },
+    {
+      "epoch": 5.62,
+      "learning_rate": 4.37892095357591e-06,
+      "loss": 1.7527,
+      "step": 17920
+    },
+    {
+      "epoch": 5.62,
+      "learning_rate": 4.375784190715182e-06,
+      "loss": 1.841,
+      "step": 17930
+    },
+    {
+      "epoch": 5.63,
+      "learning_rate": 4.372647427854454e-06,
+      "loss": 1.7895,
+      "step": 17940
+    },
+    {
+      "epoch": 5.63,
+      "learning_rate": 4.369510664993727e-06,
+      "loss": 1.9204,
+      "step": 17950
+    },
+    {
+      "epoch": 5.63,
+      "learning_rate": 4.366373902132999e-06,
+      "loss": 1.8384,
+      "step": 17960
+    },
+    {
+      "epoch": 5.64,
+      "learning_rate": 4.363237139272271e-06,
+      "loss": 1.8726,
+      "step": 17970
+    },
+    {
+      "epoch": 5.64,
+      "learning_rate": 4.360100376411543e-06,
+      "loss": 1.9236,
+      "step": 17980
+    },
+    {
+      "epoch": 5.64,
+      "learning_rate": 4.356963613550816e-06,
+      "loss": 1.867,
+      "step": 17990
+    },
+    {
+      "epoch": 5.65,
+      "learning_rate": 4.353826850690088e-06,
+      "loss": 1.8962,
+      "step": 18000
+    },
+    {
+      "epoch": 5.65,
+      "learning_rate": 4.35069008782936e-06,
+      "loss": 1.8803,
+      "step": 18010
+    },
+    {
+      "epoch": 5.65,
+      "learning_rate": 4.347553324968632e-06,
+      "loss": 1.9137,
+      "step": 18020
+    },
+    {
+      "epoch": 5.66,
+      "learning_rate": 4.344416562107905e-06,
+      "loss": 1.8177,
+      "step": 18030
+    },
+    {
+      "epoch": 5.66,
+      "learning_rate": 4.341279799247177e-06,
+      "loss": 1.923,
+      "step": 18040
+    },
+    {
+      "epoch": 5.66,
+      "learning_rate": 4.338143036386449e-06,
+      "loss": 1.8752,
+      "step": 18050
+    },
+    {
+      "epoch": 5.66,
+      "learning_rate": 4.335006273525722e-06,
+      "loss": 1.7683,
+      "step": 18060
+    },
+    {
+      "epoch": 5.67,
+      "learning_rate": 4.331869510664994e-06,
+      "loss": 1.8277,
+      "step": 18070
+    },
+    {
+      "epoch": 5.67,
+      "learning_rate": 4.328732747804266e-06,
+      "loss": 1.8932,
+      "step": 18080
+    },
+    {
+      "epoch": 5.67,
+      "learning_rate": 4.325595984943538e-06,
+      "loss": 1.8611,
+      "step": 18090
+    },
+    {
+      "epoch": 5.68,
+      "learning_rate": 4.322459222082811e-06,
+      "loss": 1.9092,
+      "step": 18100
+    },
+    {
+      "epoch": 5.68,
+      "learning_rate": 4.319322459222083e-06,
+      "loss": 1.8887,
+      "step": 18110
+    },
+    {
+      "epoch": 5.68,
+      "learning_rate": 4.316185696361355e-06,
+      "loss": 1.8339,
+      "step": 18120
+    },
+    {
+      "epoch": 5.69,
+      "learning_rate": 4.313048933500627e-06,
+      "loss": 1.8809,
+      "step": 18130
+    },
+    {
+      "epoch": 5.69,
+      "learning_rate": 4.3099121706399e-06,
+      "loss": 1.838,
+      "step": 18140
+    },
+    {
+      "epoch": 5.69,
+      "learning_rate": 4.306775407779172e-06,
+      "loss": 1.9172,
+      "step": 18150
+    },
+    {
+      "epoch": 5.7,
+      "learning_rate": 4.303638644918444e-06,
+      "loss": 1.8116,
+      "step": 18160
+    },
+    {
+      "epoch": 5.7,
+      "learning_rate": 4.300501882057716e-06,
+      "loss": 1.9088,
+      "step": 18170
+    },
+    {
+      "epoch": 5.7,
+      "learning_rate": 4.297365119196989e-06,
+      "loss": 1.8633,
+      "step": 18180
+    },
+    {
+      "epoch": 5.71,
+      "learning_rate": 4.294228356336261e-06,
+      "loss": 1.8457,
+      "step": 18190
+    },
+    {
+      "epoch": 5.71,
+      "learning_rate": 4.291091593475533e-06,
+      "loss": 1.8625,
+      "step": 18200
+    },
+    {
+      "epoch": 5.71,
+      "learning_rate": 4.287954830614806e-06,
+      "loss": 1.895,
+      "step": 18210
+    },
+    {
+      "epoch": 5.72,
+      "learning_rate": 4.284818067754078e-06,
+      "loss": 1.8002,
+      "step": 18220
+    },
+    {
+      "epoch": 5.72,
+      "learning_rate": 4.28168130489335e-06,
+      "loss": 1.8445,
+      "step": 18230
+    },
+    {
+      "epoch": 5.72,
+      "learning_rate": 4.278544542032622e-06,
+      "loss": 1.8905,
+      "step": 18240
+    },
+    {
+      "epoch": 5.72,
+      "learning_rate": 4.275407779171895e-06,
+      "loss": 1.9096,
+      "step": 18250
+    },
+    {
+      "epoch": 5.73,
+      "learning_rate": 4.272271016311167e-06,
+      "loss": 1.9088,
+      "step": 18260
+    },
+    {
+      "epoch": 5.73,
+      "learning_rate": 4.269134253450439e-06,
+      "loss": 1.9363,
+      "step": 18270
+    },
+    {
+      "epoch": 5.73,
+      "learning_rate": 4.2659974905897114e-06,
+      "loss": 1.8302,
+      "step": 18280
+    },
+    {
+      "epoch": 5.74,
+      "learning_rate": 4.262860727728984e-06,
+      "loss": 1.9126,
+      "step": 18290
+    },
+    {
+      "epoch": 5.74,
+      "learning_rate": 4.259723964868256e-06,
+      "loss": 1.8292,
+      "step": 18300
+    },
+    {
+      "epoch": 5.74,
+      "learning_rate": 4.256587202007528e-06,
+      "loss": 1.8952,
+      "step": 18310
+    },
+    {
+      "epoch": 5.75,
+      "learning_rate": 4.2534504391468005e-06,
+      "loss": 1.8559,
+      "step": 18320
+    },
+    {
+      "epoch": 5.75,
+      "learning_rate": 4.250313676286073e-06,
+      "loss": 1.8406,
+      "step": 18330
+    },
+    {
+      "epoch": 5.75,
+      "learning_rate": 4.247176913425345e-06,
+      "loss": 1.8789,
+      "step": 18340
+    },
+    {
+      "epoch": 5.76,
+      "learning_rate": 4.2440401505646174e-06,
+      "loss": 1.9105,
+      "step": 18350
+    },
+    {
+      "epoch": 5.76,
+      "learning_rate": 4.24090338770389e-06,
+      "loss": 1.8775,
+      "step": 18360
+    },
+    {
+      "epoch": 5.76,
+      "learning_rate": 4.237766624843162e-06,
+      "loss": 1.8922,
+      "step": 18370
+    },
+    {
+      "epoch": 5.77,
+      "learning_rate": 4.2346298619824344e-06,
+      "loss": 1.8724,
+      "step": 18380
+    },
+    {
+      "epoch": 5.77,
+      "learning_rate": 4.2314930991217065e-06,
+      "loss": 1.8211,
+      "step": 18390
+    },
+    {
+      "epoch": 5.77,
+      "learning_rate": 4.228356336260979e-06,
+      "loss": 1.8813,
+      "step": 18400
+    },
+    {
+      "epoch": 5.77,
+      "learning_rate": 4.225219573400251e-06,
+      "loss": 1.8587,
+      "step": 18410
+    },
+    {
+      "epoch": 5.78,
+      "learning_rate": 4.2220828105395235e-06,
+      "loss": 1.8624,
+      "step": 18420
+    },
+    {
+      "epoch": 5.78,
+      "learning_rate": 4.2189460476787955e-06,
+      "loss": 1.8503,
+      "step": 18430
+    },
+    {
+      "epoch": 5.78,
+      "learning_rate": 4.215809284818068e-06,
+      "loss": 1.8128,
+      "step": 18440
+    },
+    {
+      "epoch": 5.79,
+      "learning_rate": 4.2126725219573404e-06,
+      "loss": 1.8921,
+      "step": 18450
+    },
+    {
+      "epoch": 5.79,
+      "learning_rate": 4.2095357590966125e-06,
+      "loss": 1.8751,
+      "step": 18460
+    },
+    {
+      "epoch": 5.79,
+      "learning_rate": 4.2063989962358845e-06,
+      "loss": 1.8635,
+      "step": 18470
+    },
+    {
+      "epoch": 5.8,
+      "learning_rate": 4.203262233375157e-06,
+      "loss": 1.8687,
+      "step": 18480
+    },
+    {
+      "epoch": 5.8,
+      "learning_rate": 4.2001254705144295e-06,
+      "loss": 1.9078,
+      "step": 18490
+    },
+    {
+      "epoch": 5.8,
+      "learning_rate": 4.1969887076537015e-06,
+      "loss": 1.8334,
+      "step": 18500
+    },
+    {
+      "epoch": 5.81,
+      "learning_rate": 4.1938519447929736e-06,
+      "loss": 1.8734,
+      "step": 18510
+    },
+    {
+      "epoch": 5.81,
+      "learning_rate": 4.1907151819322464e-06,
+      "loss": 1.9317,
+      "step": 18520
+    },
+    {
+      "epoch": 5.81,
+      "learning_rate": 4.1875784190715185e-06,
+      "loss": 1.8736,
+      "step": 18530
+    },
+    {
+      "epoch": 5.82,
+      "learning_rate": 4.1844416562107905e-06,
+      "loss": 1.9142,
+      "step": 18540
+    },
+    {
+      "epoch": 5.82,
+      "learning_rate": 4.1813048933500634e-06,
+      "loss": 1.8304,
+      "step": 18550
+    },
+    {
+      "epoch": 5.82,
+      "learning_rate": 4.1781681304893355e-06,
+      "loss": 1.8861,
+      "step": 18560
+    },
+    {
+      "epoch": 5.82,
+      "learning_rate": 4.1750313676286075e-06,
+      "loss": 1.8007,
+      "step": 18570
+    },
+    {
+      "epoch": 5.83,
+      "learning_rate": 4.1718946047678796e-06,
+      "loss": 1.9076,
+      "step": 18580
+    },
+    {
+      "epoch": 5.83,
+      "learning_rate": 4.1687578419071525e-06,
+      "loss": 1.7931,
+      "step": 18590
+    },
+    {
+      "epoch": 5.83,
+      "learning_rate": 4.1656210790464245e-06,
+      "loss": 1.9116,
+      "step": 18600
+    },
+    {
+      "epoch": 5.84,
+      "learning_rate": 4.1624843161856965e-06,
+      "loss": 1.8149,
+      "step": 18610
+    },
+    {
+      "epoch": 5.84,
+      "learning_rate": 4.159347553324969e-06,
+      "loss": 1.8394,
+      "step": 18620
+    },
+    {
+      "epoch": 5.84,
+      "learning_rate": 4.1562107904642415e-06,
+      "loss": 1.8453,
+      "step": 18630
+    },
+    {
+      "epoch": 5.85,
+      "learning_rate": 4.1530740276035135e-06,
+      "loss": 1.9149,
+      "step": 18640
+    },
+    {
+      "epoch": 5.85,
+      "learning_rate": 4.149937264742786e-06,
+      "loss": 1.865,
+      "step": 18650
+    },
+    {
+      "epoch": 5.85,
+      "learning_rate": 4.146800501882058e-06,
+      "loss": 1.9112,
+      "step": 18660
+    },
+    {
+      "epoch": 5.86,
+      "learning_rate": 4.1436637390213305e-06,
+      "loss": 1.8812,
+      "step": 18670
+    },
+    {
+      "epoch": 5.86,
+      "learning_rate": 4.1405269761606026e-06,
+      "loss": 1.8773,
+      "step": 18680
+    },
+    {
+      "epoch": 5.86,
+      "learning_rate": 4.137390213299875e-06,
+      "loss": 1.8407,
+      "step": 18690
+    },
+    {
+      "epoch": 5.87,
+      "learning_rate": 4.1342534504391475e-06,
+      "loss": 1.9239,
+      "step": 18700
+    },
+    {
+      "epoch": 5.87,
+      "learning_rate": 4.1311166875784195e-06,
+      "loss": 1.8716,
+      "step": 18710
+    },
+    {
+      "epoch": 5.87,
+      "learning_rate": 4.127979924717692e-06,
+      "loss": 1.9464,
+      "step": 18720
+    },
+    {
+      "epoch": 5.88,
+      "learning_rate": 4.124843161856964e-06,
+      "loss": 1.7955,
+      "step": 18730
+    },
+    {
+      "epoch": 5.88,
+      "learning_rate": 4.1217063989962365e-06,
+      "loss": 1.8431,
+      "step": 18740
+    },
+    {
+      "epoch": 5.88,
+      "learning_rate": 4.1185696361355086e-06,
+      "loss": 1.8306,
+      "step": 18750
+    },
+    {
+      "epoch": 5.88,
+      "learning_rate": 4.115432873274781e-06,
+      "loss": 1.8927,
+      "step": 18760
+    },
+    {
+      "epoch": 5.89,
+      "learning_rate": 4.112296110414053e-06,
+      "loss": 1.918,
+      "step": 18770
+    },
+    {
+      "epoch": 5.89,
+      "learning_rate": 4.1091593475533255e-06,
+      "loss": 1.8618,
+      "step": 18780
+    },
+    {
+      "epoch": 5.89,
+      "learning_rate": 4.106022584692598e-06,
+      "loss": 1.9345,
+      "step": 18790
+    },
+    {
+      "epoch": 5.9,
+      "learning_rate": 4.1028858218318705e-06,
+      "loss": 1.9022,
+      "step": 18800
+    },
+    {
+      "epoch": 5.9,
+      "learning_rate": 4.099749058971142e-06,
+      "loss": 1.8736,
+      "step": 18810
+    },
+    {
+      "epoch": 5.9,
+      "learning_rate": 4.0966122961104146e-06,
+      "loss": 1.8973,
+      "step": 18820
+    },
+    {
+      "epoch": 5.91,
+      "learning_rate": 4.093475533249687e-06,
+      "loss": 1.8944,
+      "step": 18830
+    },
+    {
+      "epoch": 5.91,
+      "learning_rate": 4.0903387703889595e-06,
+      "loss": 1.9158,
+      "step": 18840
+    },
+    {
+      "epoch": 5.91,
+      "learning_rate": 4.087202007528231e-06,
+      "loss": 1.8534,
+      "step": 18850
+    },
+    {
+      "epoch": 5.92,
+      "learning_rate": 4.084065244667504e-06,
+      "loss": 1.8687,
+      "step": 18860
+    },
+    {
+      "epoch": 5.92,
+      "learning_rate": 4.080928481806776e-06,
+      "loss": 1.9409,
+      "step": 18870
+    },
+    {
+      "epoch": 5.92,
+      "learning_rate": 4.077791718946048e-06,
+      "loss": 1.8839,
+      "step": 18880
+    },
+    {
+      "epoch": 5.93,
+      "learning_rate": 4.0746549560853206e-06,
+      "loss": 1.8978,
+      "step": 18890
+    },
+    {
+      "epoch": 5.93,
+      "learning_rate": 4.071518193224593e-06,
+      "loss": 1.8626,
+      "step": 18900
+    },
+    {
+      "epoch": 5.93,
+      "learning_rate": 4.068381430363865e-06,
+      "loss": 1.8504,
+      "step": 18910
+    },
+    {
+      "epoch": 5.93,
+      "learning_rate": 4.065244667503137e-06,
+      "loss": 1.8749,
+      "step": 18920
+    },
+    {
+      "epoch": 5.94,
+      "learning_rate": 4.06210790464241e-06,
+      "loss": 1.8506,
+      "step": 18930
+    },
+    {
+      "epoch": 5.94,
+      "learning_rate": 4.058971141781682e-06,
+      "loss": 1.931,
+      "step": 18940
+    },
+    {
+      "epoch": 5.94,
+      "learning_rate": 4.055834378920954e-06,
+      "loss": 1.8148,
+      "step": 18950
+    },
+    {
+      "epoch": 5.95,
+      "learning_rate": 4.052697616060226e-06,
+      "loss": 1.8824,
+      "step": 18960
+    },
+    {
+      "epoch": 5.95,
+      "learning_rate": 4.049560853199499e-06,
+      "loss": 1.8675,
+      "step": 18970
+    },
+    {
+      "epoch": 5.95,
+      "learning_rate": 4.046424090338771e-06,
+      "loss": 1.8647,
+      "step": 18980
+    },
+    {
+      "epoch": 5.96,
+      "learning_rate": 4.0432873274780436e-06,
+      "loss": 1.8516,
+      "step": 18990
+    },
+    {
+      "epoch": 5.96,
+      "learning_rate": 4.040150564617315e-06,
+      "loss": 1.8243,
+      "step": 19000
+    },
+    {
+      "epoch": 5.96,
+      "learning_rate": 4.037013801756588e-06,
+      "loss": 1.8079,
+      "step": 19010
+    },
+    {
+      "epoch": 5.97,
+      "learning_rate": 4.03387703889586e-06,
+      "loss": 1.9742,
+      "step": 19020
+    },
+    {
+      "epoch": 5.97,
+      "learning_rate": 4.030740276035133e-06,
+      "loss": 1.861,
+      "step": 19030
+    },
+    {
+      "epoch": 5.97,
+      "learning_rate": 4.027603513174405e-06,
+      "loss": 1.8048,
+      "step": 19040
+    },
+    {
+      "epoch": 5.98,
+      "learning_rate": 4.024466750313677e-06,
+      "loss": 1.825,
+      "step": 19050
+    },
+    {
+      "epoch": 5.98,
+      "learning_rate": 4.021329987452949e-06,
+      "loss": 1.903,
+      "step": 19060
+    },
+    {
+      "epoch": 5.98,
+      "learning_rate": 4.018193224592221e-06,
+      "loss": 1.8486,
+      "step": 19070
+    },
+    {
+      "epoch": 5.98,
+      "learning_rate": 4.015056461731494e-06,
+      "loss": 1.887,
+      "step": 19080
+    },
+    {
+      "epoch": 5.99,
+      "learning_rate": 4.011919698870766e-06,
+      "loss": 1.888,
+      "step": 19090
+    },
+    {
+      "epoch": 5.99,
+      "learning_rate": 4.008782936010038e-06,
+      "loss": 1.8812,
+      "step": 19100
+    },
+    {
+      "epoch": 5.99,
+      "learning_rate": 4.00564617314931e-06,
+      "loss": 1.8965,
+      "step": 19110
+    },
+    {
+      "epoch": 6.0,
+      "learning_rate": 4.002509410288583e-06,
+      "loss": 1.8517,
+      "step": 19120
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 1.820218563079834,
+      "eval_runtime": 13.6104,
+      "eval_samples_per_second": 73.473,
+      "eval_steps_per_second": 4.629,
+      "step": 19128
+    },
+    {
+      "epoch": 6.0,
+      "learning_rate": 3.999372647427855e-06,
+      "loss": 1.8515,
+      "step": 19130
+    },
+    {
+      "epoch": 6.0,
+      "learning_rate": 3.996235884567128e-06,
+      "loss": 1.8202,
+      "step": 19140
+    },
+    {
+      "epoch": 6.01,
+      "learning_rate": 3.993099121706399e-06,
+      "loss": 1.8573,
+      "step": 19150
+    },
+    {
+      "epoch": 6.01,
+      "learning_rate": 3.989962358845672e-06,
+      "loss": 1.8828,
+      "step": 19160
+    },
+    {
+      "epoch": 6.01,
+      "learning_rate": 3.986825595984944e-06,
+      "loss": 1.872,
+      "step": 19170
+    },
+    {
+      "epoch": 6.02,
+      "learning_rate": 3.983688833124217e-06,
+      "loss": 1.8282,
+      "step": 19180
+    },
+    {
+      "epoch": 6.02,
+      "learning_rate": 3.980552070263489e-06,
+      "loss": 1.8519,
+      "step": 19190
+    },
+    {
+      "epoch": 6.02,
+      "learning_rate": 3.977415307402761e-06,
+      "loss": 1.9018,
+      "step": 19200
+    },
+    {
+      "epoch": 6.03,
+      "learning_rate": 3.974278544542033e-06,
+      "loss": 1.8212,
+      "step": 19210
+    },
+    {
+      "epoch": 6.03,
+      "learning_rate": 3.971141781681306e-06,
+      "loss": 1.8703,
+      "step": 19220
+    },
+    {
+      "epoch": 6.03,
+      "learning_rate": 3.968005018820578e-06,
+      "loss": 1.9376,
+      "step": 19230
+    },
+    {
+      "epoch": 6.04,
+      "learning_rate": 3.96486825595985e-06,
+      "loss": 1.7754,
+      "step": 19240
+    },
+    {
+      "epoch": 6.04,
+      "learning_rate": 3.961731493099122e-06,
+      "loss": 1.7899,
+      "step": 19250
+    },
+    {
+      "epoch": 6.04,
+      "learning_rate": 3.958594730238394e-06,
+      "loss": 1.8671,
+      "step": 19260
+    },
+    {
+      "epoch": 6.04,
+      "learning_rate": 3.955457967377667e-06,
+      "loss": 1.7892,
+      "step": 19270
+    },
+    {
+      "epoch": 6.05,
+      "learning_rate": 3.952321204516939e-06,
+      "loss": 1.8795,
+      "step": 19280
+    },
+    {
+      "epoch": 6.05,
+      "learning_rate": 3.949184441656211e-06,
+      "loss": 1.8955,
+      "step": 19290
+    },
+    {
+      "epoch": 6.05,
+      "learning_rate": 3.946047678795483e-06,
+      "loss": 1.8663,
+      "step": 19300
+    },
+    {
+      "epoch": 6.06,
+      "learning_rate": 3.942910915934756e-06,
+      "loss": 1.8066,
+      "step": 19310
+    },
+    {
+      "epoch": 6.06,
+      "learning_rate": 3.939774153074028e-06,
+      "loss": 1.8993,
+      "step": 19320
+    },
+    {
+      "epoch": 6.06,
+      "learning_rate": 3.936637390213301e-06,
+      "loss": 1.9433,
+      "step": 19330
+    },
+    {
+      "epoch": 6.07,
+      "learning_rate": 3.933500627352572e-06,
+      "loss": 1.8686,
+      "step": 19340
+    },
+    {
+      "epoch": 6.07,
+      "learning_rate": 3.930363864491845e-06,
+      "loss": 1.8731,
+      "step": 19350
+    },
+    {
+      "epoch": 6.07,
+      "learning_rate": 3.927227101631117e-06,
+      "loss": 1.8142,
+      "step": 19360
+    },
+    {
+      "epoch": 6.08,
+      "learning_rate": 3.92409033877039e-06,
+      "loss": 1.8284,
+      "step": 19370
+    },
+    {
+      "epoch": 6.08,
+      "learning_rate": 3.920953575909662e-06,
+      "loss": 1.8389,
+      "step": 19380
+    },
+    {
+      "epoch": 6.08,
+      "learning_rate": 3.917816813048934e-06,
+      "loss": 1.8045,
+      "step": 19390
+    },
+    {
+      "epoch": 6.09,
+      "learning_rate": 3.914680050188206e-06,
+      "loss": 1.9117,
+      "step": 19400
+    },
+    {
+      "epoch": 6.09,
+      "learning_rate": 3.911543287327478e-06,
+      "loss": 1.7794,
+      "step": 19410
+    },
+    {
+      "epoch": 6.09,
+      "learning_rate": 3.908406524466751e-06,
+      "loss": 1.7901,
+      "step": 19420
+    },
+    {
+      "epoch": 6.09,
+      "learning_rate": 3.905269761606023e-06,
+      "loss": 1.8203,
+      "step": 19430
+    },
+    {
+      "epoch": 6.1,
+      "learning_rate": 3.902132998745295e-06,
+      "loss": 1.8181,
+      "step": 19440
+    },
+    {
+      "epoch": 6.1,
+      "learning_rate": 3.898996235884567e-06,
+      "loss": 1.8661,
+      "step": 19450
+    },
+    {
+      "epoch": 6.1,
+      "learning_rate": 3.89585947302384e-06,
+      "loss": 1.838,
+      "step": 19460
+    },
+    {
+      "epoch": 6.11,
+      "learning_rate": 3.892722710163112e-06,
+      "loss": 1.8502,
+      "step": 19470
+    },
+    {
+      "epoch": 6.11,
+      "learning_rate": 3.889585947302385e-06,
+      "loss": 1.8204,
+      "step": 19480
+    },
+    {
+      "epoch": 6.11,
+      "learning_rate": 3.886449184441656e-06,
+      "loss": 1.8692,
+      "step": 19490
+    },
+    {
+      "epoch": 6.12,
+      "learning_rate": 3.883312421580929e-06,
+      "loss": 1.8647,
+      "step": 19500
+    },
+    {
+      "epoch": 6.12,
+      "learning_rate": 3.880175658720201e-06,
+      "loss": 1.8123,
+      "step": 19510
+    },
+    {
+      "epoch": 6.12,
+      "learning_rate": 3.877038895859474e-06,
+      "loss": 1.832,
+      "step": 19520
+    },
+    {
+      "epoch": 6.13,
+      "learning_rate": 3.873902132998746e-06,
+      "loss": 1.8847,
+      "step": 19530
+    },
+    {
+      "epoch": 6.13,
+      "learning_rate": 3.870765370138018e-06,
+      "loss": 1.8446,
+      "step": 19540
+    },
+    {
+      "epoch": 6.13,
+      "learning_rate": 3.86762860727729e-06,
+      "loss": 1.8965,
+      "step": 19550
+    },
+    {
+      "epoch": 6.14,
+      "learning_rate": 3.864491844416563e-06,
+      "loss": 1.873,
+      "step": 19560
+    },
+    {
+      "epoch": 6.14,
+      "learning_rate": 3.861355081555835e-06,
+      "loss": 1.8918,
+      "step": 19570
+    },
+    {
+      "epoch": 6.14,
+      "learning_rate": 3.858218318695107e-06,
+      "loss": 1.8685,
+      "step": 19580
+    },
+    {
+      "epoch": 6.14,
+      "learning_rate": 3.855081555834379e-06,
+      "loss": 1.794,
+      "step": 19590
+    },
+    {
+      "epoch": 6.15,
+      "learning_rate": 3.851944792973651e-06,
+      "loss": 1.8445,
+      "step": 19600
+    },
+    {
+      "epoch": 6.15,
+      "learning_rate": 3.848808030112924e-06,
+      "loss": 1.906,
+      "step": 19610
+    },
+    {
+      "epoch": 6.15,
+      "learning_rate": 3.845671267252196e-06,
+      "loss": 1.8826,
+      "step": 19620
+    },
+    {
+      "epoch": 6.16,
+      "learning_rate": 3.842534504391469e-06,
+      "loss": 1.7995,
+      "step": 19630
+    },
+    {
+      "epoch": 6.16,
+      "learning_rate": 3.83939774153074e-06,
+      "loss": 1.8395,
+      "step": 19640
+    },
+    {
+      "epoch": 6.16,
+      "learning_rate": 3.836260978670013e-06,
+      "loss": 1.7938,
+      "step": 19650
+    },
+    {
+      "epoch": 6.17,
+      "learning_rate": 3.833124215809285e-06,
+      "loss": 1.8405,
+      "step": 19660
+    },
+    {
+      "epoch": 6.17,
+      "learning_rate": 3.829987452948558e-06,
+      "loss": 1.8978,
+      "step": 19670
+    },
+    {
+      "epoch": 6.17,
+      "learning_rate": 3.82685069008783e-06,
+      "loss": 1.8358,
+      "step": 19680
+    },
+    {
+      "epoch": 6.18,
+      "learning_rate": 3.823713927227102e-06,
+      "loss": 1.8231,
+      "step": 19690
+    },
+    {
+      "epoch": 6.18,
+      "learning_rate": 3.820577164366374e-06,
+      "loss": 1.8856,
+      "step": 19700
+    },
+    {
+      "epoch": 6.18,
+      "learning_rate": 3.817440401505647e-06,
+      "loss": 1.833,
+      "step": 19710
+    },
+    {
+      "epoch": 6.19,
+      "learning_rate": 3.814303638644919e-06,
+      "loss": 1.7791,
+      "step": 19720
+    },
+    {
+      "epoch": 6.19,
+      "learning_rate": 3.8111668757841906e-06,
+      "loss": 1.8833,
+      "step": 19730
+    },
+    {
+      "epoch": 6.19,
+      "learning_rate": 3.808030112923463e-06,
+      "loss": 1.8569,
+      "step": 19740
+    },
+    {
+      "epoch": 6.2,
+      "learning_rate": 3.8048933500627355e-06,
+      "loss": 1.8558,
+      "step": 19750
+    },
+    {
+      "epoch": 6.2,
+      "learning_rate": 3.801756587202008e-06,
+      "loss": 1.7897,
+      "step": 19760
+    },
+    {
+      "epoch": 6.2,
+      "learning_rate": 3.7986198243412804e-06,
+      "loss": 1.7784,
+      "step": 19770
+    },
+    {
+      "epoch": 6.2,
+      "learning_rate": 3.795483061480552e-06,
+      "loss": 1.8789,
+      "step": 19780
+    },
+    {
+      "epoch": 6.21,
+      "learning_rate": 3.7923462986198245e-06,
+      "loss": 1.8205,
+      "step": 19790
+    },
+    {
+      "epoch": 6.21,
+      "learning_rate": 3.789209535759097e-06,
+      "loss": 1.7749,
+      "step": 19800
+    },
+    {
+      "epoch": 6.21,
+      "learning_rate": 3.7860727728983695e-06,
+      "loss": 1.896,
+      "step": 19810
+    },
+    {
+      "epoch": 6.22,
+      "learning_rate": 3.7829360100376415e-06,
+      "loss": 1.8102,
+      "step": 19820
+    },
+    {
+      "epoch": 6.22,
+      "learning_rate": 3.7797992471769136e-06,
+      "loss": 1.8213,
+      "step": 19830
+    },
+    {
+      "epoch": 6.22,
+      "learning_rate": 3.776662484316186e-06,
+      "loss": 1.8603,
+      "step": 19840
+    },
+    {
+      "epoch": 6.23,
+      "learning_rate": 3.773525721455458e-06,
+      "loss": 1.8553,
+      "step": 19850
+    },
+    {
+      "epoch": 6.23,
+      "learning_rate": 3.7703889585947305e-06,
+      "loss": 1.8214,
+      "step": 19860
+    },
+    {
+      "epoch": 6.23,
+      "learning_rate": 3.767252195734003e-06,
+      "loss": 1.8503,
+      "step": 19870
+    },
+    {
+      "epoch": 6.24,
+      "learning_rate": 3.764115432873275e-06,
+      "loss": 1.8402,
+      "step": 19880
+    },
+    {
+      "epoch": 6.24,
+      "learning_rate": 3.760978670012547e-06,
+      "loss": 1.8608,
+      "step": 19890
+    },
+    {
+      "epoch": 6.24,
+      "learning_rate": 3.7578419071518196e-06,
+      "loss": 1.8155,
+      "step": 19900
+    },
+    {
+      "epoch": 6.25,
+      "learning_rate": 3.754705144291092e-06,
+      "loss": 1.8343,
+      "step": 19910
+    },
+    {
+      "epoch": 6.25,
+      "learning_rate": 3.7515683814303645e-06,
+      "loss": 1.799,
+      "step": 19920
+    },
+    {
+      "epoch": 6.25,
+      "learning_rate": 3.748431618569636e-06,
+      "loss": 1.8569,
+      "step": 19930
+    },
+    {
+      "epoch": 6.25,
+      "learning_rate": 3.7452948557089086e-06,
+      "loss": 1.866,
+      "step": 19940
+    },
+    {
+      "epoch": 6.26,
+      "learning_rate": 3.742158092848181e-06,
+      "loss": 1.8655,
+      "step": 19950
+    },
+    {
+      "epoch": 6.26,
+      "learning_rate": 3.7390213299874535e-06,
+      "loss": 1.8749,
+      "step": 19960
+    },
+    {
+      "epoch": 6.26,
+      "learning_rate": 3.7358845671267256e-06,
+      "loss": 1.8648,
+      "step": 19970
+    },
+    {
+      "epoch": 6.27,
+      "learning_rate": 3.7327478042659976e-06,
+      "loss": 1.8451,
+      "step": 19980
+    },
+    {
+      "epoch": 6.27,
+      "learning_rate": 3.72961104140527e-06,
+      "loss": 1.851,
+      "step": 19990
+    },
+    {
+      "epoch": 6.27,
+      "learning_rate": 3.7264742785445425e-06,
+      "loss": 1.8748,
+      "step": 20000
+    },
+    {
+      "epoch": 6.28,
+      "learning_rate": 3.7233375156838146e-06,
+      "loss": 1.8593,
+      "step": 20010
+    },
+    {
+      "epoch": 6.28,
+      "learning_rate": 3.720200752823087e-06,
+      "loss": 1.8805,
+      "step": 20020
+    },
+    {
+      "epoch": 6.28,
+      "learning_rate": 3.717063989962359e-06,
+      "loss": 1.8956,
+      "step": 20030
+    },
+    {
+      "epoch": 6.29,
+      "learning_rate": 3.713927227101631e-06,
+      "loss": 1.7666,
+      "step": 20040
+    },
+    {
+      "epoch": 6.29,
+      "learning_rate": 3.7107904642409036e-06,
+      "loss": 1.8467,
+      "step": 20050
+    },
+    {
+      "epoch": 6.29,
+      "learning_rate": 3.707653701380176e-06,
+      "loss": 1.8006,
+      "step": 20060
+    },
+    {
+      "epoch": 6.3,
+      "learning_rate": 3.7045169385194486e-06,
+      "loss": 1.857,
+      "step": 20070
+    },
+    {
+      "epoch": 6.3,
+      "learning_rate": 3.70138017565872e-06,
+      "loss": 1.8936,
+      "step": 20080
+    },
+    {
+      "epoch": 6.3,
+      "learning_rate": 3.6982434127979926e-06,
+      "loss": 1.878,
+      "step": 20090
+    },
+    {
+      "epoch": 6.3,
+      "learning_rate": 3.695106649937265e-06,
+      "loss": 1.8468,
+      "step": 20100
+    },
+    {
+      "epoch": 6.31,
+      "learning_rate": 3.6919698870765376e-06,
+      "loss": 1.8623,
+      "step": 20110
+    },
+    {
+      "epoch": 6.31,
+      "learning_rate": 3.68883312421581e-06,
+      "loss": 1.8862,
+      "step": 20120
+    },
+    {
+      "epoch": 6.31,
+      "learning_rate": 3.6856963613550817e-06,
+      "loss": 1.919,
+      "step": 20130
+    },
+    {
+      "epoch": 6.32,
+      "learning_rate": 3.682559598494354e-06,
+      "loss": 1.8015,
+      "step": 20140
+    },
+    {
+      "epoch": 6.32,
+      "learning_rate": 3.6794228356336266e-06,
+      "loss": 1.9012,
+      "step": 20150
+    },
+    {
+      "epoch": 6.32,
+      "learning_rate": 3.6762860727728987e-06,
+      "loss": 1.7934,
+      "step": 20160
+    },
+    {
+      "epoch": 6.33,
+      "learning_rate": 3.6731493099121707e-06,
+      "loss": 1.8981,
+      "step": 20170
+    },
+    {
+      "epoch": 6.33,
+      "learning_rate": 3.670012547051443e-06,
+      "loss": 1.829,
+      "step": 20180
+    },
+    {
+      "epoch": 6.33,
+      "learning_rate": 3.6668757841907156e-06,
+      "loss": 1.881,
+      "step": 20190
+    },
+    {
+      "epoch": 6.34,
+      "learning_rate": 3.6637390213299877e-06,
+      "loss": 1.8448,
+      "step": 20200
+    },
+    {
+      "epoch": 6.34,
+      "learning_rate": 3.66060225846926e-06,
+      "loss": 1.9271,
+      "step": 20210
+    },
+    {
+      "epoch": 6.34,
+      "learning_rate": 3.657465495608532e-06,
+      "loss": 1.8187,
+      "step": 20220
+    },
+    {
+      "epoch": 6.35,
+      "learning_rate": 3.6543287327478042e-06,
+      "loss": 1.9268,
+      "step": 20230
+    },
+    {
+      "epoch": 6.35,
+      "learning_rate": 3.6511919698870767e-06,
+      "loss": 1.8618,
+      "step": 20240
+    },
+    {
+      "epoch": 6.35,
+      "learning_rate": 3.648055207026349e-06,
+      "loss": 1.9267,
+      "step": 20250
+    },
+    {
+      "epoch": 6.36,
+      "learning_rate": 3.6449184441656216e-06,
+      "loss": 1.8365,
+      "step": 20260
+    },
+    {
+      "epoch": 6.36,
+      "learning_rate": 3.6417816813048933e-06,
+      "loss": 1.872,
+      "step": 20270
+    },
+    {
+      "epoch": 6.36,
+      "learning_rate": 3.6386449184441657e-06,
+      "loss": 1.9315,
+      "step": 20280
+    },
+    {
+      "epoch": 6.36,
+      "learning_rate": 3.635508155583438e-06,
+      "loss": 1.8915,
+      "step": 20290
+    },
+    {
+      "epoch": 6.37,
+      "learning_rate": 3.6323713927227107e-06,
+      "loss": 1.8635,
+      "step": 20300
+    },
+    {
+      "epoch": 6.37,
+      "learning_rate": 3.629234629861983e-06,
+      "loss": 1.8777,
+      "step": 20310
+    },
+    {
+      "epoch": 6.37,
+      "learning_rate": 3.6260978670012548e-06,
+      "loss": 1.9139,
+      "step": 20320
+    },
+    {
+      "epoch": 6.38,
+      "learning_rate": 3.6229611041405272e-06,
+      "loss": 1.7894,
+      "step": 20330
+    },
+    {
+      "epoch": 6.38,
+      "learning_rate": 3.6198243412797997e-06,
+      "loss": 1.8212,
+      "step": 20340
+    },
+    {
+      "epoch": 6.38,
+      "learning_rate": 3.6166875784190717e-06,
+      "loss": 1.8739,
+      "step": 20350
+    },
+    {
+      "epoch": 6.39,
+      "learning_rate": 3.6135508155583442e-06,
+      "loss": 1.8713,
+      "step": 20360
+    },
+    {
+      "epoch": 6.39,
+      "learning_rate": 3.6104140526976163e-06,
+      "loss": 1.9083,
+      "step": 20370
+    },
+    {
+      "epoch": 6.39,
+      "learning_rate": 3.6072772898368887e-06,
+      "loss": 1.8237,
+      "step": 20380
+    },
+    {
+      "epoch": 6.4,
+      "learning_rate": 3.6041405269761608e-06,
+      "loss": 1.8525,
+      "step": 20390
+    },
+    {
+      "epoch": 6.4,
+      "learning_rate": 3.6010037641154332e-06,
+      "loss": 1.9006,
+      "step": 20400
+    },
+    {
+      "epoch": 6.4,
+      "learning_rate": 3.5978670012547057e-06,
+      "loss": 1.7635,
+      "step": 20410
+    },
+    {
+      "epoch": 6.41,
+      "learning_rate": 3.5947302383939773e-06,
+      "loss": 1.8394,
+      "step": 20420
+    },
+    {
+      "epoch": 6.41,
+      "learning_rate": 3.59159347553325e-06,
+      "loss": 1.8701,
+      "step": 20430
+    },
+    {
+      "epoch": 6.41,
+      "learning_rate": 3.5884567126725223e-06,
+      "loss": 1.8752,
+      "step": 20440
+    },
+    {
+      "epoch": 6.41,
+      "learning_rate": 3.5853199498117947e-06,
+      "loss": 1.8211,
+      "step": 20450
+    },
+    {
+      "epoch": 6.42,
+      "learning_rate": 3.582183186951067e-06,
+      "loss": 1.8772,
+      "step": 20460
+    },
+    {
+      "epoch": 6.42,
+      "learning_rate": 3.579046424090339e-06,
+      "loss": 1.849,
+      "step": 20470
+    },
+    {
+      "epoch": 6.42,
+      "learning_rate": 3.5759096612296113e-06,
+      "loss": 1.8903,
+      "step": 20480
+    },
+    {
+      "epoch": 6.43,
+      "learning_rate": 3.5727728983688838e-06,
+      "loss": 1.8642,
+      "step": 20490
+    },
+    {
+      "epoch": 6.43,
+      "learning_rate": 3.5696361355081562e-06,
+      "loss": 1.9196,
+      "step": 20500
+    },
+    {
+      "epoch": 6.43,
+      "learning_rate": 3.5664993726474283e-06,
+      "loss": 1.7683,
+      "step": 20510
+    },
+    {
+      "epoch": 6.44,
+      "learning_rate": 3.5633626097867003e-06,
+      "loss": 1.8828,
+      "step": 20520
+    },
+    {
+      "epoch": 6.44,
+      "learning_rate": 3.560225846925973e-06,
+      "loss": 1.828,
+      "step": 20530
+    },
+    {
+      "epoch": 6.44,
+      "learning_rate": 3.557089084065245e-06,
+      "loss": 1.8525,
+      "step": 20540
+    },
+    {
+      "epoch": 6.45,
+      "learning_rate": 3.5539523212045173e-06,
+      "loss": 1.8449,
+      "step": 20550
+    },
+    {
+      "epoch": 6.45,
+      "learning_rate": 3.5508155583437898e-06,
+      "loss": 1.8893,
+      "step": 20560
+    },
+    {
+      "epoch": 6.45,
+      "learning_rate": 3.5476787954830614e-06,
+      "loss": 1.854,
+      "step": 20570
+    },
+    {
+      "epoch": 6.46,
+      "learning_rate": 3.544542032622334e-06,
+      "loss": 1.8377,
+      "step": 20580
+    },
+    {
+      "epoch": 6.46,
+      "learning_rate": 3.5414052697616063e-06,
+      "loss": 1.813,
+      "step": 20590
+    },
+    {
+      "epoch": 6.46,
+      "learning_rate": 3.538268506900879e-06,
+      "loss": 1.8363,
+      "step": 20600
+    },
+    {
+      "epoch": 6.46,
+      "learning_rate": 3.5351317440401504e-06,
+      "loss": 1.8302,
+      "step": 20610
+    },
+    {
+      "epoch": 6.47,
+      "learning_rate": 3.531994981179423e-06,
+      "loss": 1.9071,
+      "step": 20620
+    },
+    {
+      "epoch": 6.47,
+      "learning_rate": 3.5288582183186954e-06,
+      "loss": 1.8615,
+      "step": 20630
+    },
+    {
+      "epoch": 6.47,
+      "learning_rate": 3.525721455457968e-06,
+      "loss": 1.8583,
+      "step": 20640
+    },
+    {
+      "epoch": 6.48,
+      "learning_rate": 3.5225846925972403e-06,
+      "loss": 1.8154,
+      "step": 20650
+    },
+    {
+      "epoch": 6.48,
+      "learning_rate": 3.519447929736512e-06,
+      "loss": 1.8424,
+      "step": 20660
+    },
+    {
+      "epoch": 6.48,
+      "learning_rate": 3.5163111668757844e-06,
+      "loss": 1.8462,
+      "step": 20670
+    },
+    {
+      "epoch": 6.49,
+      "learning_rate": 3.513174404015057e-06,
+      "loss": 1.9179,
+      "step": 20680
+    },
+    {
+      "epoch": 6.49,
+      "learning_rate": 3.510037641154329e-06,
+      "loss": 1.8476,
+      "step": 20690
+    },
+    {
+      "epoch": 6.49,
+      "learning_rate": 3.5069008782936014e-06,
+      "loss": 1.8685,
+      "step": 20700
+    },
+    {
+      "epoch": 6.5,
+      "learning_rate": 3.5037641154328734e-06,
+      "loss": 1.845,
+      "step": 20710
+    },
+    {
+      "epoch": 6.5,
+      "learning_rate": 3.500627352572146e-06,
+      "loss": 1.8744,
+      "step": 20720
+    },
+    {
+      "epoch": 6.5,
+      "learning_rate": 3.497490589711418e-06,
+      "loss": 1.9114,
+      "step": 20730
+    },
+    {
+      "epoch": 6.51,
+      "learning_rate": 3.4943538268506904e-06,
+      "loss": 1.8563,
+      "step": 20740
+    },
+    {
+      "epoch": 6.51,
+      "learning_rate": 3.491217063989963e-06,
+      "loss": 1.866,
+      "step": 20750
+    },
+    {
+      "epoch": 6.51,
+      "learning_rate": 3.4880803011292345e-06,
+      "loss": 1.939,
+      "step": 20760
+    },
+    {
+      "epoch": 6.52,
+      "learning_rate": 3.484943538268507e-06,
+      "loss": 1.8258,
+      "step": 20770
+    },
+    {
+      "epoch": 6.52,
+      "learning_rate": 3.4818067754077794e-06,
+      "loss": 1.903,
+      "step": 20780
+    },
+    {
+      "epoch": 6.52,
+      "learning_rate": 3.478670012547052e-06,
+      "loss": 1.8604,
+      "step": 20790
+    },
+    {
+      "epoch": 6.52,
+      "learning_rate": 3.4755332496863244e-06,
+      "loss": 1.7805,
+      "step": 20800
+    },
+    {
+      "epoch": 6.53,
+      "learning_rate": 3.472396486825596e-06,
+      "loss": 1.8821,
+      "step": 20810
+    },
+    {
+      "epoch": 6.53,
+      "learning_rate": 3.4692597239648684e-06,
+      "loss": 1.8822,
+      "step": 20820
+    },
+    {
+      "epoch": 6.53,
+      "learning_rate": 3.466122961104141e-06,
+      "loss": 1.8069,
+      "step": 20830
+    },
+    {
+      "epoch": 6.54,
+      "learning_rate": 3.4629861982434134e-06,
+      "loss": 1.8125,
+      "step": 20840
+    },
+    {
+      "epoch": 6.54,
+      "learning_rate": 3.4598494353826854e-06,
+      "loss": 1.9261,
+      "step": 20850
+    },
+    {
+      "epoch": 6.54,
+      "learning_rate": 3.4567126725219575e-06,
+      "loss": 1.7982,
+      "step": 20860
+    },
+    {
+      "epoch": 6.55,
+      "learning_rate": 3.45357590966123e-06,
+      "loss": 1.9647,
+      "step": 20870
+    },
+    {
+      "epoch": 6.55,
+      "learning_rate": 3.450439146800502e-06,
+      "loss": 1.7362,
+      "step": 20880
+    },
+    {
+      "epoch": 6.55,
+      "learning_rate": 3.4473023839397745e-06,
+      "loss": 1.828,
+      "step": 20890
+    },
+    {
+      "epoch": 6.56,
+      "learning_rate": 3.444165621079047e-06,
+      "loss": 1.8861,
+      "step": 20900
+    },
+    {
+      "epoch": 6.56,
+      "learning_rate": 3.441028858218319e-06,
+      "loss": 1.9139,
+      "step": 20910
+    },
+    {
+      "epoch": 6.56,
+      "learning_rate": 3.437892095357591e-06,
+      "loss": 1.866,
+      "step": 20920
+    },
+    {
+      "epoch": 6.57,
+      "learning_rate": 3.4347553324968635e-06,
+      "loss": 1.7982,
+      "step": 20930
+    },
+    {
+      "epoch": 6.57,
+      "learning_rate": 3.431618569636136e-06,
+      "loss": 1.884,
+      "step": 20940
+    },
+    {
+      "epoch": 6.57,
+      "learning_rate": 3.4284818067754084e-06,
+      "loss": 1.7966,
+      "step": 20950
+    },
+    {
+      "epoch": 6.57,
+      "learning_rate": 3.42534504391468e-06,
+      "loss": 1.9128,
+      "step": 20960
+    },
+    {
+      "epoch": 6.58,
+      "learning_rate": 3.4222082810539525e-06,
+      "loss": 1.8859,
+      "step": 20970
+    },
+    {
+      "epoch": 6.58,
+      "learning_rate": 3.419071518193225e-06,
+      "loss": 1.8191,
+      "step": 20980
+    },
+    {
+      "epoch": 6.58,
+      "learning_rate": 3.4159347553324974e-06,
+      "loss": 1.8634,
+      "step": 20990
+    },
+    {
+      "epoch": 6.59,
+      "learning_rate": 3.4127979924717695e-06,
+      "loss": 1.8503,
+      "step": 21000
+    },
+    {
+      "epoch": 6.59,
+      "learning_rate": 3.4096612296110415e-06,
+      "loss": 1.9214,
+      "step": 21010
+    },
+    {
+      "epoch": 6.59,
+      "learning_rate": 3.406524466750314e-06,
+      "loss": 1.9069,
+      "step": 21020
+    },
+    {
+      "epoch": 6.6,
+      "learning_rate": 3.4033877038895865e-06,
+      "loss": 1.8755,
+      "step": 21030
+    },
+    {
+      "epoch": 6.6,
+      "learning_rate": 3.4002509410288585e-06,
+      "loss": 1.899,
+      "step": 21040
+    },
+    {
+      "epoch": 6.6,
+      "learning_rate": 3.3971141781681306e-06,
+      "loss": 1.8116,
+      "step": 21050
+    },
+    {
+      "epoch": 6.61,
+      "learning_rate": 3.393977415307403e-06,
+      "loss": 1.838,
+      "step": 21060
+    },
+    {
+      "epoch": 6.61,
+      "learning_rate": 3.390840652446675e-06,
+      "loss": 1.7901,
+      "step": 21070
+    },
+    {
+      "epoch": 6.61,
+      "learning_rate": 3.3877038895859475e-06,
+      "loss": 1.7779,
+      "step": 21080
+    },
+    {
+      "epoch": 6.62,
+      "learning_rate": 3.38456712672522e-06,
+      "loss": 1.869,
+      "step": 21090
+    },
+    {
+      "epoch": 6.62,
+      "learning_rate": 3.381430363864492e-06,
+      "loss": 1.8738,
+      "step": 21100
+    },
+    {
+      "epoch": 6.62,
+      "learning_rate": 3.378293601003764e-06,
+      "loss": 1.8513,
+      "step": 21110
+    },
+    {
+      "epoch": 6.62,
+      "learning_rate": 3.3751568381430366e-06,
+      "loss": 1.8699,
+      "step": 21120
+    },
+    {
+      "epoch": 6.63,
+      "learning_rate": 3.372020075282309e-06,
+      "loss": 1.9343,
+      "step": 21130
+    },
+    {
+      "epoch": 6.63,
+      "learning_rate": 3.3688833124215815e-06,
+      "loss": 1.8008,
+      "step": 21140
+    },
+    {
+      "epoch": 6.63,
+      "learning_rate": 3.365746549560853e-06,
+      "loss": 1.8803,
+      "step": 21150
+    },
+    {
+      "epoch": 6.64,
+      "learning_rate": 3.3626097867001256e-06,
+      "loss": 1.8176,
+      "step": 21160
+    },
+    {
+      "epoch": 6.64,
+      "learning_rate": 3.359473023839398e-06,
+      "loss": 1.855,
+      "step": 21170
+    },
+    {
+      "epoch": 6.64,
+      "learning_rate": 3.3563362609786705e-06,
+      "loss": 1.8621,
+      "step": 21180
+    },
+    {
+      "epoch": 6.65,
+      "learning_rate": 3.3531994981179426e-06,
+      "loss": 1.8497,
+      "step": 21190
+    },
+    {
+      "epoch": 6.65,
+      "learning_rate": 3.3500627352572146e-06,
+      "loss": 1.7359,
+      "step": 21200
+    },
+    {
+      "epoch": 6.65,
+      "learning_rate": 3.346925972396487e-06,
+      "loss": 1.8435,
+      "step": 21210
+    },
+    {
+      "epoch": 6.66,
+      "learning_rate": 3.3437892095357596e-06,
+      "loss": 1.6913,
+      "step": 21220
+    },
+    {
+      "epoch": 6.66,
+      "learning_rate": 3.3406524466750316e-06,
+      "loss": 1.924,
+      "step": 21230
+    },
+    {
+      "epoch": 6.66,
+      "learning_rate": 3.337515683814304e-06,
+      "loss": 1.8733,
+      "step": 21240
+    },
+    {
+      "epoch": 6.67,
+      "learning_rate": 3.334378920953576e-06,
+      "loss": 1.8878,
+      "step": 21250
+    },
+    {
+      "epoch": 6.67,
+      "learning_rate": 3.331242158092848e-06,
+      "loss": 1.8516,
+      "step": 21260
+    },
+    {
+      "epoch": 6.67,
+      "learning_rate": 3.3281053952321206e-06,
+      "loss": 1.8499,
+      "step": 21270
+    },
+    {
+      "epoch": 6.68,
+      "learning_rate": 3.324968632371393e-06,
+      "loss": 1.8517,
+      "step": 21280
+    },
+    {
+      "epoch": 6.68,
+      "learning_rate": 3.3218318695106656e-06,
+      "loss": 1.8365,
+      "step": 21290
+    },
+    {
+      "epoch": 6.68,
+      "learning_rate": 3.318695106649937e-06,
+      "loss": 1.8175,
+      "step": 21300
+    },
+    {
+      "epoch": 6.68,
+      "learning_rate": 3.3155583437892097e-06,
+      "loss": 1.8422,
+      "step": 21310
+    },
+    {
+      "epoch": 6.69,
+      "learning_rate": 3.312421580928482e-06,
+      "loss": 1.7359,
+      "step": 21320
+    },
+    {
+      "epoch": 6.69,
+      "learning_rate": 3.3092848180677546e-06,
+      "loss": 1.8765,
+      "step": 21330
+    },
+    {
+      "epoch": 6.69,
+      "learning_rate": 3.306148055207027e-06,
+      "loss": 1.8147,
+      "step": 21340
+    },
+    {
+      "epoch": 6.7,
+      "learning_rate": 3.3030112923462987e-06,
+      "loss": 1.9332,
+      "step": 21350
+    },
+    {
+      "epoch": 6.7,
+      "learning_rate": 3.299874529485571e-06,
+      "loss": 1.8503,
+      "step": 21360
+    },
+    {
+      "epoch": 6.7,
+      "learning_rate": 3.2967377666248436e-06,
+      "loss": 1.9164,
+      "step": 21370
+    },
+    {
+      "epoch": 6.71,
+      "learning_rate": 3.2936010037641157e-06,
+      "loss": 1.8094,
+      "step": 21380
+    },
+    {
+      "epoch": 6.71,
+      "learning_rate": 3.290464240903388e-06,
+      "loss": 1.8952,
+      "step": 21390
+    },
+    {
+      "epoch": 6.71,
+      "learning_rate": 3.28732747804266e-06,
+      "loss": 1.9792,
+      "step": 21400
+    },
+    {
+      "epoch": 6.72,
+      "learning_rate": 3.2841907151819326e-06,
+      "loss": 1.8349,
+      "step": 21410
+    },
+    {
+      "epoch": 6.72,
+      "learning_rate": 3.2810539523212047e-06,
+      "loss": 1.8463,
+      "step": 21420
+    },
+    {
+      "epoch": 6.72,
+      "learning_rate": 3.277917189460477e-06,
+      "loss": 1.8696,
+      "step": 21430
+    },
+    {
+      "epoch": 6.73,
+      "learning_rate": 3.2747804265997496e-06,
+      "loss": 1.8152,
+      "step": 21440
+    },
+    {
+      "epoch": 6.73,
+      "learning_rate": 3.2716436637390213e-06,
+      "loss": 1.8882,
+      "step": 21450
+    },
+    {
+      "epoch": 6.73,
+      "learning_rate": 3.2685069008782937e-06,
+      "loss": 1.8587,
+      "step": 21460
+    },
+    {
+      "epoch": 6.73,
+      "learning_rate": 3.265370138017566e-06,
+      "loss": 1.8516,
+      "step": 21470
+    },
+    {
+      "epoch": 6.74,
+      "learning_rate": 3.2622333751568387e-06,
+      "loss": 1.84,
+      "step": 21480
+    },
+    {
+      "epoch": 6.74,
+      "learning_rate": 3.2590966122961103e-06,
+      "loss": 1.8502,
+      "step": 21490
+    },
+    {
+      "epoch": 6.74,
+      "learning_rate": 3.2559598494353827e-06,
+      "loss": 1.9171,
+      "step": 21500
+    },
+    {
+      "epoch": 6.75,
+      "learning_rate": 3.2528230865746552e-06,
+      "loss": 1.8936,
+      "step": 21510
+    },
+    {
+      "epoch": 6.75,
+      "learning_rate": 3.2496863237139277e-06,
+      "loss": 1.8184,
+      "step": 21520
+    },
+    {
+      "epoch": 6.75,
+      "learning_rate": 3.2465495608532e-06,
+      "loss": 1.852,
+      "step": 21530
+    },
+    {
+      "epoch": 6.76,
+      "learning_rate": 3.2434127979924718e-06,
+      "loss": 1.7877,
+      "step": 21540
+    },
+    {
+      "epoch": 6.76,
+      "learning_rate": 3.2402760351317442e-06,
+      "loss": 1.8432,
+      "step": 21550
+    },
+    {
+      "epoch": 6.76,
+      "learning_rate": 3.2371392722710167e-06,
+      "loss": 1.8481,
+      "step": 21560
+    },
+    {
+      "epoch": 6.77,
+      "learning_rate": 3.2340025094102888e-06,
+      "loss": 1.856,
+      "step": 21570
+    },
+    {
+      "epoch": 6.77,
+      "learning_rate": 3.2308657465495612e-06,
+      "loss": 1.8798,
+      "step": 21580
+    },
+    {
+      "epoch": 6.77,
+      "learning_rate": 3.2277289836888333e-06,
+      "loss": 1.8807,
+      "step": 21590
+    },
+    {
+      "epoch": 6.78,
+      "learning_rate": 3.2245922208281057e-06,
+      "loss": 1.8887,
+      "step": 21600
+    },
+    {
+      "epoch": 6.78,
+      "learning_rate": 3.2214554579673778e-06,
+      "loss": 1.8997,
+      "step": 21610
+    },
+    {
+      "epoch": 6.78,
+      "learning_rate": 3.2183186951066502e-06,
+      "loss": 1.8024,
+      "step": 21620
+    },
+    {
+      "epoch": 6.78,
+      "learning_rate": 3.2151819322459227e-06,
+      "loss": 1.8396,
+      "step": 21630
+    },
+    {
+      "epoch": 6.79,
+      "learning_rate": 3.2120451693851943e-06,
+      "loss": 1.8613,
+      "step": 21640
+    },
+    {
+      "epoch": 6.79,
+      "learning_rate": 3.208908406524467e-06,
+      "loss": 1.8002,
+      "step": 21650
+    },
+    {
+      "epoch": 6.79,
+      "learning_rate": 3.2057716436637393e-06,
+      "loss": 1.8375,
+      "step": 21660
+    },
+    {
+      "epoch": 6.8,
+      "learning_rate": 3.2026348808030117e-06,
+      "loss": 1.8506,
+      "step": 21670
+    },
+    {
+      "epoch": 6.8,
+      "learning_rate": 3.1994981179422842e-06,
+      "loss": 1.7801,
+      "step": 21680
+    },
+    {
+      "epoch": 6.8,
+      "learning_rate": 3.196361355081556e-06,
+      "loss": 1.8555,
+      "step": 21690
+    },
+    {
+      "epoch": 6.81,
+      "learning_rate": 3.1932245922208283e-06,
+      "loss": 1.9085,
+      "step": 21700
+    },
+    {
+      "epoch": 6.81,
+      "learning_rate": 3.1900878293601008e-06,
+      "loss": 1.8794,
+      "step": 21710
+    },
+    {
+      "epoch": 6.81,
+      "learning_rate": 3.186951066499373e-06,
+      "loss": 1.8402,
+      "step": 21720
+    },
+    {
+      "epoch": 6.82,
+      "learning_rate": 3.1838143036386453e-06,
+      "loss": 1.8715,
+      "step": 21730
+    },
+    {
+      "epoch": 6.82,
+      "learning_rate": 3.1806775407779173e-06,
+      "loss": 1.8458,
+      "step": 21740
+    },
+    {
+      "epoch": 6.82,
+      "learning_rate": 3.17754077791719e-06,
+      "loss": 1.866,
+      "step": 21750
+    },
+    {
+      "epoch": 6.83,
+      "learning_rate": 3.174404015056462e-06,
+      "loss": 1.8352,
+      "step": 21760
+    },
+    {
+      "epoch": 6.83,
+      "learning_rate": 3.1712672521957343e-06,
+      "loss": 1.927,
+      "step": 21770
+    },
+    {
+      "epoch": 6.83,
+      "learning_rate": 3.1681304893350068e-06,
+      "loss": 1.9368,
+      "step": 21780
+    },
+    {
+      "epoch": 6.84,
+      "learning_rate": 3.1649937264742784e-06,
+      "loss": 1.875,
+      "step": 21790
+    },
+    {
+      "epoch": 6.84,
+      "learning_rate": 3.161856963613551e-06,
+      "loss": 1.8245,
+      "step": 21800
+    },
+    {
+      "epoch": 6.84,
+      "learning_rate": 3.1587202007528233e-06,
+      "loss": 1.742,
+      "step": 21810
+    },
+    {
+      "epoch": 6.84,
+      "learning_rate": 3.155583437892096e-06,
+      "loss": 1.8363,
+      "step": 21820
+    },
+    {
+      "epoch": 6.85,
+      "learning_rate": 3.1524466750313683e-06,
+      "loss": 1.8633,
+      "step": 21830
+    },
+    {
+      "epoch": 6.85,
+      "learning_rate": 3.14930991217064e-06,
+      "loss": 1.8008,
+      "step": 21840
+    },
+    {
+      "epoch": 6.85,
+      "learning_rate": 3.1461731493099124e-06,
+      "loss": 1.8937,
+      "step": 21850
+    },
+    {
+      "epoch": 6.86,
+      "learning_rate": 3.143036386449185e-06,
+      "loss": 1.7756,
+      "step": 21860
+    },
+    {
+      "epoch": 6.86,
+      "learning_rate": 3.1398996235884573e-06,
+      "loss": 1.8713,
+      "step": 21870
+    },
+    {
+      "epoch": 6.86,
+      "learning_rate": 3.1367628607277293e-06,
+      "loss": 1.8834,
+      "step": 21880
+    },
+    {
+      "epoch": 6.87,
+      "learning_rate": 3.1336260978670014e-06,
+      "loss": 1.907,
+      "step": 21890
+    },
+    {
+      "epoch": 6.87,
+      "learning_rate": 3.130489335006274e-06,
+      "loss": 1.8408,
+      "step": 21900
+    },
+    {
+      "epoch": 6.87,
+      "learning_rate": 3.127352572145546e-06,
+      "loss": 1.8588,
+      "step": 21910
+    },
+    {
+      "epoch": 6.88,
+      "learning_rate": 3.1242158092848184e-06,
+      "loss": 1.8575,
+      "step": 21920
+    },
+    {
+      "epoch": 6.88,
+      "learning_rate": 3.1210790464240904e-06,
+      "loss": 1.834,
+      "step": 21930
+    },
+    {
+      "epoch": 6.88,
+      "learning_rate": 3.117942283563363e-06,
+      "loss": 1.8749,
+      "step": 21940
+    },
+    {
+      "epoch": 6.89,
+      "learning_rate": 3.114805520702635e-06,
+      "loss": 1.8892,
+      "step": 21950
+    },
+    {
+      "epoch": 6.89,
+      "learning_rate": 3.1116687578419074e-06,
+      "loss": 1.8717,
+      "step": 21960
+    },
+    {
+      "epoch": 6.89,
+      "learning_rate": 3.10853199498118e-06,
+      "loss": 1.8847,
+      "step": 21970
+    },
+    {
+      "epoch": 6.89,
+      "learning_rate": 3.1053952321204515e-06,
+      "loss": 1.9115,
+      "step": 21980
+    },
+    {
+      "epoch": 6.9,
+      "learning_rate": 3.102258469259724e-06,
+      "loss": 1.8327,
+      "step": 21990
+    },
+    {
+      "epoch": 6.9,
+      "learning_rate": 3.0991217063989964e-06,
+      "loss": 1.8495,
+      "step": 22000
+    },
+    {
+      "epoch": 6.9,
+      "learning_rate": 3.095984943538269e-06,
+      "loss": 1.9436,
+      "step": 22010
+    },
+    {
+      "epoch": 6.91,
+      "learning_rate": 3.0928481806775414e-06,
+      "loss": 1.8471,
+      "step": 22020
+    },
+    {
+      "epoch": 6.91,
+      "learning_rate": 3.089711417816813e-06,
+      "loss": 1.9117,
+      "step": 22030
+    },
+    {
+      "epoch": 6.91,
+      "learning_rate": 3.0865746549560855e-06,
+      "loss": 1.8174,
+      "step": 22040
+    },
+    {
+      "epoch": 6.92,
+      "learning_rate": 3.083437892095358e-06,
+      "loss": 1.8559,
+      "step": 22050
+    },
+    {
+      "epoch": 6.92,
+      "learning_rate": 3.0803011292346304e-06,
+      "loss": 1.8634,
+      "step": 22060
+    },
+    {
+      "epoch": 6.92,
+      "learning_rate": 3.0771643663739024e-06,
+      "loss": 1.9015,
+      "step": 22070
+    },
+    {
+      "epoch": 6.93,
+      "learning_rate": 3.0740276035131745e-06,
+      "loss": 1.7752,
+      "step": 22080
+    },
+    {
+      "epoch": 6.93,
+      "learning_rate": 3.070890840652447e-06,
+      "loss": 1.8868,
+      "step": 22090
+    },
+    {
+      "epoch": 6.93,
+      "learning_rate": 3.067754077791719e-06,
+      "loss": 1.9024,
+      "step": 22100
+    },
+    {
+      "epoch": 6.94,
+      "learning_rate": 3.0646173149309915e-06,
+      "loss": 1.8447,
+      "step": 22110
+    },
+    {
+      "epoch": 6.94,
+      "learning_rate": 3.061480552070264e-06,
+      "loss": 1.8881,
+      "step": 22120
+    },
+    {
+      "epoch": 6.94,
+      "learning_rate": 3.058343789209536e-06,
+      "loss": 1.8455,
+      "step": 22130
+    },
+    {
+      "epoch": 6.94,
+      "learning_rate": 3.055207026348808e-06,
+      "loss": 1.8524,
+      "step": 22140
+    },
+    {
+      "epoch": 6.95,
+      "learning_rate": 3.0520702634880805e-06,
+      "loss": 1.8238,
+      "step": 22150
+    },
+    {
+      "epoch": 6.95,
+      "learning_rate": 3.048933500627353e-06,
+      "loss": 1.8148,
+      "step": 22160
+    },
+    {
+      "epoch": 6.95,
+      "learning_rate": 3.0457967377666254e-06,
+      "loss": 1.8265,
+      "step": 22170
+    },
+    {
+      "epoch": 6.96,
+      "learning_rate": 3.042659974905897e-06,
+      "loss": 1.857,
+      "step": 22180
+    },
+    {
+      "epoch": 6.96,
+      "learning_rate": 3.0395232120451695e-06,
+      "loss": 1.8612,
+      "step": 22190
+    },
+    {
+      "epoch": 6.96,
+      "learning_rate": 3.036386449184442e-06,
+      "loss": 1.8471,
+      "step": 22200
+    },
+    {
+      "epoch": 6.97,
+      "learning_rate": 3.0332496863237145e-06,
+      "loss": 1.7689,
+      "step": 22210
+    },
+    {
+      "epoch": 6.97,
+      "learning_rate": 3.0301129234629865e-06,
+      "loss": 1.848,
+      "step": 22220
+    },
+    {
+      "epoch": 6.97,
+      "learning_rate": 3.0269761606022585e-06,
+      "loss": 1.8965,
+      "step": 22230
+    },
+    {
+      "epoch": 6.98,
+      "learning_rate": 3.023839397741531e-06,
+      "loss": 1.8563,
+      "step": 22240
+    },
+    {
+      "epoch": 6.98,
+      "learning_rate": 3.0207026348808035e-06,
+      "loss": 1.8156,
+      "step": 22250
+    },
+    {
+      "epoch": 6.98,
+      "learning_rate": 3.0175658720200755e-06,
+      "loss": 1.7559,
+      "step": 22260
+    },
+    {
+      "epoch": 6.99,
+      "learning_rate": 3.014429109159348e-06,
+      "loss": 1.8958,
+      "step": 22270
+    },
+    {
+      "epoch": 6.99,
+      "learning_rate": 3.01129234629862e-06,
+      "loss": 1.8257,
+      "step": 22280
+    },
+    {
+      "epoch": 6.99,
+      "learning_rate": 3.008155583437892e-06,
+      "loss": 1.8366,
+      "step": 22290
+    },
+    {
+      "epoch": 6.99,
+      "learning_rate": 3.0050188205771646e-06,
+      "loss": 1.884,
+      "step": 22300
+    },
+    {
+      "epoch": 7.0,
+      "learning_rate": 3.001882057716437e-06,
+      "loss": 1.8405,
+      "step": 22310
+    },
+    {
+      "epoch": 7.0,
+      "eval_loss": 1.8138045072555542,
+      "eval_runtime": 13.6048,
+      "eval_samples_per_second": 73.504,
+      "eval_steps_per_second": 4.631,
+      "step": 22316
+    },
+    {
+      "epoch": 7.0,
+      "learning_rate": 2.9987452948557095e-06,
+      "loss": 1.9038,
+      "step": 22320
+    },
+    {
+      "epoch": 7.0,
+      "learning_rate": 2.995608531994981e-06,
+      "loss": 1.8173,
+      "step": 22330
+    },
+    {
+      "epoch": 7.01,
+      "learning_rate": 2.9924717691342536e-06,
+      "loss": 1.7758,
+      "step": 22340
+    },
+    {
+      "epoch": 7.01,
+      "learning_rate": 2.989335006273526e-06,
+      "loss": 1.7838,
+      "step": 22350
+    },
+    {
+      "epoch": 7.01,
+      "learning_rate": 2.9861982434127985e-06,
+      "loss": 1.899,
+      "step": 22360
+    },
+    {
+      "epoch": 7.02,
+      "learning_rate": 2.98306148055207e-06,
+      "loss": 1.8519,
+      "step": 22370
+    },
+    {
+      "epoch": 7.02,
+      "learning_rate": 2.9799247176913426e-06,
+      "loss": 1.8812,
+      "step": 22380
+    },
+    {
+      "epoch": 7.02,
+      "learning_rate": 2.976787954830615e-06,
+      "loss": 1.8858,
+      "step": 22390
+    },
+    {
+      "epoch": 7.03,
+      "learning_rate": 2.9736511919698875e-06,
+      "loss": 1.8638,
+      "step": 22400
+    },
+    {
+      "epoch": 7.03,
+      "learning_rate": 2.9705144291091596e-06,
+      "loss": 1.9083,
+      "step": 22410
+    },
+    {
+      "epoch": 7.03,
+      "learning_rate": 2.9673776662484316e-06,
+      "loss": 1.837,
+      "step": 22420
+    },
+    {
+      "epoch": 7.04,
+      "learning_rate": 2.964240903387704e-06,
+      "loss": 1.835,
+      "step": 22430
+    },
+    {
+      "epoch": 7.04,
+      "learning_rate": 2.9611041405269766e-06,
+      "loss": 1.7929,
+      "step": 22440
+    },
+    {
+      "epoch": 7.04,
+      "learning_rate": 2.9579673776662486e-06,
+      "loss": 1.8362,
+      "step": 22450
+    },
+    {
+      "epoch": 7.05,
+      "learning_rate": 2.954830614805521e-06,
+      "loss": 1.9089,
+      "step": 22460
+    },
+    {
+      "epoch": 7.05,
+      "learning_rate": 2.951693851944793e-06,
+      "loss": 1.8951,
+      "step": 22470
+    },
+    {
+      "epoch": 7.05,
+      "learning_rate": 2.948557089084065e-06,
+      "loss": 1.8888,
+      "step": 22480
+    },
+    {
+      "epoch": 7.05,
+      "learning_rate": 2.9454203262233376e-06,
+      "loss": 1.8226,
+      "step": 22490
+    },
+    {
+      "epoch": 7.06,
+      "learning_rate": 2.94228356336261e-06,
+      "loss": 1.7939,
+      "step": 22500
+    },
+    {
+      "epoch": 7.06,
+      "learning_rate": 2.9391468005018826e-06,
+      "loss": 1.7777,
+      "step": 22510
+    },
+    {
+      "epoch": 7.06,
+      "learning_rate": 2.936010037641154e-06,
+      "loss": 1.8305,
+      "step": 22520
+    },
+    {
+      "epoch": 7.07,
+      "learning_rate": 2.9328732747804267e-06,
+      "loss": 1.8603,
+      "step": 22530
+    },
+    {
+      "epoch": 7.07,
+      "learning_rate": 2.929736511919699e-06,
+      "loss": 1.8084,
+      "step": 22540
+    },
+    {
+      "epoch": 7.07,
+      "learning_rate": 2.9265997490589716e-06,
+      "loss": 1.9212,
+      "step": 22550
+    },
+    {
+      "epoch": 7.08,
+      "learning_rate": 2.923462986198244e-06,
+      "loss": 1.7966,
+      "step": 22560
+    },
+    {
+      "epoch": 7.08,
+      "learning_rate": 2.9203262233375157e-06,
+      "loss": 1.7703,
+      "step": 22570
+    },
+    {
+      "epoch": 7.08,
+      "learning_rate": 2.917189460476788e-06,
+      "loss": 1.8393,
+      "step": 22580
+    },
+    {
+      "epoch": 7.09,
+      "learning_rate": 2.9140526976160606e-06,
+      "loss": 1.7991,
+      "step": 22590
+    },
+    {
+      "epoch": 7.09,
+      "learning_rate": 2.9109159347553327e-06,
+      "loss": 1.8828,
+      "step": 22600
+    },
+    {
+      "epoch": 7.09,
+      "learning_rate": 2.907779171894605e-06,
+      "loss": 1.8453,
+      "step": 22610
+    },
+    {
+      "epoch": 7.1,
+      "learning_rate": 2.904642409033877e-06,
+      "loss": 1.8254,
+      "step": 22620
+    },
+    {
+      "epoch": 7.1,
+      "learning_rate": 2.9015056461731497e-06,
+      "loss": 1.8666,
+      "step": 22630
+    },
+    {
+      "epoch": 7.1,
+      "learning_rate": 2.8983688833124217e-06,
+      "loss": 1.7819,
+      "step": 22640
+    },
+    {
+      "epoch": 7.1,
+      "learning_rate": 2.895232120451694e-06,
+      "loss": 1.8606,
+      "step": 22650
+    },
+    {
+      "epoch": 7.11,
+      "learning_rate": 2.8920953575909666e-06,
+      "loss": 1.8466,
+      "step": 22660
+    },
+    {
+      "epoch": 7.11,
+      "learning_rate": 2.8889585947302383e-06,
+      "loss": 1.7494,
+      "step": 22670
+    },
+    {
+      "epoch": 7.11,
+      "learning_rate": 2.8858218318695107e-06,
+      "loss": 1.9066,
+      "step": 22680
+    },
+    {
+      "epoch": 7.12,
+      "learning_rate": 2.882685069008783e-06,
+      "loss": 1.8383,
+      "step": 22690
+    },
+    {
+      "epoch": 7.12,
+      "learning_rate": 2.8795483061480557e-06,
+      "loss": 1.9013,
+      "step": 22700
+    },
+    {
+      "epoch": 7.12,
+      "learning_rate": 2.876411543287328e-06,
+      "loss": 1.8759,
+      "step": 22710
+    },
+    {
+      "epoch": 7.13,
+      "learning_rate": 2.8732747804265998e-06,
+      "loss": 1.835,
+      "step": 22720
+    },
+    {
+      "epoch": 7.13,
+      "learning_rate": 2.8701380175658722e-06,
+      "loss": 1.8592,
+      "step": 22730
+    },
+    {
+      "epoch": 7.13,
+      "learning_rate": 2.8670012547051447e-06,
+      "loss": 1.7547,
+      "step": 22740
+    },
+    {
+      "epoch": 7.14,
+      "learning_rate": 2.863864491844417e-06,
+      "loss": 1.8349,
+      "step": 22750
+    },
+    {
+      "epoch": 7.14,
+      "learning_rate": 2.860727728983689e-06,
+      "loss": 1.8603,
+      "step": 22760
+    },
+    {
+      "epoch": 7.14,
+      "learning_rate": 2.8575909661229612e-06,
+      "loss": 1.8041,
+      "step": 22770
+    },
+    {
+      "epoch": 7.15,
+      "learning_rate": 2.8544542032622337e-06,
+      "loss": 1.8681,
+      "step": 22780
+    },
+    {
+      "epoch": 7.15,
+      "learning_rate": 2.8513174404015058e-06,
+      "loss": 1.8604,
+      "step": 22790
+    },
+    {
+      "epoch": 7.15,
+      "learning_rate": 2.8481806775407782e-06,
+      "loss": 1.8059,
+      "step": 22800
+    },
+    {
+      "epoch": 7.15,
+      "learning_rate": 2.8450439146800503e-06,
+      "loss": 1.9027,
+      "step": 22810
+    },
+    {
+      "epoch": 7.16,
+      "learning_rate": 2.8419071518193223e-06,
+      "loss": 1.8495,
+      "step": 22820
+    },
+    {
+      "epoch": 7.16,
+      "learning_rate": 2.838770388958595e-06,
+      "loss": 1.8858,
+      "step": 22830
+    },
+    {
+      "epoch": 7.16,
+      "learning_rate": 2.8356336260978673e-06,
+      "loss": 1.8676,
+      "step": 22840
+    },
+    {
+      "epoch": 7.17,
+      "learning_rate": 2.8324968632371397e-06,
+      "loss": 1.7541,
+      "step": 22850
+    },
+    {
+      "epoch": 7.17,
+      "learning_rate": 2.8293601003764113e-06,
+      "loss": 1.8457,
+      "step": 22860
+    },
+    {
+      "epoch": 7.17,
+      "learning_rate": 2.826223337515684e-06,
+      "loss": 1.814,
+      "step": 22870
+    },
+    {
+      "epoch": 7.18,
+      "learning_rate": 2.8230865746549563e-06,
+      "loss": 1.7803,
+      "step": 22880
+    },
+    {
+      "epoch": 7.18,
+      "learning_rate": 2.8199498117942288e-06,
+      "loss": 1.8027,
+      "step": 22890
+    },
+    {
+      "epoch": 7.18,
+      "learning_rate": 2.8168130489335012e-06,
+      "loss": 1.8909,
+      "step": 22900
+    },
+    {
+      "epoch": 7.19,
+      "learning_rate": 2.813676286072773e-06,
+      "loss": 1.7367,
+      "step": 22910
+    },
+    {
+      "epoch": 7.19,
+      "learning_rate": 2.8105395232120453e-06,
+      "loss": 1.917,
+      "step": 22920
+    },
+    {
+      "epoch": 7.19,
+      "learning_rate": 2.8074027603513178e-06,
+      "loss": 1.7805,
+      "step": 22930
+    },
+    {
+      "epoch": 7.2,
+      "learning_rate": 2.80426599749059e-06,
+      "loss": 1.8529,
+      "step": 22940
+    },
+    {
+      "epoch": 7.2,
+      "learning_rate": 2.8011292346298623e-06,
+      "loss": 1.8119,
+      "step": 22950
+    },
+    {
+      "epoch": 7.2,
+      "learning_rate": 2.7979924717691343e-06,
+      "loss": 1.7894,
+      "step": 22960
+    },
+    {
+      "epoch": 7.21,
+      "learning_rate": 2.794855708908407e-06,
+      "loss": 1.8554,
+      "step": 22970
+    },
+    {
+      "epoch": 7.21,
+      "learning_rate": 2.791718946047679e-06,
+      "loss": 1.8412,
+      "step": 22980
+    },
+    {
+      "epoch": 7.21,
+      "learning_rate": 2.7885821831869513e-06,
+      "loss": 1.8466,
+      "step": 22990
+    },
+    {
+      "epoch": 7.21,
+      "learning_rate": 2.7854454203262238e-06,
+      "loss": 1.8696,
+      "step": 23000
+    },
+    {
+      "epoch": 7.22,
+      "learning_rate": 2.7823086574654954e-06,
+      "loss": 1.7816,
+      "step": 23010
+    },
+    {
+      "epoch": 7.22,
+      "learning_rate": 2.779171894604768e-06,
+      "loss": 1.7382,
+      "step": 23020
+    },
+    {
+      "epoch": 7.22,
+      "learning_rate": 2.7760351317440403e-06,
+      "loss": 1.8222,
+      "step": 23030
+    },
+    {
+      "epoch": 7.23,
+      "learning_rate": 2.772898368883313e-06,
+      "loss": 1.8955,
+      "step": 23040
+    },
+    {
+      "epoch": 7.23,
+      "learning_rate": 2.7697616060225853e-06,
+      "loss": 1.7663,
+      "step": 23050
+    },
+    {
+      "epoch": 7.23,
+      "learning_rate": 2.766624843161857e-06,
+      "loss": 1.8362,
+      "step": 23060
+    },
+    {
+      "epoch": 7.24,
+      "learning_rate": 2.7634880803011294e-06,
+      "loss": 1.9312,
+      "step": 23070
+    },
+    {
+      "epoch": 7.24,
+      "learning_rate": 2.760351317440402e-06,
+      "loss": 1.7954,
+      "step": 23080
+    },
+    {
+      "epoch": 7.24,
+      "learning_rate": 2.7572145545796743e-06,
+      "loss": 1.8945,
+      "step": 23090
+    },
+    {
+      "epoch": 7.25,
+      "learning_rate": 2.7540777917189464e-06,
+      "loss": 1.8134,
+      "step": 23100
+    },
+    {
+      "epoch": 7.25,
+      "learning_rate": 2.7509410288582184e-06,
+      "loss": 1.8199,
+      "step": 23110
+    },
+    {
+      "epoch": 7.25,
+      "learning_rate": 2.747804265997491e-06,
+      "loss": 1.8319,
+      "step": 23120
+    },
+    {
+      "epoch": 7.26,
+      "learning_rate": 2.744667503136763e-06,
+      "loss": 1.9164,
+      "step": 23130
+    },
+    {
+      "epoch": 7.26,
+      "learning_rate": 2.7415307402760354e-06,
+      "loss": 1.8681,
+      "step": 23140
+    },
+    {
+      "epoch": 7.26,
+      "learning_rate": 2.738393977415308e-06,
+      "loss": 1.867,
+      "step": 23150
+    },
+    {
+      "epoch": 7.26,
+      "learning_rate": 2.73525721455458e-06,
+      "loss": 1.8132,
+      "step": 23160
+    },
+    {
+      "epoch": 7.27,
+      "learning_rate": 2.732120451693852e-06,
+      "loss": 1.9204,
+      "step": 23170
+    },
+    {
+      "epoch": 7.27,
+      "learning_rate": 2.7289836888331244e-06,
+      "loss": 1.7829,
+      "step": 23180
+    },
+    {
+      "epoch": 7.27,
+      "learning_rate": 2.725846925972397e-06,
+      "loss": 1.8375,
+      "step": 23190
+    },
+    {
+      "epoch": 7.28,
+      "learning_rate": 2.7227101631116693e-06,
+      "loss": 1.7998,
+      "step": 23200
+    },
+    {
+      "epoch": 7.28,
+      "learning_rate": 2.719573400250941e-06,
+      "loss": 1.8404,
+      "step": 23210
+    },
+    {
+      "epoch": 7.28,
+      "learning_rate": 2.7164366373902134e-06,
+      "loss": 1.8477,
+      "step": 23220
+    },
+    {
+      "epoch": 7.29,
+      "learning_rate": 2.713299874529486e-06,
+      "loss": 1.8236,
+      "step": 23230
+    },
+    {
+      "epoch": 7.29,
+      "learning_rate": 2.7101631116687584e-06,
+      "loss": 1.8057,
+      "step": 23240
+    },
+    {
+      "epoch": 7.29,
+      "learning_rate": 2.70702634880803e-06,
+      "loss": 1.9069,
+      "step": 23250
+    },
+    {
+      "epoch": 7.3,
+      "learning_rate": 2.7038895859473025e-06,
+      "loss": 1.8121,
+      "step": 23260
+    },
+    {
+      "epoch": 7.3,
+      "learning_rate": 2.700752823086575e-06,
+      "loss": 1.8416,
+      "step": 23270
+    },
+    {
+      "epoch": 7.3,
+      "learning_rate": 2.6976160602258474e-06,
+      "loss": 1.8124,
+      "step": 23280
+    },
+    {
+      "epoch": 7.31,
+      "learning_rate": 2.6944792973651194e-06,
+      "loss": 1.811,
+      "step": 23290
+    },
+    {
+      "epoch": 7.31,
+      "learning_rate": 2.6913425345043915e-06,
+      "loss": 1.8152,
+      "step": 23300
+    },
+    {
+      "epoch": 7.31,
+      "learning_rate": 2.688205771643664e-06,
+      "loss": 1.8165,
+      "step": 23310
+    },
+    {
+      "epoch": 7.31,
+      "learning_rate": 2.685069008782936e-06,
+      "loss": 1.8534,
+      "step": 23320
+    },
+    {
+      "epoch": 7.32,
+      "learning_rate": 2.6819322459222085e-06,
+      "loss": 1.8197,
+      "step": 23330
+    },
+    {
+      "epoch": 7.32,
+      "learning_rate": 2.678795483061481e-06,
+      "loss": 1.8595,
+      "step": 23340
+    },
+    {
+      "epoch": 7.32,
+      "learning_rate": 2.675658720200753e-06,
+      "loss": 1.8172,
+      "step": 23350
+    },
+    {
+      "epoch": 7.33,
+      "learning_rate": 2.672521957340025e-06,
+      "loss": 1.8871,
+      "step": 23360
+    },
+    {
+      "epoch": 7.33,
+      "learning_rate": 2.6693851944792975e-06,
+      "loss": 1.8423,
+      "step": 23370
+    },
+    {
+      "epoch": 7.33,
+      "learning_rate": 2.66624843161857e-06,
+      "loss": 1.8303,
+      "step": 23380
+    },
+    {
+      "epoch": 7.34,
+      "learning_rate": 2.6631116687578424e-06,
+      "loss": 1.8854,
+      "step": 23390
+    },
+    {
+      "epoch": 7.34,
+      "learning_rate": 2.659974905897114e-06,
+      "loss": 1.8654,
+      "step": 23400
+    },
+    {
+      "epoch": 7.34,
+      "learning_rate": 2.6568381430363865e-06,
+      "loss": 1.7865,
+      "step": 23410
+    },
+    {
+      "epoch": 7.35,
+      "learning_rate": 2.653701380175659e-06,
+      "loss": 1.8362,
+      "step": 23420
+    },
+    {
+      "epoch": 7.35,
+      "learning_rate": 2.6505646173149315e-06,
+      "loss": 1.9038,
+      "step": 23430
+    },
+    {
+      "epoch": 7.35,
+      "learning_rate": 2.6474278544542035e-06,
+      "loss": 1.8733,
+      "step": 23440
+    },
+    {
+      "epoch": 7.36,
+      "learning_rate": 2.6442910915934756e-06,
+      "loss": 1.8038,
+      "step": 23450
+    },
+    {
+      "epoch": 7.36,
+      "learning_rate": 2.641154328732748e-06,
+      "loss": 1.9265,
+      "step": 23460
+    },
+    {
+      "epoch": 7.36,
+      "learning_rate": 2.6380175658720205e-06,
+      "loss": 1.8768,
+      "step": 23470
+    },
+    {
+      "epoch": 7.37,
+      "learning_rate": 2.6348808030112925e-06,
+      "loss": 1.827,
+      "step": 23480
+    },
+    {
+      "epoch": 7.37,
+      "learning_rate": 2.631744040150565e-06,
+      "loss": 1.7804,
+      "step": 23490
+    },
+    {
+      "epoch": 7.37,
+      "learning_rate": 2.628607277289837e-06,
+      "loss": 1.8359,
+      "step": 23500
+    },
+    {
+      "epoch": 7.37,
+      "learning_rate": 2.625470514429109e-06,
+      "loss": 1.8243,
+      "step": 23510
+    },
+    {
+      "epoch": 7.38,
+      "learning_rate": 2.6223337515683816e-06,
+      "loss": 1.8012,
+      "step": 23520
+    },
+    {
+      "epoch": 7.38,
+      "learning_rate": 2.619196988707654e-06,
+      "loss": 1.7882,
+      "step": 23530
+    },
+    {
+      "epoch": 7.38,
+      "learning_rate": 2.6160602258469265e-06,
+      "loss": 1.8081,
+      "step": 23540
+    },
+    {
+      "epoch": 7.39,
+      "learning_rate": 2.612923462986198e-06,
+      "loss": 1.9011,
+      "step": 23550
+    },
+    {
+      "epoch": 7.39,
+      "learning_rate": 2.6097867001254706e-06,
+      "loss": 1.847,
+      "step": 23560
+    },
+    {
+      "epoch": 7.39,
+      "learning_rate": 2.606649937264743e-06,
+      "loss": 1.8524,
+      "step": 23570
+    },
+    {
+      "epoch": 7.4,
+      "learning_rate": 2.6035131744040155e-06,
+      "loss": 1.8984,
+      "step": 23580
+    },
+    {
+      "epoch": 7.4,
+      "learning_rate": 2.600376411543288e-06,
+      "loss": 1.9214,
+      "step": 23590
+    },
+    {
+      "epoch": 7.4,
+      "learning_rate": 2.5972396486825596e-06,
+      "loss": 1.8319,
+      "step": 23600
+    },
+    {
+      "epoch": 7.41,
+      "learning_rate": 2.594102885821832e-06,
+      "loss": 1.8231,
+      "step": 23610
+    },
+    {
+      "epoch": 7.41,
+      "learning_rate": 2.5909661229611045e-06,
+      "loss": 1.8265,
+      "step": 23620
+    },
+    {
+      "epoch": 7.41,
+      "learning_rate": 2.5878293601003766e-06,
+      "loss": 1.8764,
+      "step": 23630
+    },
+    {
+      "epoch": 7.42,
+      "learning_rate": 2.584692597239649e-06,
+      "loss": 1.7521,
+      "step": 23640
+    },
+    {
+      "epoch": 7.42,
+      "learning_rate": 2.581555834378921e-06,
+      "loss": 1.8229,
+      "step": 23650
+    },
+    {
+      "epoch": 7.42,
+      "learning_rate": 2.5784190715181936e-06,
+      "loss": 1.902,
+      "step": 23660
+    },
+    {
+      "epoch": 7.42,
+      "learning_rate": 2.5752823086574656e-06,
+      "loss": 1.8338,
+      "step": 23670
+    },
+    {
+      "epoch": 7.43,
+      "learning_rate": 2.572145545796738e-06,
+      "loss": 1.8587,
+      "step": 23680
+    },
+    {
+      "epoch": 7.43,
+      "learning_rate": 2.56900878293601e-06,
+      "loss": 1.8247,
+      "step": 23690
+    },
+    {
+      "epoch": 7.43,
+      "learning_rate": 2.565872020075282e-06,
+      "loss": 1.8186,
+      "step": 23700
+    },
+    {
+      "epoch": 7.44,
+      "learning_rate": 2.5627352572145546e-06,
+      "loss": 1.8091,
+      "step": 23710
+    },
+    {
+      "epoch": 7.44,
+      "learning_rate": 2.559598494353827e-06,
+      "loss": 1.8887,
+      "step": 23720
+    },
+    {
+      "epoch": 7.44,
+      "learning_rate": 2.5564617314930996e-06,
+      "loss": 1.8898,
+      "step": 23730
+    },
+    {
+      "epoch": 7.45,
+      "learning_rate": 2.553324968632371e-06,
+      "loss": 1.8661,
+      "step": 23740
+    },
+    {
+      "epoch": 7.45,
+      "learning_rate": 2.5501882057716437e-06,
+      "loss": 1.7916,
+      "step": 23750
+    },
+    {
+      "epoch": 7.45,
+      "learning_rate": 2.547051442910916e-06,
+      "loss": 1.9033,
+      "step": 23760
+    },
+    {
+      "epoch": 7.46,
+      "learning_rate": 2.5439146800501886e-06,
+      "loss": 1.8425,
+      "step": 23770
+    },
+    {
+      "epoch": 7.46,
+      "learning_rate": 2.540777917189461e-06,
+      "loss": 1.9196,
+      "step": 23780
+    },
+    {
+      "epoch": 7.46,
+      "learning_rate": 2.5376411543287327e-06,
+      "loss": 1.865,
+      "step": 23790
+    },
+    {
+      "epoch": 7.47,
+      "learning_rate": 2.534504391468005e-06,
+      "loss": 1.8719,
+      "step": 23800
+    },
+    {
+      "epoch": 7.47,
+      "learning_rate": 2.5313676286072776e-06,
+      "loss": 1.8598,
+      "step": 23810
+    },
+    {
+      "epoch": 7.47,
+      "learning_rate": 2.5282308657465497e-06,
+      "loss": 1.8614,
+      "step": 23820
+    },
+    {
+      "epoch": 7.47,
+      "learning_rate": 2.525094102885822e-06,
+      "loss": 1.7822,
+      "step": 23830
+    },
+    {
+      "epoch": 7.48,
+      "learning_rate": 2.521957340025094e-06,
+      "loss": 1.8435,
+      "step": 23840
+    },
+    {
+      "epoch": 7.48,
+      "learning_rate": 2.5188205771643667e-06,
+      "loss": 1.8492,
+      "step": 23850
+    },
+    {
+      "epoch": 7.48,
+      "learning_rate": 2.5156838143036387e-06,
+      "loss": 1.7792,
+      "step": 23860
+    },
+    {
+      "epoch": 7.49,
+      "learning_rate": 2.512547051442911e-06,
+      "loss": 1.8483,
+      "step": 23870
+    },
+    {
+      "epoch": 7.49,
+      "learning_rate": 2.5094102885821836e-06,
+      "loss": 1.8582,
+      "step": 23880
+    },
+    {
+      "epoch": 7.49,
+      "learning_rate": 2.5062735257214553e-06,
+      "loss": 1.8717,
+      "step": 23890
+    },
+    {
+      "epoch": 7.5,
+      "learning_rate": 2.5031367628607277e-06,
+      "loss": 1.8539,
+      "step": 23900
+    },
+    {
+      "epoch": 7.5,
+      "learning_rate": 2.5e-06,
+      "loss": 1.776,
+      "step": 23910
+    },
+    {
+      "epoch": 7.5,
+      "learning_rate": 2.4968632371392727e-06,
+      "loss": 1.7718,
+      "step": 23920
+    },
+    {
+      "epoch": 7.51,
+      "learning_rate": 2.4937264742785447e-06,
+      "loss": 1.8526,
+      "step": 23930
+    },
+    {
+      "epoch": 7.51,
+      "learning_rate": 2.490589711417817e-06,
+      "loss": 1.8475,
+      "step": 23940
+    },
+    {
+      "epoch": 7.51,
+      "learning_rate": 2.4874529485570892e-06,
+      "loss": 1.926,
+      "step": 23950
+    },
+    {
+      "epoch": 7.52,
+      "learning_rate": 2.4843161856963617e-06,
+      "loss": 1.8306,
+      "step": 23960
+    },
+    {
+      "epoch": 7.52,
+      "learning_rate": 2.4811794228356337e-06,
+      "loss": 1.8626,
+      "step": 23970
+    },
+    {
+      "epoch": 7.52,
+      "learning_rate": 2.4780426599749062e-06,
+      "loss": 1.8465,
+      "step": 23980
+    },
+    {
+      "epoch": 7.53,
+      "learning_rate": 2.4749058971141783e-06,
+      "loss": 1.9154,
+      "step": 23990
+    },
+    {
+      "epoch": 7.53,
+      "learning_rate": 2.4717691342534507e-06,
+      "loss": 1.8894,
+      "step": 24000
+    },
+    {
+      "epoch": 7.53,
+      "learning_rate": 2.4686323713927228e-06,
+      "loss": 1.8561,
+      "step": 24010
+    },
+    {
+      "epoch": 7.53,
+      "learning_rate": 2.4654956085319952e-06,
+      "loss": 1.9377,
+      "step": 24020
+    },
+    {
+      "epoch": 7.54,
+      "learning_rate": 2.4623588456712673e-06,
+      "loss": 1.797,
+      "step": 24030
+    },
+    {
+      "epoch": 7.54,
+      "learning_rate": 2.4592220828105398e-06,
+      "loss": 1.8609,
+      "step": 24040
+    },
+    {
+      "epoch": 7.54,
+      "learning_rate": 2.456085319949812e-06,
+      "loss": 1.9067,
+      "step": 24050
+    },
+    {
+      "epoch": 7.55,
+      "learning_rate": 2.4529485570890843e-06,
+      "loss": 1.8253,
+      "step": 24060
+    },
+    {
+      "epoch": 7.55,
+      "learning_rate": 2.4498117942283563e-06,
+      "loss": 1.8497,
+      "step": 24070
+    },
+    {
+      "epoch": 7.55,
+      "learning_rate": 2.4466750313676288e-06,
+      "loss": 1.9389,
+      "step": 24080
+    },
+    {
+      "epoch": 7.56,
+      "learning_rate": 2.4435382685069012e-06,
+      "loss": 1.8728,
+      "step": 24090
+    },
+    {
+      "epoch": 7.56,
+      "learning_rate": 2.4404015056461733e-06,
+      "loss": 1.8859,
+      "step": 24100
+    },
+    {
+      "epoch": 7.56,
+      "learning_rate": 2.4372647427854458e-06,
+      "loss": 1.8464,
+      "step": 24110
+    },
+    {
+      "epoch": 7.57,
+      "learning_rate": 2.434127979924718e-06,
+      "loss": 1.874,
+      "step": 24120
+    },
+    {
+      "epoch": 7.57,
+      "learning_rate": 2.4309912170639903e-06,
+      "loss": 1.8951,
+      "step": 24130
+    },
+    {
+      "epoch": 7.57,
+      "learning_rate": 2.4278544542032627e-06,
+      "loss": 1.7477,
+      "step": 24140
+    },
+    {
+      "epoch": 7.58,
+      "learning_rate": 2.424717691342535e-06,
+      "loss": 1.9261,
+      "step": 24150
+    },
+    {
+      "epoch": 7.58,
+      "learning_rate": 2.421580928481807e-06,
+      "loss": 1.8659,
+      "step": 24160
+    },
+    {
+      "epoch": 7.58,
+      "learning_rate": 2.4184441656210793e-06,
+      "loss": 1.7717,
+      "step": 24170
+    },
+    {
+      "epoch": 7.58,
+      "learning_rate": 2.4153074027603513e-06,
+      "loss": 1.8292,
+      "step": 24180
+    },
+    {
+      "epoch": 7.59,
+      "learning_rate": 2.412170639899624e-06,
+      "loss": 1.824,
+      "step": 24190
+    },
+    {
+      "epoch": 7.59,
+      "learning_rate": 2.409033877038896e-06,
+      "loss": 1.8637,
+      "step": 24200
+    },
+    {
+      "epoch": 7.59,
+      "learning_rate": 2.4058971141781683e-06,
+      "loss": 1.7925,
+      "step": 24210
+    },
+    {
+      "epoch": 7.6,
+      "learning_rate": 2.4027603513174404e-06,
+      "loss": 1.8698,
+      "step": 24220
+    },
+    {
+      "epoch": 7.6,
+      "learning_rate": 2.399623588456713e-06,
+      "loss": 1.8401,
+      "step": 24230
+    },
+    {
+      "epoch": 7.6,
+      "learning_rate": 2.3964868255959853e-06,
+      "loss": 1.8329,
+      "step": 24240
+    },
+    {
+      "epoch": 7.61,
+      "learning_rate": 2.3933500627352574e-06,
+      "loss": 1.8431,
+      "step": 24250
+    },
+    {
+      "epoch": 7.61,
+      "learning_rate": 2.39021329987453e-06,
+      "loss": 1.8929,
+      "step": 24260
+    },
+    {
+      "epoch": 7.61,
+      "learning_rate": 2.387076537013802e-06,
+      "loss": 1.7704,
+      "step": 24270
+    },
+    {
+      "epoch": 7.62,
+      "learning_rate": 2.3839397741530743e-06,
+      "loss": 1.7222,
+      "step": 24280
+    },
+    {
+      "epoch": 7.62,
+      "learning_rate": 2.3808030112923464e-06,
+      "loss": 1.8664,
+      "step": 24290
+    },
+    {
+      "epoch": 7.62,
+      "learning_rate": 2.377666248431619e-06,
+      "loss": 1.8683,
+      "step": 24300
+    },
+    {
+      "epoch": 7.63,
+      "learning_rate": 2.3745294855708913e-06,
+      "loss": 1.8453,
+      "step": 24310
+    },
+    {
+      "epoch": 7.63,
+      "learning_rate": 2.3713927227101634e-06,
+      "loss": 1.8595,
+      "step": 24320
+    },
+    {
+      "epoch": 7.63,
+      "learning_rate": 2.368255959849436e-06,
+      "loss": 1.8028,
+      "step": 24330
+    },
+    {
+      "epoch": 7.63,
+      "learning_rate": 2.365119196988708e-06,
+      "loss": 1.8816,
+      "step": 24340
+    },
+    {
+      "epoch": 7.64,
+      "learning_rate": 2.36198243412798e-06,
+      "loss": 1.71,
+      "step": 24350
+    },
+    {
+      "epoch": 7.64,
+      "learning_rate": 2.3588456712672524e-06,
+      "loss": 1.8009,
+      "step": 24360
+    },
+    {
+      "epoch": 7.64,
+      "learning_rate": 2.3557089084065244e-06,
+      "loss": 1.7629,
+      "step": 24370
+    },
+    {
+      "epoch": 7.65,
+      "learning_rate": 2.352572145545797e-06,
+      "loss": 1.7828,
+      "step": 24380
+    },
+    {
+      "epoch": 7.65,
+      "learning_rate": 2.349435382685069e-06,
+      "loss": 1.88,
+      "step": 24390
+    },
+    {
+      "epoch": 7.65,
+      "learning_rate": 2.3462986198243414e-06,
+      "loss": 1.8083,
+      "step": 24400
+    },
+    {
+      "epoch": 7.66,
+      "learning_rate": 2.343161856963614e-06,
+      "loss": 1.7952,
+      "step": 24410
+    },
+    {
+      "epoch": 7.66,
+      "learning_rate": 2.340025094102886e-06,
+      "loss": 1.8013,
+      "step": 24420
+    },
+    {
+      "epoch": 7.66,
+      "learning_rate": 2.3368883312421584e-06,
+      "loss": 1.775,
+      "step": 24430
+    },
+    {
+      "epoch": 7.67,
+      "learning_rate": 2.3337515683814304e-06,
+      "loss": 1.8962,
+      "step": 24440
+    },
+    {
+      "epoch": 7.67,
+      "learning_rate": 2.330614805520703e-06,
+      "loss": 1.8714,
+      "step": 24450
+    },
+    {
+      "epoch": 7.67,
+      "learning_rate": 2.3274780426599754e-06,
+      "loss": 1.8892,
+      "step": 24460
+    },
+    {
+      "epoch": 7.68,
+      "learning_rate": 2.3243412797992474e-06,
+      "loss": 1.8778,
+      "step": 24470
+    },
+    {
+      "epoch": 7.68,
+      "learning_rate": 2.32120451693852e-06,
+      "loss": 1.9277,
+      "step": 24480
+    },
+    {
+      "epoch": 7.68,
+      "learning_rate": 2.318067754077792e-06,
+      "loss": 1.8388,
+      "step": 24490
+    },
+    {
+      "epoch": 7.69,
+      "learning_rate": 2.3149309912170644e-06,
+      "loss": 1.7903,
+      "step": 24500
+    },
+    {
+      "epoch": 7.69,
+      "learning_rate": 2.3117942283563365e-06,
+      "loss": 1.8229,
+      "step": 24510
+    },
+    {
+      "epoch": 7.69,
+      "learning_rate": 2.3086574654956085e-06,
+      "loss": 1.8884,
+      "step": 24520
+    },
+    {
+      "epoch": 7.69,
+      "learning_rate": 2.305520702634881e-06,
+      "loss": 1.8402,
+      "step": 24530
+    },
+    {
+      "epoch": 7.7,
+      "learning_rate": 2.302383939774153e-06,
+      "loss": 1.9046,
+      "step": 24540
+    },
+    {
+      "epoch": 7.7,
+      "learning_rate": 2.2992471769134255e-06,
+      "loss": 1.9163,
+      "step": 24550
+    },
+    {
+      "epoch": 7.7,
+      "learning_rate": 2.2961104140526975e-06,
+      "loss": 1.8306,
+      "step": 24560
+    },
+    {
+      "epoch": 7.71,
+      "learning_rate": 2.29297365119197e-06,
+      "loss": 1.8335,
+      "step": 24570
+    },
+    {
+      "epoch": 7.71,
+      "learning_rate": 2.2898368883312425e-06,
+      "loss": 1.8506,
+      "step": 24580
+    },
+    {
+      "epoch": 7.71,
+      "learning_rate": 2.2867001254705145e-06,
+      "loss": 1.9307,
+      "step": 24590
+    },
+    {
+      "epoch": 7.72,
+      "learning_rate": 2.283563362609787e-06,
+      "loss": 1.8544,
+      "step": 24600
+    },
+    {
+      "epoch": 7.72,
+      "learning_rate": 2.280426599749059e-06,
+      "loss": 1.7701,
+      "step": 24610
+    },
+    {
+      "epoch": 7.72,
+      "learning_rate": 2.2772898368883315e-06,
+      "loss": 1.8246,
+      "step": 24620
+    },
+    {
+      "epoch": 7.73,
+      "learning_rate": 2.274153074027604e-06,
+      "loss": 1.804,
+      "step": 24630
+    },
+    {
+      "epoch": 7.73,
+      "learning_rate": 2.271016311166876e-06,
+      "loss": 1.8989,
+      "step": 24640
+    },
+    {
+      "epoch": 7.73,
+      "learning_rate": 2.2678795483061485e-06,
+      "loss": 1.8234,
+      "step": 24650
+    },
+    {
+      "epoch": 7.74,
+      "learning_rate": 2.2647427854454205e-06,
+      "loss": 1.8747,
+      "step": 24660
+    },
+    {
+      "epoch": 7.74,
+      "learning_rate": 2.261606022584693e-06,
+      "loss": 1.8087,
+      "step": 24670
+    },
+    {
+      "epoch": 7.74,
+      "learning_rate": 2.258469259723965e-06,
+      "loss": 1.9062,
+      "step": 24680
+    },
+    {
+      "epoch": 7.74,
+      "learning_rate": 2.2553324968632375e-06,
+      "loss": 1.8029,
+      "step": 24690
+    },
+    {
+      "epoch": 7.75,
+      "learning_rate": 2.2521957340025095e-06,
+      "loss": 1.8597,
+      "step": 24700
+    },
+    {
+      "epoch": 7.75,
+      "learning_rate": 2.2490589711417816e-06,
+      "loss": 1.742,
+      "step": 24710
+    },
+    {
+      "epoch": 7.75,
+      "learning_rate": 2.245922208281054e-06,
+      "loss": 1.9272,
+      "step": 24720
+    },
+    {
+      "epoch": 7.76,
+      "learning_rate": 2.242785445420326e-06,
+      "loss": 1.7777,
+      "step": 24730
+    },
+    {
+      "epoch": 7.76,
+      "learning_rate": 2.2396486825595986e-06,
+      "loss": 1.8376,
+      "step": 24740
+    },
+    {
+      "epoch": 7.76,
+      "learning_rate": 2.236511919698871e-06,
+      "loss": 1.847,
+      "step": 24750
+    },
+    {
+      "epoch": 7.77,
+      "learning_rate": 2.233375156838143e-06,
+      "loss": 1.8417,
+      "step": 24760
+    },
+    {
+      "epoch": 7.77,
+      "learning_rate": 2.2302383939774155e-06,
+      "loss": 1.894,
+      "step": 24770
+    },
+    {
+      "epoch": 7.77,
+      "learning_rate": 2.2271016311166876e-06,
+      "loss": 1.7936,
+      "step": 24780
+    },
+    {
+      "epoch": 7.78,
+      "learning_rate": 2.22396486825596e-06,
+      "loss": 1.8742,
+      "step": 24790
+    },
+    {
+      "epoch": 7.78,
+      "learning_rate": 2.2208281053952325e-06,
+      "loss": 1.8119,
+      "step": 24800
+    },
+    {
+      "epoch": 7.78,
+      "learning_rate": 2.2176913425345046e-06,
+      "loss": 1.8426,
+      "step": 24810
+    },
+    {
+      "epoch": 7.79,
+      "learning_rate": 2.214554579673777e-06,
+      "loss": 1.9206,
+      "step": 24820
+    },
+    {
+      "epoch": 7.79,
+      "learning_rate": 2.211417816813049e-06,
+      "loss": 1.811,
+      "step": 24830
+    },
+    {
+      "epoch": 7.79,
+      "learning_rate": 2.2082810539523216e-06,
+      "loss": 1.8007,
+      "step": 24840
+    },
+    {
+      "epoch": 7.79,
+      "learning_rate": 2.2051442910915936e-06,
+      "loss": 1.8859,
+      "step": 24850
+    },
+    {
+      "epoch": 7.8,
+      "learning_rate": 2.202007528230866e-06,
+      "loss": 1.8268,
+      "step": 24860
+    },
+    {
+      "epoch": 7.8,
+      "learning_rate": 2.198870765370138e-06,
+      "loss": 1.852,
+      "step": 24870
+    },
+    {
+      "epoch": 7.8,
+      "learning_rate": 2.1957340025094106e-06,
+      "loss": 1.7632,
+      "step": 24880
+    },
+    {
+      "epoch": 7.81,
+      "learning_rate": 2.1925972396486826e-06,
+      "loss": 1.8561,
+      "step": 24890
+    },
+    {
+      "epoch": 7.81,
+      "learning_rate": 2.189460476787955e-06,
+      "loss": 1.817,
+      "step": 24900
+    },
+    {
+      "epoch": 7.81,
+      "learning_rate": 2.186323713927227e-06,
+      "loss": 1.792,
+      "step": 24910
+    },
+    {
+      "epoch": 7.82,
+      "learning_rate": 2.1831869510664996e-06,
+      "loss": 1.8024,
+      "step": 24920
+    },
+    {
+      "epoch": 7.82,
+      "learning_rate": 2.1800501882057717e-06,
+      "loss": 1.8756,
+      "step": 24930
+    },
+    {
+      "epoch": 7.82,
+      "learning_rate": 2.176913425345044e-06,
+      "loss": 1.8239,
+      "step": 24940
+    },
+    {
+      "epoch": 7.83,
+      "learning_rate": 2.173776662484316e-06,
+      "loss": 1.846,
+      "step": 24950
+    },
+    {
+      "epoch": 7.83,
+      "learning_rate": 2.1706398996235886e-06,
+      "loss": 1.8936,
+      "step": 24960
+    },
+    {
+      "epoch": 7.83,
+      "learning_rate": 2.167503136762861e-06,
+      "loss": 1.9536,
+      "step": 24970
+    },
+    {
+      "epoch": 7.84,
+      "learning_rate": 2.164366373902133e-06,
+      "loss": 1.8185,
+      "step": 24980
+    },
+    {
+      "epoch": 7.84,
+      "learning_rate": 2.1612296110414056e-06,
+      "loss": 1.8076,
+      "step": 24990
+    },
+    {
+      "epoch": 7.84,
+      "learning_rate": 2.1580928481806777e-06,
+      "loss": 1.9388,
+      "step": 25000
+    },
+    {
+      "epoch": 7.85,
+      "learning_rate": 2.15495608531995e-06,
+      "loss": 1.7815,
+      "step": 25010
+    },
+    {
+      "epoch": 7.85,
+      "learning_rate": 2.151819322459222e-06,
+      "loss": 1.8411,
+      "step": 25020
+    },
+    {
+      "epoch": 7.85,
+      "learning_rate": 2.1486825595984946e-06,
+      "loss": 1.858,
+      "step": 25030
+    },
+    {
+      "epoch": 7.85,
+      "learning_rate": 2.1455457967377667e-06,
+      "loss": 1.8114,
+      "step": 25040
+    },
+    {
+      "epoch": 7.86,
+      "learning_rate": 2.142409033877039e-06,
+      "loss": 1.8516,
+      "step": 25050
+    },
+    {
+      "epoch": 7.86,
+      "learning_rate": 2.139272271016311e-06,
+      "loss": 1.8683,
+      "step": 25060
+    },
+    {
+      "epoch": 7.86,
+      "learning_rate": 2.1361355081555837e-06,
+      "loss": 1.9183,
+      "step": 25070
+    },
+    {
+      "epoch": 7.87,
+      "learning_rate": 2.1329987452948557e-06,
+      "loss": 1.8695,
+      "step": 25080
+    },
+    {
+      "epoch": 7.87,
+      "learning_rate": 2.129861982434128e-06,
+      "loss": 1.8753,
+      "step": 25090
+    },
+    {
+      "epoch": 7.87,
+      "learning_rate": 2.1267252195734002e-06,
+      "loss": 1.8406,
+      "step": 25100
+    },
+    {
+      "epoch": 7.88,
+      "learning_rate": 2.1235884567126727e-06,
+      "loss": 1.8115,
+      "step": 25110
+    },
+    {
+      "epoch": 7.88,
+      "learning_rate": 2.120451693851945e-06,
+      "loss": 1.9119,
+      "step": 25120
+    },
+    {
+      "epoch": 7.88,
+      "learning_rate": 2.1173149309912172e-06,
+      "loss": 1.7686,
+      "step": 25130
+    },
+    {
+      "epoch": 7.89,
+      "learning_rate": 2.1141781681304897e-06,
+      "loss": 1.9034,
+      "step": 25140
+    },
+    {
+      "epoch": 7.89,
+      "learning_rate": 2.1110414052697617e-06,
+      "loss": 1.8498,
+      "step": 25150
+    },
+    {
+      "epoch": 7.89,
+      "learning_rate": 2.107904642409034e-06,
+      "loss": 1.8183,
+      "step": 25160
+    },
+    {
+      "epoch": 7.9,
+      "learning_rate": 2.1047678795483062e-06,
+      "loss": 1.8255,
+      "step": 25170
+    },
+    {
+      "epoch": 7.9,
+      "learning_rate": 2.1016311166875787e-06,
+      "loss": 1.8853,
+      "step": 25180
+    },
+    {
+      "epoch": 7.9,
+      "learning_rate": 2.0984943538268508e-06,
+      "loss": 1.8643,
+      "step": 25190
+    },
+    {
+      "epoch": 7.9,
+      "learning_rate": 2.0953575909661232e-06,
+      "loss": 1.8537,
+      "step": 25200
+    },
+    {
+      "epoch": 7.91,
+      "learning_rate": 2.0922208281053953e-06,
+      "loss": 1.8535,
+      "step": 25210
+    },
+    {
+      "epoch": 7.91,
+      "learning_rate": 2.0890840652446677e-06,
+      "loss": 1.8192,
+      "step": 25220
+    },
+    {
+      "epoch": 7.91,
+      "learning_rate": 2.0859473023839398e-06,
+      "loss": 1.9235,
+      "step": 25230
+    },
+    {
+      "epoch": 7.92,
+      "learning_rate": 2.0828105395232122e-06,
+      "loss": 1.7911,
+      "step": 25240
+    },
+    {
+      "epoch": 7.92,
+      "learning_rate": 2.0796737766624843e-06,
+      "loss": 1.8031,
+      "step": 25250
+    },
+    {
+      "epoch": 7.92,
+      "learning_rate": 2.0765370138017568e-06,
+      "loss": 1.8374,
+      "step": 25260
+    },
+    {
+      "epoch": 7.93,
+      "learning_rate": 2.073400250941029e-06,
+      "loss": 1.8302,
+      "step": 25270
+    },
+    {
+      "epoch": 7.93,
+      "learning_rate": 2.0702634880803013e-06,
+      "loss": 1.7694,
+      "step": 25280
+    },
+    {
+      "epoch": 7.93,
+      "learning_rate": 2.0671267252195737e-06,
+      "loss": 1.8412,
+      "step": 25290
+    },
+    {
+      "epoch": 7.94,
+      "learning_rate": 2.063989962358846e-06,
+      "loss": 1.8374,
+      "step": 25300
+    },
+    {
+      "epoch": 7.94,
+      "learning_rate": 2.0608531994981183e-06,
+      "loss": 1.8052,
+      "step": 25310
+    },
+    {
+      "epoch": 7.94,
+      "learning_rate": 2.0577164366373903e-06,
+      "loss": 1.826,
+      "step": 25320
+    },
+    {
+      "epoch": 7.95,
+      "learning_rate": 2.0545796737766628e-06,
+      "loss": 1.884,
+      "step": 25330
+    },
+    {
+      "epoch": 7.95,
+      "learning_rate": 2.0514429109159352e-06,
+      "loss": 1.8261,
+      "step": 25340
+    },
+    {
+      "epoch": 7.95,
+      "learning_rate": 2.0483061480552073e-06,
+      "loss": 1.8037,
+      "step": 25350
+    },
+    {
+      "epoch": 7.95,
+      "learning_rate": 2.0451693851944798e-06,
+      "loss": 1.8361,
+      "step": 25360
+    },
+    {
+      "epoch": 7.96,
+      "learning_rate": 2.042032622333752e-06,
+      "loss": 1.789,
+      "step": 25370
+    },
+    {
+      "epoch": 7.96,
+      "learning_rate": 2.038895859473024e-06,
+      "loss": 1.8791,
+      "step": 25380
+    },
+    {
+      "epoch": 7.96,
+      "learning_rate": 2.0357590966122963e-06,
+      "loss": 1.7095,
+      "step": 25390
+    },
+    {
+      "epoch": 7.97,
+      "learning_rate": 2.0326223337515684e-06,
+      "loss": 1.8753,
+      "step": 25400
+    },
+    {
+      "epoch": 7.97,
+      "learning_rate": 2.029485570890841e-06,
+      "loss": 1.6906,
+      "step": 25410
+    },
+    {
+      "epoch": 7.97,
+      "learning_rate": 2.026348808030113e-06,
+      "loss": 1.8425,
+      "step": 25420
+    },
+    {
+      "epoch": 7.98,
+      "learning_rate": 2.0232120451693853e-06,
+      "loss": 1.8265,
+      "step": 25430
+    },
+    {
+      "epoch": 7.98,
+      "learning_rate": 2.0200752823086574e-06,
+      "loss": 1.8445,
+      "step": 25440
+    },
+    {
+      "epoch": 7.98,
+      "learning_rate": 2.01693851944793e-06,
+      "loss": 1.901,
+      "step": 25450
+    },
+    {
+      "epoch": 7.99,
+      "learning_rate": 2.0138017565872023e-06,
+      "loss": 1.855,
+      "step": 25460
+    },
+    {
+      "epoch": 7.99,
+      "learning_rate": 2.0106649937264744e-06,
+      "loss": 1.8212,
+      "step": 25470
+    },
+    {
+      "epoch": 7.99,
+      "learning_rate": 2.007528230865747e-06,
+      "loss": 1.8864,
+      "step": 25480
+    },
+    {
+      "epoch": 8.0,
+      "learning_rate": 2.004391468005019e-06,
+      "loss": 1.8775,
+      "step": 25490
+    },
+    {
+      "epoch": 8.0,
+      "learning_rate": 2.0012547051442913e-06,
+      "loss": 1.8761,
+      "step": 25500
+    },
+    {
+      "epoch": 8.0,
+      "eval_loss": 1.8085027933120728,
+      "eval_runtime": 13.6113,
+      "eval_samples_per_second": 73.468,
+      "eval_steps_per_second": 4.628,
+      "step": 25504
+    },
+    {
+      "epoch": 8.0,
+      "learning_rate": 1.998117942283564e-06,
+      "loss": 1.8419,
+      "step": 25510
+    },
+    {
+      "epoch": 8.01,
+      "learning_rate": 1.994981179422836e-06,
+      "loss": 1.8554,
+      "step": 25520
+    },
+    {
+      "epoch": 8.01,
+      "learning_rate": 1.9918444165621083e-06,
+      "loss": 1.8783,
+      "step": 25530
+    },
+    {
+      "epoch": 8.01,
+      "learning_rate": 1.9887076537013804e-06,
+      "loss": 1.8445,
+      "step": 25540
+    },
+    {
+      "epoch": 8.01,
+      "learning_rate": 1.985570890840653e-06,
+      "loss": 1.8647,
+      "step": 25550
+    },
+    {
+      "epoch": 8.02,
+      "learning_rate": 1.982434127979925e-06,
+      "loss": 1.7844,
+      "step": 25560
+    },
+    {
+      "epoch": 8.02,
+      "learning_rate": 1.979297365119197e-06,
+      "loss": 1.8327,
+      "step": 25570
+    },
+    {
+      "epoch": 8.02,
+      "learning_rate": 1.9761606022584694e-06,
+      "loss": 1.8807,
+      "step": 25580
+    },
+    {
+      "epoch": 8.03,
+      "learning_rate": 1.9730238393977414e-06,
+      "loss": 1.8707,
+      "step": 25590
+    },
+    {
+      "epoch": 8.03,
+      "learning_rate": 1.969887076537014e-06,
+      "loss": 1.8531,
+      "step": 25600
+    },
+    {
+      "epoch": 8.03,
+      "learning_rate": 1.966750313676286e-06,
+      "loss": 1.8546,
+      "step": 25610
+    },
+    {
+      "epoch": 8.04,
+      "learning_rate": 1.9636135508155584e-06,
+      "loss": 1.8613,
+      "step": 25620
+    },
+    {
+      "epoch": 8.04,
+      "learning_rate": 1.960476787954831e-06,
+      "loss": 1.8682,
+      "step": 25630
+    },
+    {
+      "epoch": 8.04,
+      "learning_rate": 1.957340025094103e-06,
+      "loss": 1.8709,
+      "step": 25640
+    },
+    {
+      "epoch": 8.05,
+      "learning_rate": 1.9542032622333754e-06,
+      "loss": 1.8032,
+      "step": 25650
+    },
+    {
+      "epoch": 8.05,
+      "learning_rate": 1.9510664993726475e-06,
+      "loss": 1.8855,
+      "step": 25660
+    },
+    {
+      "epoch": 8.05,
+      "learning_rate": 1.94792973651192e-06,
+      "loss": 1.8255,
+      "step": 25670
+    },
+    {
+      "epoch": 8.06,
+      "learning_rate": 1.9447929736511924e-06,
+      "loss": 1.8782,
+      "step": 25680
+    },
+    {
+      "epoch": 8.06,
+      "learning_rate": 1.9416562107904644e-06,
+      "loss": 1.8759,
+      "step": 25690
+    },
+    {
+      "epoch": 8.06,
+      "learning_rate": 1.938519447929737e-06,
+      "loss": 1.8345,
+      "step": 25700
+    },
+    {
+      "epoch": 8.06,
+      "learning_rate": 1.935382685069009e-06,
+      "loss": 1.8346,
+      "step": 25710
+    },
+    {
+      "epoch": 8.07,
+      "learning_rate": 1.9322459222082814e-06,
+      "loss": 1.8408,
+      "step": 25720
+    },
+    {
+      "epoch": 8.07,
+      "learning_rate": 1.9291091593475535e-06,
+      "loss": 1.848,
+      "step": 25730
+    },
+    {
+      "epoch": 8.07,
+      "learning_rate": 1.9259723964868255e-06,
+      "loss": 1.8057,
+      "step": 25740
+    },
+    {
+      "epoch": 8.08,
+      "learning_rate": 1.922835633626098e-06,
+      "loss": 1.7701,
+      "step": 25750
+    },
+    {
+      "epoch": 8.08,
+      "learning_rate": 1.91969887076537e-06,
+      "loss": 1.8032,
+      "step": 25760
+    },
+    {
+      "epoch": 8.08,
+      "learning_rate": 1.9165621079046425e-06,
+      "loss": 1.8393,
+      "step": 25770
+    },
+    {
+      "epoch": 8.09,
+      "learning_rate": 1.913425345043915e-06,
+      "loss": 1.7831,
+      "step": 25780
+    },
+    {
+      "epoch": 8.09,
+      "learning_rate": 1.910288582183187e-06,
+      "loss": 1.7805,
+      "step": 25790
+    },
+    {
+      "epoch": 8.09,
+      "learning_rate": 1.9071518193224595e-06,
+      "loss": 1.8198,
+      "step": 25800
+    },
+    {
+      "epoch": 8.1,
+      "learning_rate": 1.9040150564617315e-06,
+      "loss": 1.7913,
+      "step": 25810
+    },
+    {
+      "epoch": 8.1,
+      "learning_rate": 1.900878293601004e-06,
+      "loss": 1.7361,
+      "step": 25820
+    },
+    {
+      "epoch": 8.1,
+      "learning_rate": 1.897741530740276e-06,
+      "loss": 1.8002,
+      "step": 25830
+    },
+    {
+      "epoch": 8.11,
+      "learning_rate": 1.8946047678795485e-06,
+      "loss": 1.8309,
+      "step": 25840
+    },
+    {
+      "epoch": 8.11,
+      "learning_rate": 1.8914680050188208e-06,
+      "loss": 1.8588,
+      "step": 25850
+    },
+    {
+      "epoch": 8.11,
+      "learning_rate": 1.888331242158093e-06,
+      "loss": 1.8685,
+      "step": 25860
+    },
+    {
+      "epoch": 8.11,
+      "learning_rate": 1.8851944792973653e-06,
+      "loss": 1.7893,
+      "step": 25870
+    },
+    {
+      "epoch": 8.12,
+      "learning_rate": 1.8820577164366375e-06,
+      "loss": 1.7893,
+      "step": 25880
+    },
+    {
+      "epoch": 8.12,
+      "learning_rate": 1.8789209535759098e-06,
+      "loss": 1.9111,
+      "step": 25890
+    },
+    {
+      "epoch": 8.12,
+      "learning_rate": 1.8757841907151822e-06,
+      "loss": 1.8303,
+      "step": 25900
+    },
+    {
+      "epoch": 8.13,
+      "learning_rate": 1.8726474278544543e-06,
+      "loss": 1.8459,
+      "step": 25910
+    },
+    {
+      "epoch": 8.13,
+      "learning_rate": 1.8695106649937268e-06,
+      "loss": 1.8117,
+      "step": 25920
+    },
+    {
+      "epoch": 8.13,
+      "learning_rate": 1.8663739021329988e-06,
+      "loss": 1.7602,
+      "step": 25930
+    },
+    {
+      "epoch": 8.14,
+      "learning_rate": 1.8632371392722713e-06,
+      "loss": 1.8715,
+      "step": 25940
+    },
+    {
+      "epoch": 8.14,
+      "learning_rate": 1.8601003764115435e-06,
+      "loss": 1.8285,
+      "step": 25950
+    },
+    {
+      "epoch": 8.14,
+      "learning_rate": 1.8569636135508156e-06,
+      "loss": 1.7996,
+      "step": 25960
+    },
+    {
+      "epoch": 8.15,
+      "learning_rate": 1.853826850690088e-06,
+      "loss": 1.8965,
+      "step": 25970
+    },
+    {
+      "epoch": 8.15,
+      "learning_rate": 1.85069008782936e-06,
+      "loss": 1.7853,
+      "step": 25980
+    },
+    {
+      "epoch": 8.15,
+      "learning_rate": 1.8475533249686326e-06,
+      "loss": 1.8037,
+      "step": 25990
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 1.844416562107905e-06,
+      "loss": 1.786,
+      "step": 26000
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 1.841279799247177e-06,
+      "loss": 1.9019,
+      "step": 26010
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 1.8381430363864493e-06,
+      "loss": 1.9225,
+      "step": 26020
+    },
+    {
+      "epoch": 8.16,
+      "learning_rate": 1.8350062735257216e-06,
+      "loss": 1.8514,
+      "step": 26030
+    },
+    {
+      "epoch": 8.17,
+      "learning_rate": 1.8318695106649938e-06,
+      "loss": 1.8365,
+      "step": 26040
+    },
+    {
+      "epoch": 8.17,
+      "learning_rate": 1.828732747804266e-06,
+      "loss": 1.7828,
+      "step": 26050
+    },
+    {
+      "epoch": 8.17,
+      "learning_rate": 1.8255959849435384e-06,
+      "loss": 1.8321,
+      "step": 26060
+    },
+    {
+      "epoch": 8.18,
+      "learning_rate": 1.8224592220828108e-06,
+      "loss": 1.7532,
+      "step": 26070
+    },
+    {
+      "epoch": 8.18,
+      "learning_rate": 1.8193224592220829e-06,
+      "loss": 1.8928,
+      "step": 26080
+    },
+    {
+      "epoch": 8.18,
+      "learning_rate": 1.8161856963613553e-06,
+      "loss": 1.8561,
+      "step": 26090
+    },
+    {
+      "epoch": 8.19,
+      "learning_rate": 1.8130489335006274e-06,
+      "loss": 1.9077,
+      "step": 26100
+    },
+    {
+      "epoch": 8.19,
+      "learning_rate": 1.8099121706398999e-06,
+      "loss": 1.815,
+      "step": 26110
+    },
+    {
+      "epoch": 8.19,
+      "learning_rate": 1.8067754077791721e-06,
+      "loss": 1.7752,
+      "step": 26120
+    },
+    {
+      "epoch": 8.2,
+      "learning_rate": 1.8036386449184444e-06,
+      "loss": 1.8435,
+      "step": 26130
+    },
+    {
+      "epoch": 8.2,
+      "learning_rate": 1.8005018820577166e-06,
+      "loss": 1.8642,
+      "step": 26140
+    },
+    {
+      "epoch": 8.2,
+      "learning_rate": 1.7973651191969887e-06,
+      "loss": 1.807,
+      "step": 26150
+    },
+    {
+      "epoch": 8.21,
+      "learning_rate": 1.7942283563362611e-06,
+      "loss": 1.8056,
+      "step": 26160
+    },
+    {
+      "epoch": 8.21,
+      "learning_rate": 1.7910915934755336e-06,
+      "loss": 1.8102,
+      "step": 26170
+    },
+    {
+      "epoch": 8.21,
+      "learning_rate": 1.7879548306148056e-06,
+      "loss": 1.9075,
+      "step": 26180
+    },
+    {
+      "epoch": 8.22,
+      "learning_rate": 1.7848180677540781e-06,
+      "loss": 1.7885,
+      "step": 26190
+    },
+    {
+      "epoch": 8.22,
+      "learning_rate": 1.7816813048933502e-06,
+      "loss": 1.8633,
+      "step": 26200
+    },
+    {
+      "epoch": 8.22,
+      "learning_rate": 1.7785445420326224e-06,
+      "loss": 1.8132,
+      "step": 26210
+    },
+    {
+      "epoch": 8.22,
+      "learning_rate": 1.7754077791718949e-06,
+      "loss": 1.8301,
+      "step": 26220
+    },
+    {
+      "epoch": 8.23,
+      "learning_rate": 1.772271016311167e-06,
+      "loss": 1.8868,
+      "step": 26230
+    },
+    {
+      "epoch": 8.23,
+      "learning_rate": 1.7691342534504394e-06,
+      "loss": 1.8068,
+      "step": 26240
+    },
+    {
+      "epoch": 8.23,
+      "learning_rate": 1.7659974905897114e-06,
+      "loss": 1.816,
+      "step": 26250
+    },
+    {
+      "epoch": 8.24,
+      "learning_rate": 1.762860727728984e-06,
+      "loss": 1.7982,
+      "step": 26260
+    },
+    {
+      "epoch": 8.24,
+      "learning_rate": 1.759723964868256e-06,
+      "loss": 1.8405,
+      "step": 26270
+    },
+    {
+      "epoch": 8.24,
+      "learning_rate": 1.7565872020075284e-06,
+      "loss": 1.8382,
+      "step": 26280
+    },
+    {
+      "epoch": 8.25,
+      "learning_rate": 1.7534504391468007e-06,
+      "loss": 1.7793,
+      "step": 26290
+    },
+    {
+      "epoch": 8.25,
+      "learning_rate": 1.750313676286073e-06,
+      "loss": 1.8145,
+      "step": 26300
+    },
+    {
+      "epoch": 8.25,
+      "learning_rate": 1.7471769134253452e-06,
+      "loss": 1.8759,
+      "step": 26310
+    },
+    {
+      "epoch": 8.26,
+      "learning_rate": 1.7440401505646172e-06,
+      "loss": 1.8452,
+      "step": 26320
+    },
+    {
+      "epoch": 8.26,
+      "learning_rate": 1.7409033877038897e-06,
+      "loss": 1.8489,
+      "step": 26330
+    },
+    {
+      "epoch": 8.26,
+      "learning_rate": 1.7377666248431622e-06,
+      "loss": 1.8525,
+      "step": 26340
+    },
+    {
+      "epoch": 8.27,
+      "learning_rate": 1.7346298619824342e-06,
+      "loss": 1.8752,
+      "step": 26350
+    },
+    {
+      "epoch": 8.27,
+      "learning_rate": 1.7314930991217067e-06,
+      "loss": 1.8404,
+      "step": 26360
+    },
+    {
+      "epoch": 8.27,
+      "learning_rate": 1.7283563362609787e-06,
+      "loss": 1.8458,
+      "step": 26370
+    },
+    {
+      "epoch": 8.27,
+      "learning_rate": 1.725219573400251e-06,
+      "loss": 1.8859,
+      "step": 26380
+    },
+    {
+      "epoch": 8.28,
+      "learning_rate": 1.7220828105395235e-06,
+      "loss": 1.7915,
+      "step": 26390
+    },
+    {
+      "epoch": 8.28,
+      "learning_rate": 1.7189460476787955e-06,
+      "loss": 1.9167,
+      "step": 26400
+    },
+    {
+      "epoch": 8.28,
+      "learning_rate": 1.715809284818068e-06,
+      "loss": 1.8609,
+      "step": 26410
+    },
+    {
+      "epoch": 8.29,
+      "learning_rate": 1.71267252195734e-06,
+      "loss": 1.9143,
+      "step": 26420
+    },
+    {
+      "epoch": 8.29,
+      "learning_rate": 1.7095357590966125e-06,
+      "loss": 1.809,
+      "step": 26430
+    },
+    {
+      "epoch": 8.29,
+      "learning_rate": 1.7063989962358847e-06,
+      "loss": 1.7668,
+      "step": 26440
+    },
+    {
+      "epoch": 8.3,
+      "learning_rate": 1.703262233375157e-06,
+      "loss": 1.8096,
+      "step": 26450
+    },
+    {
+      "epoch": 8.3,
+      "learning_rate": 1.7001254705144293e-06,
+      "loss": 1.8033,
+      "step": 26460
+    },
+    {
+      "epoch": 8.3,
+      "learning_rate": 1.6969887076537015e-06,
+      "loss": 1.8294,
+      "step": 26470
+    },
+    {
+      "epoch": 8.31,
+      "learning_rate": 1.6938519447929738e-06,
+      "loss": 1.7282,
+      "step": 26480
+    },
+    {
+      "epoch": 8.31,
+      "learning_rate": 1.690715181932246e-06,
+      "loss": 1.8274,
+      "step": 26490
+    },
+    {
+      "epoch": 8.31,
+      "learning_rate": 1.6875784190715183e-06,
+      "loss": 1.811,
+      "step": 26500
+    },
+    {
+      "epoch": 8.32,
+      "learning_rate": 1.6844416562107908e-06,
+      "loss": 1.8093,
+      "step": 26510
+    },
+    {
+      "epoch": 8.32,
+      "learning_rate": 1.6813048933500628e-06,
+      "loss": 1.8854,
+      "step": 26520
+    },
+    {
+      "epoch": 8.32,
+      "learning_rate": 1.6781681304893353e-06,
+      "loss": 1.8327,
+      "step": 26530
+    },
+    {
+      "epoch": 8.32,
+      "learning_rate": 1.6750313676286073e-06,
+      "loss": 1.8446,
+      "step": 26540
+    },
+    {
+      "epoch": 8.33,
+      "learning_rate": 1.6718946047678798e-06,
+      "loss": 1.8623,
+      "step": 26550
+    },
+    {
+      "epoch": 8.33,
+      "learning_rate": 1.668757841907152e-06,
+      "loss": 1.8318,
+      "step": 26560
+    },
+    {
+      "epoch": 8.33,
+      "learning_rate": 1.665621079046424e-06,
+      "loss": 1.7852,
+      "step": 26570
+    },
+    {
+      "epoch": 8.34,
+      "learning_rate": 1.6624843161856965e-06,
+      "loss": 1.7953,
+      "step": 26580
+    },
+    {
+      "epoch": 8.34,
+      "learning_rate": 1.6593475533249686e-06,
+      "loss": 1.9024,
+      "step": 26590
+    },
+    {
+      "epoch": 8.34,
+      "learning_rate": 1.656210790464241e-06,
+      "loss": 1.838,
+      "step": 26600
+    },
+    {
+      "epoch": 8.35,
+      "learning_rate": 1.6530740276035135e-06,
+      "loss": 1.9028,
+      "step": 26610
+    },
+    {
+      "epoch": 8.35,
+      "learning_rate": 1.6499372647427856e-06,
+      "loss": 1.8269,
+      "step": 26620
+    },
+    {
+      "epoch": 8.35,
+      "learning_rate": 1.6468005018820578e-06,
+      "loss": 1.8538,
+      "step": 26630
+    },
+    {
+      "epoch": 8.36,
+      "learning_rate": 1.64366373902133e-06,
+      "loss": 1.8703,
+      "step": 26640
+    },
+    {
+      "epoch": 8.36,
+      "learning_rate": 1.6405269761606023e-06,
+      "loss": 1.769,
+      "step": 26650
+    },
+    {
+      "epoch": 8.36,
+      "learning_rate": 1.6373902132998748e-06,
+      "loss": 1.7672,
+      "step": 26660
+    },
+    {
+      "epoch": 8.37,
+      "learning_rate": 1.6342534504391469e-06,
+      "loss": 1.8547,
+      "step": 26670
+    },
+    {
+      "epoch": 8.37,
+      "learning_rate": 1.6311166875784193e-06,
+      "loss": 1.8411,
+      "step": 26680
+    },
+    {
+      "epoch": 8.37,
+      "learning_rate": 1.6279799247176914e-06,
+      "loss": 1.813,
+      "step": 26690
+    },
+    {
+      "epoch": 8.38,
+      "learning_rate": 1.6248431618569638e-06,
+      "loss": 1.9123,
+      "step": 26700
+    },
+    {
+      "epoch": 8.38,
+      "learning_rate": 1.6217063989962359e-06,
+      "loss": 1.844,
+      "step": 26710
+    },
+    {
+      "epoch": 8.38,
+      "learning_rate": 1.6185696361355084e-06,
+      "loss": 1.8704,
+      "step": 26720
+    },
+    {
+      "epoch": 8.38,
+      "learning_rate": 1.6154328732747806e-06,
+      "loss": 1.7696,
+      "step": 26730
+    },
+    {
+      "epoch": 8.39,
+      "learning_rate": 1.6122961104140529e-06,
+      "loss": 1.8558,
+      "step": 26740
+    },
+    {
+      "epoch": 8.39,
+      "learning_rate": 1.6091593475533251e-06,
+      "loss": 1.8155,
+      "step": 26750
+    },
+    {
+      "epoch": 8.39,
+      "learning_rate": 1.6060225846925972e-06,
+      "loss": 1.8761,
+      "step": 26760
+    },
+    {
+      "epoch": 8.4,
+      "learning_rate": 1.6028858218318696e-06,
+      "loss": 1.7798,
+      "step": 26770
+    },
+    {
+      "epoch": 8.4,
+      "learning_rate": 1.5997490589711421e-06,
+      "loss": 1.8079,
+      "step": 26780
+    },
+    {
+      "epoch": 8.4,
+      "learning_rate": 1.5966122961104142e-06,
+      "loss": 1.8161,
+      "step": 26790
+    },
+    {
+      "epoch": 8.41,
+      "learning_rate": 1.5934755332496864e-06,
+      "loss": 1.8797,
+      "step": 26800
+    },
+    {
+      "epoch": 8.41,
+      "learning_rate": 1.5903387703889587e-06,
+      "loss": 1.8012,
+      "step": 26810
+    },
+    {
+      "epoch": 8.41,
+      "learning_rate": 1.587202007528231e-06,
+      "loss": 1.865,
+      "step": 26820
+    },
+    {
+      "epoch": 8.42,
+      "learning_rate": 1.5840652446675034e-06,
+      "loss": 1.808,
+      "step": 26830
+    },
+    {
+      "epoch": 8.42,
+      "learning_rate": 1.5809284818067754e-06,
+      "loss": 1.8396,
+      "step": 26840
+    },
+    {
+      "epoch": 8.42,
+      "learning_rate": 1.577791718946048e-06,
+      "loss": 1.794,
+      "step": 26850
+    },
+    {
+      "epoch": 8.43,
+      "learning_rate": 1.57465495608532e-06,
+      "loss": 1.8262,
+      "step": 26860
+    },
+    {
+      "epoch": 8.43,
+      "learning_rate": 1.5715181932245924e-06,
+      "loss": 1.8224,
+      "step": 26870
+    },
+    {
+      "epoch": 8.43,
+      "learning_rate": 1.5683814303638647e-06,
+      "loss": 1.8647,
+      "step": 26880
+    },
+    {
+      "epoch": 8.43,
+      "learning_rate": 1.565244667503137e-06,
+      "loss": 1.8389,
+      "step": 26890
+    },
+    {
+      "epoch": 8.44,
+      "learning_rate": 1.5621079046424092e-06,
+      "loss": 1.8384,
+      "step": 26900
+    },
+    {
+      "epoch": 8.44,
+      "learning_rate": 1.5589711417816814e-06,
+      "loss": 1.8201,
+      "step": 26910
+    },
+    {
+      "epoch": 8.44,
+      "learning_rate": 1.5558343789209537e-06,
+      "loss": 1.8713,
+      "step": 26920
+    },
+    {
+      "epoch": 8.45,
+      "learning_rate": 1.5526976160602257e-06,
+      "loss": 1.7515,
+      "step": 26930
+    },
+    {
+      "epoch": 8.45,
+      "learning_rate": 1.5495608531994982e-06,
+      "loss": 1.832,
+      "step": 26940
+    },
+    {
+      "epoch": 8.45,
+      "learning_rate": 1.5464240903387707e-06,
+      "loss": 1.8487,
+      "step": 26950
+    },
+    {
+      "epoch": 8.46,
+      "learning_rate": 1.5432873274780427e-06,
+      "loss": 1.7924,
+      "step": 26960
+    },
+    {
+      "epoch": 8.46,
+      "learning_rate": 1.5401505646173152e-06,
+      "loss": 1.7845,
+      "step": 26970
+    },
+    {
+      "epoch": 8.46,
+      "learning_rate": 1.5370138017565872e-06,
+      "loss": 1.8839,
+      "step": 26980
+    },
+    {
+      "epoch": 8.47,
+      "learning_rate": 1.5338770388958595e-06,
+      "loss": 1.7541,
+      "step": 26990
+    },
+    {
+      "epoch": 8.47,
+      "learning_rate": 1.530740276035132e-06,
+      "loss": 1.7754,
+      "step": 27000
+    },
+    {
+      "epoch": 8.47,
+      "learning_rate": 1.527603513174404e-06,
+      "loss": 1.8529,
+      "step": 27010
+    },
+    {
+      "epoch": 8.48,
+      "learning_rate": 1.5244667503136765e-06,
+      "loss": 1.8047,
+      "step": 27020
+    },
+    {
+      "epoch": 8.48,
+      "learning_rate": 1.5213299874529485e-06,
+      "loss": 1.8239,
+      "step": 27030
+    },
+    {
+      "epoch": 8.48,
+      "learning_rate": 1.518193224592221e-06,
+      "loss": 1.8293,
+      "step": 27040
+    },
+    {
+      "epoch": 8.48,
+      "learning_rate": 1.5150564617314932e-06,
+      "loss": 1.8962,
+      "step": 27050
+    },
+    {
+      "epoch": 8.49,
+      "learning_rate": 1.5119196988707655e-06,
+      "loss": 1.803,
+      "step": 27060
+    },
+    {
+      "epoch": 8.49,
+      "learning_rate": 1.5087829360100378e-06,
+      "loss": 1.819,
+      "step": 27070
+    },
+    {
+      "epoch": 8.49,
+      "learning_rate": 1.50564617314931e-06,
+      "loss": 1.839,
+      "step": 27080
+    },
+    {
+      "epoch": 8.5,
+      "learning_rate": 1.5025094102885823e-06,
+      "loss": 1.8884,
+      "step": 27090
+    },
+    {
+      "epoch": 8.5,
+      "learning_rate": 1.4993726474278547e-06,
+      "loss": 1.9084,
+      "step": 27100
+    },
+    {
+      "epoch": 8.5,
+      "learning_rate": 1.4962358845671268e-06,
+      "loss": 1.8082,
+      "step": 27110
+    },
+    {
+      "epoch": 8.51,
+      "learning_rate": 1.4930991217063993e-06,
+      "loss": 1.7976,
+      "step": 27120
+    },
+    {
+      "epoch": 8.51,
+      "learning_rate": 1.4899623588456713e-06,
+      "loss": 1.815,
+      "step": 27130
+    },
+    {
+      "epoch": 8.51,
+      "learning_rate": 1.4868255959849438e-06,
+      "loss": 1.8745,
+      "step": 27140
+    },
+    {
+      "epoch": 8.52,
+      "learning_rate": 1.4836888331242158e-06,
+      "loss": 1.8226,
+      "step": 27150
+    },
+    {
+      "epoch": 8.52,
+      "learning_rate": 1.4805520702634883e-06,
+      "loss": 1.8518,
+      "step": 27160
+    },
+    {
+      "epoch": 8.52,
+      "learning_rate": 1.4774153074027605e-06,
+      "loss": 1.8528,
+      "step": 27170
+    },
+    {
+      "epoch": 8.53,
+      "learning_rate": 1.4742785445420326e-06,
+      "loss": 1.8632,
+      "step": 27180
+    },
+    {
+      "epoch": 8.53,
+      "learning_rate": 1.471141781681305e-06,
+      "loss": 1.7614,
+      "step": 27190
+    },
+    {
+      "epoch": 8.53,
+      "learning_rate": 1.468005018820577e-06,
+      "loss": 1.7805,
+      "step": 27200
+    },
+    {
+      "epoch": 8.54,
+      "learning_rate": 1.4648682559598496e-06,
+      "loss": 1.8009,
+      "step": 27210
+    },
+    {
+      "epoch": 8.54,
+      "learning_rate": 1.461731493099122e-06,
+      "loss": 1.8522,
+      "step": 27220
+    },
+    {
+      "epoch": 8.54,
+      "learning_rate": 1.458594730238394e-06,
+      "loss": 1.806,
+      "step": 27230
+    },
+    {
+      "epoch": 8.54,
+      "learning_rate": 1.4554579673776663e-06,
+      "loss": 1.8533,
+      "step": 27240
+    },
+    {
+      "epoch": 8.55,
+      "learning_rate": 1.4523212045169386e-06,
+      "loss": 1.9368,
+      "step": 27250
+    },
+    {
+      "epoch": 8.55,
+      "learning_rate": 1.4491844416562109e-06,
+      "loss": 1.8579,
+      "step": 27260
+    },
+    {
+      "epoch": 8.55,
+      "learning_rate": 1.4460476787954833e-06,
+      "loss": 1.8546,
+      "step": 27270
+    },
+    {
+      "epoch": 8.56,
+      "learning_rate": 1.4429109159347554e-06,
+      "loss": 1.7599,
+      "step": 27280
+    },
+    {
+      "epoch": 8.56,
+      "learning_rate": 1.4397741530740278e-06,
+      "loss": 1.9231,
+      "step": 27290
+    },
+    {
+      "epoch": 8.56,
+      "learning_rate": 1.4366373902132999e-06,
+      "loss": 1.8332,
+      "step": 27300
+    },
+    {
+      "epoch": 8.57,
+      "learning_rate": 1.4335006273525723e-06,
+      "loss": 1.7885,
+      "step": 27310
+    },
+    {
+      "epoch": 8.57,
+      "learning_rate": 1.4303638644918446e-06,
+      "loss": 1.849,
+      "step": 27320
+    },
+    {
+      "epoch": 8.57,
+      "learning_rate": 1.4272271016311169e-06,
+      "loss": 1.8045,
+      "step": 27330
+    },
+    {
+      "epoch": 8.58,
+      "learning_rate": 1.4240903387703891e-06,
+      "loss": 1.8928,
+      "step": 27340
+    },
+    {
+      "epoch": 8.58,
+      "learning_rate": 1.4209535759096612e-06,
+      "loss": 1.8827,
+      "step": 27350
+    },
+    {
+      "epoch": 8.58,
+      "learning_rate": 1.4178168130489336e-06,
+      "loss": 1.8549,
+      "step": 27360
+    },
+    {
+      "epoch": 8.59,
+      "learning_rate": 1.4146800501882057e-06,
+      "loss": 1.787,
+      "step": 27370
+    },
+    {
+      "epoch": 8.59,
+      "learning_rate": 1.4115432873274781e-06,
+      "loss": 1.8102,
+      "step": 27380
+    },
+    {
+      "epoch": 8.59,
+      "learning_rate": 1.4084065244667506e-06,
+      "loss": 1.8703,
+      "step": 27390
+    },
+    {
+      "epoch": 8.59,
+      "learning_rate": 1.4052697616060227e-06,
+      "loss": 1.8759,
+      "step": 27400
+    },
+    {
+      "epoch": 8.6,
+      "learning_rate": 1.402132998745295e-06,
+      "loss": 1.8059,
+      "step": 27410
+    },
+    {
+      "epoch": 8.6,
+      "learning_rate": 1.3989962358845672e-06,
+      "loss": 1.8641,
+      "step": 27420
+    },
+    {
+      "epoch": 8.6,
+      "learning_rate": 1.3958594730238394e-06,
+      "loss": 1.7733,
+      "step": 27430
+    },
+    {
+      "epoch": 8.61,
+      "learning_rate": 1.3927227101631119e-06,
+      "loss": 1.8804,
+      "step": 27440
+    },
+    {
+      "epoch": 8.61,
+      "learning_rate": 1.389585947302384e-06,
+      "loss": 1.8322,
+      "step": 27450
+    },
+    {
+      "epoch": 8.61,
+      "learning_rate": 1.3864491844416564e-06,
+      "loss": 1.8676,
+      "step": 27460
+    },
+    {
+      "epoch": 8.62,
+      "learning_rate": 1.3833124215809285e-06,
+      "loss": 1.7624,
+      "step": 27470
+    },
+    {
+      "epoch": 8.62,
+      "learning_rate": 1.380175658720201e-06,
+      "loss": 1.8463,
+      "step": 27480
+    },
+    {
+      "epoch": 8.62,
+      "learning_rate": 1.3770388958594732e-06,
+      "loss": 1.7873,
+      "step": 27490
+    },
+    {
+      "epoch": 8.63,
+      "learning_rate": 1.3739021329987454e-06,
+      "loss": 1.8458,
+      "step": 27500
+    },
+    {
+      "epoch": 8.63,
+      "learning_rate": 1.3707653701380177e-06,
+      "loss": 1.9638,
+      "step": 27510
+    },
+    {
+      "epoch": 8.63,
+      "learning_rate": 1.36762860727729e-06,
+      "loss": 1.8246,
+      "step": 27520
+    },
+    {
+      "epoch": 8.64,
+      "learning_rate": 1.3644918444165622e-06,
+      "loss": 1.7838,
+      "step": 27530
+    },
+    {
+      "epoch": 8.64,
+      "learning_rate": 1.3613550815558347e-06,
+      "loss": 1.859,
+      "step": 27540
+    },
+    {
+      "epoch": 8.64,
+      "learning_rate": 1.3582183186951067e-06,
+      "loss": 1.8282,
+      "step": 27550
+    },
+    {
+      "epoch": 8.64,
+      "learning_rate": 1.3550815558343792e-06,
+      "loss": 1.7744,
+      "step": 27560
+    },
+    {
+      "epoch": 8.65,
+      "learning_rate": 1.3519447929736512e-06,
+      "loss": 1.8345,
+      "step": 27570
+    },
+    {
+      "epoch": 8.65,
+      "learning_rate": 1.3488080301129237e-06,
+      "loss": 1.8246,
+      "step": 27580
+    },
+    {
+      "epoch": 8.65,
+      "learning_rate": 1.3456712672521957e-06,
+      "loss": 1.8078,
+      "step": 27590
+    },
+    {
+      "epoch": 8.66,
+      "learning_rate": 1.342534504391468e-06,
+      "loss": 1.8357,
+      "step": 27600
+    },
+    {
+      "epoch": 8.66,
+      "learning_rate": 1.3393977415307405e-06,
+      "loss": 1.8526,
+      "step": 27610
+    },
+    {
+      "epoch": 8.66,
+      "learning_rate": 1.3362609786700125e-06,
+      "loss": 1.8486,
+      "step": 27620
+    },
+    {
+      "epoch": 8.67,
+      "learning_rate": 1.333124215809285e-06,
+      "loss": 1.8583,
+      "step": 27630
+    },
+    {
+      "epoch": 8.67,
+      "learning_rate": 1.329987452948557e-06,
+      "loss": 1.8361,
+      "step": 27640
+    },
+    {
+      "epoch": 8.67,
+      "learning_rate": 1.3268506900878295e-06,
+      "loss": 1.8049,
+      "step": 27650
+    },
+    {
+      "epoch": 8.68,
+      "learning_rate": 1.3237139272271018e-06,
+      "loss": 1.8453,
+      "step": 27660
+    },
+    {
+      "epoch": 8.68,
+      "learning_rate": 1.320577164366374e-06,
+      "loss": 1.7916,
+      "step": 27670
+    },
+    {
+      "epoch": 8.68,
+      "learning_rate": 1.3174404015056463e-06,
+      "loss": 1.8771,
+      "step": 27680
+    },
+    {
+      "epoch": 8.69,
+      "learning_rate": 1.3143036386449185e-06,
+      "loss": 1.8396,
+      "step": 27690
+    },
+    {
+      "epoch": 8.69,
+      "learning_rate": 1.3111668757841908e-06,
+      "loss": 1.8477,
+      "step": 27700
+    },
+    {
+      "epoch": 8.69,
+      "learning_rate": 1.3080301129234632e-06,
+      "loss": 1.896,
+      "step": 27710
+    },
+    {
+      "epoch": 8.7,
+      "learning_rate": 1.3048933500627353e-06,
+      "loss": 1.8484,
+      "step": 27720
+    },
+    {
+      "epoch": 8.7,
+      "learning_rate": 1.3017565872020078e-06,
+      "loss": 1.8034,
+      "step": 27730
+    },
+    {
+      "epoch": 8.7,
+      "learning_rate": 1.2986198243412798e-06,
+      "loss": 1.8502,
+      "step": 27740
+    },
+    {
+      "epoch": 8.7,
+      "learning_rate": 1.2954830614805523e-06,
+      "loss": 1.8693,
+      "step": 27750
+    },
+    {
+      "epoch": 8.71,
+      "learning_rate": 1.2923462986198245e-06,
+      "loss": 1.8367,
+      "step": 27760
+    },
+    {
+      "epoch": 8.71,
+      "learning_rate": 1.2892095357590968e-06,
+      "loss": 1.849,
+      "step": 27770
+    },
+    {
+      "epoch": 8.71,
+      "learning_rate": 1.286072772898369e-06,
+      "loss": 1.8938,
+      "step": 27780
+    },
+    {
+      "epoch": 8.72,
+      "learning_rate": 1.282936010037641e-06,
+      "loss": 1.8266,
+      "step": 27790
+    },
+    {
+      "epoch": 8.72,
+      "learning_rate": 1.2797992471769136e-06,
+      "loss": 1.7925,
+      "step": 27800
+    },
+    {
+      "epoch": 8.72,
+      "learning_rate": 1.2766624843161856e-06,
+      "loss": 1.7811,
+      "step": 27810
+    },
+    {
+      "epoch": 8.73,
+      "learning_rate": 1.273525721455458e-06,
+      "loss": 1.9285,
+      "step": 27820
+    },
+    {
+      "epoch": 8.73,
+      "learning_rate": 1.2703889585947305e-06,
+      "loss": 1.8258,
+      "step": 27830
+    },
+    {
+      "epoch": 8.73,
+      "learning_rate": 1.2672521957340026e-06,
+      "loss": 1.8142,
+      "step": 27840
+    },
+    {
+      "epoch": 8.74,
+      "learning_rate": 1.2641154328732748e-06,
+      "loss": 1.7592,
+      "step": 27850
+    },
+    {
+      "epoch": 8.74,
+      "learning_rate": 1.260978670012547e-06,
+      "loss": 1.7749,
+      "step": 27860
+    },
+    {
+      "epoch": 8.74,
+      "learning_rate": 1.2578419071518194e-06,
+      "loss": 1.893,
+      "step": 27870
+    },
+    {
+      "epoch": 8.75,
+      "learning_rate": 1.2547051442910918e-06,
+      "loss": 1.7893,
+      "step": 27880
+    },
+    {
+      "epoch": 8.75,
+      "learning_rate": 1.2515683814303639e-06,
+      "loss": 1.8241,
+      "step": 27890
+    },
+    {
+      "epoch": 8.75,
+      "learning_rate": 1.2484316185696363e-06,
+      "loss": 1.8894,
+      "step": 27900
+    },
+    {
+      "epoch": 8.75,
+      "learning_rate": 1.2452948557089086e-06,
+      "loss": 1.8563,
+      "step": 27910
+    },
+    {
+      "epoch": 8.76,
+      "learning_rate": 1.2421580928481808e-06,
+      "loss": 1.8682,
+      "step": 27920
+    },
+    {
+      "epoch": 8.76,
+      "learning_rate": 1.2390213299874531e-06,
+      "loss": 1.8305,
+      "step": 27930
+    },
+    {
+      "epoch": 8.76,
+      "learning_rate": 1.2358845671267254e-06,
+      "loss": 1.7523,
+      "step": 27940
+    },
+    {
+      "epoch": 8.77,
+      "learning_rate": 1.2327478042659976e-06,
+      "loss": 1.7347,
+      "step": 27950
+    },
+    {
+      "epoch": 8.77,
+      "learning_rate": 1.2296110414052699e-06,
+      "loss": 1.8742,
+      "step": 27960
+    },
+    {
+      "epoch": 8.77,
+      "learning_rate": 1.2264742785445421e-06,
+      "loss": 1.8519,
+      "step": 27970
+    },
+    {
+      "epoch": 8.78,
+      "learning_rate": 1.2233375156838144e-06,
+      "loss": 1.8233,
+      "step": 27980
+    },
+    {
+      "epoch": 8.78,
+      "learning_rate": 1.2202007528230866e-06,
+      "loss": 1.8308,
+      "step": 27990
+    },
+    {
+      "epoch": 8.78,
+      "learning_rate": 1.217063989962359e-06,
+      "loss": 1.8121,
+      "step": 28000
+    },
+    {
+      "epoch": 8.79,
+      "learning_rate": 1.2139272271016314e-06,
+      "loss": 1.8179,
+      "step": 28010
+    },
+    {
+      "epoch": 8.79,
+      "learning_rate": 1.2107904642409034e-06,
+      "loss": 1.8859,
+      "step": 28020
+    },
+    {
+      "epoch": 8.79,
+      "learning_rate": 1.2076537013801757e-06,
+      "loss": 1.776,
+      "step": 28030
+    },
+    {
+      "epoch": 8.8,
+      "learning_rate": 1.204516938519448e-06,
+      "loss": 1.7907,
+      "step": 28040
+    },
+    {
+      "epoch": 8.8,
+      "learning_rate": 1.2013801756587202e-06,
+      "loss": 1.8682,
+      "step": 28050
+    },
+    {
+      "epoch": 8.8,
+      "learning_rate": 1.1982434127979927e-06,
+      "loss": 1.8239,
+      "step": 28060
+    },
+    {
+      "epoch": 8.8,
+      "learning_rate": 1.195106649937265e-06,
+      "loss": 1.8007,
+      "step": 28070
+    },
+    {
+      "epoch": 8.81,
+      "learning_rate": 1.1919698870765372e-06,
+      "loss": 1.7608,
+      "step": 28080
+    },
+    {
+      "epoch": 8.81,
+      "learning_rate": 1.1888331242158094e-06,
+      "loss": 1.8069,
+      "step": 28090
+    },
+    {
+      "epoch": 8.81,
+      "learning_rate": 1.1856963613550817e-06,
+      "loss": 1.8402,
+      "step": 28100
+    },
+    {
+      "epoch": 8.82,
+      "learning_rate": 1.182559598494354e-06,
+      "loss": 1.8986,
+      "step": 28110
+    },
+    {
+      "epoch": 8.82,
+      "learning_rate": 1.1794228356336262e-06,
+      "loss": 1.8938,
+      "step": 28120
+    },
+    {
+      "epoch": 8.82,
+      "learning_rate": 1.1762860727728985e-06,
+      "loss": 1.8132,
+      "step": 28130
+    },
+    {
+      "epoch": 8.83,
+      "learning_rate": 1.1731493099121707e-06,
+      "loss": 1.8571,
+      "step": 28140
+    },
+    {
+      "epoch": 8.83,
+      "learning_rate": 1.170012547051443e-06,
+      "loss": 1.8696,
+      "step": 28150
+    },
+    {
+      "epoch": 8.83,
+      "learning_rate": 1.1668757841907152e-06,
+      "loss": 1.8399,
+      "step": 28160
+    },
+    {
+      "epoch": 8.84,
+      "learning_rate": 1.1637390213299877e-06,
+      "loss": 1.806,
+      "step": 28170
+    },
+    {
+      "epoch": 8.84,
+      "learning_rate": 1.16060225846926e-06,
+      "loss": 1.884,
+      "step": 28180
+    },
+    {
+      "epoch": 8.84,
+      "learning_rate": 1.1574654956085322e-06,
+      "loss": 1.7557,
+      "step": 28190
+    },
+    {
+      "epoch": 8.85,
+      "learning_rate": 1.1543287327478042e-06,
+      "loss": 1.8812,
+      "step": 28200
+    },
+    {
+      "epoch": 8.85,
+      "learning_rate": 1.1511919698870765e-06,
+      "loss": 1.7912,
+      "step": 28210
+    },
+    {
+      "epoch": 8.85,
+      "learning_rate": 1.1480552070263488e-06,
+      "loss": 1.7655,
+      "step": 28220
+    },
+    {
+      "epoch": 8.86,
+      "learning_rate": 1.1449184441656212e-06,
+      "loss": 1.7835,
+      "step": 28230
+    },
+    {
+      "epoch": 8.86,
+      "learning_rate": 1.1417816813048935e-06,
+      "loss": 1.8024,
+      "step": 28240
+    },
+    {
+      "epoch": 8.86,
+      "learning_rate": 1.1386449184441657e-06,
+      "loss": 1.8642,
+      "step": 28250
+    },
+    {
+      "epoch": 8.86,
+      "learning_rate": 1.135508155583438e-06,
+      "loss": 1.8401,
+      "step": 28260
+    },
+    {
+      "epoch": 8.87,
+      "learning_rate": 1.1323713927227103e-06,
+      "loss": 1.7648,
+      "step": 28270
+    },
+    {
+      "epoch": 8.87,
+      "learning_rate": 1.1292346298619825e-06,
+      "loss": 1.8564,
+      "step": 28280
+    },
+    {
+      "epoch": 8.87,
+      "learning_rate": 1.1260978670012548e-06,
+      "loss": 1.864,
+      "step": 28290
+    },
+    {
+      "epoch": 8.88,
+      "learning_rate": 1.122961104140527e-06,
+      "loss": 1.888,
+      "step": 28300
+    },
+    {
+      "epoch": 8.88,
+      "learning_rate": 1.1198243412797993e-06,
+      "loss": 1.871,
+      "step": 28310
+    },
+    {
+      "epoch": 8.88,
+      "learning_rate": 1.1166875784190715e-06,
+      "loss": 1.8735,
+      "step": 28320
+    },
+    {
+      "epoch": 8.89,
+      "learning_rate": 1.1135508155583438e-06,
+      "loss": 1.8082,
+      "step": 28330
+    },
+    {
+      "epoch": 8.89,
+      "learning_rate": 1.1104140526976163e-06,
+      "loss": 1.8749,
+      "step": 28340
+    },
+    {
+      "epoch": 8.89,
+      "learning_rate": 1.1072772898368885e-06,
+      "loss": 1.7936,
+      "step": 28350
+    },
+    {
+      "epoch": 8.9,
+      "learning_rate": 1.1041405269761608e-06,
+      "loss": 1.8015,
+      "step": 28360
+    },
+    {
+      "epoch": 8.9,
+      "learning_rate": 1.101003764115433e-06,
+      "loss": 1.8876,
+      "step": 28370
+    },
+    {
+      "epoch": 8.9,
+      "learning_rate": 1.0978670012547053e-06,
+      "loss": 1.8649,
+      "step": 28380
+    },
+    {
+      "epoch": 8.91,
+      "learning_rate": 1.0947302383939775e-06,
+      "loss": 1.7942,
+      "step": 28390
+    },
+    {
+      "epoch": 8.91,
+      "learning_rate": 1.0915934755332498e-06,
+      "loss": 1.8252,
+      "step": 28400
+    },
+    {
+      "epoch": 8.91,
+      "learning_rate": 1.088456712672522e-06,
+      "loss": 1.8082,
+      "step": 28410
+    },
+    {
+      "epoch": 8.91,
+      "learning_rate": 1.0853199498117943e-06,
+      "loss": 1.8855,
+      "step": 28420
+    },
+    {
+      "epoch": 8.92,
+      "learning_rate": 1.0821831869510666e-06,
+      "loss": 1.8745,
+      "step": 28430
+    },
+    {
+      "epoch": 8.92,
+      "learning_rate": 1.0790464240903388e-06,
+      "loss": 1.856,
+      "step": 28440
+    },
+    {
+      "epoch": 8.92,
+      "learning_rate": 1.075909661229611e-06,
+      "loss": 1.7749,
+      "step": 28450
+    },
+    {
+      "epoch": 8.93,
+      "learning_rate": 1.0727728983688833e-06,
+      "loss": 1.7862,
+      "step": 28460
+    },
+    {
+      "epoch": 8.93,
+      "learning_rate": 1.0696361355081556e-06,
+      "loss": 1.811,
+      "step": 28470
+    },
+    {
+      "epoch": 8.93,
+      "learning_rate": 1.0664993726474279e-06,
+      "loss": 1.8381,
+      "step": 28480
+    },
+    {
+      "epoch": 8.94,
+      "learning_rate": 1.0633626097867001e-06,
+      "loss": 1.8144,
+      "step": 28490
+    },
+    {
+      "epoch": 8.94,
+      "learning_rate": 1.0602258469259726e-06,
+      "loss": 1.9025,
+      "step": 28500
+    },
+    {
+      "epoch": 8.94,
+      "learning_rate": 1.0570890840652448e-06,
+      "loss": 1.7983,
+      "step": 28510
+    },
+    {
+      "epoch": 8.95,
+      "learning_rate": 1.053952321204517e-06,
+      "loss": 1.8449,
+      "step": 28520
+    },
+    {
+      "epoch": 8.95,
+      "learning_rate": 1.0508155583437894e-06,
+      "loss": 1.8314,
+      "step": 28530
+    },
+    {
+      "epoch": 8.95,
+      "learning_rate": 1.0476787954830616e-06,
+      "loss": 1.8405,
+      "step": 28540
+    },
+    {
+      "epoch": 8.96,
+      "learning_rate": 1.0445420326223339e-06,
+      "loss": 1.8417,
+      "step": 28550
+    },
+    {
+      "epoch": 8.96,
+      "learning_rate": 1.0414052697616061e-06,
+      "loss": 1.7298,
+      "step": 28560
+    },
+    {
+      "epoch": 8.96,
+      "learning_rate": 1.0382685069008784e-06,
+      "loss": 1.919,
+      "step": 28570
+    },
+    {
+      "epoch": 8.96,
+      "learning_rate": 1.0351317440401506e-06,
+      "loss": 1.8295,
+      "step": 28580
+    },
+    {
+      "epoch": 8.97,
+      "learning_rate": 1.031994981179423e-06,
+      "loss": 1.8841,
+      "step": 28590
+    },
+    {
+      "epoch": 8.97,
+      "learning_rate": 1.0288582183186952e-06,
+      "loss": 1.8232,
+      "step": 28600
+    },
+    {
+      "epoch": 8.97,
+      "learning_rate": 1.0257214554579676e-06,
+      "loss": 1.7992,
+      "step": 28610
+    },
+    {
+      "epoch": 8.98,
+      "learning_rate": 1.0225846925972399e-06,
+      "loss": 1.8448,
+      "step": 28620
+    },
+    {
+      "epoch": 8.98,
+      "learning_rate": 1.019447929736512e-06,
+      "loss": 1.8227,
+      "step": 28630
+    },
+    {
+      "epoch": 8.98,
+      "learning_rate": 1.0163111668757842e-06,
+      "loss": 1.8837,
+      "step": 28640
+    },
+    {
+      "epoch": 8.99,
+      "learning_rate": 1.0131744040150564e-06,
+      "loss": 1.814,
+      "step": 28650
+    },
+    {
+      "epoch": 8.99,
+      "learning_rate": 1.0100376411543287e-06,
+      "loss": 1.8313,
+      "step": 28660
+    },
+    {
+      "epoch": 8.99,
+      "learning_rate": 1.0069008782936012e-06,
+      "loss": 1.7769,
+      "step": 28670
+    },
+    {
+      "epoch": 9.0,
+      "learning_rate": 1.0037641154328734e-06,
+      "loss": 1.8154,
+      "step": 28680
+    },
+    {
+      "epoch": 9.0,
+      "learning_rate": 1.0006273525721457e-06,
+      "loss": 1.786,
+      "step": 28690
+    },
+    {
+      "epoch": 9.0,
+      "eval_loss": 1.8068885803222656,
+      "eval_runtime": 13.6002,
+      "eval_samples_per_second": 73.528,
+      "eval_steps_per_second": 4.632,
+      "step": 28692
+    },
+    {
+      "epoch": 9.0,
+      "learning_rate": 9.97490589711418e-07,
+      "loss": 1.8434,
+      "step": 28700
+    },
+    {
+      "epoch": 9.01,
+      "learning_rate": 9.943538268506902e-07,
+      "loss": 1.7965,
+      "step": 28710
+    },
+    {
+      "epoch": 9.01,
+      "learning_rate": 9.912170639899624e-07,
+      "loss": 1.8304,
+      "step": 28720
+    },
+    {
+      "epoch": 9.01,
+      "learning_rate": 9.880803011292347e-07,
+      "loss": 1.8272,
+      "step": 28730
+    },
+    {
+      "epoch": 9.02,
+      "learning_rate": 9.84943538268507e-07,
+      "loss": 1.8509,
+      "step": 28740
+    },
+    {
+      "epoch": 9.02,
+      "learning_rate": 9.818067754077792e-07,
+      "loss": 1.8452,
+      "step": 28750
+    },
+    {
+      "epoch": 9.02,
+      "learning_rate": 9.786700125470515e-07,
+      "loss": 1.7812,
+      "step": 28760
+    },
+    {
+      "epoch": 9.02,
+      "learning_rate": 9.755332496863237e-07,
+      "loss": 1.8231,
+      "step": 28770
+    },
+    {
+      "epoch": 9.03,
+      "learning_rate": 9.723964868255962e-07,
+      "loss": 1.7942,
+      "step": 28780
+    },
+    {
+      "epoch": 9.03,
+      "learning_rate": 9.692597239648685e-07,
+      "loss": 1.8309,
+      "step": 28790
+    },
+    {
+      "epoch": 9.03,
+      "learning_rate": 9.661229611041407e-07,
+      "loss": 1.8246,
+      "step": 28800
+    },
+    {
+      "epoch": 9.04,
+      "learning_rate": 9.629861982434128e-07,
+      "loss": 1.8079,
+      "step": 28810
+    },
+    {
+      "epoch": 9.04,
+      "learning_rate": 9.59849435382685e-07,
+      "loss": 1.8027,
+      "step": 28820
+    },
+    {
+      "epoch": 9.04,
+      "learning_rate": 9.567126725219575e-07,
+      "loss": 1.834,
+      "step": 28830
+    },
+    {
+      "epoch": 9.05,
+      "learning_rate": 9.535759096612297e-07,
+      "loss": 1.8374,
+      "step": 28840
+    },
+    {
+      "epoch": 9.05,
+      "learning_rate": 9.50439146800502e-07,
+      "loss": 1.7601,
+      "step": 28850
+    },
+    {
+      "epoch": 9.05,
+      "learning_rate": 9.473023839397742e-07,
+      "loss": 1.803,
+      "step": 28860
+    },
+    {
+      "epoch": 9.06,
+      "learning_rate": 9.441656210790465e-07,
+      "loss": 1.7717,
+      "step": 28870
+    },
+    {
+      "epoch": 9.06,
+      "learning_rate": 9.410288582183188e-07,
+      "loss": 1.8937,
+      "step": 28880
+    },
+    {
+      "epoch": 9.06,
+      "learning_rate": 9.378920953575911e-07,
+      "loss": 1.8349,
+      "step": 28890
+    },
+    {
+      "epoch": 9.07,
+      "learning_rate": 9.347553324968634e-07,
+      "loss": 1.7933,
+      "step": 28900
+    },
+    {
+      "epoch": 9.07,
+      "learning_rate": 9.316185696361356e-07,
+      "loss": 1.8891,
+      "step": 28910
+    },
+    {
+      "epoch": 9.07,
+      "learning_rate": 9.284818067754078e-07,
+      "loss": 1.8436,
+      "step": 28920
+    },
+    {
+      "epoch": 9.07,
+      "learning_rate": 9.2534504391468e-07,
+      "loss": 1.8642,
+      "step": 28930
+    },
+    {
+      "epoch": 9.08,
+      "learning_rate": 9.222082810539525e-07,
+      "loss": 1.8881,
+      "step": 28940
+    },
+    {
+      "epoch": 9.08,
+      "learning_rate": 9.190715181932247e-07,
+      "loss": 1.8353,
+      "step": 28950
+    },
+    {
+      "epoch": 9.08,
+      "learning_rate": 9.159347553324969e-07,
+      "loss": 1.9411,
+      "step": 28960
+    },
+    {
+      "epoch": 9.09,
+      "learning_rate": 9.127979924717692e-07,
+      "loss": 1.7851,
+      "step": 28970
+    },
+    {
+      "epoch": 9.09,
+      "learning_rate": 9.096612296110414e-07,
+      "loss": 1.8456,
+      "step": 28980
+    },
+    {
+      "epoch": 9.09,
+      "learning_rate": 9.065244667503137e-07,
+      "loss": 1.8592,
+      "step": 28990
+    },
+    {
+      "epoch": 9.1,
+      "learning_rate": 9.033877038895861e-07,
+      "loss": 1.7884,
+      "step": 29000
+    },
+    {
+      "epoch": 9.1,
+      "learning_rate": 9.002509410288583e-07,
+      "loss": 1.9313,
+      "step": 29010
+    },
+    {
+      "epoch": 9.1,
+      "learning_rate": 8.971141781681306e-07,
+      "loss": 1.8029,
+      "step": 29020
+    },
+    {
+      "epoch": 9.11,
+      "learning_rate": 8.939774153074028e-07,
+      "loss": 1.7627,
+      "step": 29030
+    },
+    {
+      "epoch": 9.11,
+      "learning_rate": 8.908406524466751e-07,
+      "loss": 1.8337,
+      "step": 29040
+    },
+    {
+      "epoch": 9.11,
+      "learning_rate": 8.877038895859474e-07,
+      "loss": 1.8205,
+      "step": 29050
+    },
+    {
+      "epoch": 9.12,
+      "learning_rate": 8.845671267252197e-07,
+      "loss": 1.8569,
+      "step": 29060
+    },
+    {
+      "epoch": 9.12,
+      "learning_rate": 8.81430363864492e-07,
+      "loss": 1.8458,
+      "step": 29070
+    },
+    {
+      "epoch": 9.12,
+      "learning_rate": 8.782936010037642e-07,
+      "loss": 1.7912,
+      "step": 29080
+    },
+    {
+      "epoch": 9.12,
+      "learning_rate": 8.751568381430365e-07,
+      "loss": 1.8414,
+      "step": 29090
+    },
+    {
+      "epoch": 9.13,
+      "learning_rate": 8.720200752823086e-07,
+      "loss": 1.8249,
+      "step": 29100
+    },
+    {
+      "epoch": 9.13,
+      "learning_rate": 8.688833124215811e-07,
+      "loss": 1.8705,
+      "step": 29110
+    },
+    {
+      "epoch": 9.13,
+      "learning_rate": 8.657465495608533e-07,
+      "loss": 1.805,
+      "step": 29120
+    },
+    {
+      "epoch": 9.14,
+      "learning_rate": 8.626097867001255e-07,
+      "loss": 1.8525,
+      "step": 29130
+    },
+    {
+      "epoch": 9.14,
+      "learning_rate": 8.594730238393978e-07,
+      "loss": 1.9076,
+      "step": 29140
+    },
+    {
+      "epoch": 9.14,
+      "learning_rate": 8.5633626097867e-07,
+      "loss": 1.8028,
+      "step": 29150
+    },
+    {
+      "epoch": 9.15,
+      "learning_rate": 8.531994981179424e-07,
+      "loss": 1.795,
+      "step": 29160
+    },
+    {
+      "epoch": 9.15,
+      "learning_rate": 8.500627352572146e-07,
+      "loss": 1.8149,
+      "step": 29170
+    },
+    {
+      "epoch": 9.15,
+      "learning_rate": 8.469259723964869e-07,
+      "loss": 1.8113,
+      "step": 29180
+    },
+    {
+      "epoch": 9.16,
+      "learning_rate": 8.437892095357591e-07,
+      "loss": 1.749,
+      "step": 29190
+    },
+    {
+      "epoch": 9.16,
+      "learning_rate": 8.406524466750314e-07,
+      "loss": 1.8426,
+      "step": 29200
+    },
+    {
+      "epoch": 9.16,
+      "learning_rate": 8.375156838143037e-07,
+      "loss": 1.8539,
+      "step": 29210
+    },
+    {
+      "epoch": 9.17,
+      "learning_rate": 8.34378920953576e-07,
+      "loss": 1.795,
+      "step": 29220
+    },
+    {
+      "epoch": 9.17,
+      "learning_rate": 8.312421580928483e-07,
+      "loss": 1.8827,
+      "step": 29230
+    },
+    {
+      "epoch": 9.17,
+      "learning_rate": 8.281053952321205e-07,
+      "loss": 1.7929,
+      "step": 29240
+    },
+    {
+      "epoch": 9.18,
+      "learning_rate": 8.249686323713928e-07,
+      "loss": 1.822,
+      "step": 29250
+    },
+    {
+      "epoch": 9.18,
+      "learning_rate": 8.21831869510665e-07,
+      "loss": 1.8702,
+      "step": 29260
+    },
+    {
+      "epoch": 9.18,
+      "learning_rate": 8.186951066499374e-07,
+      "loss": 1.8072,
+      "step": 29270
+    },
+    {
+      "epoch": 9.18,
+      "learning_rate": 8.155583437892097e-07,
+      "loss": 1.9117,
+      "step": 29280
+    },
+    {
+      "epoch": 9.19,
+      "learning_rate": 8.124215809284819e-07,
+      "loss": 1.8963,
+      "step": 29290
+    },
+    {
+      "epoch": 9.19,
+      "learning_rate": 8.092848180677542e-07,
+      "loss": 1.908,
+      "step": 29300
+    },
+    {
+      "epoch": 9.19,
+      "learning_rate": 8.061480552070264e-07,
+      "loss": 1.7967,
+      "step": 29310
+    },
+    {
+      "epoch": 9.2,
+      "learning_rate": 8.030112923462986e-07,
+      "loss": 1.8093,
+      "step": 29320
+    },
+    {
+      "epoch": 9.2,
+      "learning_rate": 7.998745294855711e-07,
+      "loss": 1.8192,
+      "step": 29330
+    },
+    {
+      "epoch": 9.2,
+      "learning_rate": 7.967377666248432e-07,
+      "loss": 1.8415,
+      "step": 29340
+    },
+    {
+      "epoch": 9.21,
+      "learning_rate": 7.936010037641155e-07,
+      "loss": 1.7622,
+      "step": 29350
+    },
+    {
+      "epoch": 9.21,
+      "learning_rate": 7.904642409033877e-07,
+      "loss": 1.791,
+      "step": 29360
+    },
+    {
+      "epoch": 9.21,
+      "learning_rate": 7.8732747804266e-07,
+      "loss": 1.7661,
+      "step": 29370
+    },
+    {
+      "epoch": 9.22,
+      "learning_rate": 7.841907151819323e-07,
+      "loss": 1.7692,
+      "step": 29380
+    },
+    {
+      "epoch": 9.22,
+      "learning_rate": 7.810539523212046e-07,
+      "loss": 1.8309,
+      "step": 29390
+    },
+    {
+      "epoch": 9.22,
+      "learning_rate": 7.779171894604768e-07,
+      "loss": 1.9005,
+      "step": 29400
+    },
+    {
+      "epoch": 9.23,
+      "learning_rate": 7.747804265997491e-07,
+      "loss": 1.7942,
+      "step": 29410
+    },
+    {
+      "epoch": 9.23,
+      "learning_rate": 7.716436637390214e-07,
+      "loss": 1.8144,
+      "step": 29420
+    },
+    {
+      "epoch": 9.23,
+      "learning_rate": 7.685069008782936e-07,
+      "loss": 1.7952,
+      "step": 29430
+    },
+    {
+      "epoch": 9.23,
+      "learning_rate": 7.65370138017566e-07,
+      "loss": 1.8524,
+      "step": 29440
+    },
+    {
+      "epoch": 9.24,
+      "learning_rate": 7.622333751568382e-07,
+      "loss": 1.8202,
+      "step": 29450
+    },
+    {
+      "epoch": 9.24,
+      "learning_rate": 7.590966122961105e-07,
+      "loss": 1.8161,
+      "step": 29460
+    },
+    {
+      "epoch": 9.24,
+      "learning_rate": 7.559598494353828e-07,
+      "loss": 1.8541,
+      "step": 29470
+    },
+    {
+      "epoch": 9.25,
+      "learning_rate": 7.52823086574655e-07,
+      "loss": 1.849,
+      "step": 29480
+    },
+    {
+      "epoch": 9.25,
+      "learning_rate": 7.496863237139274e-07,
+      "loss": 1.8604,
+      "step": 29490
+    },
+    {
+      "epoch": 9.25,
+      "learning_rate": 7.465495608531996e-07,
+      "loss": 1.8133,
+      "step": 29500
+    },
+    {
+      "epoch": 9.26,
+      "learning_rate": 7.434127979924719e-07,
+      "loss": 1.86,
+      "step": 29510
+    },
+    {
+      "epoch": 9.26,
+      "learning_rate": 7.402760351317441e-07,
+      "loss": 1.7641,
+      "step": 29520
+    },
+    {
+      "epoch": 9.26,
+      "learning_rate": 7.371392722710163e-07,
+      "loss": 1.7825,
+      "step": 29530
+    },
+    {
+      "epoch": 9.27,
+      "learning_rate": 7.340025094102885e-07,
+      "loss": 1.7355,
+      "step": 29540
+    },
+    {
+      "epoch": 9.27,
+      "learning_rate": 7.30865746549561e-07,
+      "loss": 1.7702,
+      "step": 29550
+    },
+    {
+      "epoch": 9.27,
+      "learning_rate": 7.277289836888332e-07,
+      "loss": 1.8893,
+      "step": 29560
+    },
+    {
+      "epoch": 9.28,
+      "learning_rate": 7.245922208281054e-07,
+      "loss": 1.8506,
+      "step": 29570
+    },
+    {
+      "epoch": 9.28,
+      "learning_rate": 7.214554579673777e-07,
+      "loss": 1.8254,
+      "step": 29580
+    },
+    {
+      "epoch": 9.28,
+      "learning_rate": 7.183186951066499e-07,
+      "loss": 1.8266,
+      "step": 29590
+    },
+    {
+      "epoch": 9.28,
+      "learning_rate": 7.151819322459223e-07,
+      "loss": 1.8025,
+      "step": 29600
+    },
+    {
+      "epoch": 9.29,
+      "learning_rate": 7.120451693851946e-07,
+      "loss": 1.8061,
+      "step": 29610
+    },
+    {
+      "epoch": 9.29,
+      "learning_rate": 7.089084065244668e-07,
+      "loss": 1.9078,
+      "step": 29620
+    },
+    {
+      "epoch": 9.29,
+      "learning_rate": 7.057716436637391e-07,
+      "loss": 1.8518,
+      "step": 29630
+    },
+    {
+      "epoch": 9.3,
+      "learning_rate": 7.026348808030113e-07,
+      "loss": 1.8402,
+      "step": 29640
+    },
+    {
+      "epoch": 9.3,
+      "learning_rate": 6.994981179422836e-07,
+      "loss": 1.8586,
+      "step": 29650
+    },
+    {
+      "epoch": 9.3,
+      "learning_rate": 6.963613550815559e-07,
+      "loss": 1.8409,
+      "step": 29660
+    },
+    {
+      "epoch": 9.31,
+      "learning_rate": 6.932245922208282e-07,
+      "loss": 1.819,
+      "step": 29670
+    },
+    {
+      "epoch": 9.31,
+      "learning_rate": 6.900878293601005e-07,
+      "loss": 1.8325,
+      "step": 29680
+    },
+    {
+      "epoch": 9.31,
+      "learning_rate": 6.869510664993727e-07,
+      "loss": 1.8588,
+      "step": 29690
+    },
+    {
+      "epoch": 9.32,
+      "learning_rate": 6.83814303638645e-07,
+      "loss": 1.8201,
+      "step": 29700
+    },
+    {
+      "epoch": 9.32,
+      "learning_rate": 6.806775407779173e-07,
+      "loss": 1.8307,
+      "step": 29710
+    },
+    {
+      "epoch": 9.32,
+      "learning_rate": 6.775407779171896e-07,
+      "loss": 1.7905,
+      "step": 29720
+    },
+    {
+      "epoch": 9.33,
+      "learning_rate": 6.744040150564618e-07,
+      "loss": 1.8419,
+      "step": 29730
+    },
+    {
+      "epoch": 9.33,
+      "learning_rate": 6.71267252195734e-07,
+      "loss": 1.7966,
+      "step": 29740
+    },
+    {
+      "epoch": 9.33,
+      "learning_rate": 6.681304893350063e-07,
+      "loss": 1.7745,
+      "step": 29750
+    },
+    {
+      "epoch": 9.34,
+      "learning_rate": 6.649937264742785e-07,
+      "loss": 1.8687,
+      "step": 29760
+    },
+    {
+      "epoch": 9.34,
+      "learning_rate": 6.618569636135509e-07,
+      "loss": 1.8453,
+      "step": 29770
+    },
+    {
+      "epoch": 9.34,
+      "learning_rate": 6.587202007528231e-07,
+      "loss": 1.8108,
+      "step": 29780
+    },
+    {
+      "epoch": 9.34,
+      "learning_rate": 6.555834378920954e-07,
+      "loss": 1.8947,
+      "step": 29790
+    },
+    {
+      "epoch": 9.35,
+      "learning_rate": 6.524466750313676e-07,
+      "loss": 1.8328,
+      "step": 29800
+    },
+    {
+      "epoch": 9.35,
+      "learning_rate": 6.493099121706399e-07,
+      "loss": 1.7725,
+      "step": 29810
+    },
+    {
+      "epoch": 9.35,
+      "learning_rate": 6.461731493099123e-07,
+      "loss": 1.8631,
+      "step": 29820
+    },
+    {
+      "epoch": 9.36,
+      "learning_rate": 6.430363864491845e-07,
+      "loss": 1.7941,
+      "step": 29830
+    },
+    {
+      "epoch": 9.36,
+      "learning_rate": 6.398996235884568e-07,
+      "loss": 1.7644,
+      "step": 29840
+    },
+    {
+      "epoch": 9.36,
+      "learning_rate": 6.36762860727729e-07,
+      "loss": 1.8353,
+      "step": 29850
+    },
+    {
+      "epoch": 9.37,
+      "learning_rate": 6.336260978670013e-07,
+      "loss": 1.8568,
+      "step": 29860
+    },
+    {
+      "epoch": 9.37,
+      "learning_rate": 6.304893350062735e-07,
+      "loss": 1.8817,
+      "step": 29870
+    },
+    {
+      "epoch": 9.37,
+      "learning_rate": 6.273525721455459e-07,
+      "loss": 1.7775,
+      "step": 29880
+    },
+    {
+      "epoch": 9.38,
+      "learning_rate": 6.242158092848182e-07,
+      "loss": 1.9279,
+      "step": 29890
+    },
+    {
+      "epoch": 9.38,
+      "learning_rate": 6.210790464240904e-07,
+      "loss": 1.8116,
+      "step": 29900
+    },
+    {
+      "epoch": 9.38,
+      "learning_rate": 6.179422835633627e-07,
+      "loss": 1.8166,
+      "step": 29910
+    },
+    {
+      "epoch": 9.39,
+      "learning_rate": 6.148055207026349e-07,
+      "loss": 1.7839,
+      "step": 29920
+    },
+    {
+      "epoch": 9.39,
+      "learning_rate": 6.116687578419072e-07,
+      "loss": 1.8313,
+      "step": 29930
+    },
+    {
+      "epoch": 9.39,
+      "learning_rate": 6.085319949811795e-07,
+      "loss": 1.8809,
+      "step": 29940
+    },
+    {
+      "epoch": 9.39,
+      "learning_rate": 6.053952321204517e-07,
+      "loss": 1.8002,
+      "step": 29950
+    },
+    {
+      "epoch": 9.4,
+      "learning_rate": 6.02258469259724e-07,
+      "loss": 1.8235,
+      "step": 29960
+    },
+    {
+      "epoch": 9.4,
+      "learning_rate": 5.991217063989963e-07,
+      "loss": 1.7913,
+      "step": 29970
+    },
+    {
+      "epoch": 9.4,
+      "learning_rate": 5.959849435382686e-07,
+      "loss": 1.7681,
+      "step": 29980
+    },
+    {
+      "epoch": 9.41,
+      "learning_rate": 5.928481806775408e-07,
+      "loss": 1.8847,
+      "step": 29990
+    },
+    {
+      "epoch": 9.41,
+      "learning_rate": 5.897114178168131e-07,
+      "loss": 1.865,
+      "step": 30000
+    },
+    {
+      "epoch": 9.41,
+      "learning_rate": 5.865746549560854e-07,
+      "loss": 1.8295,
+      "step": 30010
+    },
+    {
+      "epoch": 9.42,
+      "learning_rate": 5.834378920953576e-07,
+      "loss": 1.8705,
+      "step": 30020
+    },
+    {
+      "epoch": 9.42,
+      "learning_rate": 5.8030112923463e-07,
+      "loss": 1.7746,
+      "step": 30030
+    },
+    {
+      "epoch": 9.42,
+      "learning_rate": 5.771643663739021e-07,
+      "loss": 1.8573,
+      "step": 30040
+    },
+    {
+      "epoch": 9.43,
+      "learning_rate": 5.740276035131744e-07,
+      "loss": 1.8142,
+      "step": 30050
+    },
+    {
+      "epoch": 9.43,
+      "learning_rate": 5.708908406524467e-07,
+      "loss": 1.8023,
+      "step": 30060
+    },
+    {
+      "epoch": 9.43,
+      "learning_rate": 5.67754077791719e-07,
+      "loss": 1.8147,
+      "step": 30070
+    },
+    {
+      "epoch": 9.44,
+      "learning_rate": 5.646173149309913e-07,
+      "loss": 1.799,
+      "step": 30080
+    },
+    {
+      "epoch": 9.44,
+      "learning_rate": 5.614805520702635e-07,
+      "loss": 1.8694,
+      "step": 30090
+    },
+    {
+      "epoch": 9.44,
+      "learning_rate": 5.583437892095358e-07,
+      "loss": 1.872,
+      "step": 30100
+    },
+    {
+      "epoch": 9.44,
+      "learning_rate": 5.552070263488081e-07,
+      "loss": 1.8582,
+      "step": 30110
+    },
+    {
+      "epoch": 9.45,
+      "learning_rate": 5.520702634880804e-07,
+      "loss": 1.7729,
+      "step": 30120
+    },
+    {
+      "epoch": 9.45,
+      "learning_rate": 5.489335006273526e-07,
+      "loss": 1.8739,
+      "step": 30130
+    },
+    {
+      "epoch": 9.45,
+      "learning_rate": 5.457967377666249e-07,
+      "loss": 1.85,
+      "step": 30140
+    },
+    {
+      "epoch": 9.46,
+      "learning_rate": 5.426599749058972e-07,
+      "loss": 1.8112,
+      "step": 30150
+    },
+    {
+      "epoch": 9.46,
+      "learning_rate": 5.395232120451694e-07,
+      "loss": 1.8687,
+      "step": 30160
+    },
+    {
+      "epoch": 9.46,
+      "learning_rate": 5.363864491844417e-07,
+      "loss": 1.8587,
+      "step": 30170
+    },
+    {
+      "epoch": 9.47,
+      "learning_rate": 5.332496863237139e-07,
+      "loss": 1.8573,
+      "step": 30180
+    },
+    {
+      "epoch": 9.47,
+      "learning_rate": 5.301129234629863e-07,
+      "loss": 1.7881,
+      "step": 30190
+    },
+    {
+      "epoch": 9.47,
+      "learning_rate": 5.269761606022585e-07,
+      "loss": 1.8975,
+      "step": 30200
+    },
+    {
+      "epoch": 9.48,
+      "learning_rate": 5.238393977415308e-07,
+      "loss": 1.8516,
+      "step": 30210
+    },
+    {
+      "epoch": 9.48,
+      "learning_rate": 5.207026348808031e-07,
+      "loss": 1.8825,
+      "step": 30220
+    },
+    {
+      "epoch": 9.48,
+      "learning_rate": 5.175658720200753e-07,
+      "loss": 1.7745,
+      "step": 30230
+    },
+    {
+      "epoch": 9.49,
+      "learning_rate": 5.144291091593476e-07,
+      "loss": 1.8163,
+      "step": 30240
+    },
+    {
+      "epoch": 9.49,
+      "learning_rate": 5.112923462986199e-07,
+      "loss": 1.8587,
+      "step": 30250
+    },
+    {
+      "epoch": 9.49,
+      "learning_rate": 5.081555834378921e-07,
+      "loss": 1.8959,
+      "step": 30260
+    },
+    {
+      "epoch": 9.49,
+      "learning_rate": 5.050188205771643e-07,
+      "loss": 1.8437,
+      "step": 30270
+    },
+    {
+      "epoch": 9.5,
+      "learning_rate": 5.018820577164367e-07,
+      "loss": 1.8326,
+      "step": 30280
+    },
+    {
+      "epoch": 9.5,
+      "learning_rate": 4.98745294855709e-07,
+      "loss": 1.8475,
+      "step": 30290
+    },
+    {
+      "epoch": 9.5,
+      "learning_rate": 4.956085319949812e-07,
+      "loss": 1.8396,
+      "step": 30300
+    },
+    {
+      "epoch": 9.51,
+      "learning_rate": 4.924717691342535e-07,
+      "loss": 1.8421,
+      "step": 30310
+    },
+    {
+      "epoch": 9.51,
+      "learning_rate": 4.893350062735257e-07,
+      "loss": 1.8836,
+      "step": 30320
+    },
+    {
+      "epoch": 9.51,
+      "learning_rate": 4.861982434127981e-07,
+      "loss": 1.8699,
+      "step": 30330
+    },
+    {
+      "epoch": 9.52,
+      "learning_rate": 4.830614805520704e-07,
+      "loss": 1.7754,
+      "step": 30340
+    },
+    {
+      "epoch": 9.52,
+      "learning_rate": 4.799247176913425e-07,
+      "loss": 1.7852,
+      "step": 30350
+    },
+    {
+      "epoch": 9.52,
+      "learning_rate": 4.7678795483061487e-07,
+      "loss": 1.829,
+      "step": 30360
+    },
+    {
+      "epoch": 9.53,
+      "learning_rate": 4.736511919698871e-07,
+      "loss": 1.8097,
+      "step": 30370
+    },
+    {
+      "epoch": 9.53,
+      "learning_rate": 4.705144291091594e-07,
+      "loss": 1.8907,
+      "step": 30380
+    },
+    {
+      "epoch": 9.53,
+      "learning_rate": 4.673776662484317e-07,
+      "loss": 1.8662,
+      "step": 30390
+    },
+    {
+      "epoch": 9.54,
+      "learning_rate": 4.642409033877039e-07,
+      "loss": 1.8575,
+      "step": 30400
+    },
+    {
+      "epoch": 9.54,
+      "learning_rate": 4.6110414052697626e-07,
+      "loss": 1.8372,
+      "step": 30410
+    },
+    {
+      "epoch": 9.54,
+      "learning_rate": 4.5796737766624846e-07,
+      "loss": 1.8453,
+      "step": 30420
+    },
+    {
+      "epoch": 9.55,
+      "learning_rate": 4.548306148055207e-07,
+      "loss": 1.875,
+      "step": 30430
+    },
+    {
+      "epoch": 9.55,
+      "learning_rate": 4.5169385194479303e-07,
+      "loss": 1.8458,
+      "step": 30440
+    },
+    {
+      "epoch": 9.55,
+      "learning_rate": 4.485570890840653e-07,
+      "loss": 1.7614,
+      "step": 30450
+    },
+    {
+      "epoch": 9.55,
+      "learning_rate": 4.4542032622333754e-07,
+      "loss": 1.8266,
+      "step": 30460
+    },
+    {
+      "epoch": 9.56,
+      "learning_rate": 4.4228356336260985e-07,
+      "loss": 1.8236,
+      "step": 30470
+    },
+    {
+      "epoch": 9.56,
+      "learning_rate": 4.391468005018821e-07,
+      "loss": 1.8841,
+      "step": 30480
+    },
+    {
+      "epoch": 9.56,
+      "learning_rate": 4.360100376411543e-07,
+      "loss": 1.8424,
+      "step": 30490
+    },
+    {
+      "epoch": 9.57,
+      "learning_rate": 4.3287327478042667e-07,
+      "loss": 1.8284,
+      "step": 30500
+    },
+    {
+      "epoch": 9.57,
+      "learning_rate": 4.297365119196989e-07,
+      "loss": 1.7975,
+      "step": 30510
+    },
+    {
+      "epoch": 9.57,
+      "learning_rate": 4.265997490589712e-07,
+      "loss": 1.86,
+      "step": 30520
+    },
+    {
+      "epoch": 9.58,
+      "learning_rate": 4.2346298619824344e-07,
+      "loss": 1.8916,
+      "step": 30530
+    },
+    {
+      "epoch": 9.58,
+      "learning_rate": 4.203262233375157e-07,
+      "loss": 1.8348,
+      "step": 30540
+    },
+    {
+      "epoch": 9.58,
+      "learning_rate": 4.17189460476788e-07,
+      "loss": 1.8673,
+      "step": 30550
+    },
+    {
+      "epoch": 9.59,
+      "learning_rate": 4.1405269761606027e-07,
+      "loss": 1.8117,
+      "step": 30560
+    },
+    {
+      "epoch": 9.59,
+      "learning_rate": 4.109159347553325e-07,
+      "loss": 1.8157,
+      "step": 30570
+    },
+    {
+      "epoch": 9.59,
+      "learning_rate": 4.0777917189460483e-07,
+      "loss": 1.8566,
+      "step": 30580
+    },
+    {
+      "epoch": 9.6,
+      "learning_rate": 4.046424090338771e-07,
+      "loss": 1.9022,
+      "step": 30590
+    },
+    {
+      "epoch": 9.6,
+      "learning_rate": 4.015056461731493e-07,
+      "loss": 1.8392,
+      "step": 30600
+    },
+    {
+      "epoch": 9.6,
+      "learning_rate": 3.983688833124216e-07,
+      "loss": 1.8338,
+      "step": 30610
+    },
+    {
+      "epoch": 9.6,
+      "learning_rate": 3.9523212045169386e-07,
+      "loss": 1.8496,
+      "step": 30620
+    },
+    {
+      "epoch": 9.61,
+      "learning_rate": 3.9209535759096617e-07,
+      "loss": 1.8472,
+      "step": 30630
+    },
+    {
+      "epoch": 9.61,
+      "learning_rate": 3.889585947302384e-07,
+      "loss": 1.8065,
+      "step": 30640
+    },
+    {
+      "epoch": 9.61,
+      "learning_rate": 3.858218318695107e-07,
+      "loss": 1.7734,
+      "step": 30650
+    },
+    {
+      "epoch": 9.62,
+      "learning_rate": 3.82685069008783e-07,
+      "loss": 1.8462,
+      "step": 30660
+    },
+    {
+      "epoch": 9.62,
+      "learning_rate": 3.7954830614805525e-07,
+      "loss": 1.8648,
+      "step": 30670
+    },
+    {
+      "epoch": 9.62,
+      "learning_rate": 3.764115432873275e-07,
+      "loss": 1.7626,
+      "step": 30680
+    },
+    {
+      "epoch": 9.63,
+      "learning_rate": 3.732747804265998e-07,
+      "loss": 1.8509,
+      "step": 30690
+    },
+    {
+      "epoch": 9.63,
+      "learning_rate": 3.7013801756587207e-07,
+      "loss": 1.8544,
+      "step": 30700
+    },
+    {
+      "epoch": 9.63,
+      "learning_rate": 3.670012547051443e-07,
+      "loss": 1.8313,
+      "step": 30710
+    },
+    {
+      "epoch": 9.64,
+      "learning_rate": 3.638644918444166e-07,
+      "loss": 1.8364,
+      "step": 30720
+    },
+    {
+      "epoch": 9.64,
+      "learning_rate": 3.6072772898368884e-07,
+      "loss": 1.833,
+      "step": 30730
+    },
+    {
+      "epoch": 9.64,
+      "learning_rate": 3.5759096612296115e-07,
+      "loss": 1.8429,
+      "step": 30740
+    },
+    {
+      "epoch": 9.65,
+      "learning_rate": 3.544542032622334e-07,
+      "loss": 1.807,
+      "step": 30750
+    },
+    {
+      "epoch": 9.65,
+      "learning_rate": 3.5131744040150566e-07,
+      "loss": 1.8482,
+      "step": 30760
+    },
+    {
+      "epoch": 9.65,
+      "learning_rate": 3.4818067754077797e-07,
+      "loss": 1.8544,
+      "step": 30770
+    },
+    {
+      "epoch": 9.65,
+      "learning_rate": 3.4504391468005023e-07,
+      "loss": 1.8451,
+      "step": 30780
+    },
+    {
+      "epoch": 9.66,
+      "learning_rate": 3.419071518193225e-07,
+      "loss": 1.9084,
+      "step": 30790
+    },
+    {
+      "epoch": 9.66,
+      "learning_rate": 3.387703889585948e-07,
+      "loss": 1.8419,
+      "step": 30800
+    },
+    {
+      "epoch": 9.66,
+      "learning_rate": 3.35633626097867e-07,
+      "loss": 1.7771,
+      "step": 30810
+    },
+    {
+      "epoch": 9.67,
+      "learning_rate": 3.3249686323713926e-07,
+      "loss": 1.7244,
+      "step": 30820
+    },
+    {
+      "epoch": 9.67,
+      "learning_rate": 3.2936010037641157e-07,
+      "loss": 1.8022,
+      "step": 30830
+    },
+    {
+      "epoch": 9.67,
+      "learning_rate": 3.262233375156838e-07,
+      "loss": 1.8439,
+      "step": 30840
+    },
+    {
+      "epoch": 9.68,
+      "learning_rate": 3.2308657465495613e-07,
+      "loss": 1.7798,
+      "step": 30850
+    },
+    {
+      "epoch": 9.68,
+      "learning_rate": 3.199498117942284e-07,
+      "loss": 1.8043,
+      "step": 30860
+    },
+    {
+      "epoch": 9.68,
+      "learning_rate": 3.1681304893350065e-07,
+      "loss": 1.7272,
+      "step": 30870
+    },
+    {
+      "epoch": 9.69,
+      "learning_rate": 3.1367628607277296e-07,
+      "loss": 1.8712,
+      "step": 30880
+    },
+    {
+      "epoch": 9.69,
+      "learning_rate": 3.105395232120452e-07,
+      "loss": 1.7728,
+      "step": 30890
+    },
+    {
+      "epoch": 9.69,
+      "learning_rate": 3.0740276035131747e-07,
+      "loss": 1.8384,
+      "step": 30900
+    },
+    {
+      "epoch": 9.7,
+      "learning_rate": 3.042659974905897e-07,
+      "loss": 1.8052,
+      "step": 30910
+    },
+    {
+      "epoch": 9.7,
+      "learning_rate": 3.01129234629862e-07,
+      "loss": 1.8385,
+      "step": 30920
+    },
+    {
+      "epoch": 9.7,
+      "learning_rate": 2.979924717691343e-07,
+      "loss": 1.9009,
+      "step": 30930
+    },
+    {
+      "epoch": 9.71,
+      "learning_rate": 2.9485570890840655e-07,
+      "loss": 1.8618,
+      "step": 30940
+    },
+    {
+      "epoch": 9.71,
+      "learning_rate": 2.917189460476788e-07,
+      "loss": 1.8397,
+      "step": 30950
+    },
+    {
+      "epoch": 9.71,
+      "learning_rate": 2.8858218318695106e-07,
+      "loss": 1.7626,
+      "step": 30960
+    },
+    {
+      "epoch": 9.71,
+      "learning_rate": 2.8544542032622337e-07,
+      "loss": 1.858,
+      "step": 30970
+    },
+    {
+      "epoch": 9.72,
+      "learning_rate": 2.8230865746549563e-07,
+      "loss": 1.8049,
+      "step": 30980
+    },
+    {
+      "epoch": 9.72,
+      "learning_rate": 2.791718946047679e-07,
+      "loss": 1.8149,
+      "step": 30990
+    },
+    {
+      "epoch": 9.72,
+      "learning_rate": 2.760351317440402e-07,
+      "loss": 1.8721,
+      "step": 31000
+    },
+    {
+      "epoch": 9.73,
+      "learning_rate": 2.7289836888331245e-07,
+      "loss": 1.8053,
+      "step": 31010
+    },
+    {
+      "epoch": 9.73,
+      "learning_rate": 2.697616060225847e-07,
+      "loss": 1.8912,
+      "step": 31020
+    },
+    {
+      "epoch": 9.73,
+      "learning_rate": 2.6662484316185696e-07,
+      "loss": 1.7795,
+      "step": 31030
+    },
+    {
+      "epoch": 9.74,
+      "learning_rate": 2.634880803011293e-07,
+      "loss": 1.794,
+      "step": 31040
+    },
+    {
+      "epoch": 9.74,
+      "learning_rate": 2.6035131744040153e-07,
+      "loss": 1.8035,
+      "step": 31050
+    },
+    {
+      "epoch": 9.74,
+      "learning_rate": 2.572145545796738e-07,
+      "loss": 1.8038,
+      "step": 31060
+    },
+    {
+      "epoch": 9.75,
+      "learning_rate": 2.5407779171894604e-07,
+      "loss": 1.8127,
+      "step": 31070
+    },
+    {
+      "epoch": 9.75,
+      "learning_rate": 2.5094102885821835e-07,
+      "loss": 1.7435,
+      "step": 31080
+    },
+    {
+      "epoch": 9.75,
+      "learning_rate": 2.478042659974906e-07,
+      "loss": 1.8577,
+      "step": 31090
+    },
+    {
+      "epoch": 9.76,
+      "learning_rate": 2.4466750313676287e-07,
+      "loss": 1.8321,
+      "step": 31100
+    },
+    {
+      "epoch": 9.76,
+      "learning_rate": 2.415307402760352e-07,
+      "loss": 1.8336,
+      "step": 31110
+    },
+    {
+      "epoch": 9.76,
+      "learning_rate": 2.3839397741530743e-07,
+      "loss": 1.8103,
+      "step": 31120
+    },
+    {
+      "epoch": 9.76,
+      "learning_rate": 2.352572145545797e-07,
+      "loss": 1.8046,
+      "step": 31130
+    },
+    {
+      "epoch": 9.77,
+      "learning_rate": 2.3212045169385195e-07,
+      "loss": 1.7768,
+      "step": 31140
+    },
+    {
+      "epoch": 9.77,
+      "learning_rate": 2.2898368883312423e-07,
+      "loss": 1.8558,
+      "step": 31150
+    },
+    {
+      "epoch": 9.77,
+      "learning_rate": 2.2584692597239651e-07,
+      "loss": 1.7743,
+      "step": 31160
+    },
+    {
+      "epoch": 9.78,
+      "learning_rate": 2.2271016311166877e-07,
+      "loss": 1.8035,
+      "step": 31170
+    },
+    {
+      "epoch": 9.78,
+      "learning_rate": 2.1957340025094105e-07,
+      "loss": 1.778,
+      "step": 31180
+    },
+    {
+      "epoch": 9.78,
+      "learning_rate": 2.1643663739021334e-07,
+      "loss": 1.7578,
+      "step": 31190
+    },
+    {
+      "epoch": 9.79,
+      "learning_rate": 2.132998745294856e-07,
+      "loss": 1.9089,
+      "step": 31200
+    },
+    {
+      "epoch": 9.79,
+      "learning_rate": 2.1016311166875785e-07,
+      "loss": 1.8688,
+      "step": 31210
+    },
+    {
+      "epoch": 9.79,
+      "learning_rate": 2.0702634880803013e-07,
+      "loss": 1.8124,
+      "step": 31220
+    },
+    {
+      "epoch": 9.8,
+      "learning_rate": 2.0388958594730242e-07,
+      "loss": 1.8198,
+      "step": 31230
+    },
+    {
+      "epoch": 9.8,
+      "learning_rate": 2.0075282308657465e-07,
+      "loss": 1.8393,
+      "step": 31240
+    },
+    {
+      "epoch": 9.8,
+      "learning_rate": 1.9761606022584693e-07,
+      "loss": 1.7781,
+      "step": 31250
+    },
+    {
+      "epoch": 9.81,
+      "learning_rate": 1.944792973651192e-07,
+      "loss": 1.8233,
+      "step": 31260
+    },
+    {
+      "epoch": 9.81,
+      "learning_rate": 1.913425345043915e-07,
+      "loss": 1.8858,
+      "step": 31270
+    },
+    {
+      "epoch": 9.81,
+      "learning_rate": 1.8820577164366375e-07,
+      "loss": 1.7169,
+      "step": 31280
+    },
+    {
+      "epoch": 9.81,
+      "learning_rate": 1.8506900878293604e-07,
+      "loss": 1.8674,
+      "step": 31290
+    },
+    {
+      "epoch": 9.82,
+      "learning_rate": 1.819322459222083e-07,
+      "loss": 1.8088,
+      "step": 31300
+    },
+    {
+      "epoch": 9.82,
+      "learning_rate": 1.7879548306148058e-07,
+      "loss": 1.7843,
+      "step": 31310
+    },
+    {
+      "epoch": 9.82,
+      "learning_rate": 1.7565872020075283e-07,
+      "loss": 1.8251,
+      "step": 31320
+    },
+    {
+      "epoch": 9.83,
+      "learning_rate": 1.7252195734002512e-07,
+      "loss": 1.9427,
+      "step": 31330
+    },
+    {
+      "epoch": 9.83,
+      "learning_rate": 1.693851944792974e-07,
+      "loss": 1.7929,
+      "step": 31340
+    },
+    {
+      "epoch": 9.83,
+      "learning_rate": 1.6624843161856963e-07,
+      "loss": 1.8276,
+      "step": 31350
+    },
+    {
+      "epoch": 9.84,
+      "learning_rate": 1.631116687578419e-07,
+      "loss": 1.8108,
+      "step": 31360
+    },
+    {
+      "epoch": 9.84,
+      "learning_rate": 1.599749058971142e-07,
+      "loss": 1.8391,
+      "step": 31370
+    },
+    {
+      "epoch": 9.84,
+      "learning_rate": 1.5683814303638648e-07,
+      "loss": 1.8235,
+      "step": 31380
+    },
+    {
+      "epoch": 9.85,
+      "learning_rate": 1.5370138017565873e-07,
+      "loss": 1.8547,
+      "step": 31390
+    },
+    {
+      "epoch": 9.85,
+      "learning_rate": 1.50564617314931e-07,
+      "loss": 1.8215,
+      "step": 31400
+    },
+    {
+      "epoch": 9.85,
+      "learning_rate": 1.4742785445420327e-07,
+      "loss": 1.7807,
+      "step": 31410
+    },
+    {
+      "epoch": 9.86,
+      "learning_rate": 1.4429109159347553e-07,
+      "loss": 1.8327,
+      "step": 31420
+    },
+    {
+      "epoch": 9.86,
+      "learning_rate": 1.4115432873274781e-07,
+      "loss": 1.741,
+      "step": 31430
+    },
+    {
+      "epoch": 9.86,
+      "learning_rate": 1.380175658720201e-07,
+      "loss": 1.7717,
+      "step": 31440
+    },
+    {
+      "epoch": 9.87,
+      "learning_rate": 1.3488080301129235e-07,
+      "loss": 1.853,
+      "step": 31450
+    },
+    {
+      "epoch": 9.87,
+      "learning_rate": 1.3174404015056464e-07,
+      "loss": 1.8866,
+      "step": 31460
+    },
+    {
+      "epoch": 9.87,
+      "learning_rate": 1.286072772898369e-07,
+      "loss": 1.7436,
+      "step": 31470
+    },
+    {
+      "epoch": 9.87,
+      "learning_rate": 1.2547051442910918e-07,
+      "loss": 1.7374,
+      "step": 31480
+    },
+    {
+      "epoch": 9.88,
+      "learning_rate": 1.2233375156838143e-07,
+      "loss": 1.8098,
+      "step": 31490
+    },
+    {
+      "epoch": 9.88,
+      "learning_rate": 1.1919698870765372e-07,
+      "loss": 1.7861,
+      "step": 31500
+    },
+    {
+      "epoch": 9.88,
+      "learning_rate": 1.1606022584692597e-07,
+      "loss": 1.8022,
+      "step": 31510
+    },
+    {
+      "epoch": 9.89,
+      "learning_rate": 1.1292346298619826e-07,
+      "loss": 1.8897,
+      "step": 31520
+    },
+    {
+      "epoch": 9.89,
+      "learning_rate": 1.0978670012547053e-07,
+      "loss": 1.7814,
+      "step": 31530
+    },
+    {
+      "epoch": 9.89,
+      "learning_rate": 1.066499372647428e-07,
+      "loss": 1.8239,
+      "step": 31540
+    },
+    {
+      "epoch": 9.9,
+      "learning_rate": 1.0351317440401507e-07,
+      "loss": 1.8212,
+      "step": 31550
+    },
+    {
+      "epoch": 9.9,
+      "learning_rate": 1.0037641154328732e-07,
+      "loss": 1.7385,
+      "step": 31560
+    },
+    {
+      "epoch": 9.9,
+      "learning_rate": 9.72396486825596e-08,
+      "loss": 1.8555,
+      "step": 31570
+    },
+    {
+      "epoch": 9.91,
+      "learning_rate": 9.410288582183188e-08,
+      "loss": 1.8566,
+      "step": 31580
+    },
+    {
+      "epoch": 9.91,
+      "learning_rate": 9.096612296110415e-08,
+      "loss": 1.822,
+      "step": 31590
+    },
+    {
+      "epoch": 9.91,
+      "learning_rate": 8.782936010037642e-08,
+      "loss": 1.8715,
+      "step": 31600
+    },
+    {
+      "epoch": 9.92,
+      "learning_rate": 8.46925972396487e-08,
+      "loss": 1.8113,
+      "step": 31610
+    },
+    {
+      "epoch": 9.92,
+      "learning_rate": 8.155583437892096e-08,
+      "loss": 1.832,
+      "step": 31620
+    },
+    {
+      "epoch": 9.92,
+      "learning_rate": 7.841907151819324e-08,
+      "loss": 1.7301,
+      "step": 31630
+    },
+    {
+      "epoch": 9.92,
+      "learning_rate": 7.52823086574655e-08,
+      "loss": 1.7224,
+      "step": 31640
+    },
+    {
+      "epoch": 9.93,
+      "learning_rate": 7.214554579673777e-08,
+      "loss": 1.8191,
+      "step": 31650
+    },
+    {
+      "epoch": 9.93,
+      "learning_rate": 6.900878293601005e-08,
+      "loss": 1.815,
+      "step": 31660
+    },
+    {
+      "epoch": 9.93,
+      "learning_rate": 6.587202007528232e-08,
+      "loss": 1.8129,
+      "step": 31670
+    },
+    {
+      "epoch": 9.94,
+      "learning_rate": 6.273525721455459e-08,
+      "loss": 1.8734,
+      "step": 31680
+    },
+    {
+      "epoch": 9.94,
+      "learning_rate": 5.959849435382686e-08,
+      "loss": 1.8497,
+      "step": 31690
+    },
+    {
+      "epoch": 9.94,
+      "learning_rate": 5.646173149309913e-08,
+      "loss": 1.7949,
+      "step": 31700
+    },
+    {
+      "epoch": 9.95,
+      "learning_rate": 5.33249686323714e-08,
+      "loss": 1.8628,
+      "step": 31710
+    },
+    {
+      "epoch": 9.95,
+      "learning_rate": 5.018820577164366e-08,
+      "loss": 1.7708,
+      "step": 31720
+    },
+    {
+      "epoch": 9.95,
+      "learning_rate": 4.705144291091594e-08,
+      "loss": 1.8327,
+      "step": 31730
+    },
+    {
+      "epoch": 9.96,
+      "learning_rate": 4.391468005018821e-08,
+      "loss": 1.9184,
+      "step": 31740
+    },
+    {
+      "epoch": 9.96,
+      "learning_rate": 4.077791718946048e-08,
+      "loss": 1.8494,
+      "step": 31750
+    },
+    {
+      "epoch": 9.96,
+      "learning_rate": 3.764115432873275e-08,
+      "loss": 1.7633,
+      "step": 31760
+    },
+    {
+      "epoch": 9.97,
+      "learning_rate": 3.4504391468005024e-08,
+      "loss": 1.7958,
+      "step": 31770
+    },
+    {
+      "epoch": 9.97,
+      "learning_rate": 3.1367628607277294e-08,
+      "loss": 1.7584,
+      "step": 31780
+    },
+    {
+      "epoch": 9.97,
+      "learning_rate": 2.8230865746549564e-08,
+      "loss": 1.8353,
+      "step": 31790
+    },
+    {
+      "epoch": 9.97,
+      "learning_rate": 2.509410288582183e-08,
+      "loss": 1.8028,
+      "step": 31800
+    },
+    {
+      "epoch": 9.98,
+      "learning_rate": 2.1957340025094104e-08,
+      "loss": 1.8179,
+      "step": 31810
+    },
+    {
+      "epoch": 9.98,
+      "learning_rate": 1.8820577164366374e-08,
+      "loss": 1.768,
+      "step": 31820
+    },
+    {
+      "epoch": 9.98,
+      "learning_rate": 1.5683814303638647e-08,
+      "loss": 1.8442,
+      "step": 31830
+    },
+    {
+      "epoch": 9.99,
+      "learning_rate": 1.2547051442910915e-08,
+      "loss": 1.8952,
+      "step": 31840
+    },
+    {
+      "epoch": 9.99,
+      "learning_rate": 9.410288582183187e-09,
+      "loss": 1.8367,
+      "step": 31850
+    },
+    {
+      "epoch": 9.99,
+      "learning_rate": 6.273525721455458e-09,
+      "loss": 1.8358,
+      "step": 31860
+    },
+    {
+      "epoch": 10.0,
+      "learning_rate": 3.136762860727729e-09,
+      "loss": 1.861,
+      "step": 31870
+    },
+    {
+      "epoch": 10.0,
+      "learning_rate": 0.0,
+      "loss": 1.8891,
+      "step": 31880
+    },
+    {
+      "epoch": 10.0,
+      "eval_loss": 1.806269645690918,
+      "eval_runtime": 13.6101,
+      "eval_samples_per_second": 73.475,
+      "eval_steps_per_second": 4.629,
+      "step": 31880
+    }
+  ],
+  "max_steps": 31880,
+  "num_train_epochs": 10,
+  "total_flos": 1.2339389647872e+17,
+  "trial_name": null,
+  "trial_params": null
+}