{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 134060, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 8.0891, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.04e-06, "loss": 7.3717, "step": 30 }, { "epoch": 0.0, "learning_rate": 2.24e-06, "loss": 5.2176, "step": 60 }, { "epoch": 0.0, "learning_rate": 3.44e-06, "loss": 4.35, "step": 90 }, { "epoch": 0.0, "learning_rate": 4.6400000000000005e-06, "loss": 3.4418, "step": 120 }, { "epoch": 0.01, "learning_rate": 5.84e-06, "loss": 2.8796, "step": 150 }, { "epoch": 0.01, "learning_rate": 7.04e-06, "loss": 2.5285, "step": 180 }, { "epoch": 0.01, "learning_rate": 8.24e-06, "loss": 2.2902, "step": 210 }, { "epoch": 0.01, "learning_rate": 9.440000000000001e-06, "loss": 2.0726, "step": 240 }, { "epoch": 0.01, "learning_rate": 1.0640000000000001e-05, "loss": 1.9922, "step": 270 }, { "epoch": 0.01, "learning_rate": 1.184e-05, "loss": 1.8281, "step": 300 }, { "epoch": 0.01, "learning_rate": 1.3040000000000002e-05, "loss": 1.8522, "step": 330 }, { "epoch": 0.01, "learning_rate": 1.4240000000000001e-05, "loss": 1.694, "step": 360 }, { "epoch": 0.01, "learning_rate": 1.544e-05, "loss": 1.5824, "step": 390 }, { "epoch": 0.02, "learning_rate": 1.664e-05, "loss": 1.52, "step": 420 }, { "epoch": 0.02, "learning_rate": 1.7840000000000002e-05, "loss": 1.5005, "step": 450 }, { "epoch": 0.02, "learning_rate": 1.904e-05, "loss": 1.474, "step": 480 }, { "epoch": 0.02, "learning_rate": 1.9881069312188603e-05, "loss": 1.4102, "step": 510 }, { "epoch": 0.02, "learning_rate": 1.9316685232156396e-05, "loss": 1.3253, "step": 540 }, { "epoch": 0.02, "learning_rate": 1.879778950992281e-05, "loss": 1.3384, "step": 570 }, { "epoch": 0.02, "learning_rate": 1.8318582636182796e-05, "loss": 1.1948, "step": 600 }, { "epoch": 0.02, "learning_rate": 1.7874250135079064e-05, "loss": 1.1314, "step": 630 }, { "epoch": 0.02, "learning_rate": 1.7460757394239457e-05, "loss": 1.062, "step": 660 }, { "epoch": 0.03, "learning_rate": 1.707469441906277e-05, "loss": 1.0079, "step": 690 }, { "epoch": 0.03, "learning_rate": 1.6713156761621893e-05, "loss": 1.0524, "step": 720 }, { "epoch": 0.03, "learning_rate": 1.6373653066597823e-05, "loss": 1.0395, "step": 750 }, { "epoch": 0.03, "learning_rate": 1.605403247669839e-05, "loss": 0.9386, "step": 780 }, { "epoch": 0.03, "learning_rate": 1.5752427045264396e-05, "loss": 0.9778, "step": 810 }, { "epoch": 0.03, "learning_rate": 1.546720562224365e-05, "loss": 0.9788, "step": 840 }, { "epoch": 0.03, "learning_rate": 1.5196936606339123e-05, "loss": 0.9897, "step": 870 }, { "epoch": 0.03, "learning_rate": 1.494035761667992e-05, "loss": 0.9103, "step": 900 }, { "epoch": 0.03, "learning_rate": 1.4696350614431104e-05, "loss": 0.916, "step": 930 }, { "epoch": 0.04, "learning_rate": 1.4463921353509297e-05, "loss": 0.8597, "step": 960 }, { "epoch": 0.04, "learning_rate": 1.4242182297397127e-05, "loss": 0.7784, "step": 990 }, { "epoch": 0.04, "learning_rate": 1.4030338331657845e-05, "loss": 0.8172, "step": 1020 }, { "epoch": 0.04, "learning_rate": 1.3827674747047456e-05, "loss": 0.8181, "step": 1050 }, { "epoch": 0.04, "learning_rate": 1.3633547078730296e-05, "loss": 0.7576, "step": 1080 }, { "epoch": 0.04, "learning_rate": 1.3447372472027636e-05, "loss": 0.7949, "step": 1110 }, { "epoch": 0.04, "learning_rate": 1.3268622310856882e-05, "loss": 0.7204, "step": 1140 }, { "epoch": 0.04, "learning_rate": 1.3096815896275181e-05, "loss": 0.7064, "step": 1170 }, { "epoch": 0.04, "learning_rate": 1.2931515002796793e-05, "loss": 0.7781, "step": 1200 }, { "epoch": 0.05, "learning_rate": 1.2772319171982632e-05, "loss": 0.6695, "step": 1230 }, { "epoch": 0.05, "learning_rate": 1.261886162812672e-05, "loss": 0.7092, "step": 1260 }, { "epoch": 0.05, "learning_rate": 1.2470805721138076e-05, "loss": 0.6964, "step": 1290 }, { "epoch": 0.05, "learning_rate": 1.2327841818038448e-05, "loss": 0.6547, "step": 1320 }, { "epoch": 0.05, "learning_rate": 1.2189684577707986e-05, "loss": 0.6966, "step": 1350 }, { "epoch": 0.05, "learning_rate": 1.2056070554260304e-05, "loss": 0.707, "step": 1380 }, { "epoch": 0.05, "learning_rate": 1.1926756083217403e-05, "loss": 0.7237, "step": 1410 }, { "epoch": 0.05, "learning_rate": 1.1801515411874576e-05, "loss": 0.6281, "step": 1440 }, { "epoch": 0.05, "learning_rate": 1.1680139041202725e-05, "loss": 0.6783, "step": 1470 }, { "epoch": 0.06, "learning_rate": 1.156243225157201e-05, "loss": 0.628, "step": 1500 }, { "epoch": 0.06, "learning_rate": 1.1448213788688051e-05, "loss": 0.6361, "step": 1530 }, { "epoch": 0.06, "learning_rate": 1.1337314689562955e-05, "loss": 0.5683, "step": 1560 }, { "epoch": 0.06, "learning_rate": 1.1229577231219766e-05, "loss": 0.5641, "step": 1590 }, { "epoch": 0.06, "learning_rate": 1.112485398724962e-05, "loss": 0.6237, "step": 1620 }, { "epoch": 0.06, "learning_rate": 1.1023006979384424e-05, "loss": 0.5927, "step": 1650 }, { "epoch": 0.06, "learning_rate": 1.0923906912979292e-05, "loss": 0.6144, "step": 1680 }, { "epoch": 0.06, "learning_rate": 1.0827432486770289e-05, "loss": 0.5461, "step": 1710 }, { "epoch": 0.06, "learning_rate": 1.0733469768527298e-05, "loss": 0.5728, "step": 1740 }, { "epoch": 0.07, "learning_rate": 1.0641911629294233e-05, "loss": 0.5896, "step": 1770 }, { "epoch": 0.07, "learning_rate": 1.0552657229828183e-05, "loss": 0.5095, "step": 1800 }, { "epoch": 0.07, "learning_rate": 1.0465611553639654e-05, "loss": 0.58, "step": 1830 }, { "epoch": 0.07, "learning_rate": 1.0380684981717496e-05, "loss": 0.5911, "step": 1860 }, { "epoch": 0.07, "learning_rate": 1.0297792904611e-05, "loss": 0.5328, "step": 1890 }, { "epoch": 0.07, "learning_rate": 1.0216855368051905e-05, "loss": 0.5636, "step": 1920 }, { "epoch": 0.07, "learning_rate": 1.0137796748742198e-05, "loss": 0.5733, "step": 1950 }, { "epoch": 0.07, "learning_rate": 1.0060545457319173e-05, "loss": 0.5326, "step": 1980 }, { "epoch": 0.07, "learning_rate": 9.98503366584589e-06, "loss": 0.5016, "step": 2010 }, { "epoch": 0.08, "learning_rate": 9.911197057469108e-06, "loss": 0.5416, "step": 2040 }, { "epoch": 0.08, "learning_rate": 9.838974596144754e-06, "loss": 0.6255, "step": 2070 }, { "epoch": 0.08, "learning_rate": 9.768308314557045e-06, "loss": 0.4626, "step": 2100 }, { "epoch": 0.08, "learning_rate": 9.699143118556543e-06, "loss": 0.496, "step": 2130 }, { "epoch": 0.08, "learning_rate": 9.631426606617744e-06, "loss": 0.5069, "step": 2160 }, { "epoch": 0.08, "learning_rate": 9.565108902971731e-06, "loss": 0.4882, "step": 2190 }, { "epoch": 0.08, "learning_rate": 9.50014250320633e-06, "loss": 0.5291, "step": 2220 }, { "epoch": 0.08, "learning_rate": 9.436482131247697e-06, "loss": 0.541, "step": 2250 }, { "epoch": 0.09, "learning_rate": 9.374084606744877e-06, "loss": 0.5595, "step": 2280 }, { "epoch": 0.09, "learning_rate": 9.312908721974755e-06, "loss": 0.5041, "step": 2310 }, { "epoch": 0.09, "learning_rate": 9.252915127470068e-06, "loss": 0.568, "step": 2340 }, { "epoch": 0.09, "learning_rate": 9.194066225649182e-06, "loss": 0.4467, "step": 2370 }, { "epoch": 0.09, "learning_rate": 9.13632607179441e-06, "loss": 0.471, "step": 2400 }, { "epoch": 0.09, "learning_rate": 9.079660281786226e-06, "loss": 0.5507, "step": 2430 }, { "epoch": 0.09, "learning_rate": 9.02403594605542e-06, "loss": 0.4895, "step": 2460 }, { "epoch": 0.09, "learning_rate": 8.969421549263868e-06, "loss": 0.5492, "step": 2490 }, { "epoch": 0.09, "learning_rate": 8.915786895268652e-06, "loss": 0.4661, "step": 2520 }, { "epoch": 0.1, "learning_rate": 8.863103036963604e-06, "loss": 0.4957, "step": 2550 }, { "epoch": 0.1, "learning_rate": 8.811342210628018e-06, "loss": 0.4698, "step": 2580 }, { "epoch": 0.1, "learning_rate": 8.760477774444217e-06, "loss": 0.4813, "step": 2610 }, { "epoch": 0.1, "learning_rate": 8.710484150874759e-06, "loss": 0.4382, "step": 2640 }, { "epoch": 0.1, "learning_rate": 8.661336772616119e-06, "loss": 0.5073, "step": 2670 }, { "epoch": 0.1, "learning_rate": 8.613012031869432e-06, "loss": 0.4712, "step": 2700 }, { "epoch": 0.1, "learning_rate": 8.5654872326904e-06, "loss": 0.5269, "step": 2730 }, { "epoch": 0.1, "learning_rate": 8.51874054619982e-06, "loss": 0.4369, "step": 2760 }, { "epoch": 0.1, "learning_rate": 8.472750968454027e-06, "loss": 0.4954, "step": 2790 }, { "epoch": 0.11, "learning_rate": 8.427498280790527e-06, "loss": 0.4503, "step": 2820 }, { "epoch": 0.11, "learning_rate": 8.382963012478792e-06, "loss": 0.4511, "step": 2850 }, { "epoch": 0.11, "learning_rate": 8.339126405519484e-06, "loss": 0.4604, "step": 2880 }, { "epoch": 0.11, "learning_rate": 8.295970381447577e-06, "loss": 0.4256, "step": 2910 }, { "epoch": 0.11, "learning_rate": 8.253477510005973e-06, "loss": 0.4544, "step": 2940 }, { "epoch": 0.11, "learning_rate": 8.211630979566229e-06, "loss": 0.4404, "step": 2970 }, { "epoch": 0.11, "learning_rate": 8.170414569182507e-06, "loss": 0.4183, "step": 3000 }, { "epoch": 0.11, "learning_rate": 8.129812622173143e-06, "loss": 0.4439, "step": 3030 }, { "epoch": 0.11, "learning_rate": 8.08981002113217e-06, "loss": 0.4319, "step": 3060 }, { "epoch": 0.12, "learning_rate": 8.050392164280187e-06, "loss": 0.4402, "step": 3090 }, { "epoch": 0.12, "learning_rate": 8.011544943070564e-06, "loss": 0.4871, "step": 3120 }, { "epoch": 0.12, "learning_rate": 7.973254720972994e-06, "loss": 0.4748, "step": 3150 }, { "epoch": 0.12, "learning_rate": 7.935508313361898e-06, "loss": 0.5215, "step": 3180 }, { "epoch": 0.12, "learning_rate": 7.898292968442374e-06, "loss": 0.4496, "step": 3210 }, { "epoch": 0.12, "learning_rate": 7.861596349150974e-06, "loss": 0.4912, "step": 3240 }, { "epoch": 0.12, "learning_rate": 7.825406515973003e-06, "loss": 0.4403, "step": 3270 }, { "epoch": 0.12, "learning_rate": 7.78971191062195e-06, "loss": 0.4216, "step": 3300 }, { "epoch": 0.12, "learning_rate": 7.754501340530456e-06, "loss": 0.4121, "step": 3330 }, { "epoch": 0.13, "learning_rate": 7.719763964105497e-06, "loss": 0.4315, "step": 3360 }, { "epoch": 0.13, "learning_rate": 7.68548927670368e-06, "loss": 0.4272, "step": 3390 }, { "epoch": 0.13, "learning_rate": 7.651667097285501e-06, "loss": 0.4234, "step": 3420 }, { "epoch": 0.13, "learning_rate": 7.618287555709996e-06, "loss": 0.5091, "step": 3450 }, { "epoch": 0.13, "learning_rate": 7.585341080633831e-06, "loss": 0.4435, "step": 3480 }, { "epoch": 0.13, "learning_rate": 7.552818387981102e-06, "loss": 0.3596, "step": 3510 }, { "epoch": 0.13, "learning_rate": 7.520710469952336e-06, "loss": 0.4096, "step": 3540 }, { "epoch": 0.13, "learning_rate": 7.489008584543126e-06, "loss": 0.3699, "step": 3570 }, { "epoch": 0.13, "learning_rate": 7.457704245544709e-06, "loss": 0.4257, "step": 3600 }, { "epoch": 0.14, "learning_rate": 7.4267892130005204e-06, "loss": 0.3542, "step": 3630 }, { "epoch": 0.14, "learning_rate": 7.396255484094341e-06, "loss": 0.4619, "step": 3660 }, { "epoch": 0.14, "learning_rate": 7.366095284447187e-06, "loss": 0.442, "step": 3690 }, { "epoch": 0.14, "learning_rate": 7.336301059801395e-06, "loss": 0.3701, "step": 3720 }, { "epoch": 0.14, "learning_rate": 7.306865468071773e-06, "loss": 0.4379, "step": 3750 }, { "epoch": 0.14, "learning_rate": 7.277781371744775e-06, "loss": 0.4511, "step": 3780 }, { "epoch": 0.14, "learning_rate": 7.24904183060784e-06, "loss": 0.3869, "step": 3810 }, { "epoch": 0.14, "learning_rate": 7.220640094792103e-06, "loss": 0.3576, "step": 3840 }, { "epoch": 0.14, "learning_rate": 7.1925695981126075e-06, "loss": 0.344, "step": 3870 }, { "epoch": 0.15, "learning_rate": 7.164823951691131e-06, "loss": 0.3708, "step": 3900 }, { "epoch": 0.15, "learning_rate": 7.137396937847532e-06, "loss": 0.414, "step": 3930 }, { "epoch": 0.15, "learning_rate": 7.110282504246376e-06, "loss": 0.4553, "step": 3960 }, { "epoch": 0.15, "learning_rate": 7.083474758286323e-06, "loss": 0.4011, "step": 3990 }, { "epoch": 0.15, "learning_rate": 7.056967961720459e-06, "loss": 0.3563, "step": 4020 }, { "epoch": 0.15, "learning_rate": 7.0307565254964336e-06, "loss": 0.3607, "step": 4050 }, { "epoch": 0.15, "learning_rate": 7.004835004805859e-06, "loss": 0.3634, "step": 4080 }, { "epoch": 0.15, "learning_rate": 6.979198094333013e-06, "loss": 0.3656, "step": 4110 }, { "epoch": 0.15, "learning_rate": 6.953840623693443e-06, "loss": 0.4157, "step": 4140 }, { "epoch": 0.16, "learning_rate": 6.928757553053551e-06, "loss": 0.3799, "step": 4170 }, { "epoch": 0.16, "learning_rate": 6.9039439689227496e-06, "loss": 0.3595, "step": 4200 }, { "epoch": 0.16, "learning_rate": 6.879395080110185e-06, "loss": 0.3912, "step": 4230 }, { "epoch": 0.16, "learning_rate": 6.855106213838523e-06, "loss": 0.3785, "step": 4260 }, { "epoch": 0.16, "learning_rate": 6.83107281200758e-06, "loss": 0.4702, "step": 4290 }, { "epoch": 0.16, "learning_rate": 6.807290427601057e-06, "loss": 0.3987, "step": 4320 }, { "epoch": 0.16, "learning_rate": 6.7837547212299395e-06, "loss": 0.3743, "step": 4350 }, { "epoch": 0.16, "learning_rate": 6.760461457806433e-06, "loss": 0.3816, "step": 4380 }, { "epoch": 0.16, "learning_rate": 6.737406503342708e-06, "loss": 0.3293, "step": 4410 }, { "epoch": 0.17, "learning_rate": 6.714585821868879e-06, "loss": 0.4261, "step": 4440 }, { "epoch": 0.17, "learning_rate": 6.691995472465075e-06, "loss": 0.3562, "step": 4470 }, { "epoch": 0.17, "learning_rate": 6.6696316064026025e-06, "loss": 0.3437, "step": 4500 }, { "epoch": 0.17, "learning_rate": 6.6474904643895035e-06, "loss": 0.3696, "step": 4530 }, { "epoch": 0.17, "learning_rate": 6.625568373916034e-06, "loss": 0.4247, "step": 4560 }, { "epoch": 0.17, "learning_rate": 6.603861746695817e-06, "loss": 0.3945, "step": 4590 }, { "epoch": 0.17, "learning_rate": 6.582367076198595e-06, "loss": 0.3798, "step": 4620 }, { "epoch": 0.17, "learning_rate": 6.561080935270765e-06, "loss": 0.3464, "step": 4650 }, { "epoch": 0.17, "learning_rate": 6.53999997384e-06, "loss": 0.3523, "step": 4680 }, { "epoch": 0.18, "learning_rate": 6.5191209167004895e-06, "loss": 0.363, "step": 4710 }, { "epoch": 0.18, "learning_rate": 6.4984405613754545e-06, "loss": 0.3986, "step": 4740 }, { "epoch": 0.18, "learning_rate": 6.477955776053786e-06, "loss": 0.3417, "step": 4770 }, { "epoch": 0.18, "learning_rate": 6.457663497597783e-06, "loss": 0.366, "step": 4800 }, { "epoch": 0.18, "learning_rate": 6.437560729619116e-06, "loss": 0.3212, "step": 4830 }, { "epoch": 0.18, "learning_rate": 6.41764454062025e-06, "loss": 0.42, "step": 4860 }, { "epoch": 0.18, "learning_rate": 6.3979120621987555e-06, "loss": 0.3657, "step": 4890 }, { "epoch": 0.18, "learning_rate": 6.378360487311966e-06, "loss": 0.3274, "step": 4920 }, { "epoch": 0.18, "learning_rate": 6.358987068599611e-06, "loss": 0.3556, "step": 4950 }, { "epoch": 0.19, "learning_rate": 6.339789116762173e-06, "loss": 0.3901, "step": 4980 }, { "epoch": 0.19, "learning_rate": 6.320763998992753e-06, "loss": 0.3037, "step": 5010 }, { "epoch": 0.19, "learning_rate": 6.301909137460409e-06, "loss": 0.3604, "step": 5040 }, { "epoch": 0.19, "learning_rate": 6.283222007842957e-06, "loss": 0.3274, "step": 5070 }, { "epoch": 0.19, "learning_rate": 6.264700137907351e-06, "loss": 0.349, "step": 5100 }, { "epoch": 0.19, "learning_rate": 6.246341106135824e-06, "loss": 0.3592, "step": 5130 }, { "epoch": 0.19, "learning_rate": 6.22814254039606e-06, "loss": 0.3222, "step": 5160 }, { "epoch": 0.19, "learning_rate": 6.21010211665374e-06, "loss": 0.3953, "step": 5190 }, { "epoch": 0.19, "learning_rate": 6.192217557725875e-06, "loss": 0.3456, "step": 5220 }, { "epoch": 0.2, "learning_rate": 6.174486632073397e-06, "loss": 0.2952, "step": 5250 }, { "epoch": 0.2, "learning_rate": 6.156907152631576e-06, "loss": 0.3385, "step": 5280 }, { "epoch": 0.2, "learning_rate": 6.1394769756768524e-06, "loss": 0.3676, "step": 5310 }, { "epoch": 0.2, "learning_rate": 6.12219399972876e-06, "loss": 0.3084, "step": 5340 }, { "epoch": 0.2, "learning_rate": 6.105056164485665e-06, "loss": 0.3262, "step": 5370 }, { "epoch": 0.2, "learning_rate": 6.0880614497930825e-06, "loss": 0.3254, "step": 5400 }, { "epoch": 0.2, "learning_rate": 6.071207874643425e-06, "loss": 0.3312, "step": 5430 }, { "epoch": 0.2, "learning_rate": 6.054493496206035e-06, "loss": 0.2912, "step": 5460 }, { "epoch": 0.2, "learning_rate": 6.037916408886427e-06, "loss": 0.3067, "step": 5490 }, { "epoch": 0.21, "learning_rate": 6.021474743413714e-06, "loss": 0.2967, "step": 5520 }, { "epoch": 0.21, "learning_rate": 6.005166665955221e-06, "loss": 0.3792, "step": 5550 }, { "epoch": 0.21, "learning_rate": 5.988990377257332e-06, "loss": 0.3027, "step": 5580 }, { "epoch": 0.21, "learning_rate": 5.972944111811658e-06, "loss": 0.3165, "step": 5610 }, { "epoch": 0.21, "learning_rate": 5.9570261370456484e-06, "loss": 0.331, "step": 5640 }, { "epoch": 0.21, "learning_rate": 5.941234752536812e-06, "loss": 0.3406, "step": 5670 }, { "epoch": 0.21, "learning_rate": 5.925568289249716e-06, "loss": 0.3086, "step": 5700 }, { "epoch": 0.21, "learning_rate": 5.910025108795013e-06, "loss": 0.335, "step": 5730 }, { "epoch": 0.21, "learning_rate": 5.894603602709729e-06, "loss": 0.3411, "step": 5760 }, { "epoch": 0.22, "learning_rate": 5.879302191758114e-06, "loss": 0.3122, "step": 5790 }, { "epoch": 0.22, "learning_rate": 5.864119325252328e-06, "loss": 0.3138, "step": 5820 }, { "epoch": 0.22, "learning_rate": 5.8490534803923536e-06, "loss": 0.2888, "step": 5850 }, { "epoch": 0.22, "learning_rate": 5.8341031616244595e-06, "loss": 0.3032, "step": 5880 }, { "epoch": 0.22, "learning_rate": 5.819266900017594e-06, "loss": 0.3255, "step": 5910 }, { "epoch": 0.22, "learning_rate": 5.804543252657151e-06, "loss": 0.3174, "step": 5940 }, { "epoch": 0.22, "learning_rate": 5.789930802055526e-06, "loss": 0.3083, "step": 5970 }, { "epoch": 0.22, "learning_rate": 5.775428155578901e-06, "loss": 0.292, "step": 6000 }, { "epoch": 0.22, "learning_rate": 5.761033944889748e-06, "loss": 0.3553, "step": 6030 }, { "epoch": 0.23, "learning_rate": 5.746746825404546e-06, "loss": 0.316, "step": 6060 }, { "epoch": 0.23, "learning_rate": 5.7325654757662186e-06, "loss": 0.3385, "step": 6090 }, { "epoch": 0.23, "learning_rate": 5.718956157005957e-06, "loss": 0.3383, "step": 6120 }, { "epoch": 0.23, "learning_rate": 5.704979053851296e-06, "loss": 0.3152, "step": 6150 }, { "epoch": 0.23, "learning_rate": 5.691103932244723e-06, "loss": 0.2978, "step": 6180 }, { "epoch": 0.23, "learning_rate": 5.677329558036595e-06, "loss": 0.3089, "step": 6210 }, { "epoch": 0.23, "learning_rate": 5.66365471788599e-06, "loss": 0.3295, "step": 6240 }, { "epoch": 0.23, "learning_rate": 5.650078218811767e-06, "loss": 0.2739, "step": 6270 }, { "epoch": 0.23, "learning_rate": 5.636598887755416e-06, "loss": 0.3879, "step": 6300 }, { "epoch": 0.24, "learning_rate": 5.623215571155333e-06, "loss": 0.2913, "step": 6330 }, { "epoch": 0.24, "learning_rate": 5.609927134532144e-06, "loss": 0.3459, "step": 6360 }, { "epoch": 0.24, "learning_rate": 5.596732462084774e-06, "loss": 0.3362, "step": 6390 }, { "epoch": 0.24, "learning_rate": 5.583630456296903e-06, "loss": 0.3566, "step": 6420 }, { "epoch": 0.24, "learning_rate": 5.570620037553528e-06, "loss": 0.2793, "step": 6450 }, { "epoch": 0.24, "learning_rate": 5.557700143767285e-06, "loss": 0.3272, "step": 6480 }, { "epoch": 0.24, "learning_rate": 5.544869730014276e-06, "loss": 0.2916, "step": 6510 }, { "epoch": 0.24, "learning_rate": 5.532127768179103e-06, "loss": 0.327, "step": 6540 }, { "epoch": 0.25, "learning_rate": 5.519473246608816e-06, "loss": 0.33, "step": 6570 }, { "epoch": 0.25, "learning_rate": 5.50690516977555e-06, "loss": 0.2756, "step": 6600 }, { "epoch": 0.25, "learning_rate": 5.494422557947562e-06, "loss": 0.3566, "step": 6630 }, { "epoch": 0.25, "learning_rate": 5.482024446868429e-06, "loss": 0.3124, "step": 6660 }, { "epoch": 0.25, "learning_rate": 5.469709887444195e-06, "loss": 0.3601, "step": 6690 }, { "epoch": 0.25, "learning_rate": 5.457477945438182e-06, "loss": 0.3182, "step": 6720 }, { "epoch": 0.25, "learning_rate": 5.445327701173302e-06, "loss": 0.3418, "step": 6750 }, { "epoch": 0.25, "learning_rate": 5.433258249241613e-06, "loss": 0.2957, "step": 6780 }, { "epoch": 0.25, "learning_rate": 5.4212686982209314e-06, "loss": 0.3465, "step": 6810 }, { "epoch": 0.26, "learning_rate": 5.409358170398289e-06, "loss": 0.3435, "step": 6840 }, { "epoch": 0.26, "learning_rate": 5.397525801500045e-06, "loss": 0.3535, "step": 6870 }, { "epoch": 0.26, "learning_rate": 5.385770740428468e-06, "loss": 0.313, "step": 6900 }, { "epoch": 0.26, "learning_rate": 5.37409214900459e-06, "loss": 0.3222, "step": 6930 }, { "epoch": 0.26, "learning_rate": 5.362489201717187e-06, "loss": 0.2975, "step": 6960 }, { "epoch": 0.26, "learning_rate": 5.350961085477669e-06, "loss": 0.3027, "step": 6990 }, { "epoch": 0.26, "learning_rate": 5.339506999380768e-06, "loss": 0.2935, "step": 7020 }, { "epoch": 0.26, "learning_rate": 5.3281261544708195e-06, "loss": 0.2793, "step": 7050 }, { "epoch": 0.26, "learning_rate": 5.316817773513507e-06, "loss": 0.2699, "step": 7080 }, { "epoch": 0.27, "learning_rate": 5.30558109077291e-06, "loss": 0.3002, "step": 7110 }, { "epoch": 0.27, "learning_rate": 5.2947864079915216e-06, "loss": 0.3071, "step": 7140 }, { "epoch": 0.27, "learning_rate": 5.283688541235345e-06, "loss": 0.2798, "step": 7170 }, { "epoch": 0.27, "learning_rate": 5.272660166570372e-06, "loss": 0.3221, "step": 7200 }, { "epoch": 0.27, "learning_rate": 5.261700561773909e-06, "loss": 0.2804, "step": 7230 }, { "epoch": 0.27, "learning_rate": 5.250809015088132e-06, "loss": 0.3268, "step": 7260 }, { "epoch": 0.27, "learning_rate": 5.239984825025921e-06, "loss": 0.3198, "step": 7290 }, { "epoch": 0.27, "learning_rate": 5.229227300181094e-06, "loss": 0.308, "step": 7320 }, { "epoch": 0.27, "learning_rate": 5.218535759042913e-06, "loss": 0.3606, "step": 7350 }, { "epoch": 0.28, "learning_rate": 5.2079095298147244e-06, "loss": 0.295, "step": 7380 }, { "epoch": 0.28, "learning_rate": 5.197347950236668e-06, "loss": 0.3313, "step": 7410 }, { "epoch": 0.28, "learning_rate": 5.18685036741231e-06, "loss": 0.3253, "step": 7440 }, { "epoch": 0.28, "learning_rate": 5.176416137639122e-06, "loss": 0.2724, "step": 7470 }, { "epoch": 0.28, "learning_rate": 5.166044626242685e-06, "loss": 0.3162, "step": 7500 }, { "epoch": 0.28, "learning_rate": 5.155735207414551e-06, "loss": 0.3187, "step": 7530 }, { "epoch": 0.28, "learning_rate": 5.145487264053619e-06, "loss": 0.2858, "step": 7560 }, { "epoch": 0.28, "learning_rate": 5.135300187611006e-06, "loss": 0.3, "step": 7590 }, { "epoch": 0.28, "learning_rate": 5.125173377938251e-06, "loss": 0.3222, "step": 7620 }, { "epoch": 0.29, "learning_rate": 5.115106243138807e-06, "loss": 0.2799, "step": 7650 }, { "epoch": 0.29, "learning_rate": 5.105098199422739e-06, "loss": 0.2617, "step": 7680 }, { "epoch": 0.29, "learning_rate": 5.095148670964526e-06, "loss": 0.3158, "step": 7710 }, { "epoch": 0.29, "learning_rate": 5.085257089763904e-06, "loss": 0.2533, "step": 7740 }, { "epoch": 0.29, "learning_rate": 5.075422895509673e-06, "loss": 0.2548, "step": 7770 }, { "epoch": 0.29, "learning_rate": 5.065645535446375e-06, "loss": 0.2719, "step": 7800 }, { "epoch": 0.29, "learning_rate": 5.055924464243811e-06, "loss": 0.2877, "step": 7830 }, { "epoch": 0.29, "learning_rate": 5.046259143869278e-06, "loss": 0.3081, "step": 7860 }, { "epoch": 0.29, "learning_rate": 5.0366490434624956e-06, "loss": 0.3128, "step": 7890 }, { "epoch": 0.3, "learning_rate": 5.027093639213143e-06, "loss": 0.323, "step": 7920 }, { "epoch": 0.3, "learning_rate": 5.017592414240932e-06, "loss": 0.2887, "step": 7950 }, { "epoch": 0.3, "learning_rate": 5.008144858478176e-06, "loss": 0.3268, "step": 7980 }, { "epoch": 0.3, "learning_rate": 4.9987504685547725e-06, "loss": 0.3533, "step": 8010 }, { "epoch": 0.3, "learning_rate": 4.989408747685563e-06, "loss": 0.2842, "step": 8040 }, { "epoch": 0.3, "learning_rate": 4.980119205559974e-06, "loss": 0.254, "step": 8070 }, { "epoch": 0.3, "learning_rate": 4.970881358233946e-06, "loss": 0.2835, "step": 8100 }, { "epoch": 0.3, "learning_rate": 4.961694728024028e-06, "loss": 0.3396, "step": 8130 }, { "epoch": 0.3, "learning_rate": 4.952558843403633e-06, "loss": 0.3007, "step": 8160 }, { "epoch": 0.31, "learning_rate": 4.943473238901383e-06, "loss": 0.3001, "step": 8190 }, { "epoch": 0.31, "learning_rate": 4.934437455001497e-06, "loss": 0.3292, "step": 8220 }, { "epoch": 0.31, "learning_rate": 4.925451038046182e-06, "loss": 0.2772, "step": 8250 }, { "epoch": 0.31, "learning_rate": 4.916513540139955e-06, "loss": 0.3337, "step": 8280 }, { "epoch": 0.31, "learning_rate": 4.907624519055888e-06, "loss": 0.2756, "step": 8310 }, { "epoch": 0.31, "learning_rate": 4.898783538143701e-06, "loss": 0.3178, "step": 8340 }, { "epoch": 0.31, "learning_rate": 4.889990166239664e-06, "loss": 0.321, "step": 8370 }, { "epoch": 0.31, "learning_rate": 4.881243977578285e-06, "loss": 0.3156, "step": 8400 }, { "epoch": 0.31, "learning_rate": 4.872544551705718e-06, "loss": 0.2833, "step": 8430 }, { "epoch": 0.32, "learning_rate": 4.8638914733948715e-06, "loss": 0.2803, "step": 8460 }, { "epoch": 0.32, "learning_rate": 4.85528433256216e-06, "loss": 0.2551, "step": 8490 }, { "epoch": 0.32, "learning_rate": 4.8467227241858775e-06, "loss": 0.2435, "step": 8520 }, { "epoch": 0.32, "learning_rate": 4.838206248226147e-06, "loss": 0.2682, "step": 8550 }, { "epoch": 0.32, "learning_rate": 4.829734509546414e-06, "loss": 0.3036, "step": 8580 }, { "epoch": 0.32, "learning_rate": 4.821307117836431e-06, "loss": 0.2704, "step": 8610 }, { "epoch": 0.32, "learning_rate": 4.8129236875367506e-06, "loss": 0.299, "step": 8640 }, { "epoch": 0.32, "learning_rate": 4.804583837764616e-06, "loss": 0.2679, "step": 8670 }, { "epoch": 0.32, "learning_rate": 4.796287192241285e-06, "loss": 0.2749, "step": 8700 }, { "epoch": 0.33, "learning_rate": 4.788033379220728e-06, "loss": 0.2858, "step": 8730 }, { "epoch": 0.33, "learning_rate": 4.779822031419661e-06, "loss": 0.2544, "step": 8760 }, { "epoch": 0.33, "learning_rate": 4.771652785948902e-06, "loss": 0.3259, "step": 8790 }, { "epoch": 0.33, "learning_rate": 4.763525284246008e-06, "loss": 0.3, "step": 8820 }, { "epoch": 0.33, "learning_rate": 4.755439172009178e-06, "loss": 0.2682, "step": 8850 }, { "epoch": 0.33, "learning_rate": 4.7473940991323755e-06, "loss": 0.2911, "step": 8880 }, { "epoch": 0.33, "learning_rate": 4.739389719641665e-06, "loss": 0.2617, "step": 8910 }, { "epoch": 0.33, "learning_rate": 4.731425691632717e-06, "loss": 0.2739, "step": 8940 }, { "epoch": 0.33, "learning_rate": 4.723501677209465e-06, "loss": 0.259, "step": 8970 }, { "epoch": 0.34, "learning_rate": 4.715617342423896e-06, "loss": 0.3113, "step": 9000 }, { "epoch": 0.34, "learning_rate": 4.707772357216934e-06, "loss": 0.2924, "step": 9030 }, { "epoch": 0.34, "learning_rate": 4.69996639536041e-06, "loss": 0.2422, "step": 9060 }, { "epoch": 0.34, "learning_rate": 4.692199134400076e-06, "loss": 0.2919, "step": 9090 }, { "epoch": 0.34, "learning_rate": 4.684470255599664e-06, "loss": 0.2697, "step": 9120 }, { "epoch": 0.34, "learning_rate": 4.676779443885949e-06, "loss": 0.2681, "step": 9150 }, { "epoch": 0.34, "learning_rate": 4.669126387794794e-06, "loss": 0.2753, "step": 9180 }, { "epoch": 0.34, "learning_rate": 4.6615107794181814e-06, "loss": 0.2761, "step": 9210 }, { "epoch": 0.34, "learning_rate": 4.653932314352172e-06, "loss": 0.2615, "step": 9240 }, { "epoch": 0.35, "learning_rate": 4.646390691645805e-06, "loss": 0.2758, "step": 9270 }, { "epoch": 0.35, "learning_rate": 4.638885613750897e-06, "loss": 0.2362, "step": 9300 }, { "epoch": 0.35, "learning_rate": 4.6314167864727375e-06, "loss": 0.2778, "step": 9330 }, { "epoch": 0.35, "learning_rate": 4.62398391892165e-06, "loss": 0.2577, "step": 9360 }, { "epoch": 0.35, "learning_rate": 4.616586723465408e-06, "loss": 0.2716, "step": 9390 }, { "epoch": 0.35, "learning_rate": 4.609224915682485e-06, "loss": 0.2567, "step": 9420 }, { "epoch": 0.35, "learning_rate": 4.601898214316119e-06, "loss": 0.2956, "step": 9450 }, { "epoch": 0.35, "learning_rate": 4.5946063412291834e-06, "loss": 0.2766, "step": 9480 }, { "epoch": 0.35, "learning_rate": 4.587349021359836e-06, "loss": 0.2743, "step": 9510 }, { "epoch": 0.36, "learning_rate": 4.580125982677943e-06, "loss": 0.2914, "step": 9540 }, { "epoch": 0.36, "learning_rate": 4.572936956142248e-06, "loss": 0.2849, "step": 9570 }, { "epoch": 0.36, "learning_rate": 4.565781675658282e-06, "loss": 0.2763, "step": 9600 }, { "epoch": 0.36, "learning_rate": 4.5586598780369954e-06, "loss": 0.2888, "step": 9630 }, { "epoch": 0.36, "learning_rate": 4.5515713029541005e-06, "loss": 0.2379, "step": 9660 }, { "epoch": 0.36, "learning_rate": 4.5445156929101e-06, "loss": 0.2716, "step": 9690 }, { "epoch": 0.36, "learning_rate": 4.537492793190998e-06, "loss": 0.2842, "step": 9720 }, { "epoch": 0.36, "learning_rate": 4.530502351829687e-06, "loss": 0.2865, "step": 9750 }, { "epoch": 0.36, "learning_rate": 4.5235441195679665e-06, "loss": 0.2819, "step": 9780 }, { "epoch": 0.37, "learning_rate": 4.51661784981922e-06, "loss": 0.2526, "step": 9810 }, { "epoch": 0.37, "learning_rate": 4.509723298631711e-06, "loss": 0.274, "step": 9840 }, { "epoch": 0.37, "learning_rate": 4.5028602246524934e-06, "loss": 0.2711, "step": 9870 }, { "epoch": 0.37, "learning_rate": 4.496028389091924e-06, "loss": 0.2126, "step": 9900 }, { "epoch": 0.37, "learning_rate": 4.489227555688767e-06, "loss": 0.2575, "step": 9930 }, { "epoch": 0.37, "learning_rate": 4.482457490675879e-06, "loss": 0.2622, "step": 9960 }, { "epoch": 0.37, "learning_rate": 4.475717962746456e-06, "loss": 0.2528, "step": 9990 }, { "epoch": 0.37, "learning_rate": 4.469008743020842e-06, "loss": 0.2992, "step": 10020 }, { "epoch": 0.37, "learning_rate": 4.462329605013882e-06, "loss": 0.3006, "step": 10050 }, { "epoch": 0.38, "learning_rate": 4.455680324602811e-06, "loss": 0.2729, "step": 10080 }, { "epoch": 0.38, "learning_rate": 4.4490606799956615e-06, "loss": 0.2525, "step": 10110 }, { "epoch": 0.38, "learning_rate": 4.442470451700195e-06, "loss": 0.215, "step": 10140 }, { "epoch": 0.38, "learning_rate": 4.435909422493333e-06, "loss": 0.2413, "step": 10170 }, { "epoch": 0.38, "learning_rate": 4.429377377391085e-06, "loss": 0.2817, "step": 10200 }, { "epoch": 0.38, "learning_rate": 4.42287410361896e-06, "loss": 0.3017, "step": 10230 }, { "epoch": 0.38, "learning_rate": 4.416399390582857e-06, "loss": 0.2611, "step": 10260 }, { "epoch": 0.38, "learning_rate": 4.409953029840417e-06, "loss": 0.3096, "step": 10290 }, { "epoch": 0.38, "learning_rate": 4.403534815072848e-06, "loss": 0.2704, "step": 10320 }, { "epoch": 0.39, "learning_rate": 4.397144542057179e-06, "loss": 0.2775, "step": 10350 }, { "epoch": 0.39, "learning_rate": 4.390782008638974e-06, "loss": 0.3019, "step": 10380 }, { "epoch": 0.39, "learning_rate": 4.3844470147054704e-06, "loss": 0.2804, "step": 10410 }, { "epoch": 0.39, "learning_rate": 4.378139362159141e-06, "loss": 0.274, "step": 10440 }, { "epoch": 0.39, "learning_rate": 4.371858854891681e-06, "loss": 0.2692, "step": 10470 }, { "epoch": 0.39, "learning_rate": 4.365605298758394e-06, "loss": 0.2608, "step": 10500 }, { "epoch": 0.39, "learning_rate": 4.359378501552986e-06, "loss": 0.245, "step": 10530 }, { "epoch": 0.39, "learning_rate": 4.353178272982758e-06, "loss": 0.2776, "step": 10560 }, { "epoch": 0.39, "learning_rate": 4.347004424644176e-06, "loss": 0.2599, "step": 10590 }, { "epoch": 0.4, "learning_rate": 4.340856769998823e-06, "loss": 0.2604, "step": 10620 }, { "epoch": 0.4, "learning_rate": 4.3347351243497375e-06, "loss": 0.2437, "step": 10650 }, { "epoch": 0.4, "learning_rate": 4.3286393048180905e-06, "loss": 0.2808, "step": 10680 }, { "epoch": 0.4, "learning_rate": 4.322569130320256e-06, "loss": 0.2593, "step": 10710 }, { "epoch": 0.4, "learning_rate": 4.316524421545204e-06, "loss": 0.278, "step": 10740 }, { "epoch": 0.4, "learning_rate": 4.310505000932264e-06, "loss": 0.242, "step": 10770 }, { "epoch": 0.4, "learning_rate": 4.304510692649209e-06, "loss": 0.2666, "step": 10800 }, { "epoch": 0.4, "learning_rate": 4.298541322570686e-06, "loss": 0.2539, "step": 10830 }, { "epoch": 0.41, "learning_rate": 4.2925967182569775e-06, "loss": 0.2709, "step": 10860 }, { "epoch": 0.41, "learning_rate": 4.286676708933067e-06, "loss": 0.2264, "step": 10890 }, { "epoch": 0.41, "learning_rate": 4.280781125468046e-06, "loss": 0.2364, "step": 10920 }, { "epoch": 0.41, "learning_rate": 4.274909800354809e-06, "loss": 0.2534, "step": 10950 }, { "epoch": 0.41, "learning_rate": 4.269062567690073e-06, "loss": 0.248, "step": 10980 }, { "epoch": 0.41, "learning_rate": 4.263239263154682e-06, "loss": 0.2808, "step": 11010 }, { "epoch": 0.41, "learning_rate": 4.257439723994211e-06, "loss": 0.2815, "step": 11040 }, { "epoch": 0.41, "learning_rate": 4.251663788999866e-06, "loss": 0.2839, "step": 11070 }, { "epoch": 0.41, "learning_rate": 4.245911298489661e-06, "loss": 0.2275, "step": 11100 }, { "epoch": 0.42, "learning_rate": 4.24018209428987e-06, "loss": 0.2606, "step": 11130 }, { "epoch": 0.42, "learning_rate": 4.234476019716772e-06, "loss": 0.2492, "step": 11160 }, { "epoch": 0.42, "learning_rate": 4.228792919558642e-06, "loss": 0.244, "step": 11190 }, { "epoch": 0.42, "learning_rate": 4.2233209499878915e-06, "loss": 0.2441, "step": 11220 }, { "epoch": 0.42, "learning_rate": 4.217682585651912e-06, "loss": 0.2401, "step": 11250 }, { "epoch": 0.42, "learning_rate": 4.212066743759049e-06, "loss": 0.3018, "step": 11280 }, { "epoch": 0.42, "learning_rate": 4.206473274764352e-06, "loss": 0.2571, "step": 11310 }, { "epoch": 0.42, "learning_rate": 4.200902030509323e-06, "loss": 0.2204, "step": 11340 }, { "epoch": 0.42, "learning_rate": 4.195352864205429e-06, "loss": 0.2832, "step": 11370 }, { "epoch": 0.43, "learning_rate": 4.18982563041786e-06, "loss": 0.2632, "step": 11400 }, { "epoch": 0.43, "learning_rate": 4.184320185049522e-06, "loss": 0.2718, "step": 11430 }, { "epoch": 0.43, "learning_rate": 4.178836385325257e-06, "loss": 0.2863, "step": 11460 }, { "epoch": 0.43, "learning_rate": 4.173374089776288e-06, "loss": 0.221, "step": 11490 }, { "epoch": 0.43, "learning_rate": 4.167933158224896e-06, "loss": 0.2486, "step": 11520 }, { "epoch": 0.43, "learning_rate": 4.1625134517693085e-06, "loss": 0.2539, "step": 11550 }, { "epoch": 0.43, "learning_rate": 4.157114832768805e-06, "loss": 0.2283, "step": 11580 }, { "epoch": 0.43, "learning_rate": 4.151737164829035e-06, "loss": 0.2161, "step": 11610 }, { "epoch": 0.43, "learning_rate": 4.146380312787547e-06, "loss": 0.2378, "step": 11640 }, { "epoch": 0.44, "learning_rate": 4.141044142699519e-06, "loss": 0.2391, "step": 11670 }, { "epoch": 0.44, "learning_rate": 4.135728521823688e-06, "loss": 0.2288, "step": 11700 }, { "epoch": 0.44, "learning_rate": 4.130433318608488e-06, "loss": 0.2284, "step": 11730 }, { "epoch": 0.44, "learning_rate": 4.125158402678369e-06, "loss": 0.2769, "step": 11760 }, { "epoch": 0.44, "learning_rate": 4.11990364482032e-06, "loss": 0.2132, "step": 11790 }, { "epoch": 0.44, "learning_rate": 4.114668916970569e-06, "loss": 0.2419, "step": 11820 }, { "epoch": 0.44, "learning_rate": 4.109454092201482e-06, "loss": 0.2575, "step": 11850 }, { "epoch": 0.44, "learning_rate": 4.104259044708631e-06, "loss": 0.242, "step": 11880 }, { "epoch": 0.44, "learning_rate": 4.099083649798046e-06, "loss": 0.2375, "step": 11910 }, { "epoch": 0.45, "learning_rate": 4.0939277838736505e-06, "loss": 0.2461, "step": 11940 }, { "epoch": 0.45, "learning_rate": 4.0887913244248565e-06, "loss": 0.2147, "step": 11970 }, { "epoch": 0.45, "learning_rate": 4.083674150014344e-06, "loss": 0.2503, "step": 12000 }, { "epoch": 0.45, "learning_rate": 4.078576140266e-06, "loss": 0.2648, "step": 12030 }, { "epoch": 0.45, "learning_rate": 4.073497175853025e-06, "loss": 0.227, "step": 12060 }, { "epoch": 0.45, "learning_rate": 4.068437138486202e-06, "loss": 0.2633, "step": 12090 }, { "epoch": 0.45, "learning_rate": 4.0633959109023236e-06, "loss": 0.2051, "step": 12120 }, { "epoch": 0.45, "learning_rate": 4.0583733768527826e-06, "loss": 0.2756, "step": 12150 }, { "epoch": 0.45, "learning_rate": 4.053369421092312e-06, "loss": 0.258, "step": 12180 }, { "epoch": 0.46, "learning_rate": 4.048383929367871e-06, "loss": 0.2311, "step": 12210 }, { "epoch": 0.46, "learning_rate": 4.043416788407701e-06, "loss": 0.2362, "step": 12240 }, { "epoch": 0.46, "learning_rate": 4.038467885910504e-06, "loss": 0.2256, "step": 12270 }, { "epoch": 0.46, "learning_rate": 4.03353711053479e-06, "loss": 0.2869, "step": 12300 }, { "epoch": 0.46, "learning_rate": 4.0286243518883425e-06, "loss": 0.2358, "step": 12330 }, { "epoch": 0.46, "learning_rate": 4.023729500517853e-06, "loss": 0.2447, "step": 12360 }, { "epoch": 0.46, "learning_rate": 4.018852447898667e-06, "loss": 0.2623, "step": 12390 }, { "epoch": 0.46, "learning_rate": 4.013993086424682e-06, "loss": 0.2221, "step": 12420 }, { "epoch": 0.46, "learning_rate": 4.009151309398378e-06, "loss": 0.2294, "step": 12450 }, { "epoch": 0.47, "learning_rate": 4.004327011020975e-06, "loss": 0.2331, "step": 12480 }, { "epoch": 0.47, "learning_rate": 3.999520086382724e-06, "loss": 0.2695, "step": 12510 }, { "epoch": 0.47, "learning_rate": 3.994730431453324e-06, "loss": 0.2675, "step": 12540 }, { "epoch": 0.47, "learning_rate": 3.989957943072466e-06, "loss": 0.2398, "step": 12570 }, { "epoch": 0.47, "learning_rate": 3.985202518940505e-06, "loss": 0.2182, "step": 12600 }, { "epoch": 0.47, "learning_rate": 3.980464057609243e-06, "loss": 0.2384, "step": 12630 }, { "epoch": 0.47, "learning_rate": 3.975742458472844e-06, "loss": 0.2626, "step": 12660 }, { "epoch": 0.47, "learning_rate": 3.971037621758861e-06, "loss": 0.2213, "step": 12690 }, { "epoch": 0.47, "learning_rate": 3.966349448519381e-06, "loss": 0.2146, "step": 12720 }, { "epoch": 0.48, "learning_rate": 3.961677840622287e-06, "loss": 0.2705, "step": 12750 }, { "epoch": 0.48, "learning_rate": 3.957022700742623e-06, "loss": 0.2623, "step": 12780 }, { "epoch": 0.48, "learning_rate": 3.952383932354089e-06, "loss": 0.2269, "step": 12810 }, { "epoch": 0.48, "learning_rate": 3.947761439720625e-06, "loss": 0.2925, "step": 12840 }, { "epoch": 0.48, "learning_rate": 3.94315512788812e-06, "loss": 0.1932, "step": 12870 }, { "epoch": 0.48, "learning_rate": 3.9385649026762125e-06, "loss": 0.2326, "step": 12900 }, { "epoch": 0.48, "learning_rate": 3.9339906706702065e-06, "loss": 0.205, "step": 12930 }, { "epoch": 0.48, "learning_rate": 3.929432339213089e-06, "loss": 0.2248, "step": 12960 }, { "epoch": 0.48, "learning_rate": 3.92488981639764e-06, "loss": 0.2375, "step": 12990 }, { "epoch": 0.49, "learning_rate": 3.920363011058658e-06, "loss": 0.2357, "step": 13020 }, { "epoch": 0.49, "learning_rate": 3.91585183276527e-06, "loss": 0.2548, "step": 13050 }, { "epoch": 0.49, "learning_rate": 3.911356191813345e-06, "loss": 0.2319, "step": 13080 }, { "epoch": 0.49, "learning_rate": 3.906875999218003e-06, "loss": 0.2322, "step": 13110 }, { "epoch": 0.49, "learning_rate": 3.902411166706214e-06, "loss": 0.2329, "step": 13140 }, { "epoch": 0.49, "learning_rate": 3.897961606709499e-06, "loss": 0.2551, "step": 13170 }, { "epoch": 0.49, "learning_rate": 3.893527232356712e-06, "loss": 0.2108, "step": 13200 }, { "epoch": 0.49, "learning_rate": 3.8891079574669165e-06, "loss": 0.2038, "step": 13230 }, { "epoch": 0.49, "learning_rate": 3.884703696542354e-06, "loss": 0.2323, "step": 13260 }, { "epoch": 0.5, "learning_rate": 3.8803143647614936e-06, "loss": 0.2142, "step": 13290 }, { "epoch": 0.5, "learning_rate": 3.87593987797217e-06, "loss": 0.2314, "step": 13320 }, { "epoch": 0.5, "learning_rate": 3.871580152684815e-06, "loss": 0.2061, "step": 13350 }, { "epoch": 0.5, "learning_rate": 3.867235106065752e-06, "loss": 0.2245, "step": 13380 }, { "epoch": 0.5, "learning_rate": 3.862904655930595e-06, "loss": 0.2586, "step": 13410 }, { "epoch": 0.5, "learning_rate": 3.858588720737721e-06, "loss": 0.2408, "step": 13440 }, { "epoch": 0.5, "learning_rate": 3.854287219581815e-06, "loss": 0.2, "step": 13470 }, { "epoch": 0.5, "learning_rate": 3.850000072187502e-06, "loss": 0.225, "step": 13500 }, { "epoch": 0.5, "learning_rate": 3.8457271989030575e-06, "loss": 0.2431, "step": 13530 }, { "epoch": 0.51, "learning_rate": 3.841468520694188e-06, "loss": 0.2341, "step": 13560 }, { "epoch": 0.51, "learning_rate": 3.837223959137896e-06, "loss": 0.2108, "step": 13590 }, { "epoch": 0.51, "learning_rate": 3.832993436416406e-06, "loss": 0.206, "step": 13620 }, { "epoch": 0.51, "learning_rate": 3.828776875311186e-06, "loss": 0.2474, "step": 13650 }, { "epoch": 0.51, "learning_rate": 3.824574199197011e-06, "loss": 0.2402, "step": 13680 }, { "epoch": 0.51, "learning_rate": 3.820524739262436e-06, "loss": 0.2061, "step": 13710 }, { "epoch": 0.51, "learning_rate": 3.8163491490251235e-06, "loss": 0.2226, "step": 13740 }, { "epoch": 0.51, "learning_rate": 3.8121872198917364e-06, "loss": 0.1776, "step": 13770 }, { "epoch": 0.51, "learning_rate": 3.8080388775336093e-06, "loss": 0.2507, "step": 13800 }, { "epoch": 0.52, "learning_rate": 3.8039040481870244e-06, "loss": 0.1796, "step": 13830 }, { "epoch": 0.52, "learning_rate": 3.7997826586477104e-06, "loss": 0.2571, "step": 13860 }, { "epoch": 0.52, "learning_rate": 3.795674636265392e-06, "loss": 0.2295, "step": 13890 }, { "epoch": 0.52, "learning_rate": 3.7915799089384153e-06, "loss": 0.2282, "step": 13920 }, { "epoch": 0.52, "learning_rate": 3.7874984051084297e-06, "loss": 0.2232, "step": 13950 }, { "epoch": 0.52, "learning_rate": 3.7834300537551376e-06, "loss": 0.2261, "step": 13980 }, { "epoch": 0.52, "learning_rate": 3.7793747843911037e-06, "loss": 0.2474, "step": 14010 }, { "epoch": 0.52, "learning_rate": 3.775332527056626e-06, "loss": 0.2066, "step": 14040 }, { "epoch": 0.52, "learning_rate": 3.7713032123146647e-06, "loss": 0.2008, "step": 14070 }, { "epoch": 0.53, "learning_rate": 3.7672867712458343e-06, "loss": 0.2688, "step": 14100 }, { "epoch": 0.53, "learning_rate": 3.763283135443452e-06, "loss": 0.2385, "step": 14130 }, { "epoch": 0.53, "learning_rate": 3.7592922370086455e-06, "loss": 0.24, "step": 14160 }, { "epoch": 0.53, "learning_rate": 3.7553140085455136e-06, "loss": 0.1846, "step": 14190 }, { "epoch": 0.53, "learning_rate": 3.7513483831563473e-06, "loss": 0.2544, "step": 14220 }, { "epoch": 0.53, "learning_rate": 3.7473952944369068e-06, "loss": 0.1966, "step": 14250 }, { "epoch": 0.53, "learning_rate": 3.7434546764717492e-06, "loss": 0.2499, "step": 14280 }, { "epoch": 0.53, "learning_rate": 3.7395264638296123e-06, "loss": 0.2198, "step": 14310 }, { "epoch": 0.53, "learning_rate": 3.735610591558852e-06, "loss": 0.227, "step": 14340 }, { "epoch": 0.54, "learning_rate": 3.7318369179611134e-06, "loss": 0.2223, "step": 14370 }, { "epoch": 0.54, "learning_rate": 3.7279451274374513e-06, "loss": 0.2398, "step": 14400 }, { "epoch": 0.54, "learning_rate": 3.7240654873736155e-06, "loss": 0.2662, "step": 14430 }, { "epoch": 0.54, "learning_rate": 3.720197934676515e-06, "loss": 0.2502, "step": 14460 }, { "epoch": 0.54, "learning_rate": 3.7163424067107786e-06, "loss": 0.2303, "step": 14490 }, { "epoch": 0.54, "learning_rate": 3.712498841294488e-06, "loss": 0.2081, "step": 14520 }, { "epoch": 0.54, "learning_rate": 3.7086671766949745e-06, "loss": 0.2286, "step": 14550 }, { "epoch": 0.54, "learning_rate": 3.7048473516246474e-06, "loss": 0.2246, "step": 14580 }, { "epoch": 0.54, "learning_rate": 3.7010393052368808e-06, "loss": 0.2142, "step": 14610 }, { "epoch": 0.55, "learning_rate": 3.697242977121942e-06, "loss": 0.2154, "step": 14640 }, { "epoch": 0.55, "learning_rate": 3.693458307302965e-06, "loss": 0.2299, "step": 14670 }, { "epoch": 0.55, "learning_rate": 3.6896852362319716e-06, "loss": 0.1961, "step": 14700 }, { "epoch": 0.55, "learning_rate": 3.685923704785937e-06, "loss": 0.2212, "step": 14730 }, { "epoch": 0.55, "learning_rate": 3.6821736542628983e-06, "loss": 0.2012, "step": 14760 }, { "epoch": 0.55, "learning_rate": 3.6784350263781066e-06, "loss": 0.1914, "step": 14790 }, { "epoch": 0.55, "learning_rate": 3.674707763260221e-06, "loss": 0.2305, "step": 14820 }, { "epoch": 0.55, "learning_rate": 3.670991807447548e-06, "loss": 0.2325, "step": 14850 }, { "epoch": 0.55, "learning_rate": 3.6672871018843207e-06, "loss": 0.222, "step": 14880 }, { "epoch": 0.56, "learning_rate": 3.663593589917019e-06, "loss": 0.2182, "step": 14910 }, { "epoch": 0.56, "learning_rate": 3.65991121529073e-06, "loss": 0.256, "step": 14940 }, { "epoch": 0.56, "learning_rate": 3.656239922145549e-06, "loss": 0.1909, "step": 14970 }, { "epoch": 0.56, "learning_rate": 3.652579655013025e-06, "loss": 0.1799, "step": 15000 }, { "epoch": 0.56, "learning_rate": 3.6489303588126325e-06, "loss": 0.2382, "step": 15030 }, { "epoch": 0.56, "learning_rate": 3.645291978848299e-06, "loss": 0.2181, "step": 15060 }, { "epoch": 0.56, "learning_rate": 3.641664460804956e-06, "loss": 0.2192, "step": 15090 }, { "epoch": 0.56, "learning_rate": 3.6380477507451357e-06, "loss": 0.2245, "step": 15120 }, { "epoch": 0.57, "learning_rate": 3.6344417951056025e-06, "loss": 0.2237, "step": 15150 }, { "epoch": 0.57, "learning_rate": 3.6308465406940214e-06, "loss": 0.2335, "step": 15180 }, { "epoch": 0.57, "learning_rate": 3.62726193468566e-06, "loss": 0.2129, "step": 15210 }, { "epoch": 0.57, "learning_rate": 3.623687924620131e-06, "loss": 0.1975, "step": 15240 }, { "epoch": 0.57, "learning_rate": 3.6201244583981697e-06, "loss": 0.2239, "step": 15270 }, { "epoch": 0.57, "learning_rate": 3.6165714842784393e-06, "loss": 0.2059, "step": 15300 }, { "epoch": 0.57, "learning_rate": 3.6130289508743783e-06, "loss": 0.22, "step": 15330 }, { "epoch": 0.57, "learning_rate": 3.609496807151077e-06, "loss": 0.2146, "step": 15360 }, { "epoch": 0.57, "learning_rate": 3.6059750024221956e-06, "loss": 0.2378, "step": 15390 }, { "epoch": 0.58, "learning_rate": 3.6024634863468995e-06, "loss": 0.2177, "step": 15420 }, { "epoch": 0.58, "learning_rate": 3.598962208926845e-06, "loss": 0.2528, "step": 15450 }, { "epoch": 0.58, "learning_rate": 3.5954711205031867e-06, "loss": 0.2179, "step": 15480 }, { "epoch": 0.58, "learning_rate": 3.5919901717536176e-06, "loss": 0.225, "step": 15510 }, { "epoch": 0.58, "learning_rate": 3.588519313689446e-06, "loss": 0.2327, "step": 15540 }, { "epoch": 0.58, "learning_rate": 3.585058497652696e-06, "loss": 0.2267, "step": 15570 }, { "epoch": 0.58, "learning_rate": 3.5816076753132444e-06, "loss": 0.1847, "step": 15600 }, { "epoch": 0.58, "learning_rate": 3.5781667986659885e-06, "loss": 0.218, "step": 15630 }, { "epoch": 0.58, "learning_rate": 3.574735820028035e-06, "loss": 0.2386, "step": 15660 }, { "epoch": 0.59, "learning_rate": 3.57131469203593e-06, "loss": 0.2043, "step": 15690 }, { "epoch": 0.59, "learning_rate": 3.5679033676429114e-06, "loss": 0.2702, "step": 15720 }, { "epoch": 0.59, "learning_rate": 3.5645018001161923e-06, "loss": 0.2302, "step": 15750 }, { "epoch": 0.59, "learning_rate": 3.5611099430342683e-06, "loss": 0.2406, "step": 15780 }, { "epoch": 0.59, "learning_rate": 3.5577277502842616e-06, "loss": 0.2107, "step": 15810 }, { "epoch": 0.59, "learning_rate": 3.5543551760592854e-06, "loss": 0.2309, "step": 15840 }, { "epoch": 0.59, "learning_rate": 3.5509921748558384e-06, "loss": 0.2256, "step": 15870 }, { "epoch": 0.59, "learning_rate": 3.547638701471227e-06, "loss": 0.2328, "step": 15900 }, { "epoch": 0.59, "learning_rate": 3.5442947110010123e-06, "loss": 0.2417, "step": 15930 }, { "epoch": 0.6, "learning_rate": 3.5409601588364854e-06, "loss": 0.197, "step": 15960 }, { "epoch": 0.6, "learning_rate": 3.5376350006621686e-06, "loss": 0.2551, "step": 15990 }, { "epoch": 0.6, "learning_rate": 3.53431919245334e-06, "loss": 0.1884, "step": 16020 }, { "epoch": 0.6, "learning_rate": 3.531012690473587e-06, "loss": 0.2096, "step": 16050 }, { "epoch": 0.6, "learning_rate": 3.5277154512723837e-06, "loss": 0.1941, "step": 16080 }, { "epoch": 0.6, "learning_rate": 3.5244274316826905e-06, "loss": 0.2387, "step": 16110 }, { "epoch": 0.6, "learning_rate": 3.5211485888185817e-06, "loss": 0.1942, "step": 16140 }, { "epoch": 0.6, "learning_rate": 3.5178788800728976e-06, "loss": 0.2117, "step": 16170 }, { "epoch": 0.6, "learning_rate": 3.5146182631149166e-06, "loss": 0.2085, "step": 16200 }, { "epoch": 0.61, "learning_rate": 3.511366695888056e-06, "loss": 0.2429, "step": 16230 }, { "epoch": 0.61, "learning_rate": 3.50812413660759e-06, "loss": 0.2289, "step": 16260 }, { "epoch": 0.61, "learning_rate": 3.504890543758399e-06, "loss": 0.2241, "step": 16290 }, { "epoch": 0.61, "learning_rate": 3.5016658760927338e-06, "loss": 0.195, "step": 16320 }, { "epoch": 0.61, "learning_rate": 3.4984500926280053e-06, "loss": 0.2107, "step": 16350 }, { "epoch": 0.61, "learning_rate": 3.4952431526446017e-06, "loss": 0.1986, "step": 16380 }, { "epoch": 0.61, "learning_rate": 3.492045015683717e-06, "loss": 0.2294, "step": 16410 }, { "epoch": 0.61, "learning_rate": 3.4888556415452106e-06, "loss": 0.2219, "step": 16440 }, { "epoch": 0.61, "learning_rate": 3.485674990285484e-06, "loss": 0.2011, "step": 16470 }, { "epoch": 0.62, "learning_rate": 3.48250302221538e-06, "loss": 0.2119, "step": 16500 }, { "epoch": 0.62, "learning_rate": 3.4793396978981016e-06, "loss": 0.2136, "step": 16530 }, { "epoch": 0.62, "learning_rate": 3.4761849781471552e-06, "loss": 0.1887, "step": 16560 }, { "epoch": 0.62, "learning_rate": 3.4730388240243084e-06, "loss": 0.2112, "step": 16590 }, { "epoch": 0.62, "learning_rate": 3.469901196837573e-06, "loss": 0.1988, "step": 16620 }, { "epoch": 0.62, "learning_rate": 3.4667720581392095e-06, "loss": 0.1944, "step": 16650 }, { "epoch": 0.62, "learning_rate": 3.4636513697237406e-06, "loss": 0.208, "step": 16680 }, { "epoch": 0.62, "learning_rate": 3.460539093626001e-06, "loss": 0.1934, "step": 16710 }, { "epoch": 0.62, "learning_rate": 3.45743519211919e-06, "loss": 0.175, "step": 16740 }, { "epoch": 0.63, "learning_rate": 3.4543396277129578e-06, "loss": 0.2059, "step": 16770 }, { "epoch": 0.63, "learning_rate": 3.451252363151495e-06, "loss": 0.2617, "step": 16800 }, { "epoch": 0.63, "learning_rate": 3.4481733614116573e-06, "loss": 0.1825, "step": 16830 }, { "epoch": 0.63, "learning_rate": 3.4451025857010983e-06, "loss": 0.2182, "step": 16860 }, { "epoch": 0.63, "learning_rate": 3.4420399994564192e-06, "loss": 0.1792, "step": 16890 }, { "epoch": 0.63, "learning_rate": 3.438985566341346e-06, "loss": 0.2609, "step": 16920 }, { "epoch": 0.63, "learning_rate": 3.4359392502449156e-06, "loss": 0.2008, "step": 16950 }, { "epoch": 0.63, "learning_rate": 3.4329010152796816e-06, "loss": 0.1738, "step": 16980 }, { "epoch": 0.63, "learning_rate": 3.429870825779942e-06, "loss": 0.2357, "step": 17010 }, { "epoch": 0.64, "learning_rate": 3.426848646299979e-06, "loss": 0.2309, "step": 17040 }, { "epoch": 0.64, "learning_rate": 3.423834441612316e-06, "loss": 0.2102, "step": 17070 }, { "epoch": 0.64, "learning_rate": 3.4208281767059957e-06, "loss": 0.1933, "step": 17100 }, { "epoch": 0.64, "learning_rate": 3.4178298167848716e-06, "loss": 0.1512, "step": 17130 }, { "epoch": 0.64, "learning_rate": 3.414839327265915e-06, "loss": 0.1976, "step": 17160 }, { "epoch": 0.64, "learning_rate": 3.4118566737775383e-06, "loss": 0.2193, "step": 17190 }, { "epoch": 0.64, "learning_rate": 3.4088818221579424e-06, "loss": 0.2124, "step": 17220 }, { "epoch": 0.64, "learning_rate": 3.4059147384534635e-06, "loss": 0.2116, "step": 17250 }, { "epoch": 0.64, "learning_rate": 3.4029553889169558e-06, "loss": 0.1893, "step": 17280 }, { "epoch": 0.65, "learning_rate": 3.4000037400061716e-06, "loss": 0.2162, "step": 17310 }, { "epoch": 0.65, "learning_rate": 3.3970597583821664e-06, "loss": 0.2022, "step": 17340 }, { "epoch": 0.65, "learning_rate": 3.394123410907721e-06, "loss": 0.2171, "step": 17370 }, { "epoch": 0.65, "learning_rate": 3.3911946646457707e-06, "loss": 0.2321, "step": 17400 }, { "epoch": 0.65, "learning_rate": 3.3882734868578534e-06, "loss": 0.2312, "step": 17430 }, { "epoch": 0.65, "learning_rate": 3.3853598450025764e-06, "loss": 0.1934, "step": 17460 }, { "epoch": 0.65, "learning_rate": 3.3824537067340902e-06, "loss": 0.177, "step": 17490 }, { "epoch": 0.65, "learning_rate": 3.379555039900584e-06, "loss": 0.193, "step": 17520 }, { "epoch": 0.65, "learning_rate": 3.3766638125427857e-06, "loss": 0.2091, "step": 17550 }, { "epoch": 0.66, "learning_rate": 3.373779992892488e-06, "loss": 0.2174, "step": 17580 }, { "epoch": 0.66, "learning_rate": 3.3709035493710807e-06, "loss": 0.2565, "step": 17610 }, { "epoch": 0.66, "learning_rate": 3.368034450588095e-06, "loss": 0.2153, "step": 17640 }, { "epoch": 0.66, "learning_rate": 3.3651726653397697e-06, "loss": 0.2146, "step": 17670 }, { "epoch": 0.66, "learning_rate": 3.3623181626076225e-06, "loss": 0.1969, "step": 17700 }, { "epoch": 0.66, "learning_rate": 3.359470911557039e-06, "loss": 0.2297, "step": 17730 }, { "epoch": 0.66, "learning_rate": 3.3566308815358723e-06, "loss": 0.2054, "step": 17760 }, { "epoch": 0.66, "learning_rate": 3.3537980420730624e-06, "loss": 0.2202, "step": 17790 }, { "epoch": 0.66, "learning_rate": 3.3509723628772543e-06, "loss": 0.2301, "step": 17820 }, { "epoch": 0.67, "learning_rate": 3.348153813835445e-06, "loss": 0.2316, "step": 17850 }, { "epoch": 0.67, "learning_rate": 3.3453423650116334e-06, "loss": 0.2068, "step": 17880 }, { "epoch": 0.67, "learning_rate": 3.342537986645483e-06, "loss": 0.2173, "step": 17910 }, { "epoch": 0.67, "learning_rate": 3.3397406491510005e-06, "loss": 0.1801, "step": 17940 }, { "epoch": 0.67, "learning_rate": 3.336950323115227e-06, "loss": 0.2013, "step": 17970 }, { "epoch": 0.67, "learning_rate": 3.3341669792969324e-06, "loss": 0.2041, "step": 18000 }, { "epoch": 0.67, "learning_rate": 3.3313905886253366e-06, "loss": 0.178, "step": 18030 }, { "epoch": 0.67, "learning_rate": 3.3286211221988295e-06, "loss": 0.1897, "step": 18060 }, { "epoch": 0.67, "learning_rate": 3.325858551283706e-06, "loss": 0.2258, "step": 18090 }, { "epoch": 0.68, "learning_rate": 3.3231028473129184e-06, "loss": 0.191, "step": 18120 }, { "epoch": 0.68, "learning_rate": 3.3203539818848333e-06, "loss": 0.1796, "step": 18150 }, { "epoch": 0.68, "learning_rate": 3.3176119267620007e-06, "loss": 0.2318, "step": 18180 }, { "epoch": 0.68, "learning_rate": 3.3148766538699385e-06, "loss": 0.1636, "step": 18210 }, { "epoch": 0.68, "learning_rate": 3.3121481352959235e-06, "loss": 0.1984, "step": 18240 }, { "epoch": 0.68, "learning_rate": 3.3094263432877955e-06, "loss": 0.1771, "step": 18270 }, { "epoch": 0.68, "learning_rate": 3.306711250252772e-06, "loss": 0.1933, "step": 18300 }, { "epoch": 0.68, "learning_rate": 3.304002828756273e-06, "loss": 0.1874, "step": 18330 }, { "epoch": 0.68, "learning_rate": 3.3013010515207573e-06, "loss": 0.2102, "step": 18360 }, { "epoch": 0.69, "learning_rate": 3.298605891424567e-06, "loss": 0.1955, "step": 18390 }, { "epoch": 0.69, "learning_rate": 3.2959173215007867e-06, "loss": 0.1924, "step": 18420 }, { "epoch": 0.69, "learning_rate": 3.293235314936108e-06, "loss": 0.1786, "step": 18450 }, { "epoch": 0.69, "learning_rate": 3.2905598450697064e-06, "loss": 0.2073, "step": 18480 }, { "epoch": 0.69, "learning_rate": 3.2878908853921286e-06, "loss": 0.1909, "step": 18510 }, { "epoch": 0.69, "learning_rate": 3.2852284095441923e-06, "loss": 0.1895, "step": 18540 }, { "epoch": 0.69, "learning_rate": 3.282572391315887e-06, "loss": 0.1972, "step": 18570 }, { "epoch": 0.69, "learning_rate": 3.279922804645294e-06, "loss": 0.1673, "step": 18600 }, { "epoch": 0.69, "learning_rate": 3.2772796236175144e-06, "loss": 0.225, "step": 18630 }, { "epoch": 0.7, "learning_rate": 3.274642822463597e-06, "loss": 0.2317, "step": 18660 }, { "epoch": 0.7, "learning_rate": 3.2720999550184424e-06, "loss": 0.2248, "step": 18690 }, { "epoch": 0.7, "learning_rate": 3.269475626333966e-06, "loss": 0.2053, "step": 18720 }, { "epoch": 0.7, "learning_rate": 3.266857601924006e-06, "loss": 0.2262, "step": 18750 }, { "epoch": 0.7, "learning_rate": 3.2642458565882563e-06, "loss": 0.2002, "step": 18780 }, { "epoch": 0.7, "learning_rate": 3.2616403652672114e-06, "loss": 0.1788, "step": 18810 }, { "epoch": 0.7, "learning_rate": 3.2590411030411596e-06, "loss": 0.1958, "step": 18840 }, { "epoch": 0.7, "learning_rate": 3.2564480451291814e-06, "loss": 0.1836, "step": 18870 }, { "epoch": 0.7, "learning_rate": 3.253861166888155e-06, "loss": 0.2255, "step": 18900 }, { "epoch": 0.71, "learning_rate": 3.2512804438117756e-06, "loss": 0.1828, "step": 18930 }, { "epoch": 0.71, "learning_rate": 3.24870585152958e-06, "loss": 0.1978, "step": 18960 }, { "epoch": 0.71, "learning_rate": 3.2461373658059773e-06, "loss": 0.1853, "step": 18990 }, { "epoch": 0.71, "learning_rate": 3.243574962539294e-06, "loss": 0.1664, "step": 19020 }, { "epoch": 0.71, "learning_rate": 3.241018617760822e-06, "loss": 0.1852, "step": 19050 }, { "epoch": 0.71, "learning_rate": 3.2384683076338774e-06, "loss": 0.193, "step": 19080 }, { "epoch": 0.71, "learning_rate": 3.235924008452868e-06, "loss": 0.2052, "step": 19110 }, { "epoch": 0.71, "learning_rate": 3.233385696642366e-06, "loss": 0.1822, "step": 19140 }, { "epoch": 0.71, "learning_rate": 3.230853348756193e-06, "loss": 0.2126, "step": 19170 }, { "epoch": 0.72, "learning_rate": 3.2283269414765067e-06, "loss": 0.1973, "step": 19200 }, { "epoch": 0.72, "learning_rate": 3.225806451612903e-06, "loss": 0.2046, "step": 19230 }, { "epoch": 0.72, "learning_rate": 3.2232918561015214e-06, "loss": 0.2206, "step": 19260 }, { "epoch": 0.72, "learning_rate": 3.2207831320041542e-06, "loss": 0.1786, "step": 19290 }, { "epoch": 0.72, "learning_rate": 3.2182802565073746e-06, "loss": 0.1932, "step": 19320 }, { "epoch": 0.72, "learning_rate": 3.215783206921658e-06, "loss": 0.1883, "step": 19350 }, { "epoch": 0.72, "learning_rate": 3.213291960680524e-06, "loss": 0.2078, "step": 19380 }, { "epoch": 0.72, "learning_rate": 3.210806495339678e-06, "loss": 0.1932, "step": 19410 }, { "epoch": 0.73, "learning_rate": 3.208326788576161e-06, "loss": 0.1897, "step": 19440 }, { "epoch": 0.73, "learning_rate": 3.205852818187507e-06, "loss": 0.1865, "step": 19470 }, { "epoch": 0.73, "learning_rate": 3.2033845620909114e-06, "loss": 0.1811, "step": 19500 }, { "epoch": 0.73, "learning_rate": 3.2009219983224e-06, "loss": 0.1725, "step": 19530 }, { "epoch": 0.73, "learning_rate": 3.1984651050360064e-06, "loss": 0.2187, "step": 19560 }, { "epoch": 0.73, "learning_rate": 3.1960138605029663e-06, "loss": 0.2321, "step": 19590 }, { "epoch": 0.73, "learning_rate": 3.1935682431108977e-06, "loss": 0.2007, "step": 19620 }, { "epoch": 0.73, "learning_rate": 3.1911282313630144e-06, "loss": 0.1824, "step": 19650 }, { "epoch": 0.73, "learning_rate": 3.18869380387732e-06, "loss": 0.2498, "step": 19680 }, { "epoch": 0.74, "learning_rate": 3.1862649393858314e-06, "loss": 0.2345, "step": 19710 }, { "epoch": 0.74, "learning_rate": 3.183841616733788e-06, "loss": 0.202, "step": 19740 }, { "epoch": 0.74, "learning_rate": 3.181423814878889e-06, "loss": 0.2071, "step": 19770 }, { "epoch": 0.74, "learning_rate": 3.1790115128905165e-06, "loss": 0.1641, "step": 19800 }, { "epoch": 0.74, "learning_rate": 3.17660468994898e-06, "loss": 0.2057, "step": 19830 }, { "epoch": 0.74, "learning_rate": 3.1742033253447586e-06, "loss": 0.1941, "step": 19860 }, { "epoch": 0.74, "learning_rate": 3.1718073984777564e-06, "loss": 0.2097, "step": 19890 }, { "epoch": 0.74, "learning_rate": 3.169416888856555e-06, "loss": 0.1986, "step": 19920 }, { "epoch": 0.74, "learning_rate": 3.167031776097682e-06, "loss": 0.1843, "step": 19950 }, { "epoch": 0.75, "learning_rate": 3.1646520399248797e-06, "loss": 0.2038, "step": 19980 }, { "epoch": 0.75, "learning_rate": 3.1622776601683796e-06, "loss": 0.1745, "step": 20010 }, { "epoch": 0.75, "learning_rate": 3.159908616764186e-06, "loss": 0.199, "step": 20040 }, { "epoch": 0.75, "learning_rate": 3.1575448897533633e-06, "loss": 0.1845, "step": 20070 }, { "epoch": 0.75, "learning_rate": 3.15518645928133e-06, "loss": 0.206, "step": 20100 }, { "epoch": 0.75, "learning_rate": 3.1528333055971584e-06, "loss": 0.2374, "step": 20130 }, { "epoch": 0.75, "learning_rate": 3.1504854090528795e-06, "loss": 0.2106, "step": 20160 }, { "epoch": 0.75, "learning_rate": 3.1481427501027932e-06, "loss": 0.1568, "step": 20190 }, { "epoch": 0.75, "learning_rate": 3.1458053093027873e-06, "loss": 0.1677, "step": 20220 }, { "epoch": 0.76, "learning_rate": 3.1434730673096573e-06, "loss": 0.187, "step": 20250 }, { "epoch": 0.76, "learning_rate": 3.141146004880436e-06, "loss": 0.1736, "step": 20280 }, { "epoch": 0.76, "learning_rate": 3.1388241028717226e-06, "loss": 0.2169, "step": 20310 }, { "epoch": 0.76, "learning_rate": 3.136507342239028e-06, "loss": 0.1903, "step": 20340 }, { "epoch": 0.76, "learning_rate": 3.1341957040361138e-06, "loss": 0.1915, "step": 20370 }, { "epoch": 0.76, "learning_rate": 3.131889169414341e-06, "loss": 0.1775, "step": 20400 }, { "epoch": 0.76, "learning_rate": 3.1295877196220293e-06, "loss": 0.2063, "step": 20430 }, { "epoch": 0.76, "learning_rate": 3.127291336003811e-06, "loss": 0.1973, "step": 20460 }, { "epoch": 0.76, "learning_rate": 3.125e-06, "loss": 0.1864, "step": 20490 }, { "epoch": 0.77, "learning_rate": 3.1227136931459613e-06, "loss": 0.1893, "step": 20520 }, { "epoch": 0.77, "learning_rate": 3.1204323970714836e-06, "loss": 0.23, "step": 20550 }, { "epoch": 0.77, "learning_rate": 3.118156093500161e-06, "loss": 0.2014, "step": 20580 }, { "epoch": 0.77, "learning_rate": 3.1158847642487794e-06, "loss": 0.1873, "step": 20610 }, { "epoch": 0.77, "learning_rate": 3.1136183912267038e-06, "loss": 0.2029, "step": 20640 }, { "epoch": 0.77, "learning_rate": 3.111356956435275e-06, "loss": 0.1946, "step": 20670 }, { "epoch": 0.77, "learning_rate": 3.1091004419672094e-06, "loss": 0.2022, "step": 20700 }, { "epoch": 0.77, "learning_rate": 3.1068488300060003e-06, "loss": 0.1916, "step": 20730 }, { "epoch": 0.77, "learning_rate": 3.1046021028253316e-06, "loss": 0.1954, "step": 20760 }, { "epoch": 0.78, "learning_rate": 3.1023602427884887e-06, "loss": 0.1604, "step": 20790 }, { "epoch": 0.78, "learning_rate": 3.1001232323477775e-06, "loss": 0.2243, "step": 20820 }, { "epoch": 0.78, "learning_rate": 3.0978910540439495e-06, "loss": 0.214, "step": 20850 }, { "epoch": 0.78, "learning_rate": 3.095663690505624e-06, "loss": 0.1776, "step": 20880 }, { "epoch": 0.78, "learning_rate": 3.09344112444873e-06, "loss": 0.203, "step": 20910 }, { "epoch": 0.78, "learning_rate": 3.0912233386759318e-06, "loss": 0.185, "step": 20940 }, { "epoch": 0.78, "learning_rate": 3.0890103160760777e-06, "loss": 0.1801, "step": 20970 }, { "epoch": 0.78, "learning_rate": 3.0868020396236425e-06, "loss": 0.1748, "step": 21000 }, { "epoch": 0.78, "learning_rate": 3.0845984923781787e-06, "loss": 0.2048, "step": 21030 }, { "epoch": 0.79, "learning_rate": 3.0823996574837696e-06, "loss": 0.1797, "step": 21060 }, { "epoch": 0.79, "learning_rate": 3.0802055181684876e-06, "loss": 0.1694, "step": 21090 }, { "epoch": 0.79, "learning_rate": 3.0780889645514934e-06, "loss": 0.1713, "step": 21120 }, { "epoch": 0.79, "learning_rate": 3.075904011269121e-06, "loss": 0.1855, "step": 21150 }, { "epoch": 0.79, "learning_rate": 3.0737237042981292e-06, "loss": 0.2117, "step": 21180 }, { "epoch": 0.79, "learning_rate": 3.0715480271945e-06, "loss": 0.1906, "step": 21210 }, { "epoch": 0.79, "learning_rate": 3.0693769635955745e-06, "loss": 0.248, "step": 21240 }, { "epoch": 0.79, "learning_rate": 3.067210497219538e-06, "loss": 0.1815, "step": 21270 }, { "epoch": 0.79, "learning_rate": 3.0650486118649127e-06, "loss": 0.1695, "step": 21300 }, { "epoch": 0.8, "learning_rate": 3.0628912914100383e-06, "loss": 0.1733, "step": 21330 }, { "epoch": 0.8, "learning_rate": 3.0607385198125768e-06, "loss": 0.187, "step": 21360 }, { "epoch": 0.8, "learning_rate": 3.0585902811090056e-06, "loss": 0.1775, "step": 21390 }, { "epoch": 0.8, "learning_rate": 3.056446559414125e-06, "loss": 0.2013, "step": 21420 }, { "epoch": 0.8, "learning_rate": 3.0543073389205587e-06, "loss": 0.1936, "step": 21450 }, { "epoch": 0.8, "learning_rate": 3.05217260389827e-06, "loss": 0.1781, "step": 21480 }, { "epoch": 0.8, "learning_rate": 3.0500423386940733e-06, "loss": 0.1818, "step": 21510 }, { "epoch": 0.8, "learning_rate": 3.0479165277311516e-06, "loss": 0.1745, "step": 21540 }, { "epoch": 0.8, "learning_rate": 3.0457951555085815e-06, "loss": 0.1871, "step": 21570 }, { "epoch": 0.81, "learning_rate": 3.0436782066008522e-06, "loss": 0.2433, "step": 21600 }, { "epoch": 0.81, "learning_rate": 3.0415656656574006e-06, "loss": 0.1986, "step": 21630 }, { "epoch": 0.81, "learning_rate": 3.0394575174021406e-06, "loss": 0.176, "step": 21660 }, { "epoch": 0.81, "learning_rate": 3.0373537466330005e-06, "loss": 0.2019, "step": 21690 }, { "epoch": 0.81, "learning_rate": 3.0352543382214616e-06, "loss": 0.2233, "step": 21720 }, { "epoch": 0.81, "learning_rate": 3.0332290426009857e-06, "loss": 0.2413, "step": 21750 }, { "epoch": 0.81, "learning_rate": 3.0311381696410036e-06, "loss": 0.1946, "step": 21780 }, { "epoch": 0.81, "learning_rate": 3.0290516145856953e-06, "loss": 0.1893, "step": 21810 }, { "epoch": 0.81, "learning_rate": 3.0269693625938358e-06, "loss": 0.2216, "step": 21840 }, { "epoch": 0.82, "learning_rate": 3.0248913988955188e-06, "loss": 0.1971, "step": 21870 }, { "epoch": 0.82, "learning_rate": 3.0228177087917153e-06, "loss": 0.1776, "step": 21900 }, { "epoch": 0.82, "learning_rate": 3.0207482776538377e-06, "loss": 0.1729, "step": 21930 }, { "epoch": 0.82, "learning_rate": 3.0186830909233056e-06, "loss": 0.2129, "step": 21960 }, { "epoch": 0.82, "learning_rate": 3.0166221341111156e-06, "loss": 0.1983, "step": 21990 }, { "epoch": 0.82, "learning_rate": 3.014565392797416e-06, "loss": 0.1759, "step": 22020 }, { "epoch": 0.82, "learning_rate": 3.01251285263108e-06, "loss": 0.1762, "step": 22050 }, { "epoch": 0.82, "learning_rate": 3.01046449932929e-06, "loss": 0.1912, "step": 22080 }, { "epoch": 0.82, "learning_rate": 3.008420318677114e-06, "loss": 0.1635, "step": 22110 }, { "epoch": 0.83, "learning_rate": 3.006380296527096e-06, "loss": 0.1975, "step": 22140 }, { "epoch": 0.83, "learning_rate": 3.004344418798843e-06, "loss": 0.1715, "step": 22170 }, { "epoch": 0.83, "learning_rate": 3.0023126714786185e-06, "loss": 0.172, "step": 22200 }, { "epoch": 0.83, "learning_rate": 3.0002850406189315e-06, "loss": 0.1759, "step": 22230 }, { "epoch": 0.83, "learning_rate": 2.9982615123381435e-06, "loss": 0.1836, "step": 22260 }, { "epoch": 0.83, "learning_rate": 2.9962420728200638e-06, "loss": 0.1822, "step": 22290 }, { "epoch": 0.83, "learning_rate": 2.99422670831355e-06, "loss": 0.1924, "step": 22320 }, { "epoch": 0.83, "learning_rate": 2.992215405132124e-06, "loss": 0.1958, "step": 22350 }, { "epoch": 0.83, "learning_rate": 2.990208149653574e-06, "loss": 0.1966, "step": 22380 }, { "epoch": 0.84, "learning_rate": 2.988204928319569e-06, "loss": 0.1612, "step": 22410 }, { "epoch": 0.84, "learning_rate": 2.9862057276352764e-06, "loss": 0.1816, "step": 22440 }, { "epoch": 0.84, "learning_rate": 2.984210534168976e-06, "loss": 0.2078, "step": 22470 }, { "epoch": 0.84, "learning_rate": 2.982219334551684e-06, "loss": 0.1877, "step": 22500 }, { "epoch": 0.84, "learning_rate": 2.9802321154767783e-06, "loss": 0.1862, "step": 22530 }, { "epoch": 0.84, "learning_rate": 2.978248863699621e-06, "loss": 0.2263, "step": 22560 }, { "epoch": 0.84, "learning_rate": 2.9762695660371887e-06, "loss": 0.1911, "step": 22590 }, { "epoch": 0.84, "learning_rate": 2.9742942093677073e-06, "loss": 0.1784, "step": 22620 }, { "epoch": 0.84, "learning_rate": 2.9723227806302822e-06, "loss": 0.2086, "step": 22650 }, { "epoch": 0.85, "learning_rate": 2.9703552668245405e-06, "loss": 0.2191, "step": 22680 }, { "epoch": 0.85, "learning_rate": 2.9683916550102658e-06, "loss": 0.1633, "step": 22710 }, { "epoch": 0.85, "learning_rate": 2.9664319323070455e-06, "loss": 0.181, "step": 22740 }, { "epoch": 0.85, "learning_rate": 2.964476085893913e-06, "loss": 0.2081, "step": 22770 }, { "epoch": 0.85, "learning_rate": 2.9625241030089954e-06, "loss": 0.1903, "step": 22800 }, { "epoch": 0.85, "learning_rate": 2.9605759709491647e-06, "loss": 0.1801, "step": 22830 }, { "epoch": 0.85, "learning_rate": 2.9586316770696934e-06, "loss": 0.223, "step": 22860 }, { "epoch": 0.85, "learning_rate": 2.9566912087839018e-06, "loss": 0.1891, "step": 22890 }, { "epoch": 0.85, "learning_rate": 2.954754553562824e-06, "loss": 0.1841, "step": 22920 }, { "epoch": 0.86, "learning_rate": 2.9528216989348657e-06, "loss": 0.2035, "step": 22950 }, { "epoch": 0.86, "learning_rate": 2.9508926324854625e-06, "loss": 0.1673, "step": 22980 }, { "epoch": 0.86, "learning_rate": 2.9489673418567527e-06, "loss": 0.1781, "step": 23010 }, { "epoch": 0.86, "learning_rate": 2.947045814747238e-06, "loss": 0.1882, "step": 23040 }, { "epoch": 0.86, "learning_rate": 2.9451280389114573e-06, "loss": 0.1896, "step": 23070 }, { "epoch": 0.86, "learning_rate": 2.9432140021596566e-06, "loss": 0.1775, "step": 23100 }, { "epoch": 0.86, "learning_rate": 2.941303692357466e-06, "loss": 0.1693, "step": 23130 }, { "epoch": 0.86, "learning_rate": 2.9393970974255757e-06, "loss": 0.2118, "step": 23160 }, { "epoch": 0.86, "learning_rate": 2.937494205339412e-06, "loss": 0.1826, "step": 23190 }, { "epoch": 0.87, "learning_rate": 2.9355950041288252e-06, "loss": 0.1622, "step": 23220 }, { "epoch": 0.87, "learning_rate": 2.9336994818777668e-06, "loss": 0.1661, "step": 23250 }, { "epoch": 0.87, "learning_rate": 2.93180762672398e-06, "loss": 0.1776, "step": 23280 }, { "epoch": 0.87, "learning_rate": 2.9299194268586878e-06, "loss": 0.1617, "step": 23310 }, { "epoch": 0.87, "learning_rate": 2.9280348705262775e-06, "loss": 0.165, "step": 23340 }, { "epoch": 0.87, "learning_rate": 2.926153946024002e-06, "loss": 0.1841, "step": 23370 }, { "epoch": 0.87, "learning_rate": 2.9242766417016677e-06, "loss": 0.1686, "step": 23400 }, { "epoch": 0.87, "learning_rate": 2.922402945961337e-06, "loss": 0.179, "step": 23430 }, { "epoch": 0.87, "learning_rate": 2.9205328472570204e-06, "loss": 0.1653, "step": 23460 }, { "epoch": 0.88, "learning_rate": 2.9186663340943855e-06, "loss": 0.212, "step": 23490 }, { "epoch": 0.88, "learning_rate": 2.9168033950304554e-06, "loss": 0.1896, "step": 23520 }, { "epoch": 0.88, "learning_rate": 2.9149440186733125e-06, "loss": 0.1886, "step": 23550 }, { "epoch": 0.88, "learning_rate": 2.9130881936818123e-06, "loss": 0.1959, "step": 23580 }, { "epoch": 0.88, "learning_rate": 2.9112359087652875e-06, "loss": 0.211, "step": 23610 }, { "epoch": 0.88, "learning_rate": 2.909387152683261e-06, "loss": 0.1904, "step": 23640 }, { "epoch": 0.88, "learning_rate": 2.9075419142451583e-06, "loss": 0.1705, "step": 23670 }, { "epoch": 0.88, "learning_rate": 2.9057001823100285e-06, "loss": 0.1779, "step": 23700 }, { "epoch": 0.89, "learning_rate": 2.903861945786251e-06, "loss": 0.1967, "step": 23730 }, { "epoch": 0.89, "learning_rate": 2.9020271936312684e-06, "loss": 0.1746, "step": 23760 }, { "epoch": 0.89, "learning_rate": 2.9001959148512965e-06, "loss": 0.2198, "step": 23790 }, { "epoch": 0.89, "learning_rate": 2.898368098501052e-06, "loss": 0.1712, "step": 23820 }, { "epoch": 0.89, "learning_rate": 2.89654373368348e-06, "loss": 0.218, "step": 23850 }, { "epoch": 0.89, "learning_rate": 2.894722809549479e-06, "loss": 0.1932, "step": 23880 }, { "epoch": 0.89, "learning_rate": 2.8929053152976272e-06, "loss": 0.1703, "step": 23910 }, { "epoch": 0.89, "learning_rate": 2.8910912401739185e-06, "loss": 0.2124, "step": 23940 }, { "epoch": 0.89, "learning_rate": 2.8892805734714926e-06, "loss": 0.2129, "step": 23970 }, { "epoch": 0.9, "learning_rate": 2.8874733045303684e-06, "loss": 0.1994, "step": 24000 }, { "epoch": 0.9, "learning_rate": 2.885669422737183e-06, "loss": 0.1716, "step": 24030 }, { "epoch": 0.9, "learning_rate": 2.8838689175249286e-06, "loss": 0.2043, "step": 24060 }, { "epoch": 0.9, "learning_rate": 2.8820717783726926e-06, "loss": 0.1575, "step": 24090 }, { "epoch": 0.9, "learning_rate": 2.8802779948053998e-06, "loss": 0.1846, "step": 24120 }, { "epoch": 0.9, "learning_rate": 2.878487556393554e-06, "loss": 0.2028, "step": 24150 }, { "epoch": 0.9, "learning_rate": 2.8767004527529884e-06, "loss": 0.1681, "step": 24180 }, { "epoch": 0.9, "learning_rate": 2.8749166735446054e-06, "loss": 0.2006, "step": 24210 }, { "epoch": 0.9, "learning_rate": 2.873136208474131e-06, "loss": 0.1828, "step": 24240 }, { "epoch": 0.91, "learning_rate": 2.8713590472918633e-06, "loss": 0.1757, "step": 24270 }, { "epoch": 0.91, "learning_rate": 2.8695851797924253e-06, "loss": 0.1763, "step": 24300 }, { "epoch": 0.91, "learning_rate": 2.8678145958145158e-06, "loss": 0.2044, "step": 24330 }, { "epoch": 0.91, "learning_rate": 2.8660472852406714e-06, "loss": 0.2031, "step": 24360 }, { "epoch": 0.91, "learning_rate": 2.8642832379970157e-06, "loss": 0.2166, "step": 24390 }, { "epoch": 0.91, "learning_rate": 2.862522444053026e-06, "loss": 0.1725, "step": 24420 }, { "epoch": 0.91, "learning_rate": 2.860764893421287e-06, "loss": 0.1614, "step": 24450 }, { "epoch": 0.91, "learning_rate": 2.859010576157256e-06, "loss": 0.1866, "step": 24480 }, { "epoch": 0.91, "learning_rate": 2.857259482359027e-06, "loss": 0.1888, "step": 24510 }, { "epoch": 0.92, "learning_rate": 2.855511602167096e-06, "loss": 0.1799, "step": 24540 }, { "epoch": 0.92, "learning_rate": 2.8537669257641252e-06, "loss": 0.1852, "step": 24570 }, { "epoch": 0.92, "learning_rate": 2.8520254433747156e-06, "loss": 0.1932, "step": 24600 }, { "epoch": 0.92, "learning_rate": 2.8502871452651733e-06, "loss": 0.1782, "step": 24630 }, { "epoch": 0.92, "learning_rate": 2.8485520217432816e-06, "loss": 0.1932, "step": 24660 }, { "epoch": 0.92, "learning_rate": 2.8468200631580763e-06, "loss": 0.1474, "step": 24690 }, { "epoch": 0.92, "learning_rate": 2.8450912598996183e-06, "loss": 0.1396, "step": 24720 }, { "epoch": 0.92, "learning_rate": 2.8433656023987705e-06, "loss": 0.1527, "step": 24750 }, { "epoch": 0.92, "learning_rate": 2.841643081126973e-06, "loss": 0.2015, "step": 24780 }, { "epoch": 0.93, "learning_rate": 2.8399236865960273e-06, "loss": 0.1824, "step": 24810 }, { "epoch": 0.93, "learning_rate": 2.8382074093578704e-06, "loss": 0.1834, "step": 24840 }, { "epoch": 0.93, "learning_rate": 2.836494240004362e-06, "loss": 0.1714, "step": 24870 }, { "epoch": 0.93, "learning_rate": 2.8347841691670646e-06, "loss": 0.1673, "step": 24900 }, { "epoch": 0.93, "learning_rate": 2.833077187517031e-06, "loss": 0.1669, "step": 24930 }, { "epoch": 0.93, "learning_rate": 2.8313732857645867e-06, "loss": 0.2079, "step": 24960 }, { "epoch": 0.93, "learning_rate": 2.829672454659122e-06, "loss": 0.1795, "step": 24990 }, { "epoch": 0.93, "learning_rate": 2.827974684988878e-06, "loss": 0.1923, "step": 25020 }, { "epoch": 0.93, "learning_rate": 2.826279967580738e-06, "loss": 0.1613, "step": 25050 }, { "epoch": 0.94, "learning_rate": 2.8245882933000197e-06, "loss": 0.2025, "step": 25080 }, { "epoch": 0.94, "learning_rate": 2.8228996530502693e-06, "loss": 0.2004, "step": 25110 }, { "epoch": 0.94, "learning_rate": 2.8212140377730524e-06, "loss": 0.1627, "step": 25140 }, { "epoch": 0.94, "learning_rate": 2.819531438447754e-06, "loss": 0.1889, "step": 25170 }, { "epoch": 0.94, "learning_rate": 2.8178518460913747e-06, "loss": 0.1689, "step": 25200 }, { "epoch": 0.94, "learning_rate": 2.8161752517583257e-06, "loss": 0.1703, "step": 25230 }, { "epoch": 0.94, "learning_rate": 2.8145016465402352e-06, "loss": 0.1798, "step": 25260 }, { "epoch": 0.94, "learning_rate": 2.812831021565744e-06, "loss": 0.2057, "step": 25290 }, { "epoch": 0.94, "learning_rate": 2.811163368000311e-06, "loss": 0.1988, "step": 25320 }, { "epoch": 0.95, "learning_rate": 2.809498677046015e-06, "loss": 0.1747, "step": 25350 }, { "epoch": 0.95, "learning_rate": 2.807836939941363e-06, "loss": 0.1707, "step": 25380 }, { "epoch": 0.95, "learning_rate": 2.806178147961091e-06, "loss": 0.1702, "step": 25410 }, { "epoch": 0.95, "learning_rate": 2.8045222924159783e-06, "loss": 0.1531, "step": 25440 }, { "epoch": 0.95, "learning_rate": 2.8028693646526505e-06, "loss": 0.1773, "step": 25470 }, { "epoch": 0.95, "learning_rate": 2.8012193560533934e-06, "loss": 0.2086, "step": 25500 }, { "epoch": 0.95, "learning_rate": 2.799572258035962e-06, "loss": 0.1826, "step": 25530 }, { "epoch": 0.95, "learning_rate": 2.797928062053395e-06, "loss": 0.1524, "step": 25560 }, { "epoch": 0.95, "learning_rate": 2.7962867595938266e-06, "loss": 0.1954, "step": 25590 }, { "epoch": 0.96, "learning_rate": 2.794648342180302e-06, "loss": 0.1594, "step": 25620 }, { "epoch": 0.96, "learning_rate": 2.7930128013705946e-06, "loss": 0.1492, "step": 25650 }, { "epoch": 0.96, "learning_rate": 2.791380128757023e-06, "loss": 0.1868, "step": 25680 }, { "epoch": 0.96, "learning_rate": 2.789750315966267e-06, "loss": 0.1852, "step": 25710 }, { "epoch": 0.96, "learning_rate": 2.7881233546591905e-06, "loss": 0.1662, "step": 25740 }, { "epoch": 0.96, "learning_rate": 2.786499236530662e-06, "loss": 0.188, "step": 25770 }, { "epoch": 0.96, "learning_rate": 2.7848779533093735e-06, "loss": 0.1579, "step": 25800 }, { "epoch": 0.96, "learning_rate": 2.783259496757666e-06, "loss": 0.1935, "step": 25830 }, { "epoch": 0.96, "learning_rate": 2.7816438586713548e-06, "loss": 0.1776, "step": 25860 }, { "epoch": 0.97, "learning_rate": 2.7800310308795516e-06, "loss": 0.182, "step": 25890 }, { "epoch": 0.97, "learning_rate": 2.7784210052444933e-06, "loss": 0.1845, "step": 25920 }, { "epoch": 0.97, "learning_rate": 2.77681377366137e-06, "loss": 0.1834, "step": 25950 }, { "epoch": 0.97, "learning_rate": 2.775209328058151e-06, "loss": 0.1745, "step": 25980 }, { "epoch": 0.97, "learning_rate": 2.7736076603954173e-06, "loss": 0.1624, "step": 26010 }, { "epoch": 0.97, "learning_rate": 2.772062014714772e-06, "loss": 0.1656, "step": 26040 }, { "epoch": 0.97, "learning_rate": 2.7704657870071948e-06, "loss": 0.1633, "step": 26070 }, { "epoch": 0.97, "learning_rate": 2.7689253850700936e-06, "loss": 0.1738, "step": 26100 }, { "epoch": 0.97, "learning_rate": 2.7673345665931835e-06, "loss": 0.1711, "step": 26130 }, { "epoch": 0.98, "learning_rate": 2.7657464868672066e-06, "loss": 0.173, "step": 26160 }, { "epoch": 0.98, "learning_rate": 2.764161138042791e-06, "loss": 0.1626, "step": 26190 }, { "epoch": 0.98, "learning_rate": 2.762578512302018e-06, "loss": 0.182, "step": 26220 }, { "epoch": 0.98, "learning_rate": 2.760998601858272e-06, "loss": 0.1748, "step": 26250 }, { "epoch": 0.98, "learning_rate": 2.7594213989560703e-06, "loss": 0.1944, "step": 26280 }, { "epoch": 0.98, "learning_rate": 2.7578468958709083e-06, "loss": 0.1825, "step": 26310 }, { "epoch": 0.98, "learning_rate": 2.7562750849090968e-06, "loss": 0.1798, "step": 26340 }, { "epoch": 0.98, "learning_rate": 2.7547059584076095e-06, "loss": 0.1606, "step": 26370 }, { "epoch": 0.98, "learning_rate": 2.753139508733921e-06, "loss": 0.1856, "step": 26400 }, { "epoch": 0.99, "learning_rate": 2.7515757282858534e-06, "loss": 0.1706, "step": 26430 }, { "epoch": 0.99, "learning_rate": 2.75001460949142e-06, "loss": 0.1915, "step": 26460 }, { "epoch": 0.99, "learning_rate": 2.7484561448086723e-06, "loss": 0.195, "step": 26490 }, { "epoch": 0.99, "learning_rate": 2.746900326725547e-06, "loss": 0.1873, "step": 26520 }, { "epoch": 0.99, "learning_rate": 2.7453471477597134e-06, "loss": 0.1907, "step": 26550 }, { "epoch": 0.99, "learning_rate": 2.743796600458422e-06, "loss": 0.1844, "step": 26580 }, { "epoch": 0.99, "learning_rate": 2.742248677398357e-06, "loss": 0.1556, "step": 26610 }, { "epoch": 0.99, "learning_rate": 2.7407033711854815e-06, "loss": 0.1819, "step": 26640 }, { "epoch": 0.99, "learning_rate": 2.739160674454895e-06, "loss": 0.1591, "step": 26670 }, { "epoch": 1.0, "learning_rate": 2.7376205798706813e-06, "loss": 0.1474, "step": 26700 }, { "epoch": 1.0, "learning_rate": 2.7360830801257666e-06, "loss": 0.1874, "step": 26730 }, { "epoch": 1.0, "learning_rate": 2.73454816794177e-06, "loss": 0.1631, "step": 26760 }, { "epoch": 1.0, "learning_rate": 2.7330158360688597e-06, "loss": 0.1639, "step": 26790 }, { "epoch": 1.0, "learning_rate": 2.7314860772856113e-06, "loss": 0.2042, "step": 26820 }, { "epoch": 1.0, "learning_rate": 2.729958884398861e-06, "loss": 0.1806, "step": 26850 }, { "epoch": 1.0, "learning_rate": 2.7284342502435685e-06, "loss": 0.183, "step": 26880 }, { "epoch": 1.0, "learning_rate": 2.7269121676826703e-06, "loss": 0.219, "step": 26910 }, { "epoch": 1.0, "learning_rate": 2.7253926296069453e-06, "loss": 0.1616, "step": 26940 }, { "epoch": 1.01, "learning_rate": 2.723875628934869e-06, "loss": 0.1491, "step": 26970 }, { "epoch": 1.01, "learning_rate": 2.7223611586124805e-06, "loss": 0.2203, "step": 27000 }, { "epoch": 1.01, "learning_rate": 2.7208492116132414e-06, "loss": 0.1466, "step": 27030 }, { "epoch": 1.01, "learning_rate": 2.719339780937899e-06, "loss": 0.1725, "step": 27060 }, { "epoch": 1.01, "learning_rate": 2.717832859614352e-06, "loss": 0.1656, "step": 27090 }, { "epoch": 1.01, "learning_rate": 2.716328440697516e-06, "loss": 0.198, "step": 27120 }, { "epoch": 1.01, "learning_rate": 2.714826517269184e-06, "loss": 0.1988, "step": 27150 }, { "epoch": 1.01, "learning_rate": 2.7133270824378975e-06, "loss": 0.1754, "step": 27180 }, { "epoch": 1.01, "learning_rate": 2.711830129338813e-06, "loss": 0.1788, "step": 27210 }, { "epoch": 1.02, "learning_rate": 2.7103356511335694e-06, "loss": 0.1858, "step": 27240 }, { "epoch": 1.02, "learning_rate": 2.7088436410101547e-06, "loss": 0.1464, "step": 27270 }, { "epoch": 1.02, "learning_rate": 2.70735409218278e-06, "loss": 0.1739, "step": 27300 }, { "epoch": 1.02, "learning_rate": 2.7058669978917464e-06, "loss": 0.1741, "step": 27330 }, { "epoch": 1.02, "learning_rate": 2.7043823514033168e-06, "loss": 0.1454, "step": 27360 }, { "epoch": 1.02, "learning_rate": 2.7029001460095887e-06, "loss": 0.1555, "step": 27390 }, { "epoch": 1.02, "learning_rate": 2.701420375028368e-06, "loss": 0.1698, "step": 27420 }, { "epoch": 1.02, "learning_rate": 2.6999430318030373e-06, "loss": 0.1675, "step": 27450 }, { "epoch": 1.02, "learning_rate": 2.698468109702438e-06, "loss": 0.1998, "step": 27480 }, { "epoch": 1.03, "learning_rate": 2.696995602120738e-06, "loss": 0.2141, "step": 27510 }, { "epoch": 1.03, "learning_rate": 2.695525502477311e-06, "loss": 0.1873, "step": 27540 }, { "epoch": 1.03, "learning_rate": 2.6940578042166143e-06, "loss": 0.1848, "step": 27570 }, { "epoch": 1.03, "learning_rate": 2.692592500808063e-06, "loss": 0.1503, "step": 27600 }, { "epoch": 1.03, "learning_rate": 2.691129585745908e-06, "loss": 0.1921, "step": 27630 }, { "epoch": 1.03, "learning_rate": 2.689669052549118e-06, "loss": 0.1928, "step": 27660 }, { "epoch": 1.03, "learning_rate": 2.6882108947612555e-06, "loss": 0.1454, "step": 27690 }, { "epoch": 1.03, "learning_rate": 2.6867551059503586e-06, "loss": 0.1938, "step": 27720 }, { "epoch": 1.03, "learning_rate": 2.685301679708821e-06, "loss": 0.1702, "step": 27750 }, { "epoch": 1.04, "learning_rate": 2.6838506096532747e-06, "loss": 0.161, "step": 27780 }, { "epoch": 1.04, "learning_rate": 2.682401889424471e-06, "loss": 0.1699, "step": 27810 }, { "epoch": 1.04, "learning_rate": 2.6809555126871633e-06, "loss": 0.1529, "step": 27840 }, { "epoch": 1.04, "learning_rate": 2.6795114731299932e-06, "loss": 0.1697, "step": 27870 }, { "epoch": 1.04, "learning_rate": 2.6780697644653724e-06, "loss": 0.1936, "step": 27900 }, { "epoch": 1.04, "learning_rate": 2.676630380429367e-06, "loss": 0.1625, "step": 27930 }, { "epoch": 1.04, "learning_rate": 2.675193314781585e-06, "loss": 0.1762, "step": 27960 }, { "epoch": 1.04, "learning_rate": 2.6737585613050653e-06, "loss": 0.173, "step": 27990 }, { "epoch": 1.05, "learning_rate": 2.672326113806157e-06, "loss": 0.181, "step": 28020 }, { "epoch": 1.05, "learning_rate": 2.670895966114414e-06, "loss": 0.1551, "step": 28050 }, { "epoch": 1.05, "learning_rate": 2.6694681120824823e-06, "loss": 0.1743, "step": 28080 }, { "epoch": 1.05, "learning_rate": 2.668042545585986e-06, "loss": 0.1613, "step": 28110 }, { "epoch": 1.05, "learning_rate": 2.6666192605234194e-06, "loss": 0.1659, "step": 28140 }, { "epoch": 1.05, "learning_rate": 2.665198250816037e-06, "loss": 0.1945, "step": 28170 }, { "epoch": 1.05, "learning_rate": 2.663779510407744e-06, "loss": 0.1741, "step": 28200 }, { "epoch": 1.05, "learning_rate": 2.662363033264988e-06, "loss": 0.1546, "step": 28230 }, { "epoch": 1.05, "learning_rate": 2.6609488133766516e-06, "loss": 0.1615, "step": 28260 }, { "epoch": 1.06, "learning_rate": 2.659536844753944e-06, "loss": 0.1553, "step": 28290 }, { "epoch": 1.06, "learning_rate": 2.6581271214302974e-06, "loss": 0.1362, "step": 28320 }, { "epoch": 1.06, "learning_rate": 2.656719637461258e-06, "loss": 0.1575, "step": 28350 }, { "epoch": 1.06, "learning_rate": 2.6553143869243816e-06, "loss": 0.1635, "step": 28380 }, { "epoch": 1.06, "learning_rate": 2.653911363919129e-06, "loss": 0.1524, "step": 28410 }, { "epoch": 1.06, "learning_rate": 2.652510562566764e-06, "loss": 0.1629, "step": 28440 }, { "epoch": 1.06, "learning_rate": 2.651111977010246e-06, "loss": 0.1578, "step": 28470 }, { "epoch": 1.06, "learning_rate": 2.6497156014141305e-06, "loss": 0.1817, "step": 28500 }, { "epoch": 1.06, "learning_rate": 2.6483214299644663e-06, "loss": 0.1375, "step": 28530 }, { "epoch": 1.07, "learning_rate": 2.6469294568686905e-06, "loss": 0.1616, "step": 28560 }, { "epoch": 1.07, "learning_rate": 2.6455396763555318e-06, "loss": 0.1864, "step": 28590 }, { "epoch": 1.07, "learning_rate": 2.6441520826749096e-06, "loss": 0.1538, "step": 28620 }, { "epoch": 1.07, "learning_rate": 2.6427666700978293e-06, "loss": 0.1763, "step": 28650 }, { "epoch": 1.07, "learning_rate": 2.64138343291629e-06, "loss": 0.171, "step": 28680 }, { "epoch": 1.07, "learning_rate": 2.6400023654431796e-06, "loss": 0.1688, "step": 28710 }, { "epoch": 1.07, "learning_rate": 2.6386234620121797e-06, "loss": 0.1436, "step": 28740 }, { "epoch": 1.07, "learning_rate": 2.6372467169776683e-06, "loss": 0.1494, "step": 28770 }, { "epoch": 1.07, "learning_rate": 2.6358721247146202e-06, "loss": 0.1806, "step": 28800 }, { "epoch": 1.08, "learning_rate": 2.634499679618514e-06, "loss": 0.1994, "step": 28830 }, { "epoch": 1.08, "learning_rate": 2.6331293761052316e-06, "loss": 0.1753, "step": 28860 }, { "epoch": 1.08, "learning_rate": 2.631761208610969e-06, "loss": 0.1863, "step": 28890 }, { "epoch": 1.08, "learning_rate": 2.630395171592134e-06, "loss": 0.1742, "step": 28920 }, { "epoch": 1.08, "learning_rate": 2.6290312595252576e-06, "loss": 0.1663, "step": 28950 }, { "epoch": 1.08, "learning_rate": 2.6276694669068985e-06, "loss": 0.1601, "step": 28980 }, { "epoch": 1.08, "learning_rate": 2.6263097882535484e-06, "loss": 0.1424, "step": 29010 }, { "epoch": 1.08, "learning_rate": 2.6249522181015406e-06, "loss": 0.1756, "step": 29040 }, { "epoch": 1.08, "learning_rate": 2.623596751006959e-06, "loss": 0.1582, "step": 29070 }, { "epoch": 1.09, "learning_rate": 2.6222433815455445e-06, "loss": 0.1803, "step": 29100 }, { "epoch": 1.09, "learning_rate": 2.6208921043126025e-06, "loss": 0.1791, "step": 29130 }, { "epoch": 1.09, "learning_rate": 2.6195429139229154e-06, "loss": 0.1579, "step": 29160 }, { "epoch": 1.09, "learning_rate": 2.618195805010653e-06, "loss": 0.1633, "step": 29190 }, { "epoch": 1.09, "learning_rate": 2.616850772229277e-06, "loss": 0.1399, "step": 29220 }, { "epoch": 1.09, "learning_rate": 2.6155078102514603e-06, "loss": 0.1731, "step": 29250 }, { "epoch": 1.09, "learning_rate": 2.6141669137689908e-06, "loss": 0.1989, "step": 29280 }, { "epoch": 1.09, "learning_rate": 2.612828077492688e-06, "loss": 0.1508, "step": 29310 }, { "epoch": 1.09, "learning_rate": 2.611491296152313e-06, "loss": 0.1514, "step": 29340 }, { "epoch": 1.1, "learning_rate": 2.6101565644964837e-06, "loss": 0.1835, "step": 29370 }, { "epoch": 1.1, "learning_rate": 2.6088238772925843e-06, "loss": 0.1823, "step": 29400 }, { "epoch": 1.1, "learning_rate": 2.607493229326684e-06, "loss": 0.1761, "step": 29430 }, { "epoch": 1.1, "learning_rate": 2.606164615403447e-06, "loss": 0.1523, "step": 29460 }, { "epoch": 1.1, "learning_rate": 2.6048380303460504e-06, "loss": 0.167, "step": 29490 }, { "epoch": 1.1, "learning_rate": 2.6035134689960962e-06, "loss": 0.1705, "step": 29520 }, { "epoch": 1.1, "learning_rate": 2.602190926213532e-06, "loss": 0.185, "step": 29550 }, { "epoch": 1.1, "learning_rate": 2.6008703968765623e-06, "loss": 0.1641, "step": 29580 }, { "epoch": 1.1, "learning_rate": 2.599551875881568e-06, "loss": 0.1599, "step": 29610 }, { "epoch": 1.11, "learning_rate": 2.598235358143022e-06, "loss": 0.1412, "step": 29640 }, { "epoch": 1.11, "learning_rate": 2.5969208385934085e-06, "loss": 0.1463, "step": 29670 }, { "epoch": 1.11, "learning_rate": 2.5956083121831394e-06, "loss": 0.1586, "step": 29700 }, { "epoch": 1.11, "learning_rate": 2.5942977738804735e-06, "loss": 0.1557, "step": 29730 }, { "epoch": 1.11, "learning_rate": 2.5929892186714367e-06, "loss": 0.157, "step": 29760 }, { "epoch": 1.11, "learning_rate": 2.5916826415597386e-06, "loss": 0.1673, "step": 29790 }, { "epoch": 1.11, "learning_rate": 2.590378037566695e-06, "loss": 0.1452, "step": 29820 }, { "epoch": 1.11, "learning_rate": 2.589075401731149e-06, "loss": 0.1613, "step": 29850 }, { "epoch": 1.11, "learning_rate": 2.5877747291093864e-06, "loss": 0.1588, "step": 29880 }, { "epoch": 1.12, "learning_rate": 2.586476014775063e-06, "loss": 0.2077, "step": 29910 }, { "epoch": 1.12, "learning_rate": 2.5851792538191226e-06, "loss": 0.1625, "step": 29940 }, { "epoch": 1.12, "learning_rate": 2.5838844413497223e-06, "loss": 0.1911, "step": 29970 }, { "epoch": 1.12, "learning_rate": 2.5825915724921503e-06, "loss": 0.1794, "step": 30000 }, { "epoch": 1.12, "learning_rate": 2.581300642388753e-06, "loss": 0.148, "step": 30030 }, { "epoch": 1.12, "learning_rate": 2.5800116461988566e-06, "loss": 0.1765, "step": 30060 }, { "epoch": 1.12, "learning_rate": 2.578724579098691e-06, "loss": 0.1541, "step": 30090 }, { "epoch": 1.12, "learning_rate": 2.577439436281313e-06, "loss": 0.1802, "step": 30120 }, { "epoch": 1.12, "learning_rate": 2.576156212956535e-06, "loss": 0.181, "step": 30150 }, { "epoch": 1.13, "learning_rate": 2.574874904350842e-06, "loss": 0.1844, "step": 30180 }, { "epoch": 1.13, "learning_rate": 2.573595505707327e-06, "loss": 0.1545, "step": 30210 }, { "epoch": 1.13, "learning_rate": 2.5723180122856094e-06, "loss": 0.1621, "step": 30240 }, { "epoch": 1.13, "learning_rate": 2.5710424193617634e-06, "loss": 0.1829, "step": 30270 }, { "epoch": 1.13, "learning_rate": 2.569768722228245e-06, "loss": 0.1689, "step": 30300 }, { "epoch": 1.13, "learning_rate": 2.56849691619382e-06, "loss": 0.1457, "step": 30330 }, { "epoch": 1.13, "learning_rate": 2.5672269965834895e-06, "loss": 0.1635, "step": 30360 }, { "epoch": 1.13, "learning_rate": 2.5659589587384176e-06, "loss": 0.1363, "step": 30390 }, { "epoch": 1.13, "learning_rate": 2.564692798015863e-06, "loss": 0.161, "step": 30420 }, { "epoch": 1.14, "learning_rate": 2.5634285097891025e-06, "loss": 0.1627, "step": 30450 }, { "epoch": 1.14, "learning_rate": 2.5621660894473655e-06, "loss": 0.1606, "step": 30480 }, { "epoch": 1.14, "learning_rate": 2.560905532395757e-06, "loss": 0.1622, "step": 30510 }, { "epoch": 1.14, "learning_rate": 2.559646834055194e-06, "loss": 0.1622, "step": 30540 }, { "epoch": 1.14, "learning_rate": 2.5583899898623316e-06, "loss": 0.1603, "step": 30570 }, { "epoch": 1.14, "learning_rate": 2.557134995269493e-06, "loss": 0.1517, "step": 30600 }, { "epoch": 1.14, "learning_rate": 2.5558818457446043e-06, "loss": 0.169, "step": 30630 }, { "epoch": 1.14, "learning_rate": 2.554630536771122e-06, "loss": 0.1671, "step": 30660 }, { "epoch": 1.14, "learning_rate": 2.553381063847966e-06, "loss": 0.1823, "step": 30690 }, { "epoch": 1.15, "learning_rate": 2.5521334224894508e-06, "loss": 0.1486, "step": 30720 }, { "epoch": 1.15, "learning_rate": 2.5508876082252192e-06, "loss": 0.1592, "step": 30750 }, { "epoch": 1.15, "learning_rate": 2.5496436166001757e-06, "loss": 0.1751, "step": 30780 }, { "epoch": 1.15, "learning_rate": 2.5484014431744175e-06, "loss": 0.1687, "step": 30810 }, { "epoch": 1.15, "learning_rate": 2.5471610835231664e-06, "loss": 0.1749, "step": 30840 }, { "epoch": 1.15, "learning_rate": 2.5459225332367084e-06, "loss": 0.1721, "step": 30870 }, { "epoch": 1.15, "learning_rate": 2.5446857879203222e-06, "loss": 0.1674, "step": 30900 }, { "epoch": 1.15, "learning_rate": 2.5434508431942174e-06, "loss": 0.1674, "step": 30930 }, { "epoch": 1.15, "learning_rate": 2.5422176946934667e-06, "loss": 0.1527, "step": 30960 }, { "epoch": 1.16, "learning_rate": 2.5409863380679436e-06, "loss": 0.1803, "step": 30990 }, { "epoch": 1.16, "learning_rate": 2.539756768982254e-06, "loss": 0.1536, "step": 31020 }, { "epoch": 1.16, "learning_rate": 2.5385289831156773e-06, "loss": 0.1405, "step": 31050 }, { "epoch": 1.16, "learning_rate": 2.5373029761620997e-06, "loss": 0.1691, "step": 31080 }, { "epoch": 1.16, "learning_rate": 2.5360787438299507e-06, "loss": 0.1617, "step": 31110 }, { "epoch": 1.16, "learning_rate": 2.5348562818421386e-06, "loss": 0.158, "step": 31140 }, { "epoch": 1.16, "learning_rate": 2.5336355859359934e-06, "loss": 0.1632, "step": 31170 }, { "epoch": 1.16, "learning_rate": 2.5324166518631965e-06, "loss": 0.175, "step": 31200 }, { "epoch": 1.16, "learning_rate": 2.531199475389726e-06, "loss": 0.1579, "step": 31230 }, { "epoch": 1.17, "learning_rate": 2.5299840522957895e-06, "loss": 0.157, "step": 31260 }, { "epoch": 1.17, "learning_rate": 2.5287703783757656e-06, "loss": 0.1626, "step": 31290 }, { "epoch": 1.17, "learning_rate": 2.5275584494381406e-06, "loss": 0.1753, "step": 31320 }, { "epoch": 1.17, "learning_rate": 2.5263482613054506e-06, "loss": 0.1429, "step": 31350 }, { "epoch": 1.17, "learning_rate": 2.525139809814219e-06, "loss": 0.1551, "step": 31380 }, { "epoch": 1.17, "learning_rate": 2.523933090814896e-06, "loss": 0.1516, "step": 31410 }, { "epoch": 1.17, "learning_rate": 2.5227281001718014e-06, "loss": 0.1365, "step": 31440 }, { "epoch": 1.17, "learning_rate": 2.52152483376306e-06, "loss": 0.1482, "step": 31470 }, { "epoch": 1.17, "learning_rate": 2.5203633113534356e-06, "loss": 0.1979, "step": 31500 }, { "epoch": 1.18, "learning_rate": 2.519163423967481e-06, "loss": 0.1598, "step": 31530 }, { "epoch": 1.18, "learning_rate": 2.5179652486683264e-06, "loss": 0.153, "step": 31560 }, { "epoch": 1.18, "learning_rate": 2.5167687813882913e-06, "loss": 0.1523, "step": 31590 }, { "epoch": 1.18, "learning_rate": 2.5155740180732113e-06, "loss": 0.1552, "step": 31620 }, { "epoch": 1.18, "learning_rate": 2.514380954682381e-06, "loss": 0.1824, "step": 31650 }, { "epoch": 1.18, "learning_rate": 2.5131895871884982e-06, "loss": 0.2012, "step": 31680 }, { "epoch": 1.18, "learning_rate": 2.511999911577605e-06, "loss": 0.2118, "step": 31710 }, { "epoch": 1.18, "learning_rate": 2.5108119238490314e-06, "loss": 0.1636, "step": 31740 }, { "epoch": 1.18, "learning_rate": 2.50962562001534e-06, "loss": 0.153, "step": 31770 }, { "epoch": 1.19, "learning_rate": 2.508440996102268e-06, "loss": 0.1606, "step": 31800 }, { "epoch": 1.19, "learning_rate": 2.507258048148672e-06, "loss": 0.1909, "step": 31830 }, { "epoch": 1.19, "learning_rate": 2.506076772206472e-06, "loss": 0.1504, "step": 31860 }, { "epoch": 1.19, "learning_rate": 2.5048971643405985e-06, "loss": 0.1589, "step": 31890 }, { "epoch": 1.19, "learning_rate": 2.5037192206289322e-06, "loss": 0.1534, "step": 31920 }, { "epoch": 1.19, "learning_rate": 2.5025429371622568e-06, "loss": 0.1991, "step": 31950 }, { "epoch": 1.19, "learning_rate": 2.5013683100441947e-06, "loss": 0.1461, "step": 31980 }, { "epoch": 1.19, "learning_rate": 2.5001953353911644e-06, "loss": 0.1715, "step": 32010 }, { "epoch": 1.19, "learning_rate": 2.4990240093323155e-06, "loss": 0.1519, "step": 32040 }, { "epoch": 1.2, "learning_rate": 2.4978543280094823e-06, "loss": 0.1527, "step": 32070 }, { "epoch": 1.2, "learning_rate": 2.4966862875771277e-06, "loss": 0.1782, "step": 32100 }, { "epoch": 1.2, "learning_rate": 2.4955198842022903e-06, "loss": 0.1457, "step": 32130 }, { "epoch": 1.2, "learning_rate": 2.494355114064532e-06, "loss": 0.1619, "step": 32160 }, { "epoch": 1.2, "learning_rate": 2.493191973355886e-06, "loss": 0.1677, "step": 32190 }, { "epoch": 1.2, "learning_rate": 2.4920304582808026e-06, "loss": 0.185, "step": 32220 }, { "epoch": 1.2, "learning_rate": 2.4908705650560973e-06, "loss": 0.1631, "step": 32250 }, { "epoch": 1.2, "learning_rate": 2.489712289910904e-06, "loss": 0.1768, "step": 32280 }, { "epoch": 1.21, "learning_rate": 2.488555629086615e-06, "loss": 0.1695, "step": 32310 }, { "epoch": 1.21, "learning_rate": 2.487400578836838e-06, "loss": 0.1578, "step": 32340 }, { "epoch": 1.21, "learning_rate": 2.4862471354273387e-06, "loss": 0.1481, "step": 32370 }, { "epoch": 1.21, "learning_rate": 2.4851336640235753e-06, "loss": 0.17, "step": 32400 }, { "epoch": 1.21, "learning_rate": 2.483983369886367e-06, "loss": 0.1522, "step": 32430 }, { "epoch": 1.21, "learning_rate": 2.482834671582272e-06, "loss": 0.1716, "step": 32460 }, { "epoch": 1.21, "learning_rate": 2.481687565424798e-06, "loss": 0.1523, "step": 32490 }, { "epoch": 1.21, "learning_rate": 2.4805420477393657e-06, "loss": 0.1714, "step": 32520 }, { "epoch": 1.21, "learning_rate": 2.479398114863255e-06, "loss": 0.1585, "step": 32550 }, { "epoch": 1.22, "learning_rate": 2.4782557631455603e-06, "loss": 0.1632, "step": 32580 }, { "epoch": 1.22, "learning_rate": 2.47711498894714e-06, "loss": 0.1372, "step": 32610 }, { "epoch": 1.22, "learning_rate": 2.4759757886405678e-06, "loss": 0.1625, "step": 32640 }, { "epoch": 1.22, "learning_rate": 2.474838158610084e-06, "loss": 0.15, "step": 32670 }, { "epoch": 1.22, "learning_rate": 2.4737020952515466e-06, "loss": 0.1996, "step": 32700 }, { "epoch": 1.22, "learning_rate": 2.4725675949723856e-06, "loss": 0.1648, "step": 32730 }, { "epoch": 1.22, "learning_rate": 2.471434654191553e-06, "loss": 0.1608, "step": 32760 }, { "epoch": 1.22, "learning_rate": 2.4703032693394766e-06, "loss": 0.1501, "step": 32790 }, { "epoch": 1.22, "learning_rate": 2.4691734368580124e-06, "loss": 0.2002, "step": 32820 }, { "epoch": 1.23, "learning_rate": 2.4680451532003975e-06, "loss": 0.1717, "step": 32850 }, { "epoch": 1.23, "learning_rate": 2.4669184148312046e-06, "loss": 0.1575, "step": 32880 }, { "epoch": 1.23, "learning_rate": 2.465793218226291e-06, "loss": 0.191, "step": 32910 }, { "epoch": 1.23, "learning_rate": 2.4646695598727596e-06, "loss": 0.156, "step": 32940 }, { "epoch": 1.23, "learning_rate": 2.463547436268907e-06, "loss": 0.1589, "step": 32970 }, { "epoch": 1.23, "learning_rate": 2.4624268439241786e-06, "loss": 0.1618, "step": 33000 }, { "epoch": 1.23, "learning_rate": 2.4613077793591255e-06, "loss": 0.1438, "step": 33030 }, { "epoch": 1.23, "learning_rate": 2.460190239105358e-06, "loss": 0.171, "step": 33060 }, { "epoch": 1.23, "learning_rate": 2.459074219705497e-06, "loss": 0.1617, "step": 33090 }, { "epoch": 1.24, "learning_rate": 2.457959717713135e-06, "loss": 0.1846, "step": 33120 }, { "epoch": 1.24, "learning_rate": 2.456846729692788e-06, "loss": 0.1812, "step": 33150 }, { "epoch": 1.24, "learning_rate": 2.4557352522198512e-06, "loss": 0.1568, "step": 33180 }, { "epoch": 1.24, "learning_rate": 2.454625281880554e-06, "loss": 0.1354, "step": 33210 }, { "epoch": 1.24, "learning_rate": 2.4535168152719188e-06, "loss": 0.185, "step": 33240 }, { "epoch": 1.24, "learning_rate": 2.4524098490017136e-06, "loss": 0.1512, "step": 33270 }, { "epoch": 1.24, "learning_rate": 2.4513043796884104e-06, "loss": 0.1976, "step": 33300 }, { "epoch": 1.24, "learning_rate": 2.4502004039611437e-06, "loss": 0.1595, "step": 33330 }, { "epoch": 1.24, "learning_rate": 2.4490979184596634e-06, "loss": 0.1411, "step": 33360 }, { "epoch": 1.25, "learning_rate": 2.447996919834293e-06, "loss": 0.131, "step": 33390 }, { "epoch": 1.25, "learning_rate": 2.44689740474589e-06, "loss": 0.1429, "step": 33420 }, { "epoch": 1.25, "learning_rate": 2.445799369865799e-06, "loss": 0.1504, "step": 33450 }, { "epoch": 1.25, "learning_rate": 2.4447028118758115e-06, "loss": 0.1438, "step": 33480 }, { "epoch": 1.25, "learning_rate": 2.443607727468123e-06, "loss": 0.1468, "step": 33510 }, { "epoch": 1.25, "learning_rate": 2.4425141133452936e-06, "loss": 0.1323, "step": 33540 }, { "epoch": 1.25, "learning_rate": 2.441421966220202e-06, "loss": 0.1852, "step": 33570 }, { "epoch": 1.25, "learning_rate": 2.440331282816008e-06, "loss": 0.1523, "step": 33600 }, { "epoch": 1.25, "learning_rate": 2.4392420598661103e-06, "loss": 0.1491, "step": 33630 }, { "epoch": 1.26, "learning_rate": 2.4381542941141006e-06, "loss": 0.1689, "step": 33660 }, { "epoch": 1.26, "learning_rate": 2.4370679823137313e-06, "loss": 0.1446, "step": 33690 }, { "epoch": 1.26, "learning_rate": 2.4359831212288684e-06, "loss": 0.1563, "step": 33720 }, { "epoch": 1.26, "learning_rate": 2.434899707633453e-06, "loss": 0.1501, "step": 33750 }, { "epoch": 1.26, "learning_rate": 2.4338177383114604e-06, "loss": 0.1654, "step": 33780 }, { "epoch": 1.26, "learning_rate": 2.432737210056862e-06, "loss": 0.1693, "step": 33810 }, { "epoch": 1.26, "learning_rate": 2.4316581196735815e-06, "loss": 0.16, "step": 33840 }, { "epoch": 1.26, "learning_rate": 2.4305804639754597e-06, "loss": 0.1636, "step": 33870 }, { "epoch": 1.26, "learning_rate": 2.4295042397862126e-06, "loss": 0.1367, "step": 33900 }, { "epoch": 1.27, "learning_rate": 2.428429443939391e-06, "loss": 0.1561, "step": 33930 }, { "epoch": 1.27, "learning_rate": 2.427356073278345e-06, "loss": 0.1421, "step": 33960 }, { "epoch": 1.27, "learning_rate": 2.42628412465618e-06, "loss": 0.1553, "step": 33990 }, { "epoch": 1.27, "learning_rate": 2.425213594935723e-06, "loss": 0.2056, "step": 34020 }, { "epoch": 1.27, "learning_rate": 2.424144480989481e-06, "loss": 0.1346, "step": 34050 }, { "epoch": 1.27, "learning_rate": 2.423076779699603e-06, "loss": 0.1716, "step": 34080 }, { "epoch": 1.27, "learning_rate": 2.422010487957844e-06, "loss": 0.1501, "step": 34110 }, { "epoch": 1.27, "learning_rate": 2.420945602665522e-06, "loss": 0.1449, "step": 34140 }, { "epoch": 1.27, "learning_rate": 2.419882120733486e-06, "loss": 0.1514, "step": 34170 }, { "epoch": 1.28, "learning_rate": 2.4188200390820758e-06, "loss": 0.1622, "step": 34200 }, { "epoch": 1.28, "learning_rate": 2.4177593546410825e-06, "loss": 0.1641, "step": 34230 }, { "epoch": 1.28, "learning_rate": 2.4167000643497152e-06, "loss": 0.1417, "step": 34260 }, { "epoch": 1.28, "learning_rate": 2.4156421651565615e-06, "loss": 0.1399, "step": 34290 }, { "epoch": 1.28, "learning_rate": 2.4145856540195495e-06, "loss": 0.1595, "step": 34320 }, { "epoch": 1.28, "learning_rate": 2.413530527905915e-06, "loss": 0.155, "step": 34350 }, { "epoch": 1.28, "learning_rate": 2.4124767837921604e-06, "loss": 0.1723, "step": 34380 }, { "epoch": 1.28, "learning_rate": 2.411424418664022e-06, "loss": 0.1505, "step": 34410 }, { "epoch": 1.28, "learning_rate": 2.4103734295164312e-06, "loss": 0.1443, "step": 34440 }, { "epoch": 1.29, "learning_rate": 2.40932381335348e-06, "loss": 0.1555, "step": 34470 }, { "epoch": 1.29, "learning_rate": 2.408275567188384e-06, "loss": 0.1472, "step": 34500 }, { "epoch": 1.29, "learning_rate": 2.407228688043447e-06, "loss": 0.1674, "step": 34530 }, { "epoch": 1.29, "learning_rate": 2.4061831729500282e-06, "loss": 0.1735, "step": 34560 }, { "epoch": 1.29, "learning_rate": 2.4051390189485015e-06, "loss": 0.1368, "step": 34590 }, { "epoch": 1.29, "learning_rate": 2.404096223088225e-06, "loss": 0.1499, "step": 34620 }, { "epoch": 1.29, "learning_rate": 2.403054782427503e-06, "loss": 0.171, "step": 34650 }, { "epoch": 1.29, "learning_rate": 2.4020146940335533e-06, "loss": 0.1371, "step": 34680 }, { "epoch": 1.29, "learning_rate": 2.400975954982471e-06, "loss": 0.1348, "step": 34710 }, { "epoch": 1.3, "learning_rate": 2.3999385623591958e-06, "loss": 0.1607, "step": 34740 }, { "epoch": 1.3, "learning_rate": 2.3989025132574736e-06, "loss": 0.1845, "step": 34770 }, { "epoch": 1.3, "learning_rate": 2.3978678047798286e-06, "loss": 0.1799, "step": 34800 }, { "epoch": 1.3, "learning_rate": 2.396834434037523e-06, "loss": 0.1409, "step": 34830 }, { "epoch": 1.3, "learning_rate": 2.3958023981505267e-06, "loss": 0.1556, "step": 34860 }, { "epoch": 1.3, "learning_rate": 2.3947716942474835e-06, "loss": 0.161, "step": 34890 }, { "epoch": 1.3, "learning_rate": 2.3937423194656766e-06, "loss": 0.1834, "step": 34920 }, { "epoch": 1.3, "learning_rate": 2.3927142709509947e-06, "loss": 0.1527, "step": 34950 }, { "epoch": 1.3, "learning_rate": 2.3916875458579e-06, "loss": 0.1612, "step": 34980 }, { "epoch": 1.31, "learning_rate": 2.3906621413493943e-06, "loss": 0.1413, "step": 35010 }, { "epoch": 1.31, "learning_rate": 2.3896380545969873e-06, "loss": 0.2112, "step": 35040 }, { "epoch": 1.31, "learning_rate": 2.388615282780661e-06, "loss": 0.1706, "step": 35070 }, { "epoch": 1.31, "learning_rate": 2.3875938230888414e-06, "loss": 0.1461, "step": 35100 }, { "epoch": 1.31, "learning_rate": 2.386573672718362e-06, "loss": 0.1477, "step": 35130 }, { "epoch": 1.31, "learning_rate": 2.385554828874434e-06, "loss": 0.1657, "step": 35160 }, { "epoch": 1.31, "learning_rate": 2.384537288770612e-06, "loss": 0.1558, "step": 35190 }, { "epoch": 1.31, "learning_rate": 2.3835210496287646e-06, "loss": 0.1707, "step": 35220 }, { "epoch": 1.31, "learning_rate": 2.3825061086790407e-06, "loss": 0.1314, "step": 35250 }, { "epoch": 1.32, "learning_rate": 2.3814924631598384e-06, "loss": 0.1539, "step": 35280 }, { "epoch": 1.32, "learning_rate": 2.3804801103177737e-06, "loss": 0.2101, "step": 35310 }, { "epoch": 1.32, "learning_rate": 2.3794690474076476e-06, "loss": 0.2028, "step": 35340 }, { "epoch": 1.32, "learning_rate": 2.3784592716924168e-06, "loss": 0.1903, "step": 35370 }, { "epoch": 1.32, "learning_rate": 2.377450780443162e-06, "loss": 0.1499, "step": 35400 }, { "epoch": 1.32, "learning_rate": 2.3764435709390556e-06, "loss": 0.1607, "step": 35430 }, { "epoch": 1.32, "learning_rate": 2.3754376404673334e-06, "loss": 0.1493, "step": 35460 }, { "epoch": 1.32, "learning_rate": 2.3744329863232613e-06, "loss": 0.1685, "step": 35490 }, { "epoch": 1.32, "learning_rate": 2.3734296058101067e-06, "loss": 0.1396, "step": 35520 }, { "epoch": 1.33, "learning_rate": 2.372427496239106e-06, "loss": 0.148, "step": 35550 }, { "epoch": 1.33, "learning_rate": 2.371426654929437e-06, "loss": 0.1632, "step": 35580 }, { "epoch": 1.33, "learning_rate": 2.3704270792081874e-06, "loss": 0.1703, "step": 35610 }, { "epoch": 1.33, "learning_rate": 2.3694287664103236e-06, "loss": 0.1319, "step": 35640 }, { "epoch": 1.33, "learning_rate": 2.368431713878663e-06, "loss": 0.1504, "step": 35670 }, { "epoch": 1.33, "learning_rate": 2.3674359189638443e-06, "loss": 0.1268, "step": 35700 }, { "epoch": 1.33, "learning_rate": 2.3664413790242944e-06, "loss": 0.2007, "step": 35730 }, { "epoch": 1.33, "learning_rate": 2.3654480914262044e-06, "loss": 0.1817, "step": 35760 }, { "epoch": 1.33, "learning_rate": 2.3644560535434954e-06, "loss": 0.1459, "step": 35790 }, { "epoch": 1.34, "learning_rate": 2.3634652627577933e-06, "loss": 0.1827, "step": 35820 }, { "epoch": 1.34, "learning_rate": 2.3624757164583963e-06, "loss": 0.181, "step": 35850 }, { "epoch": 1.34, "learning_rate": 2.3614874120422486e-06, "loss": 0.1485, "step": 35880 }, { "epoch": 1.34, "learning_rate": 2.36050034691391e-06, "loss": 0.1573, "step": 35910 }, { "epoch": 1.34, "learning_rate": 2.3595145184855286e-06, "loss": 0.15, "step": 35940 }, { "epoch": 1.34, "learning_rate": 2.35852992417681e-06, "loss": 0.2165, "step": 35970 }, { "epoch": 1.34, "learning_rate": 2.357546561414994e-06, "loss": 0.1448, "step": 36000 }, { "epoch": 1.34, "learning_rate": 2.3565644276348197e-06, "loss": 0.1556, "step": 36030 }, { "epoch": 1.34, "learning_rate": 2.3555835202785018e-06, "loss": 0.1416, "step": 36060 }, { "epoch": 1.35, "learning_rate": 2.354603836795701e-06, "loss": 0.1705, "step": 36090 }, { "epoch": 1.35, "learning_rate": 2.3536253746434974e-06, "loss": 0.1615, "step": 36120 }, { "epoch": 1.35, "learning_rate": 2.352648131286362e-06, "loss": 0.1561, "step": 36150 }, { "epoch": 1.35, "learning_rate": 2.3516721041961274e-06, "loss": 0.1637, "step": 36180 }, { "epoch": 1.35, "learning_rate": 2.3506972908519647e-06, "loss": 0.1616, "step": 36210 }, { "epoch": 1.35, "learning_rate": 2.3497236887403514e-06, "loss": 0.1653, "step": 36240 }, { "epoch": 1.35, "learning_rate": 2.348751295355046e-06, "loss": 0.1632, "step": 36270 }, { "epoch": 1.35, "learning_rate": 2.347780108197064e-06, "loss": 0.1437, "step": 36300 }, { "epoch": 1.35, "learning_rate": 2.3468101247746447e-06, "loss": 0.1745, "step": 36330 }, { "epoch": 1.36, "learning_rate": 2.3458413426032292e-06, "loss": 0.1505, "step": 36360 }, { "epoch": 1.36, "learning_rate": 2.3448737592054326e-06, "loss": 0.1477, "step": 36390 }, { "epoch": 1.36, "learning_rate": 2.3439073721110166e-06, "loss": 0.1484, "step": 36420 }, { "epoch": 1.36, "learning_rate": 2.3429421788568627e-06, "loss": 0.149, "step": 36450 }, { "epoch": 1.36, "learning_rate": 2.3419781769869478e-06, "loss": 0.138, "step": 36480 }, { "epoch": 1.36, "learning_rate": 2.341015364052316e-06, "loss": 0.1717, "step": 36510 }, { "epoch": 1.36, "learning_rate": 2.3400537376110534e-06, "loss": 0.1377, "step": 36540 }, { "epoch": 1.36, "learning_rate": 2.3390932952282607e-06, "loss": 0.1655, "step": 36570 }, { "epoch": 1.37, "learning_rate": 2.33813403447603e-06, "loss": 0.1842, "step": 36600 }, { "epoch": 1.37, "learning_rate": 2.3371759529334183e-06, "loss": 0.1865, "step": 36630 }, { "epoch": 1.37, "learning_rate": 2.336219048186418e-06, "loss": 0.1335, "step": 36660 }, { "epoch": 1.37, "learning_rate": 2.3352633178279355e-06, "loss": 0.1753, "step": 36690 }, { "epoch": 1.37, "learning_rate": 2.3343087594577666e-06, "loss": 0.176, "step": 36720 }, { "epoch": 1.37, "learning_rate": 2.3333553706825667e-06, "loss": 0.1251, "step": 36750 }, { "epoch": 1.37, "learning_rate": 2.33240314911583e-06, "loss": 0.162, "step": 36780 }, { "epoch": 1.37, "learning_rate": 2.3314520923778613e-06, "loss": 0.144, "step": 36810 }, { "epoch": 1.37, "learning_rate": 2.330533842535043e-06, "loss": 0.1405, "step": 36840 }, { "epoch": 1.38, "learning_rate": 2.329585069711041e-06, "loss": 0.1436, "step": 36870 }, { "epoch": 1.38, "learning_rate": 2.328637454695883e-06, "loss": 0.1458, "step": 36900 }, { "epoch": 1.38, "learning_rate": 2.32769099513665e-06, "loss": 0.1645, "step": 36930 }, { "epoch": 1.38, "learning_rate": 2.32674568868711e-06, "loss": 0.1415, "step": 36960 }, { "epoch": 1.38, "learning_rate": 2.3258015330076976e-06, "loss": 0.126, "step": 36990 }, { "epoch": 1.38, "learning_rate": 2.324858525765487e-06, "loss": 0.1576, "step": 37020 }, { "epoch": 1.38, "learning_rate": 2.3239166646341674e-06, "loss": 0.143, "step": 37050 }, { "epoch": 1.38, "learning_rate": 2.322975947294021e-06, "loss": 0.1905, "step": 37080 }, { "epoch": 1.38, "learning_rate": 2.3220363714318965e-06, "loss": 0.1584, "step": 37110 }, { "epoch": 1.39, "learning_rate": 2.3210979347411877e-06, "loss": 0.1583, "step": 37140 }, { "epoch": 1.39, "learning_rate": 2.320160634921808e-06, "loss": 0.1629, "step": 37170 }, { "epoch": 1.39, "learning_rate": 2.3192244696801673e-06, "loss": 0.1418, "step": 37200 }, { "epoch": 1.39, "learning_rate": 2.318289436729148e-06, "loss": 0.1481, "step": 37230 }, { "epoch": 1.39, "learning_rate": 2.3173555337880833e-06, "loss": 0.1596, "step": 37260 }, { "epoch": 1.39, "learning_rate": 2.3164227585827304e-06, "loss": 0.1332, "step": 37290 }, { "epoch": 1.39, "learning_rate": 2.3154911088452513e-06, "loss": 0.1482, "step": 37320 }, { "epoch": 1.39, "learning_rate": 2.314560582314186e-06, "loss": 0.1413, "step": 37350 }, { "epoch": 1.39, "learning_rate": 2.313631176734432e-06, "loss": 0.1742, "step": 37380 }, { "epoch": 1.4, "learning_rate": 2.3127028898572203e-06, "loss": 0.1686, "step": 37410 }, { "epoch": 1.4, "learning_rate": 2.311775719440093e-06, "loss": 0.1497, "step": 37440 }, { "epoch": 1.4, "learning_rate": 2.310849663246879e-06, "loss": 0.1518, "step": 37470 }, { "epoch": 1.4, "learning_rate": 2.309924719047674e-06, "loss": 0.1568, "step": 37500 }, { "epoch": 1.4, "learning_rate": 2.3090008846188165e-06, "loss": 0.1332, "step": 37530 }, { "epoch": 1.4, "learning_rate": 2.308078157742863e-06, "loss": 0.1531, "step": 37560 }, { "epoch": 1.4, "learning_rate": 2.307156536208571e-06, "loss": 0.1472, "step": 37590 }, { "epoch": 1.4, "learning_rate": 2.3062360178108724e-06, "loss": 0.1369, "step": 37620 }, { "epoch": 1.4, "learning_rate": 2.3053166003508527e-06, "loss": 0.1482, "step": 37650 }, { "epoch": 1.41, "learning_rate": 2.30439828163573e-06, "loss": 0.149, "step": 37680 }, { "epoch": 1.41, "learning_rate": 2.3034810594788295e-06, "loss": 0.1617, "step": 37710 }, { "epoch": 1.41, "learning_rate": 2.3025649316995668e-06, "loss": 0.1573, "step": 37740 }, { "epoch": 1.41, "learning_rate": 2.3016498961234214e-06, "loss": 0.1497, "step": 37770 }, { "epoch": 1.41, "learning_rate": 2.300735950581918e-06, "loss": 0.1744, "step": 37800 }, { "epoch": 1.41, "learning_rate": 2.2998230929126046e-06, "loss": 0.139, "step": 37830 }, { "epoch": 1.41, "learning_rate": 2.29891132095903e-06, "loss": 0.1647, "step": 37860 }, { "epoch": 1.41, "learning_rate": 2.2980006325707215e-06, "loss": 0.1512, "step": 37890 }, { "epoch": 1.41, "learning_rate": 2.2970910256031658e-06, "loss": 0.1802, "step": 37920 }, { "epoch": 1.42, "learning_rate": 2.296182497917788e-06, "loss": 0.1654, "step": 37950 }, { "epoch": 1.42, "learning_rate": 2.2952750473819264e-06, "loss": 0.1639, "step": 37980 }, { "epoch": 1.42, "learning_rate": 2.2943686718688178e-06, "loss": 0.153, "step": 38010 }, { "epoch": 1.42, "learning_rate": 2.293463369257572e-06, "loss": 0.1656, "step": 38040 }, { "epoch": 1.42, "learning_rate": 2.2925591374331504e-06, "loss": 0.1373, "step": 38070 }, { "epoch": 1.42, "learning_rate": 2.291655974286349e-06, "loss": 0.1618, "step": 38100 }, { "epoch": 1.42, "learning_rate": 2.2907538777137752e-06, "loss": 0.1632, "step": 38130 }, { "epoch": 1.42, "learning_rate": 2.2898528456178286e-06, "loss": 0.148, "step": 38160 }, { "epoch": 1.42, "learning_rate": 2.288952875906677e-06, "loss": 0.1529, "step": 38190 }, { "epoch": 1.43, "learning_rate": 2.2880539664942414e-06, "loss": 0.1352, "step": 38220 }, { "epoch": 1.43, "learning_rate": 2.287156115300173e-06, "loss": 0.1447, "step": 38250 }, { "epoch": 1.43, "learning_rate": 2.2862593202498297e-06, "loss": 0.194, "step": 38280 }, { "epoch": 1.43, "learning_rate": 2.2853635792742636e-06, "loss": 0.1346, "step": 38310 }, { "epoch": 1.43, "learning_rate": 2.284468890310193e-06, "loss": 0.1342, "step": 38340 }, { "epoch": 1.43, "learning_rate": 2.2835752512999883e-06, "loss": 0.1408, "step": 38370 }, { "epoch": 1.43, "learning_rate": 2.282682660191648e-06, "loss": 0.1545, "step": 38400 }, { "epoch": 1.43, "learning_rate": 2.2817911149387828e-06, "loss": 0.1597, "step": 38430 }, { "epoch": 1.43, "learning_rate": 2.2809006135005923e-06, "loss": 0.1433, "step": 38460 }, { "epoch": 1.44, "learning_rate": 2.280011153841847e-06, "loss": 0.14, "step": 38490 }, { "epoch": 1.44, "learning_rate": 2.2791227339328693e-06, "loss": 0.1647, "step": 38520 }, { "epoch": 1.44, "learning_rate": 2.2782353517495133e-06, "loss": 0.1659, "step": 38550 }, { "epoch": 1.44, "learning_rate": 2.2773490052731452e-06, "loss": 0.1298, "step": 38580 }, { "epoch": 1.44, "learning_rate": 2.2764636924906245e-06, "loss": 0.1773, "step": 38610 }, { "epoch": 1.44, "learning_rate": 2.27560887083038e-06, "loss": 0.1642, "step": 38640 }, { "epoch": 1.44, "learning_rate": 2.2747255851274107e-06, "loss": 0.1638, "step": 38670 }, { "epoch": 1.44, "learning_rate": 2.2738433271781068e-06, "loss": 0.1954, "step": 38700 }, { "epoch": 1.44, "learning_rate": 2.272962094990928e-06, "loss": 0.1598, "step": 38730 }, { "epoch": 1.45, "learning_rate": 2.272081886579736e-06, "loss": 0.1741, "step": 38760 }, { "epoch": 1.45, "learning_rate": 2.2712026999637678e-06, "loss": 0.1589, "step": 38790 }, { "epoch": 1.45, "learning_rate": 2.270324533167624e-06, "loss": 0.1637, "step": 38820 }, { "epoch": 1.45, "learning_rate": 2.2694473842212473e-06, "loss": 0.197, "step": 38850 }, { "epoch": 1.45, "learning_rate": 2.2685712511599043e-06, "loss": 0.1334, "step": 38880 }, { "epoch": 1.45, "learning_rate": 2.2676961320241665e-06, "loss": 0.1649, "step": 38910 }, { "epoch": 1.45, "learning_rate": 2.266822024859894e-06, "loss": 0.1752, "step": 38940 }, { "epoch": 1.45, "learning_rate": 2.2659489277182127e-06, "loss": 0.1423, "step": 38970 }, { "epoch": 1.45, "learning_rate": 2.2650768386555003e-06, "loss": 0.172, "step": 39000 }, { "epoch": 1.46, "learning_rate": 2.264205755733369e-06, "loss": 0.1372, "step": 39030 }, { "epoch": 1.46, "learning_rate": 2.2633356770186404e-06, "loss": 0.1646, "step": 39060 }, { "epoch": 1.46, "learning_rate": 2.2624666005833367e-06, "loss": 0.1595, "step": 39090 }, { "epoch": 1.46, "learning_rate": 2.2615985245046557e-06, "loss": 0.166, "step": 39120 }, { "epoch": 1.46, "learning_rate": 2.2607314468649563e-06, "loss": 0.1402, "step": 39150 }, { "epoch": 1.46, "learning_rate": 2.25986536575174e-06, "loss": 0.1519, "step": 39180 }, { "epoch": 1.46, "learning_rate": 2.259000279257632e-06, "loss": 0.1585, "step": 39210 }, { "epoch": 1.46, "learning_rate": 2.2581361854803667e-06, "loss": 0.1605, "step": 39240 }, { "epoch": 1.46, "learning_rate": 2.2572730825227662e-06, "loss": 0.139, "step": 39270 }, { "epoch": 1.47, "learning_rate": 2.256410968492726e-06, "loss": 0.1307, "step": 39300 }, { "epoch": 1.47, "learning_rate": 2.2555498415031953e-06, "loss": 0.1381, "step": 39330 }, { "epoch": 1.47, "learning_rate": 2.2546896996721607e-06, "loss": 0.1494, "step": 39360 }, { "epoch": 1.47, "learning_rate": 2.2538305411226297e-06, "loss": 0.1582, "step": 39390 }, { "epoch": 1.47, "learning_rate": 2.2529723639826117e-06, "loss": 0.1613, "step": 39420 }, { "epoch": 1.47, "learning_rate": 2.2521151663851024e-06, "loss": 0.166, "step": 39450 }, { "epoch": 1.47, "learning_rate": 2.251258946468066e-06, "loss": 0.1548, "step": 39480 }, { "epoch": 1.47, "learning_rate": 2.2504037023744184e-06, "loss": 0.1402, "step": 39510 }, { "epoch": 1.47, "learning_rate": 2.2495494322520092e-06, "loss": 0.1515, "step": 39540 }, { "epoch": 1.48, "learning_rate": 2.248696134253608e-06, "loss": 0.1395, "step": 39570 }, { "epoch": 1.48, "learning_rate": 2.247843806536884e-06, "loss": 0.1625, "step": 39600 }, { "epoch": 1.48, "learning_rate": 2.2469924472643904e-06, "loss": 0.147, "step": 39630 }, { "epoch": 1.48, "learning_rate": 2.2461420546035494e-06, "loss": 0.1336, "step": 39660 }, { "epoch": 1.48, "learning_rate": 2.245292626726635e-06, "loss": 0.1217, "step": 39690 }, { "epoch": 1.48, "learning_rate": 2.2444441618107533e-06, "loss": 0.1453, "step": 39720 }, { "epoch": 1.48, "learning_rate": 2.243596658037831e-06, "loss": 0.1382, "step": 39750 }, { "epoch": 1.48, "learning_rate": 2.2427501135945952e-06, "loss": 0.1632, "step": 39780 }, { "epoch": 1.48, "learning_rate": 2.2419045266725605e-06, "loss": 0.1615, "step": 39810 }, { "epoch": 1.49, "learning_rate": 2.241059895468009e-06, "loss": 0.1506, "step": 39840 }, { "epoch": 1.49, "learning_rate": 2.2402162181819777e-06, "loss": 0.1484, "step": 39870 }, { "epoch": 1.49, "learning_rate": 2.2393734930202386e-06, "loss": 0.1555, "step": 39900 }, { "epoch": 1.49, "learning_rate": 2.238531718193287e-06, "loss": 0.1427, "step": 39930 }, { "epoch": 1.49, "learning_rate": 2.2376908919163214e-06, "loss": 0.1651, "step": 39960 }, { "epoch": 1.49, "learning_rate": 2.236851012409231e-06, "loss": 0.1755, "step": 39990 }, { "epoch": 1.49, "learning_rate": 2.236012077896579e-06, "loss": 0.1369, "step": 40020 }, { "epoch": 1.49, "learning_rate": 2.235174086607584e-06, "loss": 0.175, "step": 40050 }, { "epoch": 1.49, "learning_rate": 2.2343370367761084e-06, "loss": 0.1441, "step": 40080 }, { "epoch": 1.5, "learning_rate": 2.2335009266406403e-06, "loss": 0.1731, "step": 40110 }, { "epoch": 1.5, "learning_rate": 2.2326657544442797e-06, "loss": 0.1675, "step": 40140 }, { "epoch": 1.5, "learning_rate": 2.231831518434719e-06, "loss": 0.1612, "step": 40170 }, { "epoch": 1.5, "learning_rate": 2.2309982168642326e-06, "loss": 0.1538, "step": 40200 }, { "epoch": 1.5, "learning_rate": 2.2301658479896607e-06, "loss": 0.1631, "step": 40230 }, { "epoch": 1.5, "learning_rate": 2.229334410072389e-06, "loss": 0.155, "step": 40260 }, { "epoch": 1.5, "learning_rate": 2.2285039013783404e-06, "loss": 0.135, "step": 40290 }, { "epoch": 1.5, "learning_rate": 2.2276743201779543e-06, "loss": 0.1491, "step": 40320 }, { "epoch": 1.5, "learning_rate": 2.2268456647461743e-06, "loss": 0.1364, "step": 40350 }, { "epoch": 1.51, "learning_rate": 2.226017933362433e-06, "loss": 0.1611, "step": 40380 }, { "epoch": 1.51, "learning_rate": 2.2251911243106365e-06, "loss": 0.1172, "step": 40410 }, { "epoch": 1.51, "learning_rate": 2.224365235879149e-06, "loss": 0.1509, "step": 40440 }, { "epoch": 1.51, "learning_rate": 2.2235402663607773e-06, "loss": 0.1661, "step": 40470 }, { "epoch": 1.51, "learning_rate": 2.2227162140527596e-06, "loss": 0.15, "step": 40500 }, { "epoch": 1.51, "learning_rate": 2.2218930772567463e-06, "loss": 0.1425, "step": 40530 }, { "epoch": 1.51, "learning_rate": 2.221070854278788e-06, "loss": 0.1721, "step": 40560 }, { "epoch": 1.51, "learning_rate": 2.2202495434293184e-06, "loss": 0.1377, "step": 40590 }, { "epoch": 1.51, "learning_rate": 2.219429143023144e-06, "loss": 0.1455, "step": 40620 }, { "epoch": 1.52, "learning_rate": 2.2186096513794256e-06, "loss": 0.1348, "step": 40650 }, { "epoch": 1.52, "learning_rate": 2.2177910668216645e-06, "loss": 0.1614, "step": 40680 }, { "epoch": 1.52, "learning_rate": 2.2169733876776897e-06, "loss": 0.1431, "step": 40710 }, { "epoch": 1.52, "learning_rate": 2.2161566122796414e-06, "loss": 0.1521, "step": 40740 }, { "epoch": 1.52, "learning_rate": 2.2153407389639607e-06, "loss": 0.175, "step": 40770 }, { "epoch": 1.52, "learning_rate": 2.21452576607137e-06, "loss": 0.1389, "step": 40800 }, { "epoch": 1.52, "learning_rate": 2.213711691946862e-06, "loss": 0.1679, "step": 40830 }, { "epoch": 1.52, "learning_rate": 2.212898514939685e-06, "loss": 0.1289, "step": 40860 }, { "epoch": 1.53, "learning_rate": 2.212086233403329e-06, "loss": 0.1763, "step": 40890 }, { "epoch": 1.53, "learning_rate": 2.2112748456955123e-06, "loss": 0.1648, "step": 40920 }, { "epoch": 1.53, "learning_rate": 2.2104643501781657e-06, "loss": 0.1724, "step": 40950 }, { "epoch": 1.53, "learning_rate": 2.20965474521742e-06, "loss": 0.1341, "step": 40980 }, { "epoch": 1.53, "learning_rate": 2.208846029183593e-06, "loss": 0.155, "step": 41010 }, { "epoch": 1.53, "learning_rate": 2.2080382004511725e-06, "loss": 0.1591, "step": 41040 }, { "epoch": 1.53, "learning_rate": 2.207231257398806e-06, "loss": 0.1678, "step": 41070 }, { "epoch": 1.53, "learning_rate": 2.2064251984092854e-06, "loss": 0.145, "step": 41100 }, { "epoch": 1.53, "learning_rate": 2.2056200218695346e-06, "loss": 0.1617, "step": 41130 }, { "epoch": 1.54, "learning_rate": 2.2048157261705934e-06, "loss": 0.1493, "step": 41160 }, { "epoch": 1.54, "learning_rate": 2.204012309707607e-06, "loss": 0.1345, "step": 41190 }, { "epoch": 1.54, "learning_rate": 2.203209770879811e-06, "loss": 0.1389, "step": 41220 }, { "epoch": 1.54, "learning_rate": 2.202408108090517e-06, "loss": 0.1593, "step": 41250 }, { "epoch": 1.54, "learning_rate": 2.2016073197471015e-06, "loss": 0.1582, "step": 41280 }, { "epoch": 1.54, "learning_rate": 2.200807404260992e-06, "loss": 0.1529, "step": 41310 }, { "epoch": 1.54, "learning_rate": 2.2000083600476527e-06, "loss": 0.1601, "step": 41340 }, { "epoch": 1.54, "learning_rate": 2.199210185526571e-06, "loss": 0.1604, "step": 41370 }, { "epoch": 1.54, "learning_rate": 2.198412879121248e-06, "loss": 0.1478, "step": 41400 }, { "epoch": 1.55, "learning_rate": 2.1976164392591793e-06, "loss": 0.1689, "step": 41430 }, { "epoch": 1.55, "learning_rate": 2.1968208643718476e-06, "loss": 0.1644, "step": 41460 }, { "epoch": 1.55, "learning_rate": 2.196026152894708e-06, "loss": 0.1742, "step": 41490 }, { "epoch": 1.55, "learning_rate": 2.195232303267173e-06, "loss": 0.1596, "step": 41520 }, { "epoch": 1.55, "learning_rate": 2.194439313932602e-06, "loss": 0.1316, "step": 41550 }, { "epoch": 1.55, "learning_rate": 2.193647183338287e-06, "loss": 0.1481, "step": 41580 }, { "epoch": 1.55, "learning_rate": 2.1928559099354423e-06, "loss": 0.1735, "step": 41610 }, { "epoch": 1.55, "learning_rate": 2.1920654921791885e-06, "loss": 0.1617, "step": 41640 }, { "epoch": 1.55, "learning_rate": 2.191275928528542e-06, "loss": 0.15, "step": 41670 }, { "epoch": 1.56, "learning_rate": 2.190487217446401e-06, "loss": 0.1473, "step": 41700 }, { "epoch": 1.56, "learning_rate": 2.189699357399535e-06, "loss": 0.1206, "step": 41730 }, { "epoch": 1.56, "learning_rate": 2.188912346858569e-06, "loss": 0.1352, "step": 41760 }, { "epoch": 1.56, "learning_rate": 2.188126184297976e-06, "loss": 0.173, "step": 41790 }, { "epoch": 1.56, "learning_rate": 2.1873408681960583e-06, "loss": 0.1624, "step": 41820 }, { "epoch": 1.56, "learning_rate": 2.1865563970349406e-06, "loss": 0.1448, "step": 41850 }, { "epoch": 1.56, "learning_rate": 2.185772769300554e-06, "loss": 0.1501, "step": 41880 }, { "epoch": 1.56, "learning_rate": 2.1849899834826275e-06, "loss": 0.1603, "step": 41910 }, { "epoch": 1.56, "learning_rate": 2.184208038074671e-06, "loss": 0.1447, "step": 41940 }, { "epoch": 1.57, "learning_rate": 2.1834269315739657e-06, "loss": 0.1551, "step": 41970 }, { "epoch": 1.57, "learning_rate": 2.182646662481554e-06, "loss": 0.1428, "step": 42000 }, { "epoch": 1.57, "learning_rate": 2.1818672293022237e-06, "loss": 0.1594, "step": 42030 }, { "epoch": 1.57, "learning_rate": 2.1810886305444976e-06, "loss": 0.1461, "step": 42060 }, { "epoch": 1.57, "learning_rate": 2.180310864720622e-06, "loss": 0.1699, "step": 42090 }, { "epoch": 1.57, "learning_rate": 2.1795339303465547e-06, "loss": 0.158, "step": 42120 }, { "epoch": 1.57, "learning_rate": 2.178757825941951e-06, "loss": 0.1227, "step": 42150 }, { "epoch": 1.57, "learning_rate": 2.177982550030154e-06, "loss": 0.1795, "step": 42180 }, { "epoch": 1.57, "learning_rate": 2.177208101138184e-06, "loss": 0.1337, "step": 42210 }, { "epoch": 1.58, "learning_rate": 2.1764344777967235e-06, "loss": 0.1626, "step": 42240 }, { "epoch": 1.58, "learning_rate": 2.1756616785401066e-06, "loss": 0.129, "step": 42270 }, { "epoch": 1.58, "learning_rate": 2.1748897019063095e-06, "loss": 0.1494, "step": 42300 }, { "epoch": 1.58, "learning_rate": 2.1741185464369354e-06, "loss": 0.1537, "step": 42330 }, { "epoch": 1.58, "learning_rate": 2.1733482106772056e-06, "loss": 0.1555, "step": 42360 }, { "epoch": 1.58, "learning_rate": 2.1725786931759464e-06, "loss": 0.1368, "step": 42390 }, { "epoch": 1.58, "learning_rate": 2.1718099924855798e-06, "loss": 0.1489, "step": 42420 }, { "epoch": 1.58, "learning_rate": 2.171042107162109e-06, "loss": 0.1486, "step": 42450 }, { "epoch": 1.58, "learning_rate": 2.1702750357651087e-06, "loss": 0.18, "step": 42480 }, { "epoch": 1.59, "learning_rate": 2.1695087768577147e-06, "loss": 0.1532, "step": 42510 }, { "epoch": 1.59, "learning_rate": 2.16874332900661e-06, "loss": 0.147, "step": 42540 }, { "epoch": 1.59, "learning_rate": 2.1679786907820158e-06, "loss": 0.1383, "step": 42570 }, { "epoch": 1.59, "learning_rate": 2.1672148607576797e-06, "loss": 0.1606, "step": 42600 }, { "epoch": 1.59, "learning_rate": 2.1664518375108646e-06, "loss": 0.1814, "step": 42630 }, { "epoch": 1.59, "learning_rate": 2.1656896196223354e-06, "loss": 0.1362, "step": 42660 }, { "epoch": 1.59, "learning_rate": 2.1649282056763532e-06, "loss": 0.1501, "step": 42690 }, { "epoch": 1.59, "learning_rate": 2.1641675942606574e-06, "loss": 0.1471, "step": 42720 }, { "epoch": 1.59, "learning_rate": 2.1634077839664602e-06, "loss": 0.1501, "step": 42750 }, { "epoch": 1.6, "learning_rate": 2.162648773388433e-06, "loss": 0.1738, "step": 42780 }, { "epoch": 1.6, "learning_rate": 2.161890561124696e-06, "loss": 0.1404, "step": 42810 }, { "epoch": 1.6, "learning_rate": 2.161133145776808e-06, "loss": 0.2075, "step": 42840 }, { "epoch": 1.6, "learning_rate": 2.160376525949755e-06, "loss": 0.151, "step": 42870 }, { "epoch": 1.6, "learning_rate": 2.1596207002519383e-06, "loss": 0.1608, "step": 42900 }, { "epoch": 1.6, "learning_rate": 2.158865667295166e-06, "loss": 0.1798, "step": 42930 }, { "epoch": 1.6, "learning_rate": 2.1581114256946403e-06, "loss": 0.1514, "step": 42960 }, { "epoch": 1.6, "learning_rate": 2.1573579740689486e-06, "loss": 0.1378, "step": 42990 }, { "epoch": 1.6, "learning_rate": 2.1566053110400517e-06, "loss": 0.1434, "step": 43020 }, { "epoch": 1.61, "learning_rate": 2.1558534352332726e-06, "loss": 0.1566, "step": 43050 }, { "epoch": 1.61, "learning_rate": 2.1551023452772875e-06, "loss": 0.1509, "step": 43080 }, { "epoch": 1.61, "learning_rate": 2.154352039804115e-06, "loss": 0.137, "step": 43110 }, { "epoch": 1.61, "learning_rate": 2.1536025174491042e-06, "loss": 0.1861, "step": 43140 }, { "epoch": 1.61, "learning_rate": 2.1528537768509265e-06, "loss": 0.1358, "step": 43170 }, { "epoch": 1.61, "learning_rate": 2.1521058166515628e-06, "loss": 0.1538, "step": 43200 }, { "epoch": 1.61, "learning_rate": 2.1513586354962963e-06, "loss": 0.1644, "step": 43230 }, { "epoch": 1.61, "learning_rate": 2.1506122320336976e-06, "loss": 0.1482, "step": 43260 }, { "epoch": 1.61, "learning_rate": 2.1498666049156193e-06, "loss": 0.1492, "step": 43290 }, { "epoch": 1.62, "learning_rate": 2.149121752797183e-06, "loss": 0.1353, "step": 43320 }, { "epoch": 1.62, "learning_rate": 2.148377674336769e-06, "loss": 0.1671, "step": 43350 }, { "epoch": 1.62, "learning_rate": 2.1476343681960082e-06, "loss": 0.1606, "step": 43380 }, { "epoch": 1.62, "learning_rate": 2.1468918330397695e-06, "loss": 0.1622, "step": 43410 }, { "epoch": 1.62, "learning_rate": 2.146174780667022e-06, "loss": 0.1372, "step": 43440 }, { "epoch": 1.62, "learning_rate": 2.145433757897916e-06, "loss": 0.1529, "step": 43470 }, { "epoch": 1.62, "learning_rate": 2.144693502171406e-06, "loss": 0.1499, "step": 43500 }, { "epoch": 1.62, "learning_rate": 2.14395401216511e-06, "loss": 0.1556, "step": 43530 }, { "epoch": 1.62, "learning_rate": 2.143215286559837e-06, "loss": 0.15, "step": 43560 }, { "epoch": 1.63, "learning_rate": 2.1424773240395752e-06, "loss": 0.1706, "step": 43590 }, { "epoch": 1.63, "learning_rate": 2.141740123291482e-06, "loss": 0.1412, "step": 43620 }, { "epoch": 1.63, "learning_rate": 2.141003683005876e-06, "loss": 0.1537, "step": 43650 }, { "epoch": 1.63, "learning_rate": 2.1402680018762254e-06, "loss": 0.164, "step": 43680 }, { "epoch": 1.63, "learning_rate": 2.1395330785991377e-06, "loss": 0.1404, "step": 43710 }, { "epoch": 1.63, "learning_rate": 2.1387989118743533e-06, "loss": 0.1549, "step": 43740 }, { "epoch": 1.63, "learning_rate": 2.1380655004047324e-06, "loss": 0.1339, "step": 43770 }, { "epoch": 1.63, "learning_rate": 2.1373328428962457e-06, "loss": 0.1393, "step": 43800 }, { "epoch": 1.63, "learning_rate": 2.136600938057969e-06, "loss": 0.1585, "step": 43830 }, { "epoch": 1.64, "learning_rate": 2.1358697846020666e-06, "loss": 0.1746, "step": 43860 }, { "epoch": 1.64, "learning_rate": 2.135139381243788e-06, "loss": 0.1542, "step": 43890 }, { "epoch": 1.64, "learning_rate": 2.1344097267014553e-06, "loss": 0.1699, "step": 43920 }, { "epoch": 1.64, "learning_rate": 2.1336808196964546e-06, "loss": 0.1372, "step": 43950 }, { "epoch": 1.64, "learning_rate": 2.1329526589532265e-06, "loss": 0.1553, "step": 43980 }, { "epoch": 1.64, "learning_rate": 2.1322252431992567e-06, "loss": 0.1484, "step": 44010 }, { "epoch": 1.64, "learning_rate": 2.131498571165066e-06, "loss": 0.1472, "step": 44040 }, { "epoch": 1.64, "learning_rate": 2.130772641584203e-06, "loss": 0.161, "step": 44070 }, { "epoch": 1.64, "learning_rate": 2.1300474531932315e-06, "loss": 0.1696, "step": 44100 }, { "epoch": 1.65, "learning_rate": 2.1293230047317256e-06, "loss": 0.145, "step": 44130 }, { "epoch": 1.65, "learning_rate": 2.128599294942257e-06, "loss": 0.1463, "step": 44160 }, { "epoch": 1.65, "learning_rate": 2.127876322570386e-06, "loss": 0.1505, "step": 44190 }, { "epoch": 1.65, "learning_rate": 2.1271540863646543e-06, "loss": 0.1658, "step": 44220 }, { "epoch": 1.65, "learning_rate": 2.1264325850765765e-06, "loss": 0.1551, "step": 44250 }, { "epoch": 1.65, "learning_rate": 2.125711817460626e-06, "loss": 0.1468, "step": 44280 }, { "epoch": 1.65, "learning_rate": 2.124991782274232e-06, "loss": 0.1521, "step": 44310 }, { "epoch": 1.65, "learning_rate": 2.1242724782777676e-06, "loss": 0.1355, "step": 44340 }, { "epoch": 1.65, "learning_rate": 2.1235539042345404e-06, "loss": 0.1431, "step": 44370 }, { "epoch": 1.66, "learning_rate": 2.1228360589107853e-06, "loss": 0.1589, "step": 44400 }, { "epoch": 1.66, "learning_rate": 2.1221189410756536e-06, "loss": 0.171, "step": 44430 }, { "epoch": 1.66, "learning_rate": 2.1214025495012067e-06, "loss": 0.1383, "step": 44460 }, { "epoch": 1.66, "learning_rate": 2.1206868829624043e-06, "loss": 0.157, "step": 44490 }, { "epoch": 1.66, "learning_rate": 2.1199719402370975e-06, "loss": 0.1388, "step": 44520 }, { "epoch": 1.66, "learning_rate": 2.11925772010602e-06, "loss": 0.1638, "step": 44550 }, { "epoch": 1.66, "learning_rate": 2.118544221352778e-06, "loss": 0.1473, "step": 44580 }, { "epoch": 1.66, "learning_rate": 2.117831442763844e-06, "loss": 0.1358, "step": 44610 }, { "epoch": 1.66, "learning_rate": 2.1171193831285454e-06, "loss": 0.1704, "step": 44640 }, { "epoch": 1.67, "learning_rate": 2.1164080412390573e-06, "loss": 0.1302, "step": 44670 }, { "epoch": 1.67, "learning_rate": 2.115697415890394e-06, "loss": 0.1402, "step": 44700 }, { "epoch": 1.67, "learning_rate": 2.1149875058803997e-06, "loss": 0.1471, "step": 44730 }, { "epoch": 1.67, "learning_rate": 2.114278310009741e-06, "loss": 0.1479, "step": 44760 }, { "epoch": 1.67, "learning_rate": 2.113569827081896e-06, "loss": 0.1359, "step": 44790 }, { "epoch": 1.67, "learning_rate": 2.11286205590315e-06, "loss": 0.1965, "step": 44820 }, { "epoch": 1.67, "learning_rate": 2.1121549952825822e-06, "loss": 0.1311, "step": 44850 }, { "epoch": 1.67, "learning_rate": 2.1114486440320624e-06, "loss": 0.1513, "step": 44880 }, { "epoch": 1.67, "learning_rate": 2.110743000966237e-06, "loss": 0.1503, "step": 44910 }, { "epoch": 1.68, "learning_rate": 2.110038064902525e-06, "loss": 0.1677, "step": 44940 }, { "epoch": 1.68, "learning_rate": 2.109333834661109e-06, "loss": 0.1297, "step": 44970 }, { "epoch": 1.68, "learning_rate": 2.1086303090649252e-06, "loss": 0.1588, "step": 45000 }, { "epoch": 1.68, "learning_rate": 2.1079274869396553e-06, "loss": 0.144, "step": 45030 }, { "epoch": 1.68, "learning_rate": 2.1072253671137207e-06, "loss": 0.1601, "step": 45060 }, { "epoch": 1.68, "learning_rate": 2.106523948418271e-06, "loss": 0.1327, "step": 45090 }, { "epoch": 1.68, "learning_rate": 2.1058232296871785e-06, "loss": 0.1346, "step": 45120 }, { "epoch": 1.68, "learning_rate": 2.1051232097570295e-06, "loss": 0.1722, "step": 45150 }, { "epoch": 1.69, "learning_rate": 2.1044238874671146e-06, "loss": 0.1664, "step": 45180 }, { "epoch": 1.69, "learning_rate": 2.103725261659422e-06, "loss": 0.1612, "step": 45210 }, { "epoch": 1.69, "learning_rate": 2.103027331178629e-06, "loss": 0.1759, "step": 45240 }, { "epoch": 1.69, "learning_rate": 2.1023300948720953e-06, "loss": 0.1522, "step": 45270 }, { "epoch": 1.69, "learning_rate": 2.1016335515898515e-06, "loss": 0.1648, "step": 45300 }, { "epoch": 1.69, "learning_rate": 2.100937700184596e-06, "loss": 0.141, "step": 45330 }, { "epoch": 1.69, "learning_rate": 2.100242539511684e-06, "loss": 0.1659, "step": 45360 }, { "epoch": 1.69, "learning_rate": 2.099548068429119e-06, "loss": 0.131, "step": 45390 }, { "epoch": 1.69, "learning_rate": 2.098854285797546e-06, "loss": 0.1419, "step": 45420 }, { "epoch": 1.7, "learning_rate": 2.0981611904802458e-06, "loss": 0.1415, "step": 45450 }, { "epoch": 1.7, "learning_rate": 2.097468781343123e-06, "loss": 0.1619, "step": 45480 }, { "epoch": 1.7, "learning_rate": 2.096800103699349e-06, "loss": 0.1505, "step": 45510 }, { "epoch": 1.7, "learning_rate": 2.0961090407515926e-06, "loss": 0.1329, "step": 45540 }, { "epoch": 1.7, "learning_rate": 2.095418660634908e-06, "loss": 0.1501, "step": 45570 }, { "epoch": 1.7, "learning_rate": 2.0947289622255397e-06, "loss": 0.1388, "step": 45600 }, { "epoch": 1.7, "learning_rate": 2.0940399444023193e-06, "loss": 0.1288, "step": 45630 }, { "epoch": 1.7, "learning_rate": 2.0933516060466585e-06, "loss": 0.1495, "step": 45660 }, { "epoch": 1.7, "learning_rate": 2.092663946042543e-06, "loss": 0.1207, "step": 45690 }, { "epoch": 1.71, "learning_rate": 2.09197696327652e-06, "loss": 0.1552, "step": 45720 }, { "epoch": 1.71, "learning_rate": 2.0912906566376966e-06, "loss": 0.1263, "step": 45750 }, { "epoch": 1.71, "learning_rate": 2.090605025017727e-06, "loss": 0.124, "step": 45780 }, { "epoch": 1.71, "learning_rate": 2.0899200673108096e-06, "loss": 0.1456, "step": 45810 }, { "epoch": 1.71, "learning_rate": 2.0892357824136746e-06, "loss": 0.1448, "step": 45840 }, { "epoch": 1.71, "learning_rate": 2.0885521692255816e-06, "loss": 0.1373, "step": 45870 }, { "epoch": 1.71, "learning_rate": 2.0878692266483083e-06, "loss": 0.1591, "step": 45900 }, { "epoch": 1.71, "learning_rate": 2.0871869535861456e-06, "loss": 0.151, "step": 45930 }, { "epoch": 1.71, "learning_rate": 2.0865053489458874e-06, "loss": 0.1493, "step": 45960 }, { "epoch": 1.72, "learning_rate": 2.0858244116368275e-06, "loss": 0.1613, "step": 45990 }, { "epoch": 1.72, "learning_rate": 2.085144140570748e-06, "loss": 0.1259, "step": 46020 }, { "epoch": 1.72, "learning_rate": 2.0844645346619135e-06, "loss": 0.1247, "step": 46050 }, { "epoch": 1.72, "learning_rate": 2.083785592827065e-06, "loss": 0.1386, "step": 46080 }, { "epoch": 1.72, "learning_rate": 2.083107313985412e-06, "loss": 0.1597, "step": 46110 }, { "epoch": 1.72, "learning_rate": 2.0824296970586242e-06, "loss": 0.1377, "step": 46140 }, { "epoch": 1.72, "learning_rate": 2.0817527409708258e-06, "loss": 0.1499, "step": 46170 }, { "epoch": 1.72, "learning_rate": 2.0810764446485862e-06, "loss": 0.1699, "step": 46200 }, { "epoch": 1.72, "learning_rate": 2.0804008070209163e-06, "loss": 0.1369, "step": 46230 }, { "epoch": 1.73, "learning_rate": 2.0797258270192577e-06, "loss": 0.1405, "step": 46260 }, { "epoch": 1.73, "learning_rate": 2.079051503577478e-06, "loss": 0.1308, "step": 46290 }, { "epoch": 1.73, "learning_rate": 2.0783778356318633e-06, "loss": 0.1678, "step": 46320 }, { "epoch": 1.73, "learning_rate": 2.0777048221211104e-06, "loss": 0.1471, "step": 46350 }, { "epoch": 1.73, "learning_rate": 2.0770324619863196e-06, "loss": 0.1736, "step": 46380 }, { "epoch": 1.73, "learning_rate": 2.07636075417099e-06, "loss": 0.1684, "step": 46410 }, { "epoch": 1.73, "learning_rate": 2.075689697621009e-06, "loss": 0.144, "step": 46440 }, { "epoch": 1.73, "learning_rate": 2.075019291284648e-06, "loss": 0.1527, "step": 46470 }, { "epoch": 1.73, "learning_rate": 2.074349534112556e-06, "loss": 0.1447, "step": 46500 }, { "epoch": 1.74, "learning_rate": 2.0736804250577488e-06, "loss": 0.1627, "step": 46530 }, { "epoch": 1.74, "learning_rate": 2.073011963075606e-06, "loss": 0.1689, "step": 46560 }, { "epoch": 1.74, "learning_rate": 2.072344147123863e-06, "loss": 0.1459, "step": 46590 }, { "epoch": 1.74, "learning_rate": 2.0716769761626044e-06, "loss": 0.1646, "step": 46620 }, { "epoch": 1.74, "learning_rate": 2.0710104491542556e-06, "loss": 0.1403, "step": 46650 }, { "epoch": 1.74, "learning_rate": 2.070344565063579e-06, "loss": 0.1408, "step": 46680 }, { "epoch": 1.74, "learning_rate": 2.069679322857663e-06, "loss": 0.1511, "step": 46710 }, { "epoch": 1.74, "learning_rate": 2.06901472150592e-06, "loss": 0.1633, "step": 46740 }, { "epoch": 1.74, "learning_rate": 2.0683507599800766e-06, "loss": 0.132, "step": 46770 }, { "epoch": 1.75, "learning_rate": 2.067687437254168e-06, "loss": 0.137, "step": 46800 }, { "epoch": 1.75, "learning_rate": 2.067024752304531e-06, "loss": 0.1452, "step": 46830 }, { "epoch": 1.75, "learning_rate": 2.066362704109797e-06, "loss": 0.136, "step": 46860 }, { "epoch": 1.75, "learning_rate": 2.065701291650887e-06, "loss": 0.1621, "step": 46890 }, { "epoch": 1.75, "learning_rate": 2.0650405139110026e-06, "loss": 0.1482, "step": 46920 }, { "epoch": 1.75, "learning_rate": 2.064380369875622e-06, "loss": 0.1495, "step": 46950 }, { "epoch": 1.75, "learning_rate": 2.0637208585324905e-06, "loss": 0.1801, "step": 46980 }, { "epoch": 1.75, "learning_rate": 2.0630619788716174e-06, "loss": 0.1441, "step": 47010 }, { "epoch": 1.75, "learning_rate": 2.0624037298852674e-06, "loss": 0.124, "step": 47040 }, { "epoch": 1.76, "learning_rate": 2.0617461105679532e-06, "loss": 0.1459, "step": 47070 }, { "epoch": 1.76, "learning_rate": 2.0610891199164325e-06, "loss": 0.1501, "step": 47100 }, { "epoch": 1.76, "learning_rate": 2.060432756929697e-06, "loss": 0.1366, "step": 47130 }, { "epoch": 1.76, "learning_rate": 2.0597770206089703e-06, "loss": 0.1484, "step": 47160 }, { "epoch": 1.76, "learning_rate": 2.0591219099576987e-06, "loss": 0.1414, "step": 47190 }, { "epoch": 1.76, "learning_rate": 2.058467423981546e-06, "loss": 0.1673, "step": 47220 }, { "epoch": 1.76, "learning_rate": 2.0578135616883866e-06, "loss": 0.1479, "step": 47250 }, { "epoch": 1.76, "learning_rate": 2.0571603220883005e-06, "loss": 0.1706, "step": 47280 }, { "epoch": 1.76, "learning_rate": 2.0565077041935645e-06, "loss": 0.1624, "step": 47310 }, { "epoch": 1.77, "learning_rate": 2.055855707018649e-06, "loss": 0.1562, "step": 47340 }, { "epoch": 1.77, "learning_rate": 2.0552043295802093e-06, "loss": 0.1392, "step": 47370 }, { "epoch": 1.77, "learning_rate": 2.0545535708970804e-06, "loss": 0.1177, "step": 47400 }, { "epoch": 1.77, "learning_rate": 2.0539034299902704e-06, "loss": 0.1239, "step": 47430 }, { "epoch": 1.77, "learning_rate": 2.0532539058829546e-06, "loss": 0.1797, "step": 47460 }, { "epoch": 1.77, "learning_rate": 2.0526049976004704e-06, "loss": 0.1431, "step": 47490 }, { "epoch": 1.77, "learning_rate": 2.0519567041703083e-06, "loss": 0.1416, "step": 47520 }, { "epoch": 1.77, "learning_rate": 2.051309024622109e-06, "loss": 0.1593, "step": 47550 }, { "epoch": 1.77, "learning_rate": 2.050661957987655e-06, "loss": 0.1251, "step": 47580 }, { "epoch": 1.78, "learning_rate": 2.050015503300866e-06, "loss": 0.1457, "step": 47610 }, { "epoch": 1.78, "learning_rate": 2.0493696595977914e-06, "loss": 0.1571, "step": 47640 }, { "epoch": 1.78, "learning_rate": 2.0487244259166056e-06, "loss": 0.1269, "step": 47670 }, { "epoch": 1.78, "learning_rate": 2.0480798012976015e-06, "loss": 0.1328, "step": 47700 }, { "epoch": 1.78, "learning_rate": 2.0474357847831843e-06, "loss": 0.1706, "step": 47730 }, { "epoch": 1.78, "learning_rate": 2.0467923754178647e-06, "loss": 0.1635, "step": 47760 }, { "epoch": 1.78, "learning_rate": 2.0461495722482565e-06, "loss": 0.1581, "step": 47790 }, { "epoch": 1.78, "learning_rate": 2.045507374323066e-06, "loss": 0.1475, "step": 47820 }, { "epoch": 1.78, "learning_rate": 2.044865780693088e-06, "loss": 0.1588, "step": 47850 }, { "epoch": 1.79, "learning_rate": 2.044224790411201e-06, "loss": 0.1508, "step": 47880 }, { "epoch": 1.79, "learning_rate": 2.043605739099571e-06, "loss": 0.1639, "step": 47910 }, { "epoch": 1.79, "learning_rate": 2.0429659326473045e-06, "loss": 0.1608, "step": 47940 }, { "epoch": 1.79, "learning_rate": 2.0423267267455322e-06, "loss": 0.1413, "step": 47970 }, { "epoch": 1.79, "learning_rate": 2.041688120455337e-06, "loss": 0.1471, "step": 48000 }, { "epoch": 1.79, "learning_rate": 2.041050112839854e-06, "loss": 0.1402, "step": 48030 }, { "epoch": 1.79, "learning_rate": 2.0404127029642688e-06, "loss": 0.1442, "step": 48060 }, { "epoch": 1.79, "learning_rate": 2.0397758898958064e-06, "loss": 0.1605, "step": 48090 }, { "epoch": 1.79, "learning_rate": 2.0391396727037307e-06, "loss": 0.1374, "step": 48120 }, { "epoch": 1.8, "learning_rate": 2.038504050459336e-06, "loss": 0.1727, "step": 48150 }, { "epoch": 1.8, "learning_rate": 2.0378690222359403e-06, "loss": 0.1639, "step": 48180 }, { "epoch": 1.8, "learning_rate": 2.0372345871088826e-06, "loss": 0.1575, "step": 48210 }, { "epoch": 1.8, "learning_rate": 2.036600744155515e-06, "loss": 0.1342, "step": 48240 }, { "epoch": 1.8, "learning_rate": 2.035967492455198e-06, "loss": 0.1381, "step": 48270 }, { "epoch": 1.8, "learning_rate": 2.0353348310892955e-06, "loss": 0.158, "step": 48300 }, { "epoch": 1.8, "learning_rate": 2.034702759141167e-06, "loss": 0.1511, "step": 48330 }, { "epoch": 1.8, "learning_rate": 2.034071275696164e-06, "loss": 0.1147, "step": 48360 }, { "epoch": 1.8, "learning_rate": 2.033440379841623e-06, "loss": 0.1487, "step": 48390 }, { "epoch": 1.81, "learning_rate": 2.032810070666863e-06, "loss": 0.1717, "step": 48420 }, { "epoch": 1.81, "learning_rate": 2.0321803472631754e-06, "loss": 0.1458, "step": 48450 }, { "epoch": 1.81, "learning_rate": 2.0315512087238225e-06, "loss": 0.1381, "step": 48480 }, { "epoch": 1.81, "learning_rate": 2.030922654144029e-06, "loss": 0.1421, "step": 48510 }, { "epoch": 1.81, "learning_rate": 2.03029468262098e-06, "loss": 0.152, "step": 48540 }, { "epoch": 1.81, "learning_rate": 2.0296672932538105e-06, "loss": 0.1784, "step": 48570 }, { "epoch": 1.81, "learning_rate": 2.0290404851436057e-06, "loss": 0.147, "step": 48600 }, { "epoch": 1.81, "learning_rate": 2.028414257393391e-06, "loss": 0.1303, "step": 48630 }, { "epoch": 1.81, "learning_rate": 2.0277886091081305e-06, "loss": 0.1226, "step": 48660 }, { "epoch": 1.82, "learning_rate": 2.0271635393947167e-06, "loss": 0.1633, "step": 48690 }, { "epoch": 1.82, "learning_rate": 2.0265390473619705e-06, "loss": 0.1397, "step": 48720 }, { "epoch": 1.82, "learning_rate": 2.025915132120632e-06, "loss": 0.1613, "step": 48750 }, { "epoch": 1.82, "learning_rate": 2.0252917927833574e-06, "loss": 0.1628, "step": 48780 }, { "epoch": 1.82, "learning_rate": 2.024669028464713e-06, "loss": 0.1448, "step": 48810 }, { "epoch": 1.82, "learning_rate": 2.0240468382811682e-06, "loss": 0.1142, "step": 48840 }, { "epoch": 1.82, "learning_rate": 2.0234252213510938e-06, "loss": 0.1803, "step": 48870 }, { "epoch": 1.82, "learning_rate": 2.0228041767947535e-06, "loss": 0.1561, "step": 48900 }, { "epoch": 1.82, "learning_rate": 2.0221837037343006e-06, "loss": 0.1309, "step": 48930 }, { "epoch": 1.83, "learning_rate": 2.0215638012937715e-06, "loss": 0.1456, "step": 48960 }, { "epoch": 1.83, "learning_rate": 2.020944468599082e-06, "loss": 0.1509, "step": 48990 }, { "epoch": 1.83, "learning_rate": 2.02032570477802e-06, "loss": 0.1118, "step": 49020 }, { "epoch": 1.83, "learning_rate": 2.019707508960244e-06, "loss": 0.1519, "step": 49050 }, { "epoch": 1.83, "learning_rate": 2.0190898802772724e-06, "loss": 0.1298, "step": 49080 }, { "epoch": 1.83, "learning_rate": 2.0184728178624835e-06, "loss": 0.1334, "step": 49110 }, { "epoch": 1.83, "learning_rate": 2.017856320851108e-06, "loss": 0.144, "step": 49140 }, { "epoch": 1.83, "learning_rate": 2.0172403883802254e-06, "loss": 0.1516, "step": 49170 }, { "epoch": 1.83, "learning_rate": 2.0166250195887554e-06, "loss": 0.1373, "step": 49200 }, { "epoch": 1.84, "learning_rate": 2.0160102136174573e-06, "loss": 0.1762, "step": 49230 }, { "epoch": 1.84, "learning_rate": 2.015395969608922e-06, "loss": 0.1707, "step": 49260 }, { "epoch": 1.84, "learning_rate": 2.01478228670757e-06, "loss": 0.1251, "step": 49290 }, { "epoch": 1.84, "learning_rate": 2.014169164059641e-06, "loss": 0.1271, "step": 49320 }, { "epoch": 1.84, "learning_rate": 2.013556600813194e-06, "loss": 0.1403, "step": 49350 }, { "epoch": 1.84, "learning_rate": 2.0129445961181016e-06, "loss": 0.1334, "step": 49380 }, { "epoch": 1.84, "learning_rate": 2.0123331491260424e-06, "loss": 0.1512, "step": 49410 }, { "epoch": 1.84, "learning_rate": 2.0117222589904985e-06, "loss": 0.1416, "step": 49440 }, { "epoch": 1.85, "learning_rate": 2.011111924866749e-06, "loss": 0.1468, "step": 49470 }, { "epoch": 1.85, "learning_rate": 2.0105021459118675e-06, "loss": 0.1453, "step": 49500 }, { "epoch": 1.85, "learning_rate": 2.0098929212847133e-06, "loss": 0.1572, "step": 49530 }, { "epoch": 1.85, "learning_rate": 2.0092842501459305e-06, "loss": 0.1549, "step": 49560 }, { "epoch": 1.85, "learning_rate": 2.0086761316579416e-06, "loss": 0.1335, "step": 49590 }, { "epoch": 1.85, "learning_rate": 2.008068564984941e-06, "loss": 0.1544, "step": 49620 }, { "epoch": 1.85, "learning_rate": 2.0074615492928926e-06, "loss": 0.1223, "step": 49650 }, { "epoch": 1.85, "learning_rate": 2.0068550837495242e-06, "loss": 0.1296, "step": 49680 }, { "epoch": 1.85, "learning_rate": 2.0062491675243216e-06, "loss": 0.1567, "step": 49710 }, { "epoch": 1.86, "learning_rate": 2.0056437997885276e-06, "loss": 0.1718, "step": 49740 }, { "epoch": 1.86, "learning_rate": 2.00503897971513e-06, "loss": 0.1537, "step": 49770 }, { "epoch": 1.86, "learning_rate": 2.004434706478865e-06, "loss": 0.1418, "step": 49800 }, { "epoch": 1.86, "learning_rate": 2.0038309792562076e-06, "loss": 0.1969, "step": 49830 }, { "epoch": 1.86, "learning_rate": 2.003227797225367e-06, "loss": 0.1665, "step": 49860 }, { "epoch": 1.86, "learning_rate": 2.0026251595662847e-06, "loss": 0.1401, "step": 49890 }, { "epoch": 1.86, "learning_rate": 2.002023065460627e-06, "loss": 0.1375, "step": 49920 }, { "epoch": 1.86, "learning_rate": 2.001421514091782e-06, "loss": 0.1325, "step": 49950 }, { "epoch": 1.86, "learning_rate": 2.0008205046448528e-06, "loss": 0.1449, "step": 49980 }, { "epoch": 1.87, "learning_rate": 2.0002200363066566e-06, "loss": 0.1495, "step": 50010 }, { "epoch": 1.87, "learning_rate": 1.9996400971708495e-06, "loss": 0.1364, "step": 50040 }, { "epoch": 1.87, "learning_rate": 1.9990406906475044e-06, "loss": 0.1351, "step": 50070 }, { "epoch": 1.87, "learning_rate": 1.9984418228304744e-06, "loss": 0.143, "step": 50100 }, { "epoch": 1.87, "learning_rate": 1.9978434929133213e-06, "loss": 0.1434, "step": 50130 }, { "epoch": 1.87, "learning_rate": 1.997245700091295e-06, "loss": 0.1609, "step": 50160 }, { "epoch": 1.87, "learning_rate": 1.996648443561332e-06, "loss": 0.137, "step": 50190 }, { "epoch": 1.87, "learning_rate": 1.996051722522046e-06, "loss": 0.1427, "step": 50220 }, { "epoch": 1.87, "learning_rate": 1.9954555361737284e-06, "loss": 0.1237, "step": 50250 }, { "epoch": 1.88, "learning_rate": 1.9948598837183403e-06, "loss": 0.1591, "step": 50280 }, { "epoch": 1.88, "learning_rate": 1.99426476435951e-06, "loss": 0.1496, "step": 50310 }, { "epoch": 1.88, "learning_rate": 1.9936701773025286e-06, "loss": 0.1364, "step": 50340 }, { "epoch": 1.88, "learning_rate": 1.9930761217543435e-06, "loss": 0.1425, "step": 50370 }, { "epoch": 1.88, "learning_rate": 1.9924825969235547e-06, "loss": 0.165, "step": 50400 }, { "epoch": 1.88, "learning_rate": 1.9918896020204136e-06, "loss": 0.1356, "step": 50430 }, { "epoch": 1.88, "learning_rate": 1.9912971362568133e-06, "loss": 0.1314, "step": 50460 }, { "epoch": 1.88, "learning_rate": 1.9907051988462883e-06, "loss": 0.1481, "step": 50490 }, { "epoch": 1.88, "learning_rate": 1.990113789004008e-06, "loss": 0.1468, "step": 50520 }, { "epoch": 1.89, "learning_rate": 1.989522905946773e-06, "loss": 0.1308, "step": 50550 }, { "epoch": 1.89, "learning_rate": 1.9889325488930104e-06, "loss": 0.1535, "step": 50580 }, { "epoch": 1.89, "learning_rate": 1.9883427170627706e-06, "loss": 0.1222, "step": 50610 }, { "epoch": 1.89, "learning_rate": 1.9877534096777218e-06, "loss": 0.1415, "step": 50640 }, { "epoch": 1.89, "learning_rate": 1.9871646259611458e-06, "loss": 0.1625, "step": 50670 }, { "epoch": 1.89, "learning_rate": 1.9865763651379345e-06, "loss": 0.1266, "step": 50700 }, { "epoch": 1.89, "learning_rate": 1.9859886264345835e-06, "loss": 0.146, "step": 50730 }, { "epoch": 1.89, "learning_rate": 1.985420974599683e-06, "loss": 0.1416, "step": 50760 }, { "epoch": 1.89, "learning_rate": 1.9848342604817485e-06, "loss": 0.1516, "step": 50790 }, { "epoch": 1.9, "learning_rate": 1.9842480661983527e-06, "loss": 0.1389, "step": 50820 }, { "epoch": 1.9, "learning_rate": 1.983662390982317e-06, "loss": 0.1129, "step": 50850 }, { "epoch": 1.9, "learning_rate": 1.983077234068048e-06, "loss": 0.1344, "step": 50880 }, { "epoch": 1.9, "learning_rate": 1.9824925946915318e-06, "loss": 0.1372, "step": 50910 }, { "epoch": 1.9, "learning_rate": 1.98190847209033e-06, "loss": 0.142, "step": 50940 }, { "epoch": 1.9, "learning_rate": 1.981324865503577e-06, "loss": 0.1483, "step": 50970 }, { "epoch": 1.9, "learning_rate": 1.980741774171974e-06, "loss": 0.1239, "step": 51000 }, { "epoch": 1.9, "learning_rate": 1.9801591973377847e-06, "loss": 0.1557, "step": 51030 }, { "epoch": 1.9, "learning_rate": 1.979577134244833e-06, "loss": 0.1366, "step": 51060 }, { "epoch": 1.91, "learning_rate": 1.978995584138498e-06, "loss": 0.1379, "step": 51090 }, { "epoch": 1.91, "learning_rate": 1.978414546265708e-06, "loss": 0.1274, "step": 51120 }, { "epoch": 1.91, "learning_rate": 1.9778340198749395e-06, "loss": 0.148, "step": 51150 }, { "epoch": 1.91, "learning_rate": 1.9772540042162125e-06, "loss": 0.1374, "step": 51180 }, { "epoch": 1.91, "learning_rate": 1.976674498541084e-06, "loss": 0.1523, "step": 51210 }, { "epoch": 1.91, "learning_rate": 1.9760955021026456e-06, "loss": 0.1177, "step": 51240 }, { "epoch": 1.91, "learning_rate": 1.9755170141555212e-06, "loss": 0.1533, "step": 51270 }, { "epoch": 1.91, "learning_rate": 1.9749390339558593e-06, "loss": 0.1248, "step": 51300 }, { "epoch": 1.91, "learning_rate": 1.9743615607613323e-06, "loss": 0.158, "step": 51330 }, { "epoch": 1.92, "learning_rate": 1.9737845938311307e-06, "loss": 0.1513, "step": 51360 }, { "epoch": 1.92, "learning_rate": 1.973208132425959e-06, "loss": 0.1404, "step": 51390 }, { "epoch": 1.92, "learning_rate": 1.972632175808033e-06, "loss": 0.171, "step": 51420 }, { "epoch": 1.92, "learning_rate": 1.972056723241075e-06, "loss": 0.1785, "step": 51450 }, { "epoch": 1.92, "learning_rate": 1.9714817739903085e-06, "loss": 0.1428, "step": 51480 }, { "epoch": 1.92, "learning_rate": 1.9709073273224575e-06, "loss": 0.1473, "step": 51510 }, { "epoch": 1.92, "learning_rate": 1.9703333825057403e-06, "loss": 0.1568, "step": 51540 }, { "epoch": 1.92, "learning_rate": 1.9697599388098655e-06, "loss": 0.1389, "step": 51570 }, { "epoch": 1.92, "learning_rate": 1.9691869955060294e-06, "loss": 0.1472, "step": 51600 }, { "epoch": 1.93, "learning_rate": 1.9686145518669107e-06, "loss": 0.1402, "step": 51630 }, { "epoch": 1.93, "learning_rate": 1.968042607166667e-06, "loss": 0.1313, "step": 51660 }, { "epoch": 1.93, "learning_rate": 1.967471160680932e-06, "loss": 0.1738, "step": 51690 }, { "epoch": 1.93, "learning_rate": 1.966900211686811e-06, "loss": 0.1363, "step": 51720 }, { "epoch": 1.93, "learning_rate": 1.9663297594628757e-06, "loss": 0.14, "step": 51750 }, { "epoch": 1.93, "learning_rate": 1.965759803289163e-06, "loss": 0.1745, "step": 51780 }, { "epoch": 1.93, "learning_rate": 1.9651903424471683e-06, "loss": 0.1279, "step": 51810 }, { "epoch": 1.93, "learning_rate": 1.9646213762198447e-06, "loss": 0.1601, "step": 51840 }, { "epoch": 1.93, "learning_rate": 1.9640529038915965e-06, "loss": 0.1329, "step": 51870 }, { "epoch": 1.94, "learning_rate": 1.963484924748277e-06, "loss": 0.1545, "step": 51900 }, { "epoch": 1.94, "learning_rate": 1.9629174380771844e-06, "loss": 0.1474, "step": 51930 }, { "epoch": 1.94, "learning_rate": 1.9623504431670577e-06, "loss": 0.1261, "step": 51960 }, { "epoch": 1.94, "learning_rate": 1.961783939308074e-06, "loss": 0.1548, "step": 51990 }, { "epoch": 1.94, "learning_rate": 1.9612179257918425e-06, "loss": 0.1457, "step": 52020 }, { "epoch": 1.94, "learning_rate": 1.960652401911404e-06, "loss": 0.1263, "step": 52050 }, { "epoch": 1.94, "learning_rate": 1.9600873669612238e-06, "loss": 0.1153, "step": 52080 }, { "epoch": 1.94, "learning_rate": 1.959522820237191e-06, "loss": 0.1425, "step": 52110 }, { "epoch": 1.94, "learning_rate": 1.958958761036613e-06, "loss": 0.1344, "step": 52140 }, { "epoch": 1.95, "learning_rate": 1.958395188658212e-06, "loss": 0.1372, "step": 52170 }, { "epoch": 1.95, "learning_rate": 1.957832102402122e-06, "loss": 0.1396, "step": 52200 }, { "epoch": 1.95, "learning_rate": 1.9572695015698843e-06, "loss": 0.127, "step": 52230 }, { "epoch": 1.95, "learning_rate": 1.9567073854644454e-06, "loss": 0.132, "step": 52260 }, { "epoch": 1.95, "learning_rate": 1.9561457533901515e-06, "loss": 0.137, "step": 52290 }, { "epoch": 1.95, "learning_rate": 1.9555846046527456e-06, "loss": 0.1722, "step": 52320 }, { "epoch": 1.95, "learning_rate": 1.955023938559364e-06, "loss": 0.1586, "step": 52350 }, { "epoch": 1.95, "learning_rate": 1.954463754418534e-06, "loss": 0.153, "step": 52380 }, { "epoch": 1.95, "learning_rate": 1.953904051540167e-06, "loss": 0.1555, "step": 52410 }, { "epoch": 1.96, "learning_rate": 1.9533448292355585e-06, "loss": 0.1491, "step": 52440 }, { "epoch": 1.96, "learning_rate": 1.9527860868173816e-06, "loss": 0.1267, "step": 52470 }, { "epoch": 1.96, "learning_rate": 1.9522278235996874e-06, "loss": 0.1533, "step": 52500 }, { "epoch": 1.96, "learning_rate": 1.951670038897896e-06, "loss": 0.1299, "step": 52530 }, { "epoch": 1.96, "learning_rate": 1.9511127320287964e-06, "loss": 0.1336, "step": 52560 }, { "epoch": 1.96, "learning_rate": 1.9505559023105446e-06, "loss": 0.1581, "step": 52590 }, { "epoch": 1.96, "learning_rate": 1.9499995490626562e-06, "loss": 0.1434, "step": 52620 }, { "epoch": 1.96, "learning_rate": 1.9494436716060055e-06, "loss": 0.1276, "step": 52650 }, { "epoch": 1.96, "learning_rate": 1.9488882692628197e-06, "loss": 0.1335, "step": 52680 }, { "epoch": 1.97, "learning_rate": 1.9483333413566786e-06, "loss": 0.1384, "step": 52710 }, { "epoch": 1.97, "learning_rate": 1.9477788872125087e-06, "loss": 0.1393, "step": 52740 }, { "epoch": 1.97, "learning_rate": 1.94722490615658e-06, "loss": 0.1604, "step": 52770 }, { "epoch": 1.97, "learning_rate": 1.946671397516505e-06, "loss": 0.1486, "step": 52800 }, { "epoch": 1.97, "learning_rate": 1.946118360621231e-06, "loss": 0.1258, "step": 52830 }, { "epoch": 1.97, "learning_rate": 1.9455657948010406e-06, "loss": 0.1385, "step": 52860 }, { "epoch": 1.97, "learning_rate": 1.945013699387546e-06, "loss": 0.1514, "step": 52890 }, { "epoch": 1.97, "learning_rate": 1.9444620737136872e-06, "loss": 0.14, "step": 52920 }, { "epoch": 1.97, "learning_rate": 1.9439109171137267e-06, "loss": 0.1281, "step": 52950 }, { "epoch": 1.98, "learning_rate": 1.943360228923247e-06, "loss": 0.1233, "step": 52980 }, { "epoch": 1.98, "learning_rate": 1.942810008479148e-06, "loss": 0.143, "step": 53010 }, { "epoch": 1.98, "learning_rate": 1.942260255119644e-06, "loss": 0.1697, "step": 53040 }, { "epoch": 1.98, "learning_rate": 1.941710968184258e-06, "loss": 0.1515, "step": 53070 }, { "epoch": 1.98, "learning_rate": 1.9411621470138204e-06, "loss": 0.1349, "step": 53100 }, { "epoch": 1.98, "learning_rate": 1.940613790950464e-06, "loss": 0.139, "step": 53130 }, { "epoch": 1.98, "learning_rate": 1.940065899337623e-06, "loss": 0.1279, "step": 53160 }, { "epoch": 1.98, "learning_rate": 1.939518471520029e-06, "loss": 0.1355, "step": 53190 }, { "epoch": 1.98, "learning_rate": 1.9389715068437056e-06, "loss": 0.1538, "step": 53220 }, { "epoch": 1.99, "learning_rate": 1.938425004655967e-06, "loss": 0.1466, "step": 53250 }, { "epoch": 1.99, "learning_rate": 1.9378789643054147e-06, "loss": 0.1248, "step": 53280 }, { "epoch": 1.99, "learning_rate": 1.937333385141934e-06, "loss": 0.1291, "step": 53310 }, { "epoch": 1.99, "learning_rate": 1.9367882665166908e-06, "loss": 0.1363, "step": 53340 }, { "epoch": 1.99, "learning_rate": 1.9362436077821283e-06, "loss": 0.1325, "step": 53370 }, { "epoch": 1.99, "learning_rate": 1.935699408291963e-06, "loss": 0.1311, "step": 53400 }, { "epoch": 1.99, "learning_rate": 1.9351556674011827e-06, "loss": 0.1375, "step": 53430 }, { "epoch": 1.99, "learning_rate": 1.9346123844660423e-06, "loss": 0.159, "step": 53460 }, { "epoch": 2.0, "learning_rate": 1.934069558844063e-06, "loss": 0.1449, "step": 53490 }, { "epoch": 2.0, "learning_rate": 1.933527189894024e-06, "loss": 0.1417, "step": 53520 }, { "epoch": 2.0, "learning_rate": 1.932985276975965e-06, "loss": 0.111, "step": 53550 }, { "epoch": 2.0, "learning_rate": 1.93244381945118e-06, "loss": 0.163, "step": 53580 }, { "epoch": 2.0, "learning_rate": 1.9319028166822136e-06, "loss": 0.1297, "step": 53610 }, { "epoch": 2.0, "learning_rate": 1.9313622680328613e-06, "loss": 0.1178, "step": 53640 }, { "epoch": 2.0, "learning_rate": 1.9308221728681602e-06, "loss": 0.1362, "step": 53670 }, { "epoch": 2.0, "learning_rate": 1.9302825305543936e-06, "loss": 0.1115, "step": 53700 }, { "epoch": 2.0, "learning_rate": 1.929743340459082e-06, "loss": 0.1427, "step": 53730 }, { "epoch": 2.01, "learning_rate": 1.929204601950982e-06, "loss": 0.143, "step": 53760 }, { "epoch": 2.01, "learning_rate": 1.9286663144000823e-06, "loss": 0.1393, "step": 53790 }, { "epoch": 2.01, "learning_rate": 1.928128477177604e-06, "loss": 0.1221, "step": 53820 }, { "epoch": 2.01, "learning_rate": 1.927591089655992e-06, "loss": 0.1452, "step": 53850 }, { "epoch": 2.01, "learning_rate": 1.9270541512089164e-06, "loss": 0.1481, "step": 53880 }, { "epoch": 2.01, "learning_rate": 1.9265176612112675e-06, "loss": 0.1434, "step": 53910 }, { "epoch": 2.01, "learning_rate": 1.9259816190391543e-06, "loss": 0.1411, "step": 53940 }, { "epoch": 2.01, "learning_rate": 1.925446024069898e-06, "loss": 0.1442, "step": 53970 }, { "epoch": 2.01, "learning_rate": 1.9249108756820324e-06, "loss": 0.129, "step": 54000 }, { "epoch": 2.02, "learning_rate": 1.9243761732553013e-06, "loss": 0.1403, "step": 54030 }, { "epoch": 2.02, "learning_rate": 1.9238419161706506e-06, "loss": 0.1284, "step": 54060 }, { "epoch": 2.02, "learning_rate": 1.923308103810231e-06, "loss": 0.1248, "step": 54090 }, { "epoch": 2.02, "learning_rate": 1.922774735557393e-06, "loss": 0.1347, "step": 54120 }, { "epoch": 2.02, "learning_rate": 1.9222418107966813e-06, "loss": 0.1383, "step": 54150 }, { "epoch": 2.02, "learning_rate": 1.921709328913835e-06, "loss": 0.1221, "step": 54180 }, { "epoch": 2.02, "learning_rate": 1.921177289295785e-06, "loss": 0.1909, "step": 54210 }, { "epoch": 2.02, "learning_rate": 1.9206456913306475e-06, "loss": 0.145, "step": 54240 }, { "epoch": 2.02, "learning_rate": 1.9201145344077244e-06, "loss": 0.1358, "step": 54270 }, { "epoch": 2.03, "learning_rate": 1.9195838179174996e-06, "loss": 0.1371, "step": 54300 }, { "epoch": 2.03, "learning_rate": 1.9190535412516335e-06, "loss": 0.1221, "step": 54330 }, { "epoch": 2.03, "learning_rate": 1.9185237038029657e-06, "loss": 0.1306, "step": 54360 }, { "epoch": 2.03, "learning_rate": 1.9179943049655055e-06, "loss": 0.1412, "step": 54390 }, { "epoch": 2.03, "learning_rate": 1.917465344134433e-06, "loss": 0.1251, "step": 54420 }, { "epoch": 2.03, "learning_rate": 1.916936820706096e-06, "loss": 0.1566, "step": 54450 }, { "epoch": 2.03, "learning_rate": 1.916408734078005e-06, "loss": 0.161, "step": 54480 }, { "epoch": 2.03, "learning_rate": 1.915881083648834e-06, "loss": 0.1591, "step": 54510 }, { "epoch": 2.03, "learning_rate": 1.9153538688184124e-06, "loss": 0.1193, "step": 54540 }, { "epoch": 2.04, "learning_rate": 1.9148270889877275e-06, "loss": 0.157, "step": 54570 }, { "epoch": 2.04, "learning_rate": 1.914300743558917e-06, "loss": 0.1461, "step": 54600 }, { "epoch": 2.04, "learning_rate": 1.913774831935271e-06, "loss": 0.1418, "step": 54630 }, { "epoch": 2.04, "learning_rate": 1.9132493535212246e-06, "loss": 0.1584, "step": 54660 }, { "epoch": 2.04, "learning_rate": 1.9127243077223566e-06, "loss": 0.156, "step": 54690 }, { "epoch": 2.04, "learning_rate": 1.9121996939453884e-06, "loss": 0.1312, "step": 54720 }, { "epoch": 2.04, "learning_rate": 1.9116755115981794e-06, "loss": 0.1259, "step": 54750 }, { "epoch": 2.04, "learning_rate": 1.9111692115382816e-06, "loss": 0.1549, "step": 54780 }, { "epoch": 2.04, "learning_rate": 1.910645875946571e-06, "loss": 0.14, "step": 54810 }, { "epoch": 2.05, "learning_rate": 1.910122970034606e-06, "loss": 0.1441, "step": 54840 }, { "epoch": 2.05, "learning_rate": 1.9096004932147346e-06, "loss": 0.1485, "step": 54870 }, { "epoch": 2.05, "learning_rate": 1.909078444900429e-06, "loss": 0.1247, "step": 54900 }, { "epoch": 2.05, "learning_rate": 1.9085568245062825e-06, "loss": 0.1509, "step": 54930 }, { "epoch": 2.05, "learning_rate": 1.908035631448009e-06, "loss": 0.1342, "step": 54960 }, { "epoch": 2.05, "learning_rate": 1.907514865142436e-06, "loss": 0.1619, "step": 54990 }, { "epoch": 2.05, "learning_rate": 1.9069945250075084e-06, "loss": 0.1242, "step": 55020 }, { "epoch": 2.05, "learning_rate": 1.9064746104622783e-06, "loss": 0.1585, "step": 55050 }, { "epoch": 2.05, "learning_rate": 1.9059551209269078e-06, "loss": 0.1655, "step": 55080 }, { "epoch": 2.06, "learning_rate": 1.9054360558226643e-06, "loss": 0.1182, "step": 55110 }, { "epoch": 2.06, "learning_rate": 1.9049174145719173e-06, "loss": 0.129, "step": 55140 }, { "epoch": 2.06, "learning_rate": 1.9043991965981363e-06, "loss": 0.1676, "step": 55170 }, { "epoch": 2.06, "learning_rate": 1.9038814013258886e-06, "loss": 0.1417, "step": 55200 }, { "epoch": 2.06, "learning_rate": 1.9033640281808363e-06, "loss": 0.1112, "step": 55230 }, { "epoch": 2.06, "learning_rate": 1.902847076589732e-06, "loss": 0.1362, "step": 55260 }, { "epoch": 2.06, "learning_rate": 1.9023305459804192e-06, "loss": 0.1654, "step": 55290 }, { "epoch": 2.06, "learning_rate": 1.901814435781827e-06, "loss": 0.1577, "step": 55320 }, { "epoch": 2.06, "learning_rate": 1.901298745423969e-06, "loss": 0.1288, "step": 55350 }, { "epoch": 2.07, "learning_rate": 1.9007834743379393e-06, "loss": 0.1274, "step": 55380 }, { "epoch": 2.07, "learning_rate": 1.9002686219559125e-06, "loss": 0.1204, "step": 55410 }, { "epoch": 2.07, "learning_rate": 1.8997541877111366e-06, "loss": 0.1297, "step": 55440 }, { "epoch": 2.07, "learning_rate": 1.899240171037935e-06, "loss": 0.1634, "step": 55470 }, { "epoch": 2.07, "learning_rate": 1.8987265713717016e-06, "loss": 0.1315, "step": 55500 }, { "epoch": 2.07, "learning_rate": 1.8982133881488975e-06, "loss": 0.1278, "step": 55530 }, { "epoch": 2.07, "learning_rate": 1.8977006208070509e-06, "loss": 0.1762, "step": 55560 }, { "epoch": 2.07, "learning_rate": 1.8971882687847517e-06, "loss": 0.1379, "step": 55590 }, { "epoch": 2.07, "learning_rate": 1.8966763315216514e-06, "loss": 0.1479, "step": 55620 }, { "epoch": 2.08, "learning_rate": 1.8961648084584578e-06, "loss": 0.1305, "step": 55650 }, { "epoch": 2.08, "learning_rate": 1.8956536990369353e-06, "loss": 0.1345, "step": 55680 }, { "epoch": 2.08, "learning_rate": 1.8951430026999016e-06, "loss": 0.1128, "step": 55710 }, { "epoch": 2.08, "learning_rate": 1.894632718891222e-06, "loss": 0.1565, "step": 55740 }, { "epoch": 2.08, "learning_rate": 1.8941228470558122e-06, "loss": 0.1482, "step": 55770 }, { "epoch": 2.08, "learning_rate": 1.8936133866396317e-06, "loss": 0.1325, "step": 55800 }, { "epoch": 2.08, "learning_rate": 1.8931043370896827e-06, "loss": 0.1463, "step": 55830 }, { "epoch": 2.08, "learning_rate": 1.8925956978540075e-06, "loss": 0.1328, "step": 55860 }, { "epoch": 2.08, "learning_rate": 1.8920874683816859e-06, "loss": 0.1245, "step": 55890 }, { "epoch": 2.09, "learning_rate": 1.891579648122833e-06, "loss": 0.1185, "step": 55920 }, { "epoch": 2.09, "learning_rate": 1.8910722365285963e-06, "loss": 0.1188, "step": 55950 }, { "epoch": 2.09, "learning_rate": 1.890565233051153e-06, "loss": 0.1299, "step": 55980 }, { "epoch": 2.09, "learning_rate": 1.8900586371437086e-06, "loss": 0.1448, "step": 56010 }, { "epoch": 2.09, "learning_rate": 1.8895524482604927e-06, "loss": 0.1581, "step": 56040 }, { "epoch": 2.09, "learning_rate": 1.8890466658567582e-06, "loss": 0.1294, "step": 56070 }, { "epoch": 2.09, "learning_rate": 1.8885412893887783e-06, "loss": 0.1452, "step": 56100 }, { "epoch": 2.09, "learning_rate": 1.8880363183138433e-06, "loss": 0.1344, "step": 56130 }, { "epoch": 2.09, "learning_rate": 1.8875317520902595e-06, "loss": 0.1482, "step": 56160 }, { "epoch": 2.1, "learning_rate": 1.887027590177345e-06, "loss": 0.1595, "step": 56190 }, { "epoch": 2.1, "learning_rate": 1.886523832035429e-06, "loss": 0.1496, "step": 56220 }, { "epoch": 2.1, "learning_rate": 1.8860204771258493e-06, "loss": 0.1285, "step": 56250 }, { "epoch": 2.1, "learning_rate": 1.885517524910947e-06, "loss": 0.1363, "step": 56280 }, { "epoch": 2.1, "learning_rate": 1.8850149748540687e-06, "loss": 0.1447, "step": 56310 }, { "epoch": 2.1, "learning_rate": 1.8845128264195603e-06, "loss": 0.1389, "step": 56340 }, { "epoch": 2.1, "learning_rate": 1.884011079072767e-06, "loss": 0.1175, "step": 56370 }, { "epoch": 2.1, "learning_rate": 1.883509732280029e-06, "loss": 0.1539, "step": 56400 }, { "epoch": 2.1, "learning_rate": 1.8830087855086806e-06, "loss": 0.1483, "step": 56430 }, { "epoch": 2.11, "learning_rate": 1.8825082382270468e-06, "loss": 0.1369, "step": 56460 }, { "epoch": 2.11, "learning_rate": 1.882008089904442e-06, "loss": 0.1516, "step": 56490 }, { "epoch": 2.11, "learning_rate": 1.8815083400111672e-06, "loss": 0.1518, "step": 56520 }, { "epoch": 2.11, "learning_rate": 1.8810089880185063e-06, "loss": 0.16, "step": 56550 }, { "epoch": 2.11, "learning_rate": 1.8805100333987256e-06, "loss": 0.1144, "step": 56580 }, { "epoch": 2.11, "learning_rate": 1.8800114756250712e-06, "loss": 0.1198, "step": 56610 }, { "epoch": 2.11, "learning_rate": 1.8795133141717656e-06, "loss": 0.1455, "step": 56640 }, { "epoch": 2.11, "learning_rate": 1.8790155485140063e-06, "loss": 0.1119, "step": 56670 }, { "epoch": 2.11, "learning_rate": 1.8785181781279636e-06, "loss": 0.1224, "step": 56700 }, { "epoch": 2.12, "learning_rate": 1.8780212024907767e-06, "loss": 0.1444, "step": 56730 }, { "epoch": 2.12, "learning_rate": 1.877524621080554e-06, "loss": 0.1615, "step": 56760 }, { "epoch": 2.12, "learning_rate": 1.8770284333763686e-06, "loss": 0.1363, "step": 56790 }, { "epoch": 2.12, "learning_rate": 1.8765326388582567e-06, "loss": 0.1387, "step": 56820 }, { "epoch": 2.12, "learning_rate": 1.8760372370072152e-06, "loss": 0.1401, "step": 56850 }, { "epoch": 2.12, "learning_rate": 1.8755422273052006e-06, "loss": 0.128, "step": 56880 }, { "epoch": 2.12, "learning_rate": 1.875047609235125e-06, "loss": 0.1281, "step": 56910 }, { "epoch": 2.12, "learning_rate": 1.8745533822808548e-06, "loss": 0.1453, "step": 56940 }, { "epoch": 2.12, "learning_rate": 1.8740595459272077e-06, "loss": 0.1508, "step": 56970 }, { "epoch": 2.13, "learning_rate": 1.8735660996599512e-06, "loss": 0.1184, "step": 57000 }, { "epoch": 2.13, "learning_rate": 1.8730730429658007e-06, "loss": 0.1293, "step": 57030 }, { "epoch": 2.13, "learning_rate": 1.8725803753324157e-06, "loss": 0.1484, "step": 57060 }, { "epoch": 2.13, "learning_rate": 1.8720880962483992e-06, "loss": 0.1366, "step": 57090 }, { "epoch": 2.13, "learning_rate": 1.8715962052032946e-06, "loss": 0.1456, "step": 57120 }, { "epoch": 2.13, "learning_rate": 1.8711047016875836e-06, "loss": 0.14, "step": 57150 }, { "epoch": 2.13, "learning_rate": 1.8706135851926837e-06, "loss": 0.1289, "step": 57180 }, { "epoch": 2.13, "learning_rate": 1.870122855210947e-06, "loss": 0.1278, "step": 57210 }, { "epoch": 2.13, "learning_rate": 1.8696325112356574e-06, "loss": 0.1643, "step": 57240 }, { "epoch": 2.14, "learning_rate": 1.869142552761028e-06, "loss": 0.1741, "step": 57270 }, { "epoch": 2.14, "learning_rate": 1.8686529792821986e-06, "loss": 0.1421, "step": 57300 }, { "epoch": 2.14, "learning_rate": 1.868163790295235e-06, "loss": 0.1173, "step": 57330 }, { "epoch": 2.14, "learning_rate": 1.8676749852971257e-06, "loss": 0.1389, "step": 57360 }, { "epoch": 2.14, "learning_rate": 1.8671865637857806e-06, "loss": 0.1341, "step": 57390 }, { "epoch": 2.14, "learning_rate": 1.866698525260027e-06, "loss": 0.1396, "step": 57420 }, { "epoch": 2.14, "learning_rate": 1.8662108692196096e-06, "loss": 0.1264, "step": 57450 }, { "epoch": 2.14, "learning_rate": 1.8657235951651874e-06, "loss": 0.1288, "step": 57480 }, { "epoch": 2.14, "learning_rate": 1.8652367025983317e-06, "loss": 0.1226, "step": 57510 }, { "epoch": 2.15, "learning_rate": 1.8647501910215233e-06, "loss": 0.1149, "step": 57540 }, { "epoch": 2.15, "learning_rate": 1.8642640599381507e-06, "loss": 0.1227, "step": 57570 }, { "epoch": 2.15, "learning_rate": 1.8637783088525087e-06, "loss": 0.1211, "step": 57600 }, { "epoch": 2.15, "learning_rate": 1.8632929372697966e-06, "loss": 0.1226, "step": 57630 }, { "epoch": 2.15, "learning_rate": 1.8628079446961137e-06, "loss": 0.1482, "step": 57660 }, { "epoch": 2.15, "learning_rate": 1.8623233306384588e-06, "loss": 0.1585, "step": 57690 }, { "epoch": 2.15, "learning_rate": 1.8618390946047293e-06, "loss": 0.1692, "step": 57720 }, { "epoch": 2.15, "learning_rate": 1.8613552361037172e-06, "loss": 0.1351, "step": 57750 }, { "epoch": 2.16, "learning_rate": 1.860871754645107e-06, "loss": 0.112, "step": 57780 }, { "epoch": 2.16, "learning_rate": 1.8603886497394748e-06, "loss": 0.1435, "step": 57810 }, { "epoch": 2.16, "learning_rate": 1.859905920898286e-06, "loss": 0.1509, "step": 57840 }, { "epoch": 2.16, "learning_rate": 1.8594235676338921e-06, "loss": 0.1242, "step": 57870 }, { "epoch": 2.16, "learning_rate": 1.8589415894595303e-06, "loss": 0.134, "step": 57900 }, { "epoch": 2.16, "learning_rate": 1.8584599858893192e-06, "loss": 0.1328, "step": 57930 }, { "epoch": 2.16, "learning_rate": 1.85797875643826e-06, "loss": 0.1295, "step": 57960 }, { "epoch": 2.16, "learning_rate": 1.8575139231348627e-06, "loss": 0.1351, "step": 57990 }, { "epoch": 2.16, "learning_rate": 1.8570334280400031e-06, "loss": 0.1316, "step": 58020 }, { "epoch": 2.17, "learning_rate": 1.856553305630624e-06, "loss": 0.1225, "step": 58050 }, { "epoch": 2.17, "learning_rate": 1.856073555425199e-06, "loss": 0.1474, "step": 58080 }, { "epoch": 2.17, "learning_rate": 1.8555941769430718e-06, "loss": 0.1281, "step": 58110 }, { "epoch": 2.17, "learning_rate": 1.8551151697044554e-06, "loss": 0.1203, "step": 58140 }, { "epoch": 2.17, "learning_rate": 1.8546365332304288e-06, "loss": 0.1301, "step": 58170 }, { "epoch": 2.17, "learning_rate": 1.8541582670429356e-06, "loss": 0.141, "step": 58200 }, { "epoch": 2.17, "learning_rate": 1.8536803706647816e-06, "loss": 0.1162, "step": 58230 }, { "epoch": 2.17, "learning_rate": 1.853202843619634e-06, "loss": 0.1177, "step": 58260 }, { "epoch": 2.17, "learning_rate": 1.8527256854320173e-06, "loss": 0.1242, "step": 58290 }, { "epoch": 2.18, "learning_rate": 1.8522488956273137e-06, "loss": 0.1218, "step": 58320 }, { "epoch": 2.18, "learning_rate": 1.8517724737317594e-06, "loss": 0.1214, "step": 58350 }, { "epoch": 2.18, "learning_rate": 1.8512964192724428e-06, "loss": 0.1419, "step": 58380 }, { "epoch": 2.18, "learning_rate": 1.8508207317773033e-06, "loss": 0.1471, "step": 58410 }, { "epoch": 2.18, "learning_rate": 1.8503454107751295e-06, "loss": 0.1293, "step": 58440 }, { "epoch": 2.18, "learning_rate": 1.849870455795555e-06, "loss": 0.1229, "step": 58470 }, { "epoch": 2.18, "learning_rate": 1.8493958663690606e-06, "loss": 0.1366, "step": 58500 }, { "epoch": 2.18, "learning_rate": 1.8489216420269671e-06, "loss": 0.131, "step": 58530 }, { "epoch": 2.18, "learning_rate": 1.8484477823014386e-06, "loss": 0.1232, "step": 58560 }, { "epoch": 2.19, "learning_rate": 1.8479742867254763e-06, "loss": 0.1155, "step": 58590 }, { "epoch": 2.19, "learning_rate": 1.8475011548329189e-06, "loss": 0.1409, "step": 58620 }, { "epoch": 2.19, "learning_rate": 1.847028386158441e-06, "loss": 0.1163, "step": 58650 }, { "epoch": 2.19, "learning_rate": 1.8465559802375483e-06, "loss": 0.1438, "step": 58680 }, { "epoch": 2.19, "learning_rate": 1.8460839366065802e-06, "loss": 0.1332, "step": 58710 }, { "epoch": 2.19, "learning_rate": 1.8456122548027036e-06, "loss": 0.1116, "step": 58740 }, { "epoch": 2.19, "learning_rate": 1.8451409343639128e-06, "loss": 0.1541, "step": 58770 }, { "epoch": 2.19, "learning_rate": 1.8446699748290289e-06, "loss": 0.1484, "step": 58800 }, { "epoch": 2.19, "learning_rate": 1.844199375737695e-06, "loss": 0.1485, "step": 58830 }, { "epoch": 2.2, "learning_rate": 1.843729136630377e-06, "loss": 0.1469, "step": 58860 }, { "epoch": 2.2, "learning_rate": 1.8432592570483596e-06, "loss": 0.113, "step": 58890 }, { "epoch": 2.2, "learning_rate": 1.8427897365337466e-06, "loss": 0.151, "step": 58920 }, { "epoch": 2.2, "learning_rate": 1.8423205746294573e-06, "loss": 0.1283, "step": 58950 }, { "epoch": 2.2, "learning_rate": 1.841851770879224e-06, "loss": 0.1135, "step": 58980 }, { "epoch": 2.2, "learning_rate": 1.8413833248275939e-06, "loss": 0.1451, "step": 59010 }, { "epoch": 2.2, "learning_rate": 1.840915236019922e-06, "loss": 0.1232, "step": 59040 }, { "epoch": 2.2, "learning_rate": 1.840447504002374e-06, "loss": 0.1312, "step": 59070 }, { "epoch": 2.2, "learning_rate": 1.8399801283219207e-06, "loss": 0.1322, "step": 59100 }, { "epoch": 2.21, "learning_rate": 1.8395131085263387e-06, "loss": 0.1463, "step": 59130 }, { "epoch": 2.21, "learning_rate": 1.8390464441642077e-06, "loss": 0.1342, "step": 59160 }, { "epoch": 2.21, "learning_rate": 1.8385801347849084e-06, "loss": 0.1137, "step": 59190 }, { "epoch": 2.21, "learning_rate": 1.8381141799386204e-06, "loss": 0.1086, "step": 59220 }, { "epoch": 2.21, "learning_rate": 1.8376485791763221e-06, "loss": 0.1405, "step": 59250 }, { "epoch": 2.21, "learning_rate": 1.8371833320497865e-06, "loss": 0.1248, "step": 59280 }, { "epoch": 2.21, "learning_rate": 1.8367184381115817e-06, "loss": 0.1268, "step": 59310 }, { "epoch": 2.21, "learning_rate": 1.8362538969150664e-06, "loss": 0.1336, "step": 59340 }, { "epoch": 2.21, "learning_rate": 1.8357897080143913e-06, "loss": 0.1585, "step": 59370 }, { "epoch": 2.22, "learning_rate": 1.8353258709644942e-06, "loss": 0.1658, "step": 59400 }, { "epoch": 2.22, "learning_rate": 1.834862385321101e-06, "loss": 0.1299, "step": 59430 }, { "epoch": 2.22, "learning_rate": 1.8343992506407218e-06, "loss": 0.1415, "step": 59460 }, { "epoch": 2.22, "learning_rate": 1.8339364664806493e-06, "loss": 0.1314, "step": 59490 }, { "epoch": 2.22, "learning_rate": 1.8334740323989596e-06, "loss": 0.1507, "step": 59520 }, { "epoch": 2.22, "learning_rate": 1.8330119479545063e-06, "loss": 0.1176, "step": 59550 }, { "epoch": 2.22, "learning_rate": 1.832550212706922e-06, "loss": 0.1788, "step": 59580 }, { "epoch": 2.22, "learning_rate": 1.8320888262166148e-06, "loss": 0.134, "step": 59610 }, { "epoch": 2.22, "learning_rate": 1.8316277880447678e-06, "loss": 0.1447, "step": 59640 }, { "epoch": 2.23, "learning_rate": 1.8311670977533358e-06, "loss": 0.1302, "step": 59670 }, { "epoch": 2.23, "learning_rate": 1.8307067549050465e-06, "loss": 0.1325, "step": 59700 }, { "epoch": 2.23, "learning_rate": 1.8302467590633934e-06, "loss": 0.1361, "step": 59730 }, { "epoch": 2.23, "learning_rate": 1.8297871097926407e-06, "loss": 0.1315, "step": 59760 }, { "epoch": 2.23, "learning_rate": 1.8293278066578158e-06, "loss": 0.1069, "step": 59790 }, { "epoch": 2.23, "learning_rate": 1.8288688492247114e-06, "loss": 0.1539, "step": 59820 }, { "epoch": 2.23, "learning_rate": 1.8284102370598822e-06, "loss": 0.1699, "step": 59850 }, { "epoch": 2.23, "learning_rate": 1.8279519697306424e-06, "loss": 0.1171, "step": 59880 }, { "epoch": 2.23, "learning_rate": 1.8274940468050661e-06, "loss": 0.1478, "step": 59910 }, { "epoch": 2.24, "learning_rate": 1.8270364678519841e-06, "loss": 0.1364, "step": 59940 }, { "epoch": 2.24, "learning_rate": 1.8265792324409817e-06, "loss": 0.1339, "step": 59970 }, { "epoch": 2.24, "learning_rate": 1.8261223401423995e-06, "loss": 0.1459, "step": 60000 }, { "epoch": 2.24, "learning_rate": 1.8256657905273283e-06, "loss": 0.1115, "step": 60030 }, { "epoch": 2.24, "learning_rate": 1.8252095831676105e-06, "loss": 0.1448, "step": 60060 }, { "epoch": 2.24, "learning_rate": 1.8247537176358363e-06, "loss": 0.1464, "step": 60090 }, { "epoch": 2.24, "learning_rate": 1.824298193505343e-06, "loss": 0.1243, "step": 60120 }, { "epoch": 2.24, "learning_rate": 1.8238430103502131e-06, "loss": 0.1311, "step": 60150 }, { "epoch": 2.24, "learning_rate": 1.8233881677452723e-06, "loss": 0.1542, "step": 60180 }, { "epoch": 2.25, "learning_rate": 1.8229336652660886e-06, "loss": 0.1457, "step": 60210 }, { "epoch": 2.25, "learning_rate": 1.8224795024889702e-06, "loss": 0.1127, "step": 60240 }, { "epoch": 2.25, "learning_rate": 1.8220256789909638e-06, "loss": 0.1399, "step": 60270 }, { "epoch": 2.25, "learning_rate": 1.8215721943498523e-06, "loss": 0.1329, "step": 60300 }, { "epoch": 2.25, "learning_rate": 1.8211190481441551e-06, "loss": 0.1228, "step": 60330 }, { "epoch": 2.25, "learning_rate": 1.820666239953124e-06, "loss": 0.1176, "step": 60360 }, { "epoch": 2.25, "learning_rate": 1.820213769356744e-06, "loss": 0.1104, "step": 60390 }, { "epoch": 2.25, "learning_rate": 1.8197616359357285e-06, "loss": 0.0979, "step": 60420 }, { "epoch": 2.25, "learning_rate": 1.8193098392715216e-06, "loss": 0.1361, "step": 60450 }, { "epoch": 2.26, "learning_rate": 1.8188583789462934e-06, "loss": 0.1128, "step": 60480 }, { "epoch": 2.26, "learning_rate": 1.8184072545429393e-06, "loss": 0.1394, "step": 60510 }, { "epoch": 2.26, "learning_rate": 1.8179564656450797e-06, "loss": 0.1262, "step": 60540 }, { "epoch": 2.26, "learning_rate": 1.817506011837055e-06, "loss": 0.164, "step": 60570 }, { "epoch": 2.26, "learning_rate": 1.8170558927039284e-06, "loss": 0.1167, "step": 60600 }, { "epoch": 2.26, "learning_rate": 1.8166061078314813e-06, "loss": 0.1234, "step": 60630 }, { "epoch": 2.26, "learning_rate": 1.8161566568062119e-06, "loss": 0.1186, "step": 60660 }, { "epoch": 2.26, "learning_rate": 1.8157075392153348e-06, "loss": 0.1331, "step": 60690 }, { "epoch": 2.26, "learning_rate": 1.815273708771384e-06, "loss": 0.1283, "step": 60720 }, { "epoch": 2.27, "learning_rate": 1.8148252457333782e-06, "loss": 0.1571, "step": 60750 }, { "epoch": 2.27, "learning_rate": 1.8143771149093633e-06, "loss": 0.1229, "step": 60780 }, { "epoch": 2.27, "learning_rate": 1.813929315889378e-06, "loss": 0.1486, "step": 60810 }, { "epoch": 2.27, "learning_rate": 1.813481848264168e-06, "loss": 0.132, "step": 60840 }, { "epoch": 2.27, "learning_rate": 1.8130347116251856e-06, "loss": 0.1496, "step": 60870 }, { "epoch": 2.27, "learning_rate": 1.8125879055645884e-06, "loss": 0.1164, "step": 60900 }, { "epoch": 2.27, "learning_rate": 1.8121414296752363e-06, "loss": 0.1095, "step": 60930 }, { "epoch": 2.27, "learning_rate": 1.8116952835506918e-06, "loss": 0.1405, "step": 60960 }, { "epoch": 2.27, "learning_rate": 1.8112494667852163e-06, "loss": 0.1231, "step": 60990 }, { "epoch": 2.28, "learning_rate": 1.8108039789737708e-06, "loss": 0.1263, "step": 61020 }, { "epoch": 2.28, "learning_rate": 1.8103588197120128e-06, "loss": 0.125, "step": 61050 }, { "epoch": 2.28, "learning_rate": 1.8099139885962961e-06, "loss": 0.138, "step": 61080 }, { "epoch": 2.28, "learning_rate": 1.8094694852236673e-06, "loss": 0.129, "step": 61110 }, { "epoch": 2.28, "learning_rate": 1.8090253091918654e-06, "loss": 0.1303, "step": 61140 }, { "epoch": 2.28, "learning_rate": 1.8085814600993218e-06, "loss": 0.1323, "step": 61170 }, { "epoch": 2.28, "learning_rate": 1.8081379375451555e-06, "loss": 0.1063, "step": 61200 }, { "epoch": 2.28, "learning_rate": 1.807694741129174e-06, "loss": 0.0986, "step": 61230 }, { "epoch": 2.28, "learning_rate": 1.8072518704518715e-06, "loss": 0.1299, "step": 61260 }, { "epoch": 2.29, "learning_rate": 1.8068093251144268e-06, "loss": 0.1464, "step": 61290 }, { "epoch": 2.29, "learning_rate": 1.806367104718701e-06, "loss": 0.1312, "step": 61320 }, { "epoch": 2.29, "learning_rate": 1.805925208867238e-06, "loss": 0.1306, "step": 61350 }, { "epoch": 2.29, "learning_rate": 1.8054836371632622e-06, "loss": 0.1175, "step": 61380 }, { "epoch": 2.29, "learning_rate": 1.8050423892106764e-06, "loss": 0.1434, "step": 61410 }, { "epoch": 2.29, "learning_rate": 1.8046014646140606e-06, "loss": 0.1421, "step": 61440 }, { "epoch": 2.29, "learning_rate": 1.80416086297867e-06, "loss": 0.117, "step": 61470 }, { "epoch": 2.29, "learning_rate": 1.8037205839104353e-06, "loss": 0.1353, "step": 61500 }, { "epoch": 2.29, "learning_rate": 1.8032806270159595e-06, "loss": 0.1557, "step": 61530 }, { "epoch": 2.3, "learning_rate": 1.8028409919025168e-06, "loss": 0.1324, "step": 61560 }, { "epoch": 2.3, "learning_rate": 1.802401678178052e-06, "loss": 0.1611, "step": 61590 }, { "epoch": 2.3, "learning_rate": 1.8019626854511773e-06, "loss": 0.1378, "step": 61620 }, { "epoch": 2.3, "learning_rate": 1.8015240133311723e-06, "loss": 0.1334, "step": 61650 }, { "epoch": 2.3, "learning_rate": 1.8010856614279822e-06, "loss": 0.1509, "step": 61680 }, { "epoch": 2.3, "learning_rate": 1.8006476293522169e-06, "loss": 0.1196, "step": 61710 }, { "epoch": 2.3, "learning_rate": 1.8002099167151475e-06, "loss": 0.1628, "step": 61740 }, { "epoch": 2.3, "learning_rate": 1.7997725231287063e-06, "loss": 0.1287, "step": 61770 }, { "epoch": 2.3, "learning_rate": 1.7993354482054874e-06, "loss": 0.1063, "step": 61800 }, { "epoch": 2.31, "learning_rate": 1.7988986915587407e-06, "loss": 0.1428, "step": 61830 }, { "epoch": 2.31, "learning_rate": 1.798462252802374e-06, "loss": 0.1239, "step": 61860 }, { "epoch": 2.31, "learning_rate": 1.7980261315509503e-06, "loss": 0.1267, "step": 61890 }, { "epoch": 2.31, "learning_rate": 1.7975903274196872e-06, "loss": 0.1421, "step": 61920 }, { "epoch": 2.31, "learning_rate": 1.7971548400244536e-06, "loss": 0.1186, "step": 61950 }, { "epoch": 2.31, "learning_rate": 1.7967196689817703e-06, "loss": 0.1345, "step": 61980 }, { "epoch": 2.31, "learning_rate": 1.7962848139088083e-06, "loss": 0.127, "step": 62010 }, { "epoch": 2.31, "learning_rate": 1.7958502744233853e-06, "loss": 0.12, "step": 62040 }, { "epoch": 2.32, "learning_rate": 1.7954160501439674e-06, "loss": 0.1149, "step": 62070 }, { "epoch": 2.32, "learning_rate": 1.794982140689665e-06, "loss": 0.1273, "step": 62100 }, { "epoch": 2.32, "learning_rate": 1.7945485456802333e-06, "loss": 0.1217, "step": 62130 }, { "epoch": 2.32, "learning_rate": 1.7941152647360701e-06, "loss": 0.1355, "step": 62160 }, { "epoch": 2.32, "learning_rate": 1.7936822974782144e-06, "loss": 0.1411, "step": 62190 }, { "epoch": 2.32, "learning_rate": 1.7932496435283447e-06, "loss": 0.1448, "step": 62220 }, { "epoch": 2.32, "learning_rate": 1.7928173025087783e-06, "loss": 0.1502, "step": 62250 }, { "epoch": 2.32, "learning_rate": 1.7923852740424697e-06, "loss": 0.1166, "step": 62280 }, { "epoch": 2.32, "learning_rate": 1.7919535577530084e-06, "loss": 0.1408, "step": 62310 }, { "epoch": 2.33, "learning_rate": 1.7915221532646194e-06, "loss": 0.1304, "step": 62340 }, { "epoch": 2.33, "learning_rate": 1.7910910602021597e-06, "loss": 0.1646, "step": 62370 }, { "epoch": 2.33, "learning_rate": 1.7906602781911183e-06, "loss": 0.117, "step": 62400 }, { "epoch": 2.33, "learning_rate": 1.7902298068576147e-06, "loss": 0.1251, "step": 62430 }, { "epoch": 2.33, "learning_rate": 1.789799645828396e-06, "loss": 0.134, "step": 62460 }, { "epoch": 2.33, "learning_rate": 1.7893697947308383e-06, "loss": 0.1444, "step": 62490 }, { "epoch": 2.33, "learning_rate": 1.788940253192943e-06, "loss": 0.1239, "step": 62520 }, { "epoch": 2.33, "learning_rate": 1.788511020843337e-06, "loss": 0.1473, "step": 62550 }, { "epoch": 2.33, "learning_rate": 1.78808209731127e-06, "loss": 0.1201, "step": 62580 }, { "epoch": 2.34, "learning_rate": 1.7876534822266136e-06, "loss": 0.1015, "step": 62610 }, { "epoch": 2.34, "learning_rate": 1.7872251752198607e-06, "loss": 0.1279, "step": 62640 }, { "epoch": 2.34, "learning_rate": 1.786797175922124e-06, "loss": 0.1349, "step": 62670 }, { "epoch": 2.34, "learning_rate": 1.7863694839651327e-06, "loss": 0.1354, "step": 62700 }, { "epoch": 2.34, "learning_rate": 1.7859420989812349e-06, "loss": 0.1395, "step": 62730 }, { "epoch": 2.34, "learning_rate": 1.7855150206033921e-06, "loss": 0.1443, "step": 62760 }, { "epoch": 2.34, "learning_rate": 1.7850882484651816e-06, "loss": 0.1144, "step": 62790 }, { "epoch": 2.34, "learning_rate": 1.784661782200792e-06, "loss": 0.1383, "step": 62820 }, { "epoch": 2.34, "learning_rate": 1.7842356214450247e-06, "loss": 0.1158, "step": 62850 }, { "epoch": 2.35, "learning_rate": 1.78380976583329e-06, "loss": 0.1306, "step": 62880 }, { "epoch": 2.35, "learning_rate": 1.7833983951228248e-06, "loss": 0.1603, "step": 62910 }, { "epoch": 2.35, "learning_rate": 1.7829731385664483e-06, "loss": 0.1303, "step": 62940 }, { "epoch": 2.35, "learning_rate": 1.7825481860760678e-06, "loss": 0.1474, "step": 62970 }, { "epoch": 2.35, "learning_rate": 1.7821235372895012e-06, "loss": 0.142, "step": 63000 }, { "epoch": 2.35, "learning_rate": 1.7816991918451711e-06, "loss": 0.1243, "step": 63030 }, { "epoch": 2.35, "learning_rate": 1.7812751493821018e-06, "loss": 0.1289, "step": 63060 }, { "epoch": 2.35, "learning_rate": 1.7808514095399185e-06, "loss": 0.1412, "step": 63090 }, { "epoch": 2.35, "learning_rate": 1.7804279719588469e-06, "loss": 0.1573, "step": 63120 }, { "epoch": 2.36, "learning_rate": 1.7800048362797105e-06, "loss": 0.1378, "step": 63150 }, { "epoch": 2.36, "learning_rate": 1.7795820021439302e-06, "loss": 0.1555, "step": 63180 }, { "epoch": 2.36, "learning_rate": 1.7791594691935238e-06, "loss": 0.1078, "step": 63210 }, { "epoch": 2.36, "learning_rate": 1.7787372370711024e-06, "loss": 0.1395, "step": 63240 }, { "epoch": 2.36, "learning_rate": 1.778315305419871e-06, "loss": 0.1537, "step": 63270 }, { "epoch": 2.36, "learning_rate": 1.7778936738836273e-06, "loss": 0.1295, "step": 63300 }, { "epoch": 2.36, "learning_rate": 1.7774723421067596e-06, "loss": 0.1323, "step": 63330 }, { "epoch": 2.36, "learning_rate": 1.7770513097342456e-06, "loss": 0.1363, "step": 63360 }, { "epoch": 2.36, "learning_rate": 1.7766305764116515e-06, "loss": 0.1299, "step": 63390 }, { "epoch": 2.37, "learning_rate": 1.776210141785131e-06, "loss": 0.1398, "step": 63420 }, { "epoch": 2.37, "learning_rate": 1.7757900055014226e-06, "loss": 0.1318, "step": 63450 }, { "epoch": 2.37, "learning_rate": 1.7753701672078514e-06, "loss": 0.1221, "step": 63480 }, { "epoch": 2.37, "learning_rate": 1.7749506265523237e-06, "loss": 0.1205, "step": 63510 }, { "epoch": 2.37, "learning_rate": 1.7745313831833294e-06, "loss": 0.1196, "step": 63540 }, { "epoch": 2.37, "learning_rate": 1.7741124367499385e-06, "loss": 0.1355, "step": 63570 }, { "epoch": 2.37, "learning_rate": 1.7736937869018014e-06, "loss": 0.1209, "step": 63600 }, { "epoch": 2.37, "learning_rate": 1.7732754332891463e-06, "loss": 0.1121, "step": 63630 }, { "epoch": 2.37, "learning_rate": 1.7728573755627793e-06, "loss": 0.1288, "step": 63660 }, { "epoch": 2.38, "learning_rate": 1.7724396133740815e-06, "loss": 0.1344, "step": 63690 }, { "epoch": 2.38, "learning_rate": 1.7720221463750099e-06, "loss": 0.1287, "step": 63720 }, { "epoch": 2.38, "learning_rate": 1.771604974218094e-06, "loss": 0.1217, "step": 63750 }, { "epoch": 2.38, "learning_rate": 1.771188096556437e-06, "loss": 0.1205, "step": 63780 }, { "epoch": 2.38, "learning_rate": 1.7707715130437115e-06, "loss": 0.1208, "step": 63810 }, { "epoch": 2.38, "learning_rate": 1.7703552233341614e-06, "loss": 0.1376, "step": 63840 }, { "epoch": 2.38, "learning_rate": 1.7699392270825987e-06, "loss": 0.124, "step": 63870 }, { "epoch": 2.38, "learning_rate": 1.7695235239444037e-06, "loss": 0.122, "step": 63900 }, { "epoch": 2.38, "learning_rate": 1.7691081135755214e-06, "loss": 0.1829, "step": 63930 }, { "epoch": 2.39, "learning_rate": 1.7686929956324636e-06, "loss": 0.1384, "step": 63960 }, { "epoch": 2.39, "learning_rate": 1.7682781697723047e-06, "loss": 0.1203, "step": 63990 }, { "epoch": 2.39, "learning_rate": 1.7678636356526833e-06, "loss": 0.1392, "step": 64020 }, { "epoch": 2.39, "learning_rate": 1.7674493929317981e-06, "loss": 0.145, "step": 64050 }, { "epoch": 2.39, "learning_rate": 1.7670354412684088e-06, "loss": 0.1608, "step": 64080 }, { "epoch": 2.39, "learning_rate": 1.766621780321835e-06, "loss": 0.1315, "step": 64110 }, { "epoch": 2.39, "learning_rate": 1.7662084097519527e-06, "loss": 0.1225, "step": 64140 }, { "epoch": 2.39, "learning_rate": 1.765795329219196e-06, "loss": 0.1484, "step": 64170 }, { "epoch": 2.39, "learning_rate": 1.765382538384554e-06, "loss": 0.1474, "step": 64200 }, { "epoch": 2.4, "learning_rate": 1.7649700369095715e-06, "loss": 0.1302, "step": 64230 }, { "epoch": 2.4, "learning_rate": 1.7645578244563446e-06, "loss": 0.1335, "step": 64260 }, { "epoch": 2.4, "learning_rate": 1.7641459006875235e-06, "loss": 0.1331, "step": 64290 }, { "epoch": 2.4, "learning_rate": 1.7637342652663083e-06, "loss": 0.1407, "step": 64320 }, { "epoch": 2.4, "learning_rate": 1.7633229178564493e-06, "loss": 0.1179, "step": 64350 }, { "epoch": 2.4, "learning_rate": 1.7629118581222452e-06, "loss": 0.1337, "step": 64380 }, { "epoch": 2.4, "learning_rate": 1.7625010857285424e-06, "loss": 0.1431, "step": 64410 }, { "epoch": 2.4, "learning_rate": 1.7620906003407343e-06, "loss": 0.121, "step": 64440 }, { "epoch": 2.4, "learning_rate": 1.7616804016247588e-06, "loss": 0.1222, "step": 64470 }, { "epoch": 2.41, "learning_rate": 1.7612704892470981e-06, "loss": 0.1214, "step": 64500 }, { "epoch": 2.41, "learning_rate": 1.7608608628747776e-06, "loss": 0.1377, "step": 64530 }, { "epoch": 2.41, "learning_rate": 1.7604515221753638e-06, "loss": 0.138, "step": 64560 }, { "epoch": 2.41, "learning_rate": 1.7600424668169652e-06, "loss": 0.1408, "step": 64590 }, { "epoch": 2.41, "learning_rate": 1.759633696468229e-06, "loss": 0.1168, "step": 64620 }, { "epoch": 2.41, "learning_rate": 1.7592252107983406e-06, "loss": 0.1144, "step": 64650 }, { "epoch": 2.41, "learning_rate": 1.7588170094770235e-06, "loss": 0.1349, "step": 64680 }, { "epoch": 2.41, "learning_rate": 1.758409092174537e-06, "loss": 0.133, "step": 64710 }, { "epoch": 2.41, "learning_rate": 1.7580014585616753e-06, "loss": 0.1372, "step": 64740 }, { "epoch": 2.42, "learning_rate": 1.7575941083097669e-06, "loss": 0.1289, "step": 64770 }, { "epoch": 2.42, "learning_rate": 1.757187041090673e-06, "loss": 0.1299, "step": 64800 }, { "epoch": 2.42, "learning_rate": 1.756780256576786e-06, "loss": 0.135, "step": 64830 }, { "epoch": 2.42, "learning_rate": 1.756373754441031e-06, "loss": 0.1343, "step": 64860 }, { "epoch": 2.42, "learning_rate": 1.7559675343568594e-06, "loss": 0.1417, "step": 64890 }, { "epoch": 2.42, "learning_rate": 1.7555615959982538e-06, "loss": 0.1317, "step": 64920 }, { "epoch": 2.42, "learning_rate": 1.7551559390397228e-06, "loss": 0.1475, "step": 64950 }, { "epoch": 2.42, "learning_rate": 1.7547505631563013e-06, "loss": 0.1257, "step": 64980 }, { "epoch": 2.42, "learning_rate": 1.75434546802355e-06, "loss": 0.1235, "step": 65010 }, { "epoch": 2.43, "learning_rate": 1.753954142626517e-06, "loss": 0.1328, "step": 65040 }, { "epoch": 2.43, "learning_rate": 1.7535495986923073e-06, "loss": 0.1292, "step": 65070 }, { "epoch": 2.43, "learning_rate": 1.7531453345493427e-06, "loss": 0.1148, "step": 65100 }, { "epoch": 2.43, "learning_rate": 1.7527548115319052e-06, "loss": 0.1447, "step": 65130 }, { "epoch": 2.43, "learning_rate": 1.7523510967051263e-06, "loss": 0.1528, "step": 65160 }, { "epoch": 2.43, "learning_rate": 1.751947660714751e-06, "loss": 0.1356, "step": 65190 }, { "epoch": 2.43, "learning_rate": 1.7515445032399503e-06, "loss": 0.1249, "step": 65220 }, { "epoch": 2.43, "learning_rate": 1.7511416239604125e-06, "loss": 0.1283, "step": 65250 }, { "epoch": 2.43, "learning_rate": 1.7507390225563407e-06, "loss": 0.1367, "step": 65280 }, { "epoch": 2.44, "learning_rate": 1.750336698708453e-06, "loss": 0.1744, "step": 65310 }, { "epoch": 2.44, "learning_rate": 1.7499346520979806e-06, "loss": 0.13, "step": 65340 }, { "epoch": 2.44, "learning_rate": 1.749532882406668e-06, "loss": 0.1157, "step": 65370 }, { "epoch": 2.44, "learning_rate": 1.749131389316768e-06, "loss": 0.1433, "step": 65400 }, { "epoch": 2.44, "learning_rate": 1.748730172511047e-06, "loss": 0.1172, "step": 65430 }, { "epoch": 2.44, "learning_rate": 1.7483292316727791e-06, "loss": 0.133, "step": 65460 }, { "epoch": 2.44, "learning_rate": 1.7479285664857462e-06, "loss": 0.1475, "step": 65490 }, { "epoch": 2.44, "learning_rate": 1.7475281766342374e-06, "loss": 0.1359, "step": 65520 }, { "epoch": 2.44, "learning_rate": 1.7471280618030487e-06, "loss": 0.1233, "step": 65550 }, { "epoch": 2.45, "learning_rate": 1.7467282216774797e-06, "loss": 0.1339, "step": 65580 }, { "epoch": 2.45, "learning_rate": 1.7463286559433349e-06, "loss": 0.1281, "step": 65610 }, { "epoch": 2.45, "learning_rate": 1.7459293642869206e-06, "loss": 0.1236, "step": 65640 }, { "epoch": 2.45, "learning_rate": 1.7455303463950469e-06, "loss": 0.1084, "step": 65670 }, { "epoch": 2.45, "learning_rate": 1.7451316019550222e-06, "loss": 0.1366, "step": 65700 }, { "epoch": 2.45, "learning_rate": 1.7447331306546573e-06, "loss": 0.1339, "step": 65730 }, { "epoch": 2.45, "learning_rate": 1.7443349321822589e-06, "loss": 0.1312, "step": 65760 }, { "epoch": 2.45, "learning_rate": 1.743937006226634e-06, "loss": 0.148, "step": 65790 }, { "epoch": 2.45, "learning_rate": 1.7435393524770849e-06, "loss": 0.1354, "step": 65820 }, { "epoch": 2.46, "learning_rate": 1.7431419706234102e-06, "loss": 0.1295, "step": 65850 }, { "epoch": 2.46, "learning_rate": 1.7427448603559022e-06, "loss": 0.1206, "step": 65880 }, { "epoch": 2.46, "learning_rate": 1.7423480213653484e-06, "loss": 0.1504, "step": 65910 }, { "epoch": 2.46, "learning_rate": 1.741951453343028e-06, "loss": 0.1348, "step": 65940 }, { "epoch": 2.46, "learning_rate": 1.7415551559807117e-06, "loss": 0.1427, "step": 65970 }, { "epoch": 2.46, "learning_rate": 1.7411591289706613e-06, "loss": 0.1443, "step": 66000 }, { "epoch": 2.46, "learning_rate": 1.740763372005628e-06, "loss": 0.1161, "step": 66030 }, { "epoch": 2.46, "learning_rate": 1.740367884778852e-06, "loss": 0.1284, "step": 66060 }, { "epoch": 2.46, "learning_rate": 1.73997266698406e-06, "loss": 0.1192, "step": 66090 }, { "epoch": 2.47, "learning_rate": 1.739577718315467e-06, "loss": 0.1527, "step": 66120 }, { "epoch": 2.47, "learning_rate": 1.7391830384677721e-06, "loss": 0.1139, "step": 66150 }, { "epoch": 2.47, "learning_rate": 1.7387886271361605e-06, "loss": 0.1251, "step": 66180 }, { "epoch": 2.47, "learning_rate": 1.738394484016299e-06, "loss": 0.1388, "step": 66210 }, { "epoch": 2.47, "learning_rate": 1.7380006088043402e-06, "loss": 0.1317, "step": 66240 }, { "epoch": 2.47, "learning_rate": 1.737607001196915e-06, "loss": 0.1217, "step": 66270 }, { "epoch": 2.47, "learning_rate": 1.7372136608911374e-06, "loss": 0.1292, "step": 66300 }, { "epoch": 2.47, "learning_rate": 1.7368205875846004e-06, "loss": 0.1529, "step": 66330 }, { "epoch": 2.48, "learning_rate": 1.736427780975375e-06, "loss": 0.1139, "step": 66360 }, { "epoch": 2.48, "learning_rate": 1.7360352407620109e-06, "loss": 0.1318, "step": 66390 }, { "epoch": 2.48, "learning_rate": 1.7356429666435347e-06, "loss": 0.1351, "step": 66420 }, { "epoch": 2.48, "learning_rate": 1.7352509583194484e-06, "loss": 0.1298, "step": 66450 }, { "epoch": 2.48, "learning_rate": 1.7348592154897293e-06, "loss": 0.1234, "step": 66480 }, { "epoch": 2.48, "learning_rate": 1.7344677378548272e-06, "loss": 0.1144, "step": 66510 }, { "epoch": 2.48, "learning_rate": 1.734076525115667e-06, "loss": 0.1293, "step": 66540 }, { "epoch": 2.48, "learning_rate": 1.7336855769736436e-06, "loss": 0.1262, "step": 66570 }, { "epoch": 2.48, "learning_rate": 1.7332948931306248e-06, "loss": 0.1397, "step": 66600 }, { "epoch": 2.49, "learning_rate": 1.7329044732889466e-06, "loss": 0.1356, "step": 66630 }, { "epoch": 2.49, "learning_rate": 1.7325143171514161e-06, "loss": 0.1253, "step": 66660 }, { "epoch": 2.49, "learning_rate": 1.7321244244213068e-06, "loss": 0.1445, "step": 66690 }, { "epoch": 2.49, "learning_rate": 1.7317347948023603e-06, "loss": 0.1342, "step": 66720 }, { "epoch": 2.49, "learning_rate": 1.731345427998784e-06, "loss": 0.1199, "step": 66750 }, { "epoch": 2.49, "learning_rate": 1.7309563237152517e-06, "loss": 0.1211, "step": 66780 }, { "epoch": 2.49, "learning_rate": 1.7305674816569004e-06, "loss": 0.1161, "step": 66810 }, { "epoch": 2.49, "learning_rate": 1.7301789015293308e-06, "loss": 0.1222, "step": 66840 }, { "epoch": 2.49, "learning_rate": 1.7297905830386069e-06, "loss": 0.1463, "step": 66870 }, { "epoch": 2.5, "learning_rate": 1.7294025258912534e-06, "loss": 0.1206, "step": 66900 }, { "epoch": 2.5, "learning_rate": 1.729014729794256e-06, "loss": 0.1558, "step": 66930 }, { "epoch": 2.5, "learning_rate": 1.72862719445506e-06, "loss": 0.1534, "step": 66960 }, { "epoch": 2.5, "learning_rate": 1.7282399195815697e-06, "loss": 0.1256, "step": 66990 }, { "epoch": 2.5, "learning_rate": 1.7278529048821474e-06, "loss": 0.1624, "step": 67020 }, { "epoch": 2.5, "learning_rate": 1.727466150065612e-06, "loss": 0.1436, "step": 67050 }, { "epoch": 2.5, "learning_rate": 1.727079654841238e-06, "loss": 0.1193, "step": 67080 }, { "epoch": 2.5, "learning_rate": 1.7266934189187565e-06, "loss": 0.1208, "step": 67110 }, { "epoch": 2.5, "learning_rate": 1.7263074420083515e-06, "loss": 0.1451, "step": 67140 }, { "epoch": 2.51, "learning_rate": 1.7259217238206608e-06, "loss": 0.1378, "step": 67170 }, { "epoch": 2.51, "learning_rate": 1.7255362640667738e-06, "loss": 0.1341, "step": 67200 }, { "epoch": 2.51, "learning_rate": 1.725151062458233e-06, "loss": 0.1367, "step": 67230 }, { "epoch": 2.51, "learning_rate": 1.7247661187070297e-06, "loss": 0.1245, "step": 67260 }, { "epoch": 2.51, "learning_rate": 1.724394251251618e-06, "loss": 0.1341, "step": 67290 }, { "epoch": 2.51, "learning_rate": 1.724009813781396e-06, "loss": 0.1256, "step": 67320 }, { "epoch": 2.51, "learning_rate": 1.7236256333167308e-06, "loss": 0.1293, "step": 67350 }, { "epoch": 2.51, "learning_rate": 1.7232417095713918e-06, "loss": 0.1201, "step": 67380 }, { "epoch": 2.51, "learning_rate": 1.7228580422595966e-06, "loss": 0.132, "step": 67410 }, { "epoch": 2.52, "learning_rate": 1.7224746310960061e-06, "loss": 0.1452, "step": 67440 }, { "epoch": 2.52, "learning_rate": 1.7220914757957266e-06, "loss": 0.1442, "step": 67470 }, { "epoch": 2.52, "learning_rate": 1.721708576074308e-06, "loss": 0.1237, "step": 67500 }, { "epoch": 2.52, "learning_rate": 1.721325931647741e-06, "loss": 0.1099, "step": 67530 }, { "epoch": 2.52, "learning_rate": 1.72094354223246e-06, "loss": 0.1321, "step": 67560 }, { "epoch": 2.52, "learning_rate": 1.7205614075453395e-06, "loss": 0.1209, "step": 67590 }, { "epoch": 2.52, "learning_rate": 1.720179527303693e-06, "loss": 0.1123, "step": 67620 }, { "epoch": 2.52, "learning_rate": 1.7197979012252745e-06, "loss": 0.1219, "step": 67650 }, { "epoch": 2.52, "learning_rate": 1.7194165290282747e-06, "loss": 0.1198, "step": 67680 }, { "epoch": 2.53, "learning_rate": 1.7190354104313222e-06, "loss": 0.1125, "step": 67710 }, { "epoch": 2.53, "learning_rate": 1.7186545451534831e-06, "loss": 0.1147, "step": 67740 }, { "epoch": 2.53, "learning_rate": 1.7182739329142563e-06, "loss": 0.1279, "step": 67770 }, { "epoch": 2.53, "learning_rate": 1.7178935734335775e-06, "loss": 0.1507, "step": 67800 }, { "epoch": 2.53, "learning_rate": 1.7175134664318157e-06, "loss": 0.1235, "step": 67830 }, { "epoch": 2.53, "learning_rate": 1.717133611629773e-06, "loss": 0.111, "step": 67860 }, { "epoch": 2.53, "learning_rate": 1.7167540087486825e-06, "loss": 0.111, "step": 67890 }, { "epoch": 2.53, "learning_rate": 1.71637465751021e-06, "loss": 0.1187, "step": 67920 }, { "epoch": 2.53, "learning_rate": 1.7159955576364506e-06, "loss": 0.1197, "step": 67950 }, { "epoch": 2.54, "learning_rate": 1.7156167088499294e-06, "loss": 0.1162, "step": 67980 }, { "epoch": 2.54, "learning_rate": 1.7152381108735994e-06, "loss": 0.1414, "step": 68010 }, { "epoch": 2.54, "learning_rate": 1.714859763430842e-06, "loss": 0.1314, "step": 68040 }, { "epoch": 2.54, "learning_rate": 1.7144816662454657e-06, "loss": 0.1337, "step": 68070 }, { "epoch": 2.54, "learning_rate": 1.7141038190417047e-06, "loss": 0.1477, "step": 68100 }, { "epoch": 2.54, "learning_rate": 1.7137262215442186e-06, "loss": 0.1408, "step": 68130 }, { "epoch": 2.54, "learning_rate": 1.713348873478091e-06, "loss": 0.1232, "step": 68160 }, { "epoch": 2.54, "learning_rate": 1.7129717745688293e-06, "loss": 0.1486, "step": 68190 }, { "epoch": 2.54, "learning_rate": 1.712594924542364e-06, "loss": 0.1385, "step": 68220 }, { "epoch": 2.55, "learning_rate": 1.7122183231250472e-06, "loss": 0.1458, "step": 68250 }, { "epoch": 2.55, "learning_rate": 1.7118419700436517e-06, "loss": 0.1231, "step": 68280 }, { "epoch": 2.55, "learning_rate": 1.711465865025371e-06, "loss": 0.1225, "step": 68310 }, { "epoch": 2.55, "learning_rate": 1.7110900077978183e-06, "loss": 0.13, "step": 68340 }, { "epoch": 2.55, "learning_rate": 1.7107143980890237e-06, "loss": 0.1255, "step": 68370 }, { "epoch": 2.55, "learning_rate": 1.710339035627437e-06, "loss": 0.1142, "step": 68400 }, { "epoch": 2.55, "learning_rate": 1.7099639201419239e-06, "loss": 0.132, "step": 68430 }, { "epoch": 2.55, "learning_rate": 1.7095890513617658e-06, "loss": 0.1368, "step": 68460 }, { "epoch": 2.55, "learning_rate": 1.709214429016661e-06, "loss": 0.1405, "step": 68490 }, { "epoch": 2.56, "learning_rate": 1.7088400528367202e-06, "loss": 0.1366, "step": 68520 }, { "epoch": 2.56, "learning_rate": 1.708465922552469e-06, "loss": 0.1501, "step": 68550 }, { "epoch": 2.56, "learning_rate": 1.7080920378948456e-06, "loss": 0.1163, "step": 68580 }, { "epoch": 2.56, "learning_rate": 1.7077183985952001e-06, "loss": 0.1612, "step": 68610 }, { "epoch": 2.56, "learning_rate": 1.7073450043852934e-06, "loss": 0.1368, "step": 68640 }, { "epoch": 2.56, "learning_rate": 1.7069718549972975e-06, "loss": 0.1204, "step": 68670 }, { "epoch": 2.56, "learning_rate": 1.706598950163793e-06, "loss": 0.1105, "step": 68700 }, { "epoch": 2.56, "learning_rate": 1.7062262896177706e-06, "loss": 0.1209, "step": 68730 }, { "epoch": 2.56, "learning_rate": 1.7058538730926278e-06, "loss": 0.1472, "step": 68760 }, { "epoch": 2.57, "learning_rate": 1.7054817003221697e-06, "loss": 0.1081, "step": 68790 }, { "epoch": 2.57, "learning_rate": 1.7051097710406078e-06, "loss": 0.1353, "step": 68820 }, { "epoch": 2.57, "learning_rate": 1.7047380849825593e-06, "loss": 0.1516, "step": 68850 }, { "epoch": 2.57, "learning_rate": 1.7043666418830458e-06, "loss": 0.1594, "step": 68880 }, { "epoch": 2.57, "learning_rate": 1.7039954414774928e-06, "loss": 0.1348, "step": 68910 }, { "epoch": 2.57, "learning_rate": 1.703624483501729e-06, "loss": 0.1525, "step": 68940 }, { "epoch": 2.57, "learning_rate": 1.703253767691987e-06, "loss": 0.1299, "step": 68970 }, { "epoch": 2.57, "learning_rate": 1.7028832937848983e-06, "loss": 0.1177, "step": 69000 }, { "epoch": 2.57, "learning_rate": 1.7025130615174974e-06, "loss": 0.129, "step": 69030 }, { "epoch": 2.58, "learning_rate": 1.7021430706272177e-06, "loss": 0.1141, "step": 69060 }, { "epoch": 2.58, "learning_rate": 1.7017733208518927e-06, "loss": 0.1381, "step": 69090 }, { "epoch": 2.58, "learning_rate": 1.7014038119297534e-06, "loss": 0.1344, "step": 69120 }, { "epoch": 2.58, "learning_rate": 1.7010345435994293e-06, "loss": 0.1369, "step": 69150 }, { "epoch": 2.58, "learning_rate": 1.7006655155999465e-06, "loss": 0.1376, "step": 69180 }, { "epoch": 2.58, "learning_rate": 1.700296727670727e-06, "loss": 0.1413, "step": 69210 }, { "epoch": 2.58, "learning_rate": 1.699928179551589e-06, "loss": 0.1448, "step": 69240 }, { "epoch": 2.58, "learning_rate": 1.6995598709827443e-06, "loss": 0.159, "step": 69270 }, { "epoch": 2.58, "learning_rate": 1.6991918017047987e-06, "loss": 0.117, "step": 69300 }, { "epoch": 2.59, "learning_rate": 1.698823971458752e-06, "loss": 0.1308, "step": 69330 }, { "epoch": 2.59, "learning_rate": 1.6984563799859952e-06, "loss": 0.1395, "step": 69360 }, { "epoch": 2.59, "learning_rate": 1.6980890270283112e-06, "loss": 0.1099, "step": 69390 }, { "epoch": 2.59, "learning_rate": 1.697721912327874e-06, "loss": 0.132, "step": 69420 }, { "epoch": 2.59, "learning_rate": 1.6973550356272473e-06, "loss": 0.1263, "step": 69450 }, { "epoch": 2.59, "learning_rate": 1.6969883966693837e-06, "loss": 0.1259, "step": 69480 }, { "epoch": 2.59, "learning_rate": 1.6966219951976253e-06, "loss": 0.1129, "step": 69510 }, { "epoch": 2.59, "learning_rate": 1.696255830955701e-06, "loss": 0.1204, "step": 69540 }, { "epoch": 2.59, "learning_rate": 1.695889903687727e-06, "loss": 0.1258, "step": 69570 }, { "epoch": 2.6, "learning_rate": 1.6955242131382063e-06, "loss": 0.192, "step": 69600 }, { "epoch": 2.6, "learning_rate": 1.6951587590520263e-06, "loss": 0.1266, "step": 69630 }, { "epoch": 2.6, "learning_rate": 1.6947935411744596e-06, "loss": 0.1154, "step": 69660 }, { "epoch": 2.6, "learning_rate": 1.694428559251163e-06, "loss": 0.1221, "step": 69690 }, { "epoch": 2.6, "learning_rate": 1.694063813028177e-06, "loss": 0.1197, "step": 69720 }, { "epoch": 2.6, "learning_rate": 1.6936993022519233e-06, "loss": 0.1561, "step": 69750 }, { "epoch": 2.6, "learning_rate": 1.693335026669207e-06, "loss": 0.1296, "step": 69780 }, { "epoch": 2.6, "learning_rate": 1.6929709860272125e-06, "loss": 0.136, "step": 69810 }, { "epoch": 2.6, "learning_rate": 1.6926071800735058e-06, "loss": 0.1368, "step": 69840 }, { "epoch": 2.61, "learning_rate": 1.6922436085560317e-06, "loss": 0.1193, "step": 69870 }, { "epoch": 2.61, "learning_rate": 1.6918802712231148e-06, "loss": 0.1259, "step": 69900 }, { "epoch": 2.61, "learning_rate": 1.6915171678234566e-06, "loss": 0.1172, "step": 69930 }, { "epoch": 2.61, "learning_rate": 1.6911542981061369e-06, "loss": 0.142, "step": 69960 }, { "epoch": 2.61, "learning_rate": 1.6907916618206113e-06, "loss": 0.163, "step": 69990 }, { "epoch": 2.61, "learning_rate": 1.690429258716712e-06, "loss": 0.1358, "step": 70020 }, { "epoch": 2.61, "learning_rate": 1.6900670885446464e-06, "loss": 0.1268, "step": 70050 }, { "epoch": 2.61, "learning_rate": 1.6897051510549954e-06, "loss": 0.1575, "step": 70080 }, { "epoch": 2.61, "learning_rate": 1.6893434459987152e-06, "loss": 0.1099, "step": 70110 }, { "epoch": 2.62, "learning_rate": 1.688981973127133e-06, "loss": 0.1692, "step": 70140 }, { "epoch": 2.62, "learning_rate": 1.6886207321919508e-06, "loss": 0.1387, "step": 70170 }, { "epoch": 2.62, "learning_rate": 1.6882597229452405e-06, "loss": 0.1381, "step": 70200 }, { "epoch": 2.62, "learning_rate": 1.6878989451394446e-06, "loss": 0.1406, "step": 70230 }, { "epoch": 2.62, "learning_rate": 1.6875383985273766e-06, "loss": 0.1322, "step": 70260 }, { "epoch": 2.62, "learning_rate": 1.6871780828622196e-06, "loss": 0.1357, "step": 70290 }, { "epoch": 2.62, "learning_rate": 1.6868179978975248e-06, "loss": 0.129, "step": 70320 }, { "epoch": 2.62, "learning_rate": 1.6864581433872124e-06, "loss": 0.136, "step": 70350 }, { "epoch": 2.62, "learning_rate": 1.6860985190855678e-06, "loss": 0.1737, "step": 70380 }, { "epoch": 2.63, "learning_rate": 1.6857391247472457e-06, "loss": 0.1339, "step": 70410 }, { "epoch": 2.63, "learning_rate": 1.6853799601272647e-06, "loss": 0.1383, "step": 70440 }, { "epoch": 2.63, "learning_rate": 1.6850210249810098e-06, "loss": 0.1398, "step": 70470 }, { "epoch": 2.63, "learning_rate": 1.68466231906423e-06, "loss": 0.133, "step": 70500 }, { "epoch": 2.63, "learning_rate": 1.6843038421330383e-06, "loss": 0.1246, "step": 70530 }, { "epoch": 2.63, "learning_rate": 1.68394559394391e-06, "loss": 0.1165, "step": 70560 }, { "epoch": 2.63, "learning_rate": 1.6835875742536848e-06, "loss": 0.1417, "step": 70590 }, { "epoch": 2.63, "learning_rate": 1.6832297828195618e-06, "loss": 0.1204, "step": 70620 }, { "epoch": 2.64, "learning_rate": 1.6828722193991028e-06, "loss": 0.1198, "step": 70650 }, { "epoch": 2.64, "learning_rate": 1.6825148837502292e-06, "loss": 0.1255, "step": 70680 }, { "epoch": 2.64, "learning_rate": 1.6821577756312225e-06, "loss": 0.108, "step": 70710 }, { "epoch": 2.64, "learning_rate": 1.6818008948007225e-06, "loss": 0.1433, "step": 70740 }, { "epoch": 2.64, "learning_rate": 1.681444241017728e-06, "loss": 0.1285, "step": 70770 }, { "epoch": 2.64, "learning_rate": 1.6810878140415945e-06, "loss": 0.148, "step": 70800 }, { "epoch": 2.64, "learning_rate": 1.680731613632036e-06, "loss": 0.1284, "step": 70830 }, { "epoch": 2.64, "learning_rate": 1.6803756395491212e-06, "loss": 0.1459, "step": 70860 }, { "epoch": 2.64, "learning_rate": 1.6800198915532748e-06, "loss": 0.132, "step": 70890 }, { "epoch": 2.65, "learning_rate": 1.679664369405277e-06, "loss": 0.137, "step": 70920 }, { "epoch": 2.65, "learning_rate": 1.6793090728662615e-06, "loss": 0.1349, "step": 70950 }, { "epoch": 2.65, "learning_rate": 1.6789540016977157e-06, "loss": 0.1346, "step": 70980 }, { "epoch": 2.65, "learning_rate": 1.67859915566148e-06, "loss": 0.1209, "step": 71010 }, { "epoch": 2.65, "learning_rate": 1.678244534519747e-06, "loss": 0.1226, "step": 71040 }, { "epoch": 2.65, "learning_rate": 1.6778901380350609e-06, "loss": 0.1397, "step": 71070 }, { "epoch": 2.65, "learning_rate": 1.6775359659703164e-06, "loss": 0.1666, "step": 71100 }, { "epoch": 2.65, "learning_rate": 1.677182018088759e-06, "loss": 0.1219, "step": 71130 }, { "epoch": 2.65, "learning_rate": 1.6768282941539827e-06, "loss": 0.1102, "step": 71160 }, { "epoch": 2.66, "learning_rate": 1.6764747939299313e-06, "loss": 0.1129, "step": 71190 }, { "epoch": 2.66, "learning_rate": 1.676121517180897e-06, "loss": 0.1192, "step": 71220 }, { "epoch": 2.66, "learning_rate": 1.6757684636715186e-06, "loss": 0.1267, "step": 71250 }, { "epoch": 2.66, "learning_rate": 1.675415633166782e-06, "loss": 0.1253, "step": 71280 }, { "epoch": 2.66, "learning_rate": 1.6750630254320201e-06, "loss": 0.1526, "step": 71310 }, { "epoch": 2.66, "learning_rate": 1.6747106402329109e-06, "loss": 0.1352, "step": 71340 }, { "epoch": 2.66, "learning_rate": 1.6743584773354766e-06, "loss": 0.124, "step": 71370 }, { "epoch": 2.66, "learning_rate": 1.6740065365060848e-06, "loss": 0.1198, "step": 71400 }, { "epoch": 2.66, "learning_rate": 1.6736548175114462e-06, "loss": 0.119, "step": 71430 }, { "epoch": 2.67, "learning_rate": 1.6733033201186144e-06, "loss": 0.1203, "step": 71460 }, { "epoch": 2.67, "learning_rate": 1.6729520440949857e-06, "loss": 0.1236, "step": 71490 }, { "epoch": 2.67, "learning_rate": 1.6726009892082972e-06, "loss": 0.137, "step": 71520 }, { "epoch": 2.67, "learning_rate": 1.672250155226628e-06, "loss": 0.1126, "step": 71550 }, { "epoch": 2.67, "learning_rate": 1.6718995419183973e-06, "loss": 0.1264, "step": 71580 }, { "epoch": 2.67, "learning_rate": 1.6715491490523637e-06, "loss": 0.1052, "step": 71610 }, { "epoch": 2.67, "learning_rate": 1.6711989763976253e-06, "loss": 0.1434, "step": 71640 }, { "epoch": 2.67, "learning_rate": 1.6708606852710505e-06, "loss": 0.164, "step": 71670 }, { "epoch": 2.67, "learning_rate": 1.6705109450262383e-06, "loss": 0.1292, "step": 71700 }, { "epoch": 2.68, "learning_rate": 1.6701614243097006e-06, "loss": 0.1275, "step": 71730 }, { "epoch": 2.68, "learning_rate": 1.6698121228918743e-06, "loss": 0.1123, "step": 71760 }, { "epoch": 2.68, "learning_rate": 1.6694630405435322e-06, "loss": 0.1452, "step": 71790 }, { "epoch": 2.68, "learning_rate": 1.6691141770357822e-06, "loss": 0.1189, "step": 71820 }, { "epoch": 2.68, "learning_rate": 1.6687655321400668e-06, "loss": 0.1196, "step": 71850 }, { "epoch": 2.68, "learning_rate": 1.668417105628163e-06, "loss": 0.1111, "step": 71880 }, { "epoch": 2.68, "learning_rate": 1.6680688972721806e-06, "loss": 0.1195, "step": 71910 }, { "epoch": 2.68, "learning_rate": 1.667720906844563e-06, "loss": 0.1359, "step": 71940 }, { "epoch": 2.68, "learning_rate": 1.6673731341180851e-06, "loss": 0.1419, "step": 71970 }, { "epoch": 2.69, "learning_rate": 1.6670255788658535e-06, "loss": 0.1338, "step": 72000 }, { "epoch": 2.69, "learning_rate": 1.6666782408613056e-06, "loss": 0.1178, "step": 72030 }, { "epoch": 2.69, "learning_rate": 1.6663311198782094e-06, "loss": 0.1335, "step": 72060 }, { "epoch": 2.69, "learning_rate": 1.6659842156906629e-06, "loss": 0.1428, "step": 72090 }, { "epoch": 2.69, "learning_rate": 1.665637528073092e-06, "loss": 0.1141, "step": 72120 }, { "epoch": 2.69, "learning_rate": 1.6652910568002523e-06, "loss": 0.1324, "step": 72150 }, { "epoch": 2.69, "learning_rate": 1.6649448016472265e-06, "loss": 0.1105, "step": 72180 }, { "epoch": 2.69, "learning_rate": 1.6645987623894243e-06, "loss": 0.1171, "step": 72210 }, { "epoch": 2.69, "learning_rate": 1.664252938802584e-06, "loss": 0.1304, "step": 72240 }, { "epoch": 2.7, "learning_rate": 1.6639188474653653e-06, "loss": 0.1418, "step": 72270 }, { "epoch": 2.7, "learning_rate": 1.6635734473784454e-06, "loss": 0.1655, "step": 72300 }, { "epoch": 2.7, "learning_rate": 1.6632282622990878e-06, "loss": 0.1287, "step": 72330 }, { "epoch": 2.7, "learning_rate": 1.662883292004319e-06, "loss": 0.1168, "step": 72360 }, { "epoch": 2.7, "learning_rate": 1.6625385362714889e-06, "loss": 0.1226, "step": 72390 }, { "epoch": 2.7, "learning_rate": 1.6621939948782702e-06, "loss": 0.114, "step": 72420 }, { "epoch": 2.7, "learning_rate": 1.6618496676026586e-06, "loss": 0.1378, "step": 72450 }, { "epoch": 2.7, "learning_rate": 1.661505554222971e-06, "loss": 0.1486, "step": 72480 }, { "epoch": 2.7, "learning_rate": 1.6611616545178456e-06, "loss": 0.166, "step": 72510 }, { "epoch": 2.71, "learning_rate": 1.6608179682662409e-06, "loss": 0.127, "step": 72540 }, { "epoch": 2.71, "learning_rate": 1.6604744952474365e-06, "loss": 0.132, "step": 72570 }, { "epoch": 2.71, "learning_rate": 1.6601312352410301e-06, "loss": 0.1409, "step": 72600 }, { "epoch": 2.71, "learning_rate": 1.6597881880269391e-06, "loss": 0.1251, "step": 72630 }, { "epoch": 2.71, "learning_rate": 1.6594453533853988e-06, "loss": 0.1233, "step": 72660 }, { "epoch": 2.71, "learning_rate": 1.6591027310969617e-06, "loss": 0.1673, "step": 72690 }, { "epoch": 2.71, "learning_rate": 1.6587603209424982e-06, "loss": 0.1573, "step": 72720 }, { "epoch": 2.71, "learning_rate": 1.6584181227031944e-06, "loss": 0.1496, "step": 72750 }, { "epoch": 2.71, "learning_rate": 1.6580761361605527e-06, "loss": 0.1375, "step": 72780 }, { "epoch": 2.72, "learning_rate": 1.657734361096391e-06, "loss": 0.1405, "step": 72810 }, { "epoch": 2.72, "learning_rate": 1.65739279729284e-06, "loss": 0.1278, "step": 72840 }, { "epoch": 2.72, "learning_rate": 1.6570514445323473e-06, "loss": 0.1159, "step": 72870 }, { "epoch": 2.72, "learning_rate": 1.6567103025976724e-06, "loss": 0.1367, "step": 72900 }, { "epoch": 2.72, "learning_rate": 1.6563693712718881e-06, "loss": 0.1064, "step": 72930 }, { "epoch": 2.72, "learning_rate": 1.6560286503383794e-06, "loss": 0.1339, "step": 72960 }, { "epoch": 2.72, "learning_rate": 1.6556881395808428e-06, "loss": 0.1459, "step": 72990 }, { "epoch": 2.72, "learning_rate": 1.6553478387832867e-06, "loss": 0.1296, "step": 73020 }, { "epoch": 2.72, "learning_rate": 1.6550077477300302e-06, "loss": 0.1061, "step": 73050 }, { "epoch": 2.73, "learning_rate": 1.6546678662057017e-06, "loss": 0.1268, "step": 73080 }, { "epoch": 2.73, "learning_rate": 1.654328193995239e-06, "loss": 0.1635, "step": 73110 }, { "epoch": 2.73, "learning_rate": 1.65398873088389e-06, "loss": 0.1153, "step": 73140 }, { "epoch": 2.73, "learning_rate": 1.65364947665721e-06, "loss": 0.1259, "step": 73170 }, { "epoch": 2.73, "learning_rate": 1.653310431101062e-06, "loss": 0.1366, "step": 73200 }, { "epoch": 2.73, "learning_rate": 1.652971594001617e-06, "loss": 0.1298, "step": 73230 }, { "epoch": 2.73, "learning_rate": 1.6526329651453512e-06, "loss": 0.125, "step": 73260 }, { "epoch": 2.73, "learning_rate": 1.6522945443190482e-06, "loss": 0.1218, "step": 73290 }, { "epoch": 2.73, "learning_rate": 1.6519563313097975e-06, "loss": 0.1287, "step": 73320 }, { "epoch": 2.74, "learning_rate": 1.6516183259049915e-06, "loss": 0.1331, "step": 73350 }, { "epoch": 2.74, "learning_rate": 1.6512805278923288e-06, "loss": 0.1167, "step": 73380 }, { "epoch": 2.74, "learning_rate": 1.6509429370598115e-06, "loss": 0.1302, "step": 73410 }, { "epoch": 2.74, "learning_rate": 1.6506055531957439e-06, "loss": 0.1303, "step": 73440 }, { "epoch": 2.74, "learning_rate": 1.6502683760887347e-06, "loss": 0.1249, "step": 73470 }, { "epoch": 2.74, "learning_rate": 1.6499314055276932e-06, "loss": 0.1246, "step": 73500 }, { "epoch": 2.74, "learning_rate": 1.6495946413018313e-06, "loss": 0.1197, "step": 73530 }, { "epoch": 2.74, "learning_rate": 1.6492580832006616e-06, "loss": 0.1325, "step": 73560 }, { "epoch": 2.74, "learning_rate": 1.6489217310139972e-06, "loss": 0.1582, "step": 73590 }, { "epoch": 2.75, "learning_rate": 1.6485855845319507e-06, "loss": 0.1284, "step": 73620 }, { "epoch": 2.75, "learning_rate": 1.6482496435449352e-06, "loss": 0.1203, "step": 73650 }, { "epoch": 2.75, "learning_rate": 1.6479139078436612e-06, "loss": 0.1214, "step": 73680 }, { "epoch": 2.75, "learning_rate": 1.6475783772191392e-06, "loss": 0.116, "step": 73710 }, { "epoch": 2.75, "learning_rate": 1.6472430514626755e-06, "loss": 0.1251, "step": 73740 }, { "epoch": 2.75, "learning_rate": 1.6469079303658753e-06, "loss": 0.141, "step": 73770 }, { "epoch": 2.75, "learning_rate": 1.6465730137206392e-06, "loss": 0.1403, "step": 73800 }, { "epoch": 2.75, "learning_rate": 1.6462383013191651e-06, "loss": 0.145, "step": 73830 }, { "epoch": 2.75, "learning_rate": 1.645903792953945e-06, "loss": 0.1302, "step": 73860 }, { "epoch": 2.76, "learning_rate": 1.645569488417768e-06, "loss": 0.1324, "step": 73890 }, { "epoch": 2.76, "learning_rate": 1.6452353875037153e-06, "loss": 0.1237, "step": 73920 }, { "epoch": 2.76, "learning_rate": 1.6449014900051635e-06, "loss": 0.1183, "step": 73950 }, { "epoch": 2.76, "learning_rate": 1.644567795715783e-06, "loss": 0.1438, "step": 73980 }, { "epoch": 2.76, "learning_rate": 1.6442343044295355e-06, "loss": 0.0983, "step": 74010 }, { "epoch": 2.76, "learning_rate": 1.6439010159406765e-06, "loss": 0.125, "step": 74040 }, { "epoch": 2.76, "learning_rate": 1.6435679300437526e-06, "loss": 0.1353, "step": 74070 }, { "epoch": 2.76, "learning_rate": 1.6432350465336022e-06, "loss": 0.1112, "step": 74100 }, { "epoch": 2.76, "learning_rate": 1.6429023652053536e-06, "loss": 0.1583, "step": 74130 }, { "epoch": 2.77, "learning_rate": 1.642569885854426e-06, "loss": 0.1328, "step": 74160 }, { "epoch": 2.77, "learning_rate": 1.6422376082765283e-06, "loss": 0.129, "step": 74190 }, { "epoch": 2.77, "learning_rate": 1.641905532267658e-06, "loss": 0.136, "step": 74220 }, { "epoch": 2.77, "learning_rate": 1.6415736576241027e-06, "loss": 0.1275, "step": 74250 }, { "epoch": 2.77, "learning_rate": 1.6412419841424357e-06, "loss": 0.1138, "step": 74280 }, { "epoch": 2.77, "learning_rate": 1.6409105116195198e-06, "loss": 0.1318, "step": 74310 }, { "epoch": 2.77, "learning_rate": 1.6405792398525046e-06, "loss": 0.134, "step": 74340 }, { "epoch": 2.77, "learning_rate": 1.6402481686388257e-06, "loss": 0.1285, "step": 74370 }, { "epoch": 2.77, "learning_rate": 1.6399172977762055e-06, "loss": 0.1327, "step": 74400 }, { "epoch": 2.78, "learning_rate": 1.6395866270626506e-06, "loss": 0.1371, "step": 74430 }, { "epoch": 2.78, "learning_rate": 1.6392561562964538e-06, "loss": 0.1307, "step": 74460 }, { "epoch": 2.78, "learning_rate": 1.638925885276192e-06, "loss": 0.1189, "step": 74490 }, { "epoch": 2.78, "learning_rate": 1.6385958138007257e-06, "loss": 0.1392, "step": 74520 }, { "epoch": 2.78, "learning_rate": 1.6382659416692e-06, "loss": 0.1304, "step": 74550 }, { "epoch": 2.78, "learning_rate": 1.6379362686810413e-06, "loss": 0.1366, "step": 74580 }, { "epoch": 2.78, "learning_rate": 1.6376067946359594e-06, "loss": 0.122, "step": 74610 }, { "epoch": 2.78, "learning_rate": 1.637277519333946e-06, "loss": 0.117, "step": 74640 }, { "epoch": 2.78, "learning_rate": 1.6369484425752744e-06, "loss": 0.1312, "step": 74670 }, { "epoch": 2.79, "learning_rate": 1.6366195641604977e-06, "loss": 0.1446, "step": 74700 }, { "epoch": 2.79, "learning_rate": 1.6362908838904503e-06, "loss": 0.1355, "step": 74730 }, { "epoch": 2.79, "learning_rate": 1.6359624015662468e-06, "loss": 0.112, "step": 74760 }, { "epoch": 2.79, "learning_rate": 1.63563411698928e-06, "loss": 0.1381, "step": 74790 }, { "epoch": 2.79, "learning_rate": 1.6353060299612225e-06, "loss": 0.1352, "step": 74820 }, { "epoch": 2.79, "learning_rate": 1.6349781402840249e-06, "loss": 0.1279, "step": 74850 }, { "epoch": 2.79, "learning_rate": 1.634650447759916e-06, "loss": 0.1435, "step": 74880 }, { "epoch": 2.79, "learning_rate": 1.6343229521914015e-06, "loss": 0.1055, "step": 74910 }, { "epoch": 2.8, "learning_rate": 1.6339956533812637e-06, "loss": 0.1178, "step": 74940 }, { "epoch": 2.8, "learning_rate": 1.6336685511325629e-06, "loss": 0.1602, "step": 74970 }, { "epoch": 2.8, "learning_rate": 1.633341645248633e-06, "loss": 0.1273, "step": 75000 }, { "epoch": 2.8, "learning_rate": 1.6330149355330848e-06, "loss": 0.1194, "step": 75030 }, { "epoch": 2.8, "learning_rate": 1.6326884217898035e-06, "loss": 0.1113, "step": 75060 }, { "epoch": 2.8, "learning_rate": 1.6323621038229485e-06, "loss": 0.1342, "step": 75090 }, { "epoch": 2.8, "learning_rate": 1.6320359814369536e-06, "loss": 0.1232, "step": 75120 }, { "epoch": 2.8, "learning_rate": 1.6317100544365252e-06, "loss": 0.1481, "step": 75150 }, { "epoch": 2.8, "learning_rate": 1.6313843226266435e-06, "loss": 0.1225, "step": 75180 }, { "epoch": 2.81, "learning_rate": 1.6310587858125606e-06, "loss": 0.1109, "step": 75210 }, { "epoch": 2.81, "learning_rate": 1.6307334437998005e-06, "loss": 0.128, "step": 75240 }, { "epoch": 2.81, "learning_rate": 1.6304082963941586e-06, "loss": 0.1299, "step": 75270 }, { "epoch": 2.81, "learning_rate": 1.6300833434017019e-06, "loss": 0.1102, "step": 75300 }, { "epoch": 2.81, "learning_rate": 1.6297585846287667e-06, "loss": 0.1546, "step": 75330 }, { "epoch": 2.81, "learning_rate": 1.62943401988196e-06, "loss": 0.1139, "step": 75360 }, { "epoch": 2.81, "learning_rate": 1.6291096489681585e-06, "loss": 0.1585, "step": 75390 }, { "epoch": 2.81, "learning_rate": 1.6287854716945076e-06, "loss": 0.1309, "step": 75420 }, { "epoch": 2.81, "learning_rate": 1.6284614878684206e-06, "loss": 0.1177, "step": 75450 }, { "epoch": 2.82, "learning_rate": 1.6281376972975807e-06, "loss": 0.1354, "step": 75480 }, { "epoch": 2.82, "learning_rate": 1.627814099789936e-06, "loss": 0.1256, "step": 75510 }, { "epoch": 2.82, "learning_rate": 1.6274906951537042e-06, "loss": 0.1198, "step": 75540 }, { "epoch": 2.82, "learning_rate": 1.6271674831973684e-06, "loss": 0.1113, "step": 75570 }, { "epoch": 2.82, "learning_rate": 1.6268444637296777e-06, "loss": 0.1301, "step": 75600 }, { "epoch": 2.82, "learning_rate": 1.626521636559647e-06, "loss": 0.13, "step": 75630 }, { "epoch": 2.82, "learning_rate": 1.6261990014965568e-06, "loss": 0.1195, "step": 75660 }, { "epoch": 2.82, "learning_rate": 1.625876558349952e-06, "loss": 0.1487, "step": 75690 }, { "epoch": 2.82, "learning_rate": 1.6255543069296417e-06, "loss": 0.1401, "step": 75720 }, { "epoch": 2.83, "learning_rate": 1.6252322470456988e-06, "loss": 0.1356, "step": 75750 }, { "epoch": 2.83, "learning_rate": 1.62491037850846e-06, "loss": 0.1466, "step": 75780 }, { "epoch": 2.83, "learning_rate": 1.6245887011285238e-06, "loss": 0.14, "step": 75810 }, { "epoch": 2.83, "learning_rate": 1.6242672147167518e-06, "loss": 0.1438, "step": 75840 }, { "epoch": 2.83, "learning_rate": 1.6239459190842678e-06, "loss": 0.1164, "step": 75870 }, { "epoch": 2.83, "learning_rate": 1.6236248140424554e-06, "loss": 0.1274, "step": 75900 }, { "epoch": 2.83, "learning_rate": 1.6233038994029616e-06, "loss": 0.1271, "step": 75930 }, { "epoch": 2.83, "learning_rate": 1.6229831749776917e-06, "loss": 0.1332, "step": 75960 }, { "epoch": 2.83, "learning_rate": 1.6226626405788123e-06, "loss": 0.1383, "step": 75990 }, { "epoch": 2.84, "learning_rate": 1.6223422960187493e-06, "loss": 0.1406, "step": 76020 }, { "epoch": 2.84, "learning_rate": 1.6220221411101878e-06, "loss": 0.1296, "step": 76050 }, { "epoch": 2.84, "learning_rate": 1.621702175666071e-06, "loss": 0.1257, "step": 76080 }, { "epoch": 2.84, "learning_rate": 1.6213823994996006e-06, "loss": 0.1331, "step": 76110 }, { "epoch": 2.84, "learning_rate": 1.6210628124242367e-06, "loss": 0.1133, "step": 76140 }, { "epoch": 2.84, "learning_rate": 1.6207434142536954e-06, "loss": 0.1475, "step": 76170 }, { "epoch": 2.84, "learning_rate": 1.6204242048019508e-06, "loss": 0.1178, "step": 76200 }, { "epoch": 2.84, "learning_rate": 1.620105183883233e-06, "loss": 0.146, "step": 76230 }, { "epoch": 2.84, "learning_rate": 1.6197863513120277e-06, "loss": 0.1307, "step": 76260 }, { "epoch": 2.85, "learning_rate": 1.6194677069030761e-06, "loss": 0.1189, "step": 76290 }, { "epoch": 2.85, "learning_rate": 1.6191492504713747e-06, "loss": 0.1067, "step": 76320 }, { "epoch": 2.85, "learning_rate": 1.6188415877632176e-06, "loss": 0.1414, "step": 76350 }, { "epoch": 2.85, "learning_rate": 1.6185235004813931e-06, "loss": 0.1605, "step": 76380 }, { "epoch": 2.85, "learning_rate": 1.6182056006294704e-06, "loss": 0.1265, "step": 76410 }, { "epoch": 2.85, "learning_rate": 1.6178878880234536e-06, "loss": 0.1211, "step": 76440 }, { "epoch": 2.85, "learning_rate": 1.6175703624795988e-06, "loss": 0.1318, "step": 76470 }, { "epoch": 2.85, "learning_rate": 1.617253023814414e-06, "loss": 0.1321, "step": 76500 }, { "epoch": 2.85, "learning_rate": 1.6169358718446608e-06, "loss": 0.1517, "step": 76530 }, { "epoch": 2.86, "learning_rate": 1.6166189063873493e-06, "loss": 0.1042, "step": 76560 }, { "epoch": 2.86, "learning_rate": 1.6163021272597446e-06, "loss": 0.1061, "step": 76590 }, { "epoch": 2.86, "learning_rate": 1.6159855342793588e-06, "loss": 0.1301, "step": 76620 }, { "epoch": 2.86, "learning_rate": 1.6156691272639551e-06, "loss": 0.112, "step": 76650 }, { "epoch": 2.86, "learning_rate": 1.6153529060315482e-06, "loss": 0.1173, "step": 76680 }, { "epoch": 2.86, "learning_rate": 1.6150368704003994e-06, "loss": 0.1286, "step": 76710 }, { "epoch": 2.86, "learning_rate": 1.6147210201890205e-06, "loss": 0.1261, "step": 76740 }, { "epoch": 2.86, "learning_rate": 1.6144053552161706e-06, "loss": 0.1122, "step": 76770 }, { "epoch": 2.86, "learning_rate": 1.614089875300858e-06, "loss": 0.1155, "step": 76800 }, { "epoch": 2.87, "learning_rate": 1.613774580262337e-06, "loss": 0.1299, "step": 76830 }, { "epoch": 2.87, "learning_rate": 1.61345946992011e-06, "loss": 0.1319, "step": 76860 }, { "epoch": 2.87, "learning_rate": 1.613144544093926e-06, "loss": 0.141, "step": 76890 }, { "epoch": 2.87, "learning_rate": 1.6128298026037798e-06, "loss": 0.1458, "step": 76920 }, { "epoch": 2.87, "learning_rate": 1.6125152452699114e-06, "loss": 0.1293, "step": 76950 }, { "epoch": 2.87, "learning_rate": 1.6122008719128075e-06, "loss": 0.1072, "step": 76980 }, { "epoch": 2.87, "learning_rate": 1.611886682353198e-06, "loss": 0.1663, "step": 77010 }, { "epoch": 2.87, "learning_rate": 1.6115726764120597e-06, "loss": 0.1185, "step": 77040 }, { "epoch": 2.87, "learning_rate": 1.6112588539106105e-06, "loss": 0.1152, "step": 77070 }, { "epoch": 2.88, "learning_rate": 1.6109452146703141e-06, "loss": 0.1354, "step": 77100 }, { "epoch": 2.88, "learning_rate": 1.6106317585128764e-06, "loss": 0.1285, "step": 77130 }, { "epoch": 2.88, "learning_rate": 1.6103184852602463e-06, "loss": 0.1265, "step": 77160 }, { "epoch": 2.88, "learning_rate": 1.6100053947346149e-06, "loss": 0.1186, "step": 77190 }, { "epoch": 2.88, "learning_rate": 1.6096924867584152e-06, "loss": 0.1134, "step": 77220 }, { "epoch": 2.88, "learning_rate": 1.6093797611543222e-06, "loss": 0.1244, "step": 77250 }, { "epoch": 2.88, "learning_rate": 1.6090672177452509e-06, "loss": 0.1286, "step": 77280 }, { "epoch": 2.88, "learning_rate": 1.6087548563543582e-06, "loss": 0.1144, "step": 77310 }, { "epoch": 2.88, "learning_rate": 1.6084426768050402e-06, "loss": 0.1276, "step": 77340 }, { "epoch": 2.89, "learning_rate": 1.608130678920933e-06, "loss": 0.1158, "step": 77370 }, { "epoch": 2.89, "learning_rate": 1.6078188625259127e-06, "loss": 0.148, "step": 77400 }, { "epoch": 2.89, "learning_rate": 1.6075072274440942e-06, "loss": 0.1105, "step": 77430 }, { "epoch": 2.89, "learning_rate": 1.6071957734998298e-06, "loss": 0.1044, "step": 77460 }, { "epoch": 2.89, "learning_rate": 1.6068845005177113e-06, "loss": 0.1486, "step": 77490 }, { "epoch": 2.89, "learning_rate": 1.6065734083225676e-06, "loss": 0.1365, "step": 77520 }, { "epoch": 2.89, "learning_rate": 1.606262496739465e-06, "loss": 0.1277, "step": 77550 }, { "epoch": 2.89, "learning_rate": 1.6059517655937066e-06, "loss": 0.1579, "step": 77580 }, { "epoch": 2.89, "learning_rate": 1.605641214710832e-06, "loss": 0.1188, "step": 77610 }, { "epoch": 2.9, "learning_rate": 1.6053308439166174e-06, "loss": 0.117, "step": 77640 }, { "epoch": 2.9, "learning_rate": 1.6050206530370737e-06, "loss": 0.1237, "step": 77670 }, { "epoch": 2.9, "learning_rate": 1.6047106418984479e-06, "loss": 0.1474, "step": 77700 }, { "epoch": 2.9, "learning_rate": 1.604400810327221e-06, "loss": 0.1049, "step": 77730 }, { "epoch": 2.9, "learning_rate": 1.604091158150109e-06, "loss": 0.1182, "step": 77760 }, { "epoch": 2.9, "learning_rate": 1.6037816851940622e-06, "loss": 0.1306, "step": 77790 }, { "epoch": 2.9, "learning_rate": 1.6034723912862638e-06, "loss": 0.1245, "step": 77820 }, { "epoch": 2.9, "learning_rate": 1.6031632762541306e-06, "loss": 0.1207, "step": 77850 }, { "epoch": 2.9, "learning_rate": 1.602854339925312e-06, "loss": 0.1281, "step": 77880 }, { "epoch": 2.91, "learning_rate": 1.6025455821276895e-06, "loss": 0.1307, "step": 77910 }, { "epoch": 2.91, "learning_rate": 1.602237002689378e-06, "loss": 0.1328, "step": 77940 }, { "epoch": 2.91, "learning_rate": 1.6019286014387222e-06, "loss": 0.1234, "step": 77970 }, { "epoch": 2.91, "learning_rate": 1.6016203782042993e-06, "loss": 0.1513, "step": 78000 }, { "epoch": 2.91, "learning_rate": 1.6013123328149165e-06, "loss": 0.1261, "step": 78030 }, { "epoch": 2.91, "learning_rate": 1.6010044650996121e-06, "loss": 0.1743, "step": 78060 }, { "epoch": 2.91, "learning_rate": 1.6006967748876534e-06, "loss": 0.1145, "step": 78090 }, { "epoch": 2.91, "learning_rate": 1.6003892620085383e-06, "loss": 0.1329, "step": 78120 }, { "epoch": 2.91, "learning_rate": 1.600081926291993e-06, "loss": 0.1111, "step": 78150 }, { "epoch": 2.92, "learning_rate": 1.5997747675679737e-06, "loss": 0.1281, "step": 78180 }, { "epoch": 2.92, "learning_rate": 1.5994677856666639e-06, "loss": 0.1112, "step": 78210 }, { "epoch": 2.92, "learning_rate": 1.5991609804184753e-06, "loss": 0.1142, "step": 78240 }, { "epoch": 2.92, "learning_rate": 1.598854351654048e-06, "loss": 0.149, "step": 78270 }, { "epoch": 2.92, "learning_rate": 1.5985478992042483e-06, "loss": 0.1226, "step": 78300 }, { "epoch": 2.92, "learning_rate": 1.5982416229001699e-06, "loss": 0.1297, "step": 78330 }, { "epoch": 2.92, "learning_rate": 1.597935522573133e-06, "loss": 0.1264, "step": 78360 }, { "epoch": 2.92, "learning_rate": 1.5976295980546836e-06, "loss": 0.144, "step": 78390 }, { "epoch": 2.92, "learning_rate": 1.5973238491765936e-06, "loss": 0.1262, "step": 78420 }, { "epoch": 2.93, "learning_rate": 1.5970182757708593e-06, "loss": 0.1319, "step": 78450 }, { "epoch": 2.93, "learning_rate": 1.5967128776697042e-06, "loss": 0.1319, "step": 78480 }, { "epoch": 2.93, "learning_rate": 1.5964076547055735e-06, "loss": 0.1383, "step": 78510 }, { "epoch": 2.93, "learning_rate": 1.5961026067111377e-06, "loss": 0.1365, "step": 78540 }, { "epoch": 2.93, "learning_rate": 1.5957977335192917e-06, "loss": 0.1186, "step": 78570 }, { "epoch": 2.93, "learning_rate": 1.595493034963153e-06, "loss": 0.1287, "step": 78600 }, { "epoch": 2.93, "learning_rate": 1.5951885108760623e-06, "loss": 0.1259, "step": 78630 }, { "epoch": 2.93, "learning_rate": 1.5948841610915825e-06, "loss": 0.1007, "step": 78660 }, { "epoch": 2.93, "learning_rate": 1.5945799854434987e-06, "loss": 0.1322, "step": 78690 }, { "epoch": 2.94, "learning_rate": 1.594275983765819e-06, "loss": 0.1383, "step": 78720 }, { "epoch": 2.94, "learning_rate": 1.5939721558927715e-06, "loss": 0.1543, "step": 78750 }, { "epoch": 2.94, "learning_rate": 1.5936685016588058e-06, "loss": 0.1241, "step": 78780 }, { "epoch": 2.94, "learning_rate": 1.5933650208985923e-06, "loss": 0.1312, "step": 78810 }, { "epoch": 2.94, "learning_rate": 1.5930617134470222e-06, "loss": 0.1462, "step": 78840 }, { "epoch": 2.94, "learning_rate": 1.5927686808283344e-06, "loss": 0.1193, "step": 78870 }, { "epoch": 2.94, "learning_rate": 1.5924657137362828e-06, "loss": 0.1061, "step": 78900 }, { "epoch": 2.94, "learning_rate": 1.5921629194643422e-06, "loss": 0.126, "step": 78930 }, { "epoch": 2.94, "learning_rate": 1.591860297848274e-06, "loss": 0.1174, "step": 78960 }, { "epoch": 2.95, "learning_rate": 1.5915578487240574e-06, "loss": 0.1343, "step": 78990 }, { "epoch": 2.95, "learning_rate": 1.591255571927889e-06, "loss": 0.1251, "step": 79020 }, { "epoch": 2.95, "learning_rate": 1.5909534672961845e-06, "loss": 0.104, "step": 79050 }, { "epoch": 2.95, "learning_rate": 1.590651534665576e-06, "loss": 0.1217, "step": 79080 }, { "epoch": 2.95, "learning_rate": 1.5903497738729133e-06, "loss": 0.1201, "step": 79110 }, { "epoch": 2.95, "learning_rate": 1.5900481847552615e-06, "loss": 0.1156, "step": 79140 }, { "epoch": 2.95, "learning_rate": 1.5897467671499027e-06, "loss": 0.1092, "step": 79170 }, { "epoch": 2.95, "learning_rate": 1.5894455208943354e-06, "loss": 0.1086, "step": 79200 }, { "epoch": 2.96, "learning_rate": 1.5891444458262723e-06, "loss": 0.1487, "step": 79230 }, { "epoch": 2.96, "learning_rate": 1.5888435417836425e-06, "loss": 0.1268, "step": 79260 }, { "epoch": 2.96, "learning_rate": 1.588542808604589e-06, "loss": 0.1304, "step": 79290 }, { "epoch": 2.96, "learning_rate": 1.5882422461274692e-06, "loss": 0.12, "step": 79320 }, { "epoch": 2.96, "learning_rate": 1.5879418541908546e-06, "loss": 0.1192, "step": 79350 }, { "epoch": 2.96, "learning_rate": 1.587641632633531e-06, "loss": 0.1381, "step": 79380 }, { "epoch": 2.96, "learning_rate": 1.587341581294496e-06, "loss": 0.1398, "step": 79410 }, { "epoch": 2.96, "learning_rate": 1.5870417000129618e-06, "loss": 0.1147, "step": 79440 }, { "epoch": 2.96, "learning_rate": 1.5867419886283522e-06, "loss": 0.1287, "step": 79470 }, { "epoch": 2.97, "learning_rate": 1.5864424469803034e-06, "loss": 0.1311, "step": 79500 }, { "epoch": 2.97, "learning_rate": 1.5861430749086632e-06, "loss": 0.1002, "step": 79530 }, { "epoch": 2.97, "learning_rate": 1.5858438722534912e-06, "loss": 0.121, "step": 79560 }, { "epoch": 2.97, "learning_rate": 1.5855448388550579e-06, "loss": 0.1285, "step": 79590 }, { "epoch": 2.97, "learning_rate": 1.5852459745538446e-06, "loss": 0.124, "step": 79620 }, { "epoch": 2.97, "learning_rate": 1.5849472791905432e-06, "loss": 0.1084, "step": 79650 }, { "epoch": 2.97, "learning_rate": 1.584648752606055e-06, "loss": 0.1275, "step": 79680 }, { "epoch": 2.97, "learning_rate": 1.5843503946414926e-06, "loss": 0.1033, "step": 79710 }, { "epoch": 2.97, "learning_rate": 1.5840522051381754e-06, "loss": 0.1243, "step": 79740 }, { "epoch": 2.98, "learning_rate": 1.5837541839376345e-06, "loss": 0.1235, "step": 79770 }, { "epoch": 2.98, "learning_rate": 1.583456330881607e-06, "loss": 0.1354, "step": 79800 }, { "epoch": 2.98, "learning_rate": 1.5831586458120407e-06, "loss": 0.133, "step": 79830 }, { "epoch": 2.98, "learning_rate": 1.5828611285710897e-06, "loss": 0.1308, "step": 79860 }, { "epoch": 2.98, "learning_rate": 1.5825637790011156e-06, "loss": 0.1371, "step": 79890 }, { "epoch": 2.98, "learning_rate": 1.5822665969446885e-06, "loss": 0.1257, "step": 79920 }, { "epoch": 2.98, "learning_rate": 1.5819695822445844e-06, "loss": 0.1381, "step": 79950 }, { "epoch": 2.98, "learning_rate": 1.581672734743786e-06, "loss": 0.1247, "step": 79980 }, { "epoch": 2.98, "learning_rate": 1.5813760542854823e-06, "loss": 0.1273, "step": 80010 }, { "epoch": 2.99, "learning_rate": 1.581079540713068e-06, "loss": 0.1241, "step": 80040 }, { "epoch": 2.99, "learning_rate": 1.5807831938701429e-06, "loss": 0.1138, "step": 80070 }, { "epoch": 2.99, "learning_rate": 1.5804870136005133e-06, "loss": 0.1325, "step": 80100 }, { "epoch": 2.99, "learning_rate": 1.5801909997481882e-06, "loss": 0.1183, "step": 80130 }, { "epoch": 2.99, "learning_rate": 1.5798951521573824e-06, "loss": 0.1255, "step": 80160 }, { "epoch": 2.99, "learning_rate": 1.5795994706725152e-06, "loss": 0.1126, "step": 80190 }, { "epoch": 2.99, "learning_rate": 1.5793039551382084e-06, "loss": 0.1329, "step": 80220 }, { "epoch": 2.99, "learning_rate": 1.5790086053992875e-06, "loss": 0.1352, "step": 80250 }, { "epoch": 2.99, "learning_rate": 1.5787134213007817e-06, "loss": 0.1207, "step": 80280 }, { "epoch": 3.0, "learning_rate": 1.578418402687923e-06, "loss": 0.1156, "step": 80310 }, { "epoch": 3.0, "learning_rate": 1.5781235494061444e-06, "loss": 0.1152, "step": 80340 }, { "epoch": 3.0, "learning_rate": 1.5778288613010825e-06, "loss": 0.1198, "step": 80370 }, { "epoch": 3.0, "learning_rate": 1.577534338218575e-06, "loss": 0.1269, "step": 80400 }, { "epoch": 3.0, "learning_rate": 1.5772399800046603e-06, "loss": 0.1328, "step": 80430 }, { "epoch": 3.0, "learning_rate": 1.576945786505579e-06, "loss": 0.1224, "step": 80460 }, { "epoch": 3.0, "learning_rate": 1.5766517575677718e-06, "loss": 0.1253, "step": 80490 }, { "epoch": 3.0, "learning_rate": 1.5763578930378794e-06, "loss": 0.1651, "step": 80520 }, { "epoch": 3.0, "learning_rate": 1.5760641927627434e-06, "loss": 0.0942, "step": 80550 }, { "epoch": 3.01, "learning_rate": 1.5757706565894044e-06, "loss": 0.1177, "step": 80580 }, { "epoch": 3.01, "learning_rate": 1.5754772843651023e-06, "loss": 0.1323, "step": 80610 }, { "epoch": 3.01, "learning_rate": 1.5751840759372764e-06, "loss": 0.1054, "step": 80640 }, { "epoch": 3.01, "learning_rate": 1.5748910311535644e-06, "loss": 0.129, "step": 80670 }, { "epoch": 3.01, "learning_rate": 1.5745981498618028e-06, "loss": 0.1538, "step": 80700 }, { "epoch": 3.01, "learning_rate": 1.5743054319100251e-06, "loss": 0.1182, "step": 80730 }, { "epoch": 3.01, "learning_rate": 1.5740128771464635e-06, "loss": 0.1425, "step": 80760 }, { "epoch": 3.01, "learning_rate": 1.5737204854195472e-06, "loss": 0.1119, "step": 80790 }, { "epoch": 3.01, "learning_rate": 1.5734282565779023e-06, "loss": 0.1081, "step": 80820 }, { "epoch": 3.02, "learning_rate": 1.573136190470352e-06, "loss": 0.1015, "step": 80850 }, { "epoch": 3.02, "learning_rate": 1.5728442869459149e-06, "loss": 0.1261, "step": 80880 }, { "epoch": 3.02, "learning_rate": 1.5725525458538072e-06, "loss": 0.1303, "step": 80910 }, { "epoch": 3.02, "learning_rate": 1.5722609670434387e-06, "loss": 0.1189, "step": 80940 }, { "epoch": 3.02, "learning_rate": 1.5719695503644169e-06, "loss": 0.1273, "step": 80970 }, { "epoch": 3.02, "learning_rate": 1.571678295666543e-06, "loss": 0.1485, "step": 81000 }, { "epoch": 3.02, "learning_rate": 1.5713872027998126e-06, "loss": 0.1234, "step": 81030 }, { "epoch": 3.02, "learning_rate": 1.5711059667173108e-06, "loss": 0.1268, "step": 81060 }, { "epoch": 3.02, "learning_rate": 1.5708151916816517e-06, "loss": 0.1171, "step": 81090 }, { "epoch": 3.03, "learning_rate": 1.570524578033268e-06, "loss": 0.1064, "step": 81120 }, { "epoch": 3.03, "learning_rate": 1.5702341256229242e-06, "loss": 0.1265, "step": 81150 }, { "epoch": 3.03, "learning_rate": 1.5699438343015793e-06, "loss": 0.1359, "step": 81180 }, { "epoch": 3.03, "learning_rate": 1.569653703920385e-06, "loss": 0.1267, "step": 81210 }, { "epoch": 3.03, "learning_rate": 1.5693637343306845e-06, "loss": 0.1095, "step": 81240 }, { "epoch": 3.03, "learning_rate": 1.569073925384014e-06, "loss": 0.1374, "step": 81270 }, { "epoch": 3.03, "learning_rate": 1.5687842769321014e-06, "loss": 0.1208, "step": 81300 }, { "epoch": 3.03, "learning_rate": 1.568494788826866e-06, "loss": 0.1166, "step": 81330 }, { "epoch": 3.03, "learning_rate": 1.5682054609204182e-06, "loss": 0.1039, "step": 81360 }, { "epoch": 3.04, "learning_rate": 1.5679162930650595e-06, "loss": 0.1325, "step": 81390 }, { "epoch": 3.04, "learning_rate": 1.5676272851132824e-06, "loss": 0.1569, "step": 81420 }, { "epoch": 3.04, "learning_rate": 1.5673384369177683e-06, "loss": 0.1288, "step": 81450 }, { "epoch": 3.04, "learning_rate": 1.56704974833139e-06, "loss": 0.1136, "step": 81480 }, { "epoch": 3.04, "learning_rate": 1.5667612192072093e-06, "loss": 0.1078, "step": 81510 }, { "epoch": 3.04, "learning_rate": 1.5664728493984767e-06, "loss": 0.133, "step": 81540 }, { "epoch": 3.04, "learning_rate": 1.5661846387586333e-06, "loss": 0.1134, "step": 81570 }, { "epoch": 3.04, "learning_rate": 1.5658965871413072e-06, "loss": 0.1342, "step": 81600 }, { "epoch": 3.04, "learning_rate": 1.565608694400316e-06, "loss": 0.1197, "step": 81630 }, { "epoch": 3.05, "learning_rate": 1.5653209603896647e-06, "loss": 0.134, "step": 81660 }, { "epoch": 3.05, "learning_rate": 1.5650333849635466e-06, "loss": 0.124, "step": 81690 }, { "epoch": 3.05, "learning_rate": 1.5647459679763419e-06, "loss": 0.1352, "step": 81720 }, { "epoch": 3.05, "learning_rate": 1.5644587092826183e-06, "loss": 0.1432, "step": 81750 }, { "epoch": 3.05, "learning_rate": 1.5641716087371303e-06, "loss": 0.1214, "step": 81780 }, { "epoch": 3.05, "learning_rate": 1.5638846661948185e-06, "loss": 0.1189, "step": 81810 }, { "epoch": 3.05, "learning_rate": 1.5635978815108104e-06, "loss": 0.139, "step": 81840 }, { "epoch": 3.05, "learning_rate": 1.563311254540419e-06, "loss": 0.141, "step": 81870 }, { "epoch": 3.05, "learning_rate": 1.5630247851391424e-06, "loss": 0.1134, "step": 81900 }, { "epoch": 3.06, "learning_rate": 1.5627384731626653e-06, "loss": 0.1101, "step": 81930 }, { "epoch": 3.06, "learning_rate": 1.562452318466856e-06, "loss": 0.1219, "step": 81960 }, { "epoch": 3.06, "learning_rate": 1.5621663209077684e-06, "loss": 0.1193, "step": 81990 }, { "epoch": 3.06, "learning_rate": 1.56188048034164e-06, "loss": 0.1315, "step": 82020 }, { "epoch": 3.06, "learning_rate": 1.5615947966248937e-06, "loss": 0.1027, "step": 82050 }, { "epoch": 3.06, "learning_rate": 1.5613092696141343e-06, "loss": 0.1191, "step": 82080 }, { "epoch": 3.06, "learning_rate": 1.5610238991661516e-06, "loss": 0.1327, "step": 82110 }, { "epoch": 3.06, "learning_rate": 1.5607386851379177e-06, "loss": 0.1149, "step": 82140 }, { "epoch": 3.06, "learning_rate": 1.5604536273865881e-06, "loss": 0.1186, "step": 82170 }, { "epoch": 3.07, "learning_rate": 1.5601687257695005e-06, "loss": 0.1243, "step": 82200 }, { "epoch": 3.07, "learning_rate": 1.5598839801441752e-06, "loss": 0.1272, "step": 82230 }, { "epoch": 3.07, "learning_rate": 1.5595993903683138e-06, "loss": 0.1184, "step": 82260 }, { "epoch": 3.07, "learning_rate": 1.5593149562998e-06, "loss": 0.1334, "step": 82290 }, { "epoch": 3.07, "learning_rate": 1.5590306777966993e-06, "loss": 0.129, "step": 82320 }, { "epoch": 3.07, "learning_rate": 1.5587465547172577e-06, "loss": 0.1391, "step": 82350 }, { "epoch": 3.07, "learning_rate": 1.558462586919902e-06, "loss": 0.1281, "step": 82380 }, { "epoch": 3.07, "learning_rate": 1.5581787742632399e-06, "loss": 0.1231, "step": 82410 }, { "epoch": 3.07, "learning_rate": 1.5578951166060582e-06, "loss": 0.1002, "step": 82440 }, { "epoch": 3.08, "learning_rate": 1.557611613807325e-06, "loss": 0.1116, "step": 82470 }, { "epoch": 3.08, "learning_rate": 1.5573282657261874e-06, "loss": 0.1374, "step": 82500 }, { "epoch": 3.08, "learning_rate": 1.5570450722219712e-06, "loss": 0.12, "step": 82530 }, { "epoch": 3.08, "learning_rate": 1.556762033154182e-06, "loss": 0.1218, "step": 82560 }, { "epoch": 3.08, "learning_rate": 1.556479148382504e-06, "loss": 0.1386, "step": 82590 }, { "epoch": 3.08, "learning_rate": 1.5561964177667995e-06, "loss": 0.1384, "step": 82620 }, { "epoch": 3.08, "learning_rate": 1.5559138411671087e-06, "loss": 0.1249, "step": 82650 }, { "epoch": 3.08, "learning_rate": 1.5556314184436507e-06, "loss": 0.1146, "step": 82680 }, { "epoch": 3.08, "learning_rate": 1.5553491494568212e-06, "loss": 0.1199, "step": 82710 }, { "epoch": 3.09, "learning_rate": 1.555067034067193e-06, "loss": 0.1184, "step": 82740 }, { "epoch": 3.09, "learning_rate": 1.554785072135517e-06, "loss": 0.108, "step": 82770 }, { "epoch": 3.09, "learning_rate": 1.5545032635227195e-06, "loss": 0.1385, "step": 82800 }, { "epoch": 3.09, "learning_rate": 1.554221608089904e-06, "loss": 0.1237, "step": 82830 }, { "epoch": 3.09, "learning_rate": 1.5539401056983492e-06, "loss": 0.136, "step": 82860 }, { "epoch": 3.09, "learning_rate": 1.553658756209511e-06, "loss": 0.1085, "step": 82890 }, { "epoch": 3.09, "learning_rate": 1.55337755948502e-06, "loss": 0.1238, "step": 82920 }, { "epoch": 3.09, "learning_rate": 1.5530965153866817e-06, "loss": 0.1196, "step": 82950 }, { "epoch": 3.09, "learning_rate": 1.5528156237764777e-06, "loss": 0.0952, "step": 82980 }, { "epoch": 3.1, "learning_rate": 1.5525348845165627e-06, "loss": 0.1253, "step": 83010 }, { "epoch": 3.1, "learning_rate": 1.5522542974692667e-06, "loss": 0.144, "step": 83040 }, { "epoch": 3.1, "learning_rate": 1.5519738624970942e-06, "loss": 0.1095, "step": 83070 }, { "epoch": 3.1, "learning_rate": 1.551693579462723e-06, "loss": 0.1236, "step": 83100 }, { "epoch": 3.1, "learning_rate": 1.5514134482290044e-06, "loss": 0.1229, "step": 83130 }, { "epoch": 3.1, "learning_rate": 1.5511334686589628e-06, "loss": 0.129, "step": 83160 }, { "epoch": 3.1, "learning_rate": 1.550853640615796e-06, "loss": 0.1211, "step": 83190 }, { "epoch": 3.1, "learning_rate": 1.5505739639628744e-06, "loss": 0.1231, "step": 83220 }, { "epoch": 3.1, "learning_rate": 1.5502944385637406e-06, "loss": 0.1098, "step": 83250 }, { "epoch": 3.11, "learning_rate": 1.5500150642821094e-06, "loss": 0.1199, "step": 83280 }, { "epoch": 3.11, "learning_rate": 1.5497358409818678e-06, "loss": 0.1487, "step": 83310 }, { "epoch": 3.11, "learning_rate": 1.5494567685270737e-06, "loss": 0.1068, "step": 83340 }, { "epoch": 3.11, "learning_rate": 1.5491778467819573e-06, "loss": 0.118, "step": 83370 }, { "epoch": 3.11, "learning_rate": 1.5488990756109185e-06, "loss": 0.1085, "step": 83400 }, { "epoch": 3.11, "learning_rate": 1.5486204548785288e-06, "loss": 0.1082, "step": 83430 }, { "epoch": 3.11, "learning_rate": 1.5483419844495307e-06, "loss": 0.1041, "step": 83460 }, { "epoch": 3.11, "learning_rate": 1.5480636641888352e-06, "loss": 0.1008, "step": 83490 }, { "epoch": 3.12, "learning_rate": 1.547785493961525e-06, "loss": 0.1108, "step": 83520 }, { "epoch": 3.12, "learning_rate": 1.5475074736328517e-06, "loss": 0.1091, "step": 83550 }, { "epoch": 3.12, "learning_rate": 1.5472296030682354e-06, "loss": 0.1135, "step": 83580 }, { "epoch": 3.12, "learning_rate": 1.5469518821332668e-06, "loss": 0.1369, "step": 83610 }, { "epoch": 3.12, "learning_rate": 1.5466743106937049e-06, "loss": 0.1298, "step": 83640 }, { "epoch": 3.12, "learning_rate": 1.5463968886154769e-06, "loss": 0.1157, "step": 83670 }, { "epoch": 3.12, "learning_rate": 1.5461196157646783e-06, "loss": 0.1205, "step": 83700 }, { "epoch": 3.12, "learning_rate": 1.545842492007573e-06, "loss": 0.1166, "step": 83730 }, { "epoch": 3.12, "learning_rate": 1.5455655172105921e-06, "loss": 0.1373, "step": 83760 }, { "epoch": 3.13, "learning_rate": 1.545288691240335e-06, "loss": 0.1065, "step": 83790 }, { "epoch": 3.13, "learning_rate": 1.5450120139635676e-06, "loss": 0.1403, "step": 83820 }, { "epoch": 3.13, "learning_rate": 1.5447354852472227e-06, "loss": 0.1516, "step": 83850 }, { "epoch": 3.13, "learning_rate": 1.5444591049584e-06, "loss": 0.1211, "step": 83880 }, { "epoch": 3.13, "learning_rate": 1.544182872964366e-06, "loss": 0.1235, "step": 83910 }, { "epoch": 3.13, "learning_rate": 1.543906789132552e-06, "loss": 0.1217, "step": 83940 }, { "epoch": 3.13, "learning_rate": 1.543630853330557e-06, "loss": 0.1362, "step": 83970 }, { "epoch": 3.13, "learning_rate": 1.5433550654261445e-06, "loss": 0.1273, "step": 84000 }, { "epoch": 3.13, "learning_rate": 1.5430794252872425e-06, "loss": 0.1416, "step": 84030 }, { "epoch": 3.14, "learning_rate": 1.5428039327819458e-06, "loss": 0.1232, "step": 84060 }, { "epoch": 3.14, "learning_rate": 1.542528587778513e-06, "loss": 0.1246, "step": 84090 }, { "epoch": 3.14, "learning_rate": 1.542253390145367e-06, "loss": 0.1087, "step": 84120 }, { "epoch": 3.14, "learning_rate": 1.5419783397510962e-06, "loss": 0.1087, "step": 84150 }, { "epoch": 3.14, "learning_rate": 1.5417034364644514e-06, "loss": 0.1325, "step": 84180 }, { "epoch": 3.14, "learning_rate": 1.5414286801543485e-06, "loss": 0.1321, "step": 84210 }, { "epoch": 3.14, "learning_rate": 1.5411540706898656e-06, "loss": 0.116, "step": 84240 }, { "epoch": 3.14, "learning_rate": 1.5408796079402449e-06, "loss": 0.1356, "step": 84270 }, { "epoch": 3.14, "learning_rate": 1.5406052917748917e-06, "loss": 0.1228, "step": 84300 }, { "epoch": 3.15, "learning_rate": 1.5403311220633734e-06, "loss": 0.1235, "step": 84330 }, { "epoch": 3.15, "learning_rate": 1.54005709867542e-06, "loss": 0.1147, "step": 84360 }, { "epoch": 3.15, "learning_rate": 1.5397832214809234e-06, "loss": 0.1158, "step": 84390 }, { "epoch": 3.15, "learning_rate": 1.539509490349938e-06, "loss": 0.1112, "step": 84420 }, { "epoch": 3.15, "learning_rate": 1.5392359051526795e-06, "loss": 0.118, "step": 84450 }, { "epoch": 3.15, "learning_rate": 1.5389624657595257e-06, "loss": 0.1281, "step": 84480 }, { "epoch": 3.15, "learning_rate": 1.5386891720410136e-06, "loss": 0.099, "step": 84510 }, { "epoch": 3.15, "learning_rate": 1.538416023867843e-06, "loss": 0.1043, "step": 84540 }, { "epoch": 3.15, "learning_rate": 1.5381430211108741e-06, "loss": 0.1287, "step": 84570 }, { "epoch": 3.16, "learning_rate": 1.5378701636411262e-06, "loss": 0.1269, "step": 84600 }, { "epoch": 3.16, "learning_rate": 1.5375974513297803e-06, "loss": 0.1099, "step": 84630 }, { "epoch": 3.16, "learning_rate": 1.5373248840481758e-06, "loss": 0.1497, "step": 84660 }, { "epoch": 3.16, "learning_rate": 1.5370524616678132e-06, "loss": 0.1104, "step": 84690 }, { "epoch": 3.16, "learning_rate": 1.5367801840603504e-06, "loss": 0.1058, "step": 84720 }, { "epoch": 3.16, "learning_rate": 1.5365080510976067e-06, "loss": 0.1078, "step": 84750 }, { "epoch": 3.16, "learning_rate": 1.5362360626515582e-06, "loss": 0.114, "step": 84780 }, { "epoch": 3.16, "learning_rate": 1.5359642185943406e-06, "loss": 0.0988, "step": 84810 }, { "epoch": 3.16, "learning_rate": 1.535692518798248e-06, "loss": 0.1161, "step": 84840 }, { "epoch": 3.17, "learning_rate": 1.5354209631357322e-06, "loss": 0.1342, "step": 84870 }, { "epoch": 3.17, "learning_rate": 1.5351495514794033e-06, "loss": 0.1202, "step": 84900 }, { "epoch": 3.17, "learning_rate": 1.5348782837020273e-06, "loss": 0.1082, "step": 84930 }, { "epoch": 3.17, "learning_rate": 1.5346071596765304e-06, "loss": 0.1103, "step": 84960 }, { "epoch": 3.17, "learning_rate": 1.5343361792759937e-06, "loss": 0.1361, "step": 84990 }, { "epoch": 3.17, "learning_rate": 1.5340653423736556e-06, "loss": 0.1208, "step": 85020 }, { "epoch": 3.17, "learning_rate": 1.5337946488429114e-06, "loss": 0.1119, "step": 85050 }, { "epoch": 3.17, "learning_rate": 1.5335240985573125e-06, "loss": 0.1237, "step": 85080 }, { "epoch": 3.17, "learning_rate": 1.5332536913905663e-06, "loss": 0.1237, "step": 85110 }, { "epoch": 3.18, "learning_rate": 1.5329924337198969e-06, "loss": 0.1091, "step": 85140 }, { "epoch": 3.18, "learning_rate": 1.5327223076524054e-06, "loss": 0.1237, "step": 85170 }, { "epoch": 3.18, "learning_rate": 1.5324523243300148e-06, "loss": 0.1296, "step": 85200 }, { "epoch": 3.18, "learning_rate": 1.5321824836270496e-06, "loss": 0.1202, "step": 85230 }, { "epoch": 3.18, "learning_rate": 1.5319127854179885e-06, "loss": 0.146, "step": 85260 }, { "epoch": 3.18, "learning_rate": 1.5316432295774654e-06, "loss": 0.1462, "step": 85290 }, { "epoch": 3.18, "learning_rate": 1.5313738159802683e-06, "loss": 0.1368, "step": 85320 }, { "epoch": 3.18, "learning_rate": 1.5311045445013391e-06, "loss": 0.1462, "step": 85350 }, { "epoch": 3.18, "learning_rate": 1.530835415015774e-06, "loss": 0.1282, "step": 85380 }, { "epoch": 3.19, "learning_rate": 1.5305664273988224e-06, "loss": 0.1152, "step": 85410 }, { "epoch": 3.19, "learning_rate": 1.5302975815258878e-06, "loss": 0.1182, "step": 85440 }, { "epoch": 3.19, "learning_rate": 1.5300378318006353e-06, "loss": 0.1259, "step": 85470 }, { "epoch": 3.19, "learning_rate": 1.5297692643280468e-06, "loss": 0.1203, "step": 85500 }, { "epoch": 3.19, "learning_rate": 1.5295008382307369e-06, "loss": 0.102, "step": 85530 }, { "epoch": 3.19, "learning_rate": 1.5292325533847157e-06, "loss": 0.1313, "step": 85560 }, { "epoch": 3.19, "learning_rate": 1.5289644096661436e-06, "loss": 0.1287, "step": 85590 }, { "epoch": 3.19, "learning_rate": 1.528696406951334e-06, "loss": 0.138, "step": 85620 }, { "epoch": 3.19, "learning_rate": 1.5284285451167524e-06, "loss": 0.1381, "step": 85650 }, { "epoch": 3.2, "learning_rate": 1.5281608240390144e-06, "loss": 0.1392, "step": 85680 }, { "epoch": 3.2, "learning_rate": 1.5278932435948881e-06, "loss": 0.1166, "step": 85710 }, { "epoch": 3.2, "learning_rate": 1.527625803661292e-06, "loss": 0.1184, "step": 85740 }, { "epoch": 3.2, "learning_rate": 1.5273585041152956e-06, "loss": 0.1142, "step": 85770 }, { "epoch": 3.2, "learning_rate": 1.5270913448341186e-06, "loss": 0.1084, "step": 85800 }, { "epoch": 3.2, "learning_rate": 1.526824325695131e-06, "loss": 0.1404, "step": 85830 }, { "epoch": 3.2, "learning_rate": 1.5265574465758534e-06, "loss": 0.1101, "step": 85860 }, { "epoch": 3.2, "learning_rate": 1.5262907073539553e-06, "loss": 0.1247, "step": 85890 }, { "epoch": 3.2, "learning_rate": 1.5260241079072572e-06, "loss": 0.1084, "step": 85920 }, { "epoch": 3.21, "learning_rate": 1.5257576481137268e-06, "loss": 0.1379, "step": 85950 }, { "epoch": 3.21, "learning_rate": 1.5254913278514829e-06, "loss": 0.1223, "step": 85980 }, { "epoch": 3.21, "learning_rate": 1.5252251469987929e-06, "loss": 0.1168, "step": 86010 }, { "epoch": 3.21, "learning_rate": 1.524959105434071e-06, "loss": 0.1237, "step": 86040 }, { "epoch": 3.21, "learning_rate": 1.5246932030358824e-06, "loss": 0.1205, "step": 86070 }, { "epoch": 3.21, "learning_rate": 1.5244274396829384e-06, "loss": 0.118, "step": 86100 }, { "epoch": 3.21, "learning_rate": 1.5241618152540993e-06, "loss": 0.1071, "step": 86130 }, { "epoch": 3.21, "learning_rate": 1.523896329628373e-06, "loss": 0.1027, "step": 86160 }, { "epoch": 3.21, "learning_rate": 1.5236309826849143e-06, "loss": 0.1233, "step": 86190 }, { "epoch": 3.22, "learning_rate": 1.5233657743030265e-06, "loss": 0.1153, "step": 86220 }, { "epoch": 3.22, "learning_rate": 1.5231007043621581e-06, "loss": 0.1201, "step": 86250 }, { "epoch": 3.22, "learning_rate": 1.522835772741906e-06, "loss": 0.1336, "step": 86280 }, { "epoch": 3.22, "learning_rate": 1.5225709793220126e-06, "loss": 0.1331, "step": 86310 }, { "epoch": 3.22, "learning_rate": 1.5223063239823674e-06, "loss": 0.1216, "step": 86340 }, { "epoch": 3.22, "learning_rate": 1.5220418066030055e-06, "loss": 0.1039, "step": 86370 }, { "epoch": 3.22, "learning_rate": 1.5217774270641078e-06, "loss": 0.1173, "step": 86400 }, { "epoch": 3.22, "learning_rate": 1.5215131852460013e-06, "loss": 0.153, "step": 86430 }, { "epoch": 3.22, "learning_rate": 1.521249081029158e-06, "loss": 0.1116, "step": 86460 }, { "epoch": 3.23, "learning_rate": 1.5209851142941952e-06, "loss": 0.1253, "step": 86490 }, { "epoch": 3.23, "learning_rate": 1.5207212849218755e-06, "loss": 0.1241, "step": 86520 }, { "epoch": 3.23, "learning_rate": 1.5204575927931055e-06, "loss": 0.1126, "step": 86550 }, { "epoch": 3.23, "learning_rate": 1.520194037788937e-06, "loss": 0.1449, "step": 86580 }, { "epoch": 3.23, "learning_rate": 1.5199306197905658e-06, "loss": 0.1137, "step": 86610 }, { "epoch": 3.23, "learning_rate": 1.5196673386793316e-06, "loss": 0.1225, "step": 86640 }, { "epoch": 3.23, "learning_rate": 1.5194041943367178e-06, "loss": 0.1255, "step": 86670 }, { "epoch": 3.23, "learning_rate": 1.519141186644352e-06, "loss": 0.1401, "step": 86700 }, { "epoch": 3.23, "learning_rate": 1.518878315484005e-06, "loss": 0.1344, "step": 86730 }, { "epoch": 3.24, "learning_rate": 1.5186155807375907e-06, "loss": 0.1046, "step": 86760 }, { "epoch": 3.24, "learning_rate": 1.5183529822871654e-06, "loss": 0.1374, "step": 86790 }, { "epoch": 3.24, "learning_rate": 1.518090520014929e-06, "loss": 0.1191, "step": 86820 }, { "epoch": 3.24, "learning_rate": 1.5178281938032233e-06, "loss": 0.1181, "step": 86850 }, { "epoch": 3.24, "learning_rate": 1.5175660035345323e-06, "loss": 0.1411, "step": 86880 }, { "epoch": 3.24, "learning_rate": 1.5173039490914826e-06, "loss": 0.1249, "step": 86910 }, { "epoch": 3.24, "learning_rate": 1.5170420303568423e-06, "loss": 0.1299, "step": 86940 }, { "epoch": 3.24, "learning_rate": 1.5167802472135212e-06, "loss": 0.1245, "step": 86970 }, { "epoch": 3.24, "learning_rate": 1.5165185995445702e-06, "loss": 0.1229, "step": 87000 }, { "epoch": 3.25, "learning_rate": 1.5162570872331817e-06, "loss": 0.1035, "step": 87030 }, { "epoch": 3.25, "learning_rate": 1.5159957101626888e-06, "loss": 0.1268, "step": 87060 }, { "epoch": 3.25, "learning_rate": 1.5157344682165656e-06, "loss": 0.1357, "step": 87090 }, { "epoch": 3.25, "learning_rate": 1.515473361278426e-06, "loss": 0.1802, "step": 87120 }, { "epoch": 3.25, "learning_rate": 1.5152123892320259e-06, "loss": 0.1332, "step": 87150 }, { "epoch": 3.25, "learning_rate": 1.5149515519612596e-06, "loss": 0.1239, "step": 87180 }, { "epoch": 3.25, "learning_rate": 1.5146908493501613e-06, "loss": 0.1301, "step": 87210 }, { "epoch": 3.25, "learning_rate": 1.5144302812829061e-06, "loss": 0.1008, "step": 87240 }, { "epoch": 3.25, "learning_rate": 1.5141698476438073e-06, "loss": 0.1131, "step": 87270 }, { "epoch": 3.26, "learning_rate": 1.513909548317318e-06, "loss": 0.1224, "step": 87300 }, { "epoch": 3.26, "learning_rate": 1.5136493831880306e-06, "loss": 0.1287, "step": 87330 }, { "epoch": 3.26, "learning_rate": 1.5133893521406753e-06, "loss": 0.1115, "step": 87360 }, { "epoch": 3.26, "learning_rate": 1.5131294550601217e-06, "loss": 0.1134, "step": 87390 }, { "epoch": 3.26, "learning_rate": 1.5128696918313772e-06, "loss": 0.1424, "step": 87420 }, { "epoch": 3.26, "learning_rate": 1.5126100623395884e-06, "loss": 0.1172, "step": 87450 }, { "epoch": 3.26, "learning_rate": 1.512350566470038e-06, "loss": 0.107, "step": 87480 }, { "epoch": 3.26, "learning_rate": 1.5120912041081485e-06, "loss": 0.0991, "step": 87510 }, { "epoch": 3.26, "learning_rate": 1.5118319751394783e-06, "loss": 0.1176, "step": 87540 }, { "epoch": 3.27, "learning_rate": 1.5115728794497237e-06, "loss": 0.108, "step": 87570 }, { "epoch": 3.27, "learning_rate": 1.5113139169247178e-06, "loss": 0.1178, "step": 87600 }, { "epoch": 3.27, "learning_rate": 1.511063712957183e-06, "loss": 0.111, "step": 87630 }, { "epoch": 3.27, "learning_rate": 1.5108050119903283e-06, "loss": 0.111, "step": 87660 }, { "epoch": 3.27, "learning_rate": 1.5105464438503328e-06, "loss": 0.1297, "step": 87690 }, { "epoch": 3.27, "learning_rate": 1.5102880084235716e-06, "loss": 0.1191, "step": 87720 }, { "epoch": 3.27, "learning_rate": 1.5100297055965558e-06, "loss": 0.1325, "step": 87750 }, { "epoch": 3.27, "learning_rate": 1.5097715352559318e-06, "loss": 0.109, "step": 87780 }, { "epoch": 3.28, "learning_rate": 1.5095134972884821e-06, "loss": 0.0999, "step": 87810 }, { "epoch": 3.28, "learning_rate": 1.5092555915811246e-06, "loss": 0.1436, "step": 87840 }, { "epoch": 3.28, "learning_rate": 1.5089978180209128e-06, "loss": 0.1361, "step": 87870 }, { "epoch": 3.28, "learning_rate": 1.5087401764950343e-06, "loss": 0.1119, "step": 87900 }, { "epoch": 3.28, "learning_rate": 1.5084826668908123e-06, "loss": 0.1274, "step": 87930 }, { "epoch": 3.28, "learning_rate": 1.5082252890957039e-06, "loss": 0.1391, "step": 87960 }, { "epoch": 3.28, "learning_rate": 1.5079680429973012e-06, "loss": 0.1352, "step": 87990 }, { "epoch": 3.28, "learning_rate": 1.5077109284833297e-06, "loss": 0.1133, "step": 88020 }, { "epoch": 3.28, "learning_rate": 1.5074539454416498e-06, "loss": 0.1059, "step": 88050 }, { "epoch": 3.29, "learning_rate": 1.5071970937602551e-06, "loss": 0.0964, "step": 88080 }, { "epoch": 3.29, "learning_rate": 1.5069403733272725e-06, "loss": 0.1188, "step": 88110 }, { "epoch": 3.29, "learning_rate": 1.5066837840309626e-06, "loss": 0.1084, "step": 88140 }, { "epoch": 3.29, "learning_rate": 1.5064273257597195e-06, "loss": 0.1163, "step": 88170 }, { "epoch": 3.29, "learning_rate": 1.5061709984020693e-06, "loss": 0.1232, "step": 88200 }, { "epoch": 3.29, "learning_rate": 1.5059148018466718e-06, "loss": 0.1235, "step": 88230 }, { "epoch": 3.29, "learning_rate": 1.5056587359823183e-06, "loss": 0.1132, "step": 88260 }, { "epoch": 3.29, "learning_rate": 1.5054028006979338e-06, "loss": 0.1157, "step": 88290 }, { "epoch": 3.29, "learning_rate": 1.5051469958825737e-06, "loss": 0.1168, "step": 88320 }, { "epoch": 3.3, "learning_rate": 1.504891321425427e-06, "loss": 0.1184, "step": 88350 }, { "epoch": 3.3, "learning_rate": 1.5046357772158132e-06, "loss": 0.1309, "step": 88380 }, { "epoch": 3.3, "learning_rate": 1.5043803631431841e-06, "loss": 0.11, "step": 88410 }, { "epoch": 3.3, "learning_rate": 1.5041250790971224e-06, "loss": 0.132, "step": 88440 }, { "epoch": 3.3, "learning_rate": 1.5038699249673416e-06, "loss": 0.1204, "step": 88470 }, { "epoch": 3.3, "learning_rate": 1.503614900643687e-06, "loss": 0.1047, "step": 88500 }, { "epoch": 3.3, "learning_rate": 1.5033600060161343e-06, "loss": 0.1349, "step": 88530 }, { "epoch": 3.3, "learning_rate": 1.5031052409747888e-06, "loss": 0.1237, "step": 88560 }, { "epoch": 3.3, "learning_rate": 1.5028506054098876e-06, "loss": 0.1142, "step": 88590 }, { "epoch": 3.31, "learning_rate": 1.502596099211797e-06, "loss": 0.1132, "step": 88620 }, { "epoch": 3.31, "learning_rate": 1.5023417222710132e-06, "loss": 0.1193, "step": 88650 }, { "epoch": 3.31, "learning_rate": 1.5020874744781625e-06, "loss": 0.1266, "step": 88680 }, { "epoch": 3.31, "learning_rate": 1.5018333557240003e-06, "loss": 0.1131, "step": 88710 }, { "epoch": 3.31, "learning_rate": 1.501579365899412e-06, "loss": 0.1058, "step": 88740 }, { "epoch": 3.31, "learning_rate": 1.5013255048954111e-06, "loss": 0.1074, "step": 88770 }, { "epoch": 3.31, "learning_rate": 1.5010717726031415e-06, "loss": 0.1117, "step": 88800 }, { "epoch": 3.31, "learning_rate": 1.5008181689138746e-06, "loss": 0.1114, "step": 88830 }, { "epoch": 3.31, "learning_rate": 1.5005646937190105e-06, "loss": 0.1221, "step": 88860 }, { "epoch": 3.32, "learning_rate": 1.500311346910078e-06, "loss": 0.117, "step": 88890 }, { "epoch": 3.32, "learning_rate": 1.5000581283787338e-06, "loss": 0.1257, "step": 88920 }, { "epoch": 3.32, "learning_rate": 1.499805038016763e-06, "loss": 0.1129, "step": 88950 }, { "epoch": 3.32, "learning_rate": 1.4995520757160784e-06, "loss": 0.1134, "step": 88980 }, { "epoch": 3.32, "learning_rate": 1.4992992413687193e-06, "loss": 0.1142, "step": 89010 }, { "epoch": 3.32, "learning_rate": 1.499046534866854e-06, "loss": 0.1165, "step": 89040 }, { "epoch": 3.32, "learning_rate": 1.4987939561027765e-06, "loss": 0.0977, "step": 89070 }, { "epoch": 3.32, "learning_rate": 1.4985415049689088e-06, "loss": 0.1337, "step": 89100 }, { "epoch": 3.32, "learning_rate": 1.4982891813577996e-06, "loss": 0.142, "step": 89130 }, { "epoch": 3.33, "learning_rate": 1.4980369851621237e-06, "loss": 0.1403, "step": 89160 }, { "epoch": 3.33, "learning_rate": 1.4977849162746824e-06, "loss": 0.1387, "step": 89190 }, { "epoch": 3.33, "learning_rate": 1.4975329745884038e-06, "loss": 0.1118, "step": 89220 }, { "epoch": 3.33, "learning_rate": 1.4972811599963413e-06, "loss": 0.1307, "step": 89250 }, { "epoch": 3.33, "learning_rate": 1.4970294723916748e-06, "loss": 0.1178, "step": 89280 }, { "epoch": 3.33, "learning_rate": 1.4967779116677095e-06, "loss": 0.1113, "step": 89310 }, { "epoch": 3.33, "learning_rate": 1.4965264777178759e-06, "loss": 0.1112, "step": 89340 }, { "epoch": 3.33, "learning_rate": 1.4962751704357302e-06, "loss": 0.1064, "step": 89370 }, { "epoch": 3.33, "learning_rate": 1.496023989714953e-06, "loss": 0.1224, "step": 89400 }, { "epoch": 3.34, "learning_rate": 1.4957729354493509e-06, "loss": 0.106, "step": 89430 }, { "epoch": 3.34, "learning_rate": 1.4955220075328538e-06, "loss": 0.1232, "step": 89460 }, { "epoch": 3.34, "learning_rate": 1.4952712058595175e-06, "loss": 0.1527, "step": 89490 }, { "epoch": 3.34, "learning_rate": 1.495028884143625e-06, "loss": 0.1414, "step": 89520 }, { "epoch": 3.34, "learning_rate": 1.4947783304399193e-06, "loss": 0.1012, "step": 89550 }, { "epoch": 3.34, "learning_rate": 1.4945279026658018e-06, "loss": 0.139, "step": 89580 }, { "epoch": 3.34, "learning_rate": 1.49427760071582e-06, "loss": 0.1174, "step": 89610 }, { "epoch": 3.34, "learning_rate": 1.4940274244846445e-06, "loss": 0.1127, "step": 89640 }, { "epoch": 3.34, "learning_rate": 1.4937773738670697e-06, "loss": 0.1122, "step": 89670 }, { "epoch": 3.35, "learning_rate": 1.4935274487580126e-06, "loss": 0.1302, "step": 89700 }, { "epoch": 3.35, "learning_rate": 1.4932776490525144e-06, "loss": 0.1179, "step": 89730 }, { "epoch": 3.35, "learning_rate": 1.4930279746457379e-06, "loss": 0.1215, "step": 89760 }, { "epoch": 3.35, "learning_rate": 1.492778425432969e-06, "loss": 0.1021, "step": 89790 }, { "epoch": 3.35, "learning_rate": 1.4925290013096165e-06, "loss": 0.1068, "step": 89820 }, { "epoch": 3.35, "learning_rate": 1.492279702171211e-06, "loss": 0.1374, "step": 89850 }, { "epoch": 3.35, "learning_rate": 1.4920305279134051e-06, "loss": 0.1252, "step": 89880 }, { "epoch": 3.35, "learning_rate": 1.4917814784319737e-06, "loss": 0.1192, "step": 89910 }, { "epoch": 3.35, "learning_rate": 1.4915325536228134e-06, "loss": 0.1032, "step": 89940 }, { "epoch": 3.36, "learning_rate": 1.4912837533819422e-06, "loss": 0.1232, "step": 89970 }, { "epoch": 3.36, "learning_rate": 1.4910350776054995e-06, "loss": 0.1362, "step": 90000 }, { "epoch": 3.36, "learning_rate": 1.4907865261897456e-06, "loss": 0.117, "step": 90030 }, { "epoch": 3.36, "learning_rate": 1.4905380990310628e-06, "loss": 0.1541, "step": 90060 }, { "epoch": 3.36, "learning_rate": 1.490289796025953e-06, "loss": 0.1077, "step": 90090 }, { "epoch": 3.36, "learning_rate": 1.49004161707104e-06, "loss": 0.1133, "step": 90120 }, { "epoch": 3.36, "learning_rate": 1.4897935620630663e-06, "loss": 0.1163, "step": 90150 }, { "epoch": 3.36, "learning_rate": 1.4895456308988964e-06, "loss": 0.1588, "step": 90180 }, { "epoch": 3.36, "learning_rate": 1.4892978234755149e-06, "loss": 0.1337, "step": 90210 }, { "epoch": 3.37, "learning_rate": 1.4890501396900245e-06, "loss": 0.1156, "step": 90240 }, { "epoch": 3.37, "learning_rate": 1.4888025794396497e-06, "loss": 0.1172, "step": 90270 }, { "epoch": 3.37, "learning_rate": 1.4885551426217331e-06, "loss": 0.1159, "step": 90300 }, { "epoch": 3.37, "learning_rate": 1.488307829133738e-06, "loss": 0.1138, "step": 90330 }, { "epoch": 3.37, "learning_rate": 1.4880606388732456e-06, "loss": 0.121, "step": 90360 }, { "epoch": 3.37, "learning_rate": 1.4878135717379569e-06, "loss": 0.1292, "step": 90390 }, { "epoch": 3.37, "learning_rate": 1.487566627625692e-06, "loss": 0.1111, "step": 90420 }, { "epoch": 3.37, "learning_rate": 1.4873198064343886e-06, "loss": 0.1321, "step": 90450 }, { "epoch": 3.37, "learning_rate": 1.4870731080621042e-06, "loss": 0.1313, "step": 90480 }, { "epoch": 3.38, "learning_rate": 1.4868265324070142e-06, "loss": 0.1153, "step": 90510 }, { "epoch": 3.38, "learning_rate": 1.4865800793674113e-06, "loss": 0.1199, "step": 90540 }, { "epoch": 3.38, "learning_rate": 1.4863337488417067e-06, "loss": 0.1136, "step": 90570 }, { "epoch": 3.38, "learning_rate": 1.4860875407284305e-06, "loss": 0.1175, "step": 90600 }, { "epoch": 3.38, "learning_rate": 1.4858414549262287e-06, "loss": 0.1414, "step": 90630 }, { "epoch": 3.38, "learning_rate": 1.4855954913338655e-06, "loss": 0.1195, "step": 90660 }, { "epoch": 3.38, "learning_rate": 1.4853496498502227e-06, "loss": 0.1175, "step": 90690 }, { "epoch": 3.38, "learning_rate": 1.4851039303742986e-06, "loss": 0.1238, "step": 90720 }, { "epoch": 3.38, "learning_rate": 1.4848583328052088e-06, "loss": 0.1193, "step": 90750 }, { "epoch": 3.39, "learning_rate": 1.4846128570421855e-06, "loss": 0.1214, "step": 90780 }, { "epoch": 3.39, "learning_rate": 1.4843675029845775e-06, "loss": 0.1141, "step": 90810 }, { "epoch": 3.39, "learning_rate": 1.48412227053185e-06, "loss": 0.1322, "step": 90840 }, { "epoch": 3.39, "learning_rate": 1.4838771595835847e-06, "loss": 0.1243, "step": 90870 }, { "epoch": 3.39, "learning_rate": 1.4836321700394784e-06, "loss": 0.1155, "step": 90900 }, { "epoch": 3.39, "learning_rate": 1.483387301799345e-06, "loss": 0.1255, "step": 90930 }, { "epoch": 3.39, "learning_rate": 1.4831425547631134e-06, "loss": 0.0969, "step": 90960 }, { "epoch": 3.39, "learning_rate": 1.4828979288308285e-06, "loss": 0.1114, "step": 90990 }, { "epoch": 3.39, "learning_rate": 1.48265342390265e-06, "loss": 0.1182, "step": 91020 }, { "epoch": 3.4, "learning_rate": 1.4824090398788529e-06, "loss": 0.114, "step": 91050 }, { "epoch": 3.4, "learning_rate": 1.482164776659828e-06, "loss": 0.1142, "step": 91080 }, { "epoch": 3.4, "learning_rate": 1.4819206341460796e-06, "loss": 0.1193, "step": 91110 }, { "epoch": 3.4, "learning_rate": 1.4816766122382278e-06, "loss": 0.1243, "step": 91140 }, { "epoch": 3.4, "learning_rate": 1.4814327108370068e-06, "loss": 0.1066, "step": 91170 }, { "epoch": 3.4, "learning_rate": 1.4811889298432654e-06, "loss": 0.1011, "step": 91200 }, { "epoch": 3.4, "learning_rate": 1.4809452691579664e-06, "loss": 0.1181, "step": 91230 }, { "epoch": 3.4, "learning_rate": 1.4807017286821866e-06, "loss": 0.1294, "step": 91260 }, { "epoch": 3.4, "learning_rate": 1.4804583083171158e-06, "loss": 0.119, "step": 91290 }, { "epoch": 3.41, "learning_rate": 1.4802150079640592e-06, "loss": 0.1297, "step": 91320 }, { "epoch": 3.41, "learning_rate": 1.4799718275244343e-06, "loss": 0.0969, "step": 91350 }, { "epoch": 3.41, "learning_rate": 1.4797287668997723e-06, "loss": 0.1048, "step": 91380 }, { "epoch": 3.41, "learning_rate": 1.4794858259917175e-06, "loss": 0.1058, "step": 91410 }, { "epoch": 3.41, "learning_rate": 1.4792430047020267e-06, "loss": 0.1071, "step": 91440 }, { "epoch": 3.41, "learning_rate": 1.4790003029325705e-06, "loss": 0.1385, "step": 91470 }, { "epoch": 3.41, "learning_rate": 1.4787577205853318e-06, "loss": 0.0996, "step": 91500 }, { "epoch": 3.41, "learning_rate": 1.4785152575624056e-06, "loss": 0.1223, "step": 91530 }, { "epoch": 3.41, "learning_rate": 1.4782729137659996e-06, "loss": 0.1278, "step": 91560 }, { "epoch": 3.42, "learning_rate": 1.4780306890984334e-06, "loss": 0.12, "step": 91590 }, { "epoch": 3.42, "learning_rate": 1.4777885834621391e-06, "loss": 0.1116, "step": 91620 }, { "epoch": 3.42, "learning_rate": 1.47754659675966e-06, "loss": 0.1276, "step": 91650 }, { "epoch": 3.42, "learning_rate": 1.4773047288936515e-06, "loss": 0.1446, "step": 91680 }, { "epoch": 3.42, "learning_rate": 1.4770629797668806e-06, "loss": 0.1144, "step": 91710 }, { "epoch": 3.42, "learning_rate": 1.4768213492822253e-06, "loss": 0.133, "step": 91740 }, { "epoch": 3.42, "learning_rate": 1.4765798373426748e-06, "loss": 0.1227, "step": 91770 }, { "epoch": 3.42, "learning_rate": 1.4763384438513297e-06, "loss": 0.1087, "step": 91800 }, { "epoch": 3.42, "learning_rate": 1.476097168711401e-06, "loss": 0.1146, "step": 91830 }, { "epoch": 3.43, "learning_rate": 1.4758560118262105e-06, "loss": 0.1266, "step": 91860 }, { "epoch": 3.43, "learning_rate": 1.4756149730991908e-06, "loss": 0.1054, "step": 91890 }, { "epoch": 3.43, "learning_rate": 1.4753740524338848e-06, "loss": 0.1123, "step": 91920 }, { "epoch": 3.43, "learning_rate": 1.4751332497339452e-06, "loss": 0.1227, "step": 91950 }, { "epoch": 3.43, "learning_rate": 1.474892564903135e-06, "loss": 0.1042, "step": 91980 }, { "epoch": 3.43, "learning_rate": 1.4746519978453271e-06, "loss": 0.1297, "step": 92010 }, { "epoch": 3.43, "learning_rate": 1.4744115484645044e-06, "loss": 0.1177, "step": 92040 }, { "epoch": 3.43, "learning_rate": 1.474171216664759e-06, "loss": 0.1245, "step": 92070 }, { "epoch": 3.44, "learning_rate": 1.473931002350292e-06, "loss": 0.104, "step": 92100 }, { "epoch": 3.44, "learning_rate": 1.473690905425415e-06, "loss": 0.1007, "step": 92130 }, { "epoch": 3.44, "learning_rate": 1.473450925794547e-06, "loss": 0.0941, "step": 92160 }, { "epoch": 3.44, "learning_rate": 1.4732110633622177e-06, "loss": 0.1019, "step": 92190 }, { "epoch": 3.44, "learning_rate": 1.472971318033064e-06, "loss": 0.1402, "step": 92220 }, { "epoch": 3.44, "learning_rate": 1.472731689711832e-06, "loss": 0.1147, "step": 92250 }, { "epoch": 3.44, "learning_rate": 1.4724921783033766e-06, "loss": 0.128, "step": 92280 }, { "epoch": 3.44, "learning_rate": 1.472252783712661e-06, "loss": 0.1135, "step": 92310 }, { "epoch": 3.44, "learning_rate": 1.472013505844755e-06, "loss": 0.137, "step": 92340 }, { "epoch": 3.45, "learning_rate": 1.471774344604839e-06, "loss": 0.124, "step": 92370 }, { "epoch": 3.45, "learning_rate": 1.471535299898199e-06, "loss": 0.1294, "step": 92400 }, { "epoch": 3.45, "learning_rate": 1.4712963716302299e-06, "loss": 0.1158, "step": 92430 }, { "epoch": 3.45, "learning_rate": 1.4710575597064326e-06, "loss": 0.1321, "step": 92460 }, { "epoch": 3.45, "learning_rate": 1.4708188640324172e-06, "loss": 0.1243, "step": 92490 }, { "epoch": 3.45, "learning_rate": 1.4705802845138998e-06, "loss": 0.1155, "step": 92520 }, { "epoch": 3.45, "learning_rate": 1.4703418210567037e-06, "loss": 0.1176, "step": 92550 }, { "epoch": 3.45, "learning_rate": 1.4701034735667596e-06, "loss": 0.1017, "step": 92580 }, { "epoch": 3.45, "learning_rate": 1.469865241950104e-06, "loss": 0.1216, "step": 92610 }, { "epoch": 3.46, "learning_rate": 1.469627126112881e-06, "loss": 0.1047, "step": 92640 }, { "epoch": 3.46, "learning_rate": 1.4693891259613402e-06, "loss": 0.1072, "step": 92670 }, { "epoch": 3.46, "learning_rate": 1.4691512414018378e-06, "loss": 0.1367, "step": 92700 }, { "epoch": 3.46, "learning_rate": 1.468913472340836e-06, "loss": 0.1179, "step": 92730 }, { "epoch": 3.46, "learning_rate": 1.4686758186849038e-06, "loss": 0.1153, "step": 92760 }, { "epoch": 3.46, "learning_rate": 1.4684382803407139e-06, "loss": 0.109, "step": 92790 }, { "epoch": 3.46, "learning_rate": 1.4682008572150469e-06, "loss": 0.1358, "step": 92820 }, { "epoch": 3.46, "learning_rate": 1.4679635492147872e-06, "loss": 0.0986, "step": 92850 }, { "epoch": 3.46, "learning_rate": 1.467726356246926e-06, "loss": 0.1167, "step": 92880 }, { "epoch": 3.47, "learning_rate": 1.4674892782185584e-06, "loss": 0.1173, "step": 92910 }, { "epoch": 3.47, "learning_rate": 1.467252315036885e-06, "loss": 0.1071, "step": 92940 }, { "epoch": 3.47, "learning_rate": 1.4670154666092112e-06, "loss": 0.1257, "step": 92970 }, { "epoch": 3.47, "learning_rate": 1.4667787328429475e-06, "loss": 0.1203, "step": 93000 }, { "epoch": 3.47, "learning_rate": 1.4665421136456081e-06, "loss": 0.1233, "step": 93030 }, { "epoch": 3.47, "learning_rate": 1.4663056089248127e-06, "loss": 0.1099, "step": 93060 }, { "epoch": 3.47, "learning_rate": 1.4660692185882838e-06, "loss": 0.1145, "step": 93090 }, { "epoch": 3.47, "learning_rate": 1.46583294254385e-06, "loss": 0.1229, "step": 93120 }, { "epoch": 3.47, "learning_rate": 1.4655967806994417e-06, "loss": 0.1132, "step": 93150 }, { "epoch": 3.48, "learning_rate": 1.4653607329630951e-06, "loss": 0.1076, "step": 93180 }, { "epoch": 3.48, "learning_rate": 1.465124799242948e-06, "loss": 0.1297, "step": 93210 }, { "epoch": 3.48, "learning_rate": 1.4648889794472434e-06, "loss": 0.1107, "step": 93240 }, { "epoch": 3.48, "learning_rate": 1.4646532734843268e-06, "loss": 0.1041, "step": 93270 }, { "epoch": 3.48, "learning_rate": 1.4644176812626475e-06, "loss": 0.1077, "step": 93300 }, { "epoch": 3.48, "learning_rate": 1.464182202690757e-06, "loss": 0.1062, "step": 93330 }, { "epoch": 3.48, "learning_rate": 1.4639468376773105e-06, "loss": 0.1202, "step": 93360 }, { "epoch": 3.48, "learning_rate": 1.4637115861310652e-06, "loss": 0.1397, "step": 93390 }, { "epoch": 3.48, "learning_rate": 1.4634764479608819e-06, "loss": 0.1304, "step": 93420 }, { "epoch": 3.49, "learning_rate": 1.4632414230757227e-06, "loss": 0.1259, "step": 93450 }, { "epoch": 3.49, "learning_rate": 1.463006511384653e-06, "loss": 0.1204, "step": 93480 }, { "epoch": 3.49, "learning_rate": 1.4627717127968393e-06, "loss": 0.1246, "step": 93510 }, { "epoch": 3.49, "learning_rate": 1.4625370272215516e-06, "loss": 0.1108, "step": 93540 }, { "epoch": 3.49, "learning_rate": 1.4623024545681609e-06, "loss": 0.1497, "step": 93570 }, { "epoch": 3.49, "learning_rate": 1.462067994746139e-06, "loss": 0.1127, "step": 93600 }, { "epoch": 3.49, "learning_rate": 1.4618414574190027e-06, "loss": 0.1123, "step": 93630 }, { "epoch": 3.49, "learning_rate": 1.4616072192349775e-06, "loss": 0.1436, "step": 93660 }, { "epoch": 3.49, "learning_rate": 1.4613730936143547e-06, "loss": 0.135, "step": 93690 }, { "epoch": 3.5, "learning_rate": 1.4611390804670088e-06, "loss": 0.1123, "step": 93720 }, { "epoch": 3.5, "learning_rate": 1.460905179702916e-06, "loss": 0.1224, "step": 93750 }, { "epoch": 3.5, "learning_rate": 1.4606713912321522e-06, "loss": 0.1134, "step": 93780 }, { "epoch": 3.5, "learning_rate": 1.460437714964895e-06, "loss": 0.1188, "step": 93810 }, { "epoch": 3.5, "learning_rate": 1.4602041508114229e-06, "loss": 0.0994, "step": 93840 }, { "epoch": 3.5, "learning_rate": 1.4599706986821135e-06, "loss": 0.1052, "step": 93870 }, { "epoch": 3.5, "learning_rate": 1.4597373584874452e-06, "loss": 0.1548, "step": 93900 }, { "epoch": 3.5, "learning_rate": 1.459504130137997e-06, "loss": 0.103, "step": 93930 }, { "epoch": 3.5, "learning_rate": 1.4592710135444477e-06, "loss": 0.0989, "step": 93960 }, { "epoch": 3.51, "learning_rate": 1.4590380086175755e-06, "loss": 0.1272, "step": 93990 }, { "epoch": 3.51, "learning_rate": 1.4588051152682587e-06, "loss": 0.1303, "step": 94020 }, { "epoch": 3.51, "learning_rate": 1.4585723334074747e-06, "loss": 0.1212, "step": 94050 }, { "epoch": 3.51, "learning_rate": 1.4583396629463011e-06, "loss": 0.1169, "step": 94080 }, { "epoch": 3.51, "learning_rate": 1.4581071037959144e-06, "loss": 0.1174, "step": 94110 }, { "epoch": 3.51, "learning_rate": 1.4578746558675893e-06, "loss": 0.1132, "step": 94140 }, { "epoch": 3.51, "learning_rate": 1.457642319072701e-06, "loss": 0.1436, "step": 94170 }, { "epoch": 3.51, "learning_rate": 1.4574100933227226e-06, "loss": 0.1252, "step": 94200 }, { "epoch": 3.51, "learning_rate": 1.4571779785292261e-06, "loss": 0.114, "step": 94230 }, { "epoch": 3.52, "learning_rate": 1.4569459746038823e-06, "loss": 0.1098, "step": 94260 }, { "epoch": 3.52, "learning_rate": 1.4567140814584597e-06, "loss": 0.1197, "step": 94290 }, { "epoch": 3.52, "learning_rate": 1.4564822990048258e-06, "loss": 0.1026, "step": 94320 }, { "epoch": 3.52, "learning_rate": 1.4562506271549461e-06, "loss": 0.1173, "step": 94350 }, { "epoch": 3.52, "learning_rate": 1.456019065820884e-06, "loss": 0.1308, "step": 94380 }, { "epoch": 3.52, "learning_rate": 1.4557876149148007e-06, "loss": 0.1223, "step": 94410 }, { "epoch": 3.52, "learning_rate": 1.4555562743489552e-06, "loss": 0.112, "step": 94440 }, { "epoch": 3.52, "learning_rate": 1.4553250440357034e-06, "loss": 0.1283, "step": 94470 }, { "epoch": 3.52, "learning_rate": 1.4550939238875e-06, "loss": 0.1201, "step": 94500 }, { "epoch": 3.53, "learning_rate": 1.454862913816896e-06, "loss": 0.1092, "step": 94530 }, { "epoch": 3.53, "learning_rate": 1.4546320137365396e-06, "loss": 0.1174, "step": 94560 }, { "epoch": 3.53, "learning_rate": 1.454401223559176e-06, "loss": 0.1355, "step": 94590 }, { "epoch": 3.53, "learning_rate": 1.454170543197648e-06, "loss": 0.1051, "step": 94620 }, { "epoch": 3.53, "learning_rate": 1.4539399725648944e-06, "loss": 0.1139, "step": 94650 }, { "epoch": 3.53, "learning_rate": 1.453709511573951e-06, "loss": 0.1283, "step": 94680 }, { "epoch": 3.53, "learning_rate": 1.4534791601379494e-06, "loss": 0.1038, "step": 94710 }, { "epoch": 3.53, "learning_rate": 1.4532489181701182e-06, "loss": 0.1181, "step": 94740 }, { "epoch": 3.53, "learning_rate": 1.453018785583782e-06, "loss": 0.1246, "step": 94770 }, { "epoch": 3.54, "learning_rate": 1.4527887622923615e-06, "loss": 0.0963, "step": 94800 }, { "epoch": 3.54, "learning_rate": 1.4525588482093733e-06, "loss": 0.14, "step": 94830 }, { "epoch": 3.54, "learning_rate": 1.4523290432484293e-06, "loss": 0.1527, "step": 94860 }, { "epoch": 3.54, "learning_rate": 1.452099347323238e-06, "loss": 0.1099, "step": 94890 }, { "epoch": 3.54, "learning_rate": 1.4518697603476026e-06, "loss": 0.1439, "step": 94920 }, { "epoch": 3.54, "learning_rate": 1.4516402822354216e-06, "loss": 0.1077, "step": 94950 }, { "epoch": 3.54, "learning_rate": 1.4514109129006895e-06, "loss": 0.1217, "step": 94980 }, { "epoch": 3.54, "learning_rate": 1.4511816522574956e-06, "loss": 0.1292, "step": 95010 }, { "epoch": 3.54, "learning_rate": 1.4509525002200234e-06, "loss": 0.1279, "step": 95040 }, { "epoch": 3.55, "learning_rate": 1.4507234567025525e-06, "loss": 0.1096, "step": 95070 }, { "epoch": 3.55, "learning_rate": 1.4504945216194558e-06, "loss": 0.1203, "step": 95100 }, { "epoch": 3.55, "learning_rate": 1.4502656948852023e-06, "loss": 0.1156, "step": 95130 }, { "epoch": 3.55, "learning_rate": 1.4500369764143537e-06, "loss": 0.1166, "step": 95160 }, { "epoch": 3.55, "learning_rate": 1.449808366121568e-06, "loss": 0.13, "step": 95190 }, { "epoch": 3.55, "learning_rate": 1.4495798639215955e-06, "loss": 0.1074, "step": 95220 }, { "epoch": 3.55, "learning_rate": 1.4493514697292815e-06, "loss": 0.1208, "step": 95250 }, { "epoch": 3.55, "learning_rate": 1.4491231834595649e-06, "loss": 0.1303, "step": 95280 }, { "epoch": 3.55, "learning_rate": 1.4488950050274784e-06, "loss": 0.0958, "step": 95310 }, { "epoch": 3.56, "learning_rate": 1.4486669343481484e-06, "loss": 0.1031, "step": 95340 }, { "epoch": 3.56, "learning_rate": 1.448438971336795e-06, "loss": 0.121, "step": 95370 }, { "epoch": 3.56, "learning_rate": 1.4482111159087308e-06, "loss": 0.1065, "step": 95400 }, { "epoch": 3.56, "learning_rate": 1.4479833679793626e-06, "loss": 0.1094, "step": 95430 }, { "epoch": 3.56, "learning_rate": 1.44775572746419e-06, "loss": 0.1047, "step": 95460 }, { "epoch": 3.56, "learning_rate": 1.4475281942788052e-06, "loss": 0.1358, "step": 95490 }, { "epoch": 3.56, "learning_rate": 1.447300768338894e-06, "loss": 0.0914, "step": 95520 }, { "epoch": 3.56, "learning_rate": 1.447073449560234e-06, "loss": 0.119, "step": 95550 }, { "epoch": 3.56, "learning_rate": 1.446846237858696e-06, "loss": 0.1437, "step": 95580 }, { "epoch": 3.57, "learning_rate": 1.4466191331502428e-06, "loss": 0.1209, "step": 95610 }, { "epoch": 3.57, "learning_rate": 1.4463921353509293e-06, "loss": 0.1104, "step": 95640 }, { "epoch": 3.57, "learning_rate": 1.4461652443769042e-06, "loss": 0.1094, "step": 95670 }, { "epoch": 3.57, "learning_rate": 1.445938460144406e-06, "loss": 0.1296, "step": 95700 }, { "epoch": 3.57, "learning_rate": 1.4457117825697664e-06, "loss": 0.1246, "step": 95730 }, { "epoch": 3.57, "learning_rate": 1.445485211569409e-06, "loss": 0.1162, "step": 95760 }, { "epoch": 3.57, "learning_rate": 1.4452587470598483e-06, "loss": 0.1259, "step": 95790 }, { "epoch": 3.57, "learning_rate": 1.4450323889576911e-06, "loss": 0.1168, "step": 95820 }, { "epoch": 3.57, "learning_rate": 1.4448061371796352e-06, "loss": 0.1107, "step": 95850 }, { "epoch": 3.58, "learning_rate": 1.4445799916424695e-06, "loss": 0.1137, "step": 95880 }, { "epoch": 3.58, "learning_rate": 1.444353952263074e-06, "loss": 0.1077, "step": 95910 }, { "epoch": 3.58, "learning_rate": 1.4441280189584204e-06, "loss": 0.1215, "step": 95940 }, { "epoch": 3.58, "learning_rate": 1.4439021916455707e-06, "loss": 0.1089, "step": 95970 }, { "epoch": 3.58, "learning_rate": 1.443676470241678e-06, "loss": 0.0999, "step": 96000 }, { "epoch": 3.58, "learning_rate": 1.4434508546639858e-06, "loss": 0.1198, "step": 96030 }, { "epoch": 3.58, "learning_rate": 1.4432253448298278e-06, "loss": 0.1172, "step": 96060 }, { "epoch": 3.58, "learning_rate": 1.4429999406566286e-06, "loss": 0.1174, "step": 96090 }, { "epoch": 3.58, "learning_rate": 1.4427746420619032e-06, "loss": 0.1225, "step": 96120 }, { "epoch": 3.59, "learning_rate": 1.442549448963256e-06, "loss": 0.1261, "step": 96150 }, { "epoch": 3.59, "learning_rate": 1.442324361278382e-06, "loss": 0.1238, "step": 96180 }, { "epoch": 3.59, "learning_rate": 1.4420993789250654e-06, "loss": 0.1185, "step": 96210 }, { "epoch": 3.59, "learning_rate": 1.4418745018211811e-06, "loss": 0.1178, "step": 96240 }, { "epoch": 3.59, "learning_rate": 1.4416497298846933e-06, "loss": 0.1304, "step": 96270 }, { "epoch": 3.59, "learning_rate": 1.4414250630336546e-06, "loss": 0.1275, "step": 96300 }, { "epoch": 3.59, "learning_rate": 1.4412005011862088e-06, "loss": 0.1186, "step": 96330 }, { "epoch": 3.59, "learning_rate": 1.4409760442605878e-06, "loss": 0.1224, "step": 96360 }, { "epoch": 3.6, "learning_rate": 1.4407516921751121e-06, "loss": 0.1124, "step": 96390 }, { "epoch": 3.6, "learning_rate": 1.440527444848193e-06, "loss": 0.1042, "step": 96420 }, { "epoch": 3.6, "learning_rate": 1.440303302198329e-06, "loss": 0.1279, "step": 96450 }, { "epoch": 3.6, "learning_rate": 1.4400792641441078e-06, "loss": 0.1308, "step": 96480 }, { "epoch": 3.6, "learning_rate": 1.4398553306042063e-06, "loss": 0.1229, "step": 96510 }, { "epoch": 3.6, "learning_rate": 1.4396315014973892e-06, "loss": 0.1241, "step": 96540 }, { "epoch": 3.6, "learning_rate": 1.4394077767425097e-06, "loss": 0.1101, "step": 96570 }, { "epoch": 3.6, "learning_rate": 1.4391841562585096e-06, "loss": 0.1157, "step": 96600 }, { "epoch": 3.6, "learning_rate": 1.4389606399644181e-06, "loss": 0.1147, "step": 96630 }, { "epoch": 3.61, "learning_rate": 1.4387372277793537e-06, "loss": 0.1013, "step": 96660 }, { "epoch": 3.61, "learning_rate": 1.4385139196225214e-06, "loss": 0.126, "step": 96690 }, { "epoch": 3.61, "learning_rate": 1.4382907154132144e-06, "loss": 0.1176, "step": 96720 }, { "epoch": 3.61, "learning_rate": 1.4380676150708138e-06, "loss": 0.1106, "step": 96750 }, { "epoch": 3.61, "learning_rate": 1.4378446185147884e-06, "loss": 0.13, "step": 96780 }, { "epoch": 3.61, "learning_rate": 1.4376217256646936e-06, "loss": 0.1368, "step": 96810 }, { "epoch": 3.61, "learning_rate": 1.4373989364401727e-06, "loss": 0.1175, "step": 96840 }, { "epoch": 3.61, "learning_rate": 1.4371762507609556e-06, "loss": 0.1321, "step": 96870 }, { "epoch": 3.61, "learning_rate": 1.43695366854686e-06, "loss": 0.1229, "step": 96900 }, { "epoch": 3.62, "learning_rate": 1.43673118971779e-06, "loss": 0.1182, "step": 96930 }, { "epoch": 3.62, "learning_rate": 1.436508814193736e-06, "loss": 0.1125, "step": 96960 }, { "epoch": 3.62, "learning_rate": 1.4362865418947767e-06, "loss": 0.1194, "step": 96990 }, { "epoch": 3.62, "learning_rate": 1.4360643727410753e-06, "loss": 0.1089, "step": 97020 }, { "epoch": 3.62, "learning_rate": 1.4358423066528826e-06, "loss": 0.1206, "step": 97050 }, { "epoch": 3.62, "learning_rate": 1.435620343550536e-06, "loss": 0.1121, "step": 97080 }, { "epoch": 3.62, "learning_rate": 1.4353984833544576e-06, "loss": 0.1156, "step": 97110 }, { "epoch": 3.62, "learning_rate": 1.4351767259851573e-06, "loss": 0.1002, "step": 97140 }, { "epoch": 3.62, "learning_rate": 1.4349550713632295e-06, "loss": 0.1131, "step": 97170 }, { "epoch": 3.63, "learning_rate": 1.434733519409356e-06, "loss": 0.1187, "step": 97200 }, { "epoch": 3.63, "learning_rate": 1.434512070044302e-06, "loss": 0.1085, "step": 97230 }, { "epoch": 3.63, "learning_rate": 1.4342907231889208e-06, "loss": 0.1583, "step": 97260 }, { "epoch": 3.63, "learning_rate": 1.4340694787641495e-06, "loss": 0.1241, "step": 97290 }, { "epoch": 3.63, "learning_rate": 1.4338483366910113e-06, "loss": 0.151, "step": 97320 }, { "epoch": 3.63, "learning_rate": 1.4336272968906137e-06, "loss": 0.1152, "step": 97350 }, { "epoch": 3.63, "learning_rate": 1.433406359284151e-06, "loss": 0.1052, "step": 97380 }, { "epoch": 3.63, "learning_rate": 1.4331855237929008e-06, "loss": 0.172, "step": 97410 }, { "epoch": 3.63, "learning_rate": 1.432964790338226e-06, "loss": 0.1167, "step": 97440 }, { "epoch": 3.64, "learning_rate": 1.432744158841575e-06, "loss": 0.1048, "step": 97470 }, { "epoch": 3.64, "learning_rate": 1.4325236292244808e-06, "loss": 0.1389, "step": 97500 }, { "epoch": 3.64, "learning_rate": 1.4323032014085592e-06, "loss": 0.1215, "step": 97530 }, { "epoch": 3.64, "learning_rate": 1.4320828753155122e-06, "loss": 0.131, "step": 97560 }, { "epoch": 3.64, "learning_rate": 1.4318626508671255e-06, "loss": 0.1223, "step": 97590 }, { "epoch": 3.64, "learning_rate": 1.4316425279852693e-06, "loss": 0.1169, "step": 97620 }, { "epoch": 3.64, "learning_rate": 1.4314225065918974e-06, "loss": 0.131, "step": 97650 }, { "epoch": 3.64, "learning_rate": 1.4312025866090474e-06, "loss": 0.1271, "step": 97680 }, { "epoch": 3.64, "learning_rate": 1.430990093615422e-06, "loss": 0.1121, "step": 97710 }, { "epoch": 3.65, "learning_rate": 1.4307703728461559e-06, "loss": 0.1135, "step": 97740 }, { "epoch": 3.65, "learning_rate": 1.430550753256617e-06, "loss": 0.1015, "step": 97770 }, { "epoch": 3.65, "learning_rate": 1.4303312347691753e-06, "loss": 0.1113, "step": 97800 }, { "epoch": 3.65, "learning_rate": 1.4301118173062832e-06, "loss": 0.1095, "step": 97830 }, { "epoch": 3.65, "learning_rate": 1.4298925007904774e-06, "loss": 0.1054, "step": 97860 }, { "epoch": 3.65, "learning_rate": 1.4296732851443772e-06, "loss": 0.108, "step": 97890 }, { "epoch": 3.65, "learning_rate": 1.4294614724960801e-06, "loss": 0.115, "step": 97920 }, { "epoch": 3.65, "learning_rate": 1.4292424550016517e-06, "loss": 0.1127, "step": 97950 }, { "epoch": 3.65, "learning_rate": 1.4290235381478545e-06, "loss": 0.0992, "step": 97980 }, { "epoch": 3.66, "learning_rate": 1.4288047218576368e-06, "loss": 0.1295, "step": 98010 }, { "epoch": 3.66, "learning_rate": 1.428586006054029e-06, "loss": 0.1439, "step": 98040 }, { "epoch": 3.66, "learning_rate": 1.4283673906601443e-06, "loss": 0.0981, "step": 98070 }, { "epoch": 3.66, "learning_rate": 1.4281488755991779e-06, "loss": 0.1126, "step": 98100 }, { "epoch": 3.66, "learning_rate": 1.4279304607944077e-06, "loss": 0.1362, "step": 98130 }, { "epoch": 3.66, "learning_rate": 1.4277121461691937e-06, "loss": 0.1153, "step": 98160 }, { "epoch": 3.66, "learning_rate": 1.4274939316469769e-06, "loss": 0.1056, "step": 98190 }, { "epoch": 3.66, "learning_rate": 1.427275817151281e-06, "loss": 0.1356, "step": 98220 }, { "epoch": 3.66, "learning_rate": 1.4270578026057119e-06, "loss": 0.1119, "step": 98250 }, { "epoch": 3.67, "learning_rate": 1.4268398879339565e-06, "loss": 0.0941, "step": 98280 }, { "epoch": 3.67, "learning_rate": 1.426622073059783e-06, "loss": 0.1, "step": 98310 }, { "epoch": 3.67, "learning_rate": 1.4264043579070419e-06, "loss": 0.1155, "step": 98340 }, { "epoch": 3.67, "learning_rate": 1.4261867423996644e-06, "loss": 0.1066, "step": 98370 }, { "epoch": 3.67, "learning_rate": 1.4259692264616632e-06, "loss": 0.1106, "step": 98400 }, { "epoch": 3.67, "learning_rate": 1.4257518100171313e-06, "loss": 0.1162, "step": 98430 }, { "epoch": 3.67, "learning_rate": 1.4255344929902443e-06, "loss": 0.1214, "step": 98460 }, { "epoch": 3.67, "learning_rate": 1.4253172753052571e-06, "loss": 0.1344, "step": 98490 }, { "epoch": 3.67, "learning_rate": 1.425100156886506e-06, "loss": 0.1302, "step": 98520 }, { "epoch": 3.68, "learning_rate": 1.4248831376584085e-06, "loss": 0.1253, "step": 98550 }, { "epoch": 3.68, "learning_rate": 1.4246662175454615e-06, "loss": 0.11, "step": 98580 }, { "epoch": 3.68, "learning_rate": 1.424449396472243e-06, "loss": 0.1244, "step": 98610 }, { "epoch": 3.68, "learning_rate": 1.4242326743634117e-06, "loss": 0.117, "step": 98640 }, { "epoch": 3.68, "learning_rate": 1.4240160511437056e-06, "loss": 0.1077, "step": 98670 }, { "epoch": 3.68, "learning_rate": 1.4237995267379438e-06, "loss": 0.109, "step": 98700 }, { "epoch": 3.68, "learning_rate": 1.4235831010710242e-06, "loss": 0.0924, "step": 98730 }, { "epoch": 3.68, "learning_rate": 1.4233667740679259e-06, "loss": 0.1209, "step": 98760 }, { "epoch": 3.68, "learning_rate": 1.4231505456537064e-06, "loss": 0.1183, "step": 98790 }, { "epoch": 3.69, "learning_rate": 1.4229344157535044e-06, "loss": 0.1203, "step": 98820 }, { "epoch": 3.69, "learning_rate": 1.4227183842925372e-06, "loss": 0.1105, "step": 98850 }, { "epoch": 3.69, "learning_rate": 1.4225024511961013e-06, "loss": 0.1109, "step": 98880 }, { "epoch": 3.69, "learning_rate": 1.4222866163895734e-06, "loss": 0.1084, "step": 98910 }, { "epoch": 3.69, "learning_rate": 1.4220708797984086e-06, "loss": 0.1072, "step": 98940 }, { "epoch": 3.69, "learning_rate": 1.421855241348142e-06, "loss": 0.1183, "step": 98970 }, { "epoch": 3.69, "learning_rate": 1.4216397009643871e-06, "loss": 0.0961, "step": 99000 }, { "epoch": 3.69, "learning_rate": 1.421424258572836e-06, "loss": 0.1152, "step": 99030 }, { "epoch": 3.69, "learning_rate": 1.4212089140992608e-06, "loss": 0.1305, "step": 99060 }, { "epoch": 3.7, "learning_rate": 1.4209936674695108e-06, "loss": 0.1238, "step": 99090 }, { "epoch": 3.7, "learning_rate": 1.420778518609515e-06, "loss": 0.1217, "step": 99120 }, { "epoch": 3.7, "learning_rate": 1.4205634674452803e-06, "loss": 0.1249, "step": 99150 }, { "epoch": 3.7, "learning_rate": 1.4203485139028927e-06, "loss": 0.14, "step": 99180 }, { "epoch": 3.7, "learning_rate": 1.4201336579085157e-06, "loss": 0.0928, "step": 99210 }, { "epoch": 3.7, "learning_rate": 1.419918899388391e-06, "loss": 0.1117, "step": 99240 }, { "epoch": 3.7, "learning_rate": 1.419704238268839e-06, "loss": 0.1062, "step": 99270 }, { "epoch": 3.7, "learning_rate": 1.4194896744762571e-06, "loss": 0.1059, "step": 99300 }, { "epoch": 3.7, "learning_rate": 1.4192752079371213e-06, "loss": 0.1109, "step": 99330 }, { "epoch": 3.71, "learning_rate": 1.4190608385779853e-06, "loss": 0.1194, "step": 99360 }, { "epoch": 3.71, "learning_rate": 1.41884656632548e-06, "loss": 0.1202, "step": 99390 }, { "epoch": 3.71, "learning_rate": 1.4186323911063138e-06, "loss": 0.1218, "step": 99420 }, { "epoch": 3.71, "learning_rate": 1.4184183128472735e-06, "loss": 0.1167, "step": 99450 }, { "epoch": 3.71, "learning_rate": 1.4182043314752219e-06, "loss": 0.1251, "step": 99480 }, { "epoch": 3.71, "learning_rate": 1.4179904469170998e-06, "loss": 0.1127, "step": 99510 }, { "epoch": 3.71, "learning_rate": 1.4177766590999248e-06, "loss": 0.1076, "step": 99540 }, { "epoch": 3.71, "learning_rate": 1.4175629679507922e-06, "loss": 0.1442, "step": 99570 }, { "epoch": 3.71, "learning_rate": 1.4173493733968724e-06, "loss": 0.1303, "step": 99600 }, { "epoch": 3.72, "learning_rate": 1.4171358753654148e-06, "loss": 0.1263, "step": 99630 }, { "epoch": 3.72, "learning_rate": 1.4169224737837442e-06, "loss": 0.1221, "step": 99660 }, { "epoch": 3.72, "learning_rate": 1.4167091685792625e-06, "loss": 0.1102, "step": 99690 }, { "epoch": 3.72, "learning_rate": 1.4164959596794472e-06, "loss": 0.1012, "step": 99720 }, { "epoch": 3.72, "learning_rate": 1.4162828470118536e-06, "loss": 0.1182, "step": 99750 }, { "epoch": 3.72, "learning_rate": 1.4160698305041118e-06, "loss": 0.1022, "step": 99780 }, { "epoch": 3.72, "learning_rate": 1.4158569100839291e-06, "loss": 0.1242, "step": 99810 }, { "epoch": 3.72, "learning_rate": 1.4156440856790887e-06, "loss": 0.1134, "step": 99840 }, { "epoch": 3.72, "learning_rate": 1.4154313572174494e-06, "loss": 0.1051, "step": 99870 }, { "epoch": 3.73, "learning_rate": 1.415218724626946e-06, "loss": 0.0987, "step": 99900 }, { "epoch": 3.73, "learning_rate": 1.4150061878355892e-06, "loss": 0.1139, "step": 99930 }, { "epoch": 3.73, "learning_rate": 1.4147937467714653e-06, "loss": 0.1376, "step": 99960 }, { "epoch": 3.73, "learning_rate": 1.4145814013627363e-06, "loss": 0.1143, "step": 99990 }, { "epoch": 3.73, "learning_rate": 1.4143691515376389e-06, "loss": 0.1214, "step": 100020 }, { "epoch": 3.73, "learning_rate": 1.4141569972244865e-06, "loss": 0.1126, "step": 100050 }, { "epoch": 3.73, "learning_rate": 1.4139449383516666e-06, "loss": 0.1227, "step": 100080 }, { "epoch": 3.73, "learning_rate": 1.4137329748476418e-06, "loss": 0.1061, "step": 100110 }, { "epoch": 3.73, "learning_rate": 1.4135211066409513e-06, "loss": 0.101, "step": 100140 }, { "epoch": 3.74, "learning_rate": 1.4133093336602069e-06, "loss": 0.1301, "step": 100170 }, { "epoch": 3.74, "learning_rate": 1.4130976558340972e-06, "loss": 0.1159, "step": 100200 }, { "epoch": 3.74, "learning_rate": 1.4128860730913845e-06, "loss": 0.0994, "step": 100230 }, { "epoch": 3.74, "learning_rate": 1.4126745853609064e-06, "loss": 0.1119, "step": 100260 }, { "epoch": 3.74, "learning_rate": 1.4124631925715744e-06, "loss": 0.1187, "step": 100290 }, { "epoch": 3.74, "learning_rate": 1.4122518946523744e-06, "loss": 0.1317, "step": 100320 }, { "epoch": 3.74, "learning_rate": 1.4120406915323675e-06, "loss": 0.1247, "step": 100350 }, { "epoch": 3.74, "learning_rate": 1.4118295831406886e-06, "loss": 0.1153, "step": 100380 }, { "epoch": 3.74, "learning_rate": 1.4116185694065465e-06, "loss": 0.0972, "step": 100410 }, { "epoch": 3.75, "learning_rate": 1.4114076502592238e-06, "loss": 0.1053, "step": 100440 }, { "epoch": 3.75, "learning_rate": 1.411196825628078e-06, "loss": 0.1179, "step": 100470 }, { "epoch": 3.75, "learning_rate": 1.4109860954425395e-06, "loss": 0.1218, "step": 100500 }, { "epoch": 3.75, "learning_rate": 1.410775459632113e-06, "loss": 0.1105, "step": 100530 }, { "epoch": 3.75, "learning_rate": 1.4105649181263764e-06, "loss": 0.122, "step": 100560 }, { "epoch": 3.75, "learning_rate": 1.4103544708549818e-06, "loss": 0.1452, "step": 100590 }, { "epoch": 3.75, "learning_rate": 1.4101441177476543e-06, "loss": 0.0935, "step": 100620 }, { "epoch": 3.75, "learning_rate": 1.4099338587341923e-06, "loss": 0.1175, "step": 100650 }, { "epoch": 3.76, "learning_rate": 1.4097236937444675e-06, "loss": 0.1094, "step": 100680 }, { "epoch": 3.76, "learning_rate": 1.409513622708425e-06, "loss": 0.1027, "step": 100710 }, { "epoch": 3.76, "learning_rate": 1.4093036455560824e-06, "loss": 0.1261, "step": 100740 }, { "epoch": 3.76, "learning_rate": 1.4090937622175312e-06, "loss": 0.1162, "step": 100770 }, { "epoch": 3.76, "learning_rate": 1.4088839726229348e-06, "loss": 0.117, "step": 100800 }, { "epoch": 3.76, "learning_rate": 1.4086742767025294e-06, "loss": 0.1106, "step": 100830 }, { "epoch": 3.76, "learning_rate": 1.408464674386625e-06, "loss": 0.1228, "step": 100860 }, { "epoch": 3.76, "learning_rate": 1.4082551656056025e-06, "loss": 0.1195, "step": 100890 }, { "epoch": 3.76, "learning_rate": 1.408045750289917e-06, "loss": 0.1171, "step": 100920 }, { "epoch": 3.77, "learning_rate": 1.4078364283700944e-06, "loss": 0.1187, "step": 100950 }, { "epoch": 3.77, "learning_rate": 1.4076271997767338e-06, "loss": 0.0992, "step": 100980 }, { "epoch": 3.77, "learning_rate": 1.4074180644405066e-06, "loss": 0.114, "step": 101010 }, { "epoch": 3.77, "learning_rate": 1.4072090222921556e-06, "loss": 0.1496, "step": 101040 }, { "epoch": 3.77, "learning_rate": 1.407000073262496e-06, "loss": 0.106, "step": 101070 }, { "epoch": 3.77, "learning_rate": 1.4067912172824144e-06, "loss": 0.125, "step": 101100 }, { "epoch": 3.77, "learning_rate": 1.4065824542828704e-06, "loss": 0.1184, "step": 101130 }, { "epoch": 3.77, "learning_rate": 1.406373784194894e-06, "loss": 0.101, "step": 101160 }, { "epoch": 3.77, "learning_rate": 1.4061652069495874e-06, "loss": 0.1235, "step": 101190 }, { "epoch": 3.78, "learning_rate": 1.4059567224781244e-06, "loss": 0.1172, "step": 101220 }, { "epoch": 3.78, "learning_rate": 1.4057483307117492e-06, "loss": 0.1236, "step": 101250 }, { "epoch": 3.78, "learning_rate": 1.4055469733943611e-06, "loss": 0.1012, "step": 101280 }, { "epoch": 3.78, "learning_rate": 1.405338763747695e-06, "loss": 0.1181, "step": 101310 }, { "epoch": 3.78, "learning_rate": 1.4051306466025635e-06, "loss": 0.1481, "step": 101340 }, { "epoch": 3.78, "learning_rate": 1.4049226218904939e-06, "loss": 0.1125, "step": 101370 }, { "epoch": 3.78, "learning_rate": 1.4047146895430846e-06, "loss": 0.0995, "step": 101400 }, { "epoch": 3.78, "learning_rate": 1.4045068494920044e-06, "loss": 0.1279, "step": 101430 }, { "epoch": 3.78, "learning_rate": 1.4042991016689937e-06, "loss": 0.1084, "step": 101460 }, { "epoch": 3.79, "learning_rate": 1.4040914460058624e-06, "loss": 0.1215, "step": 101490 }, { "epoch": 3.79, "learning_rate": 1.4038838824344921e-06, "loss": 0.1227, "step": 101520 }, { "epoch": 3.79, "learning_rate": 1.4036764108868342e-06, "loss": 0.1338, "step": 101550 }, { "epoch": 3.79, "learning_rate": 1.4034690312949106e-06, "loss": 0.1117, "step": 101580 }, { "epoch": 3.79, "learning_rate": 1.4032617435908135e-06, "loss": 0.1473, "step": 101610 }, { "epoch": 3.79, "learning_rate": 1.4030545477067056e-06, "loss": 0.1147, "step": 101640 }, { "epoch": 3.79, "learning_rate": 1.402847443574819e-06, "loss": 0.0933, "step": 101670 }, { "epoch": 3.79, "learning_rate": 1.4026404311274564e-06, "loss": 0.101, "step": 101700 }, { "epoch": 3.79, "learning_rate": 1.40243351029699e-06, "loss": 0.111, "step": 101730 }, { "epoch": 3.8, "learning_rate": 1.4022266810158624e-06, "loss": 0.1315, "step": 101760 }, { "epoch": 3.8, "learning_rate": 1.4020199432165854e-06, "loss": 0.1191, "step": 101790 }, { "epoch": 3.8, "learning_rate": 1.4018132968317404e-06, "loss": 0.1174, "step": 101820 }, { "epoch": 3.8, "learning_rate": 1.4016067417939789e-06, "loss": 0.1159, "step": 101850 }, { "epoch": 3.8, "learning_rate": 1.401400278036021e-06, "loss": 0.1581, "step": 101880 }, { "epoch": 3.8, "learning_rate": 1.401193905490657e-06, "loss": 0.1141, "step": 101910 }, { "epoch": 3.8, "learning_rate": 1.4009876240907458e-06, "loss": 0.1043, "step": 101940 }, { "epoch": 3.8, "learning_rate": 1.4007814337692157e-06, "loss": 0.1014, "step": 101970 }, { "epoch": 3.8, "learning_rate": 1.4005753344590648e-06, "loss": 0.107, "step": 102000 }, { "epoch": 3.81, "learning_rate": 1.4003693260933584e-06, "loss": 0.1048, "step": 102030 }, { "epoch": 3.81, "learning_rate": 1.400163408605233e-06, "loss": 0.1149, "step": 102060 }, { "epoch": 3.81, "learning_rate": 1.3999575819278919e-06, "loss": 0.0958, "step": 102090 }, { "epoch": 3.81, "learning_rate": 1.399751845994608e-06, "loss": 0.1199, "step": 102120 }, { "epoch": 3.81, "learning_rate": 1.399546200738723e-06, "loss": 0.1058, "step": 102150 }, { "epoch": 3.81, "learning_rate": 1.399340646093647e-06, "loss": 0.1048, "step": 102180 }, { "epoch": 3.81, "learning_rate": 1.399135181992858e-06, "loss": 0.1233, "step": 102210 }, { "epoch": 3.81, "learning_rate": 1.3989298083699029e-06, "loss": 0.0979, "step": 102240 }, { "epoch": 3.81, "learning_rate": 1.3987245251583972e-06, "loss": 0.1196, "step": 102270 }, { "epoch": 3.82, "learning_rate": 1.3985193322920238e-06, "loss": 0.0984, "step": 102300 }, { "epoch": 3.82, "learning_rate": 1.3983142297045335e-06, "loss": 0.1046, "step": 102330 }, { "epoch": 3.82, "learning_rate": 1.3981092173297464e-06, "loss": 0.1106, "step": 102360 }, { "epoch": 3.82, "learning_rate": 1.3979042951015493e-06, "loss": 0.132, "step": 102390 }, { "epoch": 3.82, "learning_rate": 1.397699462953897e-06, "loss": 0.1121, "step": 102420 }, { "epoch": 3.82, "learning_rate": 1.3974947208208125e-06, "loss": 0.1055, "step": 102450 }, { "epoch": 3.82, "learning_rate": 1.3972900686363861e-06, "loss": 0.119, "step": 102480 }, { "epoch": 3.82, "learning_rate": 1.3970855063347757e-06, "loss": 0.1229, "step": 102510 }, { "epoch": 3.82, "learning_rate": 1.3968810338502064e-06, "loss": 0.1116, "step": 102540 }, { "epoch": 3.83, "learning_rate": 1.3966766511169712e-06, "loss": 0.1086, "step": 102570 }, { "epoch": 3.83, "learning_rate": 1.39647235806943e-06, "loss": 0.1059, "step": 102600 }, { "epoch": 3.83, "learning_rate": 1.3962681546420103e-06, "loss": 0.1363, "step": 102630 }, { "epoch": 3.83, "learning_rate": 1.3960640407692055e-06, "loss": 0.1313, "step": 102660 }, { "epoch": 3.83, "learning_rate": 1.395860016385578e-06, "loss": 0.1198, "step": 102690 }, { "epoch": 3.83, "learning_rate": 1.3956560814257553e-06, "loss": 0.1155, "step": 102720 }, { "epoch": 3.83, "learning_rate": 1.3954522358244327e-06, "loss": 0.0942, "step": 102750 }, { "epoch": 3.83, "learning_rate": 1.3952484795163714e-06, "loss": 0.0958, "step": 102780 }, { "epoch": 3.83, "learning_rate": 1.395044812436401e-06, "loss": 0.1206, "step": 102810 }, { "epoch": 3.84, "learning_rate": 1.3948412345194157e-06, "loss": 0.1209, "step": 102840 }, { "epoch": 3.84, "learning_rate": 1.3946377457003774e-06, "loss": 0.0968, "step": 102870 }, { "epoch": 3.84, "learning_rate": 1.3944343459143137e-06, "loss": 0.1192, "step": 102900 }, { "epoch": 3.84, "learning_rate": 1.394231035096319e-06, "loss": 0.1358, "step": 102930 }, { "epoch": 3.84, "learning_rate": 1.3940278131815536e-06, "loss": 0.1137, "step": 102960 }, { "epoch": 3.84, "learning_rate": 1.3938246801052445e-06, "loss": 0.1127, "step": 102990 }, { "epoch": 3.84, "learning_rate": 1.3936216358026841e-06, "loss": 0.1176, "step": 103020 }, { "epoch": 3.84, "learning_rate": 1.3934186802092306e-06, "loss": 0.0936, "step": 103050 }, { "epoch": 3.84, "learning_rate": 1.3932158132603095e-06, "loss": 0.1183, "step": 103080 }, { "epoch": 3.85, "learning_rate": 1.3930130348914097e-06, "loss": 0.1232, "step": 103110 }, { "epoch": 3.85, "learning_rate": 1.3928103450380883e-06, "loss": 0.106, "step": 103140 }, { "epoch": 3.85, "learning_rate": 1.3926077436359668e-06, "loss": 0.1055, "step": 103170 }, { "epoch": 3.85, "learning_rate": 1.3924052306207317e-06, "loss": 0.1181, "step": 103200 }, { "epoch": 3.85, "learning_rate": 1.3922028059281358e-06, "loss": 0.117, "step": 103230 }, { "epoch": 3.85, "learning_rate": 1.3920004694939975e-06, "loss": 0.1069, "step": 103260 }, { "epoch": 3.85, "learning_rate": 1.3917982212541996e-06, "loss": 0.1076, "step": 103290 }, { "epoch": 3.85, "learning_rate": 1.3915960611446907e-06, "loss": 0.103, "step": 103320 }, { "epoch": 3.85, "learning_rate": 1.3913939891014843e-06, "loss": 0.1132, "step": 103350 }, { "epoch": 3.86, "learning_rate": 1.3911920050606586e-06, "loss": 0.1139, "step": 103380 }, { "epoch": 3.86, "learning_rate": 1.3909901089583574e-06, "loss": 0.1523, "step": 103410 }, { "epoch": 3.86, "learning_rate": 1.3907883007307891e-06, "loss": 0.1019, "step": 103440 }, { "epoch": 3.86, "learning_rate": 1.3905865803142269e-06, "loss": 0.1363, "step": 103470 }, { "epoch": 3.86, "learning_rate": 1.390384947645008e-06, "loss": 0.1106, "step": 103500 }, { "epoch": 3.86, "learning_rate": 1.3901834026595352e-06, "loss": 0.1135, "step": 103530 }, { "epoch": 3.86, "learning_rate": 1.389988659128795e-06, "loss": 0.1244, "step": 103560 }, { "epoch": 3.86, "learning_rate": 1.389787286402743e-06, "loss": 0.1028, "step": 103590 }, { "epoch": 3.86, "learning_rate": 1.3895860011721435e-06, "loss": 0.1134, "step": 103620 }, { "epoch": 3.87, "learning_rate": 1.3893848033736538e-06, "loss": 0.1133, "step": 103650 }, { "epoch": 3.87, "learning_rate": 1.389183692943996e-06, "loss": 0.1096, "step": 103680 }, { "epoch": 3.87, "learning_rate": 1.3889826698199566e-06, "loss": 0.1113, "step": 103710 }, { "epoch": 3.87, "learning_rate": 1.3887817339383853e-06, "loss": 0.1163, "step": 103740 }, { "epoch": 3.87, "learning_rate": 1.388580885236196e-06, "loss": 0.1146, "step": 103770 }, { "epoch": 3.87, "learning_rate": 1.3883801236503666e-06, "loss": 0.1152, "step": 103800 }, { "epoch": 3.87, "learning_rate": 1.3881794491179394e-06, "loss": 0.1135, "step": 103830 }, { "epoch": 3.87, "learning_rate": 1.3879788615760188e-06, "loss": 0.1387, "step": 103860 }, { "epoch": 3.87, "learning_rate": 1.3877783609617737e-06, "loss": 0.1048, "step": 103890 }, { "epoch": 3.88, "learning_rate": 1.3875779472124371e-06, "loss": 0.1132, "step": 103920 }, { "epoch": 3.88, "learning_rate": 1.3873776202653045e-06, "loss": 0.1212, "step": 103950 }, { "epoch": 3.88, "learning_rate": 1.3871773800577352e-06, "loss": 0.1054, "step": 103980 }, { "epoch": 3.88, "learning_rate": 1.3869772265271516e-06, "loss": 0.1036, "step": 104010 }, { "epoch": 3.88, "learning_rate": 1.3867771596110397e-06, "loss": 0.1176, "step": 104040 }, { "epoch": 3.88, "learning_rate": 1.3865771792469473e-06, "loss": 0.1169, "step": 104070 }, { "epoch": 3.88, "learning_rate": 1.3863772853724872e-06, "loss": 0.1286, "step": 104100 }, { "epoch": 3.88, "learning_rate": 1.3861774779253334e-06, "loss": 0.1002, "step": 104130 }, { "epoch": 3.88, "learning_rate": 1.385977756843224e-06, "loss": 0.1055, "step": 104160 }, { "epoch": 3.89, "learning_rate": 1.3857781220639588e-06, "loss": 0.1271, "step": 104190 }, { "epoch": 3.89, "learning_rate": 1.3855785735254016e-06, "loss": 0.111, "step": 104220 }, { "epoch": 3.89, "learning_rate": 1.385379111165477e-06, "loss": 0.1064, "step": 104250 }, { "epoch": 3.89, "learning_rate": 1.3851797349221744e-06, "loss": 0.1118, "step": 104280 }, { "epoch": 3.89, "learning_rate": 1.3849804447335435e-06, "loss": 0.1082, "step": 104310 }, { "epoch": 3.89, "learning_rate": 1.3847812405376978e-06, "loss": 0.1194, "step": 104340 }, { "epoch": 3.89, "learning_rate": 1.3845821222728126e-06, "loss": 0.1165, "step": 104370 }, { "epoch": 3.89, "learning_rate": 1.384383089877125e-06, "loss": 0.1352, "step": 104400 }, { "epoch": 3.89, "learning_rate": 1.3841841432889352e-06, "loss": 0.1224, "step": 104430 }, { "epoch": 3.9, "learning_rate": 1.3839852824466047e-06, "loss": 0.1184, "step": 104460 }, { "epoch": 3.9, "learning_rate": 1.383786507288557e-06, "loss": 0.095, "step": 104490 }, { "epoch": 3.9, "learning_rate": 1.3835878177532778e-06, "loss": 0.114, "step": 104520 }, { "epoch": 3.9, "learning_rate": 1.3833892137793143e-06, "loss": 0.1229, "step": 104550 }, { "epoch": 3.9, "learning_rate": 1.383190695305276e-06, "loss": 0.1303, "step": 104580 }, { "epoch": 3.9, "learning_rate": 1.382992262269833e-06, "loss": 0.1409, "step": 104610 }, { "epoch": 3.9, "learning_rate": 1.3827939146117186e-06, "loss": 0.1269, "step": 104640 }, { "epoch": 3.9, "learning_rate": 1.3825956522697256e-06, "loss": 0.0983, "step": 104670 }, { "epoch": 3.9, "learning_rate": 1.3823974751827096e-06, "loss": 0.1142, "step": 104700 }, { "epoch": 3.91, "learning_rate": 1.3821993832895875e-06, "loss": 0.1333, "step": 104730 }, { "epoch": 3.91, "learning_rate": 1.3820013765293367e-06, "loss": 0.1081, "step": 104760 }, { "epoch": 3.91, "learning_rate": 1.3818034548409964e-06, "loss": 0.0999, "step": 104790 }, { "epoch": 3.91, "learning_rate": 1.3816056181636663e-06, "loss": 0.1191, "step": 104820 }, { "epoch": 3.91, "learning_rate": 1.381407866436508e-06, "loss": 0.1172, "step": 104850 }, { "epoch": 3.91, "learning_rate": 1.3812101995987435e-06, "loss": 0.1165, "step": 104880 }, { "epoch": 3.91, "learning_rate": 1.3810126175896551e-06, "loss": 0.1119, "step": 104910 }, { "epoch": 3.91, "learning_rate": 1.380815120348587e-06, "loss": 0.0985, "step": 104940 }, { "epoch": 3.92, "learning_rate": 1.3806177078149435e-06, "loss": 0.1227, "step": 104970 }, { "epoch": 3.92, "learning_rate": 1.3804203799281896e-06, "loss": 0.1505, "step": 105000 }, { "epoch": 3.92, "learning_rate": 1.3802231366278507e-06, "loss": 0.1282, "step": 105030 }, { "epoch": 3.92, "learning_rate": 1.380025977853513e-06, "loss": 0.1181, "step": 105060 }, { "epoch": 3.92, "learning_rate": 1.3798289035448228e-06, "loss": 0.1274, "step": 105090 }, { "epoch": 3.92, "learning_rate": 1.379631913641487e-06, "loss": 0.0914, "step": 105120 }, { "epoch": 3.92, "learning_rate": 1.3794350080832725e-06, "loss": 0.1216, "step": 105150 }, { "epoch": 3.92, "learning_rate": 1.3792381868100063e-06, "loss": 0.1029, "step": 105180 }, { "epoch": 3.92, "learning_rate": 1.3790414497615755e-06, "loss": 0.134, "step": 105210 }, { "epoch": 3.93, "learning_rate": 1.3788447968779277e-06, "loss": 0.1104, "step": 105240 }, { "epoch": 3.93, "learning_rate": 1.37864822809907e-06, "loss": 0.1341, "step": 105270 }, { "epoch": 3.93, "learning_rate": 1.3784582915027789e-06, "loss": 0.1317, "step": 105300 }, { "epoch": 3.93, "learning_rate": 1.3782618879552259e-06, "loss": 0.1131, "step": 105330 }, { "epoch": 3.93, "learning_rate": 1.3780655683348362e-06, "loss": 0.1161, "step": 105360 }, { "epoch": 3.93, "learning_rate": 1.3778693325818535e-06, "loss": 0.111, "step": 105390 }, { "epoch": 3.93, "learning_rate": 1.3776731806365815e-06, "loss": 0.1064, "step": 105420 }, { "epoch": 3.93, "learning_rate": 1.3774771124393832e-06, "loss": 0.1345, "step": 105450 }, { "epoch": 3.93, "learning_rate": 1.3772811279306814e-06, "loss": 0.1034, "step": 105480 }, { "epoch": 3.94, "learning_rate": 1.3770852270509577e-06, "loss": 0.1205, "step": 105510 }, { "epoch": 3.94, "learning_rate": 1.376889409740753e-06, "loss": 0.1096, "step": 105540 }, { "epoch": 3.94, "learning_rate": 1.3766936759406681e-06, "loss": 0.1226, "step": 105570 }, { "epoch": 3.94, "learning_rate": 1.376498025591362e-06, "loss": 0.1108, "step": 105600 }, { "epoch": 3.94, "learning_rate": 1.3763024586335532e-06, "loss": 0.1035, "step": 105630 }, { "epoch": 3.94, "learning_rate": 1.376106975008019e-06, "loss": 0.1237, "step": 105660 }, { "epoch": 3.94, "learning_rate": 1.375911574655595e-06, "loss": 0.1138, "step": 105690 }, { "epoch": 3.94, "learning_rate": 1.3757162575171767e-06, "loss": 0.0998, "step": 105720 }, { "epoch": 3.94, "learning_rate": 1.3755210235337177e-06, "loss": 0.1323, "step": 105750 }, { "epoch": 3.95, "learning_rate": 1.37532587264623e-06, "loss": 0.0974, "step": 105780 }, { "epoch": 3.95, "learning_rate": 1.3751308047957848e-06, "loss": 0.116, "step": 105810 }, { "epoch": 3.95, "learning_rate": 1.3749358199235115e-06, "loss": 0.1157, "step": 105840 }, { "epoch": 3.95, "learning_rate": 1.3747409179705975e-06, "loss": 0.1282, "step": 105870 }, { "epoch": 3.95, "learning_rate": 1.3745460988782894e-06, "loss": 0.1165, "step": 105900 }, { "epoch": 3.95, "learning_rate": 1.374351362587891e-06, "loss": 0.102, "step": 105930 }, { "epoch": 3.95, "learning_rate": 1.3741567090407655e-06, "loss": 0.1212, "step": 105960 }, { "epoch": 3.95, "learning_rate": 1.3739621381783332e-06, "loss": 0.1073, "step": 105990 }, { "epoch": 3.95, "learning_rate": 1.3737676499420727e-06, "loss": 0.1018, "step": 106020 }, { "epoch": 3.96, "learning_rate": 1.3735732442735211e-06, "loss": 0.0942, "step": 106050 }, { "epoch": 3.96, "learning_rate": 1.3733789211142725e-06, "loss": 0.1314, "step": 106080 }, { "epoch": 3.96, "learning_rate": 1.37318468040598e-06, "loss": 0.1032, "step": 106110 }, { "epoch": 3.96, "learning_rate": 1.3729905220903536e-06, "loss": 0.0956, "step": 106140 }, { "epoch": 3.96, "learning_rate": 1.3727964461091606e-06, "loss": 0.1217, "step": 106170 }, { "epoch": 3.96, "learning_rate": 1.372602452404227e-06, "loss": 0.1134, "step": 106200 }, { "epoch": 3.96, "learning_rate": 1.3724085409174356e-06, "loss": 0.1021, "step": 106230 }, { "epoch": 3.96, "learning_rate": 1.3722147115907266e-06, "loss": 0.1069, "step": 106260 }, { "epoch": 3.96, "learning_rate": 1.3720209643660984e-06, "loss": 0.0964, "step": 106290 }, { "epoch": 3.97, "learning_rate": 1.3718272991856054e-06, "loss": 0.1349, "step": 106320 }, { "epoch": 3.97, "learning_rate": 1.371633715991361e-06, "loss": 0.109, "step": 106350 }, { "epoch": 3.97, "learning_rate": 1.3714402147255338e-06, "loss": 0.1037, "step": 106380 }, { "epoch": 3.97, "learning_rate": 1.3712467953303504e-06, "loss": 0.1073, "step": 106410 }, { "epoch": 3.97, "learning_rate": 1.3710534577480952e-06, "loss": 0.1274, "step": 106440 }, { "epoch": 3.97, "learning_rate": 1.3708602019211083e-06, "loss": 0.1111, "step": 106470 }, { "epoch": 3.97, "learning_rate": 1.3706670277917871e-06, "loss": 0.0943, "step": 106500 }, { "epoch": 3.97, "learning_rate": 1.3704739353025865e-06, "loss": 0.1143, "step": 106530 }, { "epoch": 3.97, "learning_rate": 1.3702809243960166e-06, "loss": 0.1126, "step": 106560 }, { "epoch": 3.98, "learning_rate": 1.3700879950146456e-06, "loss": 0.1193, "step": 106590 }, { "epoch": 3.98, "learning_rate": 1.3698951471010977e-06, "loss": 0.1067, "step": 106620 }, { "epoch": 3.98, "learning_rate": 1.3697023805980537e-06, "loss": 0.1251, "step": 106650 }, { "epoch": 3.98, "learning_rate": 1.3695096954482504e-06, "loss": 0.1187, "step": 106680 }, { "epoch": 3.98, "learning_rate": 1.369317091594482e-06, "loss": 0.1151, "step": 106710 }, { "epoch": 3.98, "learning_rate": 1.3691245689795982e-06, "loss": 0.1155, "step": 106740 }, { "epoch": 3.98, "learning_rate": 1.3689321275465047e-06, "loss": 0.1101, "step": 106770 }, { "epoch": 3.98, "learning_rate": 1.3687397672381643e-06, "loss": 0.1228, "step": 106800 }, { "epoch": 3.98, "learning_rate": 1.3685474879975953e-06, "loss": 0.1129, "step": 106830 }, { "epoch": 3.99, "learning_rate": 1.3683552897678718e-06, "loss": 0.1096, "step": 106860 }, { "epoch": 3.99, "learning_rate": 1.3681631724921244e-06, "loss": 0.1081, "step": 106890 }, { "epoch": 3.99, "learning_rate": 1.367971136113539e-06, "loss": 0.1173, "step": 106920 }, { "epoch": 3.99, "learning_rate": 1.3677791805753576e-06, "loss": 0.1289, "step": 106950 }, { "epoch": 3.99, "learning_rate": 1.3675873058208785e-06, "loss": 0.1104, "step": 106980 }, { "epoch": 3.99, "learning_rate": 1.3673955117934549e-06, "loss": 0.1059, "step": 107010 }, { "epoch": 3.99, "learning_rate": 1.3672037984364952e-06, "loss": 0.1283, "step": 107040 }, { "epoch": 3.99, "learning_rate": 1.3670121656934646e-06, "loss": 0.0906, "step": 107070 }, { "epoch": 3.99, "learning_rate": 1.3668206135078825e-06, "loss": 0.1177, "step": 107100 }, { "epoch": 4.0, "learning_rate": 1.3666291418233252e-06, "loss": 0.1346, "step": 107130 }, { "epoch": 4.0, "learning_rate": 1.3664377505834228e-06, "loss": 0.126, "step": 107160 }, { "epoch": 4.0, "learning_rate": 1.366246439731861e-06, "loss": 0.144, "step": 107190 }, { "epoch": 4.0, "learning_rate": 1.366055209212381e-06, "loss": 0.1125, "step": 107220 }, { "epoch": 4.0, "learning_rate": 1.3658640589687793e-06, "loss": 0.1121, "step": 107250 }, { "epoch": 4.0, "learning_rate": 1.365672988944907e-06, "loss": 0.1044, "step": 107280 }, { "epoch": 4.0, "learning_rate": 1.3654819990846702e-06, "loss": 0.0992, "step": 107310 }, { "epoch": 4.0, "learning_rate": 1.3652910893320304e-06, "loss": 0.1141, "step": 107340 }, { "epoch": 4.0, "learning_rate": 1.3651002596310025e-06, "loss": 0.1074, "step": 107370 }, { "epoch": 4.01, "learning_rate": 1.3649095099256587e-06, "loss": 0.1144, "step": 107400 }, { "epoch": 4.01, "learning_rate": 1.3647188401601233e-06, "loss": 0.1032, "step": 107430 }, { "epoch": 4.01, "learning_rate": 1.3645282502785764e-06, "loss": 0.1415, "step": 107460 }, { "epoch": 4.01, "learning_rate": 1.3643377402252533e-06, "loss": 0.1187, "step": 107490 }, { "epoch": 4.01, "learning_rate": 1.3641473099444421e-06, "loss": 0.1106, "step": 107520 }, { "epoch": 4.01, "learning_rate": 1.3639569593804868e-06, "loss": 0.1035, "step": 107550 }, { "epoch": 4.01, "learning_rate": 1.3637666884777853e-06, "loss": 0.0971, "step": 107580 }, { "epoch": 4.01, "learning_rate": 1.3635764971807894e-06, "loss": 0.1054, "step": 107610 }, { "epoch": 4.01, "learning_rate": 1.3633863854340057e-06, "loss": 0.1055, "step": 107640 }, { "epoch": 4.02, "learning_rate": 1.363196353181994e-06, "loss": 0.0957, "step": 107670 }, { "epoch": 4.02, "learning_rate": 1.3630064003693693e-06, "loss": 0.1063, "step": 107700 }, { "epoch": 4.02, "learning_rate": 1.3628165269408007e-06, "loss": 0.1403, "step": 107730 }, { "epoch": 4.02, "learning_rate": 1.3626267328410096e-06, "loss": 0.1029, "step": 107760 }, { "epoch": 4.02, "learning_rate": 1.362437018014773e-06, "loss": 0.1215, "step": 107790 }, { "epoch": 4.02, "learning_rate": 1.3622473824069208e-06, "loss": 0.1096, "step": 107820 }, { "epoch": 4.02, "learning_rate": 1.3620578259623368e-06, "loss": 0.1101, "step": 107850 }, { "epoch": 4.02, "learning_rate": 1.361868348625959e-06, "loss": 0.1188, "step": 107880 }, { "epoch": 4.02, "learning_rate": 1.3616789503427782e-06, "loss": 0.1114, "step": 107910 }, { "epoch": 4.03, "learning_rate": 1.3614896310578394e-06, "loss": 0.1102, "step": 107940 }, { "epoch": 4.03, "learning_rate": 1.3613003907162404e-06, "loss": 0.1081, "step": 107970 }, { "epoch": 4.03, "learning_rate": 1.3611112292631326e-06, "loss": 0.0958, "step": 108000 }, { "epoch": 4.03, "learning_rate": 1.3609221466437218e-06, "loss": 0.1194, "step": 108030 }, { "epoch": 4.03, "learning_rate": 1.3607331428032656e-06, "loss": 0.1281, "step": 108060 }, { "epoch": 4.03, "learning_rate": 1.3605442176870751e-06, "loss": 0.1063, "step": 108090 }, { "epoch": 4.03, "learning_rate": 1.360355371240515e-06, "loss": 0.0942, "step": 108120 }, { "epoch": 4.03, "learning_rate": 1.3601666034090034e-06, "loss": 0.1045, "step": 108150 }, { "epoch": 4.03, "learning_rate": 1.35997791413801e-06, "loss": 0.1134, "step": 108180 }, { "epoch": 4.04, "learning_rate": 1.3597893033730595e-06, "loss": 0.1106, "step": 108210 }, { "epoch": 4.04, "learning_rate": 1.3596007710597273e-06, "loss": 0.1185, "step": 108240 }, { "epoch": 4.04, "learning_rate": 1.359412317143643e-06, "loss": 0.122, "step": 108270 }, { "epoch": 4.04, "learning_rate": 1.3592239415704883e-06, "loss": 0.1212, "step": 108300 }, { "epoch": 4.04, "learning_rate": 1.359035644285998e-06, "loss": 0.1086, "step": 108330 }, { "epoch": 4.04, "learning_rate": 1.3588536979444226e-06, "loss": 0.1204, "step": 108360 }, { "epoch": 4.04, "learning_rate": 1.3586655544695377e-06, "loss": 0.1009, "step": 108390 }, { "epoch": 4.04, "learning_rate": 1.3584774891226393e-06, "loss": 0.1142, "step": 108420 }, { "epoch": 4.04, "learning_rate": 1.35828950184967e-06, "loss": 0.119, "step": 108450 }, { "epoch": 4.05, "learning_rate": 1.3581015925966255e-06, "loss": 0.1209, "step": 108480 }, { "epoch": 4.05, "learning_rate": 1.357913761309553e-06, "loss": 0.1119, "step": 108510 }, { "epoch": 4.05, "learning_rate": 1.3577260079345526e-06, "loss": 0.1257, "step": 108540 }, { "epoch": 4.05, "learning_rate": 1.3575383324177758e-06, "loss": 0.1217, "step": 108570 }, { "epoch": 4.05, "learning_rate": 1.3573507347054265e-06, "loss": 0.1121, "step": 108600 }, { "epoch": 4.05, "learning_rate": 1.3571632147437613e-06, "loss": 0.1233, "step": 108630 }, { "epoch": 4.05, "learning_rate": 1.3569757724790883e-06, "loss": 0.1067, "step": 108660 }, { "epoch": 4.05, "learning_rate": 1.3567884078577668e-06, "loss": 0.1206, "step": 108690 }, { "epoch": 4.05, "learning_rate": 1.3566011208262083e-06, "loss": 0.1261, "step": 108720 }, { "epoch": 4.06, "learning_rate": 1.3564139113308771e-06, "loss": 0.1353, "step": 108750 }, { "epoch": 4.06, "learning_rate": 1.356226779318288e-06, "loss": 0.113, "step": 108780 }, { "epoch": 4.06, "learning_rate": 1.3560397247350075e-06, "loss": 0.1146, "step": 108810 }, { "epoch": 4.06, "learning_rate": 1.3558527475276545e-06, "loss": 0.112, "step": 108840 }, { "epoch": 4.06, "learning_rate": 1.3556658476428985e-06, "loss": 0.1154, "step": 108870 }, { "epoch": 4.06, "learning_rate": 1.3554790250274613e-06, "loss": 0.1079, "step": 108900 }, { "epoch": 4.06, "learning_rate": 1.3552922796281152e-06, "loss": 0.115, "step": 108930 }, { "epoch": 4.06, "learning_rate": 1.3551056113916844e-06, "loss": 0.106, "step": 108960 }, { "epoch": 4.06, "learning_rate": 1.3549190202650446e-06, "loss": 0.1007, "step": 108990 }, { "epoch": 4.07, "learning_rate": 1.3547325061951213e-06, "loss": 0.1171, "step": 109020 }, { "epoch": 4.07, "learning_rate": 1.3545460691288933e-06, "loss": 0.1176, "step": 109050 }, { "epoch": 4.07, "learning_rate": 1.3543597090133884e-06, "loss": 0.1057, "step": 109080 }, { "epoch": 4.07, "learning_rate": 1.3541734257956868e-06, "loss": 0.1183, "step": 109110 }, { "epoch": 4.07, "learning_rate": 1.3539872194229187e-06, "loss": 0.0971, "step": 109140 }, { "epoch": 4.07, "learning_rate": 1.3538010898422657e-06, "loss": 0.1048, "step": 109170 }, { "epoch": 4.07, "learning_rate": 1.3536150370009606e-06, "loss": 0.1265, "step": 109200 }, { "epoch": 4.07, "learning_rate": 1.353429060846286e-06, "loss": 0.1153, "step": 109230 }, { "epoch": 4.08, "learning_rate": 1.3532431613255755e-06, "loss": 0.1093, "step": 109260 }, { "epoch": 4.08, "learning_rate": 1.3530573383862138e-06, "loss": 0.1128, "step": 109290 }, { "epoch": 4.08, "learning_rate": 1.3528715919756358e-06, "loss": 0.1073, "step": 109320 }, { "epoch": 4.08, "learning_rate": 1.352685922041327e-06, "loss": 0.1076, "step": 109350 }, { "epoch": 4.08, "learning_rate": 1.352500328530823e-06, "loss": 0.1086, "step": 109380 }, { "epoch": 4.08, "learning_rate": 1.35231481139171e-06, "loss": 0.1398, "step": 109410 }, { "epoch": 4.08, "learning_rate": 1.3521293705716248e-06, "loss": 0.1132, "step": 109440 }, { "epoch": 4.08, "learning_rate": 1.3519440060182543e-06, "loss": 0.1092, "step": 109470 }, { "epoch": 4.08, "learning_rate": 1.3517587176793354e-06, "loss": 0.104, "step": 109500 }, { "epoch": 4.09, "learning_rate": 1.351573505502655e-06, "loss": 0.1141, "step": 109530 }, { "epoch": 4.09, "learning_rate": 1.3513883694360502e-06, "loss": 0.1259, "step": 109560 }, { "epoch": 4.09, "learning_rate": 1.3512033094274085e-06, "loss": 0.1059, "step": 109590 }, { "epoch": 4.09, "learning_rate": 1.3510183254246667e-06, "loss": 0.1101, "step": 109620 }, { "epoch": 4.09, "learning_rate": 1.3508334173758125e-06, "loss": 0.1061, "step": 109650 }, { "epoch": 4.09, "learning_rate": 1.3506485852288816e-06, "loss": 0.1091, "step": 109680 }, { "epoch": 4.09, "learning_rate": 1.3504638289319617e-06, "loss": 0.1138, "step": 109710 }, { "epoch": 4.09, "learning_rate": 1.3502791484331885e-06, "loss": 0.1002, "step": 109740 }, { "epoch": 4.09, "learning_rate": 1.3500945436807477e-06, "loss": 0.0967, "step": 109770 }, { "epoch": 4.1, "learning_rate": 1.3499100146228755e-06, "loss": 0.1197, "step": 109800 }, { "epoch": 4.1, "learning_rate": 1.349725561207856e-06, "loss": 0.1062, "step": 109830 }, { "epoch": 4.1, "learning_rate": 1.3495411833840244e-06, "loss": 0.1048, "step": 109860 }, { "epoch": 4.1, "learning_rate": 1.3493568810997643e-06, "loss": 0.1209, "step": 109890 }, { "epoch": 4.1, "learning_rate": 1.3491726543035087e-06, "loss": 0.1055, "step": 109920 }, { "epoch": 4.1, "learning_rate": 1.3489885029437399e-06, "loss": 0.0939, "step": 109950 }, { "epoch": 4.1, "learning_rate": 1.3488044269689899e-06, "loss": 0.1263, "step": 109980 }, { "epoch": 4.1, "learning_rate": 1.3486204263278393e-06, "loss": 0.0975, "step": 110010 }, { "epoch": 4.1, "learning_rate": 1.3484365009689182e-06, "loss": 0.1174, "step": 110040 }, { "epoch": 4.11, "learning_rate": 1.348252650840905e-06, "loss": 0.1112, "step": 110070 }, { "epoch": 4.11, "learning_rate": 1.348068875892528e-06, "loss": 0.1009, "step": 110100 }, { "epoch": 4.11, "learning_rate": 1.347885176072564e-06, "loss": 0.1322, "step": 110130 }, { "epoch": 4.11, "learning_rate": 1.347701551329839e-06, "loss": 0.1163, "step": 110160 }, { "epoch": 4.11, "learning_rate": 1.347518001613226e-06, "loss": 0.1042, "step": 110190 }, { "epoch": 4.11, "learning_rate": 1.3473345268716497e-06, "loss": 0.1109, "step": 110220 }, { "epoch": 4.11, "learning_rate": 1.3471511270540812e-06, "loss": 0.116, "step": 110250 }, { "epoch": 4.11, "learning_rate": 1.346967802109541e-06, "loss": 0.1151, "step": 110280 }, { "epoch": 4.11, "learning_rate": 1.3467845519870978e-06, "loss": 0.1289, "step": 110310 }, { "epoch": 4.12, "learning_rate": 1.3466013766358696e-06, "loss": 0.1383, "step": 110340 }, { "epoch": 4.12, "learning_rate": 1.3464182760050218e-06, "loss": 0.1085, "step": 110370 }, { "epoch": 4.12, "learning_rate": 1.3462352500437688e-06, "loss": 0.1116, "step": 110400 }, { "epoch": 4.12, "learning_rate": 1.3460522987013739e-06, "loss": 0.1335, "step": 110430 }, { "epoch": 4.12, "learning_rate": 1.3458694219271468e-06, "loss": 0.1253, "step": 110460 }, { "epoch": 4.12, "learning_rate": 1.3456866196704468e-06, "loss": 0.0908, "step": 110490 }, { "epoch": 4.12, "learning_rate": 1.3455038918806814e-06, "loss": 0.1125, "step": 110520 }, { "epoch": 4.12, "learning_rate": 1.3453212385073058e-06, "loss": 0.1235, "step": 110550 }, { "epoch": 4.12, "learning_rate": 1.3451386594998233e-06, "loss": 0.1163, "step": 110580 }, { "epoch": 4.13, "learning_rate": 1.344956154807785e-06, "loss": 0.1028, "step": 110610 }, { "epoch": 4.13, "learning_rate": 1.3447737243807896e-06, "loss": 0.1258, "step": 110640 }, { "epoch": 4.13, "learning_rate": 1.3445913681684844e-06, "loss": 0.1037, "step": 110670 }, { "epoch": 4.13, "learning_rate": 1.344409086120564e-06, "loss": 0.1141, "step": 110700 }, { "epoch": 4.13, "learning_rate": 1.3442268781867715e-06, "loss": 0.1071, "step": 110730 }, { "epoch": 4.13, "learning_rate": 1.344044744316896e-06, "loss": 0.1079, "step": 110760 }, { "epoch": 4.13, "learning_rate": 1.343862684460776e-06, "loss": 0.1089, "step": 110790 }, { "epoch": 4.13, "learning_rate": 1.3436806985682964e-06, "loss": 0.1047, "step": 110820 }, { "epoch": 4.13, "learning_rate": 1.3434987865893903e-06, "loss": 0.0858, "step": 110850 }, { "epoch": 4.14, "learning_rate": 1.3433169484740375e-06, "loss": 0.105, "step": 110880 }, { "epoch": 4.14, "learning_rate": 1.343135184172266e-06, "loss": 0.096, "step": 110910 }, { "epoch": 4.14, "learning_rate": 1.3429534936341501e-06, "loss": 0.0958, "step": 110940 }, { "epoch": 4.14, "learning_rate": 1.3427718768098128e-06, "loss": 0.1155, "step": 110970 }, { "epoch": 4.14, "learning_rate": 1.3425903336494232e-06, "loss": 0.0927, "step": 111000 }, { "epoch": 4.14, "learning_rate": 1.3424088641031974e-06, "loss": 0.1146, "step": 111030 }, { "epoch": 4.14, "learning_rate": 1.342227468121399e-06, "loss": 0.1023, "step": 111060 }, { "epoch": 4.14, "learning_rate": 1.3420461456543393e-06, "loss": 0.1133, "step": 111090 }, { "epoch": 4.14, "learning_rate": 1.3418648966523754e-06, "loss": 0.1097, "step": 111120 }, { "epoch": 4.15, "learning_rate": 1.3416837210659122e-06, "loss": 0.12, "step": 111150 }, { "epoch": 4.15, "learning_rate": 1.3415086544046006e-06, "loss": 0.1087, "step": 111180 }, { "epoch": 4.15, "learning_rate": 1.3413276230574544e-06, "loss": 0.1138, "step": 111210 }, { "epoch": 4.15, "learning_rate": 1.3411466649789512e-06, "loss": 0.1045, "step": 111240 }, { "epoch": 4.15, "learning_rate": 1.3409657801196808e-06, "loss": 0.1208, "step": 111270 }, { "epoch": 4.15, "learning_rate": 1.34078496843028e-06, "loss": 0.1282, "step": 111300 }, { "epoch": 4.15, "learning_rate": 1.3406042298614324e-06, "loss": 0.1103, "step": 111330 }, { "epoch": 4.15, "learning_rate": 1.3404235643638672e-06, "loss": 0.1066, "step": 111360 }, { "epoch": 4.15, "learning_rate": 1.3402429718883612e-06, "loss": 0.1165, "step": 111390 }, { "epoch": 4.16, "learning_rate": 1.3400624523857367e-06, "loss": 0.117, "step": 111420 }, { "epoch": 4.16, "learning_rate": 1.3398820058068632e-06, "loss": 0.1059, "step": 111450 }, { "epoch": 4.16, "learning_rate": 1.339701632102656e-06, "loss": 0.1196, "step": 111480 }, { "epoch": 4.16, "learning_rate": 1.339521331224076e-06, "loss": 0.1074, "step": 111510 }, { "epoch": 4.16, "learning_rate": 1.3393411031221322e-06, "loss": 0.1078, "step": 111540 }, { "epoch": 4.16, "learning_rate": 1.339160947747877e-06, "loss": 0.1048, "step": 111570 }, { "epoch": 4.16, "learning_rate": 1.3389808650524116e-06, "loss": 0.1235, "step": 111600 }, { "epoch": 4.16, "learning_rate": 1.3388008549868816e-06, "loss": 0.116, "step": 111630 }, { "epoch": 4.16, "learning_rate": 1.3386209175024792e-06, "loss": 0.1006, "step": 111660 }, { "epoch": 4.17, "learning_rate": 1.3384410525504416e-06, "loss": 0.1163, "step": 111690 }, { "epoch": 4.17, "learning_rate": 1.3382612600820532e-06, "loss": 0.1175, "step": 111720 }, { "epoch": 4.17, "learning_rate": 1.3380815400486433e-06, "loss": 0.1113, "step": 111750 }, { "epoch": 4.17, "learning_rate": 1.3379018924015868e-06, "loss": 0.1203, "step": 111780 }, { "epoch": 4.17, "learning_rate": 1.3377223170923053e-06, "loss": 0.1267, "step": 111810 }, { "epoch": 4.17, "learning_rate": 1.3375428140722648e-06, "loss": 0.1165, "step": 111840 }, { "epoch": 4.17, "learning_rate": 1.3373633832929783e-06, "loss": 0.1114, "step": 111870 }, { "epoch": 4.17, "learning_rate": 1.3371840247060025e-06, "loss": 0.1183, "step": 111900 }, { "epoch": 4.17, "learning_rate": 1.337004738262941e-06, "loss": 0.1023, "step": 111930 }, { "epoch": 4.18, "learning_rate": 1.3368255239154419e-06, "loss": 0.1172, "step": 111960 }, { "epoch": 4.18, "learning_rate": 1.3366463816151999e-06, "loss": 0.106, "step": 111990 }, { "epoch": 4.18, "learning_rate": 1.3364673113139533e-06, "loss": 0.1254, "step": 112020 }, { "epoch": 4.18, "learning_rate": 1.3362883129634875e-06, "loss": 0.102, "step": 112050 }, { "epoch": 4.18, "learning_rate": 1.3361093865156313e-06, "loss": 0.1292, "step": 112080 }, { "epoch": 4.18, "learning_rate": 1.33593053192226e-06, "loss": 0.1158, "step": 112110 }, { "epoch": 4.18, "learning_rate": 1.3357517491352932e-06, "loss": 0.1191, "step": 112140 }, { "epoch": 4.18, "learning_rate": 1.3355730381066959e-06, "loss": 0.1157, "step": 112170 }, { "epoch": 4.18, "learning_rate": 1.335394398788478e-06, "loss": 0.0981, "step": 112200 }, { "epoch": 4.19, "learning_rate": 1.335215831132694e-06, "loss": 0.1214, "step": 112230 }, { "epoch": 4.19, "learning_rate": 1.3350373350914441e-06, "loss": 0.114, "step": 112260 }, { "epoch": 4.19, "learning_rate": 1.3348589106168724e-06, "loss": 0.1069, "step": 112290 }, { "epoch": 4.19, "learning_rate": 1.3346805576611683e-06, "loss": 0.0948, "step": 112320 }, { "epoch": 4.19, "learning_rate": 1.3345022761765652e-06, "loss": 0.1153, "step": 112350 }, { "epoch": 4.19, "learning_rate": 1.3343240661153423e-06, "loss": 0.1226, "step": 112380 }, { "epoch": 4.19, "learning_rate": 1.3341459274298223e-06, "loss": 0.0996, "step": 112410 }, { "epoch": 4.19, "learning_rate": 1.3339678600723734e-06, "loss": 0.1247, "step": 112440 }, { "epoch": 4.19, "learning_rate": 1.3337898639954075e-06, "loss": 0.1071, "step": 112470 }, { "epoch": 4.2, "learning_rate": 1.3336119391513816e-06, "loss": 0.1086, "step": 112500 }, { "epoch": 4.2, "learning_rate": 1.333434085492796e-06, "loss": 0.1173, "step": 112530 }, { "epoch": 4.2, "learning_rate": 1.3332563029721968e-06, "loss": 0.1388, "step": 112560 }, { "epoch": 4.2, "learning_rate": 1.3330785915421732e-06, "loss": 0.1097, "step": 112590 }, { "epoch": 4.2, "learning_rate": 1.3329009511553593e-06, "loss": 0.1164, "step": 112620 }, { "epoch": 4.2, "learning_rate": 1.332723381764433e-06, "loss": 0.1041, "step": 112650 }, { "epoch": 4.2, "learning_rate": 1.3325458833221163e-06, "loss": 0.1056, "step": 112680 }, { "epoch": 4.2, "learning_rate": 1.3323684557811759e-06, "loss": 0.1168, "step": 112710 }, { "epoch": 4.2, "learning_rate": 1.3321910990944214e-06, "loss": 0.125, "step": 112740 }, { "epoch": 4.21, "learning_rate": 1.3320138132147075e-06, "loss": 0.1154, "step": 112770 }, { "epoch": 4.21, "learning_rate": 1.3318365980949322e-06, "loss": 0.0917, "step": 112800 }, { "epoch": 4.21, "learning_rate": 1.3316594536880373e-06, "loss": 0.1145, "step": 112830 }, { "epoch": 4.21, "learning_rate": 1.3314823799470083e-06, "loss": 0.1074, "step": 112860 }, { "epoch": 4.21, "learning_rate": 1.331305376824875e-06, "loss": 0.1049, "step": 112890 }, { "epoch": 4.21, "learning_rate": 1.331128444274711e-06, "loss": 0.1078, "step": 112920 }, { "epoch": 4.21, "learning_rate": 1.3309515822496324e-06, "loss": 0.1126, "step": 112950 }, { "epoch": 4.21, "learning_rate": 1.3307747907028002e-06, "loss": 0.1207, "step": 112980 }, { "epoch": 4.21, "learning_rate": 1.3305980695874179e-06, "loss": 0.1214, "step": 113010 }, { "epoch": 4.22, "learning_rate": 1.3304214188567336e-06, "loss": 0.1187, "step": 113040 }, { "epoch": 4.22, "learning_rate": 1.3302448384640373e-06, "loss": 0.1061, "step": 113070 }, { "epoch": 4.22, "learning_rate": 1.3300683283626642e-06, "loss": 0.1027, "step": 113100 }, { "epoch": 4.22, "learning_rate": 1.3298918885059912e-06, "loss": 0.1075, "step": 113130 }, { "epoch": 4.22, "learning_rate": 1.3297155188474394e-06, "loss": 0.1261, "step": 113160 }, { "epoch": 4.22, "learning_rate": 1.3295392193404732e-06, "loss": 0.109, "step": 113190 }, { "epoch": 4.22, "learning_rate": 1.3293629899385998e-06, "loss": 0.1053, "step": 113220 }, { "epoch": 4.22, "learning_rate": 1.3291868305953697e-06, "loss": 0.1162, "step": 113250 }, { "epoch": 4.22, "learning_rate": 1.329010741264376e-06, "loss": 0.1282, "step": 113280 }, { "epoch": 4.23, "learning_rate": 1.3288347218992556e-06, "loss": 0.1163, "step": 113310 }, { "epoch": 4.23, "learning_rate": 1.3286587724536878e-06, "loss": 0.113, "step": 113340 }, { "epoch": 4.23, "learning_rate": 1.3284828928813952e-06, "loss": 0.1118, "step": 113370 }, { "epoch": 4.23, "learning_rate": 1.3283070831361434e-06, "loss": 0.115, "step": 113400 }, { "epoch": 4.23, "learning_rate": 1.3281313431717397e-06, "loss": 0.0957, "step": 113430 }, { "epoch": 4.23, "learning_rate": 1.3279556729420353e-06, "loss": 0.1053, "step": 113460 }, { "epoch": 4.23, "learning_rate": 1.3277800724009243e-06, "loss": 0.1032, "step": 113490 }, { "epoch": 4.23, "learning_rate": 1.3276045415023425e-06, "loss": 0.1142, "step": 113520 }, { "epoch": 4.24, "learning_rate": 1.3274290802002687e-06, "loss": 0.0996, "step": 113550 }, { "epoch": 4.24, "learning_rate": 1.3272536884487247e-06, "loss": 0.1274, "step": 113580 }, { "epoch": 4.24, "learning_rate": 1.327078366201774e-06, "loss": 0.0937, "step": 113610 }, { "epoch": 4.24, "learning_rate": 1.3269031134135237e-06, "loss": 0.1127, "step": 113640 }, { "epoch": 4.24, "learning_rate": 1.3267279300381222e-06, "loss": 0.1036, "step": 113670 }, { "epoch": 4.24, "learning_rate": 1.3265528160297608e-06, "loss": 0.1173, "step": 113700 }, { "epoch": 4.24, "learning_rate": 1.3263777713426725e-06, "loss": 0.1237, "step": 113730 }, { "epoch": 4.24, "learning_rate": 1.3262027959311342e-06, "loss": 0.1113, "step": 113760 }, { "epoch": 4.24, "learning_rate": 1.3260278897494627e-06, "loss": 0.1228, "step": 113790 }, { "epoch": 4.25, "learning_rate": 1.3258530527520192e-06, "loss": 0.1077, "step": 113820 }, { "epoch": 4.25, "learning_rate": 1.3256782848932048e-06, "loss": 0.1051, "step": 113850 }, { "epoch": 4.25, "learning_rate": 1.325503586127465e-06, "loss": 0.1137, "step": 113880 }, { "epoch": 4.25, "learning_rate": 1.3253289564092853e-06, "loss": 0.1075, "step": 113910 }, { "epoch": 4.25, "learning_rate": 1.3251543956931945e-06, "loss": 0.0949, "step": 113940 }, { "epoch": 4.25, "learning_rate": 1.3249799039337626e-06, "loss": 0.0969, "step": 113970 }, { "epoch": 4.25, "learning_rate": 1.3248054810856017e-06, "loss": 0.0864, "step": 114000 }, { "epoch": 4.25, "learning_rate": 1.3246311271033657e-06, "loss": 0.142, "step": 114030 }, { "epoch": 4.25, "learning_rate": 1.3244568419417503e-06, "loss": 0.0939, "step": 114060 }, { "epoch": 4.26, "learning_rate": 1.3242826255554928e-06, "loss": 0.1117, "step": 114090 }, { "epoch": 4.26, "learning_rate": 1.3241084778993723e-06, "loss": 0.0976, "step": 114120 }, { "epoch": 4.26, "learning_rate": 1.3239343989282094e-06, "loss": 0.0982, "step": 114150 }, { "epoch": 4.26, "learning_rate": 1.3237603885968664e-06, "loss": 0.1163, "step": 114180 }, { "epoch": 4.26, "learning_rate": 1.323586446860247e-06, "loss": 0.1309, "step": 114210 }, { "epoch": 4.26, "learning_rate": 1.3234125736732962e-06, "loss": 0.1001, "step": 114240 }, { "epoch": 4.26, "learning_rate": 1.3232387689910007e-06, "loss": 0.1048, "step": 114270 }, { "epoch": 4.26, "learning_rate": 1.3230650327683888e-06, "loss": 0.1139, "step": 114300 }, { "epoch": 4.26, "learning_rate": 1.3228913649605295e-06, "loss": 0.1219, "step": 114330 }, { "epoch": 4.27, "learning_rate": 1.3227177655225338e-06, "loss": 0.1055, "step": 114360 }, { "epoch": 4.27, "learning_rate": 1.322544234409553e-06, "loss": 0.0944, "step": 114390 }, { "epoch": 4.27, "learning_rate": 1.32237077157678e-06, "loss": 0.1249, "step": 114420 }, { "epoch": 4.27, "learning_rate": 1.3221973769794496e-06, "loss": 0.1084, "step": 114450 }, { "epoch": 4.27, "learning_rate": 1.3220240505728365e-06, "loss": 0.0998, "step": 114480 }, { "epoch": 4.27, "learning_rate": 1.3218507923122572e-06, "loss": 0.1065, "step": 114510 }, { "epoch": 4.27, "learning_rate": 1.3216776021530689e-06, "loss": 0.1051, "step": 114540 }, { "epoch": 4.27, "learning_rate": 1.321504480050669e-06, "loss": 0.1216, "step": 114570 }, { "epoch": 4.27, "learning_rate": 1.3213314259604976e-06, "loss": 0.097, "step": 114600 }, { "epoch": 4.28, "learning_rate": 1.321158439838034e-06, "loss": 0.1079, "step": 114630 }, { "epoch": 4.28, "learning_rate": 1.3209855216387988e-06, "loss": 0.108, "step": 114660 }, { "epoch": 4.28, "learning_rate": 1.3208126713183533e-06, "loss": 0.1599, "step": 114690 }, { "epoch": 4.28, "learning_rate": 1.3206398888323003e-06, "loss": 0.1108, "step": 114720 }, { "epoch": 4.28, "learning_rate": 1.3204671741362815e-06, "loss": 0.1027, "step": 114750 }, { "epoch": 4.28, "learning_rate": 1.320294527185981e-06, "loss": 0.1121, "step": 114780 }, { "epoch": 4.28, "learning_rate": 1.3201219479371225e-06, "loss": 0.0957, "step": 114810 }, { "epoch": 4.28, "learning_rate": 1.3199494363454705e-06, "loss": 0.0941, "step": 114840 }, { "epoch": 4.28, "learning_rate": 1.319776992366829e-06, "loss": 0.1005, "step": 114870 }, { "epoch": 4.29, "learning_rate": 1.3196046159570445e-06, "loss": 0.1123, "step": 114900 }, { "epoch": 4.29, "learning_rate": 1.3194323070720016e-06, "loss": 0.11, "step": 114930 }, { "epoch": 4.29, "learning_rate": 1.3192600656676268e-06, "loss": 0.1043, "step": 114960 }, { "epoch": 4.29, "learning_rate": 1.3190878916998856e-06, "loss": 0.1014, "step": 114990 }, { "epoch": 4.29, "learning_rate": 1.3189157851247853e-06, "loss": 0.1287, "step": 115020 }, { "epoch": 4.29, "learning_rate": 1.3187437458983718e-06, "loss": 0.1203, "step": 115050 }, { "epoch": 4.29, "learning_rate": 1.318571773976732e-06, "loss": 0.1252, "step": 115080 }, { "epoch": 4.29, "learning_rate": 1.3183998693159926e-06, "loss": 0.1076, "step": 115110 }, { "epoch": 4.29, "learning_rate": 1.3182280318723206e-06, "loss": 0.0897, "step": 115140 }, { "epoch": 4.3, "learning_rate": 1.3180562616019221e-06, "loss": 0.102, "step": 115170 }, { "epoch": 4.3, "learning_rate": 1.3178845584610447e-06, "loss": 0.114, "step": 115200 }, { "epoch": 4.3, "learning_rate": 1.3177129224059746e-06, "loss": 0.127, "step": 115230 }, { "epoch": 4.3, "learning_rate": 1.3175413533930378e-06, "loss": 0.1226, "step": 115260 }, { "epoch": 4.3, "learning_rate": 1.3173698513786012e-06, "loss": 0.0991, "step": 115290 }, { "epoch": 4.3, "learning_rate": 1.3171984163190704e-06, "loss": 0.1143, "step": 115320 }, { "epoch": 4.3, "learning_rate": 1.3170270481708914e-06, "loss": 0.1015, "step": 115350 }, { "epoch": 4.3, "learning_rate": 1.3168557468905489e-06, "loss": 0.1062, "step": 115380 }, { "epoch": 4.3, "learning_rate": 1.3166845124345683e-06, "loss": 0.1077, "step": 115410 }, { "epoch": 4.31, "learning_rate": 1.316513344759514e-06, "loss": 0.1142, "step": 115440 }, { "epoch": 4.31, "learning_rate": 1.3163422438219902e-06, "loss": 0.1156, "step": 115470 }, { "epoch": 4.31, "learning_rate": 1.3161712095786402e-06, "loss": 0.118, "step": 115500 }, { "epoch": 4.31, "learning_rate": 1.3160002419861468e-06, "loss": 0.1225, "step": 115530 }, { "epoch": 4.31, "learning_rate": 1.3158293410012324e-06, "loss": 0.1199, "step": 115560 }, { "epoch": 4.31, "learning_rate": 1.3156585065806587e-06, "loss": 0.1145, "step": 115590 }, { "epoch": 4.31, "learning_rate": 1.3154877386812268e-06, "loss": 0.1273, "step": 115620 }, { "epoch": 4.31, "learning_rate": 1.315317037259776e-06, "loss": 0.1092, "step": 115650 }, { "epoch": 4.31, "learning_rate": 1.3151464022731864e-06, "loss": 0.1297, "step": 115680 }, { "epoch": 4.32, "learning_rate": 1.3149758336783764e-06, "loss": 0.1129, "step": 115710 }, { "epoch": 4.32, "learning_rate": 1.314805331432303e-06, "loss": 0.098, "step": 115740 }, { "epoch": 4.32, "learning_rate": 1.3146348954919639e-06, "loss": 0.1029, "step": 115770 }, { "epoch": 4.32, "learning_rate": 1.3144645258143936e-06, "loss": 0.1019, "step": 115800 }, { "epoch": 4.32, "learning_rate": 1.3142942223566676e-06, "loss": 0.1293, "step": 115830 }, { "epoch": 4.32, "learning_rate": 1.3141239850758991e-06, "loss": 0.1258, "step": 115860 }, { "epoch": 4.32, "learning_rate": 1.3139538139292404e-06, "loss": 0.1157, "step": 115890 }, { "epoch": 4.32, "learning_rate": 1.313783708873883e-06, "loss": 0.1181, "step": 115920 }, { "epoch": 4.32, "learning_rate": 1.3136136698670565e-06, "loss": 0.1069, "step": 115950 }, { "epoch": 4.33, "learning_rate": 1.3134436968660303e-06, "loss": 0.1075, "step": 115980 }, { "epoch": 4.33, "learning_rate": 1.3132737898281114e-06, "loss": 0.0945, "step": 116010 }, { "epoch": 4.33, "learning_rate": 1.3131039487106457e-06, "loss": 0.1112, "step": 116040 }, { "epoch": 4.33, "learning_rate": 1.3129341734710184e-06, "loss": 0.1181, "step": 116070 }, { "epoch": 4.33, "learning_rate": 1.3127644640666526e-06, "loss": 0.0994, "step": 116100 }, { "epoch": 4.33, "learning_rate": 1.3125948204550102e-06, "loss": 0.0933, "step": 116130 }, { "epoch": 4.33, "learning_rate": 1.312425242593591e-06, "loss": 0.1108, "step": 116160 }, { "epoch": 4.33, "learning_rate": 1.3122557304399341e-06, "loss": 0.0987, "step": 116190 }, { "epoch": 4.33, "learning_rate": 1.3120862839516165e-06, "loss": 0.1096, "step": 116220 }, { "epoch": 4.34, "learning_rate": 1.3119169030862539e-06, "loss": 0.1133, "step": 116250 }, { "epoch": 4.34, "learning_rate": 1.3117475878014992e-06, "loss": 0.1315, "step": 116280 }, { "epoch": 4.34, "learning_rate": 1.3115783380550451e-06, "loss": 0.1295, "step": 116310 }, { "epoch": 4.34, "learning_rate": 1.311409153804621e-06, "loss": 0.0983, "step": 116340 }, { "epoch": 4.34, "learning_rate": 1.3112400350079959e-06, "loss": 0.1068, "step": 116370 }, { "epoch": 4.34, "learning_rate": 1.3110709816229757e-06, "loss": 0.1147, "step": 116400 }, { "epoch": 4.34, "learning_rate": 1.3109019936074052e-06, "loss": 0.1137, "step": 116430 }, { "epoch": 4.34, "learning_rate": 1.3107330709191668e-06, "loss": 0.1208, "step": 116460 }, { "epoch": 4.34, "learning_rate": 1.310564213516181e-06, "loss": 0.1401, "step": 116490 }, { "epoch": 4.35, "learning_rate": 1.3103954213564058e-06, "loss": 0.1, "step": 116520 }, { "epoch": 4.35, "learning_rate": 1.310226694397838e-06, "loss": 0.0975, "step": 116550 }, { "epoch": 4.35, "learning_rate": 1.3100580325985118e-06, "loss": 0.1046, "step": 116580 }, { "epoch": 4.35, "learning_rate": 1.309889435916499e-06, "loss": 0.1025, "step": 116610 }, { "epoch": 4.35, "learning_rate": 1.3097209043099094e-06, "loss": 0.0954, "step": 116640 }, { "epoch": 4.35, "learning_rate": 1.3095524377368902e-06, "loss": 0.1025, "step": 116670 }, { "epoch": 4.35, "learning_rate": 1.3093840361556264e-06, "loss": 0.1078, "step": 116700 }, { "epoch": 4.35, "learning_rate": 1.3092156995243415e-06, "loss": 0.1226, "step": 116730 }, { "epoch": 4.35, "learning_rate": 1.3090474278012954e-06, "loss": 0.1026, "step": 116760 }, { "epoch": 4.36, "learning_rate": 1.308879220944786e-06, "loss": 0.104, "step": 116790 }, { "epoch": 4.36, "learning_rate": 1.3087110789131483e-06, "loss": 0.0929, "step": 116820 }, { "epoch": 4.36, "learning_rate": 1.3085430016647556e-06, "loss": 0.1205, "step": 116850 }, { "epoch": 4.36, "learning_rate": 1.308374989158018e-06, "loss": 0.1218, "step": 116880 }, { "epoch": 4.36, "learning_rate": 1.3082070413513832e-06, "loss": 0.0964, "step": 116910 }, { "epoch": 4.36, "learning_rate": 1.3080391582033357e-06, "loss": 0.1315, "step": 116940 }, { "epoch": 4.36, "learning_rate": 1.3078713396723986e-06, "loss": 0.1172, "step": 116970 }, { "epoch": 4.36, "learning_rate": 1.3077035857171309e-06, "loss": 0.0963, "step": 117000 }, { "epoch": 4.36, "learning_rate": 1.307535896296129e-06, "loss": 0.1049, "step": 117030 }, { "epoch": 4.37, "learning_rate": 1.3073682713680269e-06, "loss": 0.1218, "step": 117060 }, { "epoch": 4.37, "learning_rate": 1.3072007108914958e-06, "loss": 0.0959, "step": 117090 }, { "epoch": 4.37, "learning_rate": 1.3070332148252434e-06, "loss": 0.105, "step": 117120 }, { "epoch": 4.37, "learning_rate": 1.306865783128015e-06, "loss": 0.1334, "step": 117150 }, { "epoch": 4.37, "learning_rate": 1.3066984157585923e-06, "loss": 0.11, "step": 117180 }, { "epoch": 4.37, "learning_rate": 1.3065311126757948e-06, "loss": 0.098, "step": 117210 }, { "epoch": 4.37, "learning_rate": 1.3063694474317565e-06, "loss": 0.133, "step": 117240 }, { "epoch": 4.37, "learning_rate": 1.3062022706593288e-06, "loss": 0.1123, "step": 117270 }, { "epoch": 4.37, "learning_rate": 1.3060351580515726e-06, "loss": 0.1021, "step": 117300 }, { "epoch": 4.38, "learning_rate": 1.305868109567453e-06, "loss": 0.1113, "step": 117330 }, { "epoch": 4.38, "learning_rate": 1.3057011251659715e-06, "loss": 0.1077, "step": 117360 }, { "epoch": 4.38, "learning_rate": 1.3055342048061668e-06, "loss": 0.1296, "step": 117390 }, { "epoch": 4.38, "learning_rate": 1.305367348447114e-06, "loss": 0.1151, "step": 117420 }, { "epoch": 4.38, "learning_rate": 1.3052005560479244e-06, "loss": 0.1305, "step": 117450 }, { "epoch": 4.38, "learning_rate": 1.305033827567747e-06, "loss": 0.1127, "step": 117480 }, { "epoch": 4.38, "learning_rate": 1.3048671629657663e-06, "loss": 0.1051, "step": 117510 }, { "epoch": 4.38, "learning_rate": 1.3047005622012037e-06, "loss": 0.117, "step": 117540 }, { "epoch": 4.38, "learning_rate": 1.304534025233317e-06, "loss": 0.0994, "step": 117570 }, { "epoch": 4.39, "learning_rate": 1.3043675520214e-06, "loss": 0.1318, "step": 117600 }, { "epoch": 4.39, "learning_rate": 1.3042011425247836e-06, "loss": 0.1427, "step": 117630 }, { "epoch": 4.39, "learning_rate": 1.3040347967028342e-06, "loss": 0.0987, "step": 117660 }, { "epoch": 4.39, "learning_rate": 1.3038685145149548e-06, "loss": 0.1065, "step": 117690 }, { "epoch": 4.39, "learning_rate": 1.3037022959205856e-06, "loss": 0.1003, "step": 117720 }, { "epoch": 4.39, "learning_rate": 1.303536140879201e-06, "loss": 0.1064, "step": 117750 }, { "epoch": 4.39, "learning_rate": 1.303370049350313e-06, "loss": 0.0985, "step": 117780 }, { "epoch": 4.39, "learning_rate": 1.30320402129347e-06, "loss": 0.1175, "step": 117810 }, { "epoch": 4.4, "learning_rate": 1.3030380566682546e-06, "loss": 0.1028, "step": 117840 }, { "epoch": 4.4, "learning_rate": 1.302872155434287e-06, "loss": 0.1041, "step": 117870 }, { "epoch": 4.4, "learning_rate": 1.3027063175512236e-06, "loss": 0.12, "step": 117900 }, { "epoch": 4.4, "learning_rate": 1.3025405429787558e-06, "loss": 0.1162, "step": 117930 }, { "epoch": 4.4, "learning_rate": 1.3023748316766109e-06, "loss": 0.1014, "step": 117960 }, { "epoch": 4.4, "learning_rate": 1.3022091836045526e-06, "loss": 0.1341, "step": 117990 }, { "epoch": 4.4, "learning_rate": 1.3020435987223801e-06, "loss": 0.1052, "step": 118020 }, { "epoch": 4.4, "learning_rate": 1.3018780769899285e-06, "loss": 0.0979, "step": 118050 }, { "epoch": 4.4, "learning_rate": 1.301712618367069e-06, "loss": 0.1356, "step": 118080 }, { "epoch": 4.41, "learning_rate": 1.3015472228137074e-06, "loss": 0.1122, "step": 118110 }, { "epoch": 4.41, "learning_rate": 1.3013818902897862e-06, "loss": 0.1188, "step": 118140 }, { "epoch": 4.41, "learning_rate": 1.3012166207552833e-06, "loss": 0.0868, "step": 118170 }, { "epoch": 4.41, "learning_rate": 1.3010514141702115e-06, "loss": 0.1188, "step": 118200 }, { "epoch": 4.41, "learning_rate": 1.30088627049462e-06, "loss": 0.1018, "step": 118230 }, { "epoch": 4.41, "learning_rate": 1.3007211896885935e-06, "loss": 0.1174, "step": 118260 }, { "epoch": 4.41, "learning_rate": 1.3005561717122512e-06, "loss": 0.0993, "step": 118290 }, { "epoch": 4.41, "learning_rate": 1.3003912165257485e-06, "loss": 0.1044, "step": 118320 }, { "epoch": 4.41, "learning_rate": 1.300226324089276e-06, "loss": 0.1051, "step": 118350 }, { "epoch": 4.42, "learning_rate": 1.3000614943630596e-06, "loss": 0.1175, "step": 118380 }, { "epoch": 4.42, "learning_rate": 1.2998967273073604e-06, "loss": 0.1079, "step": 118410 }, { "epoch": 4.42, "learning_rate": 1.2997320228824746e-06, "loss": 0.088, "step": 118440 }, { "epoch": 4.42, "learning_rate": 1.2995673810487344e-06, "loss": 0.1007, "step": 118470 }, { "epoch": 4.42, "learning_rate": 1.2994028017665058e-06, "loss": 0.1451, "step": 118500 }, { "epoch": 4.42, "learning_rate": 1.2992382849961916e-06, "loss": 0.1068, "step": 118530 }, { "epoch": 4.42, "learning_rate": 1.299073830698228e-06, "loss": 0.0986, "step": 118560 }, { "epoch": 4.42, "learning_rate": 1.2989094388330875e-06, "loss": 0.1178, "step": 118590 }, { "epoch": 4.42, "learning_rate": 1.2987451093612773e-06, "loss": 0.1121, "step": 118620 }, { "epoch": 4.43, "learning_rate": 1.2985808422433387e-06, "loss": 0.1215, "step": 118650 }, { "epoch": 4.43, "learning_rate": 1.2984166374398496e-06, "loss": 0.1104, "step": 118680 }, { "epoch": 4.43, "learning_rate": 1.2982524949114212e-06, "loss": 0.1247, "step": 118710 }, { "epoch": 4.43, "learning_rate": 1.2980884146187004e-06, "loss": 0.1251, "step": 118740 }, { "epoch": 4.43, "learning_rate": 1.297924396522368e-06, "loss": 0.1199, "step": 118770 }, { "epoch": 4.43, "learning_rate": 1.2977604405831415e-06, "loss": 0.1071, "step": 118800 }, { "epoch": 4.43, "learning_rate": 1.297596546761771e-06, "loss": 0.0898, "step": 118830 }, { "epoch": 4.43, "learning_rate": 1.2974327150190424e-06, "loss": 0.1061, "step": 118860 }, { "epoch": 4.43, "learning_rate": 1.2972689453157762e-06, "loss": 0.1134, "step": 118890 }, { "epoch": 4.44, "learning_rate": 1.2971052376128267e-06, "loss": 0.1091, "step": 118920 }, { "epoch": 4.44, "learning_rate": 1.296947045731291e-06, "loss": 0.1343, "step": 118950 }, { "epoch": 4.44, "learning_rate": 1.2967834598482373e-06, "loss": 0.1069, "step": 118980 }, { "epoch": 4.44, "learning_rate": 1.2966199358495734e-06, "loss": 0.1203, "step": 119010 }, { "epoch": 4.44, "learning_rate": 1.2964564736962915e-06, "loss": 0.1226, "step": 119040 }, { "epoch": 4.44, "learning_rate": 1.2962930733494176e-06, "loss": 0.1056, "step": 119070 }, { "epoch": 4.44, "learning_rate": 1.2961297347700125e-06, "loss": 0.1027, "step": 119100 }, { "epoch": 4.44, "learning_rate": 1.2959664579191709e-06, "loss": 0.0915, "step": 119130 }, { "epoch": 4.44, "learning_rate": 1.295803242758022e-06, "loss": 0.0957, "step": 119160 }, { "epoch": 4.45, "learning_rate": 1.2956400892477298e-06, "loss": 0.1198, "step": 119190 }, { "epoch": 4.45, "learning_rate": 1.2954769973494915e-06, "loss": 0.1063, "step": 119220 }, { "epoch": 4.45, "learning_rate": 1.2953139670245392e-06, "loss": 0.0955, "step": 119250 }, { "epoch": 4.45, "learning_rate": 1.2951509982341392e-06, "loss": 0.1139, "step": 119280 }, { "epoch": 4.45, "learning_rate": 1.2949880909395915e-06, "loss": 0.124, "step": 119310 }, { "epoch": 4.45, "learning_rate": 1.2948252451022304e-06, "loss": 0.1036, "step": 119340 }, { "epoch": 4.45, "learning_rate": 1.2946624606834235e-06, "loss": 0.0989, "step": 119370 }, { "epoch": 4.45, "learning_rate": 1.294499737644574e-06, "loss": 0.135, "step": 119400 }, { "epoch": 4.45, "learning_rate": 1.2943370759471175e-06, "loss": 0.1173, "step": 119430 }, { "epoch": 4.46, "learning_rate": 1.2941744755525243e-06, "loss": 0.1293, "step": 119460 }, { "epoch": 4.46, "learning_rate": 1.2940119364222983e-06, "loss": 0.1396, "step": 119490 }, { "epoch": 4.46, "learning_rate": 1.2938494585179772e-06, "loss": 0.1242, "step": 119520 }, { "epoch": 4.46, "learning_rate": 1.293687041801133e-06, "loss": 0.1183, "step": 119550 }, { "epoch": 4.46, "learning_rate": 1.2935246862333709e-06, "loss": 0.1215, "step": 119580 }, { "epoch": 4.46, "learning_rate": 1.2933623917763297e-06, "loss": 0.1166, "step": 119610 }, { "epoch": 4.46, "learning_rate": 1.2932001583916825e-06, "loss": 0.1092, "step": 119640 }, { "epoch": 4.46, "learning_rate": 1.2930379860411354e-06, "loss": 0.1093, "step": 119670 }, { "epoch": 4.46, "learning_rate": 1.292875874686429e-06, "loss": 0.103, "step": 119700 }, { "epoch": 4.47, "learning_rate": 1.2927138242893364e-06, "loss": 0.1017, "step": 119730 }, { "epoch": 4.47, "learning_rate": 1.2925518348116648e-06, "loss": 0.1116, "step": 119760 }, { "epoch": 4.47, "learning_rate": 1.2923899062152547e-06, "loss": 0.1123, "step": 119790 }, { "epoch": 4.47, "learning_rate": 1.2922280384619804e-06, "loss": 0.1173, "step": 119820 }, { "epoch": 4.47, "learning_rate": 1.2920662315137495e-06, "loss": 0.0982, "step": 119850 }, { "epoch": 4.47, "learning_rate": 1.2919044853325025e-06, "loss": 0.1055, "step": 119880 }, { "epoch": 4.47, "learning_rate": 1.291742799880214e-06, "loss": 0.0932, "step": 119910 }, { "epoch": 4.47, "learning_rate": 1.291581175118891e-06, "loss": 0.1237, "step": 119940 }, { "epoch": 4.47, "learning_rate": 1.2914196110105747e-06, "loss": 0.1194, "step": 119970 }, { "epoch": 4.48, "learning_rate": 1.2912581075173393e-06, "loss": 0.1114, "step": 120000 }, { "epoch": 4.48, "learning_rate": 1.2910966646012915e-06, "loss": 0.1114, "step": 120030 }, { "epoch": 4.48, "learning_rate": 1.2909352822245722e-06, "loss": 0.1029, "step": 120060 }, { "epoch": 4.48, "learning_rate": 1.290773960349354e-06, "loss": 0.1043, "step": 120090 }, { "epoch": 4.48, "learning_rate": 1.2906126989378443e-06, "loss": 0.1089, "step": 120120 }, { "epoch": 4.48, "learning_rate": 1.2904514979522822e-06, "loss": 0.09, "step": 120150 }, { "epoch": 4.48, "learning_rate": 1.2902903573549407e-06, "loss": 0.0915, "step": 120180 }, { "epoch": 4.48, "learning_rate": 1.2901292771081256e-06, "loss": 0.0993, "step": 120210 }, { "epoch": 4.48, "learning_rate": 1.2899682571741744e-06, "loss": 0.1236, "step": 120240 }, { "epoch": 4.49, "learning_rate": 1.2898072975154596e-06, "loss": 0.096, "step": 120270 }, { "epoch": 4.49, "learning_rate": 1.289646398094385e-06, "loss": 0.1118, "step": 120300 }, { "epoch": 4.49, "learning_rate": 1.2894855588733873e-06, "loss": 0.1015, "step": 120330 }, { "epoch": 4.49, "learning_rate": 1.2893247798149368e-06, "loss": 0.1416, "step": 120360 }, { "epoch": 4.49, "learning_rate": 1.2891640608815364e-06, "loss": 0.1075, "step": 120390 }, { "epoch": 4.49, "learning_rate": 1.289003402035721e-06, "loss": 0.1143, "step": 120420 }, { "epoch": 4.49, "learning_rate": 1.2888428032400585e-06, "loss": 0.0982, "step": 120450 }, { "epoch": 4.49, "learning_rate": 1.2886822644571501e-06, "loss": 0.1061, "step": 120480 }, { "epoch": 4.49, "learning_rate": 1.2885217856496285e-06, "loss": 0.1185, "step": 120510 }, { "epoch": 4.5, "learning_rate": 1.2883613667801595e-06, "loss": 0.1238, "step": 120540 }, { "epoch": 4.5, "learning_rate": 1.288201007811442e-06, "loss": 0.1035, "step": 120570 }, { "epoch": 4.5, "learning_rate": 1.2880407087062064e-06, "loss": 0.1049, "step": 120600 }, { "epoch": 4.5, "learning_rate": 1.2878804694272156e-06, "loss": 0.1034, "step": 120630 }, { "epoch": 4.5, "learning_rate": 1.2877202899372663e-06, "loss": 0.113, "step": 120660 }, { "epoch": 4.5, "learning_rate": 1.287560170199186e-06, "loss": 0.1191, "step": 120690 }, { "epoch": 4.5, "learning_rate": 1.287400110175835e-06, "loss": 0.1073, "step": 120720 }, { "epoch": 4.5, "learning_rate": 1.2872401098301063e-06, "loss": 0.113, "step": 120750 }, { "epoch": 4.5, "learning_rate": 1.287080169124925e-06, "loss": 0.0863, "step": 120780 }, { "epoch": 4.51, "learning_rate": 1.2869202880232477e-06, "loss": 0.113, "step": 120810 }, { "epoch": 4.51, "learning_rate": 1.2867604664880645e-06, "loss": 0.1062, "step": 120840 }, { "epoch": 4.51, "learning_rate": 1.2866007044823972e-06, "loss": 0.1116, "step": 120870 }, { "epoch": 4.51, "learning_rate": 1.286441001969299e-06, "loss": 0.0937, "step": 120900 }, { "epoch": 4.51, "learning_rate": 1.2862813589118562e-06, "loss": 0.1041, "step": 120930 }, { "epoch": 4.51, "learning_rate": 1.286121775273186e-06, "loss": 0.0942, "step": 120960 }, { "epoch": 4.51, "learning_rate": 1.285962251016439e-06, "loss": 0.1193, "step": 120990 }, { "epoch": 4.51, "learning_rate": 1.2858027861047972e-06, "loss": 0.0999, "step": 121020 }, { "epoch": 4.51, "learning_rate": 1.285643380501474e-06, "loss": 0.1127, "step": 121050 }, { "epoch": 4.52, "learning_rate": 1.2854840341697152e-06, "loss": 0.1086, "step": 121080 }, { "epoch": 4.52, "learning_rate": 1.285324747072799e-06, "loss": 0.1131, "step": 121110 }, { "epoch": 4.52, "learning_rate": 1.2851655191740345e-06, "loss": 0.1009, "step": 121140 }, { "epoch": 4.52, "learning_rate": 1.285006350436763e-06, "loss": 0.119, "step": 121170 }, { "epoch": 4.52, "learning_rate": 1.2848525435259235e-06, "loss": 0.0867, "step": 121200 }, { "epoch": 4.52, "learning_rate": 1.2846934910327693e-06, "loss": 0.1177, "step": 121230 }, { "epoch": 4.52, "learning_rate": 1.2845344975925409e-06, "loss": 0.1146, "step": 121260 }, { "epoch": 4.52, "learning_rate": 1.2843755631687056e-06, "loss": 0.1109, "step": 121290 }, { "epoch": 4.52, "learning_rate": 1.2842166877247616e-06, "loss": 0.1045, "step": 121320 }, { "epoch": 4.53, "learning_rate": 1.28405787122424e-06, "loss": 0.0967, "step": 121350 }, { "epoch": 4.53, "learning_rate": 1.2838991136307022e-06, "loss": 0.1022, "step": 121380 }, { "epoch": 4.53, "learning_rate": 1.283740414907742e-06, "loss": 0.1271, "step": 121410 }, { "epoch": 4.53, "learning_rate": 1.2835817750189845e-06, "loss": 0.1612, "step": 121440 }, { "epoch": 4.53, "learning_rate": 1.2834231939280856e-06, "loss": 0.1181, "step": 121470 }, { "epoch": 4.53, "learning_rate": 1.2832646715987334e-06, "loss": 0.125, "step": 121500 }, { "epoch": 4.53, "learning_rate": 1.283106207994647e-06, "loss": 0.113, "step": 121530 }, { "epoch": 4.53, "learning_rate": 1.2829478030795776e-06, "loss": 0.109, "step": 121560 }, { "epoch": 4.53, "learning_rate": 1.282789456817306e-06, "loss": 0.112, "step": 121590 }, { "epoch": 4.54, "learning_rate": 1.2826311691716465e-06, "loss": 0.1436, "step": 121620 }, { "epoch": 4.54, "learning_rate": 1.282472940106443e-06, "loss": 0.0931, "step": 121650 }, { "epoch": 4.54, "learning_rate": 1.2823147695855707e-06, "loss": 0.1049, "step": 121680 }, { "epoch": 4.54, "learning_rate": 1.282156657572937e-06, "loss": 0.0898, "step": 121710 }, { "epoch": 4.54, "learning_rate": 1.2819986040324803e-06, "loss": 0.0961, "step": 121740 }, { "epoch": 4.54, "learning_rate": 1.2818406089281688e-06, "loss": 0.1009, "step": 121770 }, { "epoch": 4.54, "learning_rate": 1.2816826722240031e-06, "loss": 0.0844, "step": 121800 }, { "epoch": 4.54, "learning_rate": 1.2815247938840142e-06, "loss": 0.1061, "step": 121830 }, { "epoch": 4.54, "learning_rate": 1.2813669738722644e-06, "loss": 0.113, "step": 121860 }, { "epoch": 4.55, "learning_rate": 1.281209212152847e-06, "loss": 0.098, "step": 121890 }, { "epoch": 4.55, "learning_rate": 1.2810515086898859e-06, "loss": 0.1026, "step": 121920 }, { "epoch": 4.55, "learning_rate": 1.2808938634475362e-06, "loss": 0.1044, "step": 121950 }, { "epoch": 4.55, "learning_rate": 1.2807362763899839e-06, "loss": 0.1047, "step": 121980 }, { "epoch": 4.55, "learning_rate": 1.2805787474814456e-06, "loss": 0.1011, "step": 122010 }, { "epoch": 4.55, "learning_rate": 1.280421276686169e-06, "loss": 0.0955, "step": 122040 }, { "epoch": 4.55, "learning_rate": 1.2802638639684322e-06, "loss": 0.0999, "step": 122070 }, { "epoch": 4.55, "learning_rate": 1.280106509292544e-06, "loss": 0.1088, "step": 122100 }, { "epoch": 4.56, "learning_rate": 1.279949212622845e-06, "loss": 0.1164, "step": 122130 }, { "epoch": 4.56, "learning_rate": 1.2797919739237045e-06, "loss": 0.0978, "step": 122160 }, { "epoch": 4.56, "learning_rate": 1.2796347931595244e-06, "loss": 0.1141, "step": 122190 }, { "epoch": 4.56, "learning_rate": 1.2794776702947356e-06, "loss": 0.1212, "step": 122220 }, { "epoch": 4.56, "learning_rate": 1.2793206052938007e-06, "loss": 0.1063, "step": 122250 }, { "epoch": 4.56, "learning_rate": 1.2791635981212126e-06, "loss": 0.1107, "step": 122280 }, { "epoch": 4.56, "learning_rate": 1.2790066487414936e-06, "loss": 0.1031, "step": 122310 }, { "epoch": 4.56, "learning_rate": 1.2788497571191988e-06, "loss": 0.1171, "step": 122340 }, { "epoch": 4.56, "learning_rate": 1.2786929232189112e-06, "loss": 0.1054, "step": 122370 }, { "epoch": 4.57, "learning_rate": 1.2785361470052457e-06, "loss": 0.1144, "step": 122400 }, { "epoch": 4.57, "learning_rate": 1.2783794284428473e-06, "loss": 0.0912, "step": 122430 }, { "epoch": 4.57, "learning_rate": 1.2782227674963907e-06, "loss": 0.0941, "step": 122460 }, { "epoch": 4.57, "learning_rate": 1.2780661641305817e-06, "loss": 0.1035, "step": 122490 }, { "epoch": 4.57, "learning_rate": 1.2779096183101562e-06, "loss": 0.1032, "step": 122520 }, { "epoch": 4.57, "learning_rate": 1.27775312999988e-06, "loss": 0.1078, "step": 122550 }, { "epoch": 4.57, "learning_rate": 1.2775966991645491e-06, "loss": 0.0942, "step": 122580 }, { "epoch": 4.57, "learning_rate": 1.2774403257689902e-06, "loss": 0.1285, "step": 122610 }, { "epoch": 4.57, "learning_rate": 1.2772840097780594e-06, "loss": 0.0951, "step": 122640 }, { "epoch": 4.58, "learning_rate": 1.2771277511566435e-06, "loss": 0.1057, "step": 122670 }, { "epoch": 4.58, "learning_rate": 1.276971549869659e-06, "loss": 0.1358, "step": 122700 }, { "epoch": 4.58, "learning_rate": 1.2768154058820525e-06, "loss": 0.1089, "step": 122730 }, { "epoch": 4.58, "learning_rate": 1.2766593191588008e-06, "loss": 0.0994, "step": 122760 }, { "epoch": 4.58, "learning_rate": 1.2765032896649106e-06, "loss": 0.0995, "step": 122790 }, { "epoch": 4.58, "learning_rate": 1.276347317365418e-06, "loss": 0.1219, "step": 122820 }, { "epoch": 4.58, "learning_rate": 1.2761914022253899e-06, "loss": 0.1226, "step": 122850 }, { "epoch": 4.58, "learning_rate": 1.2760355442099224e-06, "loss": 0.1121, "step": 122880 }, { "epoch": 4.58, "learning_rate": 1.2758797432841417e-06, "loss": 0.1174, "step": 122910 }, { "epoch": 4.59, "learning_rate": 1.275723999413203e-06, "loss": 0.117, "step": 122940 }, { "epoch": 4.59, "learning_rate": 1.2755683125622934e-06, "loss": 0.1104, "step": 122970 }, { "epoch": 4.59, "learning_rate": 1.2754126826966274e-06, "loss": 0.122, "step": 123000 }, { "epoch": 4.59, "learning_rate": 1.2752571097814504e-06, "loss": 0.1165, "step": 123030 }, { "epoch": 4.59, "learning_rate": 1.2751015937820368e-06, "loss": 0.1147, "step": 123060 }, { "epoch": 4.59, "learning_rate": 1.2749461346636915e-06, "loss": 0.1127, "step": 123090 }, { "epoch": 4.59, "learning_rate": 1.2747907323917488e-06, "loss": 0.1077, "step": 123120 }, { "epoch": 4.59, "learning_rate": 1.2746353869315714e-06, "loss": 0.1128, "step": 123150 }, { "epoch": 4.59, "learning_rate": 1.274480098248553e-06, "loss": 0.109, "step": 123180 }, { "epoch": 4.6, "learning_rate": 1.2743248663081165e-06, "loss": 0.1067, "step": 123210 }, { "epoch": 4.6, "learning_rate": 1.2741696910757134e-06, "loss": 0.1056, "step": 123240 }, { "epoch": 4.6, "learning_rate": 1.2740145725168258e-06, "loss": 0.1165, "step": 123270 }, { "epoch": 4.6, "learning_rate": 1.2738595105969641e-06, "loss": 0.1169, "step": 123300 }, { "epoch": 4.6, "learning_rate": 1.2737045052816693e-06, "loss": 0.1161, "step": 123330 }, { "epoch": 4.6, "learning_rate": 1.2735495565365108e-06, "loss": 0.1099, "step": 123360 }, { "epoch": 4.6, "learning_rate": 1.2733946643270871e-06, "loss": 0.1046, "step": 123390 }, { "epoch": 4.6, "learning_rate": 1.2732398286190276e-06, "loss": 0.11, "step": 123420 }, { "epoch": 4.6, "learning_rate": 1.2730850493779893e-06, "loss": 0.1129, "step": 123450 }, { "epoch": 4.61, "learning_rate": 1.2729303265696585e-06, "loss": 0.0978, "step": 123480 }, { "epoch": 4.61, "learning_rate": 1.2727756601597516e-06, "loss": 0.1122, "step": 123510 }, { "epoch": 4.61, "learning_rate": 1.2726210501140138e-06, "loss": 0.1107, "step": 123540 }, { "epoch": 4.61, "learning_rate": 1.272466496398219e-06, "loss": 0.1242, "step": 123570 }, { "epoch": 4.61, "learning_rate": 1.2723119989781708e-06, "loss": 0.1147, "step": 123600 }, { "epoch": 4.61, "learning_rate": 1.2721575578197013e-06, "loss": 0.1031, "step": 123630 }, { "epoch": 4.61, "learning_rate": 1.2720031728886717e-06, "loss": 0.118, "step": 123660 }, { "epoch": 4.61, "learning_rate": 1.271848844150973e-06, "loss": 0.1143, "step": 123690 }, { "epoch": 4.61, "learning_rate": 1.2716945715725241e-06, "loss": 0.1111, "step": 123720 }, { "epoch": 4.62, "learning_rate": 1.2715403551192732e-06, "loss": 0.0922, "step": 123750 }, { "epoch": 4.62, "learning_rate": 1.2713861947571978e-06, "loss": 0.1152, "step": 123780 }, { "epoch": 4.62, "learning_rate": 1.2712320904523037e-06, "loss": 0.1104, "step": 123810 }, { "epoch": 4.62, "learning_rate": 1.2710780421706256e-06, "loss": 0.1041, "step": 123840 }, { "epoch": 4.62, "learning_rate": 1.270924049878228e-06, "loss": 0.1147, "step": 123870 }, { "epoch": 4.62, "learning_rate": 1.2707701135412023e-06, "loss": 0.1253, "step": 123900 }, { "epoch": 4.62, "learning_rate": 1.27061623312567e-06, "loss": 0.0983, "step": 123930 }, { "epoch": 4.62, "learning_rate": 1.2704624085977816e-06, "loss": 0.1467, "step": 123960 }, { "epoch": 4.62, "learning_rate": 1.2703086399237149e-06, "loss": 0.1096, "step": 123990 }, { "epoch": 4.63, "learning_rate": 1.2701549270696776e-06, "loss": 0.1055, "step": 124020 }, { "epoch": 4.63, "learning_rate": 1.270001270001905e-06, "loss": 0.0958, "step": 124050 }, { "epoch": 4.63, "learning_rate": 1.2698476686866623e-06, "loss": 0.1173, "step": 124080 }, { "epoch": 4.63, "learning_rate": 1.2696941230902421e-06, "loss": 0.1011, "step": 124110 }, { "epoch": 4.63, "learning_rate": 1.2695406331789656e-06, "loss": 0.0969, "step": 124140 }, { "epoch": 4.63, "learning_rate": 1.2693871989191832e-06, "loss": 0.1129, "step": 124170 }, { "epoch": 4.63, "learning_rate": 1.2692338202772736e-06, "loss": 0.0921, "step": 124200 }, { "epoch": 4.63, "learning_rate": 1.2690804972196432e-06, "loss": 0.1043, "step": 124230 }, { "epoch": 4.63, "learning_rate": 1.2689272297127274e-06, "loss": 0.1239, "step": 124260 }, { "epoch": 4.64, "learning_rate": 1.26877401772299e-06, "loss": 0.1071, "step": 124290 }, { "epoch": 4.64, "learning_rate": 1.2686208612169226e-06, "loss": 0.106, "step": 124320 }, { "epoch": 4.64, "learning_rate": 1.268467760161046e-06, "loss": 0.0871, "step": 124350 }, { "epoch": 4.64, "learning_rate": 1.2683147145219085e-06, "loss": 0.0989, "step": 124380 }, { "epoch": 4.64, "learning_rate": 1.268161724266087e-06, "loss": 0.1087, "step": 124410 }, { "epoch": 4.64, "learning_rate": 1.2680138862989871e-06, "loss": 0.1111, "step": 124440 }, { "epoch": 4.64, "learning_rate": 1.2678610048662932e-06, "loss": 0.1169, "step": 124470 }, { "epoch": 4.64, "learning_rate": 1.2677081787179258e-06, "loss": 0.1131, "step": 124500 }, { "epoch": 4.64, "learning_rate": 1.2675554078205732e-06, "loss": 0.0931, "step": 124530 }, { "epoch": 4.65, "learning_rate": 1.2674026921409524e-06, "loss": 0.1193, "step": 124560 }, { "epoch": 4.65, "learning_rate": 1.267250031645808e-06, "loss": 0.1002, "step": 124590 }, { "epoch": 4.65, "learning_rate": 1.267097426301913e-06, "loss": 0.1363, "step": 124620 }, { "epoch": 4.65, "learning_rate": 1.2669448760760672e-06, "loss": 0.1128, "step": 124650 }, { "epoch": 4.65, "learning_rate": 1.2667923809351006e-06, "loss": 0.1391, "step": 124680 }, { "epoch": 4.65, "learning_rate": 1.2666399408458692e-06, "loss": 0.1102, "step": 124710 }, { "epoch": 4.65, "learning_rate": 1.2664875557752578e-06, "loss": 0.1066, "step": 124740 }, { "epoch": 4.65, "learning_rate": 1.2663352256901788e-06, "loss": 0.093, "step": 124770 }, { "epoch": 4.65, "learning_rate": 1.2661829505575724e-06, "loss": 0.1056, "step": 124800 }, { "epoch": 4.66, "learning_rate": 1.266030730344407e-06, "loss": 0.1005, "step": 124830 }, { "epoch": 4.66, "learning_rate": 1.265878565017678e-06, "loss": 0.1037, "step": 124860 }, { "epoch": 4.66, "learning_rate": 1.2657264545444095e-06, "loss": 0.1174, "step": 124890 }, { "epoch": 4.66, "learning_rate": 1.2655743988916528e-06, "loss": 0.1028, "step": 124920 }, { "epoch": 4.66, "learning_rate": 1.2654223980264873e-06, "loss": 0.1058, "step": 124950 }, { "epoch": 4.66, "learning_rate": 1.2652704519160192e-06, "loss": 0.1087, "step": 124980 }, { "epoch": 4.66, "learning_rate": 1.2651185605273832e-06, "loss": 0.1167, "step": 125010 }, { "epoch": 4.66, "learning_rate": 1.2649667238277418e-06, "loss": 0.1114, "step": 125040 }, { "epoch": 4.66, "learning_rate": 1.2648149417842841e-06, "loss": 0.088, "step": 125070 }, { "epoch": 4.67, "learning_rate": 1.2646632143642272e-06, "loss": 0.1024, "step": 125100 }, { "epoch": 4.67, "learning_rate": 1.2645115415348164e-06, "loss": 0.091, "step": 125130 }, { "epoch": 4.67, "learning_rate": 1.2643599232633234e-06, "loss": 0.0915, "step": 125160 }, { "epoch": 4.67, "learning_rate": 1.2642083595170476e-06, "loss": 0.0983, "step": 125190 }, { "epoch": 4.67, "learning_rate": 1.2640568502633168e-06, "loss": 0.1039, "step": 125220 }, { "epoch": 4.67, "learning_rate": 1.263905395469485e-06, "loss": 0.1241, "step": 125250 }, { "epoch": 4.67, "learning_rate": 1.263753995102934e-06, "loss": 0.113, "step": 125280 }, { "epoch": 4.67, "learning_rate": 1.2636026491310733e-06, "loss": 0.1267, "step": 125310 }, { "epoch": 4.67, "learning_rate": 1.263451357521339e-06, "loss": 0.1088, "step": 125340 }, { "epoch": 4.68, "learning_rate": 1.2633001202411954e-06, "loss": 0.1209, "step": 125370 }, { "epoch": 4.68, "learning_rate": 1.2631489372581335e-06, "loss": 0.1221, "step": 125400 }, { "epoch": 4.68, "learning_rate": 1.2630028452896997e-06, "loss": 0.1268, "step": 125430 }, { "epoch": 4.68, "learning_rate": 1.2628517689961675e-06, "loss": 0.1132, "step": 125460 }, { "epoch": 4.68, "learning_rate": 1.2627007469034332e-06, "loss": 0.1092, "step": 125490 }, { "epoch": 4.68, "learning_rate": 1.2625497789790967e-06, "loss": 0.0943, "step": 125520 }, { "epoch": 4.68, "learning_rate": 1.2623988651907834e-06, "loss": 0.1242, "step": 125550 }, { "epoch": 4.68, "learning_rate": 1.2622480055061464e-06, "loss": 0.1133, "step": 125580 }, { "epoch": 4.68, "learning_rate": 1.262097199892866e-06, "loss": 0.1141, "step": 125610 }, { "epoch": 4.69, "learning_rate": 1.2619464483186494e-06, "loss": 0.1023, "step": 125640 }, { "epoch": 4.69, "learning_rate": 1.2617957507512305e-06, "loss": 0.0932, "step": 125670 }, { "epoch": 4.69, "learning_rate": 1.2616451071583708e-06, "loss": 0.1173, "step": 125700 }, { "epoch": 4.69, "learning_rate": 1.2614945175078577e-06, "loss": 0.1018, "step": 125730 }, { "epoch": 4.69, "learning_rate": 1.2613439817675066e-06, "loss": 0.1226, "step": 125760 }, { "epoch": 4.69, "learning_rate": 1.2611934999051594e-06, "loss": 0.0986, "step": 125790 }, { "epoch": 4.69, "learning_rate": 1.2610430718886849e-06, "loss": 0.1193, "step": 125820 }, { "epoch": 4.69, "learning_rate": 1.260892697685978e-06, "loss": 0.0996, "step": 125850 }, { "epoch": 4.69, "learning_rate": 1.2607423772649616e-06, "loss": 0.0975, "step": 125880 }, { "epoch": 4.7, "learning_rate": 1.2605921105935848e-06, "loss": 0.1183, "step": 125910 }, { "epoch": 4.7, "learning_rate": 1.2604418976398225e-06, "loss": 0.1177, "step": 125940 }, { "epoch": 4.7, "learning_rate": 1.2602917383716784e-06, "loss": 0.1219, "step": 125970 }, { "epoch": 4.7, "learning_rate": 1.2601416327571808e-06, "loss": 0.1232, "step": 126000 }, { "epoch": 4.7, "learning_rate": 1.2599915807643858e-06, "loss": 0.108, "step": 126030 }, { "epoch": 4.7, "learning_rate": 1.2598415823613763e-06, "loss": 0.1135, "step": 126060 }, { "epoch": 4.7, "learning_rate": 1.2596916375162605e-06, "loss": 0.1191, "step": 126090 }, { "epoch": 4.7, "learning_rate": 1.2595417461971747e-06, "loss": 0.1177, "step": 126120 }, { "epoch": 4.7, "learning_rate": 1.2593919083722806e-06, "loss": 0.1073, "step": 126150 }, { "epoch": 4.71, "learning_rate": 1.2592421240097672e-06, "loss": 0.1122, "step": 126180 }, { "epoch": 4.71, "learning_rate": 1.2590923930778489e-06, "loss": 0.1189, "step": 126210 }, { "epoch": 4.71, "learning_rate": 1.2589427155447684e-06, "loss": 0.1024, "step": 126240 }, { "epoch": 4.71, "learning_rate": 1.2587930913787927e-06, "loss": 0.115, "step": 126270 }, { "epoch": 4.71, "learning_rate": 1.2586435205482167e-06, "loss": 0.0978, "step": 126300 }, { "epoch": 4.71, "learning_rate": 1.258494003021361e-06, "loss": 0.109, "step": 126330 }, { "epoch": 4.71, "learning_rate": 1.2583445387665727e-06, "loss": 0.1314, "step": 126360 }, { "epoch": 4.71, "learning_rate": 1.2581951277522253e-06, "loss": 0.1095, "step": 126390 }, { "epoch": 4.72, "learning_rate": 1.2580457699467181e-06, "loss": 0.1293, "step": 126420 }, { "epoch": 4.72, "learning_rate": 1.2578964653184774e-06, "loss": 0.1144, "step": 126450 }, { "epoch": 4.72, "learning_rate": 1.2577472138359553e-06, "loss": 0.1177, "step": 126480 }, { "epoch": 4.72, "learning_rate": 1.25759801546763e-06, "loss": 0.0867, "step": 126510 }, { "epoch": 4.72, "learning_rate": 1.2574488701820063e-06, "loss": 0.0993, "step": 126540 }, { "epoch": 4.72, "learning_rate": 1.2572997779476142e-06, "loss": 0.1046, "step": 126570 }, { "epoch": 4.72, "learning_rate": 1.2571507387330115e-06, "loss": 0.1035, "step": 126600 }, { "epoch": 4.72, "learning_rate": 1.2570017525067802e-06, "loss": 0.1182, "step": 126630 }, { "epoch": 4.72, "learning_rate": 1.2568528192375293e-06, "loss": 0.0898, "step": 126660 }, { "epoch": 4.73, "learning_rate": 1.256703938893894e-06, "loss": 0.1042, "step": 126690 }, { "epoch": 4.73, "learning_rate": 1.2565551114445355e-06, "loss": 0.1312, "step": 126720 }, { "epoch": 4.73, "learning_rate": 1.25640633685814e-06, "loss": 0.1011, "step": 126750 }, { "epoch": 4.73, "learning_rate": 1.2562576151034208e-06, "loss": 0.0966, "step": 126780 }, { "epoch": 4.73, "learning_rate": 1.2561089461491165e-06, "loss": 0.1087, "step": 126810 }, { "epoch": 4.73, "learning_rate": 1.2559603299639922e-06, "loss": 0.1019, "step": 126840 }, { "epoch": 4.73, "learning_rate": 1.255811766516838e-06, "loss": 0.1032, "step": 126870 }, { "epoch": 4.73, "learning_rate": 1.25566325577647e-06, "loss": 0.1446, "step": 126900 }, { "epoch": 4.73, "learning_rate": 1.255514797711731e-06, "loss": 0.085, "step": 126930 }, { "epoch": 4.74, "learning_rate": 1.2553663922914887e-06, "loss": 0.1077, "step": 126960 }, { "epoch": 4.74, "learning_rate": 1.2552180394846366e-06, "loss": 0.1019, "step": 126990 }, { "epoch": 4.74, "learning_rate": 1.2550697392600945e-06, "loss": 0.1007, "step": 127020 }, { "epoch": 4.74, "learning_rate": 1.2549214915868075e-06, "loss": 0.1203, "step": 127050 }, { "epoch": 4.74, "learning_rate": 1.254773296433746e-06, "loss": 0.1135, "step": 127080 }, { "epoch": 4.74, "learning_rate": 1.2546251537699069e-06, "loss": 0.0953, "step": 127110 }, { "epoch": 4.74, "learning_rate": 1.2544770635643121e-06, "loss": 0.1147, "step": 127140 }, { "epoch": 4.74, "learning_rate": 1.254329025786009e-06, "loss": 0.1283, "step": 127170 }, { "epoch": 4.74, "learning_rate": 1.254181040404071e-06, "loss": 0.1249, "step": 127200 }, { "epoch": 4.75, "learning_rate": 1.2540331073875973e-06, "loss": 0.1066, "step": 127230 }, { "epoch": 4.75, "learning_rate": 1.2538852267057116e-06, "loss": 0.0965, "step": 127260 }, { "epoch": 4.75, "learning_rate": 1.2537373983275636e-06, "loss": 0.1096, "step": 127290 }, { "epoch": 4.75, "learning_rate": 1.2535896222223288e-06, "loss": 0.1008, "step": 127320 }, { "epoch": 4.75, "learning_rate": 1.2534418983592075e-06, "loss": 0.0941, "step": 127350 }, { "epoch": 4.75, "learning_rate": 1.2532942267074264e-06, "loss": 0.0951, "step": 127380 }, { "epoch": 4.75, "learning_rate": 1.2531466072362361e-06, "loss": 0.1065, "step": 127410 }, { "epoch": 4.75, "learning_rate": 1.2529990399149137e-06, "loss": 0.1239, "step": 127440 }, { "epoch": 4.75, "learning_rate": 1.2528515247127609e-06, "loss": 0.1015, "step": 127470 }, { "epoch": 4.76, "learning_rate": 1.2527040615991059e-06, "loss": 0.0923, "step": 127500 }, { "epoch": 4.76, "learning_rate": 1.2525566505433004e-06, "loss": 0.1019, "step": 127530 }, { "epoch": 4.76, "learning_rate": 1.2524092915147229e-06, "loss": 0.1318, "step": 127560 }, { "epoch": 4.76, "learning_rate": 1.252261984482776e-06, "loss": 0.0982, "step": 127590 }, { "epoch": 4.76, "learning_rate": 1.252114729416888e-06, "loss": 0.1154, "step": 127620 }, { "epoch": 4.76, "learning_rate": 1.2519675262865127e-06, "loss": 0.1057, "step": 127650 }, { "epoch": 4.76, "learning_rate": 1.2518203750611286e-06, "loss": 0.1046, "step": 127680 }, { "epoch": 4.76, "learning_rate": 1.2516732757102388e-06, "loss": 0.1086, "step": 127710 }, { "epoch": 4.76, "learning_rate": 1.2515262282033725e-06, "loss": 0.1039, "step": 127740 }, { "epoch": 4.77, "learning_rate": 1.2513792325100835e-06, "loss": 0.1005, "step": 127770 }, { "epoch": 4.77, "learning_rate": 1.2512322885999503e-06, "loss": 0.1234, "step": 127800 }, { "epoch": 4.77, "learning_rate": 1.251085396442577e-06, "loss": 0.1053, "step": 127830 }, { "epoch": 4.77, "learning_rate": 1.250938556007592e-06, "loss": 0.1155, "step": 127860 }, { "epoch": 4.77, "learning_rate": 1.2507917672646498e-06, "loss": 0.1063, "step": 127890 }, { "epoch": 4.77, "learning_rate": 1.2506450301834284e-06, "loss": 0.096, "step": 127920 }, { "epoch": 4.77, "learning_rate": 1.2504983447336312e-06, "loss": 0.1392, "step": 127950 }, { "epoch": 4.77, "learning_rate": 1.2503517108849869e-06, "loss": 0.0902, "step": 127980 }, { "epoch": 4.77, "learning_rate": 1.2502051286072488e-06, "loss": 0.1048, "step": 128010 }, { "epoch": 4.78, "learning_rate": 1.2500585978701952e-06, "loss": 0.1352, "step": 128040 }, { "epoch": 4.78, "learning_rate": 1.2499121186436284e-06, "loss": 0.103, "step": 128070 }, { "epoch": 4.78, "learning_rate": 1.249765690897376e-06, "loss": 0.1082, "step": 128100 }, { "epoch": 4.78, "learning_rate": 1.2496193146012914e-06, "loss": 0.1334, "step": 128130 }, { "epoch": 4.78, "learning_rate": 1.2494729897252504e-06, "loss": 0.1062, "step": 128160 }, { "epoch": 4.78, "learning_rate": 1.2493267162391553e-06, "loss": 0.1236, "step": 128190 }, { "epoch": 4.78, "learning_rate": 1.2491804941129326e-06, "loss": 0.0906, "step": 128220 }, { "epoch": 4.78, "learning_rate": 1.2490343233165332e-06, "loss": 0.1046, "step": 128250 }, { "epoch": 4.78, "learning_rate": 1.2488882038199327e-06, "loss": 0.1001, "step": 128280 }, { "epoch": 4.79, "learning_rate": 1.2487421355931315e-06, "loss": 0.1148, "step": 128310 }, { "epoch": 4.79, "learning_rate": 1.2485961186061542e-06, "loss": 0.0994, "step": 128340 }, { "epoch": 4.79, "learning_rate": 1.2484501528290504e-06, "loss": 0.1428, "step": 128370 }, { "epoch": 4.79, "learning_rate": 1.2483042382318938e-06, "loss": 0.1143, "step": 128400 }, { "epoch": 4.79, "learning_rate": 1.2481583747847825e-06, "loss": 0.1171, "step": 128430 }, { "epoch": 4.79, "learning_rate": 1.2480125624578395e-06, "loss": 0.1124, "step": 128460 }, { "epoch": 4.79, "learning_rate": 1.2478668012212119e-06, "loss": 0.0794, "step": 128490 }, { "epoch": 4.79, "learning_rate": 1.247721091045071e-06, "loss": 0.1156, "step": 128520 }, { "epoch": 4.79, "learning_rate": 1.2475754318996134e-06, "loss": 0.095, "step": 128550 }, { "epoch": 4.8, "learning_rate": 1.2474298237550587e-06, "loss": 0.111, "step": 128580 }, { "epoch": 4.8, "learning_rate": 1.2472842665816518e-06, "loss": 0.0807, "step": 128610 }, { "epoch": 4.8, "learning_rate": 1.247138760349662e-06, "loss": 0.0867, "step": 128640 }, { "epoch": 4.8, "learning_rate": 1.2469933050293821e-06, "loss": 0.1201, "step": 128670 }, { "epoch": 4.8, "learning_rate": 1.2468479005911294e-06, "loss": 0.0993, "step": 128700 }, { "epoch": 4.8, "learning_rate": 1.246702547005246e-06, "loss": 0.0974, "step": 128730 }, { "epoch": 4.8, "learning_rate": 1.2465572442420977e-06, "loss": 0.122, "step": 128760 }, { "epoch": 4.8, "learning_rate": 1.2464119922720743e-06, "loss": 0.1179, "step": 128790 }, { "epoch": 4.8, "learning_rate": 1.2462667910655897e-06, "loss": 0.1133, "step": 128820 }, { "epoch": 4.81, "learning_rate": 1.2461216405930831e-06, "loss": 0.1158, "step": 128850 }, { "epoch": 4.81, "learning_rate": 1.245976540825016e-06, "loss": 0.1173, "step": 128880 }, { "epoch": 4.81, "learning_rate": 1.2458314917318756e-06, "loss": 0.1182, "step": 128910 }, { "epoch": 4.81, "learning_rate": 1.2456864932841722e-06, "loss": 0.0994, "step": 128940 }, { "epoch": 4.81, "learning_rate": 1.24554154545244e-06, "loss": 0.1251, "step": 128970 }, { "epoch": 4.81, "learning_rate": 1.2453966482072375e-06, "loss": 0.1237, "step": 129000 }, { "epoch": 4.81, "learning_rate": 1.2452518015191477e-06, "loss": 0.1108, "step": 129030 }, { "epoch": 4.81, "learning_rate": 1.2451070053587767e-06, "loss": 0.1188, "step": 129060 }, { "epoch": 4.81, "learning_rate": 1.2449622596967553e-06, "loss": 0.0938, "step": 129090 }, { "epoch": 4.82, "learning_rate": 1.244817564503737e-06, "loss": 0.1308, "step": 129120 }, { "epoch": 4.82, "learning_rate": 1.2446729197504007e-06, "loss": 0.0957, "step": 129150 }, { "epoch": 4.82, "learning_rate": 1.2445283254074479e-06, "loss": 0.1108, "step": 129180 }, { "epoch": 4.82, "learning_rate": 1.2443837814456042e-06, "loss": 0.1098, "step": 129210 }, { "epoch": 4.82, "learning_rate": 1.24424410347837e-06, "loss": 0.0849, "step": 129240 }, { "epoch": 4.82, "learning_rate": 1.2440996585140674e-06, "loss": 0.1112, "step": 129270 }, { "epoch": 4.82, "learning_rate": 1.2439552638441673e-06, "loss": 0.1154, "step": 129300 }, { "epoch": 4.82, "learning_rate": 1.24381091943949e-06, "loss": 0.1224, "step": 129330 }, { "epoch": 4.82, "learning_rate": 1.2436666252708784e-06, "loss": 0.1096, "step": 129360 }, { "epoch": 4.83, "learning_rate": 1.2435223813092007e-06, "loss": 0.0951, "step": 129390 }, { "epoch": 4.83, "learning_rate": 1.2433781875253476e-06, "loss": 0.1117, "step": 129420 }, { "epoch": 4.83, "learning_rate": 1.2432340438902337e-06, "loss": 0.1054, "step": 129450 }, { "epoch": 4.83, "learning_rate": 1.2430899503747977e-06, "loss": 0.114, "step": 129480 }, { "epoch": 4.83, "learning_rate": 1.242945906950001e-06, "loss": 0.147, "step": 129510 }, { "epoch": 4.83, "learning_rate": 1.2428019135868293e-06, "loss": 0.1143, "step": 129540 }, { "epoch": 4.83, "learning_rate": 1.2426579702562912e-06, "loss": 0.1192, "step": 129570 }, { "epoch": 4.83, "learning_rate": 1.2425140769294192e-06, "loss": 0.098, "step": 129600 }, { "epoch": 4.83, "learning_rate": 1.2423702335772691e-06, "loss": 0.0906, "step": 129630 }, { "epoch": 4.84, "learning_rate": 1.2422264401709206e-06, "loss": 0.0879, "step": 129660 }, { "epoch": 4.84, "learning_rate": 1.2420826966814762e-06, "loss": 0.1079, "step": 129690 }, { "epoch": 4.84, "learning_rate": 1.2419390030800618e-06, "loss": 0.0955, "step": 129720 }, { "epoch": 4.84, "learning_rate": 1.2417953593378274e-06, "loss": 0.0878, "step": 129750 }, { "epoch": 4.84, "learning_rate": 1.2416517654259457e-06, "loss": 0.1031, "step": 129780 }, { "epoch": 4.84, "learning_rate": 1.241508221315613e-06, "loss": 0.0887, "step": 129810 }, { "epoch": 4.84, "learning_rate": 1.2413647269780489e-06, "loss": 0.1279, "step": 129840 }, { "epoch": 4.84, "learning_rate": 1.2412212823844956e-06, "loss": 0.1016, "step": 129870 }, { "epoch": 4.84, "learning_rate": 1.2410778875062197e-06, "loss": 0.1015, "step": 129900 }, { "epoch": 4.85, "learning_rate": 1.2409345423145103e-06, "loss": 0.1133, "step": 129930 }, { "epoch": 4.85, "learning_rate": 1.2407912467806803e-06, "loss": 0.1121, "step": 129960 }, { "epoch": 4.85, "learning_rate": 1.2406480008760647e-06, "loss": 0.1025, "step": 129990 }, { "epoch": 4.85, "learning_rate": 1.240504804572023e-06, "loss": 0.0929, "step": 130020 }, { "epoch": 4.85, "learning_rate": 1.2403616578399369e-06, "loss": 0.1031, "step": 130050 }, { "epoch": 4.85, "learning_rate": 1.2402185606512112e-06, "loss": 0.0915, "step": 130080 }, { "epoch": 4.85, "learning_rate": 1.2400755129772745e-06, "loss": 0.1082, "step": 130110 }, { "epoch": 4.85, "learning_rate": 1.2399325147895776e-06, "loss": 0.1257, "step": 130140 }, { "epoch": 4.85, "learning_rate": 1.2397895660595954e-06, "loss": 0.1182, "step": 130170 }, { "epoch": 4.86, "learning_rate": 1.2396466667588247e-06, "loss": 0.1072, "step": 130200 }, { "epoch": 4.86, "learning_rate": 1.2395038168587853e-06, "loss": 0.1104, "step": 130230 }, { "epoch": 4.86, "learning_rate": 1.239361016331022e-06, "loss": 0.0867, "step": 130260 }, { "epoch": 4.86, "learning_rate": 1.2392182651470993e-06, "loss": 0.118, "step": 130290 }, { "epoch": 4.86, "learning_rate": 1.2390755632786075e-06, "loss": 0.1032, "step": 130320 }, { "epoch": 4.86, "learning_rate": 1.238932910697158e-06, "loss": 0.1065, "step": 130350 }, { "epoch": 4.86, "learning_rate": 1.2387903073743856e-06, "loss": 0.1119, "step": 130380 }, { "epoch": 4.86, "learning_rate": 1.2386477532819482e-06, "loss": 0.1183, "step": 130410 }, { "epoch": 4.86, "learning_rate": 1.2385052483915269e-06, "loss": 0.1116, "step": 130440 }, { "epoch": 4.87, "learning_rate": 1.2383627926748244e-06, "loss": 0.1086, "step": 130470 }, { "epoch": 4.87, "learning_rate": 1.2382203861035668e-06, "loss": 0.1153, "step": 130500 }, { "epoch": 4.87, "learning_rate": 1.2380780286495036e-06, "loss": 0.1252, "step": 130530 }, { "epoch": 4.87, "learning_rate": 1.2379357202844055e-06, "loss": 0.1114, "step": 130560 }, { "epoch": 4.87, "learning_rate": 1.2377934609800675e-06, "loss": 0.1164, "step": 130590 }, { "epoch": 4.87, "learning_rate": 1.2376512507083065e-06, "loss": 0.1095, "step": 130620 }, { "epoch": 4.87, "learning_rate": 1.2375090894409624e-06, "loss": 0.0931, "step": 130650 }, { "epoch": 4.87, "learning_rate": 1.237366977149897e-06, "loss": 0.093, "step": 130680 }, { "epoch": 4.88, "learning_rate": 1.2372249138069957e-06, "loss": 0.122, "step": 130710 }, { "epoch": 4.88, "learning_rate": 1.2370828993841653e-06, "loss": 0.1082, "step": 130740 }, { "epoch": 4.88, "learning_rate": 1.2369409338533365e-06, "loss": 0.1006, "step": 130770 }, { "epoch": 4.88, "learning_rate": 1.2367990171864617e-06, "loss": 0.0942, "step": 130800 }, { "epoch": 4.88, "learning_rate": 1.2366571493555165e-06, "loss": 0.1071, "step": 130830 }, { "epoch": 4.88, "learning_rate": 1.2365153303324978e-06, "loss": 0.1091, "step": 130860 }, { "epoch": 4.88, "learning_rate": 1.236373560089426e-06, "loss": 0.0992, "step": 130890 }, { "epoch": 4.88, "learning_rate": 1.2362318385983439e-06, "loss": 0.1205, "step": 130920 }, { "epoch": 4.88, "learning_rate": 1.2360901658313162e-06, "loss": 0.1319, "step": 130950 }, { "epoch": 4.89, "learning_rate": 1.2359485417604307e-06, "loss": 0.1027, "step": 130980 }, { "epoch": 4.89, "learning_rate": 1.2358069663577966e-06, "loss": 0.1099, "step": 131010 }, { "epoch": 4.89, "learning_rate": 1.2356654395955465e-06, "loss": 0.1503, "step": 131040 }, { "epoch": 4.89, "learning_rate": 1.2355239614458344e-06, "loss": 0.1108, "step": 131070 }, { "epoch": 4.89, "learning_rate": 1.2353825318808377e-06, "loss": 0.1128, "step": 131100 }, { "epoch": 4.89, "learning_rate": 1.235241150872755e-06, "loss": 0.1135, "step": 131130 }, { "epoch": 4.89, "learning_rate": 1.2350998183938077e-06, "loss": 0.1158, "step": 131160 }, { "epoch": 4.89, "learning_rate": 1.2349585344162398e-06, "loss": 0.0993, "step": 131190 }, { "epoch": 4.89, "learning_rate": 1.2348172989123166e-06, "loss": 0.1053, "step": 131220 }, { "epoch": 4.9, "learning_rate": 1.2346761118543262e-06, "loss": 0.1223, "step": 131250 }, { "epoch": 4.9, "learning_rate": 1.2345349732145788e-06, "loss": 0.1265, "step": 131280 }, { "epoch": 4.9, "learning_rate": 1.2343938829654069e-06, "loss": 0.0982, "step": 131310 }, { "epoch": 4.9, "learning_rate": 1.2342528410791647e-06, "loss": 0.0982, "step": 131340 }, { "epoch": 4.9, "learning_rate": 1.234111847528229e-06, "loss": 0.1148, "step": 131370 }, { "epoch": 4.9, "learning_rate": 1.2339709022849983e-06, "loss": 0.0997, "step": 131400 }, { "epoch": 4.9, "learning_rate": 1.2338300053218935e-06, "loss": 0.1387, "step": 131430 }, { "epoch": 4.9, "learning_rate": 1.2336891566113572e-06, "loss": 0.1052, "step": 131460 }, { "epoch": 4.9, "learning_rate": 1.233548356125854e-06, "loss": 0.1157, "step": 131490 }, { "epoch": 4.91, "learning_rate": 1.2334076038378707e-06, "loss": 0.1167, "step": 131520 }, { "epoch": 4.91, "learning_rate": 1.2332668997199166e-06, "loss": 0.0956, "step": 131550 }, { "epoch": 4.91, "learning_rate": 1.2331262437445215e-06, "loss": 0.1308, "step": 131580 }, { "epoch": 4.91, "learning_rate": 1.2329856358842388e-06, "loss": 0.1052, "step": 131610 }, { "epoch": 4.91, "learning_rate": 1.2328450761116422e-06, "loss": 0.1155, "step": 131640 }, { "epoch": 4.91, "learning_rate": 1.2327045643993285e-06, "loss": 0.1028, "step": 131670 }, { "epoch": 4.91, "learning_rate": 1.2325641007199162e-06, "loss": 0.0988, "step": 131700 }, { "epoch": 4.91, "learning_rate": 1.232423685046045e-06, "loss": 0.1303, "step": 131730 }, { "epoch": 4.91, "learning_rate": 1.2322833173503769e-06, "loss": 0.1042, "step": 131760 }, { "epoch": 4.92, "learning_rate": 1.2321429976055954e-06, "loss": 0.1129, "step": 131790 }, { "epoch": 4.92, "learning_rate": 1.2320027257844061e-06, "loss": 0.1206, "step": 131820 }, { "epoch": 4.92, "learning_rate": 1.2318625018595365e-06, "loss": 0.1058, "step": 131850 }, { "epoch": 4.92, "learning_rate": 1.231722325803735e-06, "loss": 0.1102, "step": 131880 }, { "epoch": 4.92, "learning_rate": 1.2315821975897726e-06, "loss": 0.1181, "step": 131910 }, { "epoch": 4.92, "learning_rate": 1.2314421171904414e-06, "loss": 0.1001, "step": 131940 }, { "epoch": 4.92, "learning_rate": 1.2313020845785554e-06, "loss": 0.101, "step": 131970 }, { "epoch": 4.92, "learning_rate": 1.2311620997269502e-06, "loss": 0.1221, "step": 132000 }, { "epoch": 4.92, "learning_rate": 1.231022162608483e-06, "loss": 0.0918, "step": 132030 }, { "epoch": 4.93, "learning_rate": 1.2308822731960328e-06, "loss": 0.1022, "step": 132060 }, { "epoch": 4.93, "learning_rate": 1.2307424314624995e-06, "loss": 0.1079, "step": 132090 }, { "epoch": 4.93, "learning_rate": 1.2306026373808053e-06, "loss": 0.1142, "step": 132120 }, { "epoch": 4.93, "learning_rate": 1.230462890923894e-06, "loss": 0.1201, "step": 132150 }, { "epoch": 4.93, "learning_rate": 1.2303231920647298e-06, "loss": 0.1014, "step": 132180 }, { "epoch": 4.93, "learning_rate": 1.2301835407762994e-06, "loss": 0.1395, "step": 132210 }, { "epoch": 4.93, "learning_rate": 1.2300439370316107e-06, "loss": 0.1087, "step": 132240 }, { "epoch": 4.93, "learning_rate": 1.2299043808036933e-06, "loss": 0.1153, "step": 132270 }, { "epoch": 4.93, "learning_rate": 1.2297648720655975e-06, "loss": 0.0962, "step": 132300 }, { "epoch": 4.94, "learning_rate": 1.229625410790396e-06, "loss": 0.0934, "step": 132330 }, { "epoch": 4.94, "learning_rate": 1.2294859969511816e-06, "loss": 0.1272, "step": 132360 }, { "epoch": 4.94, "learning_rate": 1.2293466305210697e-06, "loss": 0.0931, "step": 132390 }, { "epoch": 4.94, "learning_rate": 1.229207311473196e-06, "loss": 0.1055, "step": 132420 }, { "epoch": 4.94, "learning_rate": 1.2290680397807183e-06, "loss": 0.1143, "step": 132450 }, { "epoch": 4.94, "learning_rate": 1.2289288154168154e-06, "loss": 0.104, "step": 132480 }, { "epoch": 4.94, "learning_rate": 1.2287896383546869e-06, "loss": 0.1297, "step": 132510 }, { "epoch": 4.94, "learning_rate": 1.2286505085675547e-06, "loss": 0.1069, "step": 132540 }, { "epoch": 4.94, "learning_rate": 1.2285114260286608e-06, "loss": 0.1035, "step": 132570 }, { "epoch": 4.95, "learning_rate": 1.2283723907112692e-06, "loss": 0.0854, "step": 132600 }, { "epoch": 4.95, "learning_rate": 1.2282334025886644e-06, "loss": 0.117, "step": 132630 }, { "epoch": 4.95, "learning_rate": 1.2280944616341529e-06, "loss": 0.121, "step": 132660 }, { "epoch": 4.95, "learning_rate": 1.2279555678210612e-06, "loss": 0.1162, "step": 132690 }, { "epoch": 4.95, "learning_rate": 1.2278167211227382e-06, "loss": 0.1071, "step": 132720 }, { "epoch": 4.95, "learning_rate": 1.2276779215125527e-06, "loss": 0.0994, "step": 132750 }, { "epoch": 4.95, "learning_rate": 1.2275391689638956e-06, "loss": 0.1016, "step": 132780 }, { "epoch": 4.95, "learning_rate": 1.2274004634501783e-06, "loss": 0.1168, "step": 132810 }, { "epoch": 4.95, "learning_rate": 1.2272618049448327e-06, "loss": 0.1099, "step": 132840 }, { "epoch": 4.96, "learning_rate": 1.2271231934213132e-06, "loss": 0.1209, "step": 132870 }, { "epoch": 4.96, "learning_rate": 1.2269846288530937e-06, "loss": 0.1109, "step": 132900 }, { "epoch": 4.96, "learning_rate": 1.2268461112136695e-06, "loss": 0.1087, "step": 132930 }, { "epoch": 4.96, "learning_rate": 1.2267076404765577e-06, "loss": 0.1185, "step": 132960 }, { "epoch": 4.96, "learning_rate": 1.2265692166152949e-06, "loss": 0.1128, "step": 132990 }, { "epoch": 4.96, "learning_rate": 1.2264308396034396e-06, "loss": 0.1114, "step": 133020 }, { "epoch": 4.96, "learning_rate": 1.226292509414571e-06, "loss": 0.1145, "step": 133050 }, { "epoch": 4.96, "learning_rate": 1.2261542260222886e-06, "loss": 0.1106, "step": 133080 }, { "epoch": 4.96, "learning_rate": 1.2260159894002136e-06, "loss": 0.1064, "step": 133110 }, { "epoch": 4.97, "learning_rate": 1.2258777995219873e-06, "loss": 0.1026, "step": 133140 }, { "epoch": 4.97, "learning_rate": 1.2257396563612719e-06, "loss": 0.1053, "step": 133170 }, { "epoch": 4.97, "learning_rate": 1.2256015598917514e-06, "loss": 0.1187, "step": 133200 }, { "epoch": 4.97, "learning_rate": 1.225463510087129e-06, "loss": 0.1068, "step": 133230 }, { "epoch": 4.97, "learning_rate": 1.2253255069211288e-06, "loss": 0.0971, "step": 133260 }, { "epoch": 4.97, "learning_rate": 1.2251875503674974e-06, "loss": 0.0877, "step": 133290 }, { "epoch": 4.97, "learning_rate": 1.2250496403999999e-06, "loss": 0.1098, "step": 133320 }, { "epoch": 4.97, "learning_rate": 1.2249117769924233e-06, "loss": 0.1212, "step": 133350 }, { "epoch": 4.97, "learning_rate": 1.2247739601185745e-06, "loss": 0.1139, "step": 133380 }, { "epoch": 4.98, "learning_rate": 1.2246361897522823e-06, "loss": 0.1067, "step": 133410 }, { "epoch": 4.98, "learning_rate": 1.2244984658673947e-06, "loss": 0.1118, "step": 133440 }, { "epoch": 4.98, "learning_rate": 1.2243607884377809e-06, "loss": 0.0962, "step": 133470 }, { "epoch": 4.98, "learning_rate": 1.2242231574373308e-06, "loss": 0.1155, "step": 133500 }, { "epoch": 4.98, "learning_rate": 1.224085572839954e-06, "loss": 0.0926, "step": 133530 }, { "epoch": 4.98, "learning_rate": 1.2239480346195822e-06, "loss": 0.0914, "step": 133560 }, { "epoch": 4.98, "learning_rate": 1.2238105427501658e-06, "loss": 0.1052, "step": 133590 }, { "epoch": 4.98, "learning_rate": 1.2236730972056771e-06, "loss": 0.0977, "step": 133620 }, { "epoch": 4.98, "learning_rate": 1.223535697960108e-06, "loss": 0.0894, "step": 133650 }, { "epoch": 4.99, "learning_rate": 1.2233983449874714e-06, "loss": 0.1005, "step": 133680 }, { "epoch": 4.99, "learning_rate": 1.2232610382618002e-06, "loss": 0.1008, "step": 133710 }, { "epoch": 4.99, "learning_rate": 1.2231237777571477e-06, "loss": 0.115, "step": 133740 }, { "epoch": 4.99, "learning_rate": 1.222986563447588e-06, "loss": 0.1066, "step": 133770 }, { "epoch": 4.99, "learning_rate": 1.2228493953072147e-06, "loss": 0.1144, "step": 133800 }, { "epoch": 4.99, "learning_rate": 1.2227122733101433e-06, "loss": 0.0956, "step": 133830 }, { "epoch": 4.99, "learning_rate": 1.222575197430508e-06, "loss": 0.0942, "step": 133860 }, { "epoch": 4.99, "learning_rate": 1.2224427345597513e-06, "loss": 0.1127, "step": 133890 }, { "epoch": 4.99, "learning_rate": 1.222305749302364e-06, "loss": 0.1057, "step": 133920 }, { "epoch": 5.0, "learning_rate": 1.222168810085799e-06, "loss": 0.1057, "step": 133950 }, { "epoch": 5.0, "learning_rate": 1.2220319168842711e-06, "loss": 0.1067, "step": 133980 }, { "epoch": 5.0, "learning_rate": 1.2218950696720162e-06, "loss": 0.1108, "step": 134010 }, { "epoch": 5.0, "learning_rate": 1.2217582684232896e-06, "loss": 0.1227, "step": 134040 }, { "epoch": 5.0, "step": 134060, "total_flos": 1.587534895646056e+21, "train_loss": 0.1688916235028426, "train_runtime": 101673.2407, "train_samples_per_second": 10.548, "train_steps_per_second": 1.319 } ], "logging_steps": 30, "max_steps": 134060, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1.587534895646056e+21, "train_batch_size": 8, "trial_name": null, "trial_params": null }