diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,26844 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 134060, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0, + "loss": 8.0891, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.04e-06, + "loss": 7.3717, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 2.24e-06, + "loss": 5.2176, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 3.44e-06, + "loss": 4.35, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 4.6400000000000005e-06, + "loss": 3.4418, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 5.84e-06, + "loss": 2.8796, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 7.04e-06, + "loss": 2.5285, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 8.24e-06, + "loss": 2.2902, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 9.440000000000001e-06, + "loss": 2.0726, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 1.0640000000000001e-05, + "loss": 1.9922, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 1.184e-05, + "loss": 1.8281, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 1.3040000000000002e-05, + "loss": 1.8522, + "step": 330 + }, + { + "epoch": 0.01, + "learning_rate": 1.4240000000000001e-05, + "loss": 1.694, + "step": 360 + }, + { + "epoch": 0.01, + "learning_rate": 1.544e-05, + "loss": 1.5824, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 1.664e-05, + "loss": 1.52, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 1.7840000000000002e-05, + "loss": 1.5005, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 1.904e-05, + "loss": 1.474, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 1.9881069312188603e-05, + "loss": 1.4102, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 1.9316685232156396e-05, + "loss": 1.3253, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 1.879778950992281e-05, + "loss": 1.3384, + "step": 570 + }, + { + "epoch": 0.02, + "learning_rate": 1.8318582636182796e-05, + "loss": 1.1948, + "step": 600 + }, + { + "epoch": 0.02, + "learning_rate": 1.7874250135079064e-05, + "loss": 1.1314, + "step": 630 + }, + { + "epoch": 0.02, + "learning_rate": 1.7460757394239457e-05, + "loss": 1.062, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 1.707469441906277e-05, + "loss": 1.0079, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 1.6713156761621893e-05, + "loss": 1.0524, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 1.6373653066597823e-05, + "loss": 1.0395, + "step": 750 + }, + { + "epoch": 0.03, + "learning_rate": 1.605403247669839e-05, + "loss": 0.9386, + "step": 780 + }, + { + "epoch": 0.03, + "learning_rate": 1.5752427045264396e-05, + "loss": 0.9778, + "step": 810 + }, + { + "epoch": 0.03, + "learning_rate": 1.546720562224365e-05, + "loss": 0.9788, + "step": 840 + }, + { + "epoch": 0.03, + "learning_rate": 1.5196936606339123e-05, + "loss": 0.9897, + "step": 870 + }, + { + "epoch": 0.03, + "learning_rate": 1.494035761667992e-05, + "loss": 0.9103, + "step": 900 + }, + { + "epoch": 0.03, + "learning_rate": 1.4696350614431104e-05, + "loss": 0.916, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 1.4463921353509297e-05, + "loss": 0.8597, + "step": 960 + }, + { + "epoch": 0.04, + "learning_rate": 1.4242182297397127e-05, + "loss": 0.7784, + "step": 990 + }, + { + "epoch": 0.04, + "learning_rate": 1.4030338331657845e-05, + "loss": 0.8172, + "step": 1020 + }, + { + "epoch": 0.04, + "learning_rate": 1.3827674747047456e-05, + "loss": 0.8181, + "step": 1050 + }, + { + "epoch": 0.04, + "learning_rate": 1.3633547078730296e-05, + "loss": 0.7576, + "step": 1080 + }, + { + "epoch": 0.04, + "learning_rate": 1.3447372472027636e-05, + "loss": 0.7949, + "step": 1110 + }, + { + "epoch": 0.04, + "learning_rate": 1.3268622310856882e-05, + "loss": 0.7204, + "step": 1140 + }, + { + "epoch": 0.04, + "learning_rate": 1.3096815896275181e-05, + "loss": 0.7064, + "step": 1170 + }, + { + "epoch": 0.04, + "learning_rate": 1.2931515002796793e-05, + "loss": 0.7781, + "step": 1200 + }, + { + "epoch": 0.05, + "learning_rate": 1.2772319171982632e-05, + "loss": 0.6695, + "step": 1230 + }, + { + "epoch": 0.05, + "learning_rate": 1.261886162812672e-05, + "loss": 0.7092, + "step": 1260 + }, + { + "epoch": 0.05, + "learning_rate": 1.2470805721138076e-05, + "loss": 0.6964, + "step": 1290 + }, + { + "epoch": 0.05, + "learning_rate": 1.2327841818038448e-05, + "loss": 0.6547, + "step": 1320 + }, + { + "epoch": 0.05, + "learning_rate": 1.2189684577707986e-05, + "loss": 0.6966, + "step": 1350 + }, + { + "epoch": 0.05, + "learning_rate": 1.2056070554260304e-05, + "loss": 0.707, + "step": 1380 + }, + { + "epoch": 0.05, + "learning_rate": 1.1926756083217403e-05, + "loss": 0.7237, + "step": 1410 + }, + { + "epoch": 0.05, + "learning_rate": 1.1801515411874576e-05, + "loss": 0.6281, + "step": 1440 + }, + { + "epoch": 0.05, + "learning_rate": 1.1680139041202725e-05, + "loss": 0.6783, + "step": 1470 + }, + { + "epoch": 0.06, + "learning_rate": 1.156243225157201e-05, + "loss": 0.628, + "step": 1500 + }, + { + "epoch": 0.06, + "learning_rate": 1.1448213788688051e-05, + "loss": 0.6361, + "step": 1530 + }, + { + "epoch": 0.06, + "learning_rate": 1.1337314689562955e-05, + "loss": 0.5683, + "step": 1560 + }, + { + "epoch": 0.06, + "learning_rate": 1.1229577231219766e-05, + "loss": 0.5641, + "step": 1590 + }, + { + "epoch": 0.06, + "learning_rate": 1.112485398724962e-05, + "loss": 0.6237, + "step": 1620 + }, + { + "epoch": 0.06, + "learning_rate": 1.1023006979384424e-05, + "loss": 0.5927, + "step": 1650 + }, + { + "epoch": 0.06, + "learning_rate": 1.0923906912979292e-05, + "loss": 0.6144, + "step": 1680 + }, + { + "epoch": 0.06, + "learning_rate": 1.0827432486770289e-05, + "loss": 0.5461, + "step": 1710 + }, + { + "epoch": 0.06, + "learning_rate": 1.0733469768527298e-05, + "loss": 0.5728, + "step": 1740 + }, + { + "epoch": 0.07, + "learning_rate": 1.0641911629294233e-05, + "loss": 0.5896, + "step": 1770 + }, + { + "epoch": 0.07, + "learning_rate": 1.0552657229828183e-05, + "loss": 0.5095, + "step": 1800 + }, + { + "epoch": 0.07, + "learning_rate": 1.0465611553639654e-05, + "loss": 0.58, + "step": 1830 + }, + { + "epoch": 0.07, + "learning_rate": 1.0380684981717496e-05, + "loss": 0.5911, + "step": 1860 + }, + { + "epoch": 0.07, + "learning_rate": 1.0297792904611e-05, + "loss": 0.5328, + "step": 1890 + }, + { + "epoch": 0.07, + "learning_rate": 1.0216855368051905e-05, + "loss": 0.5636, + "step": 1920 + }, + { + "epoch": 0.07, + "learning_rate": 1.0137796748742198e-05, + "loss": 0.5733, + "step": 1950 + }, + { + "epoch": 0.07, + "learning_rate": 1.0060545457319173e-05, + "loss": 0.5326, + "step": 1980 + }, + { + "epoch": 0.07, + "learning_rate": 9.98503366584589e-06, + "loss": 0.5016, + "step": 2010 + }, + { + "epoch": 0.08, + "learning_rate": 9.911197057469108e-06, + "loss": 0.5416, + "step": 2040 + }, + { + "epoch": 0.08, + "learning_rate": 9.838974596144754e-06, + "loss": 0.6255, + "step": 2070 + }, + { + "epoch": 0.08, + "learning_rate": 9.768308314557045e-06, + "loss": 0.4626, + "step": 2100 + }, + { + "epoch": 0.08, + "learning_rate": 9.699143118556543e-06, + "loss": 0.496, + "step": 2130 + }, + { + "epoch": 0.08, + "learning_rate": 9.631426606617744e-06, + "loss": 0.5069, + "step": 2160 + }, + { + "epoch": 0.08, + "learning_rate": 9.565108902971731e-06, + "loss": 0.4882, + "step": 2190 + }, + { + "epoch": 0.08, + "learning_rate": 9.50014250320633e-06, + "loss": 0.5291, + "step": 2220 + }, + { + "epoch": 0.08, + "learning_rate": 9.436482131247697e-06, + "loss": 0.541, + "step": 2250 + }, + { + "epoch": 0.09, + "learning_rate": 9.374084606744877e-06, + "loss": 0.5595, + "step": 2280 + }, + { + "epoch": 0.09, + "learning_rate": 9.312908721974755e-06, + "loss": 0.5041, + "step": 2310 + }, + { + "epoch": 0.09, + "learning_rate": 9.252915127470068e-06, + "loss": 0.568, + "step": 2340 + }, + { + "epoch": 0.09, + "learning_rate": 9.194066225649182e-06, + "loss": 0.4467, + "step": 2370 + }, + { + "epoch": 0.09, + "learning_rate": 9.13632607179441e-06, + "loss": 0.471, + "step": 2400 + }, + { + "epoch": 0.09, + "learning_rate": 9.079660281786226e-06, + "loss": 0.5507, + "step": 2430 + }, + { + "epoch": 0.09, + "learning_rate": 9.02403594605542e-06, + "loss": 0.4895, + "step": 2460 + }, + { + "epoch": 0.09, + "learning_rate": 8.969421549263868e-06, + "loss": 0.5492, + "step": 2490 + }, + { + "epoch": 0.09, + "learning_rate": 8.915786895268652e-06, + "loss": 0.4661, + "step": 2520 + }, + { + "epoch": 0.1, + "learning_rate": 8.863103036963604e-06, + "loss": 0.4957, + "step": 2550 + }, + { + "epoch": 0.1, + "learning_rate": 8.811342210628018e-06, + "loss": 0.4698, + "step": 2580 + }, + { + "epoch": 0.1, + "learning_rate": 8.760477774444217e-06, + "loss": 0.4813, + "step": 2610 + }, + { + "epoch": 0.1, + "learning_rate": 8.710484150874759e-06, + "loss": 0.4382, + "step": 2640 + }, + { + "epoch": 0.1, + "learning_rate": 8.661336772616119e-06, + "loss": 0.5073, + "step": 2670 + }, + { + "epoch": 0.1, + "learning_rate": 8.613012031869432e-06, + "loss": 0.4712, + "step": 2700 + }, + { + "epoch": 0.1, + "learning_rate": 8.5654872326904e-06, + "loss": 0.5269, + "step": 2730 + }, + { + "epoch": 0.1, + "learning_rate": 8.51874054619982e-06, + "loss": 0.4369, + "step": 2760 + }, + { + "epoch": 0.1, + "learning_rate": 8.472750968454027e-06, + "loss": 0.4954, + "step": 2790 + }, + { + "epoch": 0.11, + "learning_rate": 8.427498280790527e-06, + "loss": 0.4503, + "step": 2820 + }, + { + "epoch": 0.11, + "learning_rate": 8.382963012478792e-06, + "loss": 0.4511, + "step": 2850 + }, + { + "epoch": 0.11, + "learning_rate": 8.339126405519484e-06, + "loss": 0.4604, + "step": 2880 + }, + { + "epoch": 0.11, + "learning_rate": 8.295970381447577e-06, + "loss": 0.4256, + "step": 2910 + }, + { + "epoch": 0.11, + "learning_rate": 8.253477510005973e-06, + "loss": 0.4544, + "step": 2940 + }, + { + "epoch": 0.11, + "learning_rate": 8.211630979566229e-06, + "loss": 0.4404, + "step": 2970 + }, + { + "epoch": 0.11, + "learning_rate": 8.170414569182507e-06, + "loss": 0.4183, + "step": 3000 + }, + { + "epoch": 0.11, + "learning_rate": 8.129812622173143e-06, + "loss": 0.4439, + "step": 3030 + }, + { + "epoch": 0.11, + "learning_rate": 8.08981002113217e-06, + "loss": 0.4319, + "step": 3060 + }, + { + "epoch": 0.12, + "learning_rate": 8.050392164280187e-06, + "loss": 0.4402, + "step": 3090 + }, + { + "epoch": 0.12, + "learning_rate": 8.011544943070564e-06, + "loss": 0.4871, + "step": 3120 + }, + { + "epoch": 0.12, + "learning_rate": 7.973254720972994e-06, + "loss": 0.4748, + "step": 3150 + }, + { + "epoch": 0.12, + "learning_rate": 7.935508313361898e-06, + "loss": 0.5215, + "step": 3180 + }, + { + "epoch": 0.12, + "learning_rate": 7.898292968442374e-06, + "loss": 0.4496, + "step": 3210 + }, + { + "epoch": 0.12, + "learning_rate": 7.861596349150974e-06, + "loss": 0.4912, + "step": 3240 + }, + { + "epoch": 0.12, + "learning_rate": 7.825406515973003e-06, + "loss": 0.4403, + "step": 3270 + }, + { + "epoch": 0.12, + "learning_rate": 7.78971191062195e-06, + "loss": 0.4216, + "step": 3300 + }, + { + "epoch": 0.12, + "learning_rate": 7.754501340530456e-06, + "loss": 0.4121, + "step": 3330 + }, + { + "epoch": 0.13, + "learning_rate": 7.719763964105497e-06, + "loss": 0.4315, + "step": 3360 + }, + { + "epoch": 0.13, + "learning_rate": 7.68548927670368e-06, + "loss": 0.4272, + "step": 3390 + }, + { + "epoch": 0.13, + "learning_rate": 7.651667097285501e-06, + "loss": 0.4234, + "step": 3420 + }, + { + "epoch": 0.13, + "learning_rate": 7.618287555709996e-06, + "loss": 0.5091, + "step": 3450 + }, + { + "epoch": 0.13, + "learning_rate": 7.585341080633831e-06, + "loss": 0.4435, + "step": 3480 + }, + { + "epoch": 0.13, + "learning_rate": 7.552818387981102e-06, + "loss": 0.3596, + "step": 3510 + }, + { + "epoch": 0.13, + "learning_rate": 7.520710469952336e-06, + "loss": 0.4096, + "step": 3540 + }, + { + "epoch": 0.13, + "learning_rate": 7.489008584543126e-06, + "loss": 0.3699, + "step": 3570 + }, + { + "epoch": 0.13, + "learning_rate": 7.457704245544709e-06, + "loss": 0.4257, + "step": 3600 + }, + { + "epoch": 0.14, + "learning_rate": 7.4267892130005204e-06, + "loss": 0.3542, + "step": 3630 + }, + { + "epoch": 0.14, + "learning_rate": 7.396255484094341e-06, + "loss": 0.4619, + "step": 3660 + }, + { + "epoch": 0.14, + "learning_rate": 7.366095284447187e-06, + "loss": 0.442, + "step": 3690 + }, + { + "epoch": 0.14, + "learning_rate": 7.336301059801395e-06, + "loss": 0.3701, + "step": 3720 + }, + { + "epoch": 0.14, + "learning_rate": 7.306865468071773e-06, + "loss": 0.4379, + "step": 3750 + }, + { + "epoch": 0.14, + "learning_rate": 7.277781371744775e-06, + "loss": 0.4511, + "step": 3780 + }, + { + "epoch": 0.14, + "learning_rate": 7.24904183060784e-06, + "loss": 0.3869, + "step": 3810 + }, + { + "epoch": 0.14, + "learning_rate": 7.220640094792103e-06, + "loss": 0.3576, + "step": 3840 + }, + { + "epoch": 0.14, + "learning_rate": 7.1925695981126075e-06, + "loss": 0.344, + "step": 3870 + }, + { + "epoch": 0.15, + "learning_rate": 7.164823951691131e-06, + "loss": 0.3708, + "step": 3900 + }, + { + "epoch": 0.15, + "learning_rate": 7.137396937847532e-06, + "loss": 0.414, + "step": 3930 + }, + { + "epoch": 0.15, + "learning_rate": 7.110282504246376e-06, + "loss": 0.4553, + "step": 3960 + }, + { + "epoch": 0.15, + "learning_rate": 7.083474758286323e-06, + "loss": 0.4011, + "step": 3990 + }, + { + "epoch": 0.15, + "learning_rate": 7.056967961720459e-06, + "loss": 0.3563, + "step": 4020 + }, + { + "epoch": 0.15, + "learning_rate": 7.0307565254964336e-06, + "loss": 0.3607, + "step": 4050 + }, + { + "epoch": 0.15, + "learning_rate": 7.004835004805859e-06, + "loss": 0.3634, + "step": 4080 + }, + { + "epoch": 0.15, + "learning_rate": 6.979198094333013e-06, + "loss": 0.3656, + "step": 4110 + }, + { + "epoch": 0.15, + "learning_rate": 6.953840623693443e-06, + "loss": 0.4157, + "step": 4140 + }, + { + "epoch": 0.16, + "learning_rate": 6.928757553053551e-06, + "loss": 0.3799, + "step": 4170 + }, + { + "epoch": 0.16, + "learning_rate": 6.9039439689227496e-06, + "loss": 0.3595, + "step": 4200 + }, + { + "epoch": 0.16, + "learning_rate": 6.879395080110185e-06, + "loss": 0.3912, + "step": 4230 + }, + { + "epoch": 0.16, + "learning_rate": 6.855106213838523e-06, + "loss": 0.3785, + "step": 4260 + }, + { + "epoch": 0.16, + "learning_rate": 6.83107281200758e-06, + "loss": 0.4702, + "step": 4290 + }, + { + "epoch": 0.16, + "learning_rate": 6.807290427601057e-06, + "loss": 0.3987, + "step": 4320 + }, + { + "epoch": 0.16, + "learning_rate": 6.7837547212299395e-06, + "loss": 0.3743, + "step": 4350 + }, + { + "epoch": 0.16, + "learning_rate": 6.760461457806433e-06, + "loss": 0.3816, + "step": 4380 + }, + { + "epoch": 0.16, + "learning_rate": 6.737406503342708e-06, + "loss": 0.3293, + "step": 4410 + }, + { + "epoch": 0.17, + "learning_rate": 6.714585821868879e-06, + "loss": 0.4261, + "step": 4440 + }, + { + "epoch": 0.17, + "learning_rate": 6.691995472465075e-06, + "loss": 0.3562, + "step": 4470 + }, + { + "epoch": 0.17, + "learning_rate": 6.6696316064026025e-06, + "loss": 0.3437, + "step": 4500 + }, + { + "epoch": 0.17, + "learning_rate": 6.6474904643895035e-06, + "loss": 0.3696, + "step": 4530 + }, + { + "epoch": 0.17, + "learning_rate": 6.625568373916034e-06, + "loss": 0.4247, + "step": 4560 + }, + { + "epoch": 0.17, + "learning_rate": 6.603861746695817e-06, + "loss": 0.3945, + "step": 4590 + }, + { + "epoch": 0.17, + "learning_rate": 6.582367076198595e-06, + "loss": 0.3798, + "step": 4620 + }, + { + "epoch": 0.17, + "learning_rate": 6.561080935270765e-06, + "loss": 0.3464, + "step": 4650 + }, + { + "epoch": 0.17, + "learning_rate": 6.53999997384e-06, + "loss": 0.3523, + "step": 4680 + }, + { + "epoch": 0.18, + "learning_rate": 6.5191209167004895e-06, + "loss": 0.363, + "step": 4710 + }, + { + "epoch": 0.18, + "learning_rate": 6.4984405613754545e-06, + "loss": 0.3986, + "step": 4740 + }, + { + "epoch": 0.18, + "learning_rate": 6.477955776053786e-06, + "loss": 0.3417, + "step": 4770 + }, + { + "epoch": 0.18, + "learning_rate": 6.457663497597783e-06, + "loss": 0.366, + "step": 4800 + }, + { + "epoch": 0.18, + "learning_rate": 6.437560729619116e-06, + "loss": 0.3212, + "step": 4830 + }, + { + "epoch": 0.18, + "learning_rate": 6.41764454062025e-06, + "loss": 0.42, + "step": 4860 + }, + { + "epoch": 0.18, + "learning_rate": 6.3979120621987555e-06, + "loss": 0.3657, + "step": 4890 + }, + { + "epoch": 0.18, + "learning_rate": 6.378360487311966e-06, + "loss": 0.3274, + "step": 4920 + }, + { + "epoch": 0.18, + "learning_rate": 6.358987068599611e-06, + "loss": 0.3556, + "step": 4950 + }, + { + "epoch": 0.19, + "learning_rate": 6.339789116762173e-06, + "loss": 0.3901, + "step": 4980 + }, + { + "epoch": 0.19, + "learning_rate": 6.320763998992753e-06, + "loss": 0.3037, + "step": 5010 + }, + { + "epoch": 0.19, + "learning_rate": 6.301909137460409e-06, + "loss": 0.3604, + "step": 5040 + }, + { + "epoch": 0.19, + "learning_rate": 6.283222007842957e-06, + "loss": 0.3274, + "step": 5070 + }, + { + "epoch": 0.19, + "learning_rate": 6.264700137907351e-06, + "loss": 0.349, + "step": 5100 + }, + { + "epoch": 0.19, + "learning_rate": 6.246341106135824e-06, + "loss": 0.3592, + "step": 5130 + }, + { + "epoch": 0.19, + "learning_rate": 6.22814254039606e-06, + "loss": 0.3222, + "step": 5160 + }, + { + "epoch": 0.19, + "learning_rate": 6.21010211665374e-06, + "loss": 0.3953, + "step": 5190 + }, + { + "epoch": 0.19, + "learning_rate": 6.192217557725875e-06, + "loss": 0.3456, + "step": 5220 + }, + { + "epoch": 0.2, + "learning_rate": 6.174486632073397e-06, + "loss": 0.2952, + "step": 5250 + }, + { + "epoch": 0.2, + "learning_rate": 6.156907152631576e-06, + "loss": 0.3385, + "step": 5280 + }, + { + "epoch": 0.2, + "learning_rate": 6.1394769756768524e-06, + "loss": 0.3676, + "step": 5310 + }, + { + "epoch": 0.2, + "learning_rate": 6.12219399972876e-06, + "loss": 0.3084, + "step": 5340 + }, + { + "epoch": 0.2, + "learning_rate": 6.105056164485665e-06, + "loss": 0.3262, + "step": 5370 + }, + { + "epoch": 0.2, + "learning_rate": 6.0880614497930825e-06, + "loss": 0.3254, + "step": 5400 + }, + { + "epoch": 0.2, + "learning_rate": 6.071207874643425e-06, + "loss": 0.3312, + "step": 5430 + }, + { + "epoch": 0.2, + "learning_rate": 6.054493496206035e-06, + "loss": 0.2912, + "step": 5460 + }, + { + "epoch": 0.2, + "learning_rate": 6.037916408886427e-06, + "loss": 0.3067, + "step": 5490 + }, + { + "epoch": 0.21, + "learning_rate": 6.021474743413714e-06, + "loss": 0.2967, + "step": 5520 + }, + { + "epoch": 0.21, + "learning_rate": 6.005166665955221e-06, + "loss": 0.3792, + "step": 5550 + }, + { + "epoch": 0.21, + "learning_rate": 5.988990377257332e-06, + "loss": 0.3027, + "step": 5580 + }, + { + "epoch": 0.21, + "learning_rate": 5.972944111811658e-06, + "loss": 0.3165, + "step": 5610 + }, + { + "epoch": 0.21, + "learning_rate": 5.9570261370456484e-06, + "loss": 0.331, + "step": 5640 + }, + { + "epoch": 0.21, + "learning_rate": 5.941234752536812e-06, + "loss": 0.3406, + "step": 5670 + }, + { + "epoch": 0.21, + "learning_rate": 5.925568289249716e-06, + "loss": 0.3086, + "step": 5700 + }, + { + "epoch": 0.21, + "learning_rate": 5.910025108795013e-06, + "loss": 0.335, + "step": 5730 + }, + { + "epoch": 0.21, + "learning_rate": 5.894603602709729e-06, + "loss": 0.3411, + "step": 5760 + }, + { + "epoch": 0.22, + "learning_rate": 5.879302191758114e-06, + "loss": 0.3122, + "step": 5790 + }, + { + "epoch": 0.22, + "learning_rate": 5.864119325252328e-06, + "loss": 0.3138, + "step": 5820 + }, + { + "epoch": 0.22, + "learning_rate": 5.8490534803923536e-06, + "loss": 0.2888, + "step": 5850 + }, + { + "epoch": 0.22, + "learning_rate": 5.8341031616244595e-06, + "loss": 0.3032, + "step": 5880 + }, + { + "epoch": 0.22, + "learning_rate": 5.819266900017594e-06, + "loss": 0.3255, + "step": 5910 + }, + { + "epoch": 0.22, + "learning_rate": 5.804543252657151e-06, + "loss": 0.3174, + "step": 5940 + }, + { + "epoch": 0.22, + "learning_rate": 5.789930802055526e-06, + "loss": 0.3083, + "step": 5970 + }, + { + "epoch": 0.22, + "learning_rate": 5.775428155578901e-06, + "loss": 0.292, + "step": 6000 + }, + { + "epoch": 0.22, + "learning_rate": 5.761033944889748e-06, + "loss": 0.3553, + "step": 6030 + }, + { + "epoch": 0.23, + "learning_rate": 5.746746825404546e-06, + "loss": 0.316, + "step": 6060 + }, + { + "epoch": 0.23, + "learning_rate": 5.7325654757662186e-06, + "loss": 0.3385, + "step": 6090 + }, + { + "epoch": 0.23, + "learning_rate": 5.718956157005957e-06, + "loss": 0.3383, + "step": 6120 + }, + { + "epoch": 0.23, + "learning_rate": 5.704979053851296e-06, + "loss": 0.3152, + "step": 6150 + }, + { + "epoch": 0.23, + "learning_rate": 5.691103932244723e-06, + "loss": 0.2978, + "step": 6180 + }, + { + "epoch": 0.23, + "learning_rate": 5.677329558036595e-06, + "loss": 0.3089, + "step": 6210 + }, + { + "epoch": 0.23, + "learning_rate": 5.66365471788599e-06, + "loss": 0.3295, + "step": 6240 + }, + { + "epoch": 0.23, + "learning_rate": 5.650078218811767e-06, + "loss": 0.2739, + "step": 6270 + }, + { + "epoch": 0.23, + "learning_rate": 5.636598887755416e-06, + "loss": 0.3879, + "step": 6300 + }, + { + "epoch": 0.24, + "learning_rate": 5.623215571155333e-06, + "loss": 0.2913, + "step": 6330 + }, + { + "epoch": 0.24, + "learning_rate": 5.609927134532144e-06, + "loss": 0.3459, + "step": 6360 + }, + { + "epoch": 0.24, + "learning_rate": 5.596732462084774e-06, + "loss": 0.3362, + "step": 6390 + }, + { + "epoch": 0.24, + "learning_rate": 5.583630456296903e-06, + "loss": 0.3566, + "step": 6420 + }, + { + "epoch": 0.24, + "learning_rate": 5.570620037553528e-06, + "loss": 0.2793, + "step": 6450 + }, + { + "epoch": 0.24, + "learning_rate": 5.557700143767285e-06, + "loss": 0.3272, + "step": 6480 + }, + { + "epoch": 0.24, + "learning_rate": 5.544869730014276e-06, + "loss": 0.2916, + "step": 6510 + }, + { + "epoch": 0.24, + "learning_rate": 5.532127768179103e-06, + "loss": 0.327, + "step": 6540 + }, + { + "epoch": 0.25, + "learning_rate": 5.519473246608816e-06, + "loss": 0.33, + "step": 6570 + }, + { + "epoch": 0.25, + "learning_rate": 5.50690516977555e-06, + "loss": 0.2756, + "step": 6600 + }, + { + "epoch": 0.25, + "learning_rate": 5.494422557947562e-06, + "loss": 0.3566, + "step": 6630 + }, + { + "epoch": 0.25, + "learning_rate": 5.482024446868429e-06, + "loss": 0.3124, + "step": 6660 + }, + { + "epoch": 0.25, + "learning_rate": 5.469709887444195e-06, + "loss": 0.3601, + "step": 6690 + }, + { + "epoch": 0.25, + "learning_rate": 5.457477945438182e-06, + "loss": 0.3182, + "step": 6720 + }, + { + "epoch": 0.25, + "learning_rate": 5.445327701173302e-06, + "loss": 0.3418, + "step": 6750 + }, + { + "epoch": 0.25, + "learning_rate": 5.433258249241613e-06, + "loss": 0.2957, + "step": 6780 + }, + { + "epoch": 0.25, + "learning_rate": 5.4212686982209314e-06, + "loss": 0.3465, + "step": 6810 + }, + { + "epoch": 0.26, + "learning_rate": 5.409358170398289e-06, + "loss": 0.3435, + "step": 6840 + }, + { + "epoch": 0.26, + "learning_rate": 5.397525801500045e-06, + "loss": 0.3535, + "step": 6870 + }, + { + "epoch": 0.26, + "learning_rate": 5.385770740428468e-06, + "loss": 0.313, + "step": 6900 + }, + { + "epoch": 0.26, + "learning_rate": 5.37409214900459e-06, + "loss": 0.3222, + "step": 6930 + }, + { + "epoch": 0.26, + "learning_rate": 5.362489201717187e-06, + "loss": 0.2975, + "step": 6960 + }, + { + "epoch": 0.26, + "learning_rate": 5.350961085477669e-06, + "loss": 0.3027, + "step": 6990 + }, + { + "epoch": 0.26, + "learning_rate": 5.339506999380768e-06, + "loss": 0.2935, + "step": 7020 + }, + { + "epoch": 0.26, + "learning_rate": 5.3281261544708195e-06, + "loss": 0.2793, + "step": 7050 + }, + { + "epoch": 0.26, + "learning_rate": 5.316817773513507e-06, + "loss": 0.2699, + "step": 7080 + }, + { + "epoch": 0.27, + "learning_rate": 5.30558109077291e-06, + "loss": 0.3002, + "step": 7110 + }, + { + "epoch": 0.27, + "learning_rate": 5.2947864079915216e-06, + "loss": 0.3071, + "step": 7140 + }, + { + "epoch": 0.27, + "learning_rate": 5.283688541235345e-06, + "loss": 0.2798, + "step": 7170 + }, + { + "epoch": 0.27, + "learning_rate": 5.272660166570372e-06, + "loss": 0.3221, + "step": 7200 + }, + { + "epoch": 0.27, + "learning_rate": 5.261700561773909e-06, + "loss": 0.2804, + "step": 7230 + }, + { + "epoch": 0.27, + "learning_rate": 5.250809015088132e-06, + "loss": 0.3268, + "step": 7260 + }, + { + "epoch": 0.27, + "learning_rate": 5.239984825025921e-06, + "loss": 0.3198, + "step": 7290 + }, + { + "epoch": 0.27, + "learning_rate": 5.229227300181094e-06, + "loss": 0.308, + "step": 7320 + }, + { + "epoch": 0.27, + "learning_rate": 5.218535759042913e-06, + "loss": 0.3606, + "step": 7350 + }, + { + "epoch": 0.28, + "learning_rate": 5.2079095298147244e-06, + "loss": 0.295, + "step": 7380 + }, + { + "epoch": 0.28, + "learning_rate": 5.197347950236668e-06, + "loss": 0.3313, + "step": 7410 + }, + { + "epoch": 0.28, + "learning_rate": 5.18685036741231e-06, + "loss": 0.3253, + "step": 7440 + }, + { + "epoch": 0.28, + "learning_rate": 5.176416137639122e-06, + "loss": 0.2724, + "step": 7470 + }, + { + "epoch": 0.28, + "learning_rate": 5.166044626242685e-06, + "loss": 0.3162, + "step": 7500 + }, + { + "epoch": 0.28, + "learning_rate": 5.155735207414551e-06, + "loss": 0.3187, + "step": 7530 + }, + { + "epoch": 0.28, + "learning_rate": 5.145487264053619e-06, + "loss": 0.2858, + "step": 7560 + }, + { + "epoch": 0.28, + "learning_rate": 5.135300187611006e-06, + "loss": 0.3, + "step": 7590 + }, + { + "epoch": 0.28, + "learning_rate": 5.125173377938251e-06, + "loss": 0.3222, + "step": 7620 + }, + { + "epoch": 0.29, + "learning_rate": 5.115106243138807e-06, + "loss": 0.2799, + "step": 7650 + }, + { + "epoch": 0.29, + "learning_rate": 5.105098199422739e-06, + "loss": 0.2617, + "step": 7680 + }, + { + "epoch": 0.29, + "learning_rate": 5.095148670964526e-06, + "loss": 0.3158, + "step": 7710 + }, + { + "epoch": 0.29, + "learning_rate": 5.085257089763904e-06, + "loss": 0.2533, + "step": 7740 + }, + { + "epoch": 0.29, + "learning_rate": 5.075422895509673e-06, + "loss": 0.2548, + "step": 7770 + }, + { + "epoch": 0.29, + "learning_rate": 5.065645535446375e-06, + "loss": 0.2719, + "step": 7800 + }, + { + "epoch": 0.29, + "learning_rate": 5.055924464243811e-06, + "loss": 0.2877, + "step": 7830 + }, + { + "epoch": 0.29, + "learning_rate": 5.046259143869278e-06, + "loss": 0.3081, + "step": 7860 + }, + { + "epoch": 0.29, + "learning_rate": 5.0366490434624956e-06, + "loss": 0.3128, + "step": 7890 + }, + { + "epoch": 0.3, + "learning_rate": 5.027093639213143e-06, + "loss": 0.323, + "step": 7920 + }, + { + "epoch": 0.3, + "learning_rate": 5.017592414240932e-06, + "loss": 0.2887, + "step": 7950 + }, + { + "epoch": 0.3, + "learning_rate": 5.008144858478176e-06, + "loss": 0.3268, + "step": 7980 + }, + { + "epoch": 0.3, + "learning_rate": 4.9987504685547725e-06, + "loss": 0.3533, + "step": 8010 + }, + { + "epoch": 0.3, + "learning_rate": 4.989408747685563e-06, + "loss": 0.2842, + "step": 8040 + }, + { + "epoch": 0.3, + "learning_rate": 4.980119205559974e-06, + "loss": 0.254, + "step": 8070 + }, + { + "epoch": 0.3, + "learning_rate": 4.970881358233946e-06, + "loss": 0.2835, + "step": 8100 + }, + { + "epoch": 0.3, + "learning_rate": 4.961694728024028e-06, + "loss": 0.3396, + "step": 8130 + }, + { + "epoch": 0.3, + "learning_rate": 4.952558843403633e-06, + "loss": 0.3007, + "step": 8160 + }, + { + "epoch": 0.31, + "learning_rate": 4.943473238901383e-06, + "loss": 0.3001, + "step": 8190 + }, + { + "epoch": 0.31, + "learning_rate": 4.934437455001497e-06, + "loss": 0.3292, + "step": 8220 + }, + { + "epoch": 0.31, + "learning_rate": 4.925451038046182e-06, + "loss": 0.2772, + "step": 8250 + }, + { + "epoch": 0.31, + "learning_rate": 4.916513540139955e-06, + "loss": 0.3337, + "step": 8280 + }, + { + "epoch": 0.31, + "learning_rate": 4.907624519055888e-06, + "loss": 0.2756, + "step": 8310 + }, + { + "epoch": 0.31, + "learning_rate": 4.898783538143701e-06, + "loss": 0.3178, + "step": 8340 + }, + { + "epoch": 0.31, + "learning_rate": 4.889990166239664e-06, + "loss": 0.321, + "step": 8370 + }, + { + "epoch": 0.31, + "learning_rate": 4.881243977578285e-06, + "loss": 0.3156, + "step": 8400 + }, + { + "epoch": 0.31, + "learning_rate": 4.872544551705718e-06, + "loss": 0.2833, + "step": 8430 + }, + { + "epoch": 0.32, + "learning_rate": 4.8638914733948715e-06, + "loss": 0.2803, + "step": 8460 + }, + { + "epoch": 0.32, + "learning_rate": 4.85528433256216e-06, + "loss": 0.2551, + "step": 8490 + }, + { + "epoch": 0.32, + "learning_rate": 4.8467227241858775e-06, + "loss": 0.2435, + "step": 8520 + }, + { + "epoch": 0.32, + "learning_rate": 4.838206248226147e-06, + "loss": 0.2682, + "step": 8550 + }, + { + "epoch": 0.32, + "learning_rate": 4.829734509546414e-06, + "loss": 0.3036, + "step": 8580 + }, + { + "epoch": 0.32, + "learning_rate": 4.821307117836431e-06, + "loss": 0.2704, + "step": 8610 + }, + { + "epoch": 0.32, + "learning_rate": 4.8129236875367506e-06, + "loss": 0.299, + "step": 8640 + }, + { + "epoch": 0.32, + "learning_rate": 4.804583837764616e-06, + "loss": 0.2679, + "step": 8670 + }, + { + "epoch": 0.32, + "learning_rate": 4.796287192241285e-06, + "loss": 0.2749, + "step": 8700 + }, + { + "epoch": 0.33, + "learning_rate": 4.788033379220728e-06, + "loss": 0.2858, + "step": 8730 + }, + { + "epoch": 0.33, + "learning_rate": 4.779822031419661e-06, + "loss": 0.2544, + "step": 8760 + }, + { + "epoch": 0.33, + "learning_rate": 4.771652785948902e-06, + "loss": 0.3259, + "step": 8790 + }, + { + "epoch": 0.33, + "learning_rate": 4.763525284246008e-06, + "loss": 0.3, + "step": 8820 + }, + { + "epoch": 0.33, + "learning_rate": 4.755439172009178e-06, + "loss": 0.2682, + "step": 8850 + }, + { + "epoch": 0.33, + "learning_rate": 4.7473940991323755e-06, + "loss": 0.2911, + "step": 8880 + }, + { + "epoch": 0.33, + "learning_rate": 4.739389719641665e-06, + "loss": 0.2617, + "step": 8910 + }, + { + "epoch": 0.33, + "learning_rate": 4.731425691632717e-06, + "loss": 0.2739, + "step": 8940 + }, + { + "epoch": 0.33, + "learning_rate": 4.723501677209465e-06, + "loss": 0.259, + "step": 8970 + }, + { + "epoch": 0.34, + "learning_rate": 4.715617342423896e-06, + "loss": 0.3113, + "step": 9000 + }, + { + "epoch": 0.34, + "learning_rate": 4.707772357216934e-06, + "loss": 0.2924, + "step": 9030 + }, + { + "epoch": 0.34, + "learning_rate": 4.69996639536041e-06, + "loss": 0.2422, + "step": 9060 + }, + { + "epoch": 0.34, + "learning_rate": 4.692199134400076e-06, + "loss": 0.2919, + "step": 9090 + }, + { + "epoch": 0.34, + "learning_rate": 4.684470255599664e-06, + "loss": 0.2697, + "step": 9120 + }, + { + "epoch": 0.34, + "learning_rate": 4.676779443885949e-06, + "loss": 0.2681, + "step": 9150 + }, + { + "epoch": 0.34, + "learning_rate": 4.669126387794794e-06, + "loss": 0.2753, + "step": 9180 + }, + { + "epoch": 0.34, + "learning_rate": 4.6615107794181814e-06, + "loss": 0.2761, + "step": 9210 + }, + { + "epoch": 0.34, + "learning_rate": 4.653932314352172e-06, + "loss": 0.2615, + "step": 9240 + }, + { + "epoch": 0.35, + "learning_rate": 4.646390691645805e-06, + "loss": 0.2758, + "step": 9270 + }, + { + "epoch": 0.35, + "learning_rate": 4.638885613750897e-06, + "loss": 0.2362, + "step": 9300 + }, + { + "epoch": 0.35, + "learning_rate": 4.6314167864727375e-06, + "loss": 0.2778, + "step": 9330 + }, + { + "epoch": 0.35, + "learning_rate": 4.62398391892165e-06, + "loss": 0.2577, + "step": 9360 + }, + { + "epoch": 0.35, + "learning_rate": 4.616586723465408e-06, + "loss": 0.2716, + "step": 9390 + }, + { + "epoch": 0.35, + "learning_rate": 4.609224915682485e-06, + "loss": 0.2567, + "step": 9420 + }, + { + "epoch": 0.35, + "learning_rate": 4.601898214316119e-06, + "loss": 0.2956, + "step": 9450 + }, + { + "epoch": 0.35, + "learning_rate": 4.5946063412291834e-06, + "loss": 0.2766, + "step": 9480 + }, + { + "epoch": 0.35, + "learning_rate": 4.587349021359836e-06, + "loss": 0.2743, + "step": 9510 + }, + { + "epoch": 0.36, + "learning_rate": 4.580125982677943e-06, + "loss": 0.2914, + "step": 9540 + }, + { + "epoch": 0.36, + "learning_rate": 4.572936956142248e-06, + "loss": 0.2849, + "step": 9570 + }, + { + "epoch": 0.36, + "learning_rate": 4.565781675658282e-06, + "loss": 0.2763, + "step": 9600 + }, + { + "epoch": 0.36, + "learning_rate": 4.5586598780369954e-06, + "loss": 0.2888, + "step": 9630 + }, + { + "epoch": 0.36, + "learning_rate": 4.5515713029541005e-06, + "loss": 0.2379, + "step": 9660 + }, + { + "epoch": 0.36, + "learning_rate": 4.5445156929101e-06, + "loss": 0.2716, + "step": 9690 + }, + { + "epoch": 0.36, + "learning_rate": 4.537492793190998e-06, + "loss": 0.2842, + "step": 9720 + }, + { + "epoch": 0.36, + "learning_rate": 4.530502351829687e-06, + "loss": 0.2865, + "step": 9750 + }, + { + "epoch": 0.36, + "learning_rate": 4.5235441195679665e-06, + "loss": 0.2819, + "step": 9780 + }, + { + "epoch": 0.37, + "learning_rate": 4.51661784981922e-06, + "loss": 0.2526, + "step": 9810 + }, + { + "epoch": 0.37, + "learning_rate": 4.509723298631711e-06, + "loss": 0.274, + "step": 9840 + }, + { + "epoch": 0.37, + "learning_rate": 4.5028602246524934e-06, + "loss": 0.2711, + "step": 9870 + }, + { + "epoch": 0.37, + "learning_rate": 4.496028389091924e-06, + "loss": 0.2126, + "step": 9900 + }, + { + "epoch": 0.37, + "learning_rate": 4.489227555688767e-06, + "loss": 0.2575, + "step": 9930 + }, + { + "epoch": 0.37, + "learning_rate": 4.482457490675879e-06, + "loss": 0.2622, + "step": 9960 + }, + { + "epoch": 0.37, + "learning_rate": 4.475717962746456e-06, + "loss": 0.2528, + "step": 9990 + }, + { + "epoch": 0.37, + "learning_rate": 4.469008743020842e-06, + "loss": 0.2992, + "step": 10020 + }, + { + "epoch": 0.37, + "learning_rate": 4.462329605013882e-06, + "loss": 0.3006, + "step": 10050 + }, + { + "epoch": 0.38, + "learning_rate": 4.455680324602811e-06, + "loss": 0.2729, + "step": 10080 + }, + { + "epoch": 0.38, + "learning_rate": 4.4490606799956615e-06, + "loss": 0.2525, + "step": 10110 + }, + { + "epoch": 0.38, + "learning_rate": 4.442470451700195e-06, + "loss": 0.215, + "step": 10140 + }, + { + "epoch": 0.38, + "learning_rate": 4.435909422493333e-06, + "loss": 0.2413, + "step": 10170 + }, + { + "epoch": 0.38, + "learning_rate": 4.429377377391085e-06, + "loss": 0.2817, + "step": 10200 + }, + { + "epoch": 0.38, + "learning_rate": 4.42287410361896e-06, + "loss": 0.3017, + "step": 10230 + }, + { + "epoch": 0.38, + "learning_rate": 4.416399390582857e-06, + "loss": 0.2611, + "step": 10260 + }, + { + "epoch": 0.38, + "learning_rate": 4.409953029840417e-06, + "loss": 0.3096, + "step": 10290 + }, + { + "epoch": 0.38, + "learning_rate": 4.403534815072848e-06, + "loss": 0.2704, + "step": 10320 + }, + { + "epoch": 0.39, + "learning_rate": 4.397144542057179e-06, + "loss": 0.2775, + "step": 10350 + }, + { + "epoch": 0.39, + "learning_rate": 4.390782008638974e-06, + "loss": 0.3019, + "step": 10380 + }, + { + "epoch": 0.39, + "learning_rate": 4.3844470147054704e-06, + "loss": 0.2804, + "step": 10410 + }, + { + "epoch": 0.39, + "learning_rate": 4.378139362159141e-06, + "loss": 0.274, + "step": 10440 + }, + { + "epoch": 0.39, + "learning_rate": 4.371858854891681e-06, + "loss": 0.2692, + "step": 10470 + }, + { + "epoch": 0.39, + "learning_rate": 4.365605298758394e-06, + "loss": 0.2608, + "step": 10500 + }, + { + "epoch": 0.39, + "learning_rate": 4.359378501552986e-06, + "loss": 0.245, + "step": 10530 + }, + { + "epoch": 0.39, + "learning_rate": 4.353178272982758e-06, + "loss": 0.2776, + "step": 10560 + }, + { + "epoch": 0.39, + "learning_rate": 4.347004424644176e-06, + "loss": 0.2599, + "step": 10590 + }, + { + "epoch": 0.4, + "learning_rate": 4.340856769998823e-06, + "loss": 0.2604, + "step": 10620 + }, + { + "epoch": 0.4, + "learning_rate": 4.3347351243497375e-06, + "loss": 0.2437, + "step": 10650 + }, + { + "epoch": 0.4, + "learning_rate": 4.3286393048180905e-06, + "loss": 0.2808, + "step": 10680 + }, + { + "epoch": 0.4, + "learning_rate": 4.322569130320256e-06, + "loss": 0.2593, + "step": 10710 + }, + { + "epoch": 0.4, + "learning_rate": 4.316524421545204e-06, + "loss": 0.278, + "step": 10740 + }, + { + "epoch": 0.4, + "learning_rate": 4.310505000932264e-06, + "loss": 0.242, + "step": 10770 + }, + { + "epoch": 0.4, + "learning_rate": 4.304510692649209e-06, + "loss": 0.2666, + "step": 10800 + }, + { + "epoch": 0.4, + "learning_rate": 4.298541322570686e-06, + "loss": 0.2539, + "step": 10830 + }, + { + "epoch": 0.41, + "learning_rate": 4.2925967182569775e-06, + "loss": 0.2709, + "step": 10860 + }, + { + "epoch": 0.41, + "learning_rate": 4.286676708933067e-06, + "loss": 0.2264, + "step": 10890 + }, + { + "epoch": 0.41, + "learning_rate": 4.280781125468046e-06, + "loss": 0.2364, + "step": 10920 + }, + { + "epoch": 0.41, + "learning_rate": 4.274909800354809e-06, + "loss": 0.2534, + "step": 10950 + }, + { + "epoch": 0.41, + "learning_rate": 4.269062567690073e-06, + "loss": 0.248, + "step": 10980 + }, + { + "epoch": 0.41, + "learning_rate": 4.263239263154682e-06, + "loss": 0.2808, + "step": 11010 + }, + { + "epoch": 0.41, + "learning_rate": 4.257439723994211e-06, + "loss": 0.2815, + "step": 11040 + }, + { + "epoch": 0.41, + "learning_rate": 4.251663788999866e-06, + "loss": 0.2839, + "step": 11070 + }, + { + "epoch": 0.41, + "learning_rate": 4.245911298489661e-06, + "loss": 0.2275, + "step": 11100 + }, + { + "epoch": 0.42, + "learning_rate": 4.24018209428987e-06, + "loss": 0.2606, + "step": 11130 + }, + { + "epoch": 0.42, + "learning_rate": 4.234476019716772e-06, + "loss": 0.2492, + "step": 11160 + }, + { + "epoch": 0.42, + "learning_rate": 4.228792919558642e-06, + "loss": 0.244, + "step": 11190 + }, + { + "epoch": 0.42, + "learning_rate": 4.2233209499878915e-06, + "loss": 0.2441, + "step": 11220 + }, + { + "epoch": 0.42, + "learning_rate": 4.217682585651912e-06, + "loss": 0.2401, + "step": 11250 + }, + { + "epoch": 0.42, + "learning_rate": 4.212066743759049e-06, + "loss": 0.3018, + "step": 11280 + }, + { + "epoch": 0.42, + "learning_rate": 4.206473274764352e-06, + "loss": 0.2571, + "step": 11310 + }, + { + "epoch": 0.42, + "learning_rate": 4.200902030509323e-06, + "loss": 0.2204, + "step": 11340 + }, + { + "epoch": 0.42, + "learning_rate": 4.195352864205429e-06, + "loss": 0.2832, + "step": 11370 + }, + { + "epoch": 0.43, + "learning_rate": 4.18982563041786e-06, + "loss": 0.2632, + "step": 11400 + }, + { + "epoch": 0.43, + "learning_rate": 4.184320185049522e-06, + "loss": 0.2718, + "step": 11430 + }, + { + "epoch": 0.43, + "learning_rate": 4.178836385325257e-06, + "loss": 0.2863, + "step": 11460 + }, + { + "epoch": 0.43, + "learning_rate": 4.173374089776288e-06, + "loss": 0.221, + "step": 11490 + }, + { + "epoch": 0.43, + "learning_rate": 4.167933158224896e-06, + "loss": 0.2486, + "step": 11520 + }, + { + "epoch": 0.43, + "learning_rate": 4.1625134517693085e-06, + "loss": 0.2539, + "step": 11550 + }, + { + "epoch": 0.43, + "learning_rate": 4.157114832768805e-06, + "loss": 0.2283, + "step": 11580 + }, + { + "epoch": 0.43, + "learning_rate": 4.151737164829035e-06, + "loss": 0.2161, + "step": 11610 + }, + { + "epoch": 0.43, + "learning_rate": 4.146380312787547e-06, + "loss": 0.2378, + "step": 11640 + }, + { + "epoch": 0.44, + "learning_rate": 4.141044142699519e-06, + "loss": 0.2391, + "step": 11670 + }, + { + "epoch": 0.44, + "learning_rate": 4.135728521823688e-06, + "loss": 0.2288, + "step": 11700 + }, + { + "epoch": 0.44, + "learning_rate": 4.130433318608488e-06, + "loss": 0.2284, + "step": 11730 + }, + { + "epoch": 0.44, + "learning_rate": 4.125158402678369e-06, + "loss": 0.2769, + "step": 11760 + }, + { + "epoch": 0.44, + "learning_rate": 4.11990364482032e-06, + "loss": 0.2132, + "step": 11790 + }, + { + "epoch": 0.44, + "learning_rate": 4.114668916970569e-06, + "loss": 0.2419, + "step": 11820 + }, + { + "epoch": 0.44, + "learning_rate": 4.109454092201482e-06, + "loss": 0.2575, + "step": 11850 + }, + { + "epoch": 0.44, + "learning_rate": 4.104259044708631e-06, + "loss": 0.242, + "step": 11880 + }, + { + "epoch": 0.44, + "learning_rate": 4.099083649798046e-06, + "loss": 0.2375, + "step": 11910 + }, + { + "epoch": 0.45, + "learning_rate": 4.0939277838736505e-06, + "loss": 0.2461, + "step": 11940 + }, + { + "epoch": 0.45, + "learning_rate": 4.0887913244248565e-06, + "loss": 0.2147, + "step": 11970 + }, + { + "epoch": 0.45, + "learning_rate": 4.083674150014344e-06, + "loss": 0.2503, + "step": 12000 + }, + { + "epoch": 0.45, + "learning_rate": 4.078576140266e-06, + "loss": 0.2648, + "step": 12030 + }, + { + "epoch": 0.45, + "learning_rate": 4.073497175853025e-06, + "loss": 0.227, + "step": 12060 + }, + { + "epoch": 0.45, + "learning_rate": 4.068437138486202e-06, + "loss": 0.2633, + "step": 12090 + }, + { + "epoch": 0.45, + "learning_rate": 4.0633959109023236e-06, + "loss": 0.2051, + "step": 12120 + }, + { + "epoch": 0.45, + "learning_rate": 4.0583733768527826e-06, + "loss": 0.2756, + "step": 12150 + }, + { + "epoch": 0.45, + "learning_rate": 4.053369421092312e-06, + "loss": 0.258, + "step": 12180 + }, + { + "epoch": 0.46, + "learning_rate": 4.048383929367871e-06, + "loss": 0.2311, + "step": 12210 + }, + { + "epoch": 0.46, + "learning_rate": 4.043416788407701e-06, + "loss": 0.2362, + "step": 12240 + }, + { + "epoch": 0.46, + "learning_rate": 4.038467885910504e-06, + "loss": 0.2256, + "step": 12270 + }, + { + "epoch": 0.46, + "learning_rate": 4.03353711053479e-06, + "loss": 0.2869, + "step": 12300 + }, + { + "epoch": 0.46, + "learning_rate": 4.0286243518883425e-06, + "loss": 0.2358, + "step": 12330 + }, + { + "epoch": 0.46, + "learning_rate": 4.023729500517853e-06, + "loss": 0.2447, + "step": 12360 + }, + { + "epoch": 0.46, + "learning_rate": 4.018852447898667e-06, + "loss": 0.2623, + "step": 12390 + }, + { + "epoch": 0.46, + "learning_rate": 4.013993086424682e-06, + "loss": 0.2221, + "step": 12420 + }, + { + "epoch": 0.46, + "learning_rate": 4.009151309398378e-06, + "loss": 0.2294, + "step": 12450 + }, + { + "epoch": 0.47, + "learning_rate": 4.004327011020975e-06, + "loss": 0.2331, + "step": 12480 + }, + { + "epoch": 0.47, + "learning_rate": 3.999520086382724e-06, + "loss": 0.2695, + "step": 12510 + }, + { + "epoch": 0.47, + "learning_rate": 3.994730431453324e-06, + "loss": 0.2675, + "step": 12540 + }, + { + "epoch": 0.47, + "learning_rate": 3.989957943072466e-06, + "loss": 0.2398, + "step": 12570 + }, + { + "epoch": 0.47, + "learning_rate": 3.985202518940505e-06, + "loss": 0.2182, + "step": 12600 + }, + { + "epoch": 0.47, + "learning_rate": 3.980464057609243e-06, + "loss": 0.2384, + "step": 12630 + }, + { + "epoch": 0.47, + "learning_rate": 3.975742458472844e-06, + "loss": 0.2626, + "step": 12660 + }, + { + "epoch": 0.47, + "learning_rate": 3.971037621758861e-06, + "loss": 0.2213, + "step": 12690 + }, + { + "epoch": 0.47, + "learning_rate": 3.966349448519381e-06, + "loss": 0.2146, + "step": 12720 + }, + { + "epoch": 0.48, + "learning_rate": 3.961677840622287e-06, + "loss": 0.2705, + "step": 12750 + }, + { + "epoch": 0.48, + "learning_rate": 3.957022700742623e-06, + "loss": 0.2623, + "step": 12780 + }, + { + "epoch": 0.48, + "learning_rate": 3.952383932354089e-06, + "loss": 0.2269, + "step": 12810 + }, + { + "epoch": 0.48, + "learning_rate": 3.947761439720625e-06, + "loss": 0.2925, + "step": 12840 + }, + { + "epoch": 0.48, + "learning_rate": 3.94315512788812e-06, + "loss": 0.1932, + "step": 12870 + }, + { + "epoch": 0.48, + "learning_rate": 3.9385649026762125e-06, + "loss": 0.2326, + "step": 12900 + }, + { + "epoch": 0.48, + "learning_rate": 3.9339906706702065e-06, + "loss": 0.205, + "step": 12930 + }, + { + "epoch": 0.48, + "learning_rate": 3.929432339213089e-06, + "loss": 0.2248, + "step": 12960 + }, + { + "epoch": 0.48, + "learning_rate": 3.92488981639764e-06, + "loss": 0.2375, + "step": 12990 + }, + { + "epoch": 0.49, + "learning_rate": 3.920363011058658e-06, + "loss": 0.2357, + "step": 13020 + }, + { + "epoch": 0.49, + "learning_rate": 3.91585183276527e-06, + "loss": 0.2548, + "step": 13050 + }, + { + "epoch": 0.49, + "learning_rate": 3.911356191813345e-06, + "loss": 0.2319, + "step": 13080 + }, + { + "epoch": 0.49, + "learning_rate": 3.906875999218003e-06, + "loss": 0.2322, + "step": 13110 + }, + { + "epoch": 0.49, + "learning_rate": 3.902411166706214e-06, + "loss": 0.2329, + "step": 13140 + }, + { + "epoch": 0.49, + "learning_rate": 3.897961606709499e-06, + "loss": 0.2551, + "step": 13170 + }, + { + "epoch": 0.49, + "learning_rate": 3.893527232356712e-06, + "loss": 0.2108, + "step": 13200 + }, + { + "epoch": 0.49, + "learning_rate": 3.8891079574669165e-06, + "loss": 0.2038, + "step": 13230 + }, + { + "epoch": 0.49, + "learning_rate": 3.884703696542354e-06, + "loss": 0.2323, + "step": 13260 + }, + { + "epoch": 0.5, + "learning_rate": 3.8803143647614936e-06, + "loss": 0.2142, + "step": 13290 + }, + { + "epoch": 0.5, + "learning_rate": 3.87593987797217e-06, + "loss": 0.2314, + "step": 13320 + }, + { + "epoch": 0.5, + "learning_rate": 3.871580152684815e-06, + "loss": 0.2061, + "step": 13350 + }, + { + "epoch": 0.5, + "learning_rate": 3.867235106065752e-06, + "loss": 0.2245, + "step": 13380 + }, + { + "epoch": 0.5, + "learning_rate": 3.862904655930595e-06, + "loss": 0.2586, + "step": 13410 + }, + { + "epoch": 0.5, + "learning_rate": 3.858588720737721e-06, + "loss": 0.2408, + "step": 13440 + }, + { + "epoch": 0.5, + "learning_rate": 3.854287219581815e-06, + "loss": 0.2, + "step": 13470 + }, + { + "epoch": 0.5, + "learning_rate": 3.850000072187502e-06, + "loss": 0.225, + "step": 13500 + }, + { + "epoch": 0.5, + "learning_rate": 3.8457271989030575e-06, + "loss": 0.2431, + "step": 13530 + }, + { + "epoch": 0.51, + "learning_rate": 3.841468520694188e-06, + "loss": 0.2341, + "step": 13560 + }, + { + "epoch": 0.51, + "learning_rate": 3.837223959137896e-06, + "loss": 0.2108, + "step": 13590 + }, + { + "epoch": 0.51, + "learning_rate": 3.832993436416406e-06, + "loss": 0.206, + "step": 13620 + }, + { + "epoch": 0.51, + "learning_rate": 3.828776875311186e-06, + "loss": 0.2474, + "step": 13650 + }, + { + "epoch": 0.51, + "learning_rate": 3.824574199197011e-06, + "loss": 0.2402, + "step": 13680 + }, + { + "epoch": 0.51, + "learning_rate": 3.820524739262436e-06, + "loss": 0.2061, + "step": 13710 + }, + { + "epoch": 0.51, + "learning_rate": 3.8163491490251235e-06, + "loss": 0.2226, + "step": 13740 + }, + { + "epoch": 0.51, + "learning_rate": 3.8121872198917364e-06, + "loss": 0.1776, + "step": 13770 + }, + { + "epoch": 0.51, + "learning_rate": 3.8080388775336093e-06, + "loss": 0.2507, + "step": 13800 + }, + { + "epoch": 0.52, + "learning_rate": 3.8039040481870244e-06, + "loss": 0.1796, + "step": 13830 + }, + { + "epoch": 0.52, + "learning_rate": 3.7997826586477104e-06, + "loss": 0.2571, + "step": 13860 + }, + { + "epoch": 0.52, + "learning_rate": 3.795674636265392e-06, + "loss": 0.2295, + "step": 13890 + }, + { + "epoch": 0.52, + "learning_rate": 3.7915799089384153e-06, + "loss": 0.2282, + "step": 13920 + }, + { + "epoch": 0.52, + "learning_rate": 3.7874984051084297e-06, + "loss": 0.2232, + "step": 13950 + }, + { + "epoch": 0.52, + "learning_rate": 3.7834300537551376e-06, + "loss": 0.2261, + "step": 13980 + }, + { + "epoch": 0.52, + "learning_rate": 3.7793747843911037e-06, + "loss": 0.2474, + "step": 14010 + }, + { + "epoch": 0.52, + "learning_rate": 3.775332527056626e-06, + "loss": 0.2066, + "step": 14040 + }, + { + "epoch": 0.52, + "learning_rate": 3.7713032123146647e-06, + "loss": 0.2008, + "step": 14070 + }, + { + "epoch": 0.53, + "learning_rate": 3.7672867712458343e-06, + "loss": 0.2688, + "step": 14100 + }, + { + "epoch": 0.53, + "learning_rate": 3.763283135443452e-06, + "loss": 0.2385, + "step": 14130 + }, + { + "epoch": 0.53, + "learning_rate": 3.7592922370086455e-06, + "loss": 0.24, + "step": 14160 + }, + { + "epoch": 0.53, + "learning_rate": 3.7553140085455136e-06, + "loss": 0.1846, + "step": 14190 + }, + { + "epoch": 0.53, + "learning_rate": 3.7513483831563473e-06, + "loss": 0.2544, + "step": 14220 + }, + { + "epoch": 0.53, + "learning_rate": 3.7473952944369068e-06, + "loss": 0.1966, + "step": 14250 + }, + { + "epoch": 0.53, + "learning_rate": 3.7434546764717492e-06, + "loss": 0.2499, + "step": 14280 + }, + { + "epoch": 0.53, + "learning_rate": 3.7395264638296123e-06, + "loss": 0.2198, + "step": 14310 + }, + { + "epoch": 0.53, + "learning_rate": 3.735610591558852e-06, + "loss": 0.227, + "step": 14340 + }, + { + "epoch": 0.54, + "learning_rate": 3.7318369179611134e-06, + "loss": 0.2223, + "step": 14370 + }, + { + "epoch": 0.54, + "learning_rate": 3.7279451274374513e-06, + "loss": 0.2398, + "step": 14400 + }, + { + "epoch": 0.54, + "learning_rate": 3.7240654873736155e-06, + "loss": 0.2662, + "step": 14430 + }, + { + "epoch": 0.54, + "learning_rate": 3.720197934676515e-06, + "loss": 0.2502, + "step": 14460 + }, + { + "epoch": 0.54, + "learning_rate": 3.7163424067107786e-06, + "loss": 0.2303, + "step": 14490 + }, + { + "epoch": 0.54, + "learning_rate": 3.712498841294488e-06, + "loss": 0.2081, + "step": 14520 + }, + { + "epoch": 0.54, + "learning_rate": 3.7086671766949745e-06, + "loss": 0.2286, + "step": 14550 + }, + { + "epoch": 0.54, + "learning_rate": 3.7048473516246474e-06, + "loss": 0.2246, + "step": 14580 + }, + { + "epoch": 0.54, + "learning_rate": 3.7010393052368808e-06, + "loss": 0.2142, + "step": 14610 + }, + { + "epoch": 0.55, + "learning_rate": 3.697242977121942e-06, + "loss": 0.2154, + "step": 14640 + }, + { + "epoch": 0.55, + "learning_rate": 3.693458307302965e-06, + "loss": 0.2299, + "step": 14670 + }, + { + "epoch": 0.55, + "learning_rate": 3.6896852362319716e-06, + "loss": 0.1961, + "step": 14700 + }, + { + "epoch": 0.55, + "learning_rate": 3.685923704785937e-06, + "loss": 0.2212, + "step": 14730 + }, + { + "epoch": 0.55, + "learning_rate": 3.6821736542628983e-06, + "loss": 0.2012, + "step": 14760 + }, + { + "epoch": 0.55, + "learning_rate": 3.6784350263781066e-06, + "loss": 0.1914, + "step": 14790 + }, + { + "epoch": 0.55, + "learning_rate": 3.674707763260221e-06, + "loss": 0.2305, + "step": 14820 + }, + { + "epoch": 0.55, + "learning_rate": 3.670991807447548e-06, + "loss": 0.2325, + "step": 14850 + }, + { + "epoch": 0.55, + "learning_rate": 3.6672871018843207e-06, + "loss": 0.222, + "step": 14880 + }, + { + "epoch": 0.56, + "learning_rate": 3.663593589917019e-06, + "loss": 0.2182, + "step": 14910 + }, + { + "epoch": 0.56, + "learning_rate": 3.65991121529073e-06, + "loss": 0.256, + "step": 14940 + }, + { + "epoch": 0.56, + "learning_rate": 3.656239922145549e-06, + "loss": 0.1909, + "step": 14970 + }, + { + "epoch": 0.56, + "learning_rate": 3.652579655013025e-06, + "loss": 0.1799, + "step": 15000 + }, + { + "epoch": 0.56, + "learning_rate": 3.6489303588126325e-06, + "loss": 0.2382, + "step": 15030 + }, + { + "epoch": 0.56, + "learning_rate": 3.645291978848299e-06, + "loss": 0.2181, + "step": 15060 + }, + { + "epoch": 0.56, + "learning_rate": 3.641664460804956e-06, + "loss": 0.2192, + "step": 15090 + }, + { + "epoch": 0.56, + "learning_rate": 3.6380477507451357e-06, + "loss": 0.2245, + "step": 15120 + }, + { + "epoch": 0.57, + "learning_rate": 3.6344417951056025e-06, + "loss": 0.2237, + "step": 15150 + }, + { + "epoch": 0.57, + "learning_rate": 3.6308465406940214e-06, + "loss": 0.2335, + "step": 15180 + }, + { + "epoch": 0.57, + "learning_rate": 3.62726193468566e-06, + "loss": 0.2129, + "step": 15210 + }, + { + "epoch": 0.57, + "learning_rate": 3.623687924620131e-06, + "loss": 0.1975, + "step": 15240 + }, + { + "epoch": 0.57, + "learning_rate": 3.6201244583981697e-06, + "loss": 0.2239, + "step": 15270 + }, + { + "epoch": 0.57, + "learning_rate": 3.6165714842784393e-06, + "loss": 0.2059, + "step": 15300 + }, + { + "epoch": 0.57, + "learning_rate": 3.6130289508743783e-06, + "loss": 0.22, + "step": 15330 + }, + { + "epoch": 0.57, + "learning_rate": 3.609496807151077e-06, + "loss": 0.2146, + "step": 15360 + }, + { + "epoch": 0.57, + "learning_rate": 3.6059750024221956e-06, + "loss": 0.2378, + "step": 15390 + }, + { + "epoch": 0.58, + "learning_rate": 3.6024634863468995e-06, + "loss": 0.2177, + "step": 15420 + }, + { + "epoch": 0.58, + "learning_rate": 3.598962208926845e-06, + "loss": 0.2528, + "step": 15450 + }, + { + "epoch": 0.58, + "learning_rate": 3.5954711205031867e-06, + "loss": 0.2179, + "step": 15480 + }, + { + "epoch": 0.58, + "learning_rate": 3.5919901717536176e-06, + "loss": 0.225, + "step": 15510 + }, + { + "epoch": 0.58, + "learning_rate": 3.588519313689446e-06, + "loss": 0.2327, + "step": 15540 + }, + { + "epoch": 0.58, + "learning_rate": 3.585058497652696e-06, + "loss": 0.2267, + "step": 15570 + }, + { + "epoch": 0.58, + "learning_rate": 3.5816076753132444e-06, + "loss": 0.1847, + "step": 15600 + }, + { + "epoch": 0.58, + "learning_rate": 3.5781667986659885e-06, + "loss": 0.218, + "step": 15630 + }, + { + "epoch": 0.58, + "learning_rate": 3.574735820028035e-06, + "loss": 0.2386, + "step": 15660 + }, + { + "epoch": 0.59, + "learning_rate": 3.57131469203593e-06, + "loss": 0.2043, + "step": 15690 + }, + { + "epoch": 0.59, + "learning_rate": 3.5679033676429114e-06, + "loss": 0.2702, + "step": 15720 + }, + { + "epoch": 0.59, + "learning_rate": 3.5645018001161923e-06, + "loss": 0.2302, + "step": 15750 + }, + { + "epoch": 0.59, + "learning_rate": 3.5611099430342683e-06, + "loss": 0.2406, + "step": 15780 + }, + { + "epoch": 0.59, + "learning_rate": 3.5577277502842616e-06, + "loss": 0.2107, + "step": 15810 + }, + { + "epoch": 0.59, + "learning_rate": 3.5543551760592854e-06, + "loss": 0.2309, + "step": 15840 + }, + { + "epoch": 0.59, + "learning_rate": 3.5509921748558384e-06, + "loss": 0.2256, + "step": 15870 + }, + { + "epoch": 0.59, + "learning_rate": 3.547638701471227e-06, + "loss": 0.2328, + "step": 15900 + }, + { + "epoch": 0.59, + "learning_rate": 3.5442947110010123e-06, + "loss": 0.2417, + "step": 15930 + }, + { + "epoch": 0.6, + "learning_rate": 3.5409601588364854e-06, + "loss": 0.197, + "step": 15960 + }, + { + "epoch": 0.6, + "learning_rate": 3.5376350006621686e-06, + "loss": 0.2551, + "step": 15990 + }, + { + "epoch": 0.6, + "learning_rate": 3.53431919245334e-06, + "loss": 0.1884, + "step": 16020 + }, + { + "epoch": 0.6, + "learning_rate": 3.531012690473587e-06, + "loss": 0.2096, + "step": 16050 + }, + { + "epoch": 0.6, + "learning_rate": 3.5277154512723837e-06, + "loss": 0.1941, + "step": 16080 + }, + { + "epoch": 0.6, + "learning_rate": 3.5244274316826905e-06, + "loss": 0.2387, + "step": 16110 + }, + { + "epoch": 0.6, + "learning_rate": 3.5211485888185817e-06, + "loss": 0.1942, + "step": 16140 + }, + { + "epoch": 0.6, + "learning_rate": 3.5178788800728976e-06, + "loss": 0.2117, + "step": 16170 + }, + { + "epoch": 0.6, + "learning_rate": 3.5146182631149166e-06, + "loss": 0.2085, + "step": 16200 + }, + { + "epoch": 0.61, + "learning_rate": 3.511366695888056e-06, + "loss": 0.2429, + "step": 16230 + }, + { + "epoch": 0.61, + "learning_rate": 3.50812413660759e-06, + "loss": 0.2289, + "step": 16260 + }, + { + "epoch": 0.61, + "learning_rate": 3.504890543758399e-06, + "loss": 0.2241, + "step": 16290 + }, + { + "epoch": 0.61, + "learning_rate": 3.5016658760927338e-06, + "loss": 0.195, + "step": 16320 + }, + { + "epoch": 0.61, + "learning_rate": 3.4984500926280053e-06, + "loss": 0.2107, + "step": 16350 + }, + { + "epoch": 0.61, + "learning_rate": 3.4952431526446017e-06, + "loss": 0.1986, + "step": 16380 + }, + { + "epoch": 0.61, + "learning_rate": 3.492045015683717e-06, + "loss": 0.2294, + "step": 16410 + }, + { + "epoch": 0.61, + "learning_rate": 3.4888556415452106e-06, + "loss": 0.2219, + "step": 16440 + }, + { + "epoch": 0.61, + "learning_rate": 3.485674990285484e-06, + "loss": 0.2011, + "step": 16470 + }, + { + "epoch": 0.62, + "learning_rate": 3.48250302221538e-06, + "loss": 0.2119, + "step": 16500 + }, + { + "epoch": 0.62, + "learning_rate": 3.4793396978981016e-06, + "loss": 0.2136, + "step": 16530 + }, + { + "epoch": 0.62, + "learning_rate": 3.4761849781471552e-06, + "loss": 0.1887, + "step": 16560 + }, + { + "epoch": 0.62, + "learning_rate": 3.4730388240243084e-06, + "loss": 0.2112, + "step": 16590 + }, + { + "epoch": 0.62, + "learning_rate": 3.469901196837573e-06, + "loss": 0.1988, + "step": 16620 + }, + { + "epoch": 0.62, + "learning_rate": 3.4667720581392095e-06, + "loss": 0.1944, + "step": 16650 + }, + { + "epoch": 0.62, + "learning_rate": 3.4636513697237406e-06, + "loss": 0.208, + "step": 16680 + }, + { + "epoch": 0.62, + "learning_rate": 3.460539093626001e-06, + "loss": 0.1934, + "step": 16710 + }, + { + "epoch": 0.62, + "learning_rate": 3.45743519211919e-06, + "loss": 0.175, + "step": 16740 + }, + { + "epoch": 0.63, + "learning_rate": 3.4543396277129578e-06, + "loss": 0.2059, + "step": 16770 + }, + { + "epoch": 0.63, + "learning_rate": 3.451252363151495e-06, + "loss": 0.2617, + "step": 16800 + }, + { + "epoch": 0.63, + "learning_rate": 3.4481733614116573e-06, + "loss": 0.1825, + "step": 16830 + }, + { + "epoch": 0.63, + "learning_rate": 3.4451025857010983e-06, + "loss": 0.2182, + "step": 16860 + }, + { + "epoch": 0.63, + "learning_rate": 3.4420399994564192e-06, + "loss": 0.1792, + "step": 16890 + }, + { + "epoch": 0.63, + "learning_rate": 3.438985566341346e-06, + "loss": 0.2609, + "step": 16920 + }, + { + "epoch": 0.63, + "learning_rate": 3.4359392502449156e-06, + "loss": 0.2008, + "step": 16950 + }, + { + "epoch": 0.63, + "learning_rate": 3.4329010152796816e-06, + "loss": 0.1738, + "step": 16980 + }, + { + "epoch": 0.63, + "learning_rate": 3.429870825779942e-06, + "loss": 0.2357, + "step": 17010 + }, + { + "epoch": 0.64, + "learning_rate": 3.426848646299979e-06, + "loss": 0.2309, + "step": 17040 + }, + { + "epoch": 0.64, + "learning_rate": 3.423834441612316e-06, + "loss": 0.2102, + "step": 17070 + }, + { + "epoch": 0.64, + "learning_rate": 3.4208281767059957e-06, + "loss": 0.1933, + "step": 17100 + }, + { + "epoch": 0.64, + "learning_rate": 3.4178298167848716e-06, + "loss": 0.1512, + "step": 17130 + }, + { + "epoch": 0.64, + "learning_rate": 3.414839327265915e-06, + "loss": 0.1976, + "step": 17160 + }, + { + "epoch": 0.64, + "learning_rate": 3.4118566737775383e-06, + "loss": 0.2193, + "step": 17190 + }, + { + "epoch": 0.64, + "learning_rate": 3.4088818221579424e-06, + "loss": 0.2124, + "step": 17220 + }, + { + "epoch": 0.64, + "learning_rate": 3.4059147384534635e-06, + "loss": 0.2116, + "step": 17250 + }, + { + "epoch": 0.64, + "learning_rate": 3.4029553889169558e-06, + "loss": 0.1893, + "step": 17280 + }, + { + "epoch": 0.65, + "learning_rate": 3.4000037400061716e-06, + "loss": 0.2162, + "step": 17310 + }, + { + "epoch": 0.65, + "learning_rate": 3.3970597583821664e-06, + "loss": 0.2022, + "step": 17340 + }, + { + "epoch": 0.65, + "learning_rate": 3.394123410907721e-06, + "loss": 0.2171, + "step": 17370 + }, + { + "epoch": 0.65, + "learning_rate": 3.3911946646457707e-06, + "loss": 0.2321, + "step": 17400 + }, + { + "epoch": 0.65, + "learning_rate": 3.3882734868578534e-06, + "loss": 0.2312, + "step": 17430 + }, + { + "epoch": 0.65, + "learning_rate": 3.3853598450025764e-06, + "loss": 0.1934, + "step": 17460 + }, + { + "epoch": 0.65, + "learning_rate": 3.3824537067340902e-06, + "loss": 0.177, + "step": 17490 + }, + { + "epoch": 0.65, + "learning_rate": 3.379555039900584e-06, + "loss": 0.193, + "step": 17520 + }, + { + "epoch": 0.65, + "learning_rate": 3.3766638125427857e-06, + "loss": 0.2091, + "step": 17550 + }, + { + "epoch": 0.66, + "learning_rate": 3.373779992892488e-06, + "loss": 0.2174, + "step": 17580 + }, + { + "epoch": 0.66, + "learning_rate": 3.3709035493710807e-06, + "loss": 0.2565, + "step": 17610 + }, + { + "epoch": 0.66, + "learning_rate": 3.368034450588095e-06, + "loss": 0.2153, + "step": 17640 + }, + { + "epoch": 0.66, + "learning_rate": 3.3651726653397697e-06, + "loss": 0.2146, + "step": 17670 + }, + { + "epoch": 0.66, + "learning_rate": 3.3623181626076225e-06, + "loss": 0.1969, + "step": 17700 + }, + { + "epoch": 0.66, + "learning_rate": 3.359470911557039e-06, + "loss": 0.2297, + "step": 17730 + }, + { + "epoch": 0.66, + "learning_rate": 3.3566308815358723e-06, + "loss": 0.2054, + "step": 17760 + }, + { + "epoch": 0.66, + "learning_rate": 3.3537980420730624e-06, + "loss": 0.2202, + "step": 17790 + }, + { + "epoch": 0.66, + "learning_rate": 3.3509723628772543e-06, + "loss": 0.2301, + "step": 17820 + }, + { + "epoch": 0.67, + "learning_rate": 3.348153813835445e-06, + "loss": 0.2316, + "step": 17850 + }, + { + "epoch": 0.67, + "learning_rate": 3.3453423650116334e-06, + "loss": 0.2068, + "step": 17880 + }, + { + "epoch": 0.67, + "learning_rate": 3.342537986645483e-06, + "loss": 0.2173, + "step": 17910 + }, + { + "epoch": 0.67, + "learning_rate": 3.3397406491510005e-06, + "loss": 0.1801, + "step": 17940 + }, + { + "epoch": 0.67, + "learning_rate": 3.336950323115227e-06, + "loss": 0.2013, + "step": 17970 + }, + { + "epoch": 0.67, + "learning_rate": 3.3341669792969324e-06, + "loss": 0.2041, + "step": 18000 + }, + { + "epoch": 0.67, + "learning_rate": 3.3313905886253366e-06, + "loss": 0.178, + "step": 18030 + }, + { + "epoch": 0.67, + "learning_rate": 3.3286211221988295e-06, + "loss": 0.1897, + "step": 18060 + }, + { + "epoch": 0.67, + "learning_rate": 3.325858551283706e-06, + "loss": 0.2258, + "step": 18090 + }, + { + "epoch": 0.68, + "learning_rate": 3.3231028473129184e-06, + "loss": 0.191, + "step": 18120 + }, + { + "epoch": 0.68, + "learning_rate": 3.3203539818848333e-06, + "loss": 0.1796, + "step": 18150 + }, + { + "epoch": 0.68, + "learning_rate": 3.3176119267620007e-06, + "loss": 0.2318, + "step": 18180 + }, + { + "epoch": 0.68, + "learning_rate": 3.3148766538699385e-06, + "loss": 0.1636, + "step": 18210 + }, + { + "epoch": 0.68, + "learning_rate": 3.3121481352959235e-06, + "loss": 0.1984, + "step": 18240 + }, + { + "epoch": 0.68, + "learning_rate": 3.3094263432877955e-06, + "loss": 0.1771, + "step": 18270 + }, + { + "epoch": 0.68, + "learning_rate": 3.306711250252772e-06, + "loss": 0.1933, + "step": 18300 + }, + { + "epoch": 0.68, + "learning_rate": 3.304002828756273e-06, + "loss": 0.1874, + "step": 18330 + }, + { + "epoch": 0.68, + "learning_rate": 3.3013010515207573e-06, + "loss": 0.2102, + "step": 18360 + }, + { + "epoch": 0.69, + "learning_rate": 3.298605891424567e-06, + "loss": 0.1955, + "step": 18390 + }, + { + "epoch": 0.69, + "learning_rate": 3.2959173215007867e-06, + "loss": 0.1924, + "step": 18420 + }, + { + "epoch": 0.69, + "learning_rate": 3.293235314936108e-06, + "loss": 0.1786, + "step": 18450 + }, + { + "epoch": 0.69, + "learning_rate": 3.2905598450697064e-06, + "loss": 0.2073, + "step": 18480 + }, + { + "epoch": 0.69, + "learning_rate": 3.2878908853921286e-06, + "loss": 0.1909, + "step": 18510 + }, + { + "epoch": 0.69, + "learning_rate": 3.2852284095441923e-06, + "loss": 0.1895, + "step": 18540 + }, + { + "epoch": 0.69, + "learning_rate": 3.282572391315887e-06, + "loss": 0.1972, + "step": 18570 + }, + { + "epoch": 0.69, + "learning_rate": 3.279922804645294e-06, + "loss": 0.1673, + "step": 18600 + }, + { + "epoch": 0.69, + "learning_rate": 3.2772796236175144e-06, + "loss": 0.225, + "step": 18630 + }, + { + "epoch": 0.7, + "learning_rate": 3.274642822463597e-06, + "loss": 0.2317, + "step": 18660 + }, + { + "epoch": 0.7, + "learning_rate": 3.2720999550184424e-06, + "loss": 0.2248, + "step": 18690 + }, + { + "epoch": 0.7, + "learning_rate": 3.269475626333966e-06, + "loss": 0.2053, + "step": 18720 + }, + { + "epoch": 0.7, + "learning_rate": 3.266857601924006e-06, + "loss": 0.2262, + "step": 18750 + }, + { + "epoch": 0.7, + "learning_rate": 3.2642458565882563e-06, + "loss": 0.2002, + "step": 18780 + }, + { + "epoch": 0.7, + "learning_rate": 3.2616403652672114e-06, + "loss": 0.1788, + "step": 18810 + }, + { + "epoch": 0.7, + "learning_rate": 3.2590411030411596e-06, + "loss": 0.1958, + "step": 18840 + }, + { + "epoch": 0.7, + "learning_rate": 3.2564480451291814e-06, + "loss": 0.1836, + "step": 18870 + }, + { + "epoch": 0.7, + "learning_rate": 3.253861166888155e-06, + "loss": 0.2255, + "step": 18900 + }, + { + "epoch": 0.71, + "learning_rate": 3.2512804438117756e-06, + "loss": 0.1828, + "step": 18930 + }, + { + "epoch": 0.71, + "learning_rate": 3.24870585152958e-06, + "loss": 0.1978, + "step": 18960 + }, + { + "epoch": 0.71, + "learning_rate": 3.2461373658059773e-06, + "loss": 0.1853, + "step": 18990 + }, + { + "epoch": 0.71, + "learning_rate": 3.243574962539294e-06, + "loss": 0.1664, + "step": 19020 + }, + { + "epoch": 0.71, + "learning_rate": 3.241018617760822e-06, + "loss": 0.1852, + "step": 19050 + }, + { + "epoch": 0.71, + "learning_rate": 3.2384683076338774e-06, + "loss": 0.193, + "step": 19080 + }, + { + "epoch": 0.71, + "learning_rate": 3.235924008452868e-06, + "loss": 0.2052, + "step": 19110 + }, + { + "epoch": 0.71, + "learning_rate": 3.233385696642366e-06, + "loss": 0.1822, + "step": 19140 + }, + { + "epoch": 0.71, + "learning_rate": 3.230853348756193e-06, + "loss": 0.2126, + "step": 19170 + }, + { + "epoch": 0.72, + "learning_rate": 3.2283269414765067e-06, + "loss": 0.1973, + "step": 19200 + }, + { + "epoch": 0.72, + "learning_rate": 3.225806451612903e-06, + "loss": 0.2046, + "step": 19230 + }, + { + "epoch": 0.72, + "learning_rate": 3.2232918561015214e-06, + "loss": 0.2206, + "step": 19260 + }, + { + "epoch": 0.72, + "learning_rate": 3.2207831320041542e-06, + "loss": 0.1786, + "step": 19290 + }, + { + "epoch": 0.72, + "learning_rate": 3.2182802565073746e-06, + "loss": 0.1932, + "step": 19320 + }, + { + "epoch": 0.72, + "learning_rate": 3.215783206921658e-06, + "loss": 0.1883, + "step": 19350 + }, + { + "epoch": 0.72, + "learning_rate": 3.213291960680524e-06, + "loss": 0.2078, + "step": 19380 + }, + { + "epoch": 0.72, + "learning_rate": 3.210806495339678e-06, + "loss": 0.1932, + "step": 19410 + }, + { + "epoch": 0.73, + "learning_rate": 3.208326788576161e-06, + "loss": 0.1897, + "step": 19440 + }, + { + "epoch": 0.73, + "learning_rate": 3.205852818187507e-06, + "loss": 0.1865, + "step": 19470 + }, + { + "epoch": 0.73, + "learning_rate": 3.2033845620909114e-06, + "loss": 0.1811, + "step": 19500 + }, + { + "epoch": 0.73, + "learning_rate": 3.2009219983224e-06, + "loss": 0.1725, + "step": 19530 + }, + { + "epoch": 0.73, + "learning_rate": 3.1984651050360064e-06, + "loss": 0.2187, + "step": 19560 + }, + { + "epoch": 0.73, + "learning_rate": 3.1960138605029663e-06, + "loss": 0.2321, + "step": 19590 + }, + { + "epoch": 0.73, + "learning_rate": 3.1935682431108977e-06, + "loss": 0.2007, + "step": 19620 + }, + { + "epoch": 0.73, + "learning_rate": 3.1911282313630144e-06, + "loss": 0.1824, + "step": 19650 + }, + { + "epoch": 0.73, + "learning_rate": 3.18869380387732e-06, + "loss": 0.2498, + "step": 19680 + }, + { + "epoch": 0.74, + "learning_rate": 3.1862649393858314e-06, + "loss": 0.2345, + "step": 19710 + }, + { + "epoch": 0.74, + "learning_rate": 3.183841616733788e-06, + "loss": 0.202, + "step": 19740 + }, + { + "epoch": 0.74, + "learning_rate": 3.181423814878889e-06, + "loss": 0.2071, + "step": 19770 + }, + { + "epoch": 0.74, + "learning_rate": 3.1790115128905165e-06, + "loss": 0.1641, + "step": 19800 + }, + { + "epoch": 0.74, + "learning_rate": 3.17660468994898e-06, + "loss": 0.2057, + "step": 19830 + }, + { + "epoch": 0.74, + "learning_rate": 3.1742033253447586e-06, + "loss": 0.1941, + "step": 19860 + }, + { + "epoch": 0.74, + "learning_rate": 3.1718073984777564e-06, + "loss": 0.2097, + "step": 19890 + }, + { + "epoch": 0.74, + "learning_rate": 3.169416888856555e-06, + "loss": 0.1986, + "step": 19920 + }, + { + "epoch": 0.74, + "learning_rate": 3.167031776097682e-06, + "loss": 0.1843, + "step": 19950 + }, + { + "epoch": 0.75, + "learning_rate": 3.1646520399248797e-06, + "loss": 0.2038, + "step": 19980 + }, + { + "epoch": 0.75, + "learning_rate": 3.1622776601683796e-06, + "loss": 0.1745, + "step": 20010 + }, + { + "epoch": 0.75, + "learning_rate": 3.159908616764186e-06, + "loss": 0.199, + "step": 20040 + }, + { + "epoch": 0.75, + "learning_rate": 3.1575448897533633e-06, + "loss": 0.1845, + "step": 20070 + }, + { + "epoch": 0.75, + "learning_rate": 3.15518645928133e-06, + "loss": 0.206, + "step": 20100 + }, + { + "epoch": 0.75, + "learning_rate": 3.1528333055971584e-06, + "loss": 0.2374, + "step": 20130 + }, + { + "epoch": 0.75, + "learning_rate": 3.1504854090528795e-06, + "loss": 0.2106, + "step": 20160 + }, + { + "epoch": 0.75, + "learning_rate": 3.1481427501027932e-06, + "loss": 0.1568, + "step": 20190 + }, + { + "epoch": 0.75, + "learning_rate": 3.1458053093027873e-06, + "loss": 0.1677, + "step": 20220 + }, + { + "epoch": 0.76, + "learning_rate": 3.1434730673096573e-06, + "loss": 0.187, + "step": 20250 + }, + { + "epoch": 0.76, + "learning_rate": 3.141146004880436e-06, + "loss": 0.1736, + "step": 20280 + }, + { + "epoch": 0.76, + "learning_rate": 3.1388241028717226e-06, + "loss": 0.2169, + "step": 20310 + }, + { + "epoch": 0.76, + "learning_rate": 3.136507342239028e-06, + "loss": 0.1903, + "step": 20340 + }, + { + "epoch": 0.76, + "learning_rate": 3.1341957040361138e-06, + "loss": 0.1915, + "step": 20370 + }, + { + "epoch": 0.76, + "learning_rate": 3.131889169414341e-06, + "loss": 0.1775, + "step": 20400 + }, + { + "epoch": 0.76, + "learning_rate": 3.1295877196220293e-06, + "loss": 0.2063, + "step": 20430 + }, + { + "epoch": 0.76, + "learning_rate": 3.127291336003811e-06, + "loss": 0.1973, + "step": 20460 + }, + { + "epoch": 0.76, + "learning_rate": 3.125e-06, + "loss": 0.1864, + "step": 20490 + }, + { + "epoch": 0.77, + "learning_rate": 3.1227136931459613e-06, + "loss": 0.1893, + "step": 20520 + }, + { + "epoch": 0.77, + "learning_rate": 3.1204323970714836e-06, + "loss": 0.23, + "step": 20550 + }, + { + "epoch": 0.77, + "learning_rate": 3.118156093500161e-06, + "loss": 0.2014, + "step": 20580 + }, + { + "epoch": 0.77, + "learning_rate": 3.1158847642487794e-06, + "loss": 0.1873, + "step": 20610 + }, + { + "epoch": 0.77, + "learning_rate": 3.1136183912267038e-06, + "loss": 0.2029, + "step": 20640 + }, + { + "epoch": 0.77, + "learning_rate": 3.111356956435275e-06, + "loss": 0.1946, + "step": 20670 + }, + { + "epoch": 0.77, + "learning_rate": 3.1091004419672094e-06, + "loss": 0.2022, + "step": 20700 + }, + { + "epoch": 0.77, + "learning_rate": 3.1068488300060003e-06, + "loss": 0.1916, + "step": 20730 + }, + { + "epoch": 0.77, + "learning_rate": 3.1046021028253316e-06, + "loss": 0.1954, + "step": 20760 + }, + { + "epoch": 0.78, + "learning_rate": 3.1023602427884887e-06, + "loss": 0.1604, + "step": 20790 + }, + { + "epoch": 0.78, + "learning_rate": 3.1001232323477775e-06, + "loss": 0.2243, + "step": 20820 + }, + { + "epoch": 0.78, + "learning_rate": 3.0978910540439495e-06, + "loss": 0.214, + "step": 20850 + }, + { + "epoch": 0.78, + "learning_rate": 3.095663690505624e-06, + "loss": 0.1776, + "step": 20880 + }, + { + "epoch": 0.78, + "learning_rate": 3.09344112444873e-06, + "loss": 0.203, + "step": 20910 + }, + { + "epoch": 0.78, + "learning_rate": 3.0912233386759318e-06, + "loss": 0.185, + "step": 20940 + }, + { + "epoch": 0.78, + "learning_rate": 3.0890103160760777e-06, + "loss": 0.1801, + "step": 20970 + }, + { + "epoch": 0.78, + "learning_rate": 3.0868020396236425e-06, + "loss": 0.1748, + "step": 21000 + }, + { + "epoch": 0.78, + "learning_rate": 3.0845984923781787e-06, + "loss": 0.2048, + "step": 21030 + }, + { + "epoch": 0.79, + "learning_rate": 3.0823996574837696e-06, + "loss": 0.1797, + "step": 21060 + }, + { + "epoch": 0.79, + "learning_rate": 3.0802055181684876e-06, + "loss": 0.1694, + "step": 21090 + }, + { + "epoch": 0.79, + "learning_rate": 3.0780889645514934e-06, + "loss": 0.1713, + "step": 21120 + }, + { + "epoch": 0.79, + "learning_rate": 3.075904011269121e-06, + "loss": 0.1855, + "step": 21150 + }, + { + "epoch": 0.79, + "learning_rate": 3.0737237042981292e-06, + "loss": 0.2117, + "step": 21180 + }, + { + "epoch": 0.79, + "learning_rate": 3.0715480271945e-06, + "loss": 0.1906, + "step": 21210 + }, + { + "epoch": 0.79, + "learning_rate": 3.0693769635955745e-06, + "loss": 0.248, + "step": 21240 + }, + { + "epoch": 0.79, + "learning_rate": 3.067210497219538e-06, + "loss": 0.1815, + "step": 21270 + }, + { + "epoch": 0.79, + "learning_rate": 3.0650486118649127e-06, + "loss": 0.1695, + "step": 21300 + }, + { + "epoch": 0.8, + "learning_rate": 3.0628912914100383e-06, + "loss": 0.1733, + "step": 21330 + }, + { + "epoch": 0.8, + "learning_rate": 3.0607385198125768e-06, + "loss": 0.187, + "step": 21360 + }, + { + "epoch": 0.8, + "learning_rate": 3.0585902811090056e-06, + "loss": 0.1775, + "step": 21390 + }, + { + "epoch": 0.8, + "learning_rate": 3.056446559414125e-06, + "loss": 0.2013, + "step": 21420 + }, + { + "epoch": 0.8, + "learning_rate": 3.0543073389205587e-06, + "loss": 0.1936, + "step": 21450 + }, + { + "epoch": 0.8, + "learning_rate": 3.05217260389827e-06, + "loss": 0.1781, + "step": 21480 + }, + { + "epoch": 0.8, + "learning_rate": 3.0500423386940733e-06, + "loss": 0.1818, + "step": 21510 + }, + { + "epoch": 0.8, + "learning_rate": 3.0479165277311516e-06, + "loss": 0.1745, + "step": 21540 + }, + { + "epoch": 0.8, + "learning_rate": 3.0457951555085815e-06, + "loss": 0.1871, + "step": 21570 + }, + { + "epoch": 0.81, + "learning_rate": 3.0436782066008522e-06, + "loss": 0.2433, + "step": 21600 + }, + { + "epoch": 0.81, + "learning_rate": 3.0415656656574006e-06, + "loss": 0.1986, + "step": 21630 + }, + { + "epoch": 0.81, + "learning_rate": 3.0394575174021406e-06, + "loss": 0.176, + "step": 21660 + }, + { + "epoch": 0.81, + "learning_rate": 3.0373537466330005e-06, + "loss": 0.2019, + "step": 21690 + }, + { + "epoch": 0.81, + "learning_rate": 3.0352543382214616e-06, + "loss": 0.2233, + "step": 21720 + }, + { + "epoch": 0.81, + "learning_rate": 3.0332290426009857e-06, + "loss": 0.2413, + "step": 21750 + }, + { + "epoch": 0.81, + "learning_rate": 3.0311381696410036e-06, + "loss": 0.1946, + "step": 21780 + }, + { + "epoch": 0.81, + "learning_rate": 3.0290516145856953e-06, + "loss": 0.1893, + "step": 21810 + }, + { + "epoch": 0.81, + "learning_rate": 3.0269693625938358e-06, + "loss": 0.2216, + "step": 21840 + }, + { + "epoch": 0.82, + "learning_rate": 3.0248913988955188e-06, + "loss": 0.1971, + "step": 21870 + }, + { + "epoch": 0.82, + "learning_rate": 3.0228177087917153e-06, + "loss": 0.1776, + "step": 21900 + }, + { + "epoch": 0.82, + "learning_rate": 3.0207482776538377e-06, + "loss": 0.1729, + "step": 21930 + }, + { + "epoch": 0.82, + "learning_rate": 3.0186830909233056e-06, + "loss": 0.2129, + "step": 21960 + }, + { + "epoch": 0.82, + "learning_rate": 3.0166221341111156e-06, + "loss": 0.1983, + "step": 21990 + }, + { + "epoch": 0.82, + "learning_rate": 3.014565392797416e-06, + "loss": 0.1759, + "step": 22020 + }, + { + "epoch": 0.82, + "learning_rate": 3.01251285263108e-06, + "loss": 0.1762, + "step": 22050 + }, + { + "epoch": 0.82, + "learning_rate": 3.01046449932929e-06, + "loss": 0.1912, + "step": 22080 + }, + { + "epoch": 0.82, + "learning_rate": 3.008420318677114e-06, + "loss": 0.1635, + "step": 22110 + }, + { + "epoch": 0.83, + "learning_rate": 3.006380296527096e-06, + "loss": 0.1975, + "step": 22140 + }, + { + "epoch": 0.83, + "learning_rate": 3.004344418798843e-06, + "loss": 0.1715, + "step": 22170 + }, + { + "epoch": 0.83, + "learning_rate": 3.0023126714786185e-06, + "loss": 0.172, + "step": 22200 + }, + { + "epoch": 0.83, + "learning_rate": 3.0002850406189315e-06, + "loss": 0.1759, + "step": 22230 + }, + { + "epoch": 0.83, + "learning_rate": 2.9982615123381435e-06, + "loss": 0.1836, + "step": 22260 + }, + { + "epoch": 0.83, + "learning_rate": 2.9962420728200638e-06, + "loss": 0.1822, + "step": 22290 + }, + { + "epoch": 0.83, + "learning_rate": 2.99422670831355e-06, + "loss": 0.1924, + "step": 22320 + }, + { + "epoch": 0.83, + "learning_rate": 2.992215405132124e-06, + "loss": 0.1958, + "step": 22350 + }, + { + "epoch": 0.83, + "learning_rate": 2.990208149653574e-06, + "loss": 0.1966, + "step": 22380 + }, + { + "epoch": 0.84, + "learning_rate": 2.988204928319569e-06, + "loss": 0.1612, + "step": 22410 + }, + { + "epoch": 0.84, + "learning_rate": 2.9862057276352764e-06, + "loss": 0.1816, + "step": 22440 + }, + { + "epoch": 0.84, + "learning_rate": 2.984210534168976e-06, + "loss": 0.2078, + "step": 22470 + }, + { + "epoch": 0.84, + "learning_rate": 2.982219334551684e-06, + "loss": 0.1877, + "step": 22500 + }, + { + "epoch": 0.84, + "learning_rate": 2.9802321154767783e-06, + "loss": 0.1862, + "step": 22530 + }, + { + "epoch": 0.84, + "learning_rate": 2.978248863699621e-06, + "loss": 0.2263, + "step": 22560 + }, + { + "epoch": 0.84, + "learning_rate": 2.9762695660371887e-06, + "loss": 0.1911, + "step": 22590 + }, + { + "epoch": 0.84, + "learning_rate": 2.9742942093677073e-06, + "loss": 0.1784, + "step": 22620 + }, + { + "epoch": 0.84, + "learning_rate": 2.9723227806302822e-06, + "loss": 0.2086, + "step": 22650 + }, + { + "epoch": 0.85, + "learning_rate": 2.9703552668245405e-06, + "loss": 0.2191, + "step": 22680 + }, + { + "epoch": 0.85, + "learning_rate": 2.9683916550102658e-06, + "loss": 0.1633, + "step": 22710 + }, + { + "epoch": 0.85, + "learning_rate": 2.9664319323070455e-06, + "loss": 0.181, + "step": 22740 + }, + { + "epoch": 0.85, + "learning_rate": 2.964476085893913e-06, + "loss": 0.2081, + "step": 22770 + }, + { + "epoch": 0.85, + "learning_rate": 2.9625241030089954e-06, + "loss": 0.1903, + "step": 22800 + }, + { + "epoch": 0.85, + "learning_rate": 2.9605759709491647e-06, + "loss": 0.1801, + "step": 22830 + }, + { + "epoch": 0.85, + "learning_rate": 2.9586316770696934e-06, + "loss": 0.223, + "step": 22860 + }, + { + "epoch": 0.85, + "learning_rate": 2.9566912087839018e-06, + "loss": 0.1891, + "step": 22890 + }, + { + "epoch": 0.85, + "learning_rate": 2.954754553562824e-06, + "loss": 0.1841, + "step": 22920 + }, + { + "epoch": 0.86, + "learning_rate": 2.9528216989348657e-06, + "loss": 0.2035, + "step": 22950 + }, + { + "epoch": 0.86, + "learning_rate": 2.9508926324854625e-06, + "loss": 0.1673, + "step": 22980 + }, + { + "epoch": 0.86, + "learning_rate": 2.9489673418567527e-06, + "loss": 0.1781, + "step": 23010 + }, + { + "epoch": 0.86, + "learning_rate": 2.947045814747238e-06, + "loss": 0.1882, + "step": 23040 + }, + { + "epoch": 0.86, + "learning_rate": 2.9451280389114573e-06, + "loss": 0.1896, + "step": 23070 + }, + { + "epoch": 0.86, + "learning_rate": 2.9432140021596566e-06, + "loss": 0.1775, + "step": 23100 + }, + { + "epoch": 0.86, + "learning_rate": 2.941303692357466e-06, + "loss": 0.1693, + "step": 23130 + }, + { + "epoch": 0.86, + "learning_rate": 2.9393970974255757e-06, + "loss": 0.2118, + "step": 23160 + }, + { + "epoch": 0.86, + "learning_rate": 2.937494205339412e-06, + "loss": 0.1826, + "step": 23190 + }, + { + "epoch": 0.87, + "learning_rate": 2.9355950041288252e-06, + "loss": 0.1622, + "step": 23220 + }, + { + "epoch": 0.87, + "learning_rate": 2.9336994818777668e-06, + "loss": 0.1661, + "step": 23250 + }, + { + "epoch": 0.87, + "learning_rate": 2.93180762672398e-06, + "loss": 0.1776, + "step": 23280 + }, + { + "epoch": 0.87, + "learning_rate": 2.9299194268586878e-06, + "loss": 0.1617, + "step": 23310 + }, + { + "epoch": 0.87, + "learning_rate": 2.9280348705262775e-06, + "loss": 0.165, + "step": 23340 + }, + { + "epoch": 0.87, + "learning_rate": 2.926153946024002e-06, + "loss": 0.1841, + "step": 23370 + }, + { + "epoch": 0.87, + "learning_rate": 2.9242766417016677e-06, + "loss": 0.1686, + "step": 23400 + }, + { + "epoch": 0.87, + "learning_rate": 2.922402945961337e-06, + "loss": 0.179, + "step": 23430 + }, + { + "epoch": 0.87, + "learning_rate": 2.9205328472570204e-06, + "loss": 0.1653, + "step": 23460 + }, + { + "epoch": 0.88, + "learning_rate": 2.9186663340943855e-06, + "loss": 0.212, + "step": 23490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9168033950304554e-06, + "loss": 0.1896, + "step": 23520 + }, + { + "epoch": 0.88, + "learning_rate": 2.9149440186733125e-06, + "loss": 0.1886, + "step": 23550 + }, + { + "epoch": 0.88, + "learning_rate": 2.9130881936818123e-06, + "loss": 0.1959, + "step": 23580 + }, + { + "epoch": 0.88, + "learning_rate": 2.9112359087652875e-06, + "loss": 0.211, + "step": 23610 + }, + { + "epoch": 0.88, + "learning_rate": 2.909387152683261e-06, + "loss": 0.1904, + "step": 23640 + }, + { + "epoch": 0.88, + "learning_rate": 2.9075419142451583e-06, + "loss": 0.1705, + "step": 23670 + }, + { + "epoch": 0.88, + "learning_rate": 2.9057001823100285e-06, + "loss": 0.1779, + "step": 23700 + }, + { + "epoch": 0.89, + "learning_rate": 2.903861945786251e-06, + "loss": 0.1967, + "step": 23730 + }, + { + "epoch": 0.89, + "learning_rate": 2.9020271936312684e-06, + "loss": 0.1746, + "step": 23760 + }, + { + "epoch": 0.89, + "learning_rate": 2.9001959148512965e-06, + "loss": 0.2198, + "step": 23790 + }, + { + "epoch": 0.89, + "learning_rate": 2.898368098501052e-06, + "loss": 0.1712, + "step": 23820 + }, + { + "epoch": 0.89, + "learning_rate": 2.89654373368348e-06, + "loss": 0.218, + "step": 23850 + }, + { + "epoch": 0.89, + "learning_rate": 2.894722809549479e-06, + "loss": 0.1932, + "step": 23880 + }, + { + "epoch": 0.89, + "learning_rate": 2.8929053152976272e-06, + "loss": 0.1703, + "step": 23910 + }, + { + "epoch": 0.89, + "learning_rate": 2.8910912401739185e-06, + "loss": 0.2124, + "step": 23940 + }, + { + "epoch": 0.89, + "learning_rate": 2.8892805734714926e-06, + "loss": 0.2129, + "step": 23970 + }, + { + "epoch": 0.9, + "learning_rate": 2.8874733045303684e-06, + "loss": 0.1994, + "step": 24000 + }, + { + "epoch": 0.9, + "learning_rate": 2.885669422737183e-06, + "loss": 0.1716, + "step": 24030 + }, + { + "epoch": 0.9, + "learning_rate": 2.8838689175249286e-06, + "loss": 0.2043, + "step": 24060 + }, + { + "epoch": 0.9, + "learning_rate": 2.8820717783726926e-06, + "loss": 0.1575, + "step": 24090 + }, + { + "epoch": 0.9, + "learning_rate": 2.8802779948053998e-06, + "loss": 0.1846, + "step": 24120 + }, + { + "epoch": 0.9, + "learning_rate": 2.878487556393554e-06, + "loss": 0.2028, + "step": 24150 + }, + { + "epoch": 0.9, + "learning_rate": 2.8767004527529884e-06, + "loss": 0.1681, + "step": 24180 + }, + { + "epoch": 0.9, + "learning_rate": 2.8749166735446054e-06, + "loss": 0.2006, + "step": 24210 + }, + { + "epoch": 0.9, + "learning_rate": 2.873136208474131e-06, + "loss": 0.1828, + "step": 24240 + }, + { + "epoch": 0.91, + "learning_rate": 2.8713590472918633e-06, + "loss": 0.1757, + "step": 24270 + }, + { + "epoch": 0.91, + "learning_rate": 2.8695851797924253e-06, + "loss": 0.1763, + "step": 24300 + }, + { + "epoch": 0.91, + "learning_rate": 2.8678145958145158e-06, + "loss": 0.2044, + "step": 24330 + }, + { + "epoch": 0.91, + "learning_rate": 2.8660472852406714e-06, + "loss": 0.2031, + "step": 24360 + }, + { + "epoch": 0.91, + "learning_rate": 2.8642832379970157e-06, + "loss": 0.2166, + "step": 24390 + }, + { + "epoch": 0.91, + "learning_rate": 2.862522444053026e-06, + "loss": 0.1725, + "step": 24420 + }, + { + "epoch": 0.91, + "learning_rate": 2.860764893421287e-06, + "loss": 0.1614, + "step": 24450 + }, + { + "epoch": 0.91, + "learning_rate": 2.859010576157256e-06, + "loss": 0.1866, + "step": 24480 + }, + { + "epoch": 0.91, + "learning_rate": 2.857259482359027e-06, + "loss": 0.1888, + "step": 24510 + }, + { + "epoch": 0.92, + "learning_rate": 2.855511602167096e-06, + "loss": 0.1799, + "step": 24540 + }, + { + "epoch": 0.92, + "learning_rate": 2.8537669257641252e-06, + "loss": 0.1852, + "step": 24570 + }, + { + "epoch": 0.92, + "learning_rate": 2.8520254433747156e-06, + "loss": 0.1932, + "step": 24600 + }, + { + "epoch": 0.92, + "learning_rate": 2.8502871452651733e-06, + "loss": 0.1782, + "step": 24630 + }, + { + "epoch": 0.92, + "learning_rate": 2.8485520217432816e-06, + "loss": 0.1932, + "step": 24660 + }, + { + "epoch": 0.92, + "learning_rate": 2.8468200631580763e-06, + "loss": 0.1474, + "step": 24690 + }, + { + "epoch": 0.92, + "learning_rate": 2.8450912598996183e-06, + "loss": 0.1396, + "step": 24720 + }, + { + "epoch": 0.92, + "learning_rate": 2.8433656023987705e-06, + "loss": 0.1527, + "step": 24750 + }, + { + "epoch": 0.92, + "learning_rate": 2.841643081126973e-06, + "loss": 0.2015, + "step": 24780 + }, + { + "epoch": 0.93, + "learning_rate": 2.8399236865960273e-06, + "loss": 0.1824, + "step": 24810 + }, + { + "epoch": 0.93, + "learning_rate": 2.8382074093578704e-06, + "loss": 0.1834, + "step": 24840 + }, + { + "epoch": 0.93, + "learning_rate": 2.836494240004362e-06, + "loss": 0.1714, + "step": 24870 + }, + { + "epoch": 0.93, + "learning_rate": 2.8347841691670646e-06, + "loss": 0.1673, + "step": 24900 + }, + { + "epoch": 0.93, + "learning_rate": 2.833077187517031e-06, + "loss": 0.1669, + "step": 24930 + }, + { + "epoch": 0.93, + "learning_rate": 2.8313732857645867e-06, + "loss": 0.2079, + "step": 24960 + }, + { + "epoch": 0.93, + "learning_rate": 2.829672454659122e-06, + "loss": 0.1795, + "step": 24990 + }, + { + "epoch": 0.93, + "learning_rate": 2.827974684988878e-06, + "loss": 0.1923, + "step": 25020 + }, + { + "epoch": 0.93, + "learning_rate": 2.826279967580738e-06, + "loss": 0.1613, + "step": 25050 + }, + { + "epoch": 0.94, + "learning_rate": 2.8245882933000197e-06, + "loss": 0.2025, + "step": 25080 + }, + { + "epoch": 0.94, + "learning_rate": 2.8228996530502693e-06, + "loss": 0.2004, + "step": 25110 + }, + { + "epoch": 0.94, + "learning_rate": 2.8212140377730524e-06, + "loss": 0.1627, + "step": 25140 + }, + { + "epoch": 0.94, + "learning_rate": 2.819531438447754e-06, + "loss": 0.1889, + "step": 25170 + }, + { + "epoch": 0.94, + "learning_rate": 2.8178518460913747e-06, + "loss": 0.1689, + "step": 25200 + }, + { + "epoch": 0.94, + "learning_rate": 2.8161752517583257e-06, + "loss": 0.1703, + "step": 25230 + }, + { + "epoch": 0.94, + "learning_rate": 2.8145016465402352e-06, + "loss": 0.1798, + "step": 25260 + }, + { + "epoch": 0.94, + "learning_rate": 2.812831021565744e-06, + "loss": 0.2057, + "step": 25290 + }, + { + "epoch": 0.94, + "learning_rate": 2.811163368000311e-06, + "loss": 0.1988, + "step": 25320 + }, + { + "epoch": 0.95, + "learning_rate": 2.809498677046015e-06, + "loss": 0.1747, + "step": 25350 + }, + { + "epoch": 0.95, + "learning_rate": 2.807836939941363e-06, + "loss": 0.1707, + "step": 25380 + }, + { + "epoch": 0.95, + "learning_rate": 2.806178147961091e-06, + "loss": 0.1702, + "step": 25410 + }, + { + "epoch": 0.95, + "learning_rate": 2.8045222924159783e-06, + "loss": 0.1531, + "step": 25440 + }, + { + "epoch": 0.95, + "learning_rate": 2.8028693646526505e-06, + "loss": 0.1773, + "step": 25470 + }, + { + "epoch": 0.95, + "learning_rate": 2.8012193560533934e-06, + "loss": 0.2086, + "step": 25500 + }, + { + "epoch": 0.95, + "learning_rate": 2.799572258035962e-06, + "loss": 0.1826, + "step": 25530 + }, + { + "epoch": 0.95, + "learning_rate": 2.797928062053395e-06, + "loss": 0.1524, + "step": 25560 + }, + { + "epoch": 0.95, + "learning_rate": 2.7962867595938266e-06, + "loss": 0.1954, + "step": 25590 + }, + { + "epoch": 0.96, + "learning_rate": 2.794648342180302e-06, + "loss": 0.1594, + "step": 25620 + }, + { + "epoch": 0.96, + "learning_rate": 2.7930128013705946e-06, + "loss": 0.1492, + "step": 25650 + }, + { + "epoch": 0.96, + "learning_rate": 2.791380128757023e-06, + "loss": 0.1868, + "step": 25680 + }, + { + "epoch": 0.96, + "learning_rate": 2.789750315966267e-06, + "loss": 0.1852, + "step": 25710 + }, + { + "epoch": 0.96, + "learning_rate": 2.7881233546591905e-06, + "loss": 0.1662, + "step": 25740 + }, + { + "epoch": 0.96, + "learning_rate": 2.786499236530662e-06, + "loss": 0.188, + "step": 25770 + }, + { + "epoch": 0.96, + "learning_rate": 2.7848779533093735e-06, + "loss": 0.1579, + "step": 25800 + }, + { + "epoch": 0.96, + "learning_rate": 2.783259496757666e-06, + "loss": 0.1935, + "step": 25830 + }, + { + "epoch": 0.96, + "learning_rate": 2.7816438586713548e-06, + "loss": 0.1776, + "step": 25860 + }, + { + "epoch": 0.97, + "learning_rate": 2.7800310308795516e-06, + "loss": 0.182, + "step": 25890 + }, + { + "epoch": 0.97, + "learning_rate": 2.7784210052444933e-06, + "loss": 0.1845, + "step": 25920 + }, + { + "epoch": 0.97, + "learning_rate": 2.77681377366137e-06, + "loss": 0.1834, + "step": 25950 + }, + { + "epoch": 0.97, + "learning_rate": 2.775209328058151e-06, + "loss": 0.1745, + "step": 25980 + }, + { + "epoch": 0.97, + "learning_rate": 2.7736076603954173e-06, + "loss": 0.1624, + "step": 26010 + }, + { + "epoch": 0.97, + "learning_rate": 2.772062014714772e-06, + "loss": 0.1656, + "step": 26040 + }, + { + "epoch": 0.97, + "learning_rate": 2.7704657870071948e-06, + "loss": 0.1633, + "step": 26070 + }, + { + "epoch": 0.97, + "learning_rate": 2.7689253850700936e-06, + "loss": 0.1738, + "step": 26100 + }, + { + "epoch": 0.97, + "learning_rate": 2.7673345665931835e-06, + "loss": 0.1711, + "step": 26130 + }, + { + "epoch": 0.98, + "learning_rate": 2.7657464868672066e-06, + "loss": 0.173, + "step": 26160 + }, + { + "epoch": 0.98, + "learning_rate": 2.764161138042791e-06, + "loss": 0.1626, + "step": 26190 + }, + { + "epoch": 0.98, + "learning_rate": 2.762578512302018e-06, + "loss": 0.182, + "step": 26220 + }, + { + "epoch": 0.98, + "learning_rate": 2.760998601858272e-06, + "loss": 0.1748, + "step": 26250 + }, + { + "epoch": 0.98, + "learning_rate": 2.7594213989560703e-06, + "loss": 0.1944, + "step": 26280 + }, + { + "epoch": 0.98, + "learning_rate": 2.7578468958709083e-06, + "loss": 0.1825, + "step": 26310 + }, + { + "epoch": 0.98, + "learning_rate": 2.7562750849090968e-06, + "loss": 0.1798, + "step": 26340 + }, + { + "epoch": 0.98, + "learning_rate": 2.7547059584076095e-06, + "loss": 0.1606, + "step": 26370 + }, + { + "epoch": 0.98, + "learning_rate": 2.753139508733921e-06, + "loss": 0.1856, + "step": 26400 + }, + { + "epoch": 0.99, + "learning_rate": 2.7515757282858534e-06, + "loss": 0.1706, + "step": 26430 + }, + { + "epoch": 0.99, + "learning_rate": 2.75001460949142e-06, + "loss": 0.1915, + "step": 26460 + }, + { + "epoch": 0.99, + "learning_rate": 2.7484561448086723e-06, + "loss": 0.195, + "step": 26490 + }, + { + "epoch": 0.99, + "learning_rate": 2.746900326725547e-06, + "loss": 0.1873, + "step": 26520 + }, + { + "epoch": 0.99, + "learning_rate": 2.7453471477597134e-06, + "loss": 0.1907, + "step": 26550 + }, + { + "epoch": 0.99, + "learning_rate": 2.743796600458422e-06, + "loss": 0.1844, + "step": 26580 + }, + { + "epoch": 0.99, + "learning_rate": 2.742248677398357e-06, + "loss": 0.1556, + "step": 26610 + }, + { + "epoch": 0.99, + "learning_rate": 2.7407033711854815e-06, + "loss": 0.1819, + "step": 26640 + }, + { + "epoch": 0.99, + "learning_rate": 2.739160674454895e-06, + "loss": 0.1591, + "step": 26670 + }, + { + "epoch": 1.0, + "learning_rate": 2.7376205798706813e-06, + "loss": 0.1474, + "step": 26700 + }, + { + "epoch": 1.0, + "learning_rate": 2.7360830801257666e-06, + "loss": 0.1874, + "step": 26730 + }, + { + "epoch": 1.0, + "learning_rate": 2.73454816794177e-06, + "loss": 0.1631, + "step": 26760 + }, + { + "epoch": 1.0, + "learning_rate": 2.7330158360688597e-06, + "loss": 0.1639, + "step": 26790 + }, + { + "epoch": 1.0, + "learning_rate": 2.7314860772856113e-06, + "loss": 0.2042, + "step": 26820 + }, + { + "epoch": 1.0, + "learning_rate": 2.729958884398861e-06, + "loss": 0.1806, + "step": 26850 + }, + { + "epoch": 1.0, + "learning_rate": 2.7284342502435685e-06, + "loss": 0.183, + "step": 26880 + }, + { + "epoch": 1.0, + "learning_rate": 2.7269121676826703e-06, + "loss": 0.219, + "step": 26910 + }, + { + "epoch": 1.0, + "learning_rate": 2.7253926296069453e-06, + "loss": 0.1616, + "step": 26940 + }, + { + "epoch": 1.01, + "learning_rate": 2.723875628934869e-06, + "loss": 0.1491, + "step": 26970 + }, + { + "epoch": 1.01, + "learning_rate": 2.7223611586124805e-06, + "loss": 0.2203, + "step": 27000 + }, + { + "epoch": 1.01, + "learning_rate": 2.7208492116132414e-06, + "loss": 0.1466, + "step": 27030 + }, + { + "epoch": 1.01, + "learning_rate": 2.719339780937899e-06, + "loss": 0.1725, + "step": 27060 + }, + { + "epoch": 1.01, + "learning_rate": 2.717832859614352e-06, + "loss": 0.1656, + "step": 27090 + }, + { + "epoch": 1.01, + "learning_rate": 2.716328440697516e-06, + "loss": 0.198, + "step": 27120 + }, + { + "epoch": 1.01, + "learning_rate": 2.714826517269184e-06, + "loss": 0.1988, + "step": 27150 + }, + { + "epoch": 1.01, + "learning_rate": 2.7133270824378975e-06, + "loss": 0.1754, + "step": 27180 + }, + { + "epoch": 1.01, + "learning_rate": 2.711830129338813e-06, + "loss": 0.1788, + "step": 27210 + }, + { + "epoch": 1.02, + "learning_rate": 2.7103356511335694e-06, + "loss": 0.1858, + "step": 27240 + }, + { + "epoch": 1.02, + "learning_rate": 2.7088436410101547e-06, + "loss": 0.1464, + "step": 27270 + }, + { + "epoch": 1.02, + "learning_rate": 2.70735409218278e-06, + "loss": 0.1739, + "step": 27300 + }, + { + "epoch": 1.02, + "learning_rate": 2.7058669978917464e-06, + "loss": 0.1741, + "step": 27330 + }, + { + "epoch": 1.02, + "learning_rate": 2.7043823514033168e-06, + "loss": 0.1454, + "step": 27360 + }, + { + "epoch": 1.02, + "learning_rate": 2.7029001460095887e-06, + "loss": 0.1555, + "step": 27390 + }, + { + "epoch": 1.02, + "learning_rate": 2.701420375028368e-06, + "loss": 0.1698, + "step": 27420 + }, + { + "epoch": 1.02, + "learning_rate": 2.6999430318030373e-06, + "loss": 0.1675, + "step": 27450 + }, + { + "epoch": 1.02, + "learning_rate": 2.698468109702438e-06, + "loss": 0.1998, + "step": 27480 + }, + { + "epoch": 1.03, + "learning_rate": 2.696995602120738e-06, + "loss": 0.2141, + "step": 27510 + }, + { + "epoch": 1.03, + "learning_rate": 2.695525502477311e-06, + "loss": 0.1873, + "step": 27540 + }, + { + "epoch": 1.03, + "learning_rate": 2.6940578042166143e-06, + "loss": 0.1848, + "step": 27570 + }, + { + "epoch": 1.03, + "learning_rate": 2.692592500808063e-06, + "loss": 0.1503, + "step": 27600 + }, + { + "epoch": 1.03, + "learning_rate": 2.691129585745908e-06, + "loss": 0.1921, + "step": 27630 + }, + { + "epoch": 1.03, + "learning_rate": 2.689669052549118e-06, + "loss": 0.1928, + "step": 27660 + }, + { + "epoch": 1.03, + "learning_rate": 2.6882108947612555e-06, + "loss": 0.1454, + "step": 27690 + }, + { + "epoch": 1.03, + "learning_rate": 2.6867551059503586e-06, + "loss": 0.1938, + "step": 27720 + }, + { + "epoch": 1.03, + "learning_rate": 2.685301679708821e-06, + "loss": 0.1702, + "step": 27750 + }, + { + "epoch": 1.04, + "learning_rate": 2.6838506096532747e-06, + "loss": 0.161, + "step": 27780 + }, + { + "epoch": 1.04, + "learning_rate": 2.682401889424471e-06, + "loss": 0.1699, + "step": 27810 + }, + { + "epoch": 1.04, + "learning_rate": 2.6809555126871633e-06, + "loss": 0.1529, + "step": 27840 + }, + { + "epoch": 1.04, + "learning_rate": 2.6795114731299932e-06, + "loss": 0.1697, + "step": 27870 + }, + { + "epoch": 1.04, + "learning_rate": 2.6780697644653724e-06, + "loss": 0.1936, + "step": 27900 + }, + { + "epoch": 1.04, + "learning_rate": 2.676630380429367e-06, + "loss": 0.1625, + "step": 27930 + }, + { + "epoch": 1.04, + "learning_rate": 2.675193314781585e-06, + "loss": 0.1762, + "step": 27960 + }, + { + "epoch": 1.04, + "learning_rate": 2.6737585613050653e-06, + "loss": 0.173, + "step": 27990 + }, + { + "epoch": 1.05, + "learning_rate": 2.672326113806157e-06, + "loss": 0.181, + "step": 28020 + }, + { + "epoch": 1.05, + "learning_rate": 2.670895966114414e-06, + "loss": 0.1551, + "step": 28050 + }, + { + "epoch": 1.05, + "learning_rate": 2.6694681120824823e-06, + "loss": 0.1743, + "step": 28080 + }, + { + "epoch": 1.05, + "learning_rate": 2.668042545585986e-06, + "loss": 0.1613, + "step": 28110 + }, + { + "epoch": 1.05, + "learning_rate": 2.6666192605234194e-06, + "loss": 0.1659, + "step": 28140 + }, + { + "epoch": 1.05, + "learning_rate": 2.665198250816037e-06, + "loss": 0.1945, + "step": 28170 + }, + { + "epoch": 1.05, + "learning_rate": 2.663779510407744e-06, + "loss": 0.1741, + "step": 28200 + }, + { + "epoch": 1.05, + "learning_rate": 2.662363033264988e-06, + "loss": 0.1546, + "step": 28230 + }, + { + "epoch": 1.05, + "learning_rate": 2.6609488133766516e-06, + "loss": 0.1615, + "step": 28260 + }, + { + "epoch": 1.06, + "learning_rate": 2.659536844753944e-06, + "loss": 0.1553, + "step": 28290 + }, + { + "epoch": 1.06, + "learning_rate": 2.6581271214302974e-06, + "loss": 0.1362, + "step": 28320 + }, + { + "epoch": 1.06, + "learning_rate": 2.656719637461258e-06, + "loss": 0.1575, + "step": 28350 + }, + { + "epoch": 1.06, + "learning_rate": 2.6553143869243816e-06, + "loss": 0.1635, + "step": 28380 + }, + { + "epoch": 1.06, + "learning_rate": 2.653911363919129e-06, + "loss": 0.1524, + "step": 28410 + }, + { + "epoch": 1.06, + "learning_rate": 2.652510562566764e-06, + "loss": 0.1629, + "step": 28440 + }, + { + "epoch": 1.06, + "learning_rate": 2.651111977010246e-06, + "loss": 0.1578, + "step": 28470 + }, + { + "epoch": 1.06, + "learning_rate": 2.6497156014141305e-06, + "loss": 0.1817, + "step": 28500 + }, + { + "epoch": 1.06, + "learning_rate": 2.6483214299644663e-06, + "loss": 0.1375, + "step": 28530 + }, + { + "epoch": 1.07, + "learning_rate": 2.6469294568686905e-06, + "loss": 0.1616, + "step": 28560 + }, + { + "epoch": 1.07, + "learning_rate": 2.6455396763555318e-06, + "loss": 0.1864, + "step": 28590 + }, + { + "epoch": 1.07, + "learning_rate": 2.6441520826749096e-06, + "loss": 0.1538, + "step": 28620 + }, + { + "epoch": 1.07, + "learning_rate": 2.6427666700978293e-06, + "loss": 0.1763, + "step": 28650 + }, + { + "epoch": 1.07, + "learning_rate": 2.64138343291629e-06, + "loss": 0.171, + "step": 28680 + }, + { + "epoch": 1.07, + "learning_rate": 2.6400023654431796e-06, + "loss": 0.1688, + "step": 28710 + }, + { + "epoch": 1.07, + "learning_rate": 2.6386234620121797e-06, + "loss": 0.1436, + "step": 28740 + }, + { + "epoch": 1.07, + "learning_rate": 2.6372467169776683e-06, + "loss": 0.1494, + "step": 28770 + }, + { + "epoch": 1.07, + "learning_rate": 2.6358721247146202e-06, + "loss": 0.1806, + "step": 28800 + }, + { + "epoch": 1.08, + "learning_rate": 2.634499679618514e-06, + "loss": 0.1994, + "step": 28830 + }, + { + "epoch": 1.08, + "learning_rate": 2.6331293761052316e-06, + "loss": 0.1753, + "step": 28860 + }, + { + "epoch": 1.08, + "learning_rate": 2.631761208610969e-06, + "loss": 0.1863, + "step": 28890 + }, + { + "epoch": 1.08, + "learning_rate": 2.630395171592134e-06, + "loss": 0.1742, + "step": 28920 + }, + { + "epoch": 1.08, + "learning_rate": 2.6290312595252576e-06, + "loss": 0.1663, + "step": 28950 + }, + { + "epoch": 1.08, + "learning_rate": 2.6276694669068985e-06, + "loss": 0.1601, + "step": 28980 + }, + { + "epoch": 1.08, + "learning_rate": 2.6263097882535484e-06, + "loss": 0.1424, + "step": 29010 + }, + { + "epoch": 1.08, + "learning_rate": 2.6249522181015406e-06, + "loss": 0.1756, + "step": 29040 + }, + { + "epoch": 1.08, + "learning_rate": 2.623596751006959e-06, + "loss": 0.1582, + "step": 29070 + }, + { + "epoch": 1.09, + "learning_rate": 2.6222433815455445e-06, + "loss": 0.1803, + "step": 29100 + }, + { + "epoch": 1.09, + "learning_rate": 2.6208921043126025e-06, + "loss": 0.1791, + "step": 29130 + }, + { + "epoch": 1.09, + "learning_rate": 2.6195429139229154e-06, + "loss": 0.1579, + "step": 29160 + }, + { + "epoch": 1.09, + "learning_rate": 2.618195805010653e-06, + "loss": 0.1633, + "step": 29190 + }, + { + "epoch": 1.09, + "learning_rate": 2.616850772229277e-06, + "loss": 0.1399, + "step": 29220 + }, + { + "epoch": 1.09, + "learning_rate": 2.6155078102514603e-06, + "loss": 0.1731, + "step": 29250 + }, + { + "epoch": 1.09, + "learning_rate": 2.6141669137689908e-06, + "loss": 0.1989, + "step": 29280 + }, + { + "epoch": 1.09, + "learning_rate": 2.612828077492688e-06, + "loss": 0.1508, + "step": 29310 + }, + { + "epoch": 1.09, + "learning_rate": 2.611491296152313e-06, + "loss": 0.1514, + "step": 29340 + }, + { + "epoch": 1.1, + "learning_rate": 2.6101565644964837e-06, + "loss": 0.1835, + "step": 29370 + }, + { + "epoch": 1.1, + "learning_rate": 2.6088238772925843e-06, + "loss": 0.1823, + "step": 29400 + }, + { + "epoch": 1.1, + "learning_rate": 2.607493229326684e-06, + "loss": 0.1761, + "step": 29430 + }, + { + "epoch": 1.1, + "learning_rate": 2.606164615403447e-06, + "loss": 0.1523, + "step": 29460 + }, + { + "epoch": 1.1, + "learning_rate": 2.6048380303460504e-06, + "loss": 0.167, + "step": 29490 + }, + { + "epoch": 1.1, + "learning_rate": 2.6035134689960962e-06, + "loss": 0.1705, + "step": 29520 + }, + { + "epoch": 1.1, + "learning_rate": 2.602190926213532e-06, + "loss": 0.185, + "step": 29550 + }, + { + "epoch": 1.1, + "learning_rate": 2.6008703968765623e-06, + "loss": 0.1641, + "step": 29580 + }, + { + "epoch": 1.1, + "learning_rate": 2.599551875881568e-06, + "loss": 0.1599, + "step": 29610 + }, + { + "epoch": 1.11, + "learning_rate": 2.598235358143022e-06, + "loss": 0.1412, + "step": 29640 + }, + { + "epoch": 1.11, + "learning_rate": 2.5969208385934085e-06, + "loss": 0.1463, + "step": 29670 + }, + { + "epoch": 1.11, + "learning_rate": 2.5956083121831394e-06, + "loss": 0.1586, + "step": 29700 + }, + { + "epoch": 1.11, + "learning_rate": 2.5942977738804735e-06, + "loss": 0.1557, + "step": 29730 + }, + { + "epoch": 1.11, + "learning_rate": 2.5929892186714367e-06, + "loss": 0.157, + "step": 29760 + }, + { + "epoch": 1.11, + "learning_rate": 2.5916826415597386e-06, + "loss": 0.1673, + "step": 29790 + }, + { + "epoch": 1.11, + "learning_rate": 2.590378037566695e-06, + "loss": 0.1452, + "step": 29820 + }, + { + "epoch": 1.11, + "learning_rate": 2.589075401731149e-06, + "loss": 0.1613, + "step": 29850 + }, + { + "epoch": 1.11, + "learning_rate": 2.5877747291093864e-06, + "loss": 0.1588, + "step": 29880 + }, + { + "epoch": 1.12, + "learning_rate": 2.586476014775063e-06, + "loss": 0.2077, + "step": 29910 + }, + { + "epoch": 1.12, + "learning_rate": 2.5851792538191226e-06, + "loss": 0.1625, + "step": 29940 + }, + { + "epoch": 1.12, + "learning_rate": 2.5838844413497223e-06, + "loss": 0.1911, + "step": 29970 + }, + { + "epoch": 1.12, + "learning_rate": 2.5825915724921503e-06, + "loss": 0.1794, + "step": 30000 + }, + { + "epoch": 1.12, + "learning_rate": 2.581300642388753e-06, + "loss": 0.148, + "step": 30030 + }, + { + "epoch": 1.12, + "learning_rate": 2.5800116461988566e-06, + "loss": 0.1765, + "step": 30060 + }, + { + "epoch": 1.12, + "learning_rate": 2.578724579098691e-06, + "loss": 0.1541, + "step": 30090 + }, + { + "epoch": 1.12, + "learning_rate": 2.577439436281313e-06, + "loss": 0.1802, + "step": 30120 + }, + { + "epoch": 1.12, + "learning_rate": 2.576156212956535e-06, + "loss": 0.181, + "step": 30150 + }, + { + "epoch": 1.13, + "learning_rate": 2.574874904350842e-06, + "loss": 0.1844, + "step": 30180 + }, + { + "epoch": 1.13, + "learning_rate": 2.573595505707327e-06, + "loss": 0.1545, + "step": 30210 + }, + { + "epoch": 1.13, + "learning_rate": 2.5723180122856094e-06, + "loss": 0.1621, + "step": 30240 + }, + { + "epoch": 1.13, + "learning_rate": 2.5710424193617634e-06, + "loss": 0.1829, + "step": 30270 + }, + { + "epoch": 1.13, + "learning_rate": 2.569768722228245e-06, + "loss": 0.1689, + "step": 30300 + }, + { + "epoch": 1.13, + "learning_rate": 2.56849691619382e-06, + "loss": 0.1457, + "step": 30330 + }, + { + "epoch": 1.13, + "learning_rate": 2.5672269965834895e-06, + "loss": 0.1635, + "step": 30360 + }, + { + "epoch": 1.13, + "learning_rate": 2.5659589587384176e-06, + "loss": 0.1363, + "step": 30390 + }, + { + "epoch": 1.13, + "learning_rate": 2.564692798015863e-06, + "loss": 0.161, + "step": 30420 + }, + { + "epoch": 1.14, + "learning_rate": 2.5634285097891025e-06, + "loss": 0.1627, + "step": 30450 + }, + { + "epoch": 1.14, + "learning_rate": 2.5621660894473655e-06, + "loss": 0.1606, + "step": 30480 + }, + { + "epoch": 1.14, + "learning_rate": 2.560905532395757e-06, + "loss": 0.1622, + "step": 30510 + }, + { + "epoch": 1.14, + "learning_rate": 2.559646834055194e-06, + "loss": 0.1622, + "step": 30540 + }, + { + "epoch": 1.14, + "learning_rate": 2.5583899898623316e-06, + "loss": 0.1603, + "step": 30570 + }, + { + "epoch": 1.14, + "learning_rate": 2.557134995269493e-06, + "loss": 0.1517, + "step": 30600 + }, + { + "epoch": 1.14, + "learning_rate": 2.5558818457446043e-06, + "loss": 0.169, + "step": 30630 + }, + { + "epoch": 1.14, + "learning_rate": 2.554630536771122e-06, + "loss": 0.1671, + "step": 30660 + }, + { + "epoch": 1.14, + "learning_rate": 2.553381063847966e-06, + "loss": 0.1823, + "step": 30690 + }, + { + "epoch": 1.15, + "learning_rate": 2.5521334224894508e-06, + "loss": 0.1486, + "step": 30720 + }, + { + "epoch": 1.15, + "learning_rate": 2.5508876082252192e-06, + "loss": 0.1592, + "step": 30750 + }, + { + "epoch": 1.15, + "learning_rate": 2.5496436166001757e-06, + "loss": 0.1751, + "step": 30780 + }, + { + "epoch": 1.15, + "learning_rate": 2.5484014431744175e-06, + "loss": 0.1687, + "step": 30810 + }, + { + "epoch": 1.15, + "learning_rate": 2.5471610835231664e-06, + "loss": 0.1749, + "step": 30840 + }, + { + "epoch": 1.15, + "learning_rate": 2.5459225332367084e-06, + "loss": 0.1721, + "step": 30870 + }, + { + "epoch": 1.15, + "learning_rate": 2.5446857879203222e-06, + "loss": 0.1674, + "step": 30900 + }, + { + "epoch": 1.15, + "learning_rate": 2.5434508431942174e-06, + "loss": 0.1674, + "step": 30930 + }, + { + "epoch": 1.15, + "learning_rate": 2.5422176946934667e-06, + "loss": 0.1527, + "step": 30960 + }, + { + "epoch": 1.16, + "learning_rate": 2.5409863380679436e-06, + "loss": 0.1803, + "step": 30990 + }, + { + "epoch": 1.16, + "learning_rate": 2.539756768982254e-06, + "loss": 0.1536, + "step": 31020 + }, + { + "epoch": 1.16, + "learning_rate": 2.5385289831156773e-06, + "loss": 0.1405, + "step": 31050 + }, + { + "epoch": 1.16, + "learning_rate": 2.5373029761620997e-06, + "loss": 0.1691, + "step": 31080 + }, + { + "epoch": 1.16, + "learning_rate": 2.5360787438299507e-06, + "loss": 0.1617, + "step": 31110 + }, + { + "epoch": 1.16, + "learning_rate": 2.5348562818421386e-06, + "loss": 0.158, + "step": 31140 + }, + { + "epoch": 1.16, + "learning_rate": 2.5336355859359934e-06, + "loss": 0.1632, + "step": 31170 + }, + { + "epoch": 1.16, + "learning_rate": 2.5324166518631965e-06, + "loss": 0.175, + "step": 31200 + }, + { + "epoch": 1.16, + "learning_rate": 2.531199475389726e-06, + "loss": 0.1579, + "step": 31230 + }, + { + "epoch": 1.17, + "learning_rate": 2.5299840522957895e-06, + "loss": 0.157, + "step": 31260 + }, + { + "epoch": 1.17, + "learning_rate": 2.5287703783757656e-06, + "loss": 0.1626, + "step": 31290 + }, + { + "epoch": 1.17, + "learning_rate": 2.5275584494381406e-06, + "loss": 0.1753, + "step": 31320 + }, + { + "epoch": 1.17, + "learning_rate": 2.5263482613054506e-06, + "loss": 0.1429, + "step": 31350 + }, + { + "epoch": 1.17, + "learning_rate": 2.525139809814219e-06, + "loss": 0.1551, + "step": 31380 + }, + { + "epoch": 1.17, + "learning_rate": 2.523933090814896e-06, + "loss": 0.1516, + "step": 31410 + }, + { + "epoch": 1.17, + "learning_rate": 2.5227281001718014e-06, + "loss": 0.1365, + "step": 31440 + }, + { + "epoch": 1.17, + "learning_rate": 2.52152483376306e-06, + "loss": 0.1482, + "step": 31470 + }, + { + "epoch": 1.17, + "learning_rate": 2.5203633113534356e-06, + "loss": 0.1979, + "step": 31500 + }, + { + "epoch": 1.18, + "learning_rate": 2.519163423967481e-06, + "loss": 0.1598, + "step": 31530 + }, + { + "epoch": 1.18, + "learning_rate": 2.5179652486683264e-06, + "loss": 0.153, + "step": 31560 + }, + { + "epoch": 1.18, + "learning_rate": 2.5167687813882913e-06, + "loss": 0.1523, + "step": 31590 + }, + { + "epoch": 1.18, + "learning_rate": 2.5155740180732113e-06, + "loss": 0.1552, + "step": 31620 + }, + { + "epoch": 1.18, + "learning_rate": 2.514380954682381e-06, + "loss": 0.1824, + "step": 31650 + }, + { + "epoch": 1.18, + "learning_rate": 2.5131895871884982e-06, + "loss": 0.2012, + "step": 31680 + }, + { + "epoch": 1.18, + "learning_rate": 2.511999911577605e-06, + "loss": 0.2118, + "step": 31710 + }, + { + "epoch": 1.18, + "learning_rate": 2.5108119238490314e-06, + "loss": 0.1636, + "step": 31740 + }, + { + "epoch": 1.18, + "learning_rate": 2.50962562001534e-06, + "loss": 0.153, + "step": 31770 + }, + { + "epoch": 1.19, + "learning_rate": 2.508440996102268e-06, + "loss": 0.1606, + "step": 31800 + }, + { + "epoch": 1.19, + "learning_rate": 2.507258048148672e-06, + "loss": 0.1909, + "step": 31830 + }, + { + "epoch": 1.19, + "learning_rate": 2.506076772206472e-06, + "loss": 0.1504, + "step": 31860 + }, + { + "epoch": 1.19, + "learning_rate": 2.5048971643405985e-06, + "loss": 0.1589, + "step": 31890 + }, + { + "epoch": 1.19, + "learning_rate": 2.5037192206289322e-06, + "loss": 0.1534, + "step": 31920 + }, + { + "epoch": 1.19, + "learning_rate": 2.5025429371622568e-06, + "loss": 0.1991, + "step": 31950 + }, + { + "epoch": 1.19, + "learning_rate": 2.5013683100441947e-06, + "loss": 0.1461, + "step": 31980 + }, + { + "epoch": 1.19, + "learning_rate": 2.5001953353911644e-06, + "loss": 0.1715, + "step": 32010 + }, + { + "epoch": 1.19, + "learning_rate": 2.4990240093323155e-06, + "loss": 0.1519, + "step": 32040 + }, + { + "epoch": 1.2, + "learning_rate": 2.4978543280094823e-06, + "loss": 0.1527, + "step": 32070 + }, + { + "epoch": 1.2, + "learning_rate": 2.4966862875771277e-06, + "loss": 0.1782, + "step": 32100 + }, + { + "epoch": 1.2, + "learning_rate": 2.4955198842022903e-06, + "loss": 0.1457, + "step": 32130 + }, + { + "epoch": 1.2, + "learning_rate": 2.494355114064532e-06, + "loss": 0.1619, + "step": 32160 + }, + { + "epoch": 1.2, + "learning_rate": 2.493191973355886e-06, + "loss": 0.1677, + "step": 32190 + }, + { + "epoch": 1.2, + "learning_rate": 2.4920304582808026e-06, + "loss": 0.185, + "step": 32220 + }, + { + "epoch": 1.2, + "learning_rate": 2.4908705650560973e-06, + "loss": 0.1631, + "step": 32250 + }, + { + "epoch": 1.2, + "learning_rate": 2.489712289910904e-06, + "loss": 0.1768, + "step": 32280 + }, + { + "epoch": 1.21, + "learning_rate": 2.488555629086615e-06, + "loss": 0.1695, + "step": 32310 + }, + { + "epoch": 1.21, + "learning_rate": 2.487400578836838e-06, + "loss": 0.1578, + "step": 32340 + }, + { + "epoch": 1.21, + "learning_rate": 2.4862471354273387e-06, + "loss": 0.1481, + "step": 32370 + }, + { + "epoch": 1.21, + "learning_rate": 2.4851336640235753e-06, + "loss": 0.17, + "step": 32400 + }, + { + "epoch": 1.21, + "learning_rate": 2.483983369886367e-06, + "loss": 0.1522, + "step": 32430 + }, + { + "epoch": 1.21, + "learning_rate": 2.482834671582272e-06, + "loss": 0.1716, + "step": 32460 + }, + { + "epoch": 1.21, + "learning_rate": 2.481687565424798e-06, + "loss": 0.1523, + "step": 32490 + }, + { + "epoch": 1.21, + "learning_rate": 2.4805420477393657e-06, + "loss": 0.1714, + "step": 32520 + }, + { + "epoch": 1.21, + "learning_rate": 2.479398114863255e-06, + "loss": 0.1585, + "step": 32550 + }, + { + "epoch": 1.22, + "learning_rate": 2.4782557631455603e-06, + "loss": 0.1632, + "step": 32580 + }, + { + "epoch": 1.22, + "learning_rate": 2.47711498894714e-06, + "loss": 0.1372, + "step": 32610 + }, + { + "epoch": 1.22, + "learning_rate": 2.4759757886405678e-06, + "loss": 0.1625, + "step": 32640 + }, + { + "epoch": 1.22, + "learning_rate": 2.474838158610084e-06, + "loss": 0.15, + "step": 32670 + }, + { + "epoch": 1.22, + "learning_rate": 2.4737020952515466e-06, + "loss": 0.1996, + "step": 32700 + }, + { + "epoch": 1.22, + "learning_rate": 2.4725675949723856e-06, + "loss": 0.1648, + "step": 32730 + }, + { + "epoch": 1.22, + "learning_rate": 2.471434654191553e-06, + "loss": 0.1608, + "step": 32760 + }, + { + "epoch": 1.22, + "learning_rate": 2.4703032693394766e-06, + "loss": 0.1501, + "step": 32790 + }, + { + "epoch": 1.22, + "learning_rate": 2.4691734368580124e-06, + "loss": 0.2002, + "step": 32820 + }, + { + "epoch": 1.23, + "learning_rate": 2.4680451532003975e-06, + "loss": 0.1717, + "step": 32850 + }, + { + "epoch": 1.23, + "learning_rate": 2.4669184148312046e-06, + "loss": 0.1575, + "step": 32880 + }, + { + "epoch": 1.23, + "learning_rate": 2.465793218226291e-06, + "loss": 0.191, + "step": 32910 + }, + { + "epoch": 1.23, + "learning_rate": 2.4646695598727596e-06, + "loss": 0.156, + "step": 32940 + }, + { + "epoch": 1.23, + "learning_rate": 2.463547436268907e-06, + "loss": 0.1589, + "step": 32970 + }, + { + "epoch": 1.23, + "learning_rate": 2.4624268439241786e-06, + "loss": 0.1618, + "step": 33000 + }, + { + "epoch": 1.23, + "learning_rate": 2.4613077793591255e-06, + "loss": 0.1438, + "step": 33030 + }, + { + "epoch": 1.23, + "learning_rate": 2.460190239105358e-06, + "loss": 0.171, + "step": 33060 + }, + { + "epoch": 1.23, + "learning_rate": 2.459074219705497e-06, + "loss": 0.1617, + "step": 33090 + }, + { + "epoch": 1.24, + "learning_rate": 2.457959717713135e-06, + "loss": 0.1846, + "step": 33120 + }, + { + "epoch": 1.24, + "learning_rate": 2.456846729692788e-06, + "loss": 0.1812, + "step": 33150 + }, + { + "epoch": 1.24, + "learning_rate": 2.4557352522198512e-06, + "loss": 0.1568, + "step": 33180 + }, + { + "epoch": 1.24, + "learning_rate": 2.454625281880554e-06, + "loss": 0.1354, + "step": 33210 + }, + { + "epoch": 1.24, + "learning_rate": 2.4535168152719188e-06, + "loss": 0.185, + "step": 33240 + }, + { + "epoch": 1.24, + "learning_rate": 2.4524098490017136e-06, + "loss": 0.1512, + "step": 33270 + }, + { + "epoch": 1.24, + "learning_rate": 2.4513043796884104e-06, + "loss": 0.1976, + "step": 33300 + }, + { + "epoch": 1.24, + "learning_rate": 2.4502004039611437e-06, + "loss": 0.1595, + "step": 33330 + }, + { + "epoch": 1.24, + "learning_rate": 2.4490979184596634e-06, + "loss": 0.1411, + "step": 33360 + }, + { + "epoch": 1.25, + "learning_rate": 2.447996919834293e-06, + "loss": 0.131, + "step": 33390 + }, + { + "epoch": 1.25, + "learning_rate": 2.44689740474589e-06, + "loss": 0.1429, + "step": 33420 + }, + { + "epoch": 1.25, + "learning_rate": 2.445799369865799e-06, + "loss": 0.1504, + "step": 33450 + }, + { + "epoch": 1.25, + "learning_rate": 2.4447028118758115e-06, + "loss": 0.1438, + "step": 33480 + }, + { + "epoch": 1.25, + "learning_rate": 2.443607727468123e-06, + "loss": 0.1468, + "step": 33510 + }, + { + "epoch": 1.25, + "learning_rate": 2.4425141133452936e-06, + "loss": 0.1323, + "step": 33540 + }, + { + "epoch": 1.25, + "learning_rate": 2.441421966220202e-06, + "loss": 0.1852, + "step": 33570 + }, + { + "epoch": 1.25, + "learning_rate": 2.440331282816008e-06, + "loss": 0.1523, + "step": 33600 + }, + { + "epoch": 1.25, + "learning_rate": 2.4392420598661103e-06, + "loss": 0.1491, + "step": 33630 + }, + { + "epoch": 1.26, + "learning_rate": 2.4381542941141006e-06, + "loss": 0.1689, + "step": 33660 + }, + { + "epoch": 1.26, + "learning_rate": 2.4370679823137313e-06, + "loss": 0.1446, + "step": 33690 + }, + { + "epoch": 1.26, + "learning_rate": 2.4359831212288684e-06, + "loss": 0.1563, + "step": 33720 + }, + { + "epoch": 1.26, + "learning_rate": 2.434899707633453e-06, + "loss": 0.1501, + "step": 33750 + }, + { + "epoch": 1.26, + "learning_rate": 2.4338177383114604e-06, + "loss": 0.1654, + "step": 33780 + }, + { + "epoch": 1.26, + "learning_rate": 2.432737210056862e-06, + "loss": 0.1693, + "step": 33810 + }, + { + "epoch": 1.26, + "learning_rate": 2.4316581196735815e-06, + "loss": 0.16, + "step": 33840 + }, + { + "epoch": 1.26, + "learning_rate": 2.4305804639754597e-06, + "loss": 0.1636, + "step": 33870 + }, + { + "epoch": 1.26, + "learning_rate": 2.4295042397862126e-06, + "loss": 0.1367, + "step": 33900 + }, + { + "epoch": 1.27, + "learning_rate": 2.428429443939391e-06, + "loss": 0.1561, + "step": 33930 + }, + { + "epoch": 1.27, + "learning_rate": 2.427356073278345e-06, + "loss": 0.1421, + "step": 33960 + }, + { + "epoch": 1.27, + "learning_rate": 2.42628412465618e-06, + "loss": 0.1553, + "step": 33990 + }, + { + "epoch": 1.27, + "learning_rate": 2.425213594935723e-06, + "loss": 0.2056, + "step": 34020 + }, + { + "epoch": 1.27, + "learning_rate": 2.424144480989481e-06, + "loss": 0.1346, + "step": 34050 + }, + { + "epoch": 1.27, + "learning_rate": 2.423076779699603e-06, + "loss": 0.1716, + "step": 34080 + }, + { + "epoch": 1.27, + "learning_rate": 2.422010487957844e-06, + "loss": 0.1501, + "step": 34110 + }, + { + "epoch": 1.27, + "learning_rate": 2.420945602665522e-06, + "loss": 0.1449, + "step": 34140 + }, + { + "epoch": 1.27, + "learning_rate": 2.419882120733486e-06, + "loss": 0.1514, + "step": 34170 + }, + { + "epoch": 1.28, + "learning_rate": 2.4188200390820758e-06, + "loss": 0.1622, + "step": 34200 + }, + { + "epoch": 1.28, + "learning_rate": 2.4177593546410825e-06, + "loss": 0.1641, + "step": 34230 + }, + { + "epoch": 1.28, + "learning_rate": 2.4167000643497152e-06, + "loss": 0.1417, + "step": 34260 + }, + { + "epoch": 1.28, + "learning_rate": 2.4156421651565615e-06, + "loss": 0.1399, + "step": 34290 + }, + { + "epoch": 1.28, + "learning_rate": 2.4145856540195495e-06, + "loss": 0.1595, + "step": 34320 + }, + { + "epoch": 1.28, + "learning_rate": 2.413530527905915e-06, + "loss": 0.155, + "step": 34350 + }, + { + "epoch": 1.28, + "learning_rate": 2.4124767837921604e-06, + "loss": 0.1723, + "step": 34380 + }, + { + "epoch": 1.28, + "learning_rate": 2.411424418664022e-06, + "loss": 0.1505, + "step": 34410 + }, + { + "epoch": 1.28, + "learning_rate": 2.4103734295164312e-06, + "loss": 0.1443, + "step": 34440 + }, + { + "epoch": 1.29, + "learning_rate": 2.40932381335348e-06, + "loss": 0.1555, + "step": 34470 + }, + { + "epoch": 1.29, + "learning_rate": 2.408275567188384e-06, + "loss": 0.1472, + "step": 34500 + }, + { + "epoch": 1.29, + "learning_rate": 2.407228688043447e-06, + "loss": 0.1674, + "step": 34530 + }, + { + "epoch": 1.29, + "learning_rate": 2.4061831729500282e-06, + "loss": 0.1735, + "step": 34560 + }, + { + "epoch": 1.29, + "learning_rate": 2.4051390189485015e-06, + "loss": 0.1368, + "step": 34590 + }, + { + "epoch": 1.29, + "learning_rate": 2.404096223088225e-06, + "loss": 0.1499, + "step": 34620 + }, + { + "epoch": 1.29, + "learning_rate": 2.403054782427503e-06, + "loss": 0.171, + "step": 34650 + }, + { + "epoch": 1.29, + "learning_rate": 2.4020146940335533e-06, + "loss": 0.1371, + "step": 34680 + }, + { + "epoch": 1.29, + "learning_rate": 2.400975954982471e-06, + "loss": 0.1348, + "step": 34710 + }, + { + "epoch": 1.3, + "learning_rate": 2.3999385623591958e-06, + "loss": 0.1607, + "step": 34740 + }, + { + "epoch": 1.3, + "learning_rate": 2.3989025132574736e-06, + "loss": 0.1845, + "step": 34770 + }, + { + "epoch": 1.3, + "learning_rate": 2.3978678047798286e-06, + "loss": 0.1799, + "step": 34800 + }, + { + "epoch": 1.3, + "learning_rate": 2.396834434037523e-06, + "loss": 0.1409, + "step": 34830 + }, + { + "epoch": 1.3, + "learning_rate": 2.3958023981505267e-06, + "loss": 0.1556, + "step": 34860 + }, + { + "epoch": 1.3, + "learning_rate": 2.3947716942474835e-06, + "loss": 0.161, + "step": 34890 + }, + { + "epoch": 1.3, + "learning_rate": 2.3937423194656766e-06, + "loss": 0.1834, + "step": 34920 + }, + { + "epoch": 1.3, + "learning_rate": 2.3927142709509947e-06, + "loss": 0.1527, + "step": 34950 + }, + { + "epoch": 1.3, + "learning_rate": 2.3916875458579e-06, + "loss": 0.1612, + "step": 34980 + }, + { + "epoch": 1.31, + "learning_rate": 2.3906621413493943e-06, + "loss": 0.1413, + "step": 35010 + }, + { + "epoch": 1.31, + "learning_rate": 2.3896380545969873e-06, + "loss": 0.2112, + "step": 35040 + }, + { + "epoch": 1.31, + "learning_rate": 2.388615282780661e-06, + "loss": 0.1706, + "step": 35070 + }, + { + "epoch": 1.31, + "learning_rate": 2.3875938230888414e-06, + "loss": 0.1461, + "step": 35100 + }, + { + "epoch": 1.31, + "learning_rate": 2.386573672718362e-06, + "loss": 0.1477, + "step": 35130 + }, + { + "epoch": 1.31, + "learning_rate": 2.385554828874434e-06, + "loss": 0.1657, + "step": 35160 + }, + { + "epoch": 1.31, + "learning_rate": 2.384537288770612e-06, + "loss": 0.1558, + "step": 35190 + }, + { + "epoch": 1.31, + "learning_rate": 2.3835210496287646e-06, + "loss": 0.1707, + "step": 35220 + }, + { + "epoch": 1.31, + "learning_rate": 2.3825061086790407e-06, + "loss": 0.1314, + "step": 35250 + }, + { + "epoch": 1.32, + "learning_rate": 2.3814924631598384e-06, + "loss": 0.1539, + "step": 35280 + }, + { + "epoch": 1.32, + "learning_rate": 2.3804801103177737e-06, + "loss": 0.2101, + "step": 35310 + }, + { + "epoch": 1.32, + "learning_rate": 2.3794690474076476e-06, + "loss": 0.2028, + "step": 35340 + }, + { + "epoch": 1.32, + "learning_rate": 2.3784592716924168e-06, + "loss": 0.1903, + "step": 35370 + }, + { + "epoch": 1.32, + "learning_rate": 2.377450780443162e-06, + "loss": 0.1499, + "step": 35400 + }, + { + "epoch": 1.32, + "learning_rate": 2.3764435709390556e-06, + "loss": 0.1607, + "step": 35430 + }, + { + "epoch": 1.32, + "learning_rate": 2.3754376404673334e-06, + "loss": 0.1493, + "step": 35460 + }, + { + "epoch": 1.32, + "learning_rate": 2.3744329863232613e-06, + "loss": 0.1685, + "step": 35490 + }, + { + "epoch": 1.32, + "learning_rate": 2.3734296058101067e-06, + "loss": 0.1396, + "step": 35520 + }, + { + "epoch": 1.33, + "learning_rate": 2.372427496239106e-06, + "loss": 0.148, + "step": 35550 + }, + { + "epoch": 1.33, + "learning_rate": 2.371426654929437e-06, + "loss": 0.1632, + "step": 35580 + }, + { + "epoch": 1.33, + "learning_rate": 2.3704270792081874e-06, + "loss": 0.1703, + "step": 35610 + }, + { + "epoch": 1.33, + "learning_rate": 2.3694287664103236e-06, + "loss": 0.1319, + "step": 35640 + }, + { + "epoch": 1.33, + "learning_rate": 2.368431713878663e-06, + "loss": 0.1504, + "step": 35670 + }, + { + "epoch": 1.33, + "learning_rate": 2.3674359189638443e-06, + "loss": 0.1268, + "step": 35700 + }, + { + "epoch": 1.33, + "learning_rate": 2.3664413790242944e-06, + "loss": 0.2007, + "step": 35730 + }, + { + "epoch": 1.33, + "learning_rate": 2.3654480914262044e-06, + "loss": 0.1817, + "step": 35760 + }, + { + "epoch": 1.33, + "learning_rate": 2.3644560535434954e-06, + "loss": 0.1459, + "step": 35790 + }, + { + "epoch": 1.34, + "learning_rate": 2.3634652627577933e-06, + "loss": 0.1827, + "step": 35820 + }, + { + "epoch": 1.34, + "learning_rate": 2.3624757164583963e-06, + "loss": 0.181, + "step": 35850 + }, + { + "epoch": 1.34, + "learning_rate": 2.3614874120422486e-06, + "loss": 0.1485, + "step": 35880 + }, + { + "epoch": 1.34, + "learning_rate": 2.36050034691391e-06, + "loss": 0.1573, + "step": 35910 + }, + { + "epoch": 1.34, + "learning_rate": 2.3595145184855286e-06, + "loss": 0.15, + "step": 35940 + }, + { + "epoch": 1.34, + "learning_rate": 2.35852992417681e-06, + "loss": 0.2165, + "step": 35970 + }, + { + "epoch": 1.34, + "learning_rate": 2.357546561414994e-06, + "loss": 0.1448, + "step": 36000 + }, + { + "epoch": 1.34, + "learning_rate": 2.3565644276348197e-06, + "loss": 0.1556, + "step": 36030 + }, + { + "epoch": 1.34, + "learning_rate": 2.3555835202785018e-06, + "loss": 0.1416, + "step": 36060 + }, + { + "epoch": 1.35, + "learning_rate": 2.354603836795701e-06, + "loss": 0.1705, + "step": 36090 + }, + { + "epoch": 1.35, + "learning_rate": 2.3536253746434974e-06, + "loss": 0.1615, + "step": 36120 + }, + { + "epoch": 1.35, + "learning_rate": 2.352648131286362e-06, + "loss": 0.1561, + "step": 36150 + }, + { + "epoch": 1.35, + "learning_rate": 2.3516721041961274e-06, + "loss": 0.1637, + "step": 36180 + }, + { + "epoch": 1.35, + "learning_rate": 2.3506972908519647e-06, + "loss": 0.1616, + "step": 36210 + }, + { + "epoch": 1.35, + "learning_rate": 2.3497236887403514e-06, + "loss": 0.1653, + "step": 36240 + }, + { + "epoch": 1.35, + "learning_rate": 2.348751295355046e-06, + "loss": 0.1632, + "step": 36270 + }, + { + "epoch": 1.35, + "learning_rate": 2.347780108197064e-06, + "loss": 0.1437, + "step": 36300 + }, + { + "epoch": 1.35, + "learning_rate": 2.3468101247746447e-06, + "loss": 0.1745, + "step": 36330 + }, + { + "epoch": 1.36, + "learning_rate": 2.3458413426032292e-06, + "loss": 0.1505, + "step": 36360 + }, + { + "epoch": 1.36, + "learning_rate": 2.3448737592054326e-06, + "loss": 0.1477, + "step": 36390 + }, + { + "epoch": 1.36, + "learning_rate": 2.3439073721110166e-06, + "loss": 0.1484, + "step": 36420 + }, + { + "epoch": 1.36, + "learning_rate": 2.3429421788568627e-06, + "loss": 0.149, + "step": 36450 + }, + { + "epoch": 1.36, + "learning_rate": 2.3419781769869478e-06, + "loss": 0.138, + "step": 36480 + }, + { + "epoch": 1.36, + "learning_rate": 2.341015364052316e-06, + "loss": 0.1717, + "step": 36510 + }, + { + "epoch": 1.36, + "learning_rate": 2.3400537376110534e-06, + "loss": 0.1377, + "step": 36540 + }, + { + "epoch": 1.36, + "learning_rate": 2.3390932952282607e-06, + "loss": 0.1655, + "step": 36570 + }, + { + "epoch": 1.37, + "learning_rate": 2.33813403447603e-06, + "loss": 0.1842, + "step": 36600 + }, + { + "epoch": 1.37, + "learning_rate": 2.3371759529334183e-06, + "loss": 0.1865, + "step": 36630 + }, + { + "epoch": 1.37, + "learning_rate": 2.336219048186418e-06, + "loss": 0.1335, + "step": 36660 + }, + { + "epoch": 1.37, + "learning_rate": 2.3352633178279355e-06, + "loss": 0.1753, + "step": 36690 + }, + { + "epoch": 1.37, + "learning_rate": 2.3343087594577666e-06, + "loss": 0.176, + "step": 36720 + }, + { + "epoch": 1.37, + "learning_rate": 2.3333553706825667e-06, + "loss": 0.1251, + "step": 36750 + }, + { + "epoch": 1.37, + "learning_rate": 2.33240314911583e-06, + "loss": 0.162, + "step": 36780 + }, + { + "epoch": 1.37, + "learning_rate": 2.3314520923778613e-06, + "loss": 0.144, + "step": 36810 + }, + { + "epoch": 1.37, + "learning_rate": 2.330533842535043e-06, + "loss": 0.1405, + "step": 36840 + }, + { + "epoch": 1.38, + "learning_rate": 2.329585069711041e-06, + "loss": 0.1436, + "step": 36870 + }, + { + "epoch": 1.38, + "learning_rate": 2.328637454695883e-06, + "loss": 0.1458, + "step": 36900 + }, + { + "epoch": 1.38, + "learning_rate": 2.32769099513665e-06, + "loss": 0.1645, + "step": 36930 + }, + { + "epoch": 1.38, + "learning_rate": 2.32674568868711e-06, + "loss": 0.1415, + "step": 36960 + }, + { + "epoch": 1.38, + "learning_rate": 2.3258015330076976e-06, + "loss": 0.126, + "step": 36990 + }, + { + "epoch": 1.38, + "learning_rate": 2.324858525765487e-06, + "loss": 0.1576, + "step": 37020 + }, + { + "epoch": 1.38, + "learning_rate": 2.3239166646341674e-06, + "loss": 0.143, + "step": 37050 + }, + { + "epoch": 1.38, + "learning_rate": 2.322975947294021e-06, + "loss": 0.1905, + "step": 37080 + }, + { + "epoch": 1.38, + "learning_rate": 2.3220363714318965e-06, + "loss": 0.1584, + "step": 37110 + }, + { + "epoch": 1.39, + "learning_rate": 2.3210979347411877e-06, + "loss": 0.1583, + "step": 37140 + }, + { + "epoch": 1.39, + "learning_rate": 2.320160634921808e-06, + "loss": 0.1629, + "step": 37170 + }, + { + "epoch": 1.39, + "learning_rate": 2.3192244696801673e-06, + "loss": 0.1418, + "step": 37200 + }, + { + "epoch": 1.39, + "learning_rate": 2.318289436729148e-06, + "loss": 0.1481, + "step": 37230 + }, + { + "epoch": 1.39, + "learning_rate": 2.3173555337880833e-06, + "loss": 0.1596, + "step": 37260 + }, + { + "epoch": 1.39, + "learning_rate": 2.3164227585827304e-06, + "loss": 0.1332, + "step": 37290 + }, + { + "epoch": 1.39, + "learning_rate": 2.3154911088452513e-06, + "loss": 0.1482, + "step": 37320 + }, + { + "epoch": 1.39, + "learning_rate": 2.314560582314186e-06, + "loss": 0.1413, + "step": 37350 + }, + { + "epoch": 1.39, + "learning_rate": 2.313631176734432e-06, + "loss": 0.1742, + "step": 37380 + }, + { + "epoch": 1.4, + "learning_rate": 2.3127028898572203e-06, + "loss": 0.1686, + "step": 37410 + }, + { + "epoch": 1.4, + "learning_rate": 2.311775719440093e-06, + "loss": 0.1497, + "step": 37440 + }, + { + "epoch": 1.4, + "learning_rate": 2.310849663246879e-06, + "loss": 0.1518, + "step": 37470 + }, + { + "epoch": 1.4, + "learning_rate": 2.309924719047674e-06, + "loss": 0.1568, + "step": 37500 + }, + { + "epoch": 1.4, + "learning_rate": 2.3090008846188165e-06, + "loss": 0.1332, + "step": 37530 + }, + { + "epoch": 1.4, + "learning_rate": 2.308078157742863e-06, + "loss": 0.1531, + "step": 37560 + }, + { + "epoch": 1.4, + "learning_rate": 2.307156536208571e-06, + "loss": 0.1472, + "step": 37590 + }, + { + "epoch": 1.4, + "learning_rate": 2.3062360178108724e-06, + "loss": 0.1369, + "step": 37620 + }, + { + "epoch": 1.4, + "learning_rate": 2.3053166003508527e-06, + "loss": 0.1482, + "step": 37650 + }, + { + "epoch": 1.41, + "learning_rate": 2.30439828163573e-06, + "loss": 0.149, + "step": 37680 + }, + { + "epoch": 1.41, + "learning_rate": 2.3034810594788295e-06, + "loss": 0.1617, + "step": 37710 + }, + { + "epoch": 1.41, + "learning_rate": 2.3025649316995668e-06, + "loss": 0.1573, + "step": 37740 + }, + { + "epoch": 1.41, + "learning_rate": 2.3016498961234214e-06, + "loss": 0.1497, + "step": 37770 + }, + { + "epoch": 1.41, + "learning_rate": 2.300735950581918e-06, + "loss": 0.1744, + "step": 37800 + }, + { + "epoch": 1.41, + "learning_rate": 2.2998230929126046e-06, + "loss": 0.139, + "step": 37830 + }, + { + "epoch": 1.41, + "learning_rate": 2.29891132095903e-06, + "loss": 0.1647, + "step": 37860 + }, + { + "epoch": 1.41, + "learning_rate": 2.2980006325707215e-06, + "loss": 0.1512, + "step": 37890 + }, + { + "epoch": 1.41, + "learning_rate": 2.2970910256031658e-06, + "loss": 0.1802, + "step": 37920 + }, + { + "epoch": 1.42, + "learning_rate": 2.296182497917788e-06, + "loss": 0.1654, + "step": 37950 + }, + { + "epoch": 1.42, + "learning_rate": 2.2952750473819264e-06, + "loss": 0.1639, + "step": 37980 + }, + { + "epoch": 1.42, + "learning_rate": 2.2943686718688178e-06, + "loss": 0.153, + "step": 38010 + }, + { + "epoch": 1.42, + "learning_rate": 2.293463369257572e-06, + "loss": 0.1656, + "step": 38040 + }, + { + "epoch": 1.42, + "learning_rate": 2.2925591374331504e-06, + "loss": 0.1373, + "step": 38070 + }, + { + "epoch": 1.42, + "learning_rate": 2.291655974286349e-06, + "loss": 0.1618, + "step": 38100 + }, + { + "epoch": 1.42, + "learning_rate": 2.2907538777137752e-06, + "loss": 0.1632, + "step": 38130 + }, + { + "epoch": 1.42, + "learning_rate": 2.2898528456178286e-06, + "loss": 0.148, + "step": 38160 + }, + { + "epoch": 1.42, + "learning_rate": 2.288952875906677e-06, + "loss": 0.1529, + "step": 38190 + }, + { + "epoch": 1.43, + "learning_rate": 2.2880539664942414e-06, + "loss": 0.1352, + "step": 38220 + }, + { + "epoch": 1.43, + "learning_rate": 2.287156115300173e-06, + "loss": 0.1447, + "step": 38250 + }, + { + "epoch": 1.43, + "learning_rate": 2.2862593202498297e-06, + "loss": 0.194, + "step": 38280 + }, + { + "epoch": 1.43, + "learning_rate": 2.2853635792742636e-06, + "loss": 0.1346, + "step": 38310 + }, + { + "epoch": 1.43, + "learning_rate": 2.284468890310193e-06, + "loss": 0.1342, + "step": 38340 + }, + { + "epoch": 1.43, + "learning_rate": 2.2835752512999883e-06, + "loss": 0.1408, + "step": 38370 + }, + { + "epoch": 1.43, + "learning_rate": 2.282682660191648e-06, + "loss": 0.1545, + "step": 38400 + }, + { + "epoch": 1.43, + "learning_rate": 2.2817911149387828e-06, + "loss": 0.1597, + "step": 38430 + }, + { + "epoch": 1.43, + "learning_rate": 2.2809006135005923e-06, + "loss": 0.1433, + "step": 38460 + }, + { + "epoch": 1.44, + "learning_rate": 2.280011153841847e-06, + "loss": 0.14, + "step": 38490 + }, + { + "epoch": 1.44, + "learning_rate": 2.2791227339328693e-06, + "loss": 0.1647, + "step": 38520 + }, + { + "epoch": 1.44, + "learning_rate": 2.2782353517495133e-06, + "loss": 0.1659, + "step": 38550 + }, + { + "epoch": 1.44, + "learning_rate": 2.2773490052731452e-06, + "loss": 0.1298, + "step": 38580 + }, + { + "epoch": 1.44, + "learning_rate": 2.2764636924906245e-06, + "loss": 0.1773, + "step": 38610 + }, + { + "epoch": 1.44, + "learning_rate": 2.27560887083038e-06, + "loss": 0.1642, + "step": 38640 + }, + { + "epoch": 1.44, + "learning_rate": 2.2747255851274107e-06, + "loss": 0.1638, + "step": 38670 + }, + { + "epoch": 1.44, + "learning_rate": 2.2738433271781068e-06, + "loss": 0.1954, + "step": 38700 + }, + { + "epoch": 1.44, + "learning_rate": 2.272962094990928e-06, + "loss": 0.1598, + "step": 38730 + }, + { + "epoch": 1.45, + "learning_rate": 2.272081886579736e-06, + "loss": 0.1741, + "step": 38760 + }, + { + "epoch": 1.45, + "learning_rate": 2.2712026999637678e-06, + "loss": 0.1589, + "step": 38790 + }, + { + "epoch": 1.45, + "learning_rate": 2.270324533167624e-06, + "loss": 0.1637, + "step": 38820 + }, + { + "epoch": 1.45, + "learning_rate": 2.2694473842212473e-06, + "loss": 0.197, + "step": 38850 + }, + { + "epoch": 1.45, + "learning_rate": 2.2685712511599043e-06, + "loss": 0.1334, + "step": 38880 + }, + { + "epoch": 1.45, + "learning_rate": 2.2676961320241665e-06, + "loss": 0.1649, + "step": 38910 + }, + { + "epoch": 1.45, + "learning_rate": 2.266822024859894e-06, + "loss": 0.1752, + "step": 38940 + }, + { + "epoch": 1.45, + "learning_rate": 2.2659489277182127e-06, + "loss": 0.1423, + "step": 38970 + }, + { + "epoch": 1.45, + "learning_rate": 2.2650768386555003e-06, + "loss": 0.172, + "step": 39000 + }, + { + "epoch": 1.46, + "learning_rate": 2.264205755733369e-06, + "loss": 0.1372, + "step": 39030 + }, + { + "epoch": 1.46, + "learning_rate": 2.2633356770186404e-06, + "loss": 0.1646, + "step": 39060 + }, + { + "epoch": 1.46, + "learning_rate": 2.2624666005833367e-06, + "loss": 0.1595, + "step": 39090 + }, + { + "epoch": 1.46, + "learning_rate": 2.2615985245046557e-06, + "loss": 0.166, + "step": 39120 + }, + { + "epoch": 1.46, + "learning_rate": 2.2607314468649563e-06, + "loss": 0.1402, + "step": 39150 + }, + { + "epoch": 1.46, + "learning_rate": 2.25986536575174e-06, + "loss": 0.1519, + "step": 39180 + }, + { + "epoch": 1.46, + "learning_rate": 2.259000279257632e-06, + "loss": 0.1585, + "step": 39210 + }, + { + "epoch": 1.46, + "learning_rate": 2.2581361854803667e-06, + "loss": 0.1605, + "step": 39240 + }, + { + "epoch": 1.46, + "learning_rate": 2.2572730825227662e-06, + "loss": 0.139, + "step": 39270 + }, + { + "epoch": 1.47, + "learning_rate": 2.256410968492726e-06, + "loss": 0.1307, + "step": 39300 + }, + { + "epoch": 1.47, + "learning_rate": 2.2555498415031953e-06, + "loss": 0.1381, + "step": 39330 + }, + { + "epoch": 1.47, + "learning_rate": 2.2546896996721607e-06, + "loss": 0.1494, + "step": 39360 + }, + { + "epoch": 1.47, + "learning_rate": 2.2538305411226297e-06, + "loss": 0.1582, + "step": 39390 + }, + { + "epoch": 1.47, + "learning_rate": 2.2529723639826117e-06, + "loss": 0.1613, + "step": 39420 + }, + { + "epoch": 1.47, + "learning_rate": 2.2521151663851024e-06, + "loss": 0.166, + "step": 39450 + }, + { + "epoch": 1.47, + "learning_rate": 2.251258946468066e-06, + "loss": 0.1548, + "step": 39480 + }, + { + "epoch": 1.47, + "learning_rate": 2.2504037023744184e-06, + "loss": 0.1402, + "step": 39510 + }, + { + "epoch": 1.47, + "learning_rate": 2.2495494322520092e-06, + "loss": 0.1515, + "step": 39540 + }, + { + "epoch": 1.48, + "learning_rate": 2.248696134253608e-06, + "loss": 0.1395, + "step": 39570 + }, + { + "epoch": 1.48, + "learning_rate": 2.247843806536884e-06, + "loss": 0.1625, + "step": 39600 + }, + { + "epoch": 1.48, + "learning_rate": 2.2469924472643904e-06, + "loss": 0.147, + "step": 39630 + }, + { + "epoch": 1.48, + "learning_rate": 2.2461420546035494e-06, + "loss": 0.1336, + "step": 39660 + }, + { + "epoch": 1.48, + "learning_rate": 2.245292626726635e-06, + "loss": 0.1217, + "step": 39690 + }, + { + "epoch": 1.48, + "learning_rate": 2.2444441618107533e-06, + "loss": 0.1453, + "step": 39720 + }, + { + "epoch": 1.48, + "learning_rate": 2.243596658037831e-06, + "loss": 0.1382, + "step": 39750 + }, + { + "epoch": 1.48, + "learning_rate": 2.2427501135945952e-06, + "loss": 0.1632, + "step": 39780 + }, + { + "epoch": 1.48, + "learning_rate": 2.2419045266725605e-06, + "loss": 0.1615, + "step": 39810 + }, + { + "epoch": 1.49, + "learning_rate": 2.241059895468009e-06, + "loss": 0.1506, + "step": 39840 + }, + { + "epoch": 1.49, + "learning_rate": 2.2402162181819777e-06, + "loss": 0.1484, + "step": 39870 + }, + { + "epoch": 1.49, + "learning_rate": 2.2393734930202386e-06, + "loss": 0.1555, + "step": 39900 + }, + { + "epoch": 1.49, + "learning_rate": 2.238531718193287e-06, + "loss": 0.1427, + "step": 39930 + }, + { + "epoch": 1.49, + "learning_rate": 2.2376908919163214e-06, + "loss": 0.1651, + "step": 39960 + }, + { + "epoch": 1.49, + "learning_rate": 2.236851012409231e-06, + "loss": 0.1755, + "step": 39990 + }, + { + "epoch": 1.49, + "learning_rate": 2.236012077896579e-06, + "loss": 0.1369, + "step": 40020 + }, + { + "epoch": 1.49, + "learning_rate": 2.235174086607584e-06, + "loss": 0.175, + "step": 40050 + }, + { + "epoch": 1.49, + "learning_rate": 2.2343370367761084e-06, + "loss": 0.1441, + "step": 40080 + }, + { + "epoch": 1.5, + "learning_rate": 2.2335009266406403e-06, + "loss": 0.1731, + "step": 40110 + }, + { + "epoch": 1.5, + "learning_rate": 2.2326657544442797e-06, + "loss": 0.1675, + "step": 40140 + }, + { + "epoch": 1.5, + "learning_rate": 2.231831518434719e-06, + "loss": 0.1612, + "step": 40170 + }, + { + "epoch": 1.5, + "learning_rate": 2.2309982168642326e-06, + "loss": 0.1538, + "step": 40200 + }, + { + "epoch": 1.5, + "learning_rate": 2.2301658479896607e-06, + "loss": 0.1631, + "step": 40230 + }, + { + "epoch": 1.5, + "learning_rate": 2.229334410072389e-06, + "loss": 0.155, + "step": 40260 + }, + { + "epoch": 1.5, + "learning_rate": 2.2285039013783404e-06, + "loss": 0.135, + "step": 40290 + }, + { + "epoch": 1.5, + "learning_rate": 2.2276743201779543e-06, + "loss": 0.1491, + "step": 40320 + }, + { + "epoch": 1.5, + "learning_rate": 2.2268456647461743e-06, + "loss": 0.1364, + "step": 40350 + }, + { + "epoch": 1.51, + "learning_rate": 2.226017933362433e-06, + "loss": 0.1611, + "step": 40380 + }, + { + "epoch": 1.51, + "learning_rate": 2.2251911243106365e-06, + "loss": 0.1172, + "step": 40410 + }, + { + "epoch": 1.51, + "learning_rate": 2.224365235879149e-06, + "loss": 0.1509, + "step": 40440 + }, + { + "epoch": 1.51, + "learning_rate": 2.2235402663607773e-06, + "loss": 0.1661, + "step": 40470 + }, + { + "epoch": 1.51, + "learning_rate": 2.2227162140527596e-06, + "loss": 0.15, + "step": 40500 + }, + { + "epoch": 1.51, + "learning_rate": 2.2218930772567463e-06, + "loss": 0.1425, + "step": 40530 + }, + { + "epoch": 1.51, + "learning_rate": 2.221070854278788e-06, + "loss": 0.1721, + "step": 40560 + }, + { + "epoch": 1.51, + "learning_rate": 2.2202495434293184e-06, + "loss": 0.1377, + "step": 40590 + }, + { + "epoch": 1.51, + "learning_rate": 2.219429143023144e-06, + "loss": 0.1455, + "step": 40620 + }, + { + "epoch": 1.52, + "learning_rate": 2.2186096513794256e-06, + "loss": 0.1348, + "step": 40650 + }, + { + "epoch": 1.52, + "learning_rate": 2.2177910668216645e-06, + "loss": 0.1614, + "step": 40680 + }, + { + "epoch": 1.52, + "learning_rate": 2.2169733876776897e-06, + "loss": 0.1431, + "step": 40710 + }, + { + "epoch": 1.52, + "learning_rate": 2.2161566122796414e-06, + "loss": 0.1521, + "step": 40740 + }, + { + "epoch": 1.52, + "learning_rate": 2.2153407389639607e-06, + "loss": 0.175, + "step": 40770 + }, + { + "epoch": 1.52, + "learning_rate": 2.21452576607137e-06, + "loss": 0.1389, + "step": 40800 + }, + { + "epoch": 1.52, + "learning_rate": 2.213711691946862e-06, + "loss": 0.1679, + "step": 40830 + }, + { + "epoch": 1.52, + "learning_rate": 2.212898514939685e-06, + "loss": 0.1289, + "step": 40860 + }, + { + "epoch": 1.53, + "learning_rate": 2.212086233403329e-06, + "loss": 0.1763, + "step": 40890 + }, + { + "epoch": 1.53, + "learning_rate": 2.2112748456955123e-06, + "loss": 0.1648, + "step": 40920 + }, + { + "epoch": 1.53, + "learning_rate": 2.2104643501781657e-06, + "loss": 0.1724, + "step": 40950 + }, + { + "epoch": 1.53, + "learning_rate": 2.20965474521742e-06, + "loss": 0.1341, + "step": 40980 + }, + { + "epoch": 1.53, + "learning_rate": 2.208846029183593e-06, + "loss": 0.155, + "step": 41010 + }, + { + "epoch": 1.53, + "learning_rate": 2.2080382004511725e-06, + "loss": 0.1591, + "step": 41040 + }, + { + "epoch": 1.53, + "learning_rate": 2.207231257398806e-06, + "loss": 0.1678, + "step": 41070 + }, + { + "epoch": 1.53, + "learning_rate": 2.2064251984092854e-06, + "loss": 0.145, + "step": 41100 + }, + { + "epoch": 1.53, + "learning_rate": 2.2056200218695346e-06, + "loss": 0.1617, + "step": 41130 + }, + { + "epoch": 1.54, + "learning_rate": 2.2048157261705934e-06, + "loss": 0.1493, + "step": 41160 + }, + { + "epoch": 1.54, + "learning_rate": 2.204012309707607e-06, + "loss": 0.1345, + "step": 41190 + }, + { + "epoch": 1.54, + "learning_rate": 2.203209770879811e-06, + "loss": 0.1389, + "step": 41220 + }, + { + "epoch": 1.54, + "learning_rate": 2.202408108090517e-06, + "loss": 0.1593, + "step": 41250 + }, + { + "epoch": 1.54, + "learning_rate": 2.2016073197471015e-06, + "loss": 0.1582, + "step": 41280 + }, + { + "epoch": 1.54, + "learning_rate": 2.200807404260992e-06, + "loss": 0.1529, + "step": 41310 + }, + { + "epoch": 1.54, + "learning_rate": 2.2000083600476527e-06, + "loss": 0.1601, + "step": 41340 + }, + { + "epoch": 1.54, + "learning_rate": 2.199210185526571e-06, + "loss": 0.1604, + "step": 41370 + }, + { + "epoch": 1.54, + "learning_rate": 2.198412879121248e-06, + "loss": 0.1478, + "step": 41400 + }, + { + "epoch": 1.55, + "learning_rate": 2.1976164392591793e-06, + "loss": 0.1689, + "step": 41430 + }, + { + "epoch": 1.55, + "learning_rate": 2.1968208643718476e-06, + "loss": 0.1644, + "step": 41460 + }, + { + "epoch": 1.55, + "learning_rate": 2.196026152894708e-06, + "loss": 0.1742, + "step": 41490 + }, + { + "epoch": 1.55, + "learning_rate": 2.195232303267173e-06, + "loss": 0.1596, + "step": 41520 + }, + { + "epoch": 1.55, + "learning_rate": 2.194439313932602e-06, + "loss": 0.1316, + "step": 41550 + }, + { + "epoch": 1.55, + "learning_rate": 2.193647183338287e-06, + "loss": 0.1481, + "step": 41580 + }, + { + "epoch": 1.55, + "learning_rate": 2.1928559099354423e-06, + "loss": 0.1735, + "step": 41610 + }, + { + "epoch": 1.55, + "learning_rate": 2.1920654921791885e-06, + "loss": 0.1617, + "step": 41640 + }, + { + "epoch": 1.55, + "learning_rate": 2.191275928528542e-06, + "loss": 0.15, + "step": 41670 + }, + { + "epoch": 1.56, + "learning_rate": 2.190487217446401e-06, + "loss": 0.1473, + "step": 41700 + }, + { + "epoch": 1.56, + "learning_rate": 2.189699357399535e-06, + "loss": 0.1206, + "step": 41730 + }, + { + "epoch": 1.56, + "learning_rate": 2.188912346858569e-06, + "loss": 0.1352, + "step": 41760 + }, + { + "epoch": 1.56, + "learning_rate": 2.188126184297976e-06, + "loss": 0.173, + "step": 41790 + }, + { + "epoch": 1.56, + "learning_rate": 2.1873408681960583e-06, + "loss": 0.1624, + "step": 41820 + }, + { + "epoch": 1.56, + "learning_rate": 2.1865563970349406e-06, + "loss": 0.1448, + "step": 41850 + }, + { + "epoch": 1.56, + "learning_rate": 2.185772769300554e-06, + "loss": 0.1501, + "step": 41880 + }, + { + "epoch": 1.56, + "learning_rate": 2.1849899834826275e-06, + "loss": 0.1603, + "step": 41910 + }, + { + "epoch": 1.56, + "learning_rate": 2.184208038074671e-06, + "loss": 0.1447, + "step": 41940 + }, + { + "epoch": 1.57, + "learning_rate": 2.1834269315739657e-06, + "loss": 0.1551, + "step": 41970 + }, + { + "epoch": 1.57, + "learning_rate": 2.182646662481554e-06, + "loss": 0.1428, + "step": 42000 + }, + { + "epoch": 1.57, + "learning_rate": 2.1818672293022237e-06, + "loss": 0.1594, + "step": 42030 + }, + { + "epoch": 1.57, + "learning_rate": 2.1810886305444976e-06, + "loss": 0.1461, + "step": 42060 + }, + { + "epoch": 1.57, + "learning_rate": 2.180310864720622e-06, + "loss": 0.1699, + "step": 42090 + }, + { + "epoch": 1.57, + "learning_rate": 2.1795339303465547e-06, + "loss": 0.158, + "step": 42120 + }, + { + "epoch": 1.57, + "learning_rate": 2.178757825941951e-06, + "loss": 0.1227, + "step": 42150 + }, + { + "epoch": 1.57, + "learning_rate": 2.177982550030154e-06, + "loss": 0.1795, + "step": 42180 + }, + { + "epoch": 1.57, + "learning_rate": 2.177208101138184e-06, + "loss": 0.1337, + "step": 42210 + }, + { + "epoch": 1.58, + "learning_rate": 2.1764344777967235e-06, + "loss": 0.1626, + "step": 42240 + }, + { + "epoch": 1.58, + "learning_rate": 2.1756616785401066e-06, + "loss": 0.129, + "step": 42270 + }, + { + "epoch": 1.58, + "learning_rate": 2.1748897019063095e-06, + "loss": 0.1494, + "step": 42300 + }, + { + "epoch": 1.58, + "learning_rate": 2.1741185464369354e-06, + "loss": 0.1537, + "step": 42330 + }, + { + "epoch": 1.58, + "learning_rate": 2.1733482106772056e-06, + "loss": 0.1555, + "step": 42360 + }, + { + "epoch": 1.58, + "learning_rate": 2.1725786931759464e-06, + "loss": 0.1368, + "step": 42390 + }, + { + "epoch": 1.58, + "learning_rate": 2.1718099924855798e-06, + "loss": 0.1489, + "step": 42420 + }, + { + "epoch": 1.58, + "learning_rate": 2.171042107162109e-06, + "loss": 0.1486, + "step": 42450 + }, + { + "epoch": 1.58, + "learning_rate": 2.1702750357651087e-06, + "loss": 0.18, + "step": 42480 + }, + { + "epoch": 1.59, + "learning_rate": 2.1695087768577147e-06, + "loss": 0.1532, + "step": 42510 + }, + { + "epoch": 1.59, + "learning_rate": 2.16874332900661e-06, + "loss": 0.147, + "step": 42540 + }, + { + "epoch": 1.59, + "learning_rate": 2.1679786907820158e-06, + "loss": 0.1383, + "step": 42570 + }, + { + "epoch": 1.59, + "learning_rate": 2.1672148607576797e-06, + "loss": 0.1606, + "step": 42600 + }, + { + "epoch": 1.59, + "learning_rate": 2.1664518375108646e-06, + "loss": 0.1814, + "step": 42630 + }, + { + "epoch": 1.59, + "learning_rate": 2.1656896196223354e-06, + "loss": 0.1362, + "step": 42660 + }, + { + "epoch": 1.59, + "learning_rate": 2.1649282056763532e-06, + "loss": 0.1501, + "step": 42690 + }, + { + "epoch": 1.59, + "learning_rate": 2.1641675942606574e-06, + "loss": 0.1471, + "step": 42720 + }, + { + "epoch": 1.59, + "learning_rate": 2.1634077839664602e-06, + "loss": 0.1501, + "step": 42750 + }, + { + "epoch": 1.6, + "learning_rate": 2.162648773388433e-06, + "loss": 0.1738, + "step": 42780 + }, + { + "epoch": 1.6, + "learning_rate": 2.161890561124696e-06, + "loss": 0.1404, + "step": 42810 + }, + { + "epoch": 1.6, + "learning_rate": 2.161133145776808e-06, + "loss": 0.2075, + "step": 42840 + }, + { + "epoch": 1.6, + "learning_rate": 2.160376525949755e-06, + "loss": 0.151, + "step": 42870 + }, + { + "epoch": 1.6, + "learning_rate": 2.1596207002519383e-06, + "loss": 0.1608, + "step": 42900 + }, + { + "epoch": 1.6, + "learning_rate": 2.158865667295166e-06, + "loss": 0.1798, + "step": 42930 + }, + { + "epoch": 1.6, + "learning_rate": 2.1581114256946403e-06, + "loss": 0.1514, + "step": 42960 + }, + { + "epoch": 1.6, + "learning_rate": 2.1573579740689486e-06, + "loss": 0.1378, + "step": 42990 + }, + { + "epoch": 1.6, + "learning_rate": 2.1566053110400517e-06, + "loss": 0.1434, + "step": 43020 + }, + { + "epoch": 1.61, + "learning_rate": 2.1558534352332726e-06, + "loss": 0.1566, + "step": 43050 + }, + { + "epoch": 1.61, + "learning_rate": 2.1551023452772875e-06, + "loss": 0.1509, + "step": 43080 + }, + { + "epoch": 1.61, + "learning_rate": 2.154352039804115e-06, + "loss": 0.137, + "step": 43110 + }, + { + "epoch": 1.61, + "learning_rate": 2.1536025174491042e-06, + "loss": 0.1861, + "step": 43140 + }, + { + "epoch": 1.61, + "learning_rate": 2.1528537768509265e-06, + "loss": 0.1358, + "step": 43170 + }, + { + "epoch": 1.61, + "learning_rate": 2.1521058166515628e-06, + "loss": 0.1538, + "step": 43200 + }, + { + "epoch": 1.61, + "learning_rate": 2.1513586354962963e-06, + "loss": 0.1644, + "step": 43230 + }, + { + "epoch": 1.61, + "learning_rate": 2.1506122320336976e-06, + "loss": 0.1482, + "step": 43260 + }, + { + "epoch": 1.61, + "learning_rate": 2.1498666049156193e-06, + "loss": 0.1492, + "step": 43290 + }, + { + "epoch": 1.62, + "learning_rate": 2.149121752797183e-06, + "loss": 0.1353, + "step": 43320 + }, + { + "epoch": 1.62, + "learning_rate": 2.148377674336769e-06, + "loss": 0.1671, + "step": 43350 + }, + { + "epoch": 1.62, + "learning_rate": 2.1476343681960082e-06, + "loss": 0.1606, + "step": 43380 + }, + { + "epoch": 1.62, + "learning_rate": 2.1468918330397695e-06, + "loss": 0.1622, + "step": 43410 + }, + { + "epoch": 1.62, + "learning_rate": 2.146174780667022e-06, + "loss": 0.1372, + "step": 43440 + }, + { + "epoch": 1.62, + "learning_rate": 2.145433757897916e-06, + "loss": 0.1529, + "step": 43470 + }, + { + "epoch": 1.62, + "learning_rate": 2.144693502171406e-06, + "loss": 0.1499, + "step": 43500 + }, + { + "epoch": 1.62, + "learning_rate": 2.14395401216511e-06, + "loss": 0.1556, + "step": 43530 + }, + { + "epoch": 1.62, + "learning_rate": 2.143215286559837e-06, + "loss": 0.15, + "step": 43560 + }, + { + "epoch": 1.63, + "learning_rate": 2.1424773240395752e-06, + "loss": 0.1706, + "step": 43590 + }, + { + "epoch": 1.63, + "learning_rate": 2.141740123291482e-06, + "loss": 0.1412, + "step": 43620 + }, + { + "epoch": 1.63, + "learning_rate": 2.141003683005876e-06, + "loss": 0.1537, + "step": 43650 + }, + { + "epoch": 1.63, + "learning_rate": 2.1402680018762254e-06, + "loss": 0.164, + "step": 43680 + }, + { + "epoch": 1.63, + "learning_rate": 2.1395330785991377e-06, + "loss": 0.1404, + "step": 43710 + }, + { + "epoch": 1.63, + "learning_rate": 2.1387989118743533e-06, + "loss": 0.1549, + "step": 43740 + }, + { + "epoch": 1.63, + "learning_rate": 2.1380655004047324e-06, + "loss": 0.1339, + "step": 43770 + }, + { + "epoch": 1.63, + "learning_rate": 2.1373328428962457e-06, + "loss": 0.1393, + "step": 43800 + }, + { + "epoch": 1.63, + "learning_rate": 2.136600938057969e-06, + "loss": 0.1585, + "step": 43830 + }, + { + "epoch": 1.64, + "learning_rate": 2.1358697846020666e-06, + "loss": 0.1746, + "step": 43860 + }, + { + "epoch": 1.64, + "learning_rate": 2.135139381243788e-06, + "loss": 0.1542, + "step": 43890 + }, + { + "epoch": 1.64, + "learning_rate": 2.1344097267014553e-06, + "loss": 0.1699, + "step": 43920 + }, + { + "epoch": 1.64, + "learning_rate": 2.1336808196964546e-06, + "loss": 0.1372, + "step": 43950 + }, + { + "epoch": 1.64, + "learning_rate": 2.1329526589532265e-06, + "loss": 0.1553, + "step": 43980 + }, + { + "epoch": 1.64, + "learning_rate": 2.1322252431992567e-06, + "loss": 0.1484, + "step": 44010 + }, + { + "epoch": 1.64, + "learning_rate": 2.131498571165066e-06, + "loss": 0.1472, + "step": 44040 + }, + { + "epoch": 1.64, + "learning_rate": 2.130772641584203e-06, + "loss": 0.161, + "step": 44070 + }, + { + "epoch": 1.64, + "learning_rate": 2.1300474531932315e-06, + "loss": 0.1696, + "step": 44100 + }, + { + "epoch": 1.65, + "learning_rate": 2.1293230047317256e-06, + "loss": 0.145, + "step": 44130 + }, + { + "epoch": 1.65, + "learning_rate": 2.128599294942257e-06, + "loss": 0.1463, + "step": 44160 + }, + { + "epoch": 1.65, + "learning_rate": 2.127876322570386e-06, + "loss": 0.1505, + "step": 44190 + }, + { + "epoch": 1.65, + "learning_rate": 2.1271540863646543e-06, + "loss": 0.1658, + "step": 44220 + }, + { + "epoch": 1.65, + "learning_rate": 2.1264325850765765e-06, + "loss": 0.1551, + "step": 44250 + }, + { + "epoch": 1.65, + "learning_rate": 2.125711817460626e-06, + "loss": 0.1468, + "step": 44280 + }, + { + "epoch": 1.65, + "learning_rate": 2.124991782274232e-06, + "loss": 0.1521, + "step": 44310 + }, + { + "epoch": 1.65, + "learning_rate": 2.1242724782777676e-06, + "loss": 0.1355, + "step": 44340 + }, + { + "epoch": 1.65, + "learning_rate": 2.1235539042345404e-06, + "loss": 0.1431, + "step": 44370 + }, + { + "epoch": 1.66, + "learning_rate": 2.1228360589107853e-06, + "loss": 0.1589, + "step": 44400 + }, + { + "epoch": 1.66, + "learning_rate": 2.1221189410756536e-06, + "loss": 0.171, + "step": 44430 + }, + { + "epoch": 1.66, + "learning_rate": 2.1214025495012067e-06, + "loss": 0.1383, + "step": 44460 + }, + { + "epoch": 1.66, + "learning_rate": 2.1206868829624043e-06, + "loss": 0.157, + "step": 44490 + }, + { + "epoch": 1.66, + "learning_rate": 2.1199719402370975e-06, + "loss": 0.1388, + "step": 44520 + }, + { + "epoch": 1.66, + "learning_rate": 2.11925772010602e-06, + "loss": 0.1638, + "step": 44550 + }, + { + "epoch": 1.66, + "learning_rate": 2.118544221352778e-06, + "loss": 0.1473, + "step": 44580 + }, + { + "epoch": 1.66, + "learning_rate": 2.117831442763844e-06, + "loss": 0.1358, + "step": 44610 + }, + { + "epoch": 1.66, + "learning_rate": 2.1171193831285454e-06, + "loss": 0.1704, + "step": 44640 + }, + { + "epoch": 1.67, + "learning_rate": 2.1164080412390573e-06, + "loss": 0.1302, + "step": 44670 + }, + { + "epoch": 1.67, + "learning_rate": 2.115697415890394e-06, + "loss": 0.1402, + "step": 44700 + }, + { + "epoch": 1.67, + "learning_rate": 2.1149875058803997e-06, + "loss": 0.1471, + "step": 44730 + }, + { + "epoch": 1.67, + "learning_rate": 2.114278310009741e-06, + "loss": 0.1479, + "step": 44760 + }, + { + "epoch": 1.67, + "learning_rate": 2.113569827081896e-06, + "loss": 0.1359, + "step": 44790 + }, + { + "epoch": 1.67, + "learning_rate": 2.11286205590315e-06, + "loss": 0.1965, + "step": 44820 + }, + { + "epoch": 1.67, + "learning_rate": 2.1121549952825822e-06, + "loss": 0.1311, + "step": 44850 + }, + { + "epoch": 1.67, + "learning_rate": 2.1114486440320624e-06, + "loss": 0.1513, + "step": 44880 + }, + { + "epoch": 1.67, + "learning_rate": 2.110743000966237e-06, + "loss": 0.1503, + "step": 44910 + }, + { + "epoch": 1.68, + "learning_rate": 2.110038064902525e-06, + "loss": 0.1677, + "step": 44940 + }, + { + "epoch": 1.68, + "learning_rate": 2.109333834661109e-06, + "loss": 0.1297, + "step": 44970 + }, + { + "epoch": 1.68, + "learning_rate": 2.1086303090649252e-06, + "loss": 0.1588, + "step": 45000 + }, + { + "epoch": 1.68, + "learning_rate": 2.1079274869396553e-06, + "loss": 0.144, + "step": 45030 + }, + { + "epoch": 1.68, + "learning_rate": 2.1072253671137207e-06, + "loss": 0.1601, + "step": 45060 + }, + { + "epoch": 1.68, + "learning_rate": 2.106523948418271e-06, + "loss": 0.1327, + "step": 45090 + }, + { + "epoch": 1.68, + "learning_rate": 2.1058232296871785e-06, + "loss": 0.1346, + "step": 45120 + }, + { + "epoch": 1.68, + "learning_rate": 2.1051232097570295e-06, + "loss": 0.1722, + "step": 45150 + }, + { + "epoch": 1.69, + "learning_rate": 2.1044238874671146e-06, + "loss": 0.1664, + "step": 45180 + }, + { + "epoch": 1.69, + "learning_rate": 2.103725261659422e-06, + "loss": 0.1612, + "step": 45210 + }, + { + "epoch": 1.69, + "learning_rate": 2.103027331178629e-06, + "loss": 0.1759, + "step": 45240 + }, + { + "epoch": 1.69, + "learning_rate": 2.1023300948720953e-06, + "loss": 0.1522, + "step": 45270 + }, + { + "epoch": 1.69, + "learning_rate": 2.1016335515898515e-06, + "loss": 0.1648, + "step": 45300 + }, + { + "epoch": 1.69, + "learning_rate": 2.100937700184596e-06, + "loss": 0.141, + "step": 45330 + }, + { + "epoch": 1.69, + "learning_rate": 2.100242539511684e-06, + "loss": 0.1659, + "step": 45360 + }, + { + "epoch": 1.69, + "learning_rate": 2.099548068429119e-06, + "loss": 0.131, + "step": 45390 + }, + { + "epoch": 1.69, + "learning_rate": 2.098854285797546e-06, + "loss": 0.1419, + "step": 45420 + }, + { + "epoch": 1.7, + "learning_rate": 2.0981611904802458e-06, + "loss": 0.1415, + "step": 45450 + }, + { + "epoch": 1.7, + "learning_rate": 2.097468781343123e-06, + "loss": 0.1619, + "step": 45480 + }, + { + "epoch": 1.7, + "learning_rate": 2.096800103699349e-06, + "loss": 0.1505, + "step": 45510 + }, + { + "epoch": 1.7, + "learning_rate": 2.0961090407515926e-06, + "loss": 0.1329, + "step": 45540 + }, + { + "epoch": 1.7, + "learning_rate": 2.095418660634908e-06, + "loss": 0.1501, + "step": 45570 + }, + { + "epoch": 1.7, + "learning_rate": 2.0947289622255397e-06, + "loss": 0.1388, + "step": 45600 + }, + { + "epoch": 1.7, + "learning_rate": 2.0940399444023193e-06, + "loss": 0.1288, + "step": 45630 + }, + { + "epoch": 1.7, + "learning_rate": 2.0933516060466585e-06, + "loss": 0.1495, + "step": 45660 + }, + { + "epoch": 1.7, + "learning_rate": 2.092663946042543e-06, + "loss": 0.1207, + "step": 45690 + }, + { + "epoch": 1.71, + "learning_rate": 2.09197696327652e-06, + "loss": 0.1552, + "step": 45720 + }, + { + "epoch": 1.71, + "learning_rate": 2.0912906566376966e-06, + "loss": 0.1263, + "step": 45750 + }, + { + "epoch": 1.71, + "learning_rate": 2.090605025017727e-06, + "loss": 0.124, + "step": 45780 + }, + { + "epoch": 1.71, + "learning_rate": 2.0899200673108096e-06, + "loss": 0.1456, + "step": 45810 + }, + { + "epoch": 1.71, + "learning_rate": 2.0892357824136746e-06, + "loss": 0.1448, + "step": 45840 + }, + { + "epoch": 1.71, + "learning_rate": 2.0885521692255816e-06, + "loss": 0.1373, + "step": 45870 + }, + { + "epoch": 1.71, + "learning_rate": 2.0878692266483083e-06, + "loss": 0.1591, + "step": 45900 + }, + { + "epoch": 1.71, + "learning_rate": 2.0871869535861456e-06, + "loss": 0.151, + "step": 45930 + }, + { + "epoch": 1.71, + "learning_rate": 2.0865053489458874e-06, + "loss": 0.1493, + "step": 45960 + }, + { + "epoch": 1.72, + "learning_rate": 2.0858244116368275e-06, + "loss": 0.1613, + "step": 45990 + }, + { + "epoch": 1.72, + "learning_rate": 2.085144140570748e-06, + "loss": 0.1259, + "step": 46020 + }, + { + "epoch": 1.72, + "learning_rate": 2.0844645346619135e-06, + "loss": 0.1247, + "step": 46050 + }, + { + "epoch": 1.72, + "learning_rate": 2.083785592827065e-06, + "loss": 0.1386, + "step": 46080 + }, + { + "epoch": 1.72, + "learning_rate": 2.083107313985412e-06, + "loss": 0.1597, + "step": 46110 + }, + { + "epoch": 1.72, + "learning_rate": 2.0824296970586242e-06, + "loss": 0.1377, + "step": 46140 + }, + { + "epoch": 1.72, + "learning_rate": 2.0817527409708258e-06, + "loss": 0.1499, + "step": 46170 + }, + { + "epoch": 1.72, + "learning_rate": 2.0810764446485862e-06, + "loss": 0.1699, + "step": 46200 + }, + { + "epoch": 1.72, + "learning_rate": 2.0804008070209163e-06, + "loss": 0.1369, + "step": 46230 + }, + { + "epoch": 1.73, + "learning_rate": 2.0797258270192577e-06, + "loss": 0.1405, + "step": 46260 + }, + { + "epoch": 1.73, + "learning_rate": 2.079051503577478e-06, + "loss": 0.1308, + "step": 46290 + }, + { + "epoch": 1.73, + "learning_rate": 2.0783778356318633e-06, + "loss": 0.1678, + "step": 46320 + }, + { + "epoch": 1.73, + "learning_rate": 2.0777048221211104e-06, + "loss": 0.1471, + "step": 46350 + }, + { + "epoch": 1.73, + "learning_rate": 2.0770324619863196e-06, + "loss": 0.1736, + "step": 46380 + }, + { + "epoch": 1.73, + "learning_rate": 2.07636075417099e-06, + "loss": 0.1684, + "step": 46410 + }, + { + "epoch": 1.73, + "learning_rate": 2.075689697621009e-06, + "loss": 0.144, + "step": 46440 + }, + { + "epoch": 1.73, + "learning_rate": 2.075019291284648e-06, + "loss": 0.1527, + "step": 46470 + }, + { + "epoch": 1.73, + "learning_rate": 2.074349534112556e-06, + "loss": 0.1447, + "step": 46500 + }, + { + "epoch": 1.74, + "learning_rate": 2.0736804250577488e-06, + "loss": 0.1627, + "step": 46530 + }, + { + "epoch": 1.74, + "learning_rate": 2.073011963075606e-06, + "loss": 0.1689, + "step": 46560 + }, + { + "epoch": 1.74, + "learning_rate": 2.072344147123863e-06, + "loss": 0.1459, + "step": 46590 + }, + { + "epoch": 1.74, + "learning_rate": 2.0716769761626044e-06, + "loss": 0.1646, + "step": 46620 + }, + { + "epoch": 1.74, + "learning_rate": 2.0710104491542556e-06, + "loss": 0.1403, + "step": 46650 + }, + { + "epoch": 1.74, + "learning_rate": 2.070344565063579e-06, + "loss": 0.1408, + "step": 46680 + }, + { + "epoch": 1.74, + "learning_rate": 2.069679322857663e-06, + "loss": 0.1511, + "step": 46710 + }, + { + "epoch": 1.74, + "learning_rate": 2.06901472150592e-06, + "loss": 0.1633, + "step": 46740 + }, + { + "epoch": 1.74, + "learning_rate": 2.0683507599800766e-06, + "loss": 0.132, + "step": 46770 + }, + { + "epoch": 1.75, + "learning_rate": 2.067687437254168e-06, + "loss": 0.137, + "step": 46800 + }, + { + "epoch": 1.75, + "learning_rate": 2.067024752304531e-06, + "loss": 0.1452, + "step": 46830 + }, + { + "epoch": 1.75, + "learning_rate": 2.066362704109797e-06, + "loss": 0.136, + "step": 46860 + }, + { + "epoch": 1.75, + "learning_rate": 2.065701291650887e-06, + "loss": 0.1621, + "step": 46890 + }, + { + "epoch": 1.75, + "learning_rate": 2.0650405139110026e-06, + "loss": 0.1482, + "step": 46920 + }, + { + "epoch": 1.75, + "learning_rate": 2.064380369875622e-06, + "loss": 0.1495, + "step": 46950 + }, + { + "epoch": 1.75, + "learning_rate": 2.0637208585324905e-06, + "loss": 0.1801, + "step": 46980 + }, + { + "epoch": 1.75, + "learning_rate": 2.0630619788716174e-06, + "loss": 0.1441, + "step": 47010 + }, + { + "epoch": 1.75, + "learning_rate": 2.0624037298852674e-06, + "loss": 0.124, + "step": 47040 + }, + { + "epoch": 1.76, + "learning_rate": 2.0617461105679532e-06, + "loss": 0.1459, + "step": 47070 + }, + { + "epoch": 1.76, + "learning_rate": 2.0610891199164325e-06, + "loss": 0.1501, + "step": 47100 + }, + { + "epoch": 1.76, + "learning_rate": 2.060432756929697e-06, + "loss": 0.1366, + "step": 47130 + }, + { + "epoch": 1.76, + "learning_rate": 2.0597770206089703e-06, + "loss": 0.1484, + "step": 47160 + }, + { + "epoch": 1.76, + "learning_rate": 2.0591219099576987e-06, + "loss": 0.1414, + "step": 47190 + }, + { + "epoch": 1.76, + "learning_rate": 2.058467423981546e-06, + "loss": 0.1673, + "step": 47220 + }, + { + "epoch": 1.76, + "learning_rate": 2.0578135616883866e-06, + "loss": 0.1479, + "step": 47250 + }, + { + "epoch": 1.76, + "learning_rate": 2.0571603220883005e-06, + "loss": 0.1706, + "step": 47280 + }, + { + "epoch": 1.76, + "learning_rate": 2.0565077041935645e-06, + "loss": 0.1624, + "step": 47310 + }, + { + "epoch": 1.77, + "learning_rate": 2.055855707018649e-06, + "loss": 0.1562, + "step": 47340 + }, + { + "epoch": 1.77, + "learning_rate": 2.0552043295802093e-06, + "loss": 0.1392, + "step": 47370 + }, + { + "epoch": 1.77, + "learning_rate": 2.0545535708970804e-06, + "loss": 0.1177, + "step": 47400 + }, + { + "epoch": 1.77, + "learning_rate": 2.0539034299902704e-06, + "loss": 0.1239, + "step": 47430 + }, + { + "epoch": 1.77, + "learning_rate": 2.0532539058829546e-06, + "loss": 0.1797, + "step": 47460 + }, + { + "epoch": 1.77, + "learning_rate": 2.0526049976004704e-06, + "loss": 0.1431, + "step": 47490 + }, + { + "epoch": 1.77, + "learning_rate": 2.0519567041703083e-06, + "loss": 0.1416, + "step": 47520 + }, + { + "epoch": 1.77, + "learning_rate": 2.051309024622109e-06, + "loss": 0.1593, + "step": 47550 + }, + { + "epoch": 1.77, + "learning_rate": 2.050661957987655e-06, + "loss": 0.1251, + "step": 47580 + }, + { + "epoch": 1.78, + "learning_rate": 2.050015503300866e-06, + "loss": 0.1457, + "step": 47610 + }, + { + "epoch": 1.78, + "learning_rate": 2.0493696595977914e-06, + "loss": 0.1571, + "step": 47640 + }, + { + "epoch": 1.78, + "learning_rate": 2.0487244259166056e-06, + "loss": 0.1269, + "step": 47670 + }, + { + "epoch": 1.78, + "learning_rate": 2.0480798012976015e-06, + "loss": 0.1328, + "step": 47700 + }, + { + "epoch": 1.78, + "learning_rate": 2.0474357847831843e-06, + "loss": 0.1706, + "step": 47730 + }, + { + "epoch": 1.78, + "learning_rate": 2.0467923754178647e-06, + "loss": 0.1635, + "step": 47760 + }, + { + "epoch": 1.78, + "learning_rate": 2.0461495722482565e-06, + "loss": 0.1581, + "step": 47790 + }, + { + "epoch": 1.78, + "learning_rate": 2.045507374323066e-06, + "loss": 0.1475, + "step": 47820 + }, + { + "epoch": 1.78, + "learning_rate": 2.044865780693088e-06, + "loss": 0.1588, + "step": 47850 + }, + { + "epoch": 1.79, + "learning_rate": 2.044224790411201e-06, + "loss": 0.1508, + "step": 47880 + }, + { + "epoch": 1.79, + "learning_rate": 2.043605739099571e-06, + "loss": 0.1639, + "step": 47910 + }, + { + "epoch": 1.79, + "learning_rate": 2.0429659326473045e-06, + "loss": 0.1608, + "step": 47940 + }, + { + "epoch": 1.79, + "learning_rate": 2.0423267267455322e-06, + "loss": 0.1413, + "step": 47970 + }, + { + "epoch": 1.79, + "learning_rate": 2.041688120455337e-06, + "loss": 0.1471, + "step": 48000 + }, + { + "epoch": 1.79, + "learning_rate": 2.041050112839854e-06, + "loss": 0.1402, + "step": 48030 + }, + { + "epoch": 1.79, + "learning_rate": 2.0404127029642688e-06, + "loss": 0.1442, + "step": 48060 + }, + { + "epoch": 1.79, + "learning_rate": 2.0397758898958064e-06, + "loss": 0.1605, + "step": 48090 + }, + { + "epoch": 1.79, + "learning_rate": 2.0391396727037307e-06, + "loss": 0.1374, + "step": 48120 + }, + { + "epoch": 1.8, + "learning_rate": 2.038504050459336e-06, + "loss": 0.1727, + "step": 48150 + }, + { + "epoch": 1.8, + "learning_rate": 2.0378690222359403e-06, + "loss": 0.1639, + "step": 48180 + }, + { + "epoch": 1.8, + "learning_rate": 2.0372345871088826e-06, + "loss": 0.1575, + "step": 48210 + }, + { + "epoch": 1.8, + "learning_rate": 2.036600744155515e-06, + "loss": 0.1342, + "step": 48240 + }, + { + "epoch": 1.8, + "learning_rate": 2.035967492455198e-06, + "loss": 0.1381, + "step": 48270 + }, + { + "epoch": 1.8, + "learning_rate": 2.0353348310892955e-06, + "loss": 0.158, + "step": 48300 + }, + { + "epoch": 1.8, + "learning_rate": 2.034702759141167e-06, + "loss": 0.1511, + "step": 48330 + }, + { + "epoch": 1.8, + "learning_rate": 2.034071275696164e-06, + "loss": 0.1147, + "step": 48360 + }, + { + "epoch": 1.8, + "learning_rate": 2.033440379841623e-06, + "loss": 0.1487, + "step": 48390 + }, + { + "epoch": 1.81, + "learning_rate": 2.032810070666863e-06, + "loss": 0.1717, + "step": 48420 + }, + { + "epoch": 1.81, + "learning_rate": 2.0321803472631754e-06, + "loss": 0.1458, + "step": 48450 + }, + { + "epoch": 1.81, + "learning_rate": 2.0315512087238225e-06, + "loss": 0.1381, + "step": 48480 + }, + { + "epoch": 1.81, + "learning_rate": 2.030922654144029e-06, + "loss": 0.1421, + "step": 48510 + }, + { + "epoch": 1.81, + "learning_rate": 2.03029468262098e-06, + "loss": 0.152, + "step": 48540 + }, + { + "epoch": 1.81, + "learning_rate": 2.0296672932538105e-06, + "loss": 0.1784, + "step": 48570 + }, + { + "epoch": 1.81, + "learning_rate": 2.0290404851436057e-06, + "loss": 0.147, + "step": 48600 + }, + { + "epoch": 1.81, + "learning_rate": 2.028414257393391e-06, + "loss": 0.1303, + "step": 48630 + }, + { + "epoch": 1.81, + "learning_rate": 2.0277886091081305e-06, + "loss": 0.1226, + "step": 48660 + }, + { + "epoch": 1.82, + "learning_rate": 2.0271635393947167e-06, + "loss": 0.1633, + "step": 48690 + }, + { + "epoch": 1.82, + "learning_rate": 2.0265390473619705e-06, + "loss": 0.1397, + "step": 48720 + }, + { + "epoch": 1.82, + "learning_rate": 2.025915132120632e-06, + "loss": 0.1613, + "step": 48750 + }, + { + "epoch": 1.82, + "learning_rate": 2.0252917927833574e-06, + "loss": 0.1628, + "step": 48780 + }, + { + "epoch": 1.82, + "learning_rate": 2.024669028464713e-06, + "loss": 0.1448, + "step": 48810 + }, + { + "epoch": 1.82, + "learning_rate": 2.0240468382811682e-06, + "loss": 0.1142, + "step": 48840 + }, + { + "epoch": 1.82, + "learning_rate": 2.0234252213510938e-06, + "loss": 0.1803, + "step": 48870 + }, + { + "epoch": 1.82, + "learning_rate": 2.0228041767947535e-06, + "loss": 0.1561, + "step": 48900 + }, + { + "epoch": 1.82, + "learning_rate": 2.0221837037343006e-06, + "loss": 0.1309, + "step": 48930 + }, + { + "epoch": 1.83, + "learning_rate": 2.0215638012937715e-06, + "loss": 0.1456, + "step": 48960 + }, + { + "epoch": 1.83, + "learning_rate": 2.020944468599082e-06, + "loss": 0.1509, + "step": 48990 + }, + { + "epoch": 1.83, + "learning_rate": 2.02032570477802e-06, + "loss": 0.1118, + "step": 49020 + }, + { + "epoch": 1.83, + "learning_rate": 2.019707508960244e-06, + "loss": 0.1519, + "step": 49050 + }, + { + "epoch": 1.83, + "learning_rate": 2.0190898802772724e-06, + "loss": 0.1298, + "step": 49080 + }, + { + "epoch": 1.83, + "learning_rate": 2.0184728178624835e-06, + "loss": 0.1334, + "step": 49110 + }, + { + "epoch": 1.83, + "learning_rate": 2.017856320851108e-06, + "loss": 0.144, + "step": 49140 + }, + { + "epoch": 1.83, + "learning_rate": 2.0172403883802254e-06, + "loss": 0.1516, + "step": 49170 + }, + { + "epoch": 1.83, + "learning_rate": 2.0166250195887554e-06, + "loss": 0.1373, + "step": 49200 + }, + { + "epoch": 1.84, + "learning_rate": 2.0160102136174573e-06, + "loss": 0.1762, + "step": 49230 + }, + { + "epoch": 1.84, + "learning_rate": 2.015395969608922e-06, + "loss": 0.1707, + "step": 49260 + }, + { + "epoch": 1.84, + "learning_rate": 2.01478228670757e-06, + "loss": 0.1251, + "step": 49290 + }, + { + "epoch": 1.84, + "learning_rate": 2.014169164059641e-06, + "loss": 0.1271, + "step": 49320 + }, + { + "epoch": 1.84, + "learning_rate": 2.013556600813194e-06, + "loss": 0.1403, + "step": 49350 + }, + { + "epoch": 1.84, + "learning_rate": 2.0129445961181016e-06, + "loss": 0.1334, + "step": 49380 + }, + { + "epoch": 1.84, + "learning_rate": 2.0123331491260424e-06, + "loss": 0.1512, + "step": 49410 + }, + { + "epoch": 1.84, + "learning_rate": 2.0117222589904985e-06, + "loss": 0.1416, + "step": 49440 + }, + { + "epoch": 1.85, + "learning_rate": 2.011111924866749e-06, + "loss": 0.1468, + "step": 49470 + }, + { + "epoch": 1.85, + "learning_rate": 2.0105021459118675e-06, + "loss": 0.1453, + "step": 49500 + }, + { + "epoch": 1.85, + "learning_rate": 2.0098929212847133e-06, + "loss": 0.1572, + "step": 49530 + }, + { + "epoch": 1.85, + "learning_rate": 2.0092842501459305e-06, + "loss": 0.1549, + "step": 49560 + }, + { + "epoch": 1.85, + "learning_rate": 2.0086761316579416e-06, + "loss": 0.1335, + "step": 49590 + }, + { + "epoch": 1.85, + "learning_rate": 2.008068564984941e-06, + "loss": 0.1544, + "step": 49620 + }, + { + "epoch": 1.85, + "learning_rate": 2.0074615492928926e-06, + "loss": 0.1223, + "step": 49650 + }, + { + "epoch": 1.85, + "learning_rate": 2.0068550837495242e-06, + "loss": 0.1296, + "step": 49680 + }, + { + "epoch": 1.85, + "learning_rate": 2.0062491675243216e-06, + "loss": 0.1567, + "step": 49710 + }, + { + "epoch": 1.86, + "learning_rate": 2.0056437997885276e-06, + "loss": 0.1718, + "step": 49740 + }, + { + "epoch": 1.86, + "learning_rate": 2.00503897971513e-06, + "loss": 0.1537, + "step": 49770 + }, + { + "epoch": 1.86, + "learning_rate": 2.004434706478865e-06, + "loss": 0.1418, + "step": 49800 + }, + { + "epoch": 1.86, + "learning_rate": 2.0038309792562076e-06, + "loss": 0.1969, + "step": 49830 + }, + { + "epoch": 1.86, + "learning_rate": 2.003227797225367e-06, + "loss": 0.1665, + "step": 49860 + }, + { + "epoch": 1.86, + "learning_rate": 2.0026251595662847e-06, + "loss": 0.1401, + "step": 49890 + }, + { + "epoch": 1.86, + "learning_rate": 2.002023065460627e-06, + "loss": 0.1375, + "step": 49920 + }, + { + "epoch": 1.86, + "learning_rate": 2.001421514091782e-06, + "loss": 0.1325, + "step": 49950 + }, + { + "epoch": 1.86, + "learning_rate": 2.0008205046448528e-06, + "loss": 0.1449, + "step": 49980 + }, + { + "epoch": 1.87, + "learning_rate": 2.0002200363066566e-06, + "loss": 0.1495, + "step": 50010 + }, + { + "epoch": 1.87, + "learning_rate": 1.9996400971708495e-06, + "loss": 0.1364, + "step": 50040 + }, + { + "epoch": 1.87, + "learning_rate": 1.9990406906475044e-06, + "loss": 0.1351, + "step": 50070 + }, + { + "epoch": 1.87, + "learning_rate": 1.9984418228304744e-06, + "loss": 0.143, + "step": 50100 + }, + { + "epoch": 1.87, + "learning_rate": 1.9978434929133213e-06, + "loss": 0.1434, + "step": 50130 + }, + { + "epoch": 1.87, + "learning_rate": 1.997245700091295e-06, + "loss": 0.1609, + "step": 50160 + }, + { + "epoch": 1.87, + "learning_rate": 1.996648443561332e-06, + "loss": 0.137, + "step": 50190 + }, + { + "epoch": 1.87, + "learning_rate": 1.996051722522046e-06, + "loss": 0.1427, + "step": 50220 + }, + { + "epoch": 1.87, + "learning_rate": 1.9954555361737284e-06, + "loss": 0.1237, + "step": 50250 + }, + { + "epoch": 1.88, + "learning_rate": 1.9948598837183403e-06, + "loss": 0.1591, + "step": 50280 + }, + { + "epoch": 1.88, + "learning_rate": 1.99426476435951e-06, + "loss": 0.1496, + "step": 50310 + }, + { + "epoch": 1.88, + "learning_rate": 1.9936701773025286e-06, + "loss": 0.1364, + "step": 50340 + }, + { + "epoch": 1.88, + "learning_rate": 1.9930761217543435e-06, + "loss": 0.1425, + "step": 50370 + }, + { + "epoch": 1.88, + "learning_rate": 1.9924825969235547e-06, + "loss": 0.165, + "step": 50400 + }, + { + "epoch": 1.88, + "learning_rate": 1.9918896020204136e-06, + "loss": 0.1356, + "step": 50430 + }, + { + "epoch": 1.88, + "learning_rate": 1.9912971362568133e-06, + "loss": 0.1314, + "step": 50460 + }, + { + "epoch": 1.88, + "learning_rate": 1.9907051988462883e-06, + "loss": 0.1481, + "step": 50490 + }, + { + "epoch": 1.88, + "learning_rate": 1.990113789004008e-06, + "loss": 0.1468, + "step": 50520 + }, + { + "epoch": 1.89, + "learning_rate": 1.989522905946773e-06, + "loss": 0.1308, + "step": 50550 + }, + { + "epoch": 1.89, + "learning_rate": 1.9889325488930104e-06, + "loss": 0.1535, + "step": 50580 + }, + { + "epoch": 1.89, + "learning_rate": 1.9883427170627706e-06, + "loss": 0.1222, + "step": 50610 + }, + { + "epoch": 1.89, + "learning_rate": 1.9877534096777218e-06, + "loss": 0.1415, + "step": 50640 + }, + { + "epoch": 1.89, + "learning_rate": 1.9871646259611458e-06, + "loss": 0.1625, + "step": 50670 + }, + { + "epoch": 1.89, + "learning_rate": 1.9865763651379345e-06, + "loss": 0.1266, + "step": 50700 + }, + { + "epoch": 1.89, + "learning_rate": 1.9859886264345835e-06, + "loss": 0.146, + "step": 50730 + }, + { + "epoch": 1.89, + "learning_rate": 1.985420974599683e-06, + "loss": 0.1416, + "step": 50760 + }, + { + "epoch": 1.89, + "learning_rate": 1.9848342604817485e-06, + "loss": 0.1516, + "step": 50790 + }, + { + "epoch": 1.9, + "learning_rate": 1.9842480661983527e-06, + "loss": 0.1389, + "step": 50820 + }, + { + "epoch": 1.9, + "learning_rate": 1.983662390982317e-06, + "loss": 0.1129, + "step": 50850 + }, + { + "epoch": 1.9, + "learning_rate": 1.983077234068048e-06, + "loss": 0.1344, + "step": 50880 + }, + { + "epoch": 1.9, + "learning_rate": 1.9824925946915318e-06, + "loss": 0.1372, + "step": 50910 + }, + { + "epoch": 1.9, + "learning_rate": 1.98190847209033e-06, + "loss": 0.142, + "step": 50940 + }, + { + "epoch": 1.9, + "learning_rate": 1.981324865503577e-06, + "loss": 0.1483, + "step": 50970 + }, + { + "epoch": 1.9, + "learning_rate": 1.980741774171974e-06, + "loss": 0.1239, + "step": 51000 + }, + { + "epoch": 1.9, + "learning_rate": 1.9801591973377847e-06, + "loss": 0.1557, + "step": 51030 + }, + { + "epoch": 1.9, + "learning_rate": 1.979577134244833e-06, + "loss": 0.1366, + "step": 51060 + }, + { + "epoch": 1.91, + "learning_rate": 1.978995584138498e-06, + "loss": 0.1379, + "step": 51090 + }, + { + "epoch": 1.91, + "learning_rate": 1.978414546265708e-06, + "loss": 0.1274, + "step": 51120 + }, + { + "epoch": 1.91, + "learning_rate": 1.9778340198749395e-06, + "loss": 0.148, + "step": 51150 + }, + { + "epoch": 1.91, + "learning_rate": 1.9772540042162125e-06, + "loss": 0.1374, + "step": 51180 + }, + { + "epoch": 1.91, + "learning_rate": 1.976674498541084e-06, + "loss": 0.1523, + "step": 51210 + }, + { + "epoch": 1.91, + "learning_rate": 1.9760955021026456e-06, + "loss": 0.1177, + "step": 51240 + }, + { + "epoch": 1.91, + "learning_rate": 1.9755170141555212e-06, + "loss": 0.1533, + "step": 51270 + }, + { + "epoch": 1.91, + "learning_rate": 1.9749390339558593e-06, + "loss": 0.1248, + "step": 51300 + }, + { + "epoch": 1.91, + "learning_rate": 1.9743615607613323e-06, + "loss": 0.158, + "step": 51330 + }, + { + "epoch": 1.92, + "learning_rate": 1.9737845938311307e-06, + "loss": 0.1513, + "step": 51360 + }, + { + "epoch": 1.92, + "learning_rate": 1.973208132425959e-06, + "loss": 0.1404, + "step": 51390 + }, + { + "epoch": 1.92, + "learning_rate": 1.972632175808033e-06, + "loss": 0.171, + "step": 51420 + }, + { + "epoch": 1.92, + "learning_rate": 1.972056723241075e-06, + "loss": 0.1785, + "step": 51450 + }, + { + "epoch": 1.92, + "learning_rate": 1.9714817739903085e-06, + "loss": 0.1428, + "step": 51480 + }, + { + "epoch": 1.92, + "learning_rate": 1.9709073273224575e-06, + "loss": 0.1473, + "step": 51510 + }, + { + "epoch": 1.92, + "learning_rate": 1.9703333825057403e-06, + "loss": 0.1568, + "step": 51540 + }, + { + "epoch": 1.92, + "learning_rate": 1.9697599388098655e-06, + "loss": 0.1389, + "step": 51570 + }, + { + "epoch": 1.92, + "learning_rate": 1.9691869955060294e-06, + "loss": 0.1472, + "step": 51600 + }, + { + "epoch": 1.93, + "learning_rate": 1.9686145518669107e-06, + "loss": 0.1402, + "step": 51630 + }, + { + "epoch": 1.93, + "learning_rate": 1.968042607166667e-06, + "loss": 0.1313, + "step": 51660 + }, + { + "epoch": 1.93, + "learning_rate": 1.967471160680932e-06, + "loss": 0.1738, + "step": 51690 + }, + { + "epoch": 1.93, + "learning_rate": 1.966900211686811e-06, + "loss": 0.1363, + "step": 51720 + }, + { + "epoch": 1.93, + "learning_rate": 1.9663297594628757e-06, + "loss": 0.14, + "step": 51750 + }, + { + "epoch": 1.93, + "learning_rate": 1.965759803289163e-06, + "loss": 0.1745, + "step": 51780 + }, + { + "epoch": 1.93, + "learning_rate": 1.9651903424471683e-06, + "loss": 0.1279, + "step": 51810 + }, + { + "epoch": 1.93, + "learning_rate": 1.9646213762198447e-06, + "loss": 0.1601, + "step": 51840 + }, + { + "epoch": 1.93, + "learning_rate": 1.9640529038915965e-06, + "loss": 0.1329, + "step": 51870 + }, + { + "epoch": 1.94, + "learning_rate": 1.963484924748277e-06, + "loss": 0.1545, + "step": 51900 + }, + { + "epoch": 1.94, + "learning_rate": 1.9629174380771844e-06, + "loss": 0.1474, + "step": 51930 + }, + { + "epoch": 1.94, + "learning_rate": 1.9623504431670577e-06, + "loss": 0.1261, + "step": 51960 + }, + { + "epoch": 1.94, + "learning_rate": 1.961783939308074e-06, + "loss": 0.1548, + "step": 51990 + }, + { + "epoch": 1.94, + "learning_rate": 1.9612179257918425e-06, + "loss": 0.1457, + "step": 52020 + }, + { + "epoch": 1.94, + "learning_rate": 1.960652401911404e-06, + "loss": 0.1263, + "step": 52050 + }, + { + "epoch": 1.94, + "learning_rate": 1.9600873669612238e-06, + "loss": 0.1153, + "step": 52080 + }, + { + "epoch": 1.94, + "learning_rate": 1.959522820237191e-06, + "loss": 0.1425, + "step": 52110 + }, + { + "epoch": 1.94, + "learning_rate": 1.958958761036613e-06, + "loss": 0.1344, + "step": 52140 + }, + { + "epoch": 1.95, + "learning_rate": 1.958395188658212e-06, + "loss": 0.1372, + "step": 52170 + }, + { + "epoch": 1.95, + "learning_rate": 1.957832102402122e-06, + "loss": 0.1396, + "step": 52200 + }, + { + "epoch": 1.95, + "learning_rate": 1.9572695015698843e-06, + "loss": 0.127, + "step": 52230 + }, + { + "epoch": 1.95, + "learning_rate": 1.9567073854644454e-06, + "loss": 0.132, + "step": 52260 + }, + { + "epoch": 1.95, + "learning_rate": 1.9561457533901515e-06, + "loss": 0.137, + "step": 52290 + }, + { + "epoch": 1.95, + "learning_rate": 1.9555846046527456e-06, + "loss": 0.1722, + "step": 52320 + }, + { + "epoch": 1.95, + "learning_rate": 1.955023938559364e-06, + "loss": 0.1586, + "step": 52350 + }, + { + "epoch": 1.95, + "learning_rate": 1.954463754418534e-06, + "loss": 0.153, + "step": 52380 + }, + { + "epoch": 1.95, + "learning_rate": 1.953904051540167e-06, + "loss": 0.1555, + "step": 52410 + }, + { + "epoch": 1.96, + "learning_rate": 1.9533448292355585e-06, + "loss": 0.1491, + "step": 52440 + }, + { + "epoch": 1.96, + "learning_rate": 1.9527860868173816e-06, + "loss": 0.1267, + "step": 52470 + }, + { + "epoch": 1.96, + "learning_rate": 1.9522278235996874e-06, + "loss": 0.1533, + "step": 52500 + }, + { + "epoch": 1.96, + "learning_rate": 1.951670038897896e-06, + "loss": 0.1299, + "step": 52530 + }, + { + "epoch": 1.96, + "learning_rate": 1.9511127320287964e-06, + "loss": 0.1336, + "step": 52560 + }, + { + "epoch": 1.96, + "learning_rate": 1.9505559023105446e-06, + "loss": 0.1581, + "step": 52590 + }, + { + "epoch": 1.96, + "learning_rate": 1.9499995490626562e-06, + "loss": 0.1434, + "step": 52620 + }, + { + "epoch": 1.96, + "learning_rate": 1.9494436716060055e-06, + "loss": 0.1276, + "step": 52650 + }, + { + "epoch": 1.96, + "learning_rate": 1.9488882692628197e-06, + "loss": 0.1335, + "step": 52680 + }, + { + "epoch": 1.97, + "learning_rate": 1.9483333413566786e-06, + "loss": 0.1384, + "step": 52710 + }, + { + "epoch": 1.97, + "learning_rate": 1.9477788872125087e-06, + "loss": 0.1393, + "step": 52740 + }, + { + "epoch": 1.97, + "learning_rate": 1.94722490615658e-06, + "loss": 0.1604, + "step": 52770 + }, + { + "epoch": 1.97, + "learning_rate": 1.946671397516505e-06, + "loss": 0.1486, + "step": 52800 + }, + { + "epoch": 1.97, + "learning_rate": 1.946118360621231e-06, + "loss": 0.1258, + "step": 52830 + }, + { + "epoch": 1.97, + "learning_rate": 1.9455657948010406e-06, + "loss": 0.1385, + "step": 52860 + }, + { + "epoch": 1.97, + "learning_rate": 1.945013699387546e-06, + "loss": 0.1514, + "step": 52890 + }, + { + "epoch": 1.97, + "learning_rate": 1.9444620737136872e-06, + "loss": 0.14, + "step": 52920 + }, + { + "epoch": 1.97, + "learning_rate": 1.9439109171137267e-06, + "loss": 0.1281, + "step": 52950 + }, + { + "epoch": 1.98, + "learning_rate": 1.943360228923247e-06, + "loss": 0.1233, + "step": 52980 + }, + { + "epoch": 1.98, + "learning_rate": 1.942810008479148e-06, + "loss": 0.143, + "step": 53010 + }, + { + "epoch": 1.98, + "learning_rate": 1.942260255119644e-06, + "loss": 0.1697, + "step": 53040 + }, + { + "epoch": 1.98, + "learning_rate": 1.941710968184258e-06, + "loss": 0.1515, + "step": 53070 + }, + { + "epoch": 1.98, + "learning_rate": 1.9411621470138204e-06, + "loss": 0.1349, + "step": 53100 + }, + { + "epoch": 1.98, + "learning_rate": 1.940613790950464e-06, + "loss": 0.139, + "step": 53130 + }, + { + "epoch": 1.98, + "learning_rate": 1.940065899337623e-06, + "loss": 0.1279, + "step": 53160 + }, + { + "epoch": 1.98, + "learning_rate": 1.939518471520029e-06, + "loss": 0.1355, + "step": 53190 + }, + { + "epoch": 1.98, + "learning_rate": 1.9389715068437056e-06, + "loss": 0.1538, + "step": 53220 + }, + { + "epoch": 1.99, + "learning_rate": 1.938425004655967e-06, + "loss": 0.1466, + "step": 53250 + }, + { + "epoch": 1.99, + "learning_rate": 1.9378789643054147e-06, + "loss": 0.1248, + "step": 53280 + }, + { + "epoch": 1.99, + "learning_rate": 1.937333385141934e-06, + "loss": 0.1291, + "step": 53310 + }, + { + "epoch": 1.99, + "learning_rate": 1.9367882665166908e-06, + "loss": 0.1363, + "step": 53340 + }, + { + "epoch": 1.99, + "learning_rate": 1.9362436077821283e-06, + "loss": 0.1325, + "step": 53370 + }, + { + "epoch": 1.99, + "learning_rate": 1.935699408291963e-06, + "loss": 0.1311, + "step": 53400 + }, + { + "epoch": 1.99, + "learning_rate": 1.9351556674011827e-06, + "loss": 0.1375, + "step": 53430 + }, + { + "epoch": 1.99, + "learning_rate": 1.9346123844660423e-06, + "loss": 0.159, + "step": 53460 + }, + { + "epoch": 2.0, + "learning_rate": 1.934069558844063e-06, + "loss": 0.1449, + "step": 53490 + }, + { + "epoch": 2.0, + "learning_rate": 1.933527189894024e-06, + "loss": 0.1417, + "step": 53520 + }, + { + "epoch": 2.0, + "learning_rate": 1.932985276975965e-06, + "loss": 0.111, + "step": 53550 + }, + { + "epoch": 2.0, + "learning_rate": 1.93244381945118e-06, + "loss": 0.163, + "step": 53580 + }, + { + "epoch": 2.0, + "learning_rate": 1.9319028166822136e-06, + "loss": 0.1297, + "step": 53610 + }, + { + "epoch": 2.0, + "learning_rate": 1.9313622680328613e-06, + "loss": 0.1178, + "step": 53640 + }, + { + "epoch": 2.0, + "learning_rate": 1.9308221728681602e-06, + "loss": 0.1362, + "step": 53670 + }, + { + "epoch": 2.0, + "learning_rate": 1.9302825305543936e-06, + "loss": 0.1115, + "step": 53700 + }, + { + "epoch": 2.0, + "learning_rate": 1.929743340459082e-06, + "loss": 0.1427, + "step": 53730 + }, + { + "epoch": 2.01, + "learning_rate": 1.929204601950982e-06, + "loss": 0.143, + "step": 53760 + }, + { + "epoch": 2.01, + "learning_rate": 1.9286663144000823e-06, + "loss": 0.1393, + "step": 53790 + }, + { + "epoch": 2.01, + "learning_rate": 1.928128477177604e-06, + "loss": 0.1221, + "step": 53820 + }, + { + "epoch": 2.01, + "learning_rate": 1.927591089655992e-06, + "loss": 0.1452, + "step": 53850 + }, + { + "epoch": 2.01, + "learning_rate": 1.9270541512089164e-06, + "loss": 0.1481, + "step": 53880 + }, + { + "epoch": 2.01, + "learning_rate": 1.9265176612112675e-06, + "loss": 0.1434, + "step": 53910 + }, + { + "epoch": 2.01, + "learning_rate": 1.9259816190391543e-06, + "loss": 0.1411, + "step": 53940 + }, + { + "epoch": 2.01, + "learning_rate": 1.925446024069898e-06, + "loss": 0.1442, + "step": 53970 + }, + { + "epoch": 2.01, + "learning_rate": 1.9249108756820324e-06, + "loss": 0.129, + "step": 54000 + }, + { + "epoch": 2.02, + "learning_rate": 1.9243761732553013e-06, + "loss": 0.1403, + "step": 54030 + }, + { + "epoch": 2.02, + "learning_rate": 1.9238419161706506e-06, + "loss": 0.1284, + "step": 54060 + }, + { + "epoch": 2.02, + "learning_rate": 1.923308103810231e-06, + "loss": 0.1248, + "step": 54090 + }, + { + "epoch": 2.02, + "learning_rate": 1.922774735557393e-06, + "loss": 0.1347, + "step": 54120 + }, + { + "epoch": 2.02, + "learning_rate": 1.9222418107966813e-06, + "loss": 0.1383, + "step": 54150 + }, + { + "epoch": 2.02, + "learning_rate": 1.921709328913835e-06, + "loss": 0.1221, + "step": 54180 + }, + { + "epoch": 2.02, + "learning_rate": 1.921177289295785e-06, + "loss": 0.1909, + "step": 54210 + }, + { + "epoch": 2.02, + "learning_rate": 1.9206456913306475e-06, + "loss": 0.145, + "step": 54240 + }, + { + "epoch": 2.02, + "learning_rate": 1.9201145344077244e-06, + "loss": 0.1358, + "step": 54270 + }, + { + "epoch": 2.03, + "learning_rate": 1.9195838179174996e-06, + "loss": 0.1371, + "step": 54300 + }, + { + "epoch": 2.03, + "learning_rate": 1.9190535412516335e-06, + "loss": 0.1221, + "step": 54330 + }, + { + "epoch": 2.03, + "learning_rate": 1.9185237038029657e-06, + "loss": 0.1306, + "step": 54360 + }, + { + "epoch": 2.03, + "learning_rate": 1.9179943049655055e-06, + "loss": 0.1412, + "step": 54390 + }, + { + "epoch": 2.03, + "learning_rate": 1.917465344134433e-06, + "loss": 0.1251, + "step": 54420 + }, + { + "epoch": 2.03, + "learning_rate": 1.916936820706096e-06, + "loss": 0.1566, + "step": 54450 + }, + { + "epoch": 2.03, + "learning_rate": 1.916408734078005e-06, + "loss": 0.161, + "step": 54480 + }, + { + "epoch": 2.03, + "learning_rate": 1.915881083648834e-06, + "loss": 0.1591, + "step": 54510 + }, + { + "epoch": 2.03, + "learning_rate": 1.9153538688184124e-06, + "loss": 0.1193, + "step": 54540 + }, + { + "epoch": 2.04, + "learning_rate": 1.9148270889877275e-06, + "loss": 0.157, + "step": 54570 + }, + { + "epoch": 2.04, + "learning_rate": 1.914300743558917e-06, + "loss": 0.1461, + "step": 54600 + }, + { + "epoch": 2.04, + "learning_rate": 1.913774831935271e-06, + "loss": 0.1418, + "step": 54630 + }, + { + "epoch": 2.04, + "learning_rate": 1.9132493535212246e-06, + "loss": 0.1584, + "step": 54660 + }, + { + "epoch": 2.04, + "learning_rate": 1.9127243077223566e-06, + "loss": 0.156, + "step": 54690 + }, + { + "epoch": 2.04, + "learning_rate": 1.9121996939453884e-06, + "loss": 0.1312, + "step": 54720 + }, + { + "epoch": 2.04, + "learning_rate": 1.9116755115981794e-06, + "loss": 0.1259, + "step": 54750 + }, + { + "epoch": 2.04, + "learning_rate": 1.9111692115382816e-06, + "loss": 0.1549, + "step": 54780 + }, + { + "epoch": 2.04, + "learning_rate": 1.910645875946571e-06, + "loss": 0.14, + "step": 54810 + }, + { + "epoch": 2.05, + "learning_rate": 1.910122970034606e-06, + "loss": 0.1441, + "step": 54840 + }, + { + "epoch": 2.05, + "learning_rate": 1.9096004932147346e-06, + "loss": 0.1485, + "step": 54870 + }, + { + "epoch": 2.05, + "learning_rate": 1.909078444900429e-06, + "loss": 0.1247, + "step": 54900 + }, + { + "epoch": 2.05, + "learning_rate": 1.9085568245062825e-06, + "loss": 0.1509, + "step": 54930 + }, + { + "epoch": 2.05, + "learning_rate": 1.908035631448009e-06, + "loss": 0.1342, + "step": 54960 + }, + { + "epoch": 2.05, + "learning_rate": 1.907514865142436e-06, + "loss": 0.1619, + "step": 54990 + }, + { + "epoch": 2.05, + "learning_rate": 1.9069945250075084e-06, + "loss": 0.1242, + "step": 55020 + }, + { + "epoch": 2.05, + "learning_rate": 1.9064746104622783e-06, + "loss": 0.1585, + "step": 55050 + }, + { + "epoch": 2.05, + "learning_rate": 1.9059551209269078e-06, + "loss": 0.1655, + "step": 55080 + }, + { + "epoch": 2.06, + "learning_rate": 1.9054360558226643e-06, + "loss": 0.1182, + "step": 55110 + }, + { + "epoch": 2.06, + "learning_rate": 1.9049174145719173e-06, + "loss": 0.129, + "step": 55140 + }, + { + "epoch": 2.06, + "learning_rate": 1.9043991965981363e-06, + "loss": 0.1676, + "step": 55170 + }, + { + "epoch": 2.06, + "learning_rate": 1.9038814013258886e-06, + "loss": 0.1417, + "step": 55200 + }, + { + "epoch": 2.06, + "learning_rate": 1.9033640281808363e-06, + "loss": 0.1112, + "step": 55230 + }, + { + "epoch": 2.06, + "learning_rate": 1.902847076589732e-06, + "loss": 0.1362, + "step": 55260 + }, + { + "epoch": 2.06, + "learning_rate": 1.9023305459804192e-06, + "loss": 0.1654, + "step": 55290 + }, + { + "epoch": 2.06, + "learning_rate": 1.901814435781827e-06, + "loss": 0.1577, + "step": 55320 + }, + { + "epoch": 2.06, + "learning_rate": 1.901298745423969e-06, + "loss": 0.1288, + "step": 55350 + }, + { + "epoch": 2.07, + "learning_rate": 1.9007834743379393e-06, + "loss": 0.1274, + "step": 55380 + }, + { + "epoch": 2.07, + "learning_rate": 1.9002686219559125e-06, + "loss": 0.1204, + "step": 55410 + }, + { + "epoch": 2.07, + "learning_rate": 1.8997541877111366e-06, + "loss": 0.1297, + "step": 55440 + }, + { + "epoch": 2.07, + "learning_rate": 1.899240171037935e-06, + "loss": 0.1634, + "step": 55470 + }, + { + "epoch": 2.07, + "learning_rate": 1.8987265713717016e-06, + "loss": 0.1315, + "step": 55500 + }, + { + "epoch": 2.07, + "learning_rate": 1.8982133881488975e-06, + "loss": 0.1278, + "step": 55530 + }, + { + "epoch": 2.07, + "learning_rate": 1.8977006208070509e-06, + "loss": 0.1762, + "step": 55560 + }, + { + "epoch": 2.07, + "learning_rate": 1.8971882687847517e-06, + "loss": 0.1379, + "step": 55590 + }, + { + "epoch": 2.07, + "learning_rate": 1.8966763315216514e-06, + "loss": 0.1479, + "step": 55620 + }, + { + "epoch": 2.08, + "learning_rate": 1.8961648084584578e-06, + "loss": 0.1305, + "step": 55650 + }, + { + "epoch": 2.08, + "learning_rate": 1.8956536990369353e-06, + "loss": 0.1345, + "step": 55680 + }, + { + "epoch": 2.08, + "learning_rate": 1.8951430026999016e-06, + "loss": 0.1128, + "step": 55710 + }, + { + "epoch": 2.08, + "learning_rate": 1.894632718891222e-06, + "loss": 0.1565, + "step": 55740 + }, + { + "epoch": 2.08, + "learning_rate": 1.8941228470558122e-06, + "loss": 0.1482, + "step": 55770 + }, + { + "epoch": 2.08, + "learning_rate": 1.8936133866396317e-06, + "loss": 0.1325, + "step": 55800 + }, + { + "epoch": 2.08, + "learning_rate": 1.8931043370896827e-06, + "loss": 0.1463, + "step": 55830 + }, + { + "epoch": 2.08, + "learning_rate": 1.8925956978540075e-06, + "loss": 0.1328, + "step": 55860 + }, + { + "epoch": 2.08, + "learning_rate": 1.8920874683816859e-06, + "loss": 0.1245, + "step": 55890 + }, + { + "epoch": 2.09, + "learning_rate": 1.891579648122833e-06, + "loss": 0.1185, + "step": 55920 + }, + { + "epoch": 2.09, + "learning_rate": 1.8910722365285963e-06, + "loss": 0.1188, + "step": 55950 + }, + { + "epoch": 2.09, + "learning_rate": 1.890565233051153e-06, + "loss": 0.1299, + "step": 55980 + }, + { + "epoch": 2.09, + "learning_rate": 1.8900586371437086e-06, + "loss": 0.1448, + "step": 56010 + }, + { + "epoch": 2.09, + "learning_rate": 1.8895524482604927e-06, + "loss": 0.1581, + "step": 56040 + }, + { + "epoch": 2.09, + "learning_rate": 1.8890466658567582e-06, + "loss": 0.1294, + "step": 56070 + }, + { + "epoch": 2.09, + "learning_rate": 1.8885412893887783e-06, + "loss": 0.1452, + "step": 56100 + }, + { + "epoch": 2.09, + "learning_rate": 1.8880363183138433e-06, + "loss": 0.1344, + "step": 56130 + }, + { + "epoch": 2.09, + "learning_rate": 1.8875317520902595e-06, + "loss": 0.1482, + "step": 56160 + }, + { + "epoch": 2.1, + "learning_rate": 1.887027590177345e-06, + "loss": 0.1595, + "step": 56190 + }, + { + "epoch": 2.1, + "learning_rate": 1.886523832035429e-06, + "loss": 0.1496, + "step": 56220 + }, + { + "epoch": 2.1, + "learning_rate": 1.8860204771258493e-06, + "loss": 0.1285, + "step": 56250 + }, + { + "epoch": 2.1, + "learning_rate": 1.885517524910947e-06, + "loss": 0.1363, + "step": 56280 + }, + { + "epoch": 2.1, + "learning_rate": 1.8850149748540687e-06, + "loss": 0.1447, + "step": 56310 + }, + { + "epoch": 2.1, + "learning_rate": 1.8845128264195603e-06, + "loss": 0.1389, + "step": 56340 + }, + { + "epoch": 2.1, + "learning_rate": 1.884011079072767e-06, + "loss": 0.1175, + "step": 56370 + }, + { + "epoch": 2.1, + "learning_rate": 1.883509732280029e-06, + "loss": 0.1539, + "step": 56400 + }, + { + "epoch": 2.1, + "learning_rate": 1.8830087855086806e-06, + "loss": 0.1483, + "step": 56430 + }, + { + "epoch": 2.11, + "learning_rate": 1.8825082382270468e-06, + "loss": 0.1369, + "step": 56460 + }, + { + "epoch": 2.11, + "learning_rate": 1.882008089904442e-06, + "loss": 0.1516, + "step": 56490 + }, + { + "epoch": 2.11, + "learning_rate": 1.8815083400111672e-06, + "loss": 0.1518, + "step": 56520 + }, + { + "epoch": 2.11, + "learning_rate": 1.8810089880185063e-06, + "loss": 0.16, + "step": 56550 + }, + { + "epoch": 2.11, + "learning_rate": 1.8805100333987256e-06, + "loss": 0.1144, + "step": 56580 + }, + { + "epoch": 2.11, + "learning_rate": 1.8800114756250712e-06, + "loss": 0.1198, + "step": 56610 + }, + { + "epoch": 2.11, + "learning_rate": 1.8795133141717656e-06, + "loss": 0.1455, + "step": 56640 + }, + { + "epoch": 2.11, + "learning_rate": 1.8790155485140063e-06, + "loss": 0.1119, + "step": 56670 + }, + { + "epoch": 2.11, + "learning_rate": 1.8785181781279636e-06, + "loss": 0.1224, + "step": 56700 + }, + { + "epoch": 2.12, + "learning_rate": 1.8780212024907767e-06, + "loss": 0.1444, + "step": 56730 + }, + { + "epoch": 2.12, + "learning_rate": 1.877524621080554e-06, + "loss": 0.1615, + "step": 56760 + }, + { + "epoch": 2.12, + "learning_rate": 1.8770284333763686e-06, + "loss": 0.1363, + "step": 56790 + }, + { + "epoch": 2.12, + "learning_rate": 1.8765326388582567e-06, + "loss": 0.1387, + "step": 56820 + }, + { + "epoch": 2.12, + "learning_rate": 1.8760372370072152e-06, + "loss": 0.1401, + "step": 56850 + }, + { + "epoch": 2.12, + "learning_rate": 1.8755422273052006e-06, + "loss": 0.128, + "step": 56880 + }, + { + "epoch": 2.12, + "learning_rate": 1.875047609235125e-06, + "loss": 0.1281, + "step": 56910 + }, + { + "epoch": 2.12, + "learning_rate": 1.8745533822808548e-06, + "loss": 0.1453, + "step": 56940 + }, + { + "epoch": 2.12, + "learning_rate": 1.8740595459272077e-06, + "loss": 0.1508, + "step": 56970 + }, + { + "epoch": 2.13, + "learning_rate": 1.8735660996599512e-06, + "loss": 0.1184, + "step": 57000 + }, + { + "epoch": 2.13, + "learning_rate": 1.8730730429658007e-06, + "loss": 0.1293, + "step": 57030 + }, + { + "epoch": 2.13, + "learning_rate": 1.8725803753324157e-06, + "loss": 0.1484, + "step": 57060 + }, + { + "epoch": 2.13, + "learning_rate": 1.8720880962483992e-06, + "loss": 0.1366, + "step": 57090 + }, + { + "epoch": 2.13, + "learning_rate": 1.8715962052032946e-06, + "loss": 0.1456, + "step": 57120 + }, + { + "epoch": 2.13, + "learning_rate": 1.8711047016875836e-06, + "loss": 0.14, + "step": 57150 + }, + { + "epoch": 2.13, + "learning_rate": 1.8706135851926837e-06, + "loss": 0.1289, + "step": 57180 + }, + { + "epoch": 2.13, + "learning_rate": 1.870122855210947e-06, + "loss": 0.1278, + "step": 57210 + }, + { + "epoch": 2.13, + "learning_rate": 1.8696325112356574e-06, + "loss": 0.1643, + "step": 57240 + }, + { + "epoch": 2.14, + "learning_rate": 1.869142552761028e-06, + "loss": 0.1741, + "step": 57270 + }, + { + "epoch": 2.14, + "learning_rate": 1.8686529792821986e-06, + "loss": 0.1421, + "step": 57300 + }, + { + "epoch": 2.14, + "learning_rate": 1.868163790295235e-06, + "loss": 0.1173, + "step": 57330 + }, + { + "epoch": 2.14, + "learning_rate": 1.8676749852971257e-06, + "loss": 0.1389, + "step": 57360 + }, + { + "epoch": 2.14, + "learning_rate": 1.8671865637857806e-06, + "loss": 0.1341, + "step": 57390 + }, + { + "epoch": 2.14, + "learning_rate": 1.866698525260027e-06, + "loss": 0.1396, + "step": 57420 + }, + { + "epoch": 2.14, + "learning_rate": 1.8662108692196096e-06, + "loss": 0.1264, + "step": 57450 + }, + { + "epoch": 2.14, + "learning_rate": 1.8657235951651874e-06, + "loss": 0.1288, + "step": 57480 + }, + { + "epoch": 2.14, + "learning_rate": 1.8652367025983317e-06, + "loss": 0.1226, + "step": 57510 + }, + { + "epoch": 2.15, + "learning_rate": 1.8647501910215233e-06, + "loss": 0.1149, + "step": 57540 + }, + { + "epoch": 2.15, + "learning_rate": 1.8642640599381507e-06, + "loss": 0.1227, + "step": 57570 + }, + { + "epoch": 2.15, + "learning_rate": 1.8637783088525087e-06, + "loss": 0.1211, + "step": 57600 + }, + { + "epoch": 2.15, + "learning_rate": 1.8632929372697966e-06, + "loss": 0.1226, + "step": 57630 + }, + { + "epoch": 2.15, + "learning_rate": 1.8628079446961137e-06, + "loss": 0.1482, + "step": 57660 + }, + { + "epoch": 2.15, + "learning_rate": 1.8623233306384588e-06, + "loss": 0.1585, + "step": 57690 + }, + { + "epoch": 2.15, + "learning_rate": 1.8618390946047293e-06, + "loss": 0.1692, + "step": 57720 + }, + { + "epoch": 2.15, + "learning_rate": 1.8613552361037172e-06, + "loss": 0.1351, + "step": 57750 + }, + { + "epoch": 2.16, + "learning_rate": 1.860871754645107e-06, + "loss": 0.112, + "step": 57780 + }, + { + "epoch": 2.16, + "learning_rate": 1.8603886497394748e-06, + "loss": 0.1435, + "step": 57810 + }, + { + "epoch": 2.16, + "learning_rate": 1.859905920898286e-06, + "loss": 0.1509, + "step": 57840 + }, + { + "epoch": 2.16, + "learning_rate": 1.8594235676338921e-06, + "loss": 0.1242, + "step": 57870 + }, + { + "epoch": 2.16, + "learning_rate": 1.8589415894595303e-06, + "loss": 0.134, + "step": 57900 + }, + { + "epoch": 2.16, + "learning_rate": 1.8584599858893192e-06, + "loss": 0.1328, + "step": 57930 + }, + { + "epoch": 2.16, + "learning_rate": 1.85797875643826e-06, + "loss": 0.1295, + "step": 57960 + }, + { + "epoch": 2.16, + "learning_rate": 1.8575139231348627e-06, + "loss": 0.1351, + "step": 57990 + }, + { + "epoch": 2.16, + "learning_rate": 1.8570334280400031e-06, + "loss": 0.1316, + "step": 58020 + }, + { + "epoch": 2.17, + "learning_rate": 1.856553305630624e-06, + "loss": 0.1225, + "step": 58050 + }, + { + "epoch": 2.17, + "learning_rate": 1.856073555425199e-06, + "loss": 0.1474, + "step": 58080 + }, + { + "epoch": 2.17, + "learning_rate": 1.8555941769430718e-06, + "loss": 0.1281, + "step": 58110 + }, + { + "epoch": 2.17, + "learning_rate": 1.8551151697044554e-06, + "loss": 0.1203, + "step": 58140 + }, + { + "epoch": 2.17, + "learning_rate": 1.8546365332304288e-06, + "loss": 0.1301, + "step": 58170 + }, + { + "epoch": 2.17, + "learning_rate": 1.8541582670429356e-06, + "loss": 0.141, + "step": 58200 + }, + { + "epoch": 2.17, + "learning_rate": 1.8536803706647816e-06, + "loss": 0.1162, + "step": 58230 + }, + { + "epoch": 2.17, + "learning_rate": 1.853202843619634e-06, + "loss": 0.1177, + "step": 58260 + }, + { + "epoch": 2.17, + "learning_rate": 1.8527256854320173e-06, + "loss": 0.1242, + "step": 58290 + }, + { + "epoch": 2.18, + "learning_rate": 1.8522488956273137e-06, + "loss": 0.1218, + "step": 58320 + }, + { + "epoch": 2.18, + "learning_rate": 1.8517724737317594e-06, + "loss": 0.1214, + "step": 58350 + }, + { + "epoch": 2.18, + "learning_rate": 1.8512964192724428e-06, + "loss": 0.1419, + "step": 58380 + }, + { + "epoch": 2.18, + "learning_rate": 1.8508207317773033e-06, + "loss": 0.1471, + "step": 58410 + }, + { + "epoch": 2.18, + "learning_rate": 1.8503454107751295e-06, + "loss": 0.1293, + "step": 58440 + }, + { + "epoch": 2.18, + "learning_rate": 1.849870455795555e-06, + "loss": 0.1229, + "step": 58470 + }, + { + "epoch": 2.18, + "learning_rate": 1.8493958663690606e-06, + "loss": 0.1366, + "step": 58500 + }, + { + "epoch": 2.18, + "learning_rate": 1.8489216420269671e-06, + "loss": 0.131, + "step": 58530 + }, + { + "epoch": 2.18, + "learning_rate": 1.8484477823014386e-06, + "loss": 0.1232, + "step": 58560 + }, + { + "epoch": 2.19, + "learning_rate": 1.8479742867254763e-06, + "loss": 0.1155, + "step": 58590 + }, + { + "epoch": 2.19, + "learning_rate": 1.8475011548329189e-06, + "loss": 0.1409, + "step": 58620 + }, + { + "epoch": 2.19, + "learning_rate": 1.847028386158441e-06, + "loss": 0.1163, + "step": 58650 + }, + { + "epoch": 2.19, + "learning_rate": 1.8465559802375483e-06, + "loss": 0.1438, + "step": 58680 + }, + { + "epoch": 2.19, + "learning_rate": 1.8460839366065802e-06, + "loss": 0.1332, + "step": 58710 + }, + { + "epoch": 2.19, + "learning_rate": 1.8456122548027036e-06, + "loss": 0.1116, + "step": 58740 + }, + { + "epoch": 2.19, + "learning_rate": 1.8451409343639128e-06, + "loss": 0.1541, + "step": 58770 + }, + { + "epoch": 2.19, + "learning_rate": 1.8446699748290289e-06, + "loss": 0.1484, + "step": 58800 + }, + { + "epoch": 2.19, + "learning_rate": 1.844199375737695e-06, + "loss": 0.1485, + "step": 58830 + }, + { + "epoch": 2.2, + "learning_rate": 1.843729136630377e-06, + "loss": 0.1469, + "step": 58860 + }, + { + "epoch": 2.2, + "learning_rate": 1.8432592570483596e-06, + "loss": 0.113, + "step": 58890 + }, + { + "epoch": 2.2, + "learning_rate": 1.8427897365337466e-06, + "loss": 0.151, + "step": 58920 + }, + { + "epoch": 2.2, + "learning_rate": 1.8423205746294573e-06, + "loss": 0.1283, + "step": 58950 + }, + { + "epoch": 2.2, + "learning_rate": 1.841851770879224e-06, + "loss": 0.1135, + "step": 58980 + }, + { + "epoch": 2.2, + "learning_rate": 1.8413833248275939e-06, + "loss": 0.1451, + "step": 59010 + }, + { + "epoch": 2.2, + "learning_rate": 1.840915236019922e-06, + "loss": 0.1232, + "step": 59040 + }, + { + "epoch": 2.2, + "learning_rate": 1.840447504002374e-06, + "loss": 0.1312, + "step": 59070 + }, + { + "epoch": 2.2, + "learning_rate": 1.8399801283219207e-06, + "loss": 0.1322, + "step": 59100 + }, + { + "epoch": 2.21, + "learning_rate": 1.8395131085263387e-06, + "loss": 0.1463, + "step": 59130 + }, + { + "epoch": 2.21, + "learning_rate": 1.8390464441642077e-06, + "loss": 0.1342, + "step": 59160 + }, + { + "epoch": 2.21, + "learning_rate": 1.8385801347849084e-06, + "loss": 0.1137, + "step": 59190 + }, + { + "epoch": 2.21, + "learning_rate": 1.8381141799386204e-06, + "loss": 0.1086, + "step": 59220 + }, + { + "epoch": 2.21, + "learning_rate": 1.8376485791763221e-06, + "loss": 0.1405, + "step": 59250 + }, + { + "epoch": 2.21, + "learning_rate": 1.8371833320497865e-06, + "loss": 0.1248, + "step": 59280 + }, + { + "epoch": 2.21, + "learning_rate": 1.8367184381115817e-06, + "loss": 0.1268, + "step": 59310 + }, + { + "epoch": 2.21, + "learning_rate": 1.8362538969150664e-06, + "loss": 0.1336, + "step": 59340 + }, + { + "epoch": 2.21, + "learning_rate": 1.8357897080143913e-06, + "loss": 0.1585, + "step": 59370 + }, + { + "epoch": 2.22, + "learning_rate": 1.8353258709644942e-06, + "loss": 0.1658, + "step": 59400 + }, + { + "epoch": 2.22, + "learning_rate": 1.834862385321101e-06, + "loss": 0.1299, + "step": 59430 + }, + { + "epoch": 2.22, + "learning_rate": 1.8343992506407218e-06, + "loss": 0.1415, + "step": 59460 + }, + { + "epoch": 2.22, + "learning_rate": 1.8339364664806493e-06, + "loss": 0.1314, + "step": 59490 + }, + { + "epoch": 2.22, + "learning_rate": 1.8334740323989596e-06, + "loss": 0.1507, + "step": 59520 + }, + { + "epoch": 2.22, + "learning_rate": 1.8330119479545063e-06, + "loss": 0.1176, + "step": 59550 + }, + { + "epoch": 2.22, + "learning_rate": 1.832550212706922e-06, + "loss": 0.1788, + "step": 59580 + }, + { + "epoch": 2.22, + "learning_rate": 1.8320888262166148e-06, + "loss": 0.134, + "step": 59610 + }, + { + "epoch": 2.22, + "learning_rate": 1.8316277880447678e-06, + "loss": 0.1447, + "step": 59640 + }, + { + "epoch": 2.23, + "learning_rate": 1.8311670977533358e-06, + "loss": 0.1302, + "step": 59670 + }, + { + "epoch": 2.23, + "learning_rate": 1.8307067549050465e-06, + "loss": 0.1325, + "step": 59700 + }, + { + "epoch": 2.23, + "learning_rate": 1.8302467590633934e-06, + "loss": 0.1361, + "step": 59730 + }, + { + "epoch": 2.23, + "learning_rate": 1.8297871097926407e-06, + "loss": 0.1315, + "step": 59760 + }, + { + "epoch": 2.23, + "learning_rate": 1.8293278066578158e-06, + "loss": 0.1069, + "step": 59790 + }, + { + "epoch": 2.23, + "learning_rate": 1.8288688492247114e-06, + "loss": 0.1539, + "step": 59820 + }, + { + "epoch": 2.23, + "learning_rate": 1.8284102370598822e-06, + "loss": 0.1699, + "step": 59850 + }, + { + "epoch": 2.23, + "learning_rate": 1.8279519697306424e-06, + "loss": 0.1171, + "step": 59880 + }, + { + "epoch": 2.23, + "learning_rate": 1.8274940468050661e-06, + "loss": 0.1478, + "step": 59910 + }, + { + "epoch": 2.24, + "learning_rate": 1.8270364678519841e-06, + "loss": 0.1364, + "step": 59940 + }, + { + "epoch": 2.24, + "learning_rate": 1.8265792324409817e-06, + "loss": 0.1339, + "step": 59970 + }, + { + "epoch": 2.24, + "learning_rate": 1.8261223401423995e-06, + "loss": 0.1459, + "step": 60000 + }, + { + "epoch": 2.24, + "learning_rate": 1.8256657905273283e-06, + "loss": 0.1115, + "step": 60030 + }, + { + "epoch": 2.24, + "learning_rate": 1.8252095831676105e-06, + "loss": 0.1448, + "step": 60060 + }, + { + "epoch": 2.24, + "learning_rate": 1.8247537176358363e-06, + "loss": 0.1464, + "step": 60090 + }, + { + "epoch": 2.24, + "learning_rate": 1.824298193505343e-06, + "loss": 0.1243, + "step": 60120 + }, + { + "epoch": 2.24, + "learning_rate": 1.8238430103502131e-06, + "loss": 0.1311, + "step": 60150 + }, + { + "epoch": 2.24, + "learning_rate": 1.8233881677452723e-06, + "loss": 0.1542, + "step": 60180 + }, + { + "epoch": 2.25, + "learning_rate": 1.8229336652660886e-06, + "loss": 0.1457, + "step": 60210 + }, + { + "epoch": 2.25, + "learning_rate": 1.8224795024889702e-06, + "loss": 0.1127, + "step": 60240 + }, + { + "epoch": 2.25, + "learning_rate": 1.8220256789909638e-06, + "loss": 0.1399, + "step": 60270 + }, + { + "epoch": 2.25, + "learning_rate": 1.8215721943498523e-06, + "loss": 0.1329, + "step": 60300 + }, + { + "epoch": 2.25, + "learning_rate": 1.8211190481441551e-06, + "loss": 0.1228, + "step": 60330 + }, + { + "epoch": 2.25, + "learning_rate": 1.820666239953124e-06, + "loss": 0.1176, + "step": 60360 + }, + { + "epoch": 2.25, + "learning_rate": 1.820213769356744e-06, + "loss": 0.1104, + "step": 60390 + }, + { + "epoch": 2.25, + "learning_rate": 1.8197616359357285e-06, + "loss": 0.0979, + "step": 60420 + }, + { + "epoch": 2.25, + "learning_rate": 1.8193098392715216e-06, + "loss": 0.1361, + "step": 60450 + }, + { + "epoch": 2.26, + "learning_rate": 1.8188583789462934e-06, + "loss": 0.1128, + "step": 60480 + }, + { + "epoch": 2.26, + "learning_rate": 1.8184072545429393e-06, + "loss": 0.1394, + "step": 60510 + }, + { + "epoch": 2.26, + "learning_rate": 1.8179564656450797e-06, + "loss": 0.1262, + "step": 60540 + }, + { + "epoch": 2.26, + "learning_rate": 1.817506011837055e-06, + "loss": 0.164, + "step": 60570 + }, + { + "epoch": 2.26, + "learning_rate": 1.8170558927039284e-06, + "loss": 0.1167, + "step": 60600 + }, + { + "epoch": 2.26, + "learning_rate": 1.8166061078314813e-06, + "loss": 0.1234, + "step": 60630 + }, + { + "epoch": 2.26, + "learning_rate": 1.8161566568062119e-06, + "loss": 0.1186, + "step": 60660 + }, + { + "epoch": 2.26, + "learning_rate": 1.8157075392153348e-06, + "loss": 0.1331, + "step": 60690 + }, + { + "epoch": 2.26, + "learning_rate": 1.815273708771384e-06, + "loss": 0.1283, + "step": 60720 + }, + { + "epoch": 2.27, + "learning_rate": 1.8148252457333782e-06, + "loss": 0.1571, + "step": 60750 + }, + { + "epoch": 2.27, + "learning_rate": 1.8143771149093633e-06, + "loss": 0.1229, + "step": 60780 + }, + { + "epoch": 2.27, + "learning_rate": 1.813929315889378e-06, + "loss": 0.1486, + "step": 60810 + }, + { + "epoch": 2.27, + "learning_rate": 1.813481848264168e-06, + "loss": 0.132, + "step": 60840 + }, + { + "epoch": 2.27, + "learning_rate": 1.8130347116251856e-06, + "loss": 0.1496, + "step": 60870 + }, + { + "epoch": 2.27, + "learning_rate": 1.8125879055645884e-06, + "loss": 0.1164, + "step": 60900 + }, + { + "epoch": 2.27, + "learning_rate": 1.8121414296752363e-06, + "loss": 0.1095, + "step": 60930 + }, + { + "epoch": 2.27, + "learning_rate": 1.8116952835506918e-06, + "loss": 0.1405, + "step": 60960 + }, + { + "epoch": 2.27, + "learning_rate": 1.8112494667852163e-06, + "loss": 0.1231, + "step": 60990 + }, + { + "epoch": 2.28, + "learning_rate": 1.8108039789737708e-06, + "loss": 0.1263, + "step": 61020 + }, + { + "epoch": 2.28, + "learning_rate": 1.8103588197120128e-06, + "loss": 0.125, + "step": 61050 + }, + { + "epoch": 2.28, + "learning_rate": 1.8099139885962961e-06, + "loss": 0.138, + "step": 61080 + }, + { + "epoch": 2.28, + "learning_rate": 1.8094694852236673e-06, + "loss": 0.129, + "step": 61110 + }, + { + "epoch": 2.28, + "learning_rate": 1.8090253091918654e-06, + "loss": 0.1303, + "step": 61140 + }, + { + "epoch": 2.28, + "learning_rate": 1.8085814600993218e-06, + "loss": 0.1323, + "step": 61170 + }, + { + "epoch": 2.28, + "learning_rate": 1.8081379375451555e-06, + "loss": 0.1063, + "step": 61200 + }, + { + "epoch": 2.28, + "learning_rate": 1.807694741129174e-06, + "loss": 0.0986, + "step": 61230 + }, + { + "epoch": 2.28, + "learning_rate": 1.8072518704518715e-06, + "loss": 0.1299, + "step": 61260 + }, + { + "epoch": 2.29, + "learning_rate": 1.8068093251144268e-06, + "loss": 0.1464, + "step": 61290 + }, + { + "epoch": 2.29, + "learning_rate": 1.806367104718701e-06, + "loss": 0.1312, + "step": 61320 + }, + { + "epoch": 2.29, + "learning_rate": 1.805925208867238e-06, + "loss": 0.1306, + "step": 61350 + }, + { + "epoch": 2.29, + "learning_rate": 1.8054836371632622e-06, + "loss": 0.1175, + "step": 61380 + }, + { + "epoch": 2.29, + "learning_rate": 1.8050423892106764e-06, + "loss": 0.1434, + "step": 61410 + }, + { + "epoch": 2.29, + "learning_rate": 1.8046014646140606e-06, + "loss": 0.1421, + "step": 61440 + }, + { + "epoch": 2.29, + "learning_rate": 1.80416086297867e-06, + "loss": 0.117, + "step": 61470 + }, + { + "epoch": 2.29, + "learning_rate": 1.8037205839104353e-06, + "loss": 0.1353, + "step": 61500 + }, + { + "epoch": 2.29, + "learning_rate": 1.8032806270159595e-06, + "loss": 0.1557, + "step": 61530 + }, + { + "epoch": 2.3, + "learning_rate": 1.8028409919025168e-06, + "loss": 0.1324, + "step": 61560 + }, + { + "epoch": 2.3, + "learning_rate": 1.802401678178052e-06, + "loss": 0.1611, + "step": 61590 + }, + { + "epoch": 2.3, + "learning_rate": 1.8019626854511773e-06, + "loss": 0.1378, + "step": 61620 + }, + { + "epoch": 2.3, + "learning_rate": 1.8015240133311723e-06, + "loss": 0.1334, + "step": 61650 + }, + { + "epoch": 2.3, + "learning_rate": 1.8010856614279822e-06, + "loss": 0.1509, + "step": 61680 + }, + { + "epoch": 2.3, + "learning_rate": 1.8006476293522169e-06, + "loss": 0.1196, + "step": 61710 + }, + { + "epoch": 2.3, + "learning_rate": 1.8002099167151475e-06, + "loss": 0.1628, + "step": 61740 + }, + { + "epoch": 2.3, + "learning_rate": 1.7997725231287063e-06, + "loss": 0.1287, + "step": 61770 + }, + { + "epoch": 2.3, + "learning_rate": 1.7993354482054874e-06, + "loss": 0.1063, + "step": 61800 + }, + { + "epoch": 2.31, + "learning_rate": 1.7988986915587407e-06, + "loss": 0.1428, + "step": 61830 + }, + { + "epoch": 2.31, + "learning_rate": 1.798462252802374e-06, + "loss": 0.1239, + "step": 61860 + }, + { + "epoch": 2.31, + "learning_rate": 1.7980261315509503e-06, + "loss": 0.1267, + "step": 61890 + }, + { + "epoch": 2.31, + "learning_rate": 1.7975903274196872e-06, + "loss": 0.1421, + "step": 61920 + }, + { + "epoch": 2.31, + "learning_rate": 1.7971548400244536e-06, + "loss": 0.1186, + "step": 61950 + }, + { + "epoch": 2.31, + "learning_rate": 1.7967196689817703e-06, + "loss": 0.1345, + "step": 61980 + }, + { + "epoch": 2.31, + "learning_rate": 1.7962848139088083e-06, + "loss": 0.127, + "step": 62010 + }, + { + "epoch": 2.31, + "learning_rate": 1.7958502744233853e-06, + "loss": 0.12, + "step": 62040 + }, + { + "epoch": 2.32, + "learning_rate": 1.7954160501439674e-06, + "loss": 0.1149, + "step": 62070 + }, + { + "epoch": 2.32, + "learning_rate": 1.794982140689665e-06, + "loss": 0.1273, + "step": 62100 + }, + { + "epoch": 2.32, + "learning_rate": 1.7945485456802333e-06, + "loss": 0.1217, + "step": 62130 + }, + { + "epoch": 2.32, + "learning_rate": 1.7941152647360701e-06, + "loss": 0.1355, + "step": 62160 + }, + { + "epoch": 2.32, + "learning_rate": 1.7936822974782144e-06, + "loss": 0.1411, + "step": 62190 + }, + { + "epoch": 2.32, + "learning_rate": 1.7932496435283447e-06, + "loss": 0.1448, + "step": 62220 + }, + { + "epoch": 2.32, + "learning_rate": 1.7928173025087783e-06, + "loss": 0.1502, + "step": 62250 + }, + { + "epoch": 2.32, + "learning_rate": 1.7923852740424697e-06, + "loss": 0.1166, + "step": 62280 + }, + { + "epoch": 2.32, + "learning_rate": 1.7919535577530084e-06, + "loss": 0.1408, + "step": 62310 + }, + { + "epoch": 2.33, + "learning_rate": 1.7915221532646194e-06, + "loss": 0.1304, + "step": 62340 + }, + { + "epoch": 2.33, + "learning_rate": 1.7910910602021597e-06, + "loss": 0.1646, + "step": 62370 + }, + { + "epoch": 2.33, + "learning_rate": 1.7906602781911183e-06, + "loss": 0.117, + "step": 62400 + }, + { + "epoch": 2.33, + "learning_rate": 1.7902298068576147e-06, + "loss": 0.1251, + "step": 62430 + }, + { + "epoch": 2.33, + "learning_rate": 1.789799645828396e-06, + "loss": 0.134, + "step": 62460 + }, + { + "epoch": 2.33, + "learning_rate": 1.7893697947308383e-06, + "loss": 0.1444, + "step": 62490 + }, + { + "epoch": 2.33, + "learning_rate": 1.788940253192943e-06, + "loss": 0.1239, + "step": 62520 + }, + { + "epoch": 2.33, + "learning_rate": 1.788511020843337e-06, + "loss": 0.1473, + "step": 62550 + }, + { + "epoch": 2.33, + "learning_rate": 1.78808209731127e-06, + "loss": 0.1201, + "step": 62580 + }, + { + "epoch": 2.34, + "learning_rate": 1.7876534822266136e-06, + "loss": 0.1015, + "step": 62610 + }, + { + "epoch": 2.34, + "learning_rate": 1.7872251752198607e-06, + "loss": 0.1279, + "step": 62640 + }, + { + "epoch": 2.34, + "learning_rate": 1.786797175922124e-06, + "loss": 0.1349, + "step": 62670 + }, + { + "epoch": 2.34, + "learning_rate": 1.7863694839651327e-06, + "loss": 0.1354, + "step": 62700 + }, + { + "epoch": 2.34, + "learning_rate": 1.7859420989812349e-06, + "loss": 0.1395, + "step": 62730 + }, + { + "epoch": 2.34, + "learning_rate": 1.7855150206033921e-06, + "loss": 0.1443, + "step": 62760 + }, + { + "epoch": 2.34, + "learning_rate": 1.7850882484651816e-06, + "loss": 0.1144, + "step": 62790 + }, + { + "epoch": 2.34, + "learning_rate": 1.784661782200792e-06, + "loss": 0.1383, + "step": 62820 + }, + { + "epoch": 2.34, + "learning_rate": 1.7842356214450247e-06, + "loss": 0.1158, + "step": 62850 + }, + { + "epoch": 2.35, + "learning_rate": 1.78380976583329e-06, + "loss": 0.1306, + "step": 62880 + }, + { + "epoch": 2.35, + "learning_rate": 1.7833983951228248e-06, + "loss": 0.1603, + "step": 62910 + }, + { + "epoch": 2.35, + "learning_rate": 1.7829731385664483e-06, + "loss": 0.1303, + "step": 62940 + }, + { + "epoch": 2.35, + "learning_rate": 1.7825481860760678e-06, + "loss": 0.1474, + "step": 62970 + }, + { + "epoch": 2.35, + "learning_rate": 1.7821235372895012e-06, + "loss": 0.142, + "step": 63000 + }, + { + "epoch": 2.35, + "learning_rate": 1.7816991918451711e-06, + "loss": 0.1243, + "step": 63030 + }, + { + "epoch": 2.35, + "learning_rate": 1.7812751493821018e-06, + "loss": 0.1289, + "step": 63060 + }, + { + "epoch": 2.35, + "learning_rate": 1.7808514095399185e-06, + "loss": 0.1412, + "step": 63090 + }, + { + "epoch": 2.35, + "learning_rate": 1.7804279719588469e-06, + "loss": 0.1573, + "step": 63120 + }, + { + "epoch": 2.36, + "learning_rate": 1.7800048362797105e-06, + "loss": 0.1378, + "step": 63150 + }, + { + "epoch": 2.36, + "learning_rate": 1.7795820021439302e-06, + "loss": 0.1555, + "step": 63180 + }, + { + "epoch": 2.36, + "learning_rate": 1.7791594691935238e-06, + "loss": 0.1078, + "step": 63210 + }, + { + "epoch": 2.36, + "learning_rate": 1.7787372370711024e-06, + "loss": 0.1395, + "step": 63240 + }, + { + "epoch": 2.36, + "learning_rate": 1.778315305419871e-06, + "loss": 0.1537, + "step": 63270 + }, + { + "epoch": 2.36, + "learning_rate": 1.7778936738836273e-06, + "loss": 0.1295, + "step": 63300 + }, + { + "epoch": 2.36, + "learning_rate": 1.7774723421067596e-06, + "loss": 0.1323, + "step": 63330 + }, + { + "epoch": 2.36, + "learning_rate": 1.7770513097342456e-06, + "loss": 0.1363, + "step": 63360 + }, + { + "epoch": 2.36, + "learning_rate": 1.7766305764116515e-06, + "loss": 0.1299, + "step": 63390 + }, + { + "epoch": 2.37, + "learning_rate": 1.776210141785131e-06, + "loss": 0.1398, + "step": 63420 + }, + { + "epoch": 2.37, + "learning_rate": 1.7757900055014226e-06, + "loss": 0.1318, + "step": 63450 + }, + { + "epoch": 2.37, + "learning_rate": 1.7753701672078514e-06, + "loss": 0.1221, + "step": 63480 + }, + { + "epoch": 2.37, + "learning_rate": 1.7749506265523237e-06, + "loss": 0.1205, + "step": 63510 + }, + { + "epoch": 2.37, + "learning_rate": 1.7745313831833294e-06, + "loss": 0.1196, + "step": 63540 + }, + { + "epoch": 2.37, + "learning_rate": 1.7741124367499385e-06, + "loss": 0.1355, + "step": 63570 + }, + { + "epoch": 2.37, + "learning_rate": 1.7736937869018014e-06, + "loss": 0.1209, + "step": 63600 + }, + { + "epoch": 2.37, + "learning_rate": 1.7732754332891463e-06, + "loss": 0.1121, + "step": 63630 + }, + { + "epoch": 2.37, + "learning_rate": 1.7728573755627793e-06, + "loss": 0.1288, + "step": 63660 + }, + { + "epoch": 2.38, + "learning_rate": 1.7724396133740815e-06, + "loss": 0.1344, + "step": 63690 + }, + { + "epoch": 2.38, + "learning_rate": 1.7720221463750099e-06, + "loss": 0.1287, + "step": 63720 + }, + { + "epoch": 2.38, + "learning_rate": 1.771604974218094e-06, + "loss": 0.1217, + "step": 63750 + }, + { + "epoch": 2.38, + "learning_rate": 1.771188096556437e-06, + "loss": 0.1205, + "step": 63780 + }, + { + "epoch": 2.38, + "learning_rate": 1.7707715130437115e-06, + "loss": 0.1208, + "step": 63810 + }, + { + "epoch": 2.38, + "learning_rate": 1.7703552233341614e-06, + "loss": 0.1376, + "step": 63840 + }, + { + "epoch": 2.38, + "learning_rate": 1.7699392270825987e-06, + "loss": 0.124, + "step": 63870 + }, + { + "epoch": 2.38, + "learning_rate": 1.7695235239444037e-06, + "loss": 0.122, + "step": 63900 + }, + { + "epoch": 2.38, + "learning_rate": 1.7691081135755214e-06, + "loss": 0.1829, + "step": 63930 + }, + { + "epoch": 2.39, + "learning_rate": 1.7686929956324636e-06, + "loss": 0.1384, + "step": 63960 + }, + { + "epoch": 2.39, + "learning_rate": 1.7682781697723047e-06, + "loss": 0.1203, + "step": 63990 + }, + { + "epoch": 2.39, + "learning_rate": 1.7678636356526833e-06, + "loss": 0.1392, + "step": 64020 + }, + { + "epoch": 2.39, + "learning_rate": 1.7674493929317981e-06, + "loss": 0.145, + "step": 64050 + }, + { + "epoch": 2.39, + "learning_rate": 1.7670354412684088e-06, + "loss": 0.1608, + "step": 64080 + }, + { + "epoch": 2.39, + "learning_rate": 1.766621780321835e-06, + "loss": 0.1315, + "step": 64110 + }, + { + "epoch": 2.39, + "learning_rate": 1.7662084097519527e-06, + "loss": 0.1225, + "step": 64140 + }, + { + "epoch": 2.39, + "learning_rate": 1.765795329219196e-06, + "loss": 0.1484, + "step": 64170 + }, + { + "epoch": 2.39, + "learning_rate": 1.765382538384554e-06, + "loss": 0.1474, + "step": 64200 + }, + { + "epoch": 2.4, + "learning_rate": 1.7649700369095715e-06, + "loss": 0.1302, + "step": 64230 + }, + { + "epoch": 2.4, + "learning_rate": 1.7645578244563446e-06, + "loss": 0.1335, + "step": 64260 + }, + { + "epoch": 2.4, + "learning_rate": 1.7641459006875235e-06, + "loss": 0.1331, + "step": 64290 + }, + { + "epoch": 2.4, + "learning_rate": 1.7637342652663083e-06, + "loss": 0.1407, + "step": 64320 + }, + { + "epoch": 2.4, + "learning_rate": 1.7633229178564493e-06, + "loss": 0.1179, + "step": 64350 + }, + { + "epoch": 2.4, + "learning_rate": 1.7629118581222452e-06, + "loss": 0.1337, + "step": 64380 + }, + { + "epoch": 2.4, + "learning_rate": 1.7625010857285424e-06, + "loss": 0.1431, + "step": 64410 + }, + { + "epoch": 2.4, + "learning_rate": 1.7620906003407343e-06, + "loss": 0.121, + "step": 64440 + }, + { + "epoch": 2.4, + "learning_rate": 1.7616804016247588e-06, + "loss": 0.1222, + "step": 64470 + }, + { + "epoch": 2.41, + "learning_rate": 1.7612704892470981e-06, + "loss": 0.1214, + "step": 64500 + }, + { + "epoch": 2.41, + "learning_rate": 1.7608608628747776e-06, + "loss": 0.1377, + "step": 64530 + }, + { + "epoch": 2.41, + "learning_rate": 1.7604515221753638e-06, + "loss": 0.138, + "step": 64560 + }, + { + "epoch": 2.41, + "learning_rate": 1.7600424668169652e-06, + "loss": 0.1408, + "step": 64590 + }, + { + "epoch": 2.41, + "learning_rate": 1.759633696468229e-06, + "loss": 0.1168, + "step": 64620 + }, + { + "epoch": 2.41, + "learning_rate": 1.7592252107983406e-06, + "loss": 0.1144, + "step": 64650 + }, + { + "epoch": 2.41, + "learning_rate": 1.7588170094770235e-06, + "loss": 0.1349, + "step": 64680 + }, + { + "epoch": 2.41, + "learning_rate": 1.758409092174537e-06, + "loss": 0.133, + "step": 64710 + }, + { + "epoch": 2.41, + "learning_rate": 1.7580014585616753e-06, + "loss": 0.1372, + "step": 64740 + }, + { + "epoch": 2.42, + "learning_rate": 1.7575941083097669e-06, + "loss": 0.1289, + "step": 64770 + }, + { + "epoch": 2.42, + "learning_rate": 1.757187041090673e-06, + "loss": 0.1299, + "step": 64800 + }, + { + "epoch": 2.42, + "learning_rate": 1.756780256576786e-06, + "loss": 0.135, + "step": 64830 + }, + { + "epoch": 2.42, + "learning_rate": 1.756373754441031e-06, + "loss": 0.1343, + "step": 64860 + }, + { + "epoch": 2.42, + "learning_rate": 1.7559675343568594e-06, + "loss": 0.1417, + "step": 64890 + }, + { + "epoch": 2.42, + "learning_rate": 1.7555615959982538e-06, + "loss": 0.1317, + "step": 64920 + }, + { + "epoch": 2.42, + "learning_rate": 1.7551559390397228e-06, + "loss": 0.1475, + "step": 64950 + }, + { + "epoch": 2.42, + "learning_rate": 1.7547505631563013e-06, + "loss": 0.1257, + "step": 64980 + }, + { + "epoch": 2.42, + "learning_rate": 1.75434546802355e-06, + "loss": 0.1235, + "step": 65010 + }, + { + "epoch": 2.43, + "learning_rate": 1.753954142626517e-06, + "loss": 0.1328, + "step": 65040 + }, + { + "epoch": 2.43, + "learning_rate": 1.7535495986923073e-06, + "loss": 0.1292, + "step": 65070 + }, + { + "epoch": 2.43, + "learning_rate": 1.7531453345493427e-06, + "loss": 0.1148, + "step": 65100 + }, + { + "epoch": 2.43, + "learning_rate": 1.7527548115319052e-06, + "loss": 0.1447, + "step": 65130 + }, + { + "epoch": 2.43, + "learning_rate": 1.7523510967051263e-06, + "loss": 0.1528, + "step": 65160 + }, + { + "epoch": 2.43, + "learning_rate": 1.751947660714751e-06, + "loss": 0.1356, + "step": 65190 + }, + { + "epoch": 2.43, + "learning_rate": 1.7515445032399503e-06, + "loss": 0.1249, + "step": 65220 + }, + { + "epoch": 2.43, + "learning_rate": 1.7511416239604125e-06, + "loss": 0.1283, + "step": 65250 + }, + { + "epoch": 2.43, + "learning_rate": 1.7507390225563407e-06, + "loss": 0.1367, + "step": 65280 + }, + { + "epoch": 2.44, + "learning_rate": 1.750336698708453e-06, + "loss": 0.1744, + "step": 65310 + }, + { + "epoch": 2.44, + "learning_rate": 1.7499346520979806e-06, + "loss": 0.13, + "step": 65340 + }, + { + "epoch": 2.44, + "learning_rate": 1.749532882406668e-06, + "loss": 0.1157, + "step": 65370 + }, + { + "epoch": 2.44, + "learning_rate": 1.749131389316768e-06, + "loss": 0.1433, + "step": 65400 + }, + { + "epoch": 2.44, + "learning_rate": 1.748730172511047e-06, + "loss": 0.1172, + "step": 65430 + }, + { + "epoch": 2.44, + "learning_rate": 1.7483292316727791e-06, + "loss": 0.133, + "step": 65460 + }, + { + "epoch": 2.44, + "learning_rate": 1.7479285664857462e-06, + "loss": 0.1475, + "step": 65490 + }, + { + "epoch": 2.44, + "learning_rate": 1.7475281766342374e-06, + "loss": 0.1359, + "step": 65520 + }, + { + "epoch": 2.44, + "learning_rate": 1.7471280618030487e-06, + "loss": 0.1233, + "step": 65550 + }, + { + "epoch": 2.45, + "learning_rate": 1.7467282216774797e-06, + "loss": 0.1339, + "step": 65580 + }, + { + "epoch": 2.45, + "learning_rate": 1.7463286559433349e-06, + "loss": 0.1281, + "step": 65610 + }, + { + "epoch": 2.45, + "learning_rate": 1.7459293642869206e-06, + "loss": 0.1236, + "step": 65640 + }, + { + "epoch": 2.45, + "learning_rate": 1.7455303463950469e-06, + "loss": 0.1084, + "step": 65670 + }, + { + "epoch": 2.45, + "learning_rate": 1.7451316019550222e-06, + "loss": 0.1366, + "step": 65700 + }, + { + "epoch": 2.45, + "learning_rate": 1.7447331306546573e-06, + "loss": 0.1339, + "step": 65730 + }, + { + "epoch": 2.45, + "learning_rate": 1.7443349321822589e-06, + "loss": 0.1312, + "step": 65760 + }, + { + "epoch": 2.45, + "learning_rate": 1.743937006226634e-06, + "loss": 0.148, + "step": 65790 + }, + { + "epoch": 2.45, + "learning_rate": 1.7435393524770849e-06, + "loss": 0.1354, + "step": 65820 + }, + { + "epoch": 2.46, + "learning_rate": 1.7431419706234102e-06, + "loss": 0.1295, + "step": 65850 + }, + { + "epoch": 2.46, + "learning_rate": 1.7427448603559022e-06, + "loss": 0.1206, + "step": 65880 + }, + { + "epoch": 2.46, + "learning_rate": 1.7423480213653484e-06, + "loss": 0.1504, + "step": 65910 + }, + { + "epoch": 2.46, + "learning_rate": 1.741951453343028e-06, + "loss": 0.1348, + "step": 65940 + }, + { + "epoch": 2.46, + "learning_rate": 1.7415551559807117e-06, + "loss": 0.1427, + "step": 65970 + }, + { + "epoch": 2.46, + "learning_rate": 1.7411591289706613e-06, + "loss": 0.1443, + "step": 66000 + }, + { + "epoch": 2.46, + "learning_rate": 1.740763372005628e-06, + "loss": 0.1161, + "step": 66030 + }, + { + "epoch": 2.46, + "learning_rate": 1.740367884778852e-06, + "loss": 0.1284, + "step": 66060 + }, + { + "epoch": 2.46, + "learning_rate": 1.73997266698406e-06, + "loss": 0.1192, + "step": 66090 + }, + { + "epoch": 2.47, + "learning_rate": 1.739577718315467e-06, + "loss": 0.1527, + "step": 66120 + }, + { + "epoch": 2.47, + "learning_rate": 1.7391830384677721e-06, + "loss": 0.1139, + "step": 66150 + }, + { + "epoch": 2.47, + "learning_rate": 1.7387886271361605e-06, + "loss": 0.1251, + "step": 66180 + }, + { + "epoch": 2.47, + "learning_rate": 1.738394484016299e-06, + "loss": 0.1388, + "step": 66210 + }, + { + "epoch": 2.47, + "learning_rate": 1.7380006088043402e-06, + "loss": 0.1317, + "step": 66240 + }, + { + "epoch": 2.47, + "learning_rate": 1.737607001196915e-06, + "loss": 0.1217, + "step": 66270 + }, + { + "epoch": 2.47, + "learning_rate": 1.7372136608911374e-06, + "loss": 0.1292, + "step": 66300 + }, + { + "epoch": 2.47, + "learning_rate": 1.7368205875846004e-06, + "loss": 0.1529, + "step": 66330 + }, + { + "epoch": 2.48, + "learning_rate": 1.736427780975375e-06, + "loss": 0.1139, + "step": 66360 + }, + { + "epoch": 2.48, + "learning_rate": 1.7360352407620109e-06, + "loss": 0.1318, + "step": 66390 + }, + { + "epoch": 2.48, + "learning_rate": 1.7356429666435347e-06, + "loss": 0.1351, + "step": 66420 + }, + { + "epoch": 2.48, + "learning_rate": 1.7352509583194484e-06, + "loss": 0.1298, + "step": 66450 + }, + { + "epoch": 2.48, + "learning_rate": 1.7348592154897293e-06, + "loss": 0.1234, + "step": 66480 + }, + { + "epoch": 2.48, + "learning_rate": 1.7344677378548272e-06, + "loss": 0.1144, + "step": 66510 + }, + { + "epoch": 2.48, + "learning_rate": 1.734076525115667e-06, + "loss": 0.1293, + "step": 66540 + }, + { + "epoch": 2.48, + "learning_rate": 1.7336855769736436e-06, + "loss": 0.1262, + "step": 66570 + }, + { + "epoch": 2.48, + "learning_rate": 1.7332948931306248e-06, + "loss": 0.1397, + "step": 66600 + }, + { + "epoch": 2.49, + "learning_rate": 1.7329044732889466e-06, + "loss": 0.1356, + "step": 66630 + }, + { + "epoch": 2.49, + "learning_rate": 1.7325143171514161e-06, + "loss": 0.1253, + "step": 66660 + }, + { + "epoch": 2.49, + "learning_rate": 1.7321244244213068e-06, + "loss": 0.1445, + "step": 66690 + }, + { + "epoch": 2.49, + "learning_rate": 1.7317347948023603e-06, + "loss": 0.1342, + "step": 66720 + }, + { + "epoch": 2.49, + "learning_rate": 1.731345427998784e-06, + "loss": 0.1199, + "step": 66750 + }, + { + "epoch": 2.49, + "learning_rate": 1.7309563237152517e-06, + "loss": 0.1211, + "step": 66780 + }, + { + "epoch": 2.49, + "learning_rate": 1.7305674816569004e-06, + "loss": 0.1161, + "step": 66810 + }, + { + "epoch": 2.49, + "learning_rate": 1.7301789015293308e-06, + "loss": 0.1222, + "step": 66840 + }, + { + "epoch": 2.49, + "learning_rate": 1.7297905830386069e-06, + "loss": 0.1463, + "step": 66870 + }, + { + "epoch": 2.5, + "learning_rate": 1.7294025258912534e-06, + "loss": 0.1206, + "step": 66900 + }, + { + "epoch": 2.5, + "learning_rate": 1.729014729794256e-06, + "loss": 0.1558, + "step": 66930 + }, + { + "epoch": 2.5, + "learning_rate": 1.72862719445506e-06, + "loss": 0.1534, + "step": 66960 + }, + { + "epoch": 2.5, + "learning_rate": 1.7282399195815697e-06, + "loss": 0.1256, + "step": 66990 + }, + { + "epoch": 2.5, + "learning_rate": 1.7278529048821474e-06, + "loss": 0.1624, + "step": 67020 + }, + { + "epoch": 2.5, + "learning_rate": 1.727466150065612e-06, + "loss": 0.1436, + "step": 67050 + }, + { + "epoch": 2.5, + "learning_rate": 1.727079654841238e-06, + "loss": 0.1193, + "step": 67080 + }, + { + "epoch": 2.5, + "learning_rate": 1.7266934189187565e-06, + "loss": 0.1208, + "step": 67110 + }, + { + "epoch": 2.5, + "learning_rate": 1.7263074420083515e-06, + "loss": 0.1451, + "step": 67140 + }, + { + "epoch": 2.51, + "learning_rate": 1.7259217238206608e-06, + "loss": 0.1378, + "step": 67170 + }, + { + "epoch": 2.51, + "learning_rate": 1.7255362640667738e-06, + "loss": 0.1341, + "step": 67200 + }, + { + "epoch": 2.51, + "learning_rate": 1.725151062458233e-06, + "loss": 0.1367, + "step": 67230 + }, + { + "epoch": 2.51, + "learning_rate": 1.7247661187070297e-06, + "loss": 0.1245, + "step": 67260 + }, + { + "epoch": 2.51, + "learning_rate": 1.724394251251618e-06, + "loss": 0.1341, + "step": 67290 + }, + { + "epoch": 2.51, + "learning_rate": 1.724009813781396e-06, + "loss": 0.1256, + "step": 67320 + }, + { + "epoch": 2.51, + "learning_rate": 1.7236256333167308e-06, + "loss": 0.1293, + "step": 67350 + }, + { + "epoch": 2.51, + "learning_rate": 1.7232417095713918e-06, + "loss": 0.1201, + "step": 67380 + }, + { + "epoch": 2.51, + "learning_rate": 1.7228580422595966e-06, + "loss": 0.132, + "step": 67410 + }, + { + "epoch": 2.52, + "learning_rate": 1.7224746310960061e-06, + "loss": 0.1452, + "step": 67440 + }, + { + "epoch": 2.52, + "learning_rate": 1.7220914757957266e-06, + "loss": 0.1442, + "step": 67470 + }, + { + "epoch": 2.52, + "learning_rate": 1.721708576074308e-06, + "loss": 0.1237, + "step": 67500 + }, + { + "epoch": 2.52, + "learning_rate": 1.721325931647741e-06, + "loss": 0.1099, + "step": 67530 + }, + { + "epoch": 2.52, + "learning_rate": 1.72094354223246e-06, + "loss": 0.1321, + "step": 67560 + }, + { + "epoch": 2.52, + "learning_rate": 1.7205614075453395e-06, + "loss": 0.1209, + "step": 67590 + }, + { + "epoch": 2.52, + "learning_rate": 1.720179527303693e-06, + "loss": 0.1123, + "step": 67620 + }, + { + "epoch": 2.52, + "learning_rate": 1.7197979012252745e-06, + "loss": 0.1219, + "step": 67650 + }, + { + "epoch": 2.52, + "learning_rate": 1.7194165290282747e-06, + "loss": 0.1198, + "step": 67680 + }, + { + "epoch": 2.53, + "learning_rate": 1.7190354104313222e-06, + "loss": 0.1125, + "step": 67710 + }, + { + "epoch": 2.53, + "learning_rate": 1.7186545451534831e-06, + "loss": 0.1147, + "step": 67740 + }, + { + "epoch": 2.53, + "learning_rate": 1.7182739329142563e-06, + "loss": 0.1279, + "step": 67770 + }, + { + "epoch": 2.53, + "learning_rate": 1.7178935734335775e-06, + "loss": 0.1507, + "step": 67800 + }, + { + "epoch": 2.53, + "learning_rate": 1.7175134664318157e-06, + "loss": 0.1235, + "step": 67830 + }, + { + "epoch": 2.53, + "learning_rate": 1.717133611629773e-06, + "loss": 0.111, + "step": 67860 + }, + { + "epoch": 2.53, + "learning_rate": 1.7167540087486825e-06, + "loss": 0.111, + "step": 67890 + }, + { + "epoch": 2.53, + "learning_rate": 1.71637465751021e-06, + "loss": 0.1187, + "step": 67920 + }, + { + "epoch": 2.53, + "learning_rate": 1.7159955576364506e-06, + "loss": 0.1197, + "step": 67950 + }, + { + "epoch": 2.54, + "learning_rate": 1.7156167088499294e-06, + "loss": 0.1162, + "step": 67980 + }, + { + "epoch": 2.54, + "learning_rate": 1.7152381108735994e-06, + "loss": 0.1414, + "step": 68010 + }, + { + "epoch": 2.54, + "learning_rate": 1.714859763430842e-06, + "loss": 0.1314, + "step": 68040 + }, + { + "epoch": 2.54, + "learning_rate": 1.7144816662454657e-06, + "loss": 0.1337, + "step": 68070 + }, + { + "epoch": 2.54, + "learning_rate": 1.7141038190417047e-06, + "loss": 0.1477, + "step": 68100 + }, + { + "epoch": 2.54, + "learning_rate": 1.7137262215442186e-06, + "loss": 0.1408, + "step": 68130 + }, + { + "epoch": 2.54, + "learning_rate": 1.713348873478091e-06, + "loss": 0.1232, + "step": 68160 + }, + { + "epoch": 2.54, + "learning_rate": 1.7129717745688293e-06, + "loss": 0.1486, + "step": 68190 + }, + { + "epoch": 2.54, + "learning_rate": 1.712594924542364e-06, + "loss": 0.1385, + "step": 68220 + }, + { + "epoch": 2.55, + "learning_rate": 1.7122183231250472e-06, + "loss": 0.1458, + "step": 68250 + }, + { + "epoch": 2.55, + "learning_rate": 1.7118419700436517e-06, + "loss": 0.1231, + "step": 68280 + }, + { + "epoch": 2.55, + "learning_rate": 1.711465865025371e-06, + "loss": 0.1225, + "step": 68310 + }, + { + "epoch": 2.55, + "learning_rate": 1.7110900077978183e-06, + "loss": 0.13, + "step": 68340 + }, + { + "epoch": 2.55, + "learning_rate": 1.7107143980890237e-06, + "loss": 0.1255, + "step": 68370 + }, + { + "epoch": 2.55, + "learning_rate": 1.710339035627437e-06, + "loss": 0.1142, + "step": 68400 + }, + { + "epoch": 2.55, + "learning_rate": 1.7099639201419239e-06, + "loss": 0.132, + "step": 68430 + }, + { + "epoch": 2.55, + "learning_rate": 1.7095890513617658e-06, + "loss": 0.1368, + "step": 68460 + }, + { + "epoch": 2.55, + "learning_rate": 1.709214429016661e-06, + "loss": 0.1405, + "step": 68490 + }, + { + "epoch": 2.56, + "learning_rate": 1.7088400528367202e-06, + "loss": 0.1366, + "step": 68520 + }, + { + "epoch": 2.56, + "learning_rate": 1.708465922552469e-06, + "loss": 0.1501, + "step": 68550 + }, + { + "epoch": 2.56, + "learning_rate": 1.7080920378948456e-06, + "loss": 0.1163, + "step": 68580 + }, + { + "epoch": 2.56, + "learning_rate": 1.7077183985952001e-06, + "loss": 0.1612, + "step": 68610 + }, + { + "epoch": 2.56, + "learning_rate": 1.7073450043852934e-06, + "loss": 0.1368, + "step": 68640 + }, + { + "epoch": 2.56, + "learning_rate": 1.7069718549972975e-06, + "loss": 0.1204, + "step": 68670 + }, + { + "epoch": 2.56, + "learning_rate": 1.706598950163793e-06, + "loss": 0.1105, + "step": 68700 + }, + { + "epoch": 2.56, + "learning_rate": 1.7062262896177706e-06, + "loss": 0.1209, + "step": 68730 + }, + { + "epoch": 2.56, + "learning_rate": 1.7058538730926278e-06, + "loss": 0.1472, + "step": 68760 + }, + { + "epoch": 2.57, + "learning_rate": 1.7054817003221697e-06, + "loss": 0.1081, + "step": 68790 + }, + { + "epoch": 2.57, + "learning_rate": 1.7051097710406078e-06, + "loss": 0.1353, + "step": 68820 + }, + { + "epoch": 2.57, + "learning_rate": 1.7047380849825593e-06, + "loss": 0.1516, + "step": 68850 + }, + { + "epoch": 2.57, + "learning_rate": 1.7043666418830458e-06, + "loss": 0.1594, + "step": 68880 + }, + { + "epoch": 2.57, + "learning_rate": 1.7039954414774928e-06, + "loss": 0.1348, + "step": 68910 + }, + { + "epoch": 2.57, + "learning_rate": 1.703624483501729e-06, + "loss": 0.1525, + "step": 68940 + }, + { + "epoch": 2.57, + "learning_rate": 1.703253767691987e-06, + "loss": 0.1299, + "step": 68970 + }, + { + "epoch": 2.57, + "learning_rate": 1.7028832937848983e-06, + "loss": 0.1177, + "step": 69000 + }, + { + "epoch": 2.57, + "learning_rate": 1.7025130615174974e-06, + "loss": 0.129, + "step": 69030 + }, + { + "epoch": 2.58, + "learning_rate": 1.7021430706272177e-06, + "loss": 0.1141, + "step": 69060 + }, + { + "epoch": 2.58, + "learning_rate": 1.7017733208518927e-06, + "loss": 0.1381, + "step": 69090 + }, + { + "epoch": 2.58, + "learning_rate": 1.7014038119297534e-06, + "loss": 0.1344, + "step": 69120 + }, + { + "epoch": 2.58, + "learning_rate": 1.7010345435994293e-06, + "loss": 0.1369, + "step": 69150 + }, + { + "epoch": 2.58, + "learning_rate": 1.7006655155999465e-06, + "loss": 0.1376, + "step": 69180 + }, + { + "epoch": 2.58, + "learning_rate": 1.700296727670727e-06, + "loss": 0.1413, + "step": 69210 + }, + { + "epoch": 2.58, + "learning_rate": 1.699928179551589e-06, + "loss": 0.1448, + "step": 69240 + }, + { + "epoch": 2.58, + "learning_rate": 1.6995598709827443e-06, + "loss": 0.159, + "step": 69270 + }, + { + "epoch": 2.58, + "learning_rate": 1.6991918017047987e-06, + "loss": 0.117, + "step": 69300 + }, + { + "epoch": 2.59, + "learning_rate": 1.698823971458752e-06, + "loss": 0.1308, + "step": 69330 + }, + { + "epoch": 2.59, + "learning_rate": 1.6984563799859952e-06, + "loss": 0.1395, + "step": 69360 + }, + { + "epoch": 2.59, + "learning_rate": 1.6980890270283112e-06, + "loss": 0.1099, + "step": 69390 + }, + { + "epoch": 2.59, + "learning_rate": 1.697721912327874e-06, + "loss": 0.132, + "step": 69420 + }, + { + "epoch": 2.59, + "learning_rate": 1.6973550356272473e-06, + "loss": 0.1263, + "step": 69450 + }, + { + "epoch": 2.59, + "learning_rate": 1.6969883966693837e-06, + "loss": 0.1259, + "step": 69480 + }, + { + "epoch": 2.59, + "learning_rate": 1.6966219951976253e-06, + "loss": 0.1129, + "step": 69510 + }, + { + "epoch": 2.59, + "learning_rate": 1.696255830955701e-06, + "loss": 0.1204, + "step": 69540 + }, + { + "epoch": 2.59, + "learning_rate": 1.695889903687727e-06, + "loss": 0.1258, + "step": 69570 + }, + { + "epoch": 2.6, + "learning_rate": 1.6955242131382063e-06, + "loss": 0.192, + "step": 69600 + }, + { + "epoch": 2.6, + "learning_rate": 1.6951587590520263e-06, + "loss": 0.1266, + "step": 69630 + }, + { + "epoch": 2.6, + "learning_rate": 1.6947935411744596e-06, + "loss": 0.1154, + "step": 69660 + }, + { + "epoch": 2.6, + "learning_rate": 1.694428559251163e-06, + "loss": 0.1221, + "step": 69690 + }, + { + "epoch": 2.6, + "learning_rate": 1.694063813028177e-06, + "loss": 0.1197, + "step": 69720 + }, + { + "epoch": 2.6, + "learning_rate": 1.6936993022519233e-06, + "loss": 0.1561, + "step": 69750 + }, + { + "epoch": 2.6, + "learning_rate": 1.693335026669207e-06, + "loss": 0.1296, + "step": 69780 + }, + { + "epoch": 2.6, + "learning_rate": 1.6929709860272125e-06, + "loss": 0.136, + "step": 69810 + }, + { + "epoch": 2.6, + "learning_rate": 1.6926071800735058e-06, + "loss": 0.1368, + "step": 69840 + }, + { + "epoch": 2.61, + "learning_rate": 1.6922436085560317e-06, + "loss": 0.1193, + "step": 69870 + }, + { + "epoch": 2.61, + "learning_rate": 1.6918802712231148e-06, + "loss": 0.1259, + "step": 69900 + }, + { + "epoch": 2.61, + "learning_rate": 1.6915171678234566e-06, + "loss": 0.1172, + "step": 69930 + }, + { + "epoch": 2.61, + "learning_rate": 1.6911542981061369e-06, + "loss": 0.142, + "step": 69960 + }, + { + "epoch": 2.61, + "learning_rate": 1.6907916618206113e-06, + "loss": 0.163, + "step": 69990 + }, + { + "epoch": 2.61, + "learning_rate": 1.690429258716712e-06, + "loss": 0.1358, + "step": 70020 + }, + { + "epoch": 2.61, + "learning_rate": 1.6900670885446464e-06, + "loss": 0.1268, + "step": 70050 + }, + { + "epoch": 2.61, + "learning_rate": 1.6897051510549954e-06, + "loss": 0.1575, + "step": 70080 + }, + { + "epoch": 2.61, + "learning_rate": 1.6893434459987152e-06, + "loss": 0.1099, + "step": 70110 + }, + { + "epoch": 2.62, + "learning_rate": 1.688981973127133e-06, + "loss": 0.1692, + "step": 70140 + }, + { + "epoch": 2.62, + "learning_rate": 1.6886207321919508e-06, + "loss": 0.1387, + "step": 70170 + }, + { + "epoch": 2.62, + "learning_rate": 1.6882597229452405e-06, + "loss": 0.1381, + "step": 70200 + }, + { + "epoch": 2.62, + "learning_rate": 1.6878989451394446e-06, + "loss": 0.1406, + "step": 70230 + }, + { + "epoch": 2.62, + "learning_rate": 1.6875383985273766e-06, + "loss": 0.1322, + "step": 70260 + }, + { + "epoch": 2.62, + "learning_rate": 1.6871780828622196e-06, + "loss": 0.1357, + "step": 70290 + }, + { + "epoch": 2.62, + "learning_rate": 1.6868179978975248e-06, + "loss": 0.129, + "step": 70320 + }, + { + "epoch": 2.62, + "learning_rate": 1.6864581433872124e-06, + "loss": 0.136, + "step": 70350 + }, + { + "epoch": 2.62, + "learning_rate": 1.6860985190855678e-06, + "loss": 0.1737, + "step": 70380 + }, + { + "epoch": 2.63, + "learning_rate": 1.6857391247472457e-06, + "loss": 0.1339, + "step": 70410 + }, + { + "epoch": 2.63, + "learning_rate": 1.6853799601272647e-06, + "loss": 0.1383, + "step": 70440 + }, + { + "epoch": 2.63, + "learning_rate": 1.6850210249810098e-06, + "loss": 0.1398, + "step": 70470 + }, + { + "epoch": 2.63, + "learning_rate": 1.68466231906423e-06, + "loss": 0.133, + "step": 70500 + }, + { + "epoch": 2.63, + "learning_rate": 1.6843038421330383e-06, + "loss": 0.1246, + "step": 70530 + }, + { + "epoch": 2.63, + "learning_rate": 1.68394559394391e-06, + "loss": 0.1165, + "step": 70560 + }, + { + "epoch": 2.63, + "learning_rate": 1.6835875742536848e-06, + "loss": 0.1417, + "step": 70590 + }, + { + "epoch": 2.63, + "learning_rate": 1.6832297828195618e-06, + "loss": 0.1204, + "step": 70620 + }, + { + "epoch": 2.64, + "learning_rate": 1.6828722193991028e-06, + "loss": 0.1198, + "step": 70650 + }, + { + "epoch": 2.64, + "learning_rate": 1.6825148837502292e-06, + "loss": 0.1255, + "step": 70680 + }, + { + "epoch": 2.64, + "learning_rate": 1.6821577756312225e-06, + "loss": 0.108, + "step": 70710 + }, + { + "epoch": 2.64, + "learning_rate": 1.6818008948007225e-06, + "loss": 0.1433, + "step": 70740 + }, + { + "epoch": 2.64, + "learning_rate": 1.681444241017728e-06, + "loss": 0.1285, + "step": 70770 + }, + { + "epoch": 2.64, + "learning_rate": 1.6810878140415945e-06, + "loss": 0.148, + "step": 70800 + }, + { + "epoch": 2.64, + "learning_rate": 1.680731613632036e-06, + "loss": 0.1284, + "step": 70830 + }, + { + "epoch": 2.64, + "learning_rate": 1.6803756395491212e-06, + "loss": 0.1459, + "step": 70860 + }, + { + "epoch": 2.64, + "learning_rate": 1.6800198915532748e-06, + "loss": 0.132, + "step": 70890 + }, + { + "epoch": 2.65, + "learning_rate": 1.679664369405277e-06, + "loss": 0.137, + "step": 70920 + }, + { + "epoch": 2.65, + "learning_rate": 1.6793090728662615e-06, + "loss": 0.1349, + "step": 70950 + }, + { + "epoch": 2.65, + "learning_rate": 1.6789540016977157e-06, + "loss": 0.1346, + "step": 70980 + }, + { + "epoch": 2.65, + "learning_rate": 1.67859915566148e-06, + "loss": 0.1209, + "step": 71010 + }, + { + "epoch": 2.65, + "learning_rate": 1.678244534519747e-06, + "loss": 0.1226, + "step": 71040 + }, + { + "epoch": 2.65, + "learning_rate": 1.6778901380350609e-06, + "loss": 0.1397, + "step": 71070 + }, + { + "epoch": 2.65, + "learning_rate": 1.6775359659703164e-06, + "loss": 0.1666, + "step": 71100 + }, + { + "epoch": 2.65, + "learning_rate": 1.677182018088759e-06, + "loss": 0.1219, + "step": 71130 + }, + { + "epoch": 2.65, + "learning_rate": 1.6768282941539827e-06, + "loss": 0.1102, + "step": 71160 + }, + { + "epoch": 2.66, + "learning_rate": 1.6764747939299313e-06, + "loss": 0.1129, + "step": 71190 + }, + { + "epoch": 2.66, + "learning_rate": 1.676121517180897e-06, + "loss": 0.1192, + "step": 71220 + }, + { + "epoch": 2.66, + "learning_rate": 1.6757684636715186e-06, + "loss": 0.1267, + "step": 71250 + }, + { + "epoch": 2.66, + "learning_rate": 1.675415633166782e-06, + "loss": 0.1253, + "step": 71280 + }, + { + "epoch": 2.66, + "learning_rate": 1.6750630254320201e-06, + "loss": 0.1526, + "step": 71310 + }, + { + "epoch": 2.66, + "learning_rate": 1.6747106402329109e-06, + "loss": 0.1352, + "step": 71340 + }, + { + "epoch": 2.66, + "learning_rate": 1.6743584773354766e-06, + "loss": 0.124, + "step": 71370 + }, + { + "epoch": 2.66, + "learning_rate": 1.6740065365060848e-06, + "loss": 0.1198, + "step": 71400 + }, + { + "epoch": 2.66, + "learning_rate": 1.6736548175114462e-06, + "loss": 0.119, + "step": 71430 + }, + { + "epoch": 2.67, + "learning_rate": 1.6733033201186144e-06, + "loss": 0.1203, + "step": 71460 + }, + { + "epoch": 2.67, + "learning_rate": 1.6729520440949857e-06, + "loss": 0.1236, + "step": 71490 + }, + { + "epoch": 2.67, + "learning_rate": 1.6726009892082972e-06, + "loss": 0.137, + "step": 71520 + }, + { + "epoch": 2.67, + "learning_rate": 1.672250155226628e-06, + "loss": 0.1126, + "step": 71550 + }, + { + "epoch": 2.67, + "learning_rate": 1.6718995419183973e-06, + "loss": 0.1264, + "step": 71580 + }, + { + "epoch": 2.67, + "learning_rate": 1.6715491490523637e-06, + "loss": 0.1052, + "step": 71610 + }, + { + "epoch": 2.67, + "learning_rate": 1.6711989763976253e-06, + "loss": 0.1434, + "step": 71640 + }, + { + "epoch": 2.67, + "learning_rate": 1.6708606852710505e-06, + "loss": 0.164, + "step": 71670 + }, + { + "epoch": 2.67, + "learning_rate": 1.6705109450262383e-06, + "loss": 0.1292, + "step": 71700 + }, + { + "epoch": 2.68, + "learning_rate": 1.6701614243097006e-06, + "loss": 0.1275, + "step": 71730 + }, + { + "epoch": 2.68, + "learning_rate": 1.6698121228918743e-06, + "loss": 0.1123, + "step": 71760 + }, + { + "epoch": 2.68, + "learning_rate": 1.6694630405435322e-06, + "loss": 0.1452, + "step": 71790 + }, + { + "epoch": 2.68, + "learning_rate": 1.6691141770357822e-06, + "loss": 0.1189, + "step": 71820 + }, + { + "epoch": 2.68, + "learning_rate": 1.6687655321400668e-06, + "loss": 0.1196, + "step": 71850 + }, + { + "epoch": 2.68, + "learning_rate": 1.668417105628163e-06, + "loss": 0.1111, + "step": 71880 + }, + { + "epoch": 2.68, + "learning_rate": 1.6680688972721806e-06, + "loss": 0.1195, + "step": 71910 + }, + { + "epoch": 2.68, + "learning_rate": 1.667720906844563e-06, + "loss": 0.1359, + "step": 71940 + }, + { + "epoch": 2.68, + "learning_rate": 1.6673731341180851e-06, + "loss": 0.1419, + "step": 71970 + }, + { + "epoch": 2.69, + "learning_rate": 1.6670255788658535e-06, + "loss": 0.1338, + "step": 72000 + }, + { + "epoch": 2.69, + "learning_rate": 1.6666782408613056e-06, + "loss": 0.1178, + "step": 72030 + }, + { + "epoch": 2.69, + "learning_rate": 1.6663311198782094e-06, + "loss": 0.1335, + "step": 72060 + }, + { + "epoch": 2.69, + "learning_rate": 1.6659842156906629e-06, + "loss": 0.1428, + "step": 72090 + }, + { + "epoch": 2.69, + "learning_rate": 1.665637528073092e-06, + "loss": 0.1141, + "step": 72120 + }, + { + "epoch": 2.69, + "learning_rate": 1.6652910568002523e-06, + "loss": 0.1324, + "step": 72150 + }, + { + "epoch": 2.69, + "learning_rate": 1.6649448016472265e-06, + "loss": 0.1105, + "step": 72180 + }, + { + "epoch": 2.69, + "learning_rate": 1.6645987623894243e-06, + "loss": 0.1171, + "step": 72210 + }, + { + "epoch": 2.69, + "learning_rate": 1.664252938802584e-06, + "loss": 0.1304, + "step": 72240 + }, + { + "epoch": 2.7, + "learning_rate": 1.6639188474653653e-06, + "loss": 0.1418, + "step": 72270 + }, + { + "epoch": 2.7, + "learning_rate": 1.6635734473784454e-06, + "loss": 0.1655, + "step": 72300 + }, + { + "epoch": 2.7, + "learning_rate": 1.6632282622990878e-06, + "loss": 0.1287, + "step": 72330 + }, + { + "epoch": 2.7, + "learning_rate": 1.662883292004319e-06, + "loss": 0.1168, + "step": 72360 + }, + { + "epoch": 2.7, + "learning_rate": 1.6625385362714889e-06, + "loss": 0.1226, + "step": 72390 + }, + { + "epoch": 2.7, + "learning_rate": 1.6621939948782702e-06, + "loss": 0.114, + "step": 72420 + }, + { + "epoch": 2.7, + "learning_rate": 1.6618496676026586e-06, + "loss": 0.1378, + "step": 72450 + }, + { + "epoch": 2.7, + "learning_rate": 1.661505554222971e-06, + "loss": 0.1486, + "step": 72480 + }, + { + "epoch": 2.7, + "learning_rate": 1.6611616545178456e-06, + "loss": 0.166, + "step": 72510 + }, + { + "epoch": 2.71, + "learning_rate": 1.6608179682662409e-06, + "loss": 0.127, + "step": 72540 + }, + { + "epoch": 2.71, + "learning_rate": 1.6604744952474365e-06, + "loss": 0.132, + "step": 72570 + }, + { + "epoch": 2.71, + "learning_rate": 1.6601312352410301e-06, + "loss": 0.1409, + "step": 72600 + }, + { + "epoch": 2.71, + "learning_rate": 1.6597881880269391e-06, + "loss": 0.1251, + "step": 72630 + }, + { + "epoch": 2.71, + "learning_rate": 1.6594453533853988e-06, + "loss": 0.1233, + "step": 72660 + }, + { + "epoch": 2.71, + "learning_rate": 1.6591027310969617e-06, + "loss": 0.1673, + "step": 72690 + }, + { + "epoch": 2.71, + "learning_rate": 1.6587603209424982e-06, + "loss": 0.1573, + "step": 72720 + }, + { + "epoch": 2.71, + "learning_rate": 1.6584181227031944e-06, + "loss": 0.1496, + "step": 72750 + }, + { + "epoch": 2.71, + "learning_rate": 1.6580761361605527e-06, + "loss": 0.1375, + "step": 72780 + }, + { + "epoch": 2.72, + "learning_rate": 1.657734361096391e-06, + "loss": 0.1405, + "step": 72810 + }, + { + "epoch": 2.72, + "learning_rate": 1.65739279729284e-06, + "loss": 0.1278, + "step": 72840 + }, + { + "epoch": 2.72, + "learning_rate": 1.6570514445323473e-06, + "loss": 0.1159, + "step": 72870 + }, + { + "epoch": 2.72, + "learning_rate": 1.6567103025976724e-06, + "loss": 0.1367, + "step": 72900 + }, + { + "epoch": 2.72, + "learning_rate": 1.6563693712718881e-06, + "loss": 0.1064, + "step": 72930 + }, + { + "epoch": 2.72, + "learning_rate": 1.6560286503383794e-06, + "loss": 0.1339, + "step": 72960 + }, + { + "epoch": 2.72, + "learning_rate": 1.6556881395808428e-06, + "loss": 0.1459, + "step": 72990 + }, + { + "epoch": 2.72, + "learning_rate": 1.6553478387832867e-06, + "loss": 0.1296, + "step": 73020 + }, + { + "epoch": 2.72, + "learning_rate": 1.6550077477300302e-06, + "loss": 0.1061, + "step": 73050 + }, + { + "epoch": 2.73, + "learning_rate": 1.6546678662057017e-06, + "loss": 0.1268, + "step": 73080 + }, + { + "epoch": 2.73, + "learning_rate": 1.654328193995239e-06, + "loss": 0.1635, + "step": 73110 + }, + { + "epoch": 2.73, + "learning_rate": 1.65398873088389e-06, + "loss": 0.1153, + "step": 73140 + }, + { + "epoch": 2.73, + "learning_rate": 1.65364947665721e-06, + "loss": 0.1259, + "step": 73170 + }, + { + "epoch": 2.73, + "learning_rate": 1.653310431101062e-06, + "loss": 0.1366, + "step": 73200 + }, + { + "epoch": 2.73, + "learning_rate": 1.652971594001617e-06, + "loss": 0.1298, + "step": 73230 + }, + { + "epoch": 2.73, + "learning_rate": 1.6526329651453512e-06, + "loss": 0.125, + "step": 73260 + }, + { + "epoch": 2.73, + "learning_rate": 1.6522945443190482e-06, + "loss": 0.1218, + "step": 73290 + }, + { + "epoch": 2.73, + "learning_rate": 1.6519563313097975e-06, + "loss": 0.1287, + "step": 73320 + }, + { + "epoch": 2.74, + "learning_rate": 1.6516183259049915e-06, + "loss": 0.1331, + "step": 73350 + }, + { + "epoch": 2.74, + "learning_rate": 1.6512805278923288e-06, + "loss": 0.1167, + "step": 73380 + }, + { + "epoch": 2.74, + "learning_rate": 1.6509429370598115e-06, + "loss": 0.1302, + "step": 73410 + }, + { + "epoch": 2.74, + "learning_rate": 1.6506055531957439e-06, + "loss": 0.1303, + "step": 73440 + }, + { + "epoch": 2.74, + "learning_rate": 1.6502683760887347e-06, + "loss": 0.1249, + "step": 73470 + }, + { + "epoch": 2.74, + "learning_rate": 1.6499314055276932e-06, + "loss": 0.1246, + "step": 73500 + }, + { + "epoch": 2.74, + "learning_rate": 1.6495946413018313e-06, + "loss": 0.1197, + "step": 73530 + }, + { + "epoch": 2.74, + "learning_rate": 1.6492580832006616e-06, + "loss": 0.1325, + "step": 73560 + }, + { + "epoch": 2.74, + "learning_rate": 1.6489217310139972e-06, + "loss": 0.1582, + "step": 73590 + }, + { + "epoch": 2.75, + "learning_rate": 1.6485855845319507e-06, + "loss": 0.1284, + "step": 73620 + }, + { + "epoch": 2.75, + "learning_rate": 1.6482496435449352e-06, + "loss": 0.1203, + "step": 73650 + }, + { + "epoch": 2.75, + "learning_rate": 1.6479139078436612e-06, + "loss": 0.1214, + "step": 73680 + }, + { + "epoch": 2.75, + "learning_rate": 1.6475783772191392e-06, + "loss": 0.116, + "step": 73710 + }, + { + "epoch": 2.75, + "learning_rate": 1.6472430514626755e-06, + "loss": 0.1251, + "step": 73740 + }, + { + "epoch": 2.75, + "learning_rate": 1.6469079303658753e-06, + "loss": 0.141, + "step": 73770 + }, + { + "epoch": 2.75, + "learning_rate": 1.6465730137206392e-06, + "loss": 0.1403, + "step": 73800 + }, + { + "epoch": 2.75, + "learning_rate": 1.6462383013191651e-06, + "loss": 0.145, + "step": 73830 + }, + { + "epoch": 2.75, + "learning_rate": 1.645903792953945e-06, + "loss": 0.1302, + "step": 73860 + }, + { + "epoch": 2.76, + "learning_rate": 1.645569488417768e-06, + "loss": 0.1324, + "step": 73890 + }, + { + "epoch": 2.76, + "learning_rate": 1.6452353875037153e-06, + "loss": 0.1237, + "step": 73920 + }, + { + "epoch": 2.76, + "learning_rate": 1.6449014900051635e-06, + "loss": 0.1183, + "step": 73950 + }, + { + "epoch": 2.76, + "learning_rate": 1.644567795715783e-06, + "loss": 0.1438, + "step": 73980 + }, + { + "epoch": 2.76, + "learning_rate": 1.6442343044295355e-06, + "loss": 0.0983, + "step": 74010 + }, + { + "epoch": 2.76, + "learning_rate": 1.6439010159406765e-06, + "loss": 0.125, + "step": 74040 + }, + { + "epoch": 2.76, + "learning_rate": 1.6435679300437526e-06, + "loss": 0.1353, + "step": 74070 + }, + { + "epoch": 2.76, + "learning_rate": 1.6432350465336022e-06, + "loss": 0.1112, + "step": 74100 + }, + { + "epoch": 2.76, + "learning_rate": 1.6429023652053536e-06, + "loss": 0.1583, + "step": 74130 + }, + { + "epoch": 2.77, + "learning_rate": 1.642569885854426e-06, + "loss": 0.1328, + "step": 74160 + }, + { + "epoch": 2.77, + "learning_rate": 1.6422376082765283e-06, + "loss": 0.129, + "step": 74190 + }, + { + "epoch": 2.77, + "learning_rate": 1.641905532267658e-06, + "loss": 0.136, + "step": 74220 + }, + { + "epoch": 2.77, + "learning_rate": 1.6415736576241027e-06, + "loss": 0.1275, + "step": 74250 + }, + { + "epoch": 2.77, + "learning_rate": 1.6412419841424357e-06, + "loss": 0.1138, + "step": 74280 + }, + { + "epoch": 2.77, + "learning_rate": 1.6409105116195198e-06, + "loss": 0.1318, + "step": 74310 + }, + { + "epoch": 2.77, + "learning_rate": 1.6405792398525046e-06, + "loss": 0.134, + "step": 74340 + }, + { + "epoch": 2.77, + "learning_rate": 1.6402481686388257e-06, + "loss": 0.1285, + "step": 74370 + }, + { + "epoch": 2.77, + "learning_rate": 1.6399172977762055e-06, + "loss": 0.1327, + "step": 74400 + }, + { + "epoch": 2.78, + "learning_rate": 1.6395866270626506e-06, + "loss": 0.1371, + "step": 74430 + }, + { + "epoch": 2.78, + "learning_rate": 1.6392561562964538e-06, + "loss": 0.1307, + "step": 74460 + }, + { + "epoch": 2.78, + "learning_rate": 1.638925885276192e-06, + "loss": 0.1189, + "step": 74490 + }, + { + "epoch": 2.78, + "learning_rate": 1.6385958138007257e-06, + "loss": 0.1392, + "step": 74520 + }, + { + "epoch": 2.78, + "learning_rate": 1.6382659416692e-06, + "loss": 0.1304, + "step": 74550 + }, + { + "epoch": 2.78, + "learning_rate": 1.6379362686810413e-06, + "loss": 0.1366, + "step": 74580 + }, + { + "epoch": 2.78, + "learning_rate": 1.6376067946359594e-06, + "loss": 0.122, + "step": 74610 + }, + { + "epoch": 2.78, + "learning_rate": 1.637277519333946e-06, + "loss": 0.117, + "step": 74640 + }, + { + "epoch": 2.78, + "learning_rate": 1.6369484425752744e-06, + "loss": 0.1312, + "step": 74670 + }, + { + "epoch": 2.79, + "learning_rate": 1.6366195641604977e-06, + "loss": 0.1446, + "step": 74700 + }, + { + "epoch": 2.79, + "learning_rate": 1.6362908838904503e-06, + "loss": 0.1355, + "step": 74730 + }, + { + "epoch": 2.79, + "learning_rate": 1.6359624015662468e-06, + "loss": 0.112, + "step": 74760 + }, + { + "epoch": 2.79, + "learning_rate": 1.63563411698928e-06, + "loss": 0.1381, + "step": 74790 + }, + { + "epoch": 2.79, + "learning_rate": 1.6353060299612225e-06, + "loss": 0.1352, + "step": 74820 + }, + { + "epoch": 2.79, + "learning_rate": 1.6349781402840249e-06, + "loss": 0.1279, + "step": 74850 + }, + { + "epoch": 2.79, + "learning_rate": 1.634650447759916e-06, + "loss": 0.1435, + "step": 74880 + }, + { + "epoch": 2.79, + "learning_rate": 1.6343229521914015e-06, + "loss": 0.1055, + "step": 74910 + }, + { + "epoch": 2.8, + "learning_rate": 1.6339956533812637e-06, + "loss": 0.1178, + "step": 74940 + }, + { + "epoch": 2.8, + "learning_rate": 1.6336685511325629e-06, + "loss": 0.1602, + "step": 74970 + }, + { + "epoch": 2.8, + "learning_rate": 1.633341645248633e-06, + "loss": 0.1273, + "step": 75000 + }, + { + "epoch": 2.8, + "learning_rate": 1.6330149355330848e-06, + "loss": 0.1194, + "step": 75030 + }, + { + "epoch": 2.8, + "learning_rate": 1.6326884217898035e-06, + "loss": 0.1113, + "step": 75060 + }, + { + "epoch": 2.8, + "learning_rate": 1.6323621038229485e-06, + "loss": 0.1342, + "step": 75090 + }, + { + "epoch": 2.8, + "learning_rate": 1.6320359814369536e-06, + "loss": 0.1232, + "step": 75120 + }, + { + "epoch": 2.8, + "learning_rate": 1.6317100544365252e-06, + "loss": 0.1481, + "step": 75150 + }, + { + "epoch": 2.8, + "learning_rate": 1.6313843226266435e-06, + "loss": 0.1225, + "step": 75180 + }, + { + "epoch": 2.81, + "learning_rate": 1.6310587858125606e-06, + "loss": 0.1109, + "step": 75210 + }, + { + "epoch": 2.81, + "learning_rate": 1.6307334437998005e-06, + "loss": 0.128, + "step": 75240 + }, + { + "epoch": 2.81, + "learning_rate": 1.6304082963941586e-06, + "loss": 0.1299, + "step": 75270 + }, + { + "epoch": 2.81, + "learning_rate": 1.6300833434017019e-06, + "loss": 0.1102, + "step": 75300 + }, + { + "epoch": 2.81, + "learning_rate": 1.6297585846287667e-06, + "loss": 0.1546, + "step": 75330 + }, + { + "epoch": 2.81, + "learning_rate": 1.62943401988196e-06, + "loss": 0.1139, + "step": 75360 + }, + { + "epoch": 2.81, + "learning_rate": 1.6291096489681585e-06, + "loss": 0.1585, + "step": 75390 + }, + { + "epoch": 2.81, + "learning_rate": 1.6287854716945076e-06, + "loss": 0.1309, + "step": 75420 + }, + { + "epoch": 2.81, + "learning_rate": 1.6284614878684206e-06, + "loss": 0.1177, + "step": 75450 + }, + { + "epoch": 2.82, + "learning_rate": 1.6281376972975807e-06, + "loss": 0.1354, + "step": 75480 + }, + { + "epoch": 2.82, + "learning_rate": 1.627814099789936e-06, + "loss": 0.1256, + "step": 75510 + }, + { + "epoch": 2.82, + "learning_rate": 1.6274906951537042e-06, + "loss": 0.1198, + "step": 75540 + }, + { + "epoch": 2.82, + "learning_rate": 1.6271674831973684e-06, + "loss": 0.1113, + "step": 75570 + }, + { + "epoch": 2.82, + "learning_rate": 1.6268444637296777e-06, + "loss": 0.1301, + "step": 75600 + }, + { + "epoch": 2.82, + "learning_rate": 1.626521636559647e-06, + "loss": 0.13, + "step": 75630 + }, + { + "epoch": 2.82, + "learning_rate": 1.6261990014965568e-06, + "loss": 0.1195, + "step": 75660 + }, + { + "epoch": 2.82, + "learning_rate": 1.625876558349952e-06, + "loss": 0.1487, + "step": 75690 + }, + { + "epoch": 2.82, + "learning_rate": 1.6255543069296417e-06, + "loss": 0.1401, + "step": 75720 + }, + { + "epoch": 2.83, + "learning_rate": 1.6252322470456988e-06, + "loss": 0.1356, + "step": 75750 + }, + { + "epoch": 2.83, + "learning_rate": 1.62491037850846e-06, + "loss": 0.1466, + "step": 75780 + }, + { + "epoch": 2.83, + "learning_rate": 1.6245887011285238e-06, + "loss": 0.14, + "step": 75810 + }, + { + "epoch": 2.83, + "learning_rate": 1.6242672147167518e-06, + "loss": 0.1438, + "step": 75840 + }, + { + "epoch": 2.83, + "learning_rate": 1.6239459190842678e-06, + "loss": 0.1164, + "step": 75870 + }, + { + "epoch": 2.83, + "learning_rate": 1.6236248140424554e-06, + "loss": 0.1274, + "step": 75900 + }, + { + "epoch": 2.83, + "learning_rate": 1.6233038994029616e-06, + "loss": 0.1271, + "step": 75930 + }, + { + "epoch": 2.83, + "learning_rate": 1.6229831749776917e-06, + "loss": 0.1332, + "step": 75960 + }, + { + "epoch": 2.83, + "learning_rate": 1.6226626405788123e-06, + "loss": 0.1383, + "step": 75990 + }, + { + "epoch": 2.84, + "learning_rate": 1.6223422960187493e-06, + "loss": 0.1406, + "step": 76020 + }, + { + "epoch": 2.84, + "learning_rate": 1.6220221411101878e-06, + "loss": 0.1296, + "step": 76050 + }, + { + "epoch": 2.84, + "learning_rate": 1.621702175666071e-06, + "loss": 0.1257, + "step": 76080 + }, + { + "epoch": 2.84, + "learning_rate": 1.6213823994996006e-06, + "loss": 0.1331, + "step": 76110 + }, + { + "epoch": 2.84, + "learning_rate": 1.6210628124242367e-06, + "loss": 0.1133, + "step": 76140 + }, + { + "epoch": 2.84, + "learning_rate": 1.6207434142536954e-06, + "loss": 0.1475, + "step": 76170 + }, + { + "epoch": 2.84, + "learning_rate": 1.6204242048019508e-06, + "loss": 0.1178, + "step": 76200 + }, + { + "epoch": 2.84, + "learning_rate": 1.620105183883233e-06, + "loss": 0.146, + "step": 76230 + }, + { + "epoch": 2.84, + "learning_rate": 1.6197863513120277e-06, + "loss": 0.1307, + "step": 76260 + }, + { + "epoch": 2.85, + "learning_rate": 1.6194677069030761e-06, + "loss": 0.1189, + "step": 76290 + }, + { + "epoch": 2.85, + "learning_rate": 1.6191492504713747e-06, + "loss": 0.1067, + "step": 76320 + }, + { + "epoch": 2.85, + "learning_rate": 1.6188415877632176e-06, + "loss": 0.1414, + "step": 76350 + }, + { + "epoch": 2.85, + "learning_rate": 1.6185235004813931e-06, + "loss": 0.1605, + "step": 76380 + }, + { + "epoch": 2.85, + "learning_rate": 1.6182056006294704e-06, + "loss": 0.1265, + "step": 76410 + }, + { + "epoch": 2.85, + "learning_rate": 1.6178878880234536e-06, + "loss": 0.1211, + "step": 76440 + }, + { + "epoch": 2.85, + "learning_rate": 1.6175703624795988e-06, + "loss": 0.1318, + "step": 76470 + }, + { + "epoch": 2.85, + "learning_rate": 1.617253023814414e-06, + "loss": 0.1321, + "step": 76500 + }, + { + "epoch": 2.85, + "learning_rate": 1.6169358718446608e-06, + "loss": 0.1517, + "step": 76530 + }, + { + "epoch": 2.86, + "learning_rate": 1.6166189063873493e-06, + "loss": 0.1042, + "step": 76560 + }, + { + "epoch": 2.86, + "learning_rate": 1.6163021272597446e-06, + "loss": 0.1061, + "step": 76590 + }, + { + "epoch": 2.86, + "learning_rate": 1.6159855342793588e-06, + "loss": 0.1301, + "step": 76620 + }, + { + "epoch": 2.86, + "learning_rate": 1.6156691272639551e-06, + "loss": 0.112, + "step": 76650 + }, + { + "epoch": 2.86, + "learning_rate": 1.6153529060315482e-06, + "loss": 0.1173, + "step": 76680 + }, + { + "epoch": 2.86, + "learning_rate": 1.6150368704003994e-06, + "loss": 0.1286, + "step": 76710 + }, + { + "epoch": 2.86, + "learning_rate": 1.6147210201890205e-06, + "loss": 0.1261, + "step": 76740 + }, + { + "epoch": 2.86, + "learning_rate": 1.6144053552161706e-06, + "loss": 0.1122, + "step": 76770 + }, + { + "epoch": 2.86, + "learning_rate": 1.614089875300858e-06, + "loss": 0.1155, + "step": 76800 + }, + { + "epoch": 2.87, + "learning_rate": 1.613774580262337e-06, + "loss": 0.1299, + "step": 76830 + }, + { + "epoch": 2.87, + "learning_rate": 1.61345946992011e-06, + "loss": 0.1319, + "step": 76860 + }, + { + "epoch": 2.87, + "learning_rate": 1.613144544093926e-06, + "loss": 0.141, + "step": 76890 + }, + { + "epoch": 2.87, + "learning_rate": 1.6128298026037798e-06, + "loss": 0.1458, + "step": 76920 + }, + { + "epoch": 2.87, + "learning_rate": 1.6125152452699114e-06, + "loss": 0.1293, + "step": 76950 + }, + { + "epoch": 2.87, + "learning_rate": 1.6122008719128075e-06, + "loss": 0.1072, + "step": 76980 + }, + { + "epoch": 2.87, + "learning_rate": 1.611886682353198e-06, + "loss": 0.1663, + "step": 77010 + }, + { + "epoch": 2.87, + "learning_rate": 1.6115726764120597e-06, + "loss": 0.1185, + "step": 77040 + }, + { + "epoch": 2.87, + "learning_rate": 1.6112588539106105e-06, + "loss": 0.1152, + "step": 77070 + }, + { + "epoch": 2.88, + "learning_rate": 1.6109452146703141e-06, + "loss": 0.1354, + "step": 77100 + }, + { + "epoch": 2.88, + "learning_rate": 1.6106317585128764e-06, + "loss": 0.1285, + "step": 77130 + }, + { + "epoch": 2.88, + "learning_rate": 1.6103184852602463e-06, + "loss": 0.1265, + "step": 77160 + }, + { + "epoch": 2.88, + "learning_rate": 1.6100053947346149e-06, + "loss": 0.1186, + "step": 77190 + }, + { + "epoch": 2.88, + "learning_rate": 1.6096924867584152e-06, + "loss": 0.1134, + "step": 77220 + }, + { + "epoch": 2.88, + "learning_rate": 1.6093797611543222e-06, + "loss": 0.1244, + "step": 77250 + }, + { + "epoch": 2.88, + "learning_rate": 1.6090672177452509e-06, + "loss": 0.1286, + "step": 77280 + }, + { + "epoch": 2.88, + "learning_rate": 1.6087548563543582e-06, + "loss": 0.1144, + "step": 77310 + }, + { + "epoch": 2.88, + "learning_rate": 1.6084426768050402e-06, + "loss": 0.1276, + "step": 77340 + }, + { + "epoch": 2.89, + "learning_rate": 1.608130678920933e-06, + "loss": 0.1158, + "step": 77370 + }, + { + "epoch": 2.89, + "learning_rate": 1.6078188625259127e-06, + "loss": 0.148, + "step": 77400 + }, + { + "epoch": 2.89, + "learning_rate": 1.6075072274440942e-06, + "loss": 0.1105, + "step": 77430 + }, + { + "epoch": 2.89, + "learning_rate": 1.6071957734998298e-06, + "loss": 0.1044, + "step": 77460 + }, + { + "epoch": 2.89, + "learning_rate": 1.6068845005177113e-06, + "loss": 0.1486, + "step": 77490 + }, + { + "epoch": 2.89, + "learning_rate": 1.6065734083225676e-06, + "loss": 0.1365, + "step": 77520 + }, + { + "epoch": 2.89, + "learning_rate": 1.606262496739465e-06, + "loss": 0.1277, + "step": 77550 + }, + { + "epoch": 2.89, + "learning_rate": 1.6059517655937066e-06, + "loss": 0.1579, + "step": 77580 + }, + { + "epoch": 2.89, + "learning_rate": 1.605641214710832e-06, + "loss": 0.1188, + "step": 77610 + }, + { + "epoch": 2.9, + "learning_rate": 1.6053308439166174e-06, + "loss": 0.117, + "step": 77640 + }, + { + "epoch": 2.9, + "learning_rate": 1.6050206530370737e-06, + "loss": 0.1237, + "step": 77670 + }, + { + "epoch": 2.9, + "learning_rate": 1.6047106418984479e-06, + "loss": 0.1474, + "step": 77700 + }, + { + "epoch": 2.9, + "learning_rate": 1.604400810327221e-06, + "loss": 0.1049, + "step": 77730 + }, + { + "epoch": 2.9, + "learning_rate": 1.604091158150109e-06, + "loss": 0.1182, + "step": 77760 + }, + { + "epoch": 2.9, + "learning_rate": 1.6037816851940622e-06, + "loss": 0.1306, + "step": 77790 + }, + { + "epoch": 2.9, + "learning_rate": 1.6034723912862638e-06, + "loss": 0.1245, + "step": 77820 + }, + { + "epoch": 2.9, + "learning_rate": 1.6031632762541306e-06, + "loss": 0.1207, + "step": 77850 + }, + { + "epoch": 2.9, + "learning_rate": 1.602854339925312e-06, + "loss": 0.1281, + "step": 77880 + }, + { + "epoch": 2.91, + "learning_rate": 1.6025455821276895e-06, + "loss": 0.1307, + "step": 77910 + }, + { + "epoch": 2.91, + "learning_rate": 1.602237002689378e-06, + "loss": 0.1328, + "step": 77940 + }, + { + "epoch": 2.91, + "learning_rate": 1.6019286014387222e-06, + "loss": 0.1234, + "step": 77970 + }, + { + "epoch": 2.91, + "learning_rate": 1.6016203782042993e-06, + "loss": 0.1513, + "step": 78000 + }, + { + "epoch": 2.91, + "learning_rate": 1.6013123328149165e-06, + "loss": 0.1261, + "step": 78030 + }, + { + "epoch": 2.91, + "learning_rate": 1.6010044650996121e-06, + "loss": 0.1743, + "step": 78060 + }, + { + "epoch": 2.91, + "learning_rate": 1.6006967748876534e-06, + "loss": 0.1145, + "step": 78090 + }, + { + "epoch": 2.91, + "learning_rate": 1.6003892620085383e-06, + "loss": 0.1329, + "step": 78120 + }, + { + "epoch": 2.91, + "learning_rate": 1.600081926291993e-06, + "loss": 0.1111, + "step": 78150 + }, + { + "epoch": 2.92, + "learning_rate": 1.5997747675679737e-06, + "loss": 0.1281, + "step": 78180 + }, + { + "epoch": 2.92, + "learning_rate": 1.5994677856666639e-06, + "loss": 0.1112, + "step": 78210 + }, + { + "epoch": 2.92, + "learning_rate": 1.5991609804184753e-06, + "loss": 0.1142, + "step": 78240 + }, + { + "epoch": 2.92, + "learning_rate": 1.598854351654048e-06, + "loss": 0.149, + "step": 78270 + }, + { + "epoch": 2.92, + "learning_rate": 1.5985478992042483e-06, + "loss": 0.1226, + "step": 78300 + }, + { + "epoch": 2.92, + "learning_rate": 1.5982416229001699e-06, + "loss": 0.1297, + "step": 78330 + }, + { + "epoch": 2.92, + "learning_rate": 1.597935522573133e-06, + "loss": 0.1264, + "step": 78360 + }, + { + "epoch": 2.92, + "learning_rate": 1.5976295980546836e-06, + "loss": 0.144, + "step": 78390 + }, + { + "epoch": 2.92, + "learning_rate": 1.5973238491765936e-06, + "loss": 0.1262, + "step": 78420 + }, + { + "epoch": 2.93, + "learning_rate": 1.5970182757708593e-06, + "loss": 0.1319, + "step": 78450 + }, + { + "epoch": 2.93, + "learning_rate": 1.5967128776697042e-06, + "loss": 0.1319, + "step": 78480 + }, + { + "epoch": 2.93, + "learning_rate": 1.5964076547055735e-06, + "loss": 0.1383, + "step": 78510 + }, + { + "epoch": 2.93, + "learning_rate": 1.5961026067111377e-06, + "loss": 0.1365, + "step": 78540 + }, + { + "epoch": 2.93, + "learning_rate": 1.5957977335192917e-06, + "loss": 0.1186, + "step": 78570 + }, + { + "epoch": 2.93, + "learning_rate": 1.595493034963153e-06, + "loss": 0.1287, + "step": 78600 + }, + { + "epoch": 2.93, + "learning_rate": 1.5951885108760623e-06, + "loss": 0.1259, + "step": 78630 + }, + { + "epoch": 2.93, + "learning_rate": 1.5948841610915825e-06, + "loss": 0.1007, + "step": 78660 + }, + { + "epoch": 2.93, + "learning_rate": 1.5945799854434987e-06, + "loss": 0.1322, + "step": 78690 + }, + { + "epoch": 2.94, + "learning_rate": 1.594275983765819e-06, + "loss": 0.1383, + "step": 78720 + }, + { + "epoch": 2.94, + "learning_rate": 1.5939721558927715e-06, + "loss": 0.1543, + "step": 78750 + }, + { + "epoch": 2.94, + "learning_rate": 1.5936685016588058e-06, + "loss": 0.1241, + "step": 78780 + }, + { + "epoch": 2.94, + "learning_rate": 1.5933650208985923e-06, + "loss": 0.1312, + "step": 78810 + }, + { + "epoch": 2.94, + "learning_rate": 1.5930617134470222e-06, + "loss": 0.1462, + "step": 78840 + }, + { + "epoch": 2.94, + "learning_rate": 1.5927686808283344e-06, + "loss": 0.1193, + "step": 78870 + }, + { + "epoch": 2.94, + "learning_rate": 1.5924657137362828e-06, + "loss": 0.1061, + "step": 78900 + }, + { + "epoch": 2.94, + "learning_rate": 1.5921629194643422e-06, + "loss": 0.126, + "step": 78930 + }, + { + "epoch": 2.94, + "learning_rate": 1.591860297848274e-06, + "loss": 0.1174, + "step": 78960 + }, + { + "epoch": 2.95, + "learning_rate": 1.5915578487240574e-06, + "loss": 0.1343, + "step": 78990 + }, + { + "epoch": 2.95, + "learning_rate": 1.591255571927889e-06, + "loss": 0.1251, + "step": 79020 + }, + { + "epoch": 2.95, + "learning_rate": 1.5909534672961845e-06, + "loss": 0.104, + "step": 79050 + }, + { + "epoch": 2.95, + "learning_rate": 1.590651534665576e-06, + "loss": 0.1217, + "step": 79080 + }, + { + "epoch": 2.95, + "learning_rate": 1.5903497738729133e-06, + "loss": 0.1201, + "step": 79110 + }, + { + "epoch": 2.95, + "learning_rate": 1.5900481847552615e-06, + "loss": 0.1156, + "step": 79140 + }, + { + "epoch": 2.95, + "learning_rate": 1.5897467671499027e-06, + "loss": 0.1092, + "step": 79170 + }, + { + "epoch": 2.95, + "learning_rate": 1.5894455208943354e-06, + "loss": 0.1086, + "step": 79200 + }, + { + "epoch": 2.96, + "learning_rate": 1.5891444458262723e-06, + "loss": 0.1487, + "step": 79230 + }, + { + "epoch": 2.96, + "learning_rate": 1.5888435417836425e-06, + "loss": 0.1268, + "step": 79260 + }, + { + "epoch": 2.96, + "learning_rate": 1.588542808604589e-06, + "loss": 0.1304, + "step": 79290 + }, + { + "epoch": 2.96, + "learning_rate": 1.5882422461274692e-06, + "loss": 0.12, + "step": 79320 + }, + { + "epoch": 2.96, + "learning_rate": 1.5879418541908546e-06, + "loss": 0.1192, + "step": 79350 + }, + { + "epoch": 2.96, + "learning_rate": 1.587641632633531e-06, + "loss": 0.1381, + "step": 79380 + }, + { + "epoch": 2.96, + "learning_rate": 1.587341581294496e-06, + "loss": 0.1398, + "step": 79410 + }, + { + "epoch": 2.96, + "learning_rate": 1.5870417000129618e-06, + "loss": 0.1147, + "step": 79440 + }, + { + "epoch": 2.96, + "learning_rate": 1.5867419886283522e-06, + "loss": 0.1287, + "step": 79470 + }, + { + "epoch": 2.97, + "learning_rate": 1.5864424469803034e-06, + "loss": 0.1311, + "step": 79500 + }, + { + "epoch": 2.97, + "learning_rate": 1.5861430749086632e-06, + "loss": 0.1002, + "step": 79530 + }, + { + "epoch": 2.97, + "learning_rate": 1.5858438722534912e-06, + "loss": 0.121, + "step": 79560 + }, + { + "epoch": 2.97, + "learning_rate": 1.5855448388550579e-06, + "loss": 0.1285, + "step": 79590 + }, + { + "epoch": 2.97, + "learning_rate": 1.5852459745538446e-06, + "loss": 0.124, + "step": 79620 + }, + { + "epoch": 2.97, + "learning_rate": 1.5849472791905432e-06, + "loss": 0.1084, + "step": 79650 + }, + { + "epoch": 2.97, + "learning_rate": 1.584648752606055e-06, + "loss": 0.1275, + "step": 79680 + }, + { + "epoch": 2.97, + "learning_rate": 1.5843503946414926e-06, + "loss": 0.1033, + "step": 79710 + }, + { + "epoch": 2.97, + "learning_rate": 1.5840522051381754e-06, + "loss": 0.1243, + "step": 79740 + }, + { + "epoch": 2.98, + "learning_rate": 1.5837541839376345e-06, + "loss": 0.1235, + "step": 79770 + }, + { + "epoch": 2.98, + "learning_rate": 1.583456330881607e-06, + "loss": 0.1354, + "step": 79800 + }, + { + "epoch": 2.98, + "learning_rate": 1.5831586458120407e-06, + "loss": 0.133, + "step": 79830 + }, + { + "epoch": 2.98, + "learning_rate": 1.5828611285710897e-06, + "loss": 0.1308, + "step": 79860 + }, + { + "epoch": 2.98, + "learning_rate": 1.5825637790011156e-06, + "loss": 0.1371, + "step": 79890 + }, + { + "epoch": 2.98, + "learning_rate": 1.5822665969446885e-06, + "loss": 0.1257, + "step": 79920 + }, + { + "epoch": 2.98, + "learning_rate": 1.5819695822445844e-06, + "loss": 0.1381, + "step": 79950 + }, + { + "epoch": 2.98, + "learning_rate": 1.581672734743786e-06, + "loss": 0.1247, + "step": 79980 + }, + { + "epoch": 2.98, + "learning_rate": 1.5813760542854823e-06, + "loss": 0.1273, + "step": 80010 + }, + { + "epoch": 2.99, + "learning_rate": 1.581079540713068e-06, + "loss": 0.1241, + "step": 80040 + }, + { + "epoch": 2.99, + "learning_rate": 1.5807831938701429e-06, + "loss": 0.1138, + "step": 80070 + }, + { + "epoch": 2.99, + "learning_rate": 1.5804870136005133e-06, + "loss": 0.1325, + "step": 80100 + }, + { + "epoch": 2.99, + "learning_rate": 1.5801909997481882e-06, + "loss": 0.1183, + "step": 80130 + }, + { + "epoch": 2.99, + "learning_rate": 1.5798951521573824e-06, + "loss": 0.1255, + "step": 80160 + }, + { + "epoch": 2.99, + "learning_rate": 1.5795994706725152e-06, + "loss": 0.1126, + "step": 80190 + }, + { + "epoch": 2.99, + "learning_rate": 1.5793039551382084e-06, + "loss": 0.1329, + "step": 80220 + }, + { + "epoch": 2.99, + "learning_rate": 1.5790086053992875e-06, + "loss": 0.1352, + "step": 80250 + }, + { + "epoch": 2.99, + "learning_rate": 1.5787134213007817e-06, + "loss": 0.1207, + "step": 80280 + }, + { + "epoch": 3.0, + "learning_rate": 1.578418402687923e-06, + "loss": 0.1156, + "step": 80310 + }, + { + "epoch": 3.0, + "learning_rate": 1.5781235494061444e-06, + "loss": 0.1152, + "step": 80340 + }, + { + "epoch": 3.0, + "learning_rate": 1.5778288613010825e-06, + "loss": 0.1198, + "step": 80370 + }, + { + "epoch": 3.0, + "learning_rate": 1.577534338218575e-06, + "loss": 0.1269, + "step": 80400 + }, + { + "epoch": 3.0, + "learning_rate": 1.5772399800046603e-06, + "loss": 0.1328, + "step": 80430 + }, + { + "epoch": 3.0, + "learning_rate": 1.576945786505579e-06, + "loss": 0.1224, + "step": 80460 + }, + { + "epoch": 3.0, + "learning_rate": 1.5766517575677718e-06, + "loss": 0.1253, + "step": 80490 + }, + { + "epoch": 3.0, + "learning_rate": 1.5763578930378794e-06, + "loss": 0.1651, + "step": 80520 + }, + { + "epoch": 3.0, + "learning_rate": 1.5760641927627434e-06, + "loss": 0.0942, + "step": 80550 + }, + { + "epoch": 3.01, + "learning_rate": 1.5757706565894044e-06, + "loss": 0.1177, + "step": 80580 + }, + { + "epoch": 3.01, + "learning_rate": 1.5754772843651023e-06, + "loss": 0.1323, + "step": 80610 + }, + { + "epoch": 3.01, + "learning_rate": 1.5751840759372764e-06, + "loss": 0.1054, + "step": 80640 + }, + { + "epoch": 3.01, + "learning_rate": 1.5748910311535644e-06, + "loss": 0.129, + "step": 80670 + }, + { + "epoch": 3.01, + "learning_rate": 1.5745981498618028e-06, + "loss": 0.1538, + "step": 80700 + }, + { + "epoch": 3.01, + "learning_rate": 1.5743054319100251e-06, + "loss": 0.1182, + "step": 80730 + }, + { + "epoch": 3.01, + "learning_rate": 1.5740128771464635e-06, + "loss": 0.1425, + "step": 80760 + }, + { + "epoch": 3.01, + "learning_rate": 1.5737204854195472e-06, + "loss": 0.1119, + "step": 80790 + }, + { + "epoch": 3.01, + "learning_rate": 1.5734282565779023e-06, + "loss": 0.1081, + "step": 80820 + }, + { + "epoch": 3.02, + "learning_rate": 1.573136190470352e-06, + "loss": 0.1015, + "step": 80850 + }, + { + "epoch": 3.02, + "learning_rate": 1.5728442869459149e-06, + "loss": 0.1261, + "step": 80880 + }, + { + "epoch": 3.02, + "learning_rate": 1.5725525458538072e-06, + "loss": 0.1303, + "step": 80910 + }, + { + "epoch": 3.02, + "learning_rate": 1.5722609670434387e-06, + "loss": 0.1189, + "step": 80940 + }, + { + "epoch": 3.02, + "learning_rate": 1.5719695503644169e-06, + "loss": 0.1273, + "step": 80970 + }, + { + "epoch": 3.02, + "learning_rate": 1.571678295666543e-06, + "loss": 0.1485, + "step": 81000 + }, + { + "epoch": 3.02, + "learning_rate": 1.5713872027998126e-06, + "loss": 0.1234, + "step": 81030 + }, + { + "epoch": 3.02, + "learning_rate": 1.5711059667173108e-06, + "loss": 0.1268, + "step": 81060 + }, + { + "epoch": 3.02, + "learning_rate": 1.5708151916816517e-06, + "loss": 0.1171, + "step": 81090 + }, + { + "epoch": 3.03, + "learning_rate": 1.570524578033268e-06, + "loss": 0.1064, + "step": 81120 + }, + { + "epoch": 3.03, + "learning_rate": 1.5702341256229242e-06, + "loss": 0.1265, + "step": 81150 + }, + { + "epoch": 3.03, + "learning_rate": 1.5699438343015793e-06, + "loss": 0.1359, + "step": 81180 + }, + { + "epoch": 3.03, + "learning_rate": 1.569653703920385e-06, + "loss": 0.1267, + "step": 81210 + }, + { + "epoch": 3.03, + "learning_rate": 1.5693637343306845e-06, + "loss": 0.1095, + "step": 81240 + }, + { + "epoch": 3.03, + "learning_rate": 1.569073925384014e-06, + "loss": 0.1374, + "step": 81270 + }, + { + "epoch": 3.03, + "learning_rate": 1.5687842769321014e-06, + "loss": 0.1208, + "step": 81300 + }, + { + "epoch": 3.03, + "learning_rate": 1.568494788826866e-06, + "loss": 0.1166, + "step": 81330 + }, + { + "epoch": 3.03, + "learning_rate": 1.5682054609204182e-06, + "loss": 0.1039, + "step": 81360 + }, + { + "epoch": 3.04, + "learning_rate": 1.5679162930650595e-06, + "loss": 0.1325, + "step": 81390 + }, + { + "epoch": 3.04, + "learning_rate": 1.5676272851132824e-06, + "loss": 0.1569, + "step": 81420 + }, + { + "epoch": 3.04, + "learning_rate": 1.5673384369177683e-06, + "loss": 0.1288, + "step": 81450 + }, + { + "epoch": 3.04, + "learning_rate": 1.56704974833139e-06, + "loss": 0.1136, + "step": 81480 + }, + { + "epoch": 3.04, + "learning_rate": 1.5667612192072093e-06, + "loss": 0.1078, + "step": 81510 + }, + { + "epoch": 3.04, + "learning_rate": 1.5664728493984767e-06, + "loss": 0.133, + "step": 81540 + }, + { + "epoch": 3.04, + "learning_rate": 1.5661846387586333e-06, + "loss": 0.1134, + "step": 81570 + }, + { + "epoch": 3.04, + "learning_rate": 1.5658965871413072e-06, + "loss": 0.1342, + "step": 81600 + }, + { + "epoch": 3.04, + "learning_rate": 1.565608694400316e-06, + "loss": 0.1197, + "step": 81630 + }, + { + "epoch": 3.05, + "learning_rate": 1.5653209603896647e-06, + "loss": 0.134, + "step": 81660 + }, + { + "epoch": 3.05, + "learning_rate": 1.5650333849635466e-06, + "loss": 0.124, + "step": 81690 + }, + { + "epoch": 3.05, + "learning_rate": 1.5647459679763419e-06, + "loss": 0.1352, + "step": 81720 + }, + { + "epoch": 3.05, + "learning_rate": 1.5644587092826183e-06, + "loss": 0.1432, + "step": 81750 + }, + { + "epoch": 3.05, + "learning_rate": 1.5641716087371303e-06, + "loss": 0.1214, + "step": 81780 + }, + { + "epoch": 3.05, + "learning_rate": 1.5638846661948185e-06, + "loss": 0.1189, + "step": 81810 + }, + { + "epoch": 3.05, + "learning_rate": 1.5635978815108104e-06, + "loss": 0.139, + "step": 81840 + }, + { + "epoch": 3.05, + "learning_rate": 1.563311254540419e-06, + "loss": 0.141, + "step": 81870 + }, + { + "epoch": 3.05, + "learning_rate": 1.5630247851391424e-06, + "loss": 0.1134, + "step": 81900 + }, + { + "epoch": 3.06, + "learning_rate": 1.5627384731626653e-06, + "loss": 0.1101, + "step": 81930 + }, + { + "epoch": 3.06, + "learning_rate": 1.562452318466856e-06, + "loss": 0.1219, + "step": 81960 + }, + { + "epoch": 3.06, + "learning_rate": 1.5621663209077684e-06, + "loss": 0.1193, + "step": 81990 + }, + { + "epoch": 3.06, + "learning_rate": 1.56188048034164e-06, + "loss": 0.1315, + "step": 82020 + }, + { + "epoch": 3.06, + "learning_rate": 1.5615947966248937e-06, + "loss": 0.1027, + "step": 82050 + }, + { + "epoch": 3.06, + "learning_rate": 1.5613092696141343e-06, + "loss": 0.1191, + "step": 82080 + }, + { + "epoch": 3.06, + "learning_rate": 1.5610238991661516e-06, + "loss": 0.1327, + "step": 82110 + }, + { + "epoch": 3.06, + "learning_rate": 1.5607386851379177e-06, + "loss": 0.1149, + "step": 82140 + }, + { + "epoch": 3.06, + "learning_rate": 1.5604536273865881e-06, + "loss": 0.1186, + "step": 82170 + }, + { + "epoch": 3.07, + "learning_rate": 1.5601687257695005e-06, + "loss": 0.1243, + "step": 82200 + }, + { + "epoch": 3.07, + "learning_rate": 1.5598839801441752e-06, + "loss": 0.1272, + "step": 82230 + }, + { + "epoch": 3.07, + "learning_rate": 1.5595993903683138e-06, + "loss": 0.1184, + "step": 82260 + }, + { + "epoch": 3.07, + "learning_rate": 1.5593149562998e-06, + "loss": 0.1334, + "step": 82290 + }, + { + "epoch": 3.07, + "learning_rate": 1.5590306777966993e-06, + "loss": 0.129, + "step": 82320 + }, + { + "epoch": 3.07, + "learning_rate": 1.5587465547172577e-06, + "loss": 0.1391, + "step": 82350 + }, + { + "epoch": 3.07, + "learning_rate": 1.558462586919902e-06, + "loss": 0.1281, + "step": 82380 + }, + { + "epoch": 3.07, + "learning_rate": 1.5581787742632399e-06, + "loss": 0.1231, + "step": 82410 + }, + { + "epoch": 3.07, + "learning_rate": 1.5578951166060582e-06, + "loss": 0.1002, + "step": 82440 + }, + { + "epoch": 3.08, + "learning_rate": 1.557611613807325e-06, + "loss": 0.1116, + "step": 82470 + }, + { + "epoch": 3.08, + "learning_rate": 1.5573282657261874e-06, + "loss": 0.1374, + "step": 82500 + }, + { + "epoch": 3.08, + "learning_rate": 1.5570450722219712e-06, + "loss": 0.12, + "step": 82530 + }, + { + "epoch": 3.08, + "learning_rate": 1.556762033154182e-06, + "loss": 0.1218, + "step": 82560 + }, + { + "epoch": 3.08, + "learning_rate": 1.556479148382504e-06, + "loss": 0.1386, + "step": 82590 + }, + { + "epoch": 3.08, + "learning_rate": 1.5561964177667995e-06, + "loss": 0.1384, + "step": 82620 + }, + { + "epoch": 3.08, + "learning_rate": 1.5559138411671087e-06, + "loss": 0.1249, + "step": 82650 + }, + { + "epoch": 3.08, + "learning_rate": 1.5556314184436507e-06, + "loss": 0.1146, + "step": 82680 + }, + { + "epoch": 3.08, + "learning_rate": 1.5553491494568212e-06, + "loss": 0.1199, + "step": 82710 + }, + { + "epoch": 3.09, + "learning_rate": 1.555067034067193e-06, + "loss": 0.1184, + "step": 82740 + }, + { + "epoch": 3.09, + "learning_rate": 1.554785072135517e-06, + "loss": 0.108, + "step": 82770 + }, + { + "epoch": 3.09, + "learning_rate": 1.5545032635227195e-06, + "loss": 0.1385, + "step": 82800 + }, + { + "epoch": 3.09, + "learning_rate": 1.554221608089904e-06, + "loss": 0.1237, + "step": 82830 + }, + { + "epoch": 3.09, + "learning_rate": 1.5539401056983492e-06, + "loss": 0.136, + "step": 82860 + }, + { + "epoch": 3.09, + "learning_rate": 1.553658756209511e-06, + "loss": 0.1085, + "step": 82890 + }, + { + "epoch": 3.09, + "learning_rate": 1.55337755948502e-06, + "loss": 0.1238, + "step": 82920 + }, + { + "epoch": 3.09, + "learning_rate": 1.5530965153866817e-06, + "loss": 0.1196, + "step": 82950 + }, + { + "epoch": 3.09, + "learning_rate": 1.5528156237764777e-06, + "loss": 0.0952, + "step": 82980 + }, + { + "epoch": 3.1, + "learning_rate": 1.5525348845165627e-06, + "loss": 0.1253, + "step": 83010 + }, + { + "epoch": 3.1, + "learning_rate": 1.5522542974692667e-06, + "loss": 0.144, + "step": 83040 + }, + { + "epoch": 3.1, + "learning_rate": 1.5519738624970942e-06, + "loss": 0.1095, + "step": 83070 + }, + { + "epoch": 3.1, + "learning_rate": 1.551693579462723e-06, + "loss": 0.1236, + "step": 83100 + }, + { + "epoch": 3.1, + "learning_rate": 1.5514134482290044e-06, + "loss": 0.1229, + "step": 83130 + }, + { + "epoch": 3.1, + "learning_rate": 1.5511334686589628e-06, + "loss": 0.129, + "step": 83160 + }, + { + "epoch": 3.1, + "learning_rate": 1.550853640615796e-06, + "loss": 0.1211, + "step": 83190 + }, + { + "epoch": 3.1, + "learning_rate": 1.5505739639628744e-06, + "loss": 0.1231, + "step": 83220 + }, + { + "epoch": 3.1, + "learning_rate": 1.5502944385637406e-06, + "loss": 0.1098, + "step": 83250 + }, + { + "epoch": 3.11, + "learning_rate": 1.5500150642821094e-06, + "loss": 0.1199, + "step": 83280 + }, + { + "epoch": 3.11, + "learning_rate": 1.5497358409818678e-06, + "loss": 0.1487, + "step": 83310 + }, + { + "epoch": 3.11, + "learning_rate": 1.5494567685270737e-06, + "loss": 0.1068, + "step": 83340 + }, + { + "epoch": 3.11, + "learning_rate": 1.5491778467819573e-06, + "loss": 0.118, + "step": 83370 + }, + { + "epoch": 3.11, + "learning_rate": 1.5488990756109185e-06, + "loss": 0.1085, + "step": 83400 + }, + { + "epoch": 3.11, + "learning_rate": 1.5486204548785288e-06, + "loss": 0.1082, + "step": 83430 + }, + { + "epoch": 3.11, + "learning_rate": 1.5483419844495307e-06, + "loss": 0.1041, + "step": 83460 + }, + { + "epoch": 3.11, + "learning_rate": 1.5480636641888352e-06, + "loss": 0.1008, + "step": 83490 + }, + { + "epoch": 3.12, + "learning_rate": 1.547785493961525e-06, + "loss": 0.1108, + "step": 83520 + }, + { + "epoch": 3.12, + "learning_rate": 1.5475074736328517e-06, + "loss": 0.1091, + "step": 83550 + }, + { + "epoch": 3.12, + "learning_rate": 1.5472296030682354e-06, + "loss": 0.1135, + "step": 83580 + }, + { + "epoch": 3.12, + "learning_rate": 1.5469518821332668e-06, + "loss": 0.1369, + "step": 83610 + }, + { + "epoch": 3.12, + "learning_rate": 1.5466743106937049e-06, + "loss": 0.1298, + "step": 83640 + }, + { + "epoch": 3.12, + "learning_rate": 1.5463968886154769e-06, + "loss": 0.1157, + "step": 83670 + }, + { + "epoch": 3.12, + "learning_rate": 1.5461196157646783e-06, + "loss": 0.1205, + "step": 83700 + }, + { + "epoch": 3.12, + "learning_rate": 1.545842492007573e-06, + "loss": 0.1166, + "step": 83730 + }, + { + "epoch": 3.12, + "learning_rate": 1.5455655172105921e-06, + "loss": 0.1373, + "step": 83760 + }, + { + "epoch": 3.13, + "learning_rate": 1.545288691240335e-06, + "loss": 0.1065, + "step": 83790 + }, + { + "epoch": 3.13, + "learning_rate": 1.5450120139635676e-06, + "loss": 0.1403, + "step": 83820 + }, + { + "epoch": 3.13, + "learning_rate": 1.5447354852472227e-06, + "loss": 0.1516, + "step": 83850 + }, + { + "epoch": 3.13, + "learning_rate": 1.5444591049584e-06, + "loss": 0.1211, + "step": 83880 + }, + { + "epoch": 3.13, + "learning_rate": 1.544182872964366e-06, + "loss": 0.1235, + "step": 83910 + }, + { + "epoch": 3.13, + "learning_rate": 1.543906789132552e-06, + "loss": 0.1217, + "step": 83940 + }, + { + "epoch": 3.13, + "learning_rate": 1.543630853330557e-06, + "loss": 0.1362, + "step": 83970 + }, + { + "epoch": 3.13, + "learning_rate": 1.5433550654261445e-06, + "loss": 0.1273, + "step": 84000 + }, + { + "epoch": 3.13, + "learning_rate": 1.5430794252872425e-06, + "loss": 0.1416, + "step": 84030 + }, + { + "epoch": 3.14, + "learning_rate": 1.5428039327819458e-06, + "loss": 0.1232, + "step": 84060 + }, + { + "epoch": 3.14, + "learning_rate": 1.542528587778513e-06, + "loss": 0.1246, + "step": 84090 + }, + { + "epoch": 3.14, + "learning_rate": 1.542253390145367e-06, + "loss": 0.1087, + "step": 84120 + }, + { + "epoch": 3.14, + "learning_rate": 1.5419783397510962e-06, + "loss": 0.1087, + "step": 84150 + }, + { + "epoch": 3.14, + "learning_rate": 1.5417034364644514e-06, + "loss": 0.1325, + "step": 84180 + }, + { + "epoch": 3.14, + "learning_rate": 1.5414286801543485e-06, + "loss": 0.1321, + "step": 84210 + }, + { + "epoch": 3.14, + "learning_rate": 1.5411540706898656e-06, + "loss": 0.116, + "step": 84240 + }, + { + "epoch": 3.14, + "learning_rate": 1.5408796079402449e-06, + "loss": 0.1356, + "step": 84270 + }, + { + "epoch": 3.14, + "learning_rate": 1.5406052917748917e-06, + "loss": 0.1228, + "step": 84300 + }, + { + "epoch": 3.15, + "learning_rate": 1.5403311220633734e-06, + "loss": 0.1235, + "step": 84330 + }, + { + "epoch": 3.15, + "learning_rate": 1.54005709867542e-06, + "loss": 0.1147, + "step": 84360 + }, + { + "epoch": 3.15, + "learning_rate": 1.5397832214809234e-06, + "loss": 0.1158, + "step": 84390 + }, + { + "epoch": 3.15, + "learning_rate": 1.539509490349938e-06, + "loss": 0.1112, + "step": 84420 + }, + { + "epoch": 3.15, + "learning_rate": 1.5392359051526795e-06, + "loss": 0.118, + "step": 84450 + }, + { + "epoch": 3.15, + "learning_rate": 1.5389624657595257e-06, + "loss": 0.1281, + "step": 84480 + }, + { + "epoch": 3.15, + "learning_rate": 1.5386891720410136e-06, + "loss": 0.099, + "step": 84510 + }, + { + "epoch": 3.15, + "learning_rate": 1.538416023867843e-06, + "loss": 0.1043, + "step": 84540 + }, + { + "epoch": 3.15, + "learning_rate": 1.5381430211108741e-06, + "loss": 0.1287, + "step": 84570 + }, + { + "epoch": 3.16, + "learning_rate": 1.5378701636411262e-06, + "loss": 0.1269, + "step": 84600 + }, + { + "epoch": 3.16, + "learning_rate": 1.5375974513297803e-06, + "loss": 0.1099, + "step": 84630 + }, + { + "epoch": 3.16, + "learning_rate": 1.5373248840481758e-06, + "loss": 0.1497, + "step": 84660 + }, + { + "epoch": 3.16, + "learning_rate": 1.5370524616678132e-06, + "loss": 0.1104, + "step": 84690 + }, + { + "epoch": 3.16, + "learning_rate": 1.5367801840603504e-06, + "loss": 0.1058, + "step": 84720 + }, + { + "epoch": 3.16, + "learning_rate": 1.5365080510976067e-06, + "loss": 0.1078, + "step": 84750 + }, + { + "epoch": 3.16, + "learning_rate": 1.5362360626515582e-06, + "loss": 0.114, + "step": 84780 + }, + { + "epoch": 3.16, + "learning_rate": 1.5359642185943406e-06, + "loss": 0.0988, + "step": 84810 + }, + { + "epoch": 3.16, + "learning_rate": 1.535692518798248e-06, + "loss": 0.1161, + "step": 84840 + }, + { + "epoch": 3.17, + "learning_rate": 1.5354209631357322e-06, + "loss": 0.1342, + "step": 84870 + }, + { + "epoch": 3.17, + "learning_rate": 1.5351495514794033e-06, + "loss": 0.1202, + "step": 84900 + }, + { + "epoch": 3.17, + "learning_rate": 1.5348782837020273e-06, + "loss": 0.1082, + "step": 84930 + }, + { + "epoch": 3.17, + "learning_rate": 1.5346071596765304e-06, + "loss": 0.1103, + "step": 84960 + }, + { + "epoch": 3.17, + "learning_rate": 1.5343361792759937e-06, + "loss": 0.1361, + "step": 84990 + }, + { + "epoch": 3.17, + "learning_rate": 1.5340653423736556e-06, + "loss": 0.1208, + "step": 85020 + }, + { + "epoch": 3.17, + "learning_rate": 1.5337946488429114e-06, + "loss": 0.1119, + "step": 85050 + }, + { + "epoch": 3.17, + "learning_rate": 1.5335240985573125e-06, + "loss": 0.1237, + "step": 85080 + }, + { + "epoch": 3.17, + "learning_rate": 1.5332536913905663e-06, + "loss": 0.1237, + "step": 85110 + }, + { + "epoch": 3.18, + "learning_rate": 1.5329924337198969e-06, + "loss": 0.1091, + "step": 85140 + }, + { + "epoch": 3.18, + "learning_rate": 1.5327223076524054e-06, + "loss": 0.1237, + "step": 85170 + }, + { + "epoch": 3.18, + "learning_rate": 1.5324523243300148e-06, + "loss": 0.1296, + "step": 85200 + }, + { + "epoch": 3.18, + "learning_rate": 1.5321824836270496e-06, + "loss": 0.1202, + "step": 85230 + }, + { + "epoch": 3.18, + "learning_rate": 1.5319127854179885e-06, + "loss": 0.146, + "step": 85260 + }, + { + "epoch": 3.18, + "learning_rate": 1.5316432295774654e-06, + "loss": 0.1462, + "step": 85290 + }, + { + "epoch": 3.18, + "learning_rate": 1.5313738159802683e-06, + "loss": 0.1368, + "step": 85320 + }, + { + "epoch": 3.18, + "learning_rate": 1.5311045445013391e-06, + "loss": 0.1462, + "step": 85350 + }, + { + "epoch": 3.18, + "learning_rate": 1.530835415015774e-06, + "loss": 0.1282, + "step": 85380 + }, + { + "epoch": 3.19, + "learning_rate": 1.5305664273988224e-06, + "loss": 0.1152, + "step": 85410 + }, + { + "epoch": 3.19, + "learning_rate": 1.5302975815258878e-06, + "loss": 0.1182, + "step": 85440 + }, + { + "epoch": 3.19, + "learning_rate": 1.5300378318006353e-06, + "loss": 0.1259, + "step": 85470 + }, + { + "epoch": 3.19, + "learning_rate": 1.5297692643280468e-06, + "loss": 0.1203, + "step": 85500 + }, + { + "epoch": 3.19, + "learning_rate": 1.5295008382307369e-06, + "loss": 0.102, + "step": 85530 + }, + { + "epoch": 3.19, + "learning_rate": 1.5292325533847157e-06, + "loss": 0.1313, + "step": 85560 + }, + { + "epoch": 3.19, + "learning_rate": 1.5289644096661436e-06, + "loss": 0.1287, + "step": 85590 + }, + { + "epoch": 3.19, + "learning_rate": 1.528696406951334e-06, + "loss": 0.138, + "step": 85620 + }, + { + "epoch": 3.19, + "learning_rate": 1.5284285451167524e-06, + "loss": 0.1381, + "step": 85650 + }, + { + "epoch": 3.2, + "learning_rate": 1.5281608240390144e-06, + "loss": 0.1392, + "step": 85680 + }, + { + "epoch": 3.2, + "learning_rate": 1.5278932435948881e-06, + "loss": 0.1166, + "step": 85710 + }, + { + "epoch": 3.2, + "learning_rate": 1.527625803661292e-06, + "loss": 0.1184, + "step": 85740 + }, + { + "epoch": 3.2, + "learning_rate": 1.5273585041152956e-06, + "loss": 0.1142, + "step": 85770 + }, + { + "epoch": 3.2, + "learning_rate": 1.5270913448341186e-06, + "loss": 0.1084, + "step": 85800 + }, + { + "epoch": 3.2, + "learning_rate": 1.526824325695131e-06, + "loss": 0.1404, + "step": 85830 + }, + { + "epoch": 3.2, + "learning_rate": 1.5265574465758534e-06, + "loss": 0.1101, + "step": 85860 + }, + { + "epoch": 3.2, + "learning_rate": 1.5262907073539553e-06, + "loss": 0.1247, + "step": 85890 + }, + { + "epoch": 3.2, + "learning_rate": 1.5260241079072572e-06, + "loss": 0.1084, + "step": 85920 + }, + { + "epoch": 3.21, + "learning_rate": 1.5257576481137268e-06, + "loss": 0.1379, + "step": 85950 + }, + { + "epoch": 3.21, + "learning_rate": 1.5254913278514829e-06, + "loss": 0.1223, + "step": 85980 + }, + { + "epoch": 3.21, + "learning_rate": 1.5252251469987929e-06, + "loss": 0.1168, + "step": 86010 + }, + { + "epoch": 3.21, + "learning_rate": 1.524959105434071e-06, + "loss": 0.1237, + "step": 86040 + }, + { + "epoch": 3.21, + "learning_rate": 1.5246932030358824e-06, + "loss": 0.1205, + "step": 86070 + }, + { + "epoch": 3.21, + "learning_rate": 1.5244274396829384e-06, + "loss": 0.118, + "step": 86100 + }, + { + "epoch": 3.21, + "learning_rate": 1.5241618152540993e-06, + "loss": 0.1071, + "step": 86130 + }, + { + "epoch": 3.21, + "learning_rate": 1.523896329628373e-06, + "loss": 0.1027, + "step": 86160 + }, + { + "epoch": 3.21, + "learning_rate": 1.5236309826849143e-06, + "loss": 0.1233, + "step": 86190 + }, + { + "epoch": 3.22, + "learning_rate": 1.5233657743030265e-06, + "loss": 0.1153, + "step": 86220 + }, + { + "epoch": 3.22, + "learning_rate": 1.5231007043621581e-06, + "loss": 0.1201, + "step": 86250 + }, + { + "epoch": 3.22, + "learning_rate": 1.522835772741906e-06, + "loss": 0.1336, + "step": 86280 + }, + { + "epoch": 3.22, + "learning_rate": 1.5225709793220126e-06, + "loss": 0.1331, + "step": 86310 + }, + { + "epoch": 3.22, + "learning_rate": 1.5223063239823674e-06, + "loss": 0.1216, + "step": 86340 + }, + { + "epoch": 3.22, + "learning_rate": 1.5220418066030055e-06, + "loss": 0.1039, + "step": 86370 + }, + { + "epoch": 3.22, + "learning_rate": 1.5217774270641078e-06, + "loss": 0.1173, + "step": 86400 + }, + { + "epoch": 3.22, + "learning_rate": 1.5215131852460013e-06, + "loss": 0.153, + "step": 86430 + }, + { + "epoch": 3.22, + "learning_rate": 1.521249081029158e-06, + "loss": 0.1116, + "step": 86460 + }, + { + "epoch": 3.23, + "learning_rate": 1.5209851142941952e-06, + "loss": 0.1253, + "step": 86490 + }, + { + "epoch": 3.23, + "learning_rate": 1.5207212849218755e-06, + "loss": 0.1241, + "step": 86520 + }, + { + "epoch": 3.23, + "learning_rate": 1.5204575927931055e-06, + "loss": 0.1126, + "step": 86550 + }, + { + "epoch": 3.23, + "learning_rate": 1.520194037788937e-06, + "loss": 0.1449, + "step": 86580 + }, + { + "epoch": 3.23, + "learning_rate": 1.5199306197905658e-06, + "loss": 0.1137, + "step": 86610 + }, + { + "epoch": 3.23, + "learning_rate": 1.5196673386793316e-06, + "loss": 0.1225, + "step": 86640 + }, + { + "epoch": 3.23, + "learning_rate": 1.5194041943367178e-06, + "loss": 0.1255, + "step": 86670 + }, + { + "epoch": 3.23, + "learning_rate": 1.519141186644352e-06, + "loss": 0.1401, + "step": 86700 + }, + { + "epoch": 3.23, + "learning_rate": 1.518878315484005e-06, + "loss": 0.1344, + "step": 86730 + }, + { + "epoch": 3.24, + "learning_rate": 1.5186155807375907e-06, + "loss": 0.1046, + "step": 86760 + }, + { + "epoch": 3.24, + "learning_rate": 1.5183529822871654e-06, + "loss": 0.1374, + "step": 86790 + }, + { + "epoch": 3.24, + "learning_rate": 1.518090520014929e-06, + "loss": 0.1191, + "step": 86820 + }, + { + "epoch": 3.24, + "learning_rate": 1.5178281938032233e-06, + "loss": 0.1181, + "step": 86850 + }, + { + "epoch": 3.24, + "learning_rate": 1.5175660035345323e-06, + "loss": 0.1411, + "step": 86880 + }, + { + "epoch": 3.24, + "learning_rate": 1.5173039490914826e-06, + "loss": 0.1249, + "step": 86910 + }, + { + "epoch": 3.24, + "learning_rate": 1.5170420303568423e-06, + "loss": 0.1299, + "step": 86940 + }, + { + "epoch": 3.24, + "learning_rate": 1.5167802472135212e-06, + "loss": 0.1245, + "step": 86970 + }, + { + "epoch": 3.24, + "learning_rate": 1.5165185995445702e-06, + "loss": 0.1229, + "step": 87000 + }, + { + "epoch": 3.25, + "learning_rate": 1.5162570872331817e-06, + "loss": 0.1035, + "step": 87030 + }, + { + "epoch": 3.25, + "learning_rate": 1.5159957101626888e-06, + "loss": 0.1268, + "step": 87060 + }, + { + "epoch": 3.25, + "learning_rate": 1.5157344682165656e-06, + "loss": 0.1357, + "step": 87090 + }, + { + "epoch": 3.25, + "learning_rate": 1.515473361278426e-06, + "loss": 0.1802, + "step": 87120 + }, + { + "epoch": 3.25, + "learning_rate": 1.5152123892320259e-06, + "loss": 0.1332, + "step": 87150 + }, + { + "epoch": 3.25, + "learning_rate": 1.5149515519612596e-06, + "loss": 0.1239, + "step": 87180 + }, + { + "epoch": 3.25, + "learning_rate": 1.5146908493501613e-06, + "loss": 0.1301, + "step": 87210 + }, + { + "epoch": 3.25, + "learning_rate": 1.5144302812829061e-06, + "loss": 0.1008, + "step": 87240 + }, + { + "epoch": 3.25, + "learning_rate": 1.5141698476438073e-06, + "loss": 0.1131, + "step": 87270 + }, + { + "epoch": 3.26, + "learning_rate": 1.513909548317318e-06, + "loss": 0.1224, + "step": 87300 + }, + { + "epoch": 3.26, + "learning_rate": 1.5136493831880306e-06, + "loss": 0.1287, + "step": 87330 + }, + { + "epoch": 3.26, + "learning_rate": 1.5133893521406753e-06, + "loss": 0.1115, + "step": 87360 + }, + { + "epoch": 3.26, + "learning_rate": 1.5131294550601217e-06, + "loss": 0.1134, + "step": 87390 + }, + { + "epoch": 3.26, + "learning_rate": 1.5128696918313772e-06, + "loss": 0.1424, + "step": 87420 + }, + { + "epoch": 3.26, + "learning_rate": 1.5126100623395884e-06, + "loss": 0.1172, + "step": 87450 + }, + { + "epoch": 3.26, + "learning_rate": 1.512350566470038e-06, + "loss": 0.107, + "step": 87480 + }, + { + "epoch": 3.26, + "learning_rate": 1.5120912041081485e-06, + "loss": 0.0991, + "step": 87510 + }, + { + "epoch": 3.26, + "learning_rate": 1.5118319751394783e-06, + "loss": 0.1176, + "step": 87540 + }, + { + "epoch": 3.27, + "learning_rate": 1.5115728794497237e-06, + "loss": 0.108, + "step": 87570 + }, + { + "epoch": 3.27, + "learning_rate": 1.5113139169247178e-06, + "loss": 0.1178, + "step": 87600 + }, + { + "epoch": 3.27, + "learning_rate": 1.511063712957183e-06, + "loss": 0.111, + "step": 87630 + }, + { + "epoch": 3.27, + "learning_rate": 1.5108050119903283e-06, + "loss": 0.111, + "step": 87660 + }, + { + "epoch": 3.27, + "learning_rate": 1.5105464438503328e-06, + "loss": 0.1297, + "step": 87690 + }, + { + "epoch": 3.27, + "learning_rate": 1.5102880084235716e-06, + "loss": 0.1191, + "step": 87720 + }, + { + "epoch": 3.27, + "learning_rate": 1.5100297055965558e-06, + "loss": 0.1325, + "step": 87750 + }, + { + "epoch": 3.27, + "learning_rate": 1.5097715352559318e-06, + "loss": 0.109, + "step": 87780 + }, + { + "epoch": 3.28, + "learning_rate": 1.5095134972884821e-06, + "loss": 0.0999, + "step": 87810 + }, + { + "epoch": 3.28, + "learning_rate": 1.5092555915811246e-06, + "loss": 0.1436, + "step": 87840 + }, + { + "epoch": 3.28, + "learning_rate": 1.5089978180209128e-06, + "loss": 0.1361, + "step": 87870 + }, + { + "epoch": 3.28, + "learning_rate": 1.5087401764950343e-06, + "loss": 0.1119, + "step": 87900 + }, + { + "epoch": 3.28, + "learning_rate": 1.5084826668908123e-06, + "loss": 0.1274, + "step": 87930 + }, + { + "epoch": 3.28, + "learning_rate": 1.5082252890957039e-06, + "loss": 0.1391, + "step": 87960 + }, + { + "epoch": 3.28, + "learning_rate": 1.5079680429973012e-06, + "loss": 0.1352, + "step": 87990 + }, + { + "epoch": 3.28, + "learning_rate": 1.5077109284833297e-06, + "loss": 0.1133, + "step": 88020 + }, + { + "epoch": 3.28, + "learning_rate": 1.5074539454416498e-06, + "loss": 0.1059, + "step": 88050 + }, + { + "epoch": 3.29, + "learning_rate": 1.5071970937602551e-06, + "loss": 0.0964, + "step": 88080 + }, + { + "epoch": 3.29, + "learning_rate": 1.5069403733272725e-06, + "loss": 0.1188, + "step": 88110 + }, + { + "epoch": 3.29, + "learning_rate": 1.5066837840309626e-06, + "loss": 0.1084, + "step": 88140 + }, + { + "epoch": 3.29, + "learning_rate": 1.5064273257597195e-06, + "loss": 0.1163, + "step": 88170 + }, + { + "epoch": 3.29, + "learning_rate": 1.5061709984020693e-06, + "loss": 0.1232, + "step": 88200 + }, + { + "epoch": 3.29, + "learning_rate": 1.5059148018466718e-06, + "loss": 0.1235, + "step": 88230 + }, + { + "epoch": 3.29, + "learning_rate": 1.5056587359823183e-06, + "loss": 0.1132, + "step": 88260 + }, + { + "epoch": 3.29, + "learning_rate": 1.5054028006979338e-06, + "loss": 0.1157, + "step": 88290 + }, + { + "epoch": 3.29, + "learning_rate": 1.5051469958825737e-06, + "loss": 0.1168, + "step": 88320 + }, + { + "epoch": 3.3, + "learning_rate": 1.504891321425427e-06, + "loss": 0.1184, + "step": 88350 + }, + { + "epoch": 3.3, + "learning_rate": 1.5046357772158132e-06, + "loss": 0.1309, + "step": 88380 + }, + { + "epoch": 3.3, + "learning_rate": 1.5043803631431841e-06, + "loss": 0.11, + "step": 88410 + }, + { + "epoch": 3.3, + "learning_rate": 1.5041250790971224e-06, + "loss": 0.132, + "step": 88440 + }, + { + "epoch": 3.3, + "learning_rate": 1.5038699249673416e-06, + "loss": 0.1204, + "step": 88470 + }, + { + "epoch": 3.3, + "learning_rate": 1.503614900643687e-06, + "loss": 0.1047, + "step": 88500 + }, + { + "epoch": 3.3, + "learning_rate": 1.5033600060161343e-06, + "loss": 0.1349, + "step": 88530 + }, + { + "epoch": 3.3, + "learning_rate": 1.5031052409747888e-06, + "loss": 0.1237, + "step": 88560 + }, + { + "epoch": 3.3, + "learning_rate": 1.5028506054098876e-06, + "loss": 0.1142, + "step": 88590 + }, + { + "epoch": 3.31, + "learning_rate": 1.502596099211797e-06, + "loss": 0.1132, + "step": 88620 + }, + { + "epoch": 3.31, + "learning_rate": 1.5023417222710132e-06, + "loss": 0.1193, + "step": 88650 + }, + { + "epoch": 3.31, + "learning_rate": 1.5020874744781625e-06, + "loss": 0.1266, + "step": 88680 + }, + { + "epoch": 3.31, + "learning_rate": 1.5018333557240003e-06, + "loss": 0.1131, + "step": 88710 + }, + { + "epoch": 3.31, + "learning_rate": 1.501579365899412e-06, + "loss": 0.1058, + "step": 88740 + }, + { + "epoch": 3.31, + "learning_rate": 1.5013255048954111e-06, + "loss": 0.1074, + "step": 88770 + }, + { + "epoch": 3.31, + "learning_rate": 1.5010717726031415e-06, + "loss": 0.1117, + "step": 88800 + }, + { + "epoch": 3.31, + "learning_rate": 1.5008181689138746e-06, + "loss": 0.1114, + "step": 88830 + }, + { + "epoch": 3.31, + "learning_rate": 1.5005646937190105e-06, + "loss": 0.1221, + "step": 88860 + }, + { + "epoch": 3.32, + "learning_rate": 1.500311346910078e-06, + "loss": 0.117, + "step": 88890 + }, + { + "epoch": 3.32, + "learning_rate": 1.5000581283787338e-06, + "loss": 0.1257, + "step": 88920 + }, + { + "epoch": 3.32, + "learning_rate": 1.499805038016763e-06, + "loss": 0.1129, + "step": 88950 + }, + { + "epoch": 3.32, + "learning_rate": 1.4995520757160784e-06, + "loss": 0.1134, + "step": 88980 + }, + { + "epoch": 3.32, + "learning_rate": 1.4992992413687193e-06, + "loss": 0.1142, + "step": 89010 + }, + { + "epoch": 3.32, + "learning_rate": 1.499046534866854e-06, + "loss": 0.1165, + "step": 89040 + }, + { + "epoch": 3.32, + "learning_rate": 1.4987939561027765e-06, + "loss": 0.0977, + "step": 89070 + }, + { + "epoch": 3.32, + "learning_rate": 1.4985415049689088e-06, + "loss": 0.1337, + "step": 89100 + }, + { + "epoch": 3.32, + "learning_rate": 1.4982891813577996e-06, + "loss": 0.142, + "step": 89130 + }, + { + "epoch": 3.33, + "learning_rate": 1.4980369851621237e-06, + "loss": 0.1403, + "step": 89160 + }, + { + "epoch": 3.33, + "learning_rate": 1.4977849162746824e-06, + "loss": 0.1387, + "step": 89190 + }, + { + "epoch": 3.33, + "learning_rate": 1.4975329745884038e-06, + "loss": 0.1118, + "step": 89220 + }, + { + "epoch": 3.33, + "learning_rate": 1.4972811599963413e-06, + "loss": 0.1307, + "step": 89250 + }, + { + "epoch": 3.33, + "learning_rate": 1.4970294723916748e-06, + "loss": 0.1178, + "step": 89280 + }, + { + "epoch": 3.33, + "learning_rate": 1.4967779116677095e-06, + "loss": 0.1113, + "step": 89310 + }, + { + "epoch": 3.33, + "learning_rate": 1.4965264777178759e-06, + "loss": 0.1112, + "step": 89340 + }, + { + "epoch": 3.33, + "learning_rate": 1.4962751704357302e-06, + "loss": 0.1064, + "step": 89370 + }, + { + "epoch": 3.33, + "learning_rate": 1.496023989714953e-06, + "loss": 0.1224, + "step": 89400 + }, + { + "epoch": 3.34, + "learning_rate": 1.4957729354493509e-06, + "loss": 0.106, + "step": 89430 + }, + { + "epoch": 3.34, + "learning_rate": 1.4955220075328538e-06, + "loss": 0.1232, + "step": 89460 + }, + { + "epoch": 3.34, + "learning_rate": 1.4952712058595175e-06, + "loss": 0.1527, + "step": 89490 + }, + { + "epoch": 3.34, + "learning_rate": 1.495028884143625e-06, + "loss": 0.1414, + "step": 89520 + }, + { + "epoch": 3.34, + "learning_rate": 1.4947783304399193e-06, + "loss": 0.1012, + "step": 89550 + }, + { + "epoch": 3.34, + "learning_rate": 1.4945279026658018e-06, + "loss": 0.139, + "step": 89580 + }, + { + "epoch": 3.34, + "learning_rate": 1.49427760071582e-06, + "loss": 0.1174, + "step": 89610 + }, + { + "epoch": 3.34, + "learning_rate": 1.4940274244846445e-06, + "loss": 0.1127, + "step": 89640 + }, + { + "epoch": 3.34, + "learning_rate": 1.4937773738670697e-06, + "loss": 0.1122, + "step": 89670 + }, + { + "epoch": 3.35, + "learning_rate": 1.4935274487580126e-06, + "loss": 0.1302, + "step": 89700 + }, + { + "epoch": 3.35, + "learning_rate": 1.4932776490525144e-06, + "loss": 0.1179, + "step": 89730 + }, + { + "epoch": 3.35, + "learning_rate": 1.4930279746457379e-06, + "loss": 0.1215, + "step": 89760 + }, + { + "epoch": 3.35, + "learning_rate": 1.492778425432969e-06, + "loss": 0.1021, + "step": 89790 + }, + { + "epoch": 3.35, + "learning_rate": 1.4925290013096165e-06, + "loss": 0.1068, + "step": 89820 + }, + { + "epoch": 3.35, + "learning_rate": 1.492279702171211e-06, + "loss": 0.1374, + "step": 89850 + }, + { + "epoch": 3.35, + "learning_rate": 1.4920305279134051e-06, + "loss": 0.1252, + "step": 89880 + }, + { + "epoch": 3.35, + "learning_rate": 1.4917814784319737e-06, + "loss": 0.1192, + "step": 89910 + }, + { + "epoch": 3.35, + "learning_rate": 1.4915325536228134e-06, + "loss": 0.1032, + "step": 89940 + }, + { + "epoch": 3.36, + "learning_rate": 1.4912837533819422e-06, + "loss": 0.1232, + "step": 89970 + }, + { + "epoch": 3.36, + "learning_rate": 1.4910350776054995e-06, + "loss": 0.1362, + "step": 90000 + }, + { + "epoch": 3.36, + "learning_rate": 1.4907865261897456e-06, + "loss": 0.117, + "step": 90030 + }, + { + "epoch": 3.36, + "learning_rate": 1.4905380990310628e-06, + "loss": 0.1541, + "step": 90060 + }, + { + "epoch": 3.36, + "learning_rate": 1.490289796025953e-06, + "loss": 0.1077, + "step": 90090 + }, + { + "epoch": 3.36, + "learning_rate": 1.49004161707104e-06, + "loss": 0.1133, + "step": 90120 + }, + { + "epoch": 3.36, + "learning_rate": 1.4897935620630663e-06, + "loss": 0.1163, + "step": 90150 + }, + { + "epoch": 3.36, + "learning_rate": 1.4895456308988964e-06, + "loss": 0.1588, + "step": 90180 + }, + { + "epoch": 3.36, + "learning_rate": 1.4892978234755149e-06, + "loss": 0.1337, + "step": 90210 + }, + { + "epoch": 3.37, + "learning_rate": 1.4890501396900245e-06, + "loss": 0.1156, + "step": 90240 + }, + { + "epoch": 3.37, + "learning_rate": 1.4888025794396497e-06, + "loss": 0.1172, + "step": 90270 + }, + { + "epoch": 3.37, + "learning_rate": 1.4885551426217331e-06, + "loss": 0.1159, + "step": 90300 + }, + { + "epoch": 3.37, + "learning_rate": 1.488307829133738e-06, + "loss": 0.1138, + "step": 90330 + }, + { + "epoch": 3.37, + "learning_rate": 1.4880606388732456e-06, + "loss": 0.121, + "step": 90360 + }, + { + "epoch": 3.37, + "learning_rate": 1.4878135717379569e-06, + "loss": 0.1292, + "step": 90390 + }, + { + "epoch": 3.37, + "learning_rate": 1.487566627625692e-06, + "loss": 0.1111, + "step": 90420 + }, + { + "epoch": 3.37, + "learning_rate": 1.4873198064343886e-06, + "loss": 0.1321, + "step": 90450 + }, + { + "epoch": 3.37, + "learning_rate": 1.4870731080621042e-06, + "loss": 0.1313, + "step": 90480 + }, + { + "epoch": 3.38, + "learning_rate": 1.4868265324070142e-06, + "loss": 0.1153, + "step": 90510 + }, + { + "epoch": 3.38, + "learning_rate": 1.4865800793674113e-06, + "loss": 0.1199, + "step": 90540 + }, + { + "epoch": 3.38, + "learning_rate": 1.4863337488417067e-06, + "loss": 0.1136, + "step": 90570 + }, + { + "epoch": 3.38, + "learning_rate": 1.4860875407284305e-06, + "loss": 0.1175, + "step": 90600 + }, + { + "epoch": 3.38, + "learning_rate": 1.4858414549262287e-06, + "loss": 0.1414, + "step": 90630 + }, + { + "epoch": 3.38, + "learning_rate": 1.4855954913338655e-06, + "loss": 0.1195, + "step": 90660 + }, + { + "epoch": 3.38, + "learning_rate": 1.4853496498502227e-06, + "loss": 0.1175, + "step": 90690 + }, + { + "epoch": 3.38, + "learning_rate": 1.4851039303742986e-06, + "loss": 0.1238, + "step": 90720 + }, + { + "epoch": 3.38, + "learning_rate": 1.4848583328052088e-06, + "loss": 0.1193, + "step": 90750 + }, + { + "epoch": 3.39, + "learning_rate": 1.4846128570421855e-06, + "loss": 0.1214, + "step": 90780 + }, + { + "epoch": 3.39, + "learning_rate": 1.4843675029845775e-06, + "loss": 0.1141, + "step": 90810 + }, + { + "epoch": 3.39, + "learning_rate": 1.48412227053185e-06, + "loss": 0.1322, + "step": 90840 + }, + { + "epoch": 3.39, + "learning_rate": 1.4838771595835847e-06, + "loss": 0.1243, + "step": 90870 + }, + { + "epoch": 3.39, + "learning_rate": 1.4836321700394784e-06, + "loss": 0.1155, + "step": 90900 + }, + { + "epoch": 3.39, + "learning_rate": 1.483387301799345e-06, + "loss": 0.1255, + "step": 90930 + }, + { + "epoch": 3.39, + "learning_rate": 1.4831425547631134e-06, + "loss": 0.0969, + "step": 90960 + }, + { + "epoch": 3.39, + "learning_rate": 1.4828979288308285e-06, + "loss": 0.1114, + "step": 90990 + }, + { + "epoch": 3.39, + "learning_rate": 1.48265342390265e-06, + "loss": 0.1182, + "step": 91020 + }, + { + "epoch": 3.4, + "learning_rate": 1.4824090398788529e-06, + "loss": 0.114, + "step": 91050 + }, + { + "epoch": 3.4, + "learning_rate": 1.482164776659828e-06, + "loss": 0.1142, + "step": 91080 + }, + { + "epoch": 3.4, + "learning_rate": 1.4819206341460796e-06, + "loss": 0.1193, + "step": 91110 + }, + { + "epoch": 3.4, + "learning_rate": 1.4816766122382278e-06, + "loss": 0.1243, + "step": 91140 + }, + { + "epoch": 3.4, + "learning_rate": 1.4814327108370068e-06, + "loss": 0.1066, + "step": 91170 + }, + { + "epoch": 3.4, + "learning_rate": 1.4811889298432654e-06, + "loss": 0.1011, + "step": 91200 + }, + { + "epoch": 3.4, + "learning_rate": 1.4809452691579664e-06, + "loss": 0.1181, + "step": 91230 + }, + { + "epoch": 3.4, + "learning_rate": 1.4807017286821866e-06, + "loss": 0.1294, + "step": 91260 + }, + { + "epoch": 3.4, + "learning_rate": 1.4804583083171158e-06, + "loss": 0.119, + "step": 91290 + }, + { + "epoch": 3.41, + "learning_rate": 1.4802150079640592e-06, + "loss": 0.1297, + "step": 91320 + }, + { + "epoch": 3.41, + "learning_rate": 1.4799718275244343e-06, + "loss": 0.0969, + "step": 91350 + }, + { + "epoch": 3.41, + "learning_rate": 1.4797287668997723e-06, + "loss": 0.1048, + "step": 91380 + }, + { + "epoch": 3.41, + "learning_rate": 1.4794858259917175e-06, + "loss": 0.1058, + "step": 91410 + }, + { + "epoch": 3.41, + "learning_rate": 1.4792430047020267e-06, + "loss": 0.1071, + "step": 91440 + }, + { + "epoch": 3.41, + "learning_rate": 1.4790003029325705e-06, + "loss": 0.1385, + "step": 91470 + }, + { + "epoch": 3.41, + "learning_rate": 1.4787577205853318e-06, + "loss": 0.0996, + "step": 91500 + }, + { + "epoch": 3.41, + "learning_rate": 1.4785152575624056e-06, + "loss": 0.1223, + "step": 91530 + }, + { + "epoch": 3.41, + "learning_rate": 1.4782729137659996e-06, + "loss": 0.1278, + "step": 91560 + }, + { + "epoch": 3.42, + "learning_rate": 1.4780306890984334e-06, + "loss": 0.12, + "step": 91590 + }, + { + "epoch": 3.42, + "learning_rate": 1.4777885834621391e-06, + "loss": 0.1116, + "step": 91620 + }, + { + "epoch": 3.42, + "learning_rate": 1.47754659675966e-06, + "loss": 0.1276, + "step": 91650 + }, + { + "epoch": 3.42, + "learning_rate": 1.4773047288936515e-06, + "loss": 0.1446, + "step": 91680 + }, + { + "epoch": 3.42, + "learning_rate": 1.4770629797668806e-06, + "loss": 0.1144, + "step": 91710 + }, + { + "epoch": 3.42, + "learning_rate": 1.4768213492822253e-06, + "loss": 0.133, + "step": 91740 + }, + { + "epoch": 3.42, + "learning_rate": 1.4765798373426748e-06, + "loss": 0.1227, + "step": 91770 + }, + { + "epoch": 3.42, + "learning_rate": 1.4763384438513297e-06, + "loss": 0.1087, + "step": 91800 + }, + { + "epoch": 3.42, + "learning_rate": 1.476097168711401e-06, + "loss": 0.1146, + "step": 91830 + }, + { + "epoch": 3.43, + "learning_rate": 1.4758560118262105e-06, + "loss": 0.1266, + "step": 91860 + }, + { + "epoch": 3.43, + "learning_rate": 1.4756149730991908e-06, + "loss": 0.1054, + "step": 91890 + }, + { + "epoch": 3.43, + "learning_rate": 1.4753740524338848e-06, + "loss": 0.1123, + "step": 91920 + }, + { + "epoch": 3.43, + "learning_rate": 1.4751332497339452e-06, + "loss": 0.1227, + "step": 91950 + }, + { + "epoch": 3.43, + "learning_rate": 1.474892564903135e-06, + "loss": 0.1042, + "step": 91980 + }, + { + "epoch": 3.43, + "learning_rate": 1.4746519978453271e-06, + "loss": 0.1297, + "step": 92010 + }, + { + "epoch": 3.43, + "learning_rate": 1.4744115484645044e-06, + "loss": 0.1177, + "step": 92040 + }, + { + "epoch": 3.43, + "learning_rate": 1.474171216664759e-06, + "loss": 0.1245, + "step": 92070 + }, + { + "epoch": 3.44, + "learning_rate": 1.473931002350292e-06, + "loss": 0.104, + "step": 92100 + }, + { + "epoch": 3.44, + "learning_rate": 1.473690905425415e-06, + "loss": 0.1007, + "step": 92130 + }, + { + "epoch": 3.44, + "learning_rate": 1.473450925794547e-06, + "loss": 0.0941, + "step": 92160 + }, + { + "epoch": 3.44, + "learning_rate": 1.4732110633622177e-06, + "loss": 0.1019, + "step": 92190 + }, + { + "epoch": 3.44, + "learning_rate": 1.472971318033064e-06, + "loss": 0.1402, + "step": 92220 + }, + { + "epoch": 3.44, + "learning_rate": 1.472731689711832e-06, + "loss": 0.1147, + "step": 92250 + }, + { + "epoch": 3.44, + "learning_rate": 1.4724921783033766e-06, + "loss": 0.128, + "step": 92280 + }, + { + "epoch": 3.44, + "learning_rate": 1.472252783712661e-06, + "loss": 0.1135, + "step": 92310 + }, + { + "epoch": 3.44, + "learning_rate": 1.472013505844755e-06, + "loss": 0.137, + "step": 92340 + }, + { + "epoch": 3.45, + "learning_rate": 1.471774344604839e-06, + "loss": 0.124, + "step": 92370 + }, + { + "epoch": 3.45, + "learning_rate": 1.471535299898199e-06, + "loss": 0.1294, + "step": 92400 + }, + { + "epoch": 3.45, + "learning_rate": 1.4712963716302299e-06, + "loss": 0.1158, + "step": 92430 + }, + { + "epoch": 3.45, + "learning_rate": 1.4710575597064326e-06, + "loss": 0.1321, + "step": 92460 + }, + { + "epoch": 3.45, + "learning_rate": 1.4708188640324172e-06, + "loss": 0.1243, + "step": 92490 + }, + { + "epoch": 3.45, + "learning_rate": 1.4705802845138998e-06, + "loss": 0.1155, + "step": 92520 + }, + { + "epoch": 3.45, + "learning_rate": 1.4703418210567037e-06, + "loss": 0.1176, + "step": 92550 + }, + { + "epoch": 3.45, + "learning_rate": 1.4701034735667596e-06, + "loss": 0.1017, + "step": 92580 + }, + { + "epoch": 3.45, + "learning_rate": 1.469865241950104e-06, + "loss": 0.1216, + "step": 92610 + }, + { + "epoch": 3.46, + "learning_rate": 1.469627126112881e-06, + "loss": 0.1047, + "step": 92640 + }, + { + "epoch": 3.46, + "learning_rate": 1.4693891259613402e-06, + "loss": 0.1072, + "step": 92670 + }, + { + "epoch": 3.46, + "learning_rate": 1.4691512414018378e-06, + "loss": 0.1367, + "step": 92700 + }, + { + "epoch": 3.46, + "learning_rate": 1.468913472340836e-06, + "loss": 0.1179, + "step": 92730 + }, + { + "epoch": 3.46, + "learning_rate": 1.4686758186849038e-06, + "loss": 0.1153, + "step": 92760 + }, + { + "epoch": 3.46, + "learning_rate": 1.4684382803407139e-06, + "loss": 0.109, + "step": 92790 + }, + { + "epoch": 3.46, + "learning_rate": 1.4682008572150469e-06, + "loss": 0.1358, + "step": 92820 + }, + { + "epoch": 3.46, + "learning_rate": 1.4679635492147872e-06, + "loss": 0.0986, + "step": 92850 + }, + { + "epoch": 3.46, + "learning_rate": 1.467726356246926e-06, + "loss": 0.1167, + "step": 92880 + }, + { + "epoch": 3.47, + "learning_rate": 1.4674892782185584e-06, + "loss": 0.1173, + "step": 92910 + }, + { + "epoch": 3.47, + "learning_rate": 1.467252315036885e-06, + "loss": 0.1071, + "step": 92940 + }, + { + "epoch": 3.47, + "learning_rate": 1.4670154666092112e-06, + "loss": 0.1257, + "step": 92970 + }, + { + "epoch": 3.47, + "learning_rate": 1.4667787328429475e-06, + "loss": 0.1203, + "step": 93000 + }, + { + "epoch": 3.47, + "learning_rate": 1.4665421136456081e-06, + "loss": 0.1233, + "step": 93030 + }, + { + "epoch": 3.47, + "learning_rate": 1.4663056089248127e-06, + "loss": 0.1099, + "step": 93060 + }, + { + "epoch": 3.47, + "learning_rate": 1.4660692185882838e-06, + "loss": 0.1145, + "step": 93090 + }, + { + "epoch": 3.47, + "learning_rate": 1.46583294254385e-06, + "loss": 0.1229, + "step": 93120 + }, + { + "epoch": 3.47, + "learning_rate": 1.4655967806994417e-06, + "loss": 0.1132, + "step": 93150 + }, + { + "epoch": 3.48, + "learning_rate": 1.4653607329630951e-06, + "loss": 0.1076, + "step": 93180 + }, + { + "epoch": 3.48, + "learning_rate": 1.465124799242948e-06, + "loss": 0.1297, + "step": 93210 + }, + { + "epoch": 3.48, + "learning_rate": 1.4648889794472434e-06, + "loss": 0.1107, + "step": 93240 + }, + { + "epoch": 3.48, + "learning_rate": 1.4646532734843268e-06, + "loss": 0.1041, + "step": 93270 + }, + { + "epoch": 3.48, + "learning_rate": 1.4644176812626475e-06, + "loss": 0.1077, + "step": 93300 + }, + { + "epoch": 3.48, + "learning_rate": 1.464182202690757e-06, + "loss": 0.1062, + "step": 93330 + }, + { + "epoch": 3.48, + "learning_rate": 1.4639468376773105e-06, + "loss": 0.1202, + "step": 93360 + }, + { + "epoch": 3.48, + "learning_rate": 1.4637115861310652e-06, + "loss": 0.1397, + "step": 93390 + }, + { + "epoch": 3.48, + "learning_rate": 1.4634764479608819e-06, + "loss": 0.1304, + "step": 93420 + }, + { + "epoch": 3.49, + "learning_rate": 1.4632414230757227e-06, + "loss": 0.1259, + "step": 93450 + }, + { + "epoch": 3.49, + "learning_rate": 1.463006511384653e-06, + "loss": 0.1204, + "step": 93480 + }, + { + "epoch": 3.49, + "learning_rate": 1.4627717127968393e-06, + "loss": 0.1246, + "step": 93510 + }, + { + "epoch": 3.49, + "learning_rate": 1.4625370272215516e-06, + "loss": 0.1108, + "step": 93540 + }, + { + "epoch": 3.49, + "learning_rate": 1.4623024545681609e-06, + "loss": 0.1497, + "step": 93570 + }, + { + "epoch": 3.49, + "learning_rate": 1.462067994746139e-06, + "loss": 0.1127, + "step": 93600 + }, + { + "epoch": 3.49, + "learning_rate": 1.4618414574190027e-06, + "loss": 0.1123, + "step": 93630 + }, + { + "epoch": 3.49, + "learning_rate": 1.4616072192349775e-06, + "loss": 0.1436, + "step": 93660 + }, + { + "epoch": 3.49, + "learning_rate": 1.4613730936143547e-06, + "loss": 0.135, + "step": 93690 + }, + { + "epoch": 3.5, + "learning_rate": 1.4611390804670088e-06, + "loss": 0.1123, + "step": 93720 + }, + { + "epoch": 3.5, + "learning_rate": 1.460905179702916e-06, + "loss": 0.1224, + "step": 93750 + }, + { + "epoch": 3.5, + "learning_rate": 1.4606713912321522e-06, + "loss": 0.1134, + "step": 93780 + }, + { + "epoch": 3.5, + "learning_rate": 1.460437714964895e-06, + "loss": 0.1188, + "step": 93810 + }, + { + "epoch": 3.5, + "learning_rate": 1.4602041508114229e-06, + "loss": 0.0994, + "step": 93840 + }, + { + "epoch": 3.5, + "learning_rate": 1.4599706986821135e-06, + "loss": 0.1052, + "step": 93870 + }, + { + "epoch": 3.5, + "learning_rate": 1.4597373584874452e-06, + "loss": 0.1548, + "step": 93900 + }, + { + "epoch": 3.5, + "learning_rate": 1.459504130137997e-06, + "loss": 0.103, + "step": 93930 + }, + { + "epoch": 3.5, + "learning_rate": 1.4592710135444477e-06, + "loss": 0.0989, + "step": 93960 + }, + { + "epoch": 3.51, + "learning_rate": 1.4590380086175755e-06, + "loss": 0.1272, + "step": 93990 + }, + { + "epoch": 3.51, + "learning_rate": 1.4588051152682587e-06, + "loss": 0.1303, + "step": 94020 + }, + { + "epoch": 3.51, + "learning_rate": 1.4585723334074747e-06, + "loss": 0.1212, + "step": 94050 + }, + { + "epoch": 3.51, + "learning_rate": 1.4583396629463011e-06, + "loss": 0.1169, + "step": 94080 + }, + { + "epoch": 3.51, + "learning_rate": 1.4581071037959144e-06, + "loss": 0.1174, + "step": 94110 + }, + { + "epoch": 3.51, + "learning_rate": 1.4578746558675893e-06, + "loss": 0.1132, + "step": 94140 + }, + { + "epoch": 3.51, + "learning_rate": 1.457642319072701e-06, + "loss": 0.1436, + "step": 94170 + }, + { + "epoch": 3.51, + "learning_rate": 1.4574100933227226e-06, + "loss": 0.1252, + "step": 94200 + }, + { + "epoch": 3.51, + "learning_rate": 1.4571779785292261e-06, + "loss": 0.114, + "step": 94230 + }, + { + "epoch": 3.52, + "learning_rate": 1.4569459746038823e-06, + "loss": 0.1098, + "step": 94260 + }, + { + "epoch": 3.52, + "learning_rate": 1.4567140814584597e-06, + "loss": 0.1197, + "step": 94290 + }, + { + "epoch": 3.52, + "learning_rate": 1.4564822990048258e-06, + "loss": 0.1026, + "step": 94320 + }, + { + "epoch": 3.52, + "learning_rate": 1.4562506271549461e-06, + "loss": 0.1173, + "step": 94350 + }, + { + "epoch": 3.52, + "learning_rate": 1.456019065820884e-06, + "loss": 0.1308, + "step": 94380 + }, + { + "epoch": 3.52, + "learning_rate": 1.4557876149148007e-06, + "loss": 0.1223, + "step": 94410 + }, + { + "epoch": 3.52, + "learning_rate": 1.4555562743489552e-06, + "loss": 0.112, + "step": 94440 + }, + { + "epoch": 3.52, + "learning_rate": 1.4553250440357034e-06, + "loss": 0.1283, + "step": 94470 + }, + { + "epoch": 3.52, + "learning_rate": 1.4550939238875e-06, + "loss": 0.1201, + "step": 94500 + }, + { + "epoch": 3.53, + "learning_rate": 1.454862913816896e-06, + "loss": 0.1092, + "step": 94530 + }, + { + "epoch": 3.53, + "learning_rate": 1.4546320137365396e-06, + "loss": 0.1174, + "step": 94560 + }, + { + "epoch": 3.53, + "learning_rate": 1.454401223559176e-06, + "loss": 0.1355, + "step": 94590 + }, + { + "epoch": 3.53, + "learning_rate": 1.454170543197648e-06, + "loss": 0.1051, + "step": 94620 + }, + { + "epoch": 3.53, + "learning_rate": 1.4539399725648944e-06, + "loss": 0.1139, + "step": 94650 + }, + { + "epoch": 3.53, + "learning_rate": 1.453709511573951e-06, + "loss": 0.1283, + "step": 94680 + }, + { + "epoch": 3.53, + "learning_rate": 1.4534791601379494e-06, + "loss": 0.1038, + "step": 94710 + }, + { + "epoch": 3.53, + "learning_rate": 1.4532489181701182e-06, + "loss": 0.1181, + "step": 94740 + }, + { + "epoch": 3.53, + "learning_rate": 1.453018785583782e-06, + "loss": 0.1246, + "step": 94770 + }, + { + "epoch": 3.54, + "learning_rate": 1.4527887622923615e-06, + "loss": 0.0963, + "step": 94800 + }, + { + "epoch": 3.54, + "learning_rate": 1.4525588482093733e-06, + "loss": 0.14, + "step": 94830 + }, + { + "epoch": 3.54, + "learning_rate": 1.4523290432484293e-06, + "loss": 0.1527, + "step": 94860 + }, + { + "epoch": 3.54, + "learning_rate": 1.452099347323238e-06, + "loss": 0.1099, + "step": 94890 + }, + { + "epoch": 3.54, + "learning_rate": 1.4518697603476026e-06, + "loss": 0.1439, + "step": 94920 + }, + { + "epoch": 3.54, + "learning_rate": 1.4516402822354216e-06, + "loss": 0.1077, + "step": 94950 + }, + { + "epoch": 3.54, + "learning_rate": 1.4514109129006895e-06, + "loss": 0.1217, + "step": 94980 + }, + { + "epoch": 3.54, + "learning_rate": 1.4511816522574956e-06, + "loss": 0.1292, + "step": 95010 + }, + { + "epoch": 3.54, + "learning_rate": 1.4509525002200234e-06, + "loss": 0.1279, + "step": 95040 + }, + { + "epoch": 3.55, + "learning_rate": 1.4507234567025525e-06, + "loss": 0.1096, + "step": 95070 + }, + { + "epoch": 3.55, + "learning_rate": 1.4504945216194558e-06, + "loss": 0.1203, + "step": 95100 + }, + { + "epoch": 3.55, + "learning_rate": 1.4502656948852023e-06, + "loss": 0.1156, + "step": 95130 + }, + { + "epoch": 3.55, + "learning_rate": 1.4500369764143537e-06, + "loss": 0.1166, + "step": 95160 + }, + { + "epoch": 3.55, + "learning_rate": 1.449808366121568e-06, + "loss": 0.13, + "step": 95190 + }, + { + "epoch": 3.55, + "learning_rate": 1.4495798639215955e-06, + "loss": 0.1074, + "step": 95220 + }, + { + "epoch": 3.55, + "learning_rate": 1.4493514697292815e-06, + "loss": 0.1208, + "step": 95250 + }, + { + "epoch": 3.55, + "learning_rate": 1.4491231834595649e-06, + "loss": 0.1303, + "step": 95280 + }, + { + "epoch": 3.55, + "learning_rate": 1.4488950050274784e-06, + "loss": 0.0958, + "step": 95310 + }, + { + "epoch": 3.56, + "learning_rate": 1.4486669343481484e-06, + "loss": 0.1031, + "step": 95340 + }, + { + "epoch": 3.56, + "learning_rate": 1.448438971336795e-06, + "loss": 0.121, + "step": 95370 + }, + { + "epoch": 3.56, + "learning_rate": 1.4482111159087308e-06, + "loss": 0.1065, + "step": 95400 + }, + { + "epoch": 3.56, + "learning_rate": 1.4479833679793626e-06, + "loss": 0.1094, + "step": 95430 + }, + { + "epoch": 3.56, + "learning_rate": 1.44775572746419e-06, + "loss": 0.1047, + "step": 95460 + }, + { + "epoch": 3.56, + "learning_rate": 1.4475281942788052e-06, + "loss": 0.1358, + "step": 95490 + }, + { + "epoch": 3.56, + "learning_rate": 1.447300768338894e-06, + "loss": 0.0914, + "step": 95520 + }, + { + "epoch": 3.56, + "learning_rate": 1.447073449560234e-06, + "loss": 0.119, + "step": 95550 + }, + { + "epoch": 3.56, + "learning_rate": 1.446846237858696e-06, + "loss": 0.1437, + "step": 95580 + }, + { + "epoch": 3.57, + "learning_rate": 1.4466191331502428e-06, + "loss": 0.1209, + "step": 95610 + }, + { + "epoch": 3.57, + "learning_rate": 1.4463921353509293e-06, + "loss": 0.1104, + "step": 95640 + }, + { + "epoch": 3.57, + "learning_rate": 1.4461652443769042e-06, + "loss": 0.1094, + "step": 95670 + }, + { + "epoch": 3.57, + "learning_rate": 1.445938460144406e-06, + "loss": 0.1296, + "step": 95700 + }, + { + "epoch": 3.57, + "learning_rate": 1.4457117825697664e-06, + "loss": 0.1246, + "step": 95730 + }, + { + "epoch": 3.57, + "learning_rate": 1.445485211569409e-06, + "loss": 0.1162, + "step": 95760 + }, + { + "epoch": 3.57, + "learning_rate": 1.4452587470598483e-06, + "loss": 0.1259, + "step": 95790 + }, + { + "epoch": 3.57, + "learning_rate": 1.4450323889576911e-06, + "loss": 0.1168, + "step": 95820 + }, + { + "epoch": 3.57, + "learning_rate": 1.4448061371796352e-06, + "loss": 0.1107, + "step": 95850 + }, + { + "epoch": 3.58, + "learning_rate": 1.4445799916424695e-06, + "loss": 0.1137, + "step": 95880 + }, + { + "epoch": 3.58, + "learning_rate": 1.444353952263074e-06, + "loss": 0.1077, + "step": 95910 + }, + { + "epoch": 3.58, + "learning_rate": 1.4441280189584204e-06, + "loss": 0.1215, + "step": 95940 + }, + { + "epoch": 3.58, + "learning_rate": 1.4439021916455707e-06, + "loss": 0.1089, + "step": 95970 + }, + { + "epoch": 3.58, + "learning_rate": 1.443676470241678e-06, + "loss": 0.0999, + "step": 96000 + }, + { + "epoch": 3.58, + "learning_rate": 1.4434508546639858e-06, + "loss": 0.1198, + "step": 96030 + }, + { + "epoch": 3.58, + "learning_rate": 1.4432253448298278e-06, + "loss": 0.1172, + "step": 96060 + }, + { + "epoch": 3.58, + "learning_rate": 1.4429999406566286e-06, + "loss": 0.1174, + "step": 96090 + }, + { + "epoch": 3.58, + "learning_rate": 1.4427746420619032e-06, + "loss": 0.1225, + "step": 96120 + }, + { + "epoch": 3.59, + "learning_rate": 1.442549448963256e-06, + "loss": 0.1261, + "step": 96150 + }, + { + "epoch": 3.59, + "learning_rate": 1.442324361278382e-06, + "loss": 0.1238, + "step": 96180 + }, + { + "epoch": 3.59, + "learning_rate": 1.4420993789250654e-06, + "loss": 0.1185, + "step": 96210 + }, + { + "epoch": 3.59, + "learning_rate": 1.4418745018211811e-06, + "loss": 0.1178, + "step": 96240 + }, + { + "epoch": 3.59, + "learning_rate": 1.4416497298846933e-06, + "loss": 0.1304, + "step": 96270 + }, + { + "epoch": 3.59, + "learning_rate": 1.4414250630336546e-06, + "loss": 0.1275, + "step": 96300 + }, + { + "epoch": 3.59, + "learning_rate": 1.4412005011862088e-06, + "loss": 0.1186, + "step": 96330 + }, + { + "epoch": 3.59, + "learning_rate": 1.4409760442605878e-06, + "loss": 0.1224, + "step": 96360 + }, + { + "epoch": 3.6, + "learning_rate": 1.4407516921751121e-06, + "loss": 0.1124, + "step": 96390 + }, + { + "epoch": 3.6, + "learning_rate": 1.440527444848193e-06, + "loss": 0.1042, + "step": 96420 + }, + { + "epoch": 3.6, + "learning_rate": 1.440303302198329e-06, + "loss": 0.1279, + "step": 96450 + }, + { + "epoch": 3.6, + "learning_rate": 1.4400792641441078e-06, + "loss": 0.1308, + "step": 96480 + }, + { + "epoch": 3.6, + "learning_rate": 1.4398553306042063e-06, + "loss": 0.1229, + "step": 96510 + }, + { + "epoch": 3.6, + "learning_rate": 1.4396315014973892e-06, + "loss": 0.1241, + "step": 96540 + }, + { + "epoch": 3.6, + "learning_rate": 1.4394077767425097e-06, + "loss": 0.1101, + "step": 96570 + }, + { + "epoch": 3.6, + "learning_rate": 1.4391841562585096e-06, + "loss": 0.1157, + "step": 96600 + }, + { + "epoch": 3.6, + "learning_rate": 1.4389606399644181e-06, + "loss": 0.1147, + "step": 96630 + }, + { + "epoch": 3.61, + "learning_rate": 1.4387372277793537e-06, + "loss": 0.1013, + "step": 96660 + }, + { + "epoch": 3.61, + "learning_rate": 1.4385139196225214e-06, + "loss": 0.126, + "step": 96690 + }, + { + "epoch": 3.61, + "learning_rate": 1.4382907154132144e-06, + "loss": 0.1176, + "step": 96720 + }, + { + "epoch": 3.61, + "learning_rate": 1.4380676150708138e-06, + "loss": 0.1106, + "step": 96750 + }, + { + "epoch": 3.61, + "learning_rate": 1.4378446185147884e-06, + "loss": 0.13, + "step": 96780 + }, + { + "epoch": 3.61, + "learning_rate": 1.4376217256646936e-06, + "loss": 0.1368, + "step": 96810 + }, + { + "epoch": 3.61, + "learning_rate": 1.4373989364401727e-06, + "loss": 0.1175, + "step": 96840 + }, + { + "epoch": 3.61, + "learning_rate": 1.4371762507609556e-06, + "loss": 0.1321, + "step": 96870 + }, + { + "epoch": 3.61, + "learning_rate": 1.43695366854686e-06, + "loss": 0.1229, + "step": 96900 + }, + { + "epoch": 3.62, + "learning_rate": 1.43673118971779e-06, + "loss": 0.1182, + "step": 96930 + }, + { + "epoch": 3.62, + "learning_rate": 1.436508814193736e-06, + "loss": 0.1125, + "step": 96960 + }, + { + "epoch": 3.62, + "learning_rate": 1.4362865418947767e-06, + "loss": 0.1194, + "step": 96990 + }, + { + "epoch": 3.62, + "learning_rate": 1.4360643727410753e-06, + "loss": 0.1089, + "step": 97020 + }, + { + "epoch": 3.62, + "learning_rate": 1.4358423066528826e-06, + "loss": 0.1206, + "step": 97050 + }, + { + "epoch": 3.62, + "learning_rate": 1.435620343550536e-06, + "loss": 0.1121, + "step": 97080 + }, + { + "epoch": 3.62, + "learning_rate": 1.4353984833544576e-06, + "loss": 0.1156, + "step": 97110 + }, + { + "epoch": 3.62, + "learning_rate": 1.4351767259851573e-06, + "loss": 0.1002, + "step": 97140 + }, + { + "epoch": 3.62, + "learning_rate": 1.4349550713632295e-06, + "loss": 0.1131, + "step": 97170 + }, + { + "epoch": 3.63, + "learning_rate": 1.434733519409356e-06, + "loss": 0.1187, + "step": 97200 + }, + { + "epoch": 3.63, + "learning_rate": 1.434512070044302e-06, + "loss": 0.1085, + "step": 97230 + }, + { + "epoch": 3.63, + "learning_rate": 1.4342907231889208e-06, + "loss": 0.1583, + "step": 97260 + }, + { + "epoch": 3.63, + "learning_rate": 1.4340694787641495e-06, + "loss": 0.1241, + "step": 97290 + }, + { + "epoch": 3.63, + "learning_rate": 1.4338483366910113e-06, + "loss": 0.151, + "step": 97320 + }, + { + "epoch": 3.63, + "learning_rate": 1.4336272968906137e-06, + "loss": 0.1152, + "step": 97350 + }, + { + "epoch": 3.63, + "learning_rate": 1.433406359284151e-06, + "loss": 0.1052, + "step": 97380 + }, + { + "epoch": 3.63, + "learning_rate": 1.4331855237929008e-06, + "loss": 0.172, + "step": 97410 + }, + { + "epoch": 3.63, + "learning_rate": 1.432964790338226e-06, + "loss": 0.1167, + "step": 97440 + }, + { + "epoch": 3.64, + "learning_rate": 1.432744158841575e-06, + "loss": 0.1048, + "step": 97470 + }, + { + "epoch": 3.64, + "learning_rate": 1.4325236292244808e-06, + "loss": 0.1389, + "step": 97500 + }, + { + "epoch": 3.64, + "learning_rate": 1.4323032014085592e-06, + "loss": 0.1215, + "step": 97530 + }, + { + "epoch": 3.64, + "learning_rate": 1.4320828753155122e-06, + "loss": 0.131, + "step": 97560 + }, + { + "epoch": 3.64, + "learning_rate": 1.4318626508671255e-06, + "loss": 0.1223, + "step": 97590 + }, + { + "epoch": 3.64, + "learning_rate": 1.4316425279852693e-06, + "loss": 0.1169, + "step": 97620 + }, + { + "epoch": 3.64, + "learning_rate": 1.4314225065918974e-06, + "loss": 0.131, + "step": 97650 + }, + { + "epoch": 3.64, + "learning_rate": 1.4312025866090474e-06, + "loss": 0.1271, + "step": 97680 + }, + { + "epoch": 3.64, + "learning_rate": 1.430990093615422e-06, + "loss": 0.1121, + "step": 97710 + }, + { + "epoch": 3.65, + "learning_rate": 1.4307703728461559e-06, + "loss": 0.1135, + "step": 97740 + }, + { + "epoch": 3.65, + "learning_rate": 1.430550753256617e-06, + "loss": 0.1015, + "step": 97770 + }, + { + "epoch": 3.65, + "learning_rate": 1.4303312347691753e-06, + "loss": 0.1113, + "step": 97800 + }, + { + "epoch": 3.65, + "learning_rate": 1.4301118173062832e-06, + "loss": 0.1095, + "step": 97830 + }, + { + "epoch": 3.65, + "learning_rate": 1.4298925007904774e-06, + "loss": 0.1054, + "step": 97860 + }, + { + "epoch": 3.65, + "learning_rate": 1.4296732851443772e-06, + "loss": 0.108, + "step": 97890 + }, + { + "epoch": 3.65, + "learning_rate": 1.4294614724960801e-06, + "loss": 0.115, + "step": 97920 + }, + { + "epoch": 3.65, + "learning_rate": 1.4292424550016517e-06, + "loss": 0.1127, + "step": 97950 + }, + { + "epoch": 3.65, + "learning_rate": 1.4290235381478545e-06, + "loss": 0.0992, + "step": 97980 + }, + { + "epoch": 3.66, + "learning_rate": 1.4288047218576368e-06, + "loss": 0.1295, + "step": 98010 + }, + { + "epoch": 3.66, + "learning_rate": 1.428586006054029e-06, + "loss": 0.1439, + "step": 98040 + }, + { + "epoch": 3.66, + "learning_rate": 1.4283673906601443e-06, + "loss": 0.0981, + "step": 98070 + }, + { + "epoch": 3.66, + "learning_rate": 1.4281488755991779e-06, + "loss": 0.1126, + "step": 98100 + }, + { + "epoch": 3.66, + "learning_rate": 1.4279304607944077e-06, + "loss": 0.1362, + "step": 98130 + }, + { + "epoch": 3.66, + "learning_rate": 1.4277121461691937e-06, + "loss": 0.1153, + "step": 98160 + }, + { + "epoch": 3.66, + "learning_rate": 1.4274939316469769e-06, + "loss": 0.1056, + "step": 98190 + }, + { + "epoch": 3.66, + "learning_rate": 1.427275817151281e-06, + "loss": 0.1356, + "step": 98220 + }, + { + "epoch": 3.66, + "learning_rate": 1.4270578026057119e-06, + "loss": 0.1119, + "step": 98250 + }, + { + "epoch": 3.67, + "learning_rate": 1.4268398879339565e-06, + "loss": 0.0941, + "step": 98280 + }, + { + "epoch": 3.67, + "learning_rate": 1.426622073059783e-06, + "loss": 0.1, + "step": 98310 + }, + { + "epoch": 3.67, + "learning_rate": 1.4264043579070419e-06, + "loss": 0.1155, + "step": 98340 + }, + { + "epoch": 3.67, + "learning_rate": 1.4261867423996644e-06, + "loss": 0.1066, + "step": 98370 + }, + { + "epoch": 3.67, + "learning_rate": 1.4259692264616632e-06, + "loss": 0.1106, + "step": 98400 + }, + { + "epoch": 3.67, + "learning_rate": 1.4257518100171313e-06, + "loss": 0.1162, + "step": 98430 + }, + { + "epoch": 3.67, + "learning_rate": 1.4255344929902443e-06, + "loss": 0.1214, + "step": 98460 + }, + { + "epoch": 3.67, + "learning_rate": 1.4253172753052571e-06, + "loss": 0.1344, + "step": 98490 + }, + { + "epoch": 3.67, + "learning_rate": 1.425100156886506e-06, + "loss": 0.1302, + "step": 98520 + }, + { + "epoch": 3.68, + "learning_rate": 1.4248831376584085e-06, + "loss": 0.1253, + "step": 98550 + }, + { + "epoch": 3.68, + "learning_rate": 1.4246662175454615e-06, + "loss": 0.11, + "step": 98580 + }, + { + "epoch": 3.68, + "learning_rate": 1.424449396472243e-06, + "loss": 0.1244, + "step": 98610 + }, + { + "epoch": 3.68, + "learning_rate": 1.4242326743634117e-06, + "loss": 0.117, + "step": 98640 + }, + { + "epoch": 3.68, + "learning_rate": 1.4240160511437056e-06, + "loss": 0.1077, + "step": 98670 + }, + { + "epoch": 3.68, + "learning_rate": 1.4237995267379438e-06, + "loss": 0.109, + "step": 98700 + }, + { + "epoch": 3.68, + "learning_rate": 1.4235831010710242e-06, + "loss": 0.0924, + "step": 98730 + }, + { + "epoch": 3.68, + "learning_rate": 1.4233667740679259e-06, + "loss": 0.1209, + "step": 98760 + }, + { + "epoch": 3.68, + "learning_rate": 1.4231505456537064e-06, + "loss": 0.1183, + "step": 98790 + }, + { + "epoch": 3.69, + "learning_rate": 1.4229344157535044e-06, + "loss": 0.1203, + "step": 98820 + }, + { + "epoch": 3.69, + "learning_rate": 1.4227183842925372e-06, + "loss": 0.1105, + "step": 98850 + }, + { + "epoch": 3.69, + "learning_rate": 1.4225024511961013e-06, + "loss": 0.1109, + "step": 98880 + }, + { + "epoch": 3.69, + "learning_rate": 1.4222866163895734e-06, + "loss": 0.1084, + "step": 98910 + }, + { + "epoch": 3.69, + "learning_rate": 1.4220708797984086e-06, + "loss": 0.1072, + "step": 98940 + }, + { + "epoch": 3.69, + "learning_rate": 1.421855241348142e-06, + "loss": 0.1183, + "step": 98970 + }, + { + "epoch": 3.69, + "learning_rate": 1.4216397009643871e-06, + "loss": 0.0961, + "step": 99000 + }, + { + "epoch": 3.69, + "learning_rate": 1.421424258572836e-06, + "loss": 0.1152, + "step": 99030 + }, + { + "epoch": 3.69, + "learning_rate": 1.4212089140992608e-06, + "loss": 0.1305, + "step": 99060 + }, + { + "epoch": 3.7, + "learning_rate": 1.4209936674695108e-06, + "loss": 0.1238, + "step": 99090 + }, + { + "epoch": 3.7, + "learning_rate": 1.420778518609515e-06, + "loss": 0.1217, + "step": 99120 + }, + { + "epoch": 3.7, + "learning_rate": 1.4205634674452803e-06, + "loss": 0.1249, + "step": 99150 + }, + { + "epoch": 3.7, + "learning_rate": 1.4203485139028927e-06, + "loss": 0.14, + "step": 99180 + }, + { + "epoch": 3.7, + "learning_rate": 1.4201336579085157e-06, + "loss": 0.0928, + "step": 99210 + }, + { + "epoch": 3.7, + "learning_rate": 1.419918899388391e-06, + "loss": 0.1117, + "step": 99240 + }, + { + "epoch": 3.7, + "learning_rate": 1.419704238268839e-06, + "loss": 0.1062, + "step": 99270 + }, + { + "epoch": 3.7, + "learning_rate": 1.4194896744762571e-06, + "loss": 0.1059, + "step": 99300 + }, + { + "epoch": 3.7, + "learning_rate": 1.4192752079371213e-06, + "loss": 0.1109, + "step": 99330 + }, + { + "epoch": 3.71, + "learning_rate": 1.4190608385779853e-06, + "loss": 0.1194, + "step": 99360 + }, + { + "epoch": 3.71, + "learning_rate": 1.41884656632548e-06, + "loss": 0.1202, + "step": 99390 + }, + { + "epoch": 3.71, + "learning_rate": 1.4186323911063138e-06, + "loss": 0.1218, + "step": 99420 + }, + { + "epoch": 3.71, + "learning_rate": 1.4184183128472735e-06, + "loss": 0.1167, + "step": 99450 + }, + { + "epoch": 3.71, + "learning_rate": 1.4182043314752219e-06, + "loss": 0.1251, + "step": 99480 + }, + { + "epoch": 3.71, + "learning_rate": 1.4179904469170998e-06, + "loss": 0.1127, + "step": 99510 + }, + { + "epoch": 3.71, + "learning_rate": 1.4177766590999248e-06, + "loss": 0.1076, + "step": 99540 + }, + { + "epoch": 3.71, + "learning_rate": 1.4175629679507922e-06, + "loss": 0.1442, + "step": 99570 + }, + { + "epoch": 3.71, + "learning_rate": 1.4173493733968724e-06, + "loss": 0.1303, + "step": 99600 + }, + { + "epoch": 3.72, + "learning_rate": 1.4171358753654148e-06, + "loss": 0.1263, + "step": 99630 + }, + { + "epoch": 3.72, + "learning_rate": 1.4169224737837442e-06, + "loss": 0.1221, + "step": 99660 + }, + { + "epoch": 3.72, + "learning_rate": 1.4167091685792625e-06, + "loss": 0.1102, + "step": 99690 + }, + { + "epoch": 3.72, + "learning_rate": 1.4164959596794472e-06, + "loss": 0.1012, + "step": 99720 + }, + { + "epoch": 3.72, + "learning_rate": 1.4162828470118536e-06, + "loss": 0.1182, + "step": 99750 + }, + { + "epoch": 3.72, + "learning_rate": 1.4160698305041118e-06, + "loss": 0.1022, + "step": 99780 + }, + { + "epoch": 3.72, + "learning_rate": 1.4158569100839291e-06, + "loss": 0.1242, + "step": 99810 + }, + { + "epoch": 3.72, + "learning_rate": 1.4156440856790887e-06, + "loss": 0.1134, + "step": 99840 + }, + { + "epoch": 3.72, + "learning_rate": 1.4154313572174494e-06, + "loss": 0.1051, + "step": 99870 + }, + { + "epoch": 3.73, + "learning_rate": 1.415218724626946e-06, + "loss": 0.0987, + "step": 99900 + }, + { + "epoch": 3.73, + "learning_rate": 1.4150061878355892e-06, + "loss": 0.1139, + "step": 99930 + }, + { + "epoch": 3.73, + "learning_rate": 1.4147937467714653e-06, + "loss": 0.1376, + "step": 99960 + }, + { + "epoch": 3.73, + "learning_rate": 1.4145814013627363e-06, + "loss": 0.1143, + "step": 99990 + }, + { + "epoch": 3.73, + "learning_rate": 1.4143691515376389e-06, + "loss": 0.1214, + "step": 100020 + }, + { + "epoch": 3.73, + "learning_rate": 1.4141569972244865e-06, + "loss": 0.1126, + "step": 100050 + }, + { + "epoch": 3.73, + "learning_rate": 1.4139449383516666e-06, + "loss": 0.1227, + "step": 100080 + }, + { + "epoch": 3.73, + "learning_rate": 1.4137329748476418e-06, + "loss": 0.1061, + "step": 100110 + }, + { + "epoch": 3.73, + "learning_rate": 1.4135211066409513e-06, + "loss": 0.101, + "step": 100140 + }, + { + "epoch": 3.74, + "learning_rate": 1.4133093336602069e-06, + "loss": 0.1301, + "step": 100170 + }, + { + "epoch": 3.74, + "learning_rate": 1.4130976558340972e-06, + "loss": 0.1159, + "step": 100200 + }, + { + "epoch": 3.74, + "learning_rate": 1.4128860730913845e-06, + "loss": 0.0994, + "step": 100230 + }, + { + "epoch": 3.74, + "learning_rate": 1.4126745853609064e-06, + "loss": 0.1119, + "step": 100260 + }, + { + "epoch": 3.74, + "learning_rate": 1.4124631925715744e-06, + "loss": 0.1187, + "step": 100290 + }, + { + "epoch": 3.74, + "learning_rate": 1.4122518946523744e-06, + "loss": 0.1317, + "step": 100320 + }, + { + "epoch": 3.74, + "learning_rate": 1.4120406915323675e-06, + "loss": 0.1247, + "step": 100350 + }, + { + "epoch": 3.74, + "learning_rate": 1.4118295831406886e-06, + "loss": 0.1153, + "step": 100380 + }, + { + "epoch": 3.74, + "learning_rate": 1.4116185694065465e-06, + "loss": 0.0972, + "step": 100410 + }, + { + "epoch": 3.75, + "learning_rate": 1.4114076502592238e-06, + "loss": 0.1053, + "step": 100440 + }, + { + "epoch": 3.75, + "learning_rate": 1.411196825628078e-06, + "loss": 0.1179, + "step": 100470 + }, + { + "epoch": 3.75, + "learning_rate": 1.4109860954425395e-06, + "loss": 0.1218, + "step": 100500 + }, + { + "epoch": 3.75, + "learning_rate": 1.410775459632113e-06, + "loss": 0.1105, + "step": 100530 + }, + { + "epoch": 3.75, + "learning_rate": 1.4105649181263764e-06, + "loss": 0.122, + "step": 100560 + }, + { + "epoch": 3.75, + "learning_rate": 1.4103544708549818e-06, + "loss": 0.1452, + "step": 100590 + }, + { + "epoch": 3.75, + "learning_rate": 1.4101441177476543e-06, + "loss": 0.0935, + "step": 100620 + }, + { + "epoch": 3.75, + "learning_rate": 1.4099338587341923e-06, + "loss": 0.1175, + "step": 100650 + }, + { + "epoch": 3.76, + "learning_rate": 1.4097236937444675e-06, + "loss": 0.1094, + "step": 100680 + }, + { + "epoch": 3.76, + "learning_rate": 1.409513622708425e-06, + "loss": 0.1027, + "step": 100710 + }, + { + "epoch": 3.76, + "learning_rate": 1.4093036455560824e-06, + "loss": 0.1261, + "step": 100740 + }, + { + "epoch": 3.76, + "learning_rate": 1.4090937622175312e-06, + "loss": 0.1162, + "step": 100770 + }, + { + "epoch": 3.76, + "learning_rate": 1.4088839726229348e-06, + "loss": 0.117, + "step": 100800 + }, + { + "epoch": 3.76, + "learning_rate": 1.4086742767025294e-06, + "loss": 0.1106, + "step": 100830 + }, + { + "epoch": 3.76, + "learning_rate": 1.408464674386625e-06, + "loss": 0.1228, + "step": 100860 + }, + { + "epoch": 3.76, + "learning_rate": 1.4082551656056025e-06, + "loss": 0.1195, + "step": 100890 + }, + { + "epoch": 3.76, + "learning_rate": 1.408045750289917e-06, + "loss": 0.1171, + "step": 100920 + }, + { + "epoch": 3.77, + "learning_rate": 1.4078364283700944e-06, + "loss": 0.1187, + "step": 100950 + }, + { + "epoch": 3.77, + "learning_rate": 1.4076271997767338e-06, + "loss": 0.0992, + "step": 100980 + }, + { + "epoch": 3.77, + "learning_rate": 1.4074180644405066e-06, + "loss": 0.114, + "step": 101010 + }, + { + "epoch": 3.77, + "learning_rate": 1.4072090222921556e-06, + "loss": 0.1496, + "step": 101040 + }, + { + "epoch": 3.77, + "learning_rate": 1.407000073262496e-06, + "loss": 0.106, + "step": 101070 + }, + { + "epoch": 3.77, + "learning_rate": 1.4067912172824144e-06, + "loss": 0.125, + "step": 101100 + }, + { + "epoch": 3.77, + "learning_rate": 1.4065824542828704e-06, + "loss": 0.1184, + "step": 101130 + }, + { + "epoch": 3.77, + "learning_rate": 1.406373784194894e-06, + "loss": 0.101, + "step": 101160 + }, + { + "epoch": 3.77, + "learning_rate": 1.4061652069495874e-06, + "loss": 0.1235, + "step": 101190 + }, + { + "epoch": 3.78, + "learning_rate": 1.4059567224781244e-06, + "loss": 0.1172, + "step": 101220 + }, + { + "epoch": 3.78, + "learning_rate": 1.4057483307117492e-06, + "loss": 0.1236, + "step": 101250 + }, + { + "epoch": 3.78, + "learning_rate": 1.4055469733943611e-06, + "loss": 0.1012, + "step": 101280 + }, + { + "epoch": 3.78, + "learning_rate": 1.405338763747695e-06, + "loss": 0.1181, + "step": 101310 + }, + { + "epoch": 3.78, + "learning_rate": 1.4051306466025635e-06, + "loss": 0.1481, + "step": 101340 + }, + { + "epoch": 3.78, + "learning_rate": 1.4049226218904939e-06, + "loss": 0.1125, + "step": 101370 + }, + { + "epoch": 3.78, + "learning_rate": 1.4047146895430846e-06, + "loss": 0.0995, + "step": 101400 + }, + { + "epoch": 3.78, + "learning_rate": 1.4045068494920044e-06, + "loss": 0.1279, + "step": 101430 + }, + { + "epoch": 3.78, + "learning_rate": 1.4042991016689937e-06, + "loss": 0.1084, + "step": 101460 + }, + { + "epoch": 3.79, + "learning_rate": 1.4040914460058624e-06, + "loss": 0.1215, + "step": 101490 + }, + { + "epoch": 3.79, + "learning_rate": 1.4038838824344921e-06, + "loss": 0.1227, + "step": 101520 + }, + { + "epoch": 3.79, + "learning_rate": 1.4036764108868342e-06, + "loss": 0.1338, + "step": 101550 + }, + { + "epoch": 3.79, + "learning_rate": 1.4034690312949106e-06, + "loss": 0.1117, + "step": 101580 + }, + { + "epoch": 3.79, + "learning_rate": 1.4032617435908135e-06, + "loss": 0.1473, + "step": 101610 + }, + { + "epoch": 3.79, + "learning_rate": 1.4030545477067056e-06, + "loss": 0.1147, + "step": 101640 + }, + { + "epoch": 3.79, + "learning_rate": 1.402847443574819e-06, + "loss": 0.0933, + "step": 101670 + }, + { + "epoch": 3.79, + "learning_rate": 1.4026404311274564e-06, + "loss": 0.101, + "step": 101700 + }, + { + "epoch": 3.79, + "learning_rate": 1.40243351029699e-06, + "loss": 0.111, + "step": 101730 + }, + { + "epoch": 3.8, + "learning_rate": 1.4022266810158624e-06, + "loss": 0.1315, + "step": 101760 + }, + { + "epoch": 3.8, + "learning_rate": 1.4020199432165854e-06, + "loss": 0.1191, + "step": 101790 + }, + { + "epoch": 3.8, + "learning_rate": 1.4018132968317404e-06, + "loss": 0.1174, + "step": 101820 + }, + { + "epoch": 3.8, + "learning_rate": 1.4016067417939789e-06, + "loss": 0.1159, + "step": 101850 + }, + { + "epoch": 3.8, + "learning_rate": 1.401400278036021e-06, + "loss": 0.1581, + "step": 101880 + }, + { + "epoch": 3.8, + "learning_rate": 1.401193905490657e-06, + "loss": 0.1141, + "step": 101910 + }, + { + "epoch": 3.8, + "learning_rate": 1.4009876240907458e-06, + "loss": 0.1043, + "step": 101940 + }, + { + "epoch": 3.8, + "learning_rate": 1.4007814337692157e-06, + "loss": 0.1014, + "step": 101970 + }, + { + "epoch": 3.8, + "learning_rate": 1.4005753344590648e-06, + "loss": 0.107, + "step": 102000 + }, + { + "epoch": 3.81, + "learning_rate": 1.4003693260933584e-06, + "loss": 0.1048, + "step": 102030 + }, + { + "epoch": 3.81, + "learning_rate": 1.400163408605233e-06, + "loss": 0.1149, + "step": 102060 + }, + { + "epoch": 3.81, + "learning_rate": 1.3999575819278919e-06, + "loss": 0.0958, + "step": 102090 + }, + { + "epoch": 3.81, + "learning_rate": 1.399751845994608e-06, + "loss": 0.1199, + "step": 102120 + }, + { + "epoch": 3.81, + "learning_rate": 1.399546200738723e-06, + "loss": 0.1058, + "step": 102150 + }, + { + "epoch": 3.81, + "learning_rate": 1.399340646093647e-06, + "loss": 0.1048, + "step": 102180 + }, + { + "epoch": 3.81, + "learning_rate": 1.399135181992858e-06, + "loss": 0.1233, + "step": 102210 + }, + { + "epoch": 3.81, + "learning_rate": 1.3989298083699029e-06, + "loss": 0.0979, + "step": 102240 + }, + { + "epoch": 3.81, + "learning_rate": 1.3987245251583972e-06, + "loss": 0.1196, + "step": 102270 + }, + { + "epoch": 3.82, + "learning_rate": 1.3985193322920238e-06, + "loss": 0.0984, + "step": 102300 + }, + { + "epoch": 3.82, + "learning_rate": 1.3983142297045335e-06, + "loss": 0.1046, + "step": 102330 + }, + { + "epoch": 3.82, + "learning_rate": 1.3981092173297464e-06, + "loss": 0.1106, + "step": 102360 + }, + { + "epoch": 3.82, + "learning_rate": 1.3979042951015493e-06, + "loss": 0.132, + "step": 102390 + }, + { + "epoch": 3.82, + "learning_rate": 1.397699462953897e-06, + "loss": 0.1121, + "step": 102420 + }, + { + "epoch": 3.82, + "learning_rate": 1.3974947208208125e-06, + "loss": 0.1055, + "step": 102450 + }, + { + "epoch": 3.82, + "learning_rate": 1.3972900686363861e-06, + "loss": 0.119, + "step": 102480 + }, + { + "epoch": 3.82, + "learning_rate": 1.3970855063347757e-06, + "loss": 0.1229, + "step": 102510 + }, + { + "epoch": 3.82, + "learning_rate": 1.3968810338502064e-06, + "loss": 0.1116, + "step": 102540 + }, + { + "epoch": 3.83, + "learning_rate": 1.3966766511169712e-06, + "loss": 0.1086, + "step": 102570 + }, + { + "epoch": 3.83, + "learning_rate": 1.39647235806943e-06, + "loss": 0.1059, + "step": 102600 + }, + { + "epoch": 3.83, + "learning_rate": 1.3962681546420103e-06, + "loss": 0.1363, + "step": 102630 + }, + { + "epoch": 3.83, + "learning_rate": 1.3960640407692055e-06, + "loss": 0.1313, + "step": 102660 + }, + { + "epoch": 3.83, + "learning_rate": 1.395860016385578e-06, + "loss": 0.1198, + "step": 102690 + }, + { + "epoch": 3.83, + "learning_rate": 1.3956560814257553e-06, + "loss": 0.1155, + "step": 102720 + }, + { + "epoch": 3.83, + "learning_rate": 1.3954522358244327e-06, + "loss": 0.0942, + "step": 102750 + }, + { + "epoch": 3.83, + "learning_rate": 1.3952484795163714e-06, + "loss": 0.0958, + "step": 102780 + }, + { + "epoch": 3.83, + "learning_rate": 1.395044812436401e-06, + "loss": 0.1206, + "step": 102810 + }, + { + "epoch": 3.84, + "learning_rate": 1.3948412345194157e-06, + "loss": 0.1209, + "step": 102840 + }, + { + "epoch": 3.84, + "learning_rate": 1.3946377457003774e-06, + "loss": 0.0968, + "step": 102870 + }, + { + "epoch": 3.84, + "learning_rate": 1.3944343459143137e-06, + "loss": 0.1192, + "step": 102900 + }, + { + "epoch": 3.84, + "learning_rate": 1.394231035096319e-06, + "loss": 0.1358, + "step": 102930 + }, + { + "epoch": 3.84, + "learning_rate": 1.3940278131815536e-06, + "loss": 0.1137, + "step": 102960 + }, + { + "epoch": 3.84, + "learning_rate": 1.3938246801052445e-06, + "loss": 0.1127, + "step": 102990 + }, + { + "epoch": 3.84, + "learning_rate": 1.3936216358026841e-06, + "loss": 0.1176, + "step": 103020 + }, + { + "epoch": 3.84, + "learning_rate": 1.3934186802092306e-06, + "loss": 0.0936, + "step": 103050 + }, + { + "epoch": 3.84, + "learning_rate": 1.3932158132603095e-06, + "loss": 0.1183, + "step": 103080 + }, + { + "epoch": 3.85, + "learning_rate": 1.3930130348914097e-06, + "loss": 0.1232, + "step": 103110 + }, + { + "epoch": 3.85, + "learning_rate": 1.3928103450380883e-06, + "loss": 0.106, + "step": 103140 + }, + { + "epoch": 3.85, + "learning_rate": 1.3926077436359668e-06, + "loss": 0.1055, + "step": 103170 + }, + { + "epoch": 3.85, + "learning_rate": 1.3924052306207317e-06, + "loss": 0.1181, + "step": 103200 + }, + { + "epoch": 3.85, + "learning_rate": 1.3922028059281358e-06, + "loss": 0.117, + "step": 103230 + }, + { + "epoch": 3.85, + "learning_rate": 1.3920004694939975e-06, + "loss": 0.1069, + "step": 103260 + }, + { + "epoch": 3.85, + "learning_rate": 1.3917982212541996e-06, + "loss": 0.1076, + "step": 103290 + }, + { + "epoch": 3.85, + "learning_rate": 1.3915960611446907e-06, + "loss": 0.103, + "step": 103320 + }, + { + "epoch": 3.85, + "learning_rate": 1.3913939891014843e-06, + "loss": 0.1132, + "step": 103350 + }, + { + "epoch": 3.86, + "learning_rate": 1.3911920050606586e-06, + "loss": 0.1139, + "step": 103380 + }, + { + "epoch": 3.86, + "learning_rate": 1.3909901089583574e-06, + "loss": 0.1523, + "step": 103410 + }, + { + "epoch": 3.86, + "learning_rate": 1.3907883007307891e-06, + "loss": 0.1019, + "step": 103440 + }, + { + "epoch": 3.86, + "learning_rate": 1.3905865803142269e-06, + "loss": 0.1363, + "step": 103470 + }, + { + "epoch": 3.86, + "learning_rate": 1.390384947645008e-06, + "loss": 0.1106, + "step": 103500 + }, + { + "epoch": 3.86, + "learning_rate": 1.3901834026595352e-06, + "loss": 0.1135, + "step": 103530 + }, + { + "epoch": 3.86, + "learning_rate": 1.389988659128795e-06, + "loss": 0.1244, + "step": 103560 + }, + { + "epoch": 3.86, + "learning_rate": 1.389787286402743e-06, + "loss": 0.1028, + "step": 103590 + }, + { + "epoch": 3.86, + "learning_rate": 1.3895860011721435e-06, + "loss": 0.1134, + "step": 103620 + }, + { + "epoch": 3.87, + "learning_rate": 1.3893848033736538e-06, + "loss": 0.1133, + "step": 103650 + }, + { + "epoch": 3.87, + "learning_rate": 1.389183692943996e-06, + "loss": 0.1096, + "step": 103680 + }, + { + "epoch": 3.87, + "learning_rate": 1.3889826698199566e-06, + "loss": 0.1113, + "step": 103710 + }, + { + "epoch": 3.87, + "learning_rate": 1.3887817339383853e-06, + "loss": 0.1163, + "step": 103740 + }, + { + "epoch": 3.87, + "learning_rate": 1.388580885236196e-06, + "loss": 0.1146, + "step": 103770 + }, + { + "epoch": 3.87, + "learning_rate": 1.3883801236503666e-06, + "loss": 0.1152, + "step": 103800 + }, + { + "epoch": 3.87, + "learning_rate": 1.3881794491179394e-06, + "loss": 0.1135, + "step": 103830 + }, + { + "epoch": 3.87, + "learning_rate": 1.3879788615760188e-06, + "loss": 0.1387, + "step": 103860 + }, + { + "epoch": 3.87, + "learning_rate": 1.3877783609617737e-06, + "loss": 0.1048, + "step": 103890 + }, + { + "epoch": 3.88, + "learning_rate": 1.3875779472124371e-06, + "loss": 0.1132, + "step": 103920 + }, + { + "epoch": 3.88, + "learning_rate": 1.3873776202653045e-06, + "loss": 0.1212, + "step": 103950 + }, + { + "epoch": 3.88, + "learning_rate": 1.3871773800577352e-06, + "loss": 0.1054, + "step": 103980 + }, + { + "epoch": 3.88, + "learning_rate": 1.3869772265271516e-06, + "loss": 0.1036, + "step": 104010 + }, + { + "epoch": 3.88, + "learning_rate": 1.3867771596110397e-06, + "loss": 0.1176, + "step": 104040 + }, + { + "epoch": 3.88, + "learning_rate": 1.3865771792469473e-06, + "loss": 0.1169, + "step": 104070 + }, + { + "epoch": 3.88, + "learning_rate": 1.3863772853724872e-06, + "loss": 0.1286, + "step": 104100 + }, + { + "epoch": 3.88, + "learning_rate": 1.3861774779253334e-06, + "loss": 0.1002, + "step": 104130 + }, + { + "epoch": 3.88, + "learning_rate": 1.385977756843224e-06, + "loss": 0.1055, + "step": 104160 + }, + { + "epoch": 3.89, + "learning_rate": 1.3857781220639588e-06, + "loss": 0.1271, + "step": 104190 + }, + { + "epoch": 3.89, + "learning_rate": 1.3855785735254016e-06, + "loss": 0.111, + "step": 104220 + }, + { + "epoch": 3.89, + "learning_rate": 1.385379111165477e-06, + "loss": 0.1064, + "step": 104250 + }, + { + "epoch": 3.89, + "learning_rate": 1.3851797349221744e-06, + "loss": 0.1118, + "step": 104280 + }, + { + "epoch": 3.89, + "learning_rate": 1.3849804447335435e-06, + "loss": 0.1082, + "step": 104310 + }, + { + "epoch": 3.89, + "learning_rate": 1.3847812405376978e-06, + "loss": 0.1194, + "step": 104340 + }, + { + "epoch": 3.89, + "learning_rate": 1.3845821222728126e-06, + "loss": 0.1165, + "step": 104370 + }, + { + "epoch": 3.89, + "learning_rate": 1.384383089877125e-06, + "loss": 0.1352, + "step": 104400 + }, + { + "epoch": 3.89, + "learning_rate": 1.3841841432889352e-06, + "loss": 0.1224, + "step": 104430 + }, + { + "epoch": 3.9, + "learning_rate": 1.3839852824466047e-06, + "loss": 0.1184, + "step": 104460 + }, + { + "epoch": 3.9, + "learning_rate": 1.383786507288557e-06, + "loss": 0.095, + "step": 104490 + }, + { + "epoch": 3.9, + "learning_rate": 1.3835878177532778e-06, + "loss": 0.114, + "step": 104520 + }, + { + "epoch": 3.9, + "learning_rate": 1.3833892137793143e-06, + "loss": 0.1229, + "step": 104550 + }, + { + "epoch": 3.9, + "learning_rate": 1.383190695305276e-06, + "loss": 0.1303, + "step": 104580 + }, + { + "epoch": 3.9, + "learning_rate": 1.382992262269833e-06, + "loss": 0.1409, + "step": 104610 + }, + { + "epoch": 3.9, + "learning_rate": 1.3827939146117186e-06, + "loss": 0.1269, + "step": 104640 + }, + { + "epoch": 3.9, + "learning_rate": 1.3825956522697256e-06, + "loss": 0.0983, + "step": 104670 + }, + { + "epoch": 3.9, + "learning_rate": 1.3823974751827096e-06, + "loss": 0.1142, + "step": 104700 + }, + { + "epoch": 3.91, + "learning_rate": 1.3821993832895875e-06, + "loss": 0.1333, + "step": 104730 + }, + { + "epoch": 3.91, + "learning_rate": 1.3820013765293367e-06, + "loss": 0.1081, + "step": 104760 + }, + { + "epoch": 3.91, + "learning_rate": 1.3818034548409964e-06, + "loss": 0.0999, + "step": 104790 + }, + { + "epoch": 3.91, + "learning_rate": 1.3816056181636663e-06, + "loss": 0.1191, + "step": 104820 + }, + { + "epoch": 3.91, + "learning_rate": 1.381407866436508e-06, + "loss": 0.1172, + "step": 104850 + }, + { + "epoch": 3.91, + "learning_rate": 1.3812101995987435e-06, + "loss": 0.1165, + "step": 104880 + }, + { + "epoch": 3.91, + "learning_rate": 1.3810126175896551e-06, + "loss": 0.1119, + "step": 104910 + }, + { + "epoch": 3.91, + "learning_rate": 1.380815120348587e-06, + "loss": 0.0985, + "step": 104940 + }, + { + "epoch": 3.92, + "learning_rate": 1.3806177078149435e-06, + "loss": 0.1227, + "step": 104970 + }, + { + "epoch": 3.92, + "learning_rate": 1.3804203799281896e-06, + "loss": 0.1505, + "step": 105000 + }, + { + "epoch": 3.92, + "learning_rate": 1.3802231366278507e-06, + "loss": 0.1282, + "step": 105030 + }, + { + "epoch": 3.92, + "learning_rate": 1.380025977853513e-06, + "loss": 0.1181, + "step": 105060 + }, + { + "epoch": 3.92, + "learning_rate": 1.3798289035448228e-06, + "loss": 0.1274, + "step": 105090 + }, + { + "epoch": 3.92, + "learning_rate": 1.379631913641487e-06, + "loss": 0.0914, + "step": 105120 + }, + { + "epoch": 3.92, + "learning_rate": 1.3794350080832725e-06, + "loss": 0.1216, + "step": 105150 + }, + { + "epoch": 3.92, + "learning_rate": 1.3792381868100063e-06, + "loss": 0.1029, + "step": 105180 + }, + { + "epoch": 3.92, + "learning_rate": 1.3790414497615755e-06, + "loss": 0.134, + "step": 105210 + }, + { + "epoch": 3.93, + "learning_rate": 1.3788447968779277e-06, + "loss": 0.1104, + "step": 105240 + }, + { + "epoch": 3.93, + "learning_rate": 1.37864822809907e-06, + "loss": 0.1341, + "step": 105270 + }, + { + "epoch": 3.93, + "learning_rate": 1.3784582915027789e-06, + "loss": 0.1317, + "step": 105300 + }, + { + "epoch": 3.93, + "learning_rate": 1.3782618879552259e-06, + "loss": 0.1131, + "step": 105330 + }, + { + "epoch": 3.93, + "learning_rate": 1.3780655683348362e-06, + "loss": 0.1161, + "step": 105360 + }, + { + "epoch": 3.93, + "learning_rate": 1.3778693325818535e-06, + "loss": 0.111, + "step": 105390 + }, + { + "epoch": 3.93, + "learning_rate": 1.3776731806365815e-06, + "loss": 0.1064, + "step": 105420 + }, + { + "epoch": 3.93, + "learning_rate": 1.3774771124393832e-06, + "loss": 0.1345, + "step": 105450 + }, + { + "epoch": 3.93, + "learning_rate": 1.3772811279306814e-06, + "loss": 0.1034, + "step": 105480 + }, + { + "epoch": 3.94, + "learning_rate": 1.3770852270509577e-06, + "loss": 0.1205, + "step": 105510 + }, + { + "epoch": 3.94, + "learning_rate": 1.376889409740753e-06, + "loss": 0.1096, + "step": 105540 + }, + { + "epoch": 3.94, + "learning_rate": 1.3766936759406681e-06, + "loss": 0.1226, + "step": 105570 + }, + { + "epoch": 3.94, + "learning_rate": 1.376498025591362e-06, + "loss": 0.1108, + "step": 105600 + }, + { + "epoch": 3.94, + "learning_rate": 1.3763024586335532e-06, + "loss": 0.1035, + "step": 105630 + }, + { + "epoch": 3.94, + "learning_rate": 1.376106975008019e-06, + "loss": 0.1237, + "step": 105660 + }, + { + "epoch": 3.94, + "learning_rate": 1.375911574655595e-06, + "loss": 0.1138, + "step": 105690 + }, + { + "epoch": 3.94, + "learning_rate": 1.3757162575171767e-06, + "loss": 0.0998, + "step": 105720 + }, + { + "epoch": 3.94, + "learning_rate": 1.3755210235337177e-06, + "loss": 0.1323, + "step": 105750 + }, + { + "epoch": 3.95, + "learning_rate": 1.37532587264623e-06, + "loss": 0.0974, + "step": 105780 + }, + { + "epoch": 3.95, + "learning_rate": 1.3751308047957848e-06, + "loss": 0.116, + "step": 105810 + }, + { + "epoch": 3.95, + "learning_rate": 1.3749358199235115e-06, + "loss": 0.1157, + "step": 105840 + }, + { + "epoch": 3.95, + "learning_rate": 1.3747409179705975e-06, + "loss": 0.1282, + "step": 105870 + }, + { + "epoch": 3.95, + "learning_rate": 1.3745460988782894e-06, + "loss": 0.1165, + "step": 105900 + }, + { + "epoch": 3.95, + "learning_rate": 1.374351362587891e-06, + "loss": 0.102, + "step": 105930 + }, + { + "epoch": 3.95, + "learning_rate": 1.3741567090407655e-06, + "loss": 0.1212, + "step": 105960 + }, + { + "epoch": 3.95, + "learning_rate": 1.3739621381783332e-06, + "loss": 0.1073, + "step": 105990 + }, + { + "epoch": 3.95, + "learning_rate": 1.3737676499420727e-06, + "loss": 0.1018, + "step": 106020 + }, + { + "epoch": 3.96, + "learning_rate": 1.3735732442735211e-06, + "loss": 0.0942, + "step": 106050 + }, + { + "epoch": 3.96, + "learning_rate": 1.3733789211142725e-06, + "loss": 0.1314, + "step": 106080 + }, + { + "epoch": 3.96, + "learning_rate": 1.37318468040598e-06, + "loss": 0.1032, + "step": 106110 + }, + { + "epoch": 3.96, + "learning_rate": 1.3729905220903536e-06, + "loss": 0.0956, + "step": 106140 + }, + { + "epoch": 3.96, + "learning_rate": 1.3727964461091606e-06, + "loss": 0.1217, + "step": 106170 + }, + { + "epoch": 3.96, + "learning_rate": 1.372602452404227e-06, + "loss": 0.1134, + "step": 106200 + }, + { + "epoch": 3.96, + "learning_rate": 1.3724085409174356e-06, + "loss": 0.1021, + "step": 106230 + }, + { + "epoch": 3.96, + "learning_rate": 1.3722147115907266e-06, + "loss": 0.1069, + "step": 106260 + }, + { + "epoch": 3.96, + "learning_rate": 1.3720209643660984e-06, + "loss": 0.0964, + "step": 106290 + }, + { + "epoch": 3.97, + "learning_rate": 1.3718272991856054e-06, + "loss": 0.1349, + "step": 106320 + }, + { + "epoch": 3.97, + "learning_rate": 1.371633715991361e-06, + "loss": 0.109, + "step": 106350 + }, + { + "epoch": 3.97, + "learning_rate": 1.3714402147255338e-06, + "loss": 0.1037, + "step": 106380 + }, + { + "epoch": 3.97, + "learning_rate": 1.3712467953303504e-06, + "loss": 0.1073, + "step": 106410 + }, + { + "epoch": 3.97, + "learning_rate": 1.3710534577480952e-06, + "loss": 0.1274, + "step": 106440 + }, + { + "epoch": 3.97, + "learning_rate": 1.3708602019211083e-06, + "loss": 0.1111, + "step": 106470 + }, + { + "epoch": 3.97, + "learning_rate": 1.3706670277917871e-06, + "loss": 0.0943, + "step": 106500 + }, + { + "epoch": 3.97, + "learning_rate": 1.3704739353025865e-06, + "loss": 0.1143, + "step": 106530 + }, + { + "epoch": 3.97, + "learning_rate": 1.3702809243960166e-06, + "loss": 0.1126, + "step": 106560 + }, + { + "epoch": 3.98, + "learning_rate": 1.3700879950146456e-06, + "loss": 0.1193, + "step": 106590 + }, + { + "epoch": 3.98, + "learning_rate": 1.3698951471010977e-06, + "loss": 0.1067, + "step": 106620 + }, + { + "epoch": 3.98, + "learning_rate": 1.3697023805980537e-06, + "loss": 0.1251, + "step": 106650 + }, + { + "epoch": 3.98, + "learning_rate": 1.3695096954482504e-06, + "loss": 0.1187, + "step": 106680 + }, + { + "epoch": 3.98, + "learning_rate": 1.369317091594482e-06, + "loss": 0.1151, + "step": 106710 + }, + { + "epoch": 3.98, + "learning_rate": 1.3691245689795982e-06, + "loss": 0.1155, + "step": 106740 + }, + { + "epoch": 3.98, + "learning_rate": 1.3689321275465047e-06, + "loss": 0.1101, + "step": 106770 + }, + { + "epoch": 3.98, + "learning_rate": 1.3687397672381643e-06, + "loss": 0.1228, + "step": 106800 + }, + { + "epoch": 3.98, + "learning_rate": 1.3685474879975953e-06, + "loss": 0.1129, + "step": 106830 + }, + { + "epoch": 3.99, + "learning_rate": 1.3683552897678718e-06, + "loss": 0.1096, + "step": 106860 + }, + { + "epoch": 3.99, + "learning_rate": 1.3681631724921244e-06, + "loss": 0.1081, + "step": 106890 + }, + { + "epoch": 3.99, + "learning_rate": 1.367971136113539e-06, + "loss": 0.1173, + "step": 106920 + }, + { + "epoch": 3.99, + "learning_rate": 1.3677791805753576e-06, + "loss": 0.1289, + "step": 106950 + }, + { + "epoch": 3.99, + "learning_rate": 1.3675873058208785e-06, + "loss": 0.1104, + "step": 106980 + }, + { + "epoch": 3.99, + "learning_rate": 1.3673955117934549e-06, + "loss": 0.1059, + "step": 107010 + }, + { + "epoch": 3.99, + "learning_rate": 1.3672037984364952e-06, + "loss": 0.1283, + "step": 107040 + }, + { + "epoch": 3.99, + "learning_rate": 1.3670121656934646e-06, + "loss": 0.0906, + "step": 107070 + }, + { + "epoch": 3.99, + "learning_rate": 1.3668206135078825e-06, + "loss": 0.1177, + "step": 107100 + }, + { + "epoch": 4.0, + "learning_rate": 1.3666291418233252e-06, + "loss": 0.1346, + "step": 107130 + }, + { + "epoch": 4.0, + "learning_rate": 1.3664377505834228e-06, + "loss": 0.126, + "step": 107160 + }, + { + "epoch": 4.0, + "learning_rate": 1.366246439731861e-06, + "loss": 0.144, + "step": 107190 + }, + { + "epoch": 4.0, + "learning_rate": 1.366055209212381e-06, + "loss": 0.1125, + "step": 107220 + }, + { + "epoch": 4.0, + "learning_rate": 1.3658640589687793e-06, + "loss": 0.1121, + "step": 107250 + }, + { + "epoch": 4.0, + "learning_rate": 1.365672988944907e-06, + "loss": 0.1044, + "step": 107280 + }, + { + "epoch": 4.0, + "learning_rate": 1.3654819990846702e-06, + "loss": 0.0992, + "step": 107310 + }, + { + "epoch": 4.0, + "learning_rate": 1.3652910893320304e-06, + "loss": 0.1141, + "step": 107340 + }, + { + "epoch": 4.0, + "learning_rate": 1.3651002596310025e-06, + "loss": 0.1074, + "step": 107370 + }, + { + "epoch": 4.01, + "learning_rate": 1.3649095099256587e-06, + "loss": 0.1144, + "step": 107400 + }, + { + "epoch": 4.01, + "learning_rate": 1.3647188401601233e-06, + "loss": 0.1032, + "step": 107430 + }, + { + "epoch": 4.01, + "learning_rate": 1.3645282502785764e-06, + "loss": 0.1415, + "step": 107460 + }, + { + "epoch": 4.01, + "learning_rate": 1.3643377402252533e-06, + "loss": 0.1187, + "step": 107490 + }, + { + "epoch": 4.01, + "learning_rate": 1.3641473099444421e-06, + "loss": 0.1106, + "step": 107520 + }, + { + "epoch": 4.01, + "learning_rate": 1.3639569593804868e-06, + "loss": 0.1035, + "step": 107550 + }, + { + "epoch": 4.01, + "learning_rate": 1.3637666884777853e-06, + "loss": 0.0971, + "step": 107580 + }, + { + "epoch": 4.01, + "learning_rate": 1.3635764971807894e-06, + "loss": 0.1054, + "step": 107610 + }, + { + "epoch": 4.01, + "learning_rate": 1.3633863854340057e-06, + "loss": 0.1055, + "step": 107640 + }, + { + "epoch": 4.02, + "learning_rate": 1.363196353181994e-06, + "loss": 0.0957, + "step": 107670 + }, + { + "epoch": 4.02, + "learning_rate": 1.3630064003693693e-06, + "loss": 0.1063, + "step": 107700 + }, + { + "epoch": 4.02, + "learning_rate": 1.3628165269408007e-06, + "loss": 0.1403, + "step": 107730 + }, + { + "epoch": 4.02, + "learning_rate": 1.3626267328410096e-06, + "loss": 0.1029, + "step": 107760 + }, + { + "epoch": 4.02, + "learning_rate": 1.362437018014773e-06, + "loss": 0.1215, + "step": 107790 + }, + { + "epoch": 4.02, + "learning_rate": 1.3622473824069208e-06, + "loss": 0.1096, + "step": 107820 + }, + { + "epoch": 4.02, + "learning_rate": 1.3620578259623368e-06, + "loss": 0.1101, + "step": 107850 + }, + { + "epoch": 4.02, + "learning_rate": 1.361868348625959e-06, + "loss": 0.1188, + "step": 107880 + }, + { + "epoch": 4.02, + "learning_rate": 1.3616789503427782e-06, + "loss": 0.1114, + "step": 107910 + }, + { + "epoch": 4.03, + "learning_rate": 1.3614896310578394e-06, + "loss": 0.1102, + "step": 107940 + }, + { + "epoch": 4.03, + "learning_rate": 1.3613003907162404e-06, + "loss": 0.1081, + "step": 107970 + }, + { + "epoch": 4.03, + "learning_rate": 1.3611112292631326e-06, + "loss": 0.0958, + "step": 108000 + }, + { + "epoch": 4.03, + "learning_rate": 1.3609221466437218e-06, + "loss": 0.1194, + "step": 108030 + }, + { + "epoch": 4.03, + "learning_rate": 1.3607331428032656e-06, + "loss": 0.1281, + "step": 108060 + }, + { + "epoch": 4.03, + "learning_rate": 1.3605442176870751e-06, + "loss": 0.1063, + "step": 108090 + }, + { + "epoch": 4.03, + "learning_rate": 1.360355371240515e-06, + "loss": 0.0942, + "step": 108120 + }, + { + "epoch": 4.03, + "learning_rate": 1.3601666034090034e-06, + "loss": 0.1045, + "step": 108150 + }, + { + "epoch": 4.03, + "learning_rate": 1.35997791413801e-06, + "loss": 0.1134, + "step": 108180 + }, + { + "epoch": 4.04, + "learning_rate": 1.3597893033730595e-06, + "loss": 0.1106, + "step": 108210 + }, + { + "epoch": 4.04, + "learning_rate": 1.3596007710597273e-06, + "loss": 0.1185, + "step": 108240 + }, + { + "epoch": 4.04, + "learning_rate": 1.359412317143643e-06, + "loss": 0.122, + "step": 108270 + }, + { + "epoch": 4.04, + "learning_rate": 1.3592239415704883e-06, + "loss": 0.1212, + "step": 108300 + }, + { + "epoch": 4.04, + "learning_rate": 1.359035644285998e-06, + "loss": 0.1086, + "step": 108330 + }, + { + "epoch": 4.04, + "learning_rate": 1.3588536979444226e-06, + "loss": 0.1204, + "step": 108360 + }, + { + "epoch": 4.04, + "learning_rate": 1.3586655544695377e-06, + "loss": 0.1009, + "step": 108390 + }, + { + "epoch": 4.04, + "learning_rate": 1.3584774891226393e-06, + "loss": 0.1142, + "step": 108420 + }, + { + "epoch": 4.04, + "learning_rate": 1.35828950184967e-06, + "loss": 0.119, + "step": 108450 + }, + { + "epoch": 4.05, + "learning_rate": 1.3581015925966255e-06, + "loss": 0.1209, + "step": 108480 + }, + { + "epoch": 4.05, + "learning_rate": 1.357913761309553e-06, + "loss": 0.1119, + "step": 108510 + }, + { + "epoch": 4.05, + "learning_rate": 1.3577260079345526e-06, + "loss": 0.1257, + "step": 108540 + }, + { + "epoch": 4.05, + "learning_rate": 1.3575383324177758e-06, + "loss": 0.1217, + "step": 108570 + }, + { + "epoch": 4.05, + "learning_rate": 1.3573507347054265e-06, + "loss": 0.1121, + "step": 108600 + }, + { + "epoch": 4.05, + "learning_rate": 1.3571632147437613e-06, + "loss": 0.1233, + "step": 108630 + }, + { + "epoch": 4.05, + "learning_rate": 1.3569757724790883e-06, + "loss": 0.1067, + "step": 108660 + }, + { + "epoch": 4.05, + "learning_rate": 1.3567884078577668e-06, + "loss": 0.1206, + "step": 108690 + }, + { + "epoch": 4.05, + "learning_rate": 1.3566011208262083e-06, + "loss": 0.1261, + "step": 108720 + }, + { + "epoch": 4.06, + "learning_rate": 1.3564139113308771e-06, + "loss": 0.1353, + "step": 108750 + }, + { + "epoch": 4.06, + "learning_rate": 1.356226779318288e-06, + "loss": 0.113, + "step": 108780 + }, + { + "epoch": 4.06, + "learning_rate": 1.3560397247350075e-06, + "loss": 0.1146, + "step": 108810 + }, + { + "epoch": 4.06, + "learning_rate": 1.3558527475276545e-06, + "loss": 0.112, + "step": 108840 + }, + { + "epoch": 4.06, + "learning_rate": 1.3556658476428985e-06, + "loss": 0.1154, + "step": 108870 + }, + { + "epoch": 4.06, + "learning_rate": 1.3554790250274613e-06, + "loss": 0.1079, + "step": 108900 + }, + { + "epoch": 4.06, + "learning_rate": 1.3552922796281152e-06, + "loss": 0.115, + "step": 108930 + }, + { + "epoch": 4.06, + "learning_rate": 1.3551056113916844e-06, + "loss": 0.106, + "step": 108960 + }, + { + "epoch": 4.06, + "learning_rate": 1.3549190202650446e-06, + "loss": 0.1007, + "step": 108990 + }, + { + "epoch": 4.07, + "learning_rate": 1.3547325061951213e-06, + "loss": 0.1171, + "step": 109020 + }, + { + "epoch": 4.07, + "learning_rate": 1.3545460691288933e-06, + "loss": 0.1176, + "step": 109050 + }, + { + "epoch": 4.07, + "learning_rate": 1.3543597090133884e-06, + "loss": 0.1057, + "step": 109080 + }, + { + "epoch": 4.07, + "learning_rate": 1.3541734257956868e-06, + "loss": 0.1183, + "step": 109110 + }, + { + "epoch": 4.07, + "learning_rate": 1.3539872194229187e-06, + "loss": 0.0971, + "step": 109140 + }, + { + "epoch": 4.07, + "learning_rate": 1.3538010898422657e-06, + "loss": 0.1048, + "step": 109170 + }, + { + "epoch": 4.07, + "learning_rate": 1.3536150370009606e-06, + "loss": 0.1265, + "step": 109200 + }, + { + "epoch": 4.07, + "learning_rate": 1.353429060846286e-06, + "loss": 0.1153, + "step": 109230 + }, + { + "epoch": 4.08, + "learning_rate": 1.3532431613255755e-06, + "loss": 0.1093, + "step": 109260 + }, + { + "epoch": 4.08, + "learning_rate": 1.3530573383862138e-06, + "loss": 0.1128, + "step": 109290 + }, + { + "epoch": 4.08, + "learning_rate": 1.3528715919756358e-06, + "loss": 0.1073, + "step": 109320 + }, + { + "epoch": 4.08, + "learning_rate": 1.352685922041327e-06, + "loss": 0.1076, + "step": 109350 + }, + { + "epoch": 4.08, + "learning_rate": 1.352500328530823e-06, + "loss": 0.1086, + "step": 109380 + }, + { + "epoch": 4.08, + "learning_rate": 1.35231481139171e-06, + "loss": 0.1398, + "step": 109410 + }, + { + "epoch": 4.08, + "learning_rate": 1.3521293705716248e-06, + "loss": 0.1132, + "step": 109440 + }, + { + "epoch": 4.08, + "learning_rate": 1.3519440060182543e-06, + "loss": 0.1092, + "step": 109470 + }, + { + "epoch": 4.08, + "learning_rate": 1.3517587176793354e-06, + "loss": 0.104, + "step": 109500 + }, + { + "epoch": 4.09, + "learning_rate": 1.351573505502655e-06, + "loss": 0.1141, + "step": 109530 + }, + { + "epoch": 4.09, + "learning_rate": 1.3513883694360502e-06, + "loss": 0.1259, + "step": 109560 + }, + { + "epoch": 4.09, + "learning_rate": 1.3512033094274085e-06, + "loss": 0.1059, + "step": 109590 + }, + { + "epoch": 4.09, + "learning_rate": 1.3510183254246667e-06, + "loss": 0.1101, + "step": 109620 + }, + { + "epoch": 4.09, + "learning_rate": 1.3508334173758125e-06, + "loss": 0.1061, + "step": 109650 + }, + { + "epoch": 4.09, + "learning_rate": 1.3506485852288816e-06, + "loss": 0.1091, + "step": 109680 + }, + { + "epoch": 4.09, + "learning_rate": 1.3504638289319617e-06, + "loss": 0.1138, + "step": 109710 + }, + { + "epoch": 4.09, + "learning_rate": 1.3502791484331885e-06, + "loss": 0.1002, + "step": 109740 + }, + { + "epoch": 4.09, + "learning_rate": 1.3500945436807477e-06, + "loss": 0.0967, + "step": 109770 + }, + { + "epoch": 4.1, + "learning_rate": 1.3499100146228755e-06, + "loss": 0.1197, + "step": 109800 + }, + { + "epoch": 4.1, + "learning_rate": 1.349725561207856e-06, + "loss": 0.1062, + "step": 109830 + }, + { + "epoch": 4.1, + "learning_rate": 1.3495411833840244e-06, + "loss": 0.1048, + "step": 109860 + }, + { + "epoch": 4.1, + "learning_rate": 1.3493568810997643e-06, + "loss": 0.1209, + "step": 109890 + }, + { + "epoch": 4.1, + "learning_rate": 1.3491726543035087e-06, + "loss": 0.1055, + "step": 109920 + }, + { + "epoch": 4.1, + "learning_rate": 1.3489885029437399e-06, + "loss": 0.0939, + "step": 109950 + }, + { + "epoch": 4.1, + "learning_rate": 1.3488044269689899e-06, + "loss": 0.1263, + "step": 109980 + }, + { + "epoch": 4.1, + "learning_rate": 1.3486204263278393e-06, + "loss": 0.0975, + "step": 110010 + }, + { + "epoch": 4.1, + "learning_rate": 1.3484365009689182e-06, + "loss": 0.1174, + "step": 110040 + }, + { + "epoch": 4.11, + "learning_rate": 1.348252650840905e-06, + "loss": 0.1112, + "step": 110070 + }, + { + "epoch": 4.11, + "learning_rate": 1.348068875892528e-06, + "loss": 0.1009, + "step": 110100 + }, + { + "epoch": 4.11, + "learning_rate": 1.347885176072564e-06, + "loss": 0.1322, + "step": 110130 + }, + { + "epoch": 4.11, + "learning_rate": 1.347701551329839e-06, + "loss": 0.1163, + "step": 110160 + }, + { + "epoch": 4.11, + "learning_rate": 1.347518001613226e-06, + "loss": 0.1042, + "step": 110190 + }, + { + "epoch": 4.11, + "learning_rate": 1.3473345268716497e-06, + "loss": 0.1109, + "step": 110220 + }, + { + "epoch": 4.11, + "learning_rate": 1.3471511270540812e-06, + "loss": 0.116, + "step": 110250 + }, + { + "epoch": 4.11, + "learning_rate": 1.346967802109541e-06, + "loss": 0.1151, + "step": 110280 + }, + { + "epoch": 4.11, + "learning_rate": 1.3467845519870978e-06, + "loss": 0.1289, + "step": 110310 + }, + { + "epoch": 4.12, + "learning_rate": 1.3466013766358696e-06, + "loss": 0.1383, + "step": 110340 + }, + { + "epoch": 4.12, + "learning_rate": 1.3464182760050218e-06, + "loss": 0.1085, + "step": 110370 + }, + { + "epoch": 4.12, + "learning_rate": 1.3462352500437688e-06, + "loss": 0.1116, + "step": 110400 + }, + { + "epoch": 4.12, + "learning_rate": 1.3460522987013739e-06, + "loss": 0.1335, + "step": 110430 + }, + { + "epoch": 4.12, + "learning_rate": 1.3458694219271468e-06, + "loss": 0.1253, + "step": 110460 + }, + { + "epoch": 4.12, + "learning_rate": 1.3456866196704468e-06, + "loss": 0.0908, + "step": 110490 + }, + { + "epoch": 4.12, + "learning_rate": 1.3455038918806814e-06, + "loss": 0.1125, + "step": 110520 + }, + { + "epoch": 4.12, + "learning_rate": 1.3453212385073058e-06, + "loss": 0.1235, + "step": 110550 + }, + { + "epoch": 4.12, + "learning_rate": 1.3451386594998233e-06, + "loss": 0.1163, + "step": 110580 + }, + { + "epoch": 4.13, + "learning_rate": 1.344956154807785e-06, + "loss": 0.1028, + "step": 110610 + }, + { + "epoch": 4.13, + "learning_rate": 1.3447737243807896e-06, + "loss": 0.1258, + "step": 110640 + }, + { + "epoch": 4.13, + "learning_rate": 1.3445913681684844e-06, + "loss": 0.1037, + "step": 110670 + }, + { + "epoch": 4.13, + "learning_rate": 1.344409086120564e-06, + "loss": 0.1141, + "step": 110700 + }, + { + "epoch": 4.13, + "learning_rate": 1.3442268781867715e-06, + "loss": 0.1071, + "step": 110730 + }, + { + "epoch": 4.13, + "learning_rate": 1.344044744316896e-06, + "loss": 0.1079, + "step": 110760 + }, + { + "epoch": 4.13, + "learning_rate": 1.343862684460776e-06, + "loss": 0.1089, + "step": 110790 + }, + { + "epoch": 4.13, + "learning_rate": 1.3436806985682964e-06, + "loss": 0.1047, + "step": 110820 + }, + { + "epoch": 4.13, + "learning_rate": 1.3434987865893903e-06, + "loss": 0.0858, + "step": 110850 + }, + { + "epoch": 4.14, + "learning_rate": 1.3433169484740375e-06, + "loss": 0.105, + "step": 110880 + }, + { + "epoch": 4.14, + "learning_rate": 1.343135184172266e-06, + "loss": 0.096, + "step": 110910 + }, + { + "epoch": 4.14, + "learning_rate": 1.3429534936341501e-06, + "loss": 0.0958, + "step": 110940 + }, + { + "epoch": 4.14, + "learning_rate": 1.3427718768098128e-06, + "loss": 0.1155, + "step": 110970 + }, + { + "epoch": 4.14, + "learning_rate": 1.3425903336494232e-06, + "loss": 0.0927, + "step": 111000 + }, + { + "epoch": 4.14, + "learning_rate": 1.3424088641031974e-06, + "loss": 0.1146, + "step": 111030 + }, + { + "epoch": 4.14, + "learning_rate": 1.342227468121399e-06, + "loss": 0.1023, + "step": 111060 + }, + { + "epoch": 4.14, + "learning_rate": 1.3420461456543393e-06, + "loss": 0.1133, + "step": 111090 + }, + { + "epoch": 4.14, + "learning_rate": 1.3418648966523754e-06, + "loss": 0.1097, + "step": 111120 + }, + { + "epoch": 4.15, + "learning_rate": 1.3416837210659122e-06, + "loss": 0.12, + "step": 111150 + }, + { + "epoch": 4.15, + "learning_rate": 1.3415086544046006e-06, + "loss": 0.1087, + "step": 111180 + }, + { + "epoch": 4.15, + "learning_rate": 1.3413276230574544e-06, + "loss": 0.1138, + "step": 111210 + }, + { + "epoch": 4.15, + "learning_rate": 1.3411466649789512e-06, + "loss": 0.1045, + "step": 111240 + }, + { + "epoch": 4.15, + "learning_rate": 1.3409657801196808e-06, + "loss": 0.1208, + "step": 111270 + }, + { + "epoch": 4.15, + "learning_rate": 1.34078496843028e-06, + "loss": 0.1282, + "step": 111300 + }, + { + "epoch": 4.15, + "learning_rate": 1.3406042298614324e-06, + "loss": 0.1103, + "step": 111330 + }, + { + "epoch": 4.15, + "learning_rate": 1.3404235643638672e-06, + "loss": 0.1066, + "step": 111360 + }, + { + "epoch": 4.15, + "learning_rate": 1.3402429718883612e-06, + "loss": 0.1165, + "step": 111390 + }, + { + "epoch": 4.16, + "learning_rate": 1.3400624523857367e-06, + "loss": 0.117, + "step": 111420 + }, + { + "epoch": 4.16, + "learning_rate": 1.3398820058068632e-06, + "loss": 0.1059, + "step": 111450 + }, + { + "epoch": 4.16, + "learning_rate": 1.339701632102656e-06, + "loss": 0.1196, + "step": 111480 + }, + { + "epoch": 4.16, + "learning_rate": 1.339521331224076e-06, + "loss": 0.1074, + "step": 111510 + }, + { + "epoch": 4.16, + "learning_rate": 1.3393411031221322e-06, + "loss": 0.1078, + "step": 111540 + }, + { + "epoch": 4.16, + "learning_rate": 1.339160947747877e-06, + "loss": 0.1048, + "step": 111570 + }, + { + "epoch": 4.16, + "learning_rate": 1.3389808650524116e-06, + "loss": 0.1235, + "step": 111600 + }, + { + "epoch": 4.16, + "learning_rate": 1.3388008549868816e-06, + "loss": 0.116, + "step": 111630 + }, + { + "epoch": 4.16, + "learning_rate": 1.3386209175024792e-06, + "loss": 0.1006, + "step": 111660 + }, + { + "epoch": 4.17, + "learning_rate": 1.3384410525504416e-06, + "loss": 0.1163, + "step": 111690 + }, + { + "epoch": 4.17, + "learning_rate": 1.3382612600820532e-06, + "loss": 0.1175, + "step": 111720 + }, + { + "epoch": 4.17, + "learning_rate": 1.3380815400486433e-06, + "loss": 0.1113, + "step": 111750 + }, + { + "epoch": 4.17, + "learning_rate": 1.3379018924015868e-06, + "loss": 0.1203, + "step": 111780 + }, + { + "epoch": 4.17, + "learning_rate": 1.3377223170923053e-06, + "loss": 0.1267, + "step": 111810 + }, + { + "epoch": 4.17, + "learning_rate": 1.3375428140722648e-06, + "loss": 0.1165, + "step": 111840 + }, + { + "epoch": 4.17, + "learning_rate": 1.3373633832929783e-06, + "loss": 0.1114, + "step": 111870 + }, + { + "epoch": 4.17, + "learning_rate": 1.3371840247060025e-06, + "loss": 0.1183, + "step": 111900 + }, + { + "epoch": 4.17, + "learning_rate": 1.337004738262941e-06, + "loss": 0.1023, + "step": 111930 + }, + { + "epoch": 4.18, + "learning_rate": 1.3368255239154419e-06, + "loss": 0.1172, + "step": 111960 + }, + { + "epoch": 4.18, + "learning_rate": 1.3366463816151999e-06, + "loss": 0.106, + "step": 111990 + }, + { + "epoch": 4.18, + "learning_rate": 1.3364673113139533e-06, + "loss": 0.1254, + "step": 112020 + }, + { + "epoch": 4.18, + "learning_rate": 1.3362883129634875e-06, + "loss": 0.102, + "step": 112050 + }, + { + "epoch": 4.18, + "learning_rate": 1.3361093865156313e-06, + "loss": 0.1292, + "step": 112080 + }, + { + "epoch": 4.18, + "learning_rate": 1.33593053192226e-06, + "loss": 0.1158, + "step": 112110 + }, + { + "epoch": 4.18, + "learning_rate": 1.3357517491352932e-06, + "loss": 0.1191, + "step": 112140 + }, + { + "epoch": 4.18, + "learning_rate": 1.3355730381066959e-06, + "loss": 0.1157, + "step": 112170 + }, + { + "epoch": 4.18, + "learning_rate": 1.335394398788478e-06, + "loss": 0.0981, + "step": 112200 + }, + { + "epoch": 4.19, + "learning_rate": 1.335215831132694e-06, + "loss": 0.1214, + "step": 112230 + }, + { + "epoch": 4.19, + "learning_rate": 1.3350373350914441e-06, + "loss": 0.114, + "step": 112260 + }, + { + "epoch": 4.19, + "learning_rate": 1.3348589106168724e-06, + "loss": 0.1069, + "step": 112290 + }, + { + "epoch": 4.19, + "learning_rate": 1.3346805576611683e-06, + "loss": 0.0948, + "step": 112320 + }, + { + "epoch": 4.19, + "learning_rate": 1.3345022761765652e-06, + "loss": 0.1153, + "step": 112350 + }, + { + "epoch": 4.19, + "learning_rate": 1.3343240661153423e-06, + "loss": 0.1226, + "step": 112380 + }, + { + "epoch": 4.19, + "learning_rate": 1.3341459274298223e-06, + "loss": 0.0996, + "step": 112410 + }, + { + "epoch": 4.19, + "learning_rate": 1.3339678600723734e-06, + "loss": 0.1247, + "step": 112440 + }, + { + "epoch": 4.19, + "learning_rate": 1.3337898639954075e-06, + "loss": 0.1071, + "step": 112470 + }, + { + "epoch": 4.2, + "learning_rate": 1.3336119391513816e-06, + "loss": 0.1086, + "step": 112500 + }, + { + "epoch": 4.2, + "learning_rate": 1.333434085492796e-06, + "loss": 0.1173, + "step": 112530 + }, + { + "epoch": 4.2, + "learning_rate": 1.3332563029721968e-06, + "loss": 0.1388, + "step": 112560 + }, + { + "epoch": 4.2, + "learning_rate": 1.3330785915421732e-06, + "loss": 0.1097, + "step": 112590 + }, + { + "epoch": 4.2, + "learning_rate": 1.3329009511553593e-06, + "loss": 0.1164, + "step": 112620 + }, + { + "epoch": 4.2, + "learning_rate": 1.332723381764433e-06, + "loss": 0.1041, + "step": 112650 + }, + { + "epoch": 4.2, + "learning_rate": 1.3325458833221163e-06, + "loss": 0.1056, + "step": 112680 + }, + { + "epoch": 4.2, + "learning_rate": 1.3323684557811759e-06, + "loss": 0.1168, + "step": 112710 + }, + { + "epoch": 4.2, + "learning_rate": 1.3321910990944214e-06, + "loss": 0.125, + "step": 112740 + }, + { + "epoch": 4.21, + "learning_rate": 1.3320138132147075e-06, + "loss": 0.1154, + "step": 112770 + }, + { + "epoch": 4.21, + "learning_rate": 1.3318365980949322e-06, + "loss": 0.0917, + "step": 112800 + }, + { + "epoch": 4.21, + "learning_rate": 1.3316594536880373e-06, + "loss": 0.1145, + "step": 112830 + }, + { + "epoch": 4.21, + "learning_rate": 1.3314823799470083e-06, + "loss": 0.1074, + "step": 112860 + }, + { + "epoch": 4.21, + "learning_rate": 1.331305376824875e-06, + "loss": 0.1049, + "step": 112890 + }, + { + "epoch": 4.21, + "learning_rate": 1.331128444274711e-06, + "loss": 0.1078, + "step": 112920 + }, + { + "epoch": 4.21, + "learning_rate": 1.3309515822496324e-06, + "loss": 0.1126, + "step": 112950 + }, + { + "epoch": 4.21, + "learning_rate": 1.3307747907028002e-06, + "loss": 0.1207, + "step": 112980 + }, + { + "epoch": 4.21, + "learning_rate": 1.3305980695874179e-06, + "loss": 0.1214, + "step": 113010 + }, + { + "epoch": 4.22, + "learning_rate": 1.3304214188567336e-06, + "loss": 0.1187, + "step": 113040 + }, + { + "epoch": 4.22, + "learning_rate": 1.3302448384640373e-06, + "loss": 0.1061, + "step": 113070 + }, + { + "epoch": 4.22, + "learning_rate": 1.3300683283626642e-06, + "loss": 0.1027, + "step": 113100 + }, + { + "epoch": 4.22, + "learning_rate": 1.3298918885059912e-06, + "loss": 0.1075, + "step": 113130 + }, + { + "epoch": 4.22, + "learning_rate": 1.3297155188474394e-06, + "loss": 0.1261, + "step": 113160 + }, + { + "epoch": 4.22, + "learning_rate": 1.3295392193404732e-06, + "loss": 0.109, + "step": 113190 + }, + { + "epoch": 4.22, + "learning_rate": 1.3293629899385998e-06, + "loss": 0.1053, + "step": 113220 + }, + { + "epoch": 4.22, + "learning_rate": 1.3291868305953697e-06, + "loss": 0.1162, + "step": 113250 + }, + { + "epoch": 4.22, + "learning_rate": 1.329010741264376e-06, + "loss": 0.1282, + "step": 113280 + }, + { + "epoch": 4.23, + "learning_rate": 1.3288347218992556e-06, + "loss": 0.1163, + "step": 113310 + }, + { + "epoch": 4.23, + "learning_rate": 1.3286587724536878e-06, + "loss": 0.113, + "step": 113340 + }, + { + "epoch": 4.23, + "learning_rate": 1.3284828928813952e-06, + "loss": 0.1118, + "step": 113370 + }, + { + "epoch": 4.23, + "learning_rate": 1.3283070831361434e-06, + "loss": 0.115, + "step": 113400 + }, + { + "epoch": 4.23, + "learning_rate": 1.3281313431717397e-06, + "loss": 0.0957, + "step": 113430 + }, + { + "epoch": 4.23, + "learning_rate": 1.3279556729420353e-06, + "loss": 0.1053, + "step": 113460 + }, + { + "epoch": 4.23, + "learning_rate": 1.3277800724009243e-06, + "loss": 0.1032, + "step": 113490 + }, + { + "epoch": 4.23, + "learning_rate": 1.3276045415023425e-06, + "loss": 0.1142, + "step": 113520 + }, + { + "epoch": 4.24, + "learning_rate": 1.3274290802002687e-06, + "loss": 0.0996, + "step": 113550 + }, + { + "epoch": 4.24, + "learning_rate": 1.3272536884487247e-06, + "loss": 0.1274, + "step": 113580 + }, + { + "epoch": 4.24, + "learning_rate": 1.327078366201774e-06, + "loss": 0.0937, + "step": 113610 + }, + { + "epoch": 4.24, + "learning_rate": 1.3269031134135237e-06, + "loss": 0.1127, + "step": 113640 + }, + { + "epoch": 4.24, + "learning_rate": 1.3267279300381222e-06, + "loss": 0.1036, + "step": 113670 + }, + { + "epoch": 4.24, + "learning_rate": 1.3265528160297608e-06, + "loss": 0.1173, + "step": 113700 + }, + { + "epoch": 4.24, + "learning_rate": 1.3263777713426725e-06, + "loss": 0.1237, + "step": 113730 + }, + { + "epoch": 4.24, + "learning_rate": 1.3262027959311342e-06, + "loss": 0.1113, + "step": 113760 + }, + { + "epoch": 4.24, + "learning_rate": 1.3260278897494627e-06, + "loss": 0.1228, + "step": 113790 + }, + { + "epoch": 4.25, + "learning_rate": 1.3258530527520192e-06, + "loss": 0.1077, + "step": 113820 + }, + { + "epoch": 4.25, + "learning_rate": 1.3256782848932048e-06, + "loss": 0.1051, + "step": 113850 + }, + { + "epoch": 4.25, + "learning_rate": 1.325503586127465e-06, + "loss": 0.1137, + "step": 113880 + }, + { + "epoch": 4.25, + "learning_rate": 1.3253289564092853e-06, + "loss": 0.1075, + "step": 113910 + }, + { + "epoch": 4.25, + "learning_rate": 1.3251543956931945e-06, + "loss": 0.0949, + "step": 113940 + }, + { + "epoch": 4.25, + "learning_rate": 1.3249799039337626e-06, + "loss": 0.0969, + "step": 113970 + }, + { + "epoch": 4.25, + "learning_rate": 1.3248054810856017e-06, + "loss": 0.0864, + "step": 114000 + }, + { + "epoch": 4.25, + "learning_rate": 1.3246311271033657e-06, + "loss": 0.142, + "step": 114030 + }, + { + "epoch": 4.25, + "learning_rate": 1.3244568419417503e-06, + "loss": 0.0939, + "step": 114060 + }, + { + "epoch": 4.26, + "learning_rate": 1.3242826255554928e-06, + "loss": 0.1117, + "step": 114090 + }, + { + "epoch": 4.26, + "learning_rate": 1.3241084778993723e-06, + "loss": 0.0976, + "step": 114120 + }, + { + "epoch": 4.26, + "learning_rate": 1.3239343989282094e-06, + "loss": 0.0982, + "step": 114150 + }, + { + "epoch": 4.26, + "learning_rate": 1.3237603885968664e-06, + "loss": 0.1163, + "step": 114180 + }, + { + "epoch": 4.26, + "learning_rate": 1.323586446860247e-06, + "loss": 0.1309, + "step": 114210 + }, + { + "epoch": 4.26, + "learning_rate": 1.3234125736732962e-06, + "loss": 0.1001, + "step": 114240 + }, + { + "epoch": 4.26, + "learning_rate": 1.3232387689910007e-06, + "loss": 0.1048, + "step": 114270 + }, + { + "epoch": 4.26, + "learning_rate": 1.3230650327683888e-06, + "loss": 0.1139, + "step": 114300 + }, + { + "epoch": 4.26, + "learning_rate": 1.3228913649605295e-06, + "loss": 0.1219, + "step": 114330 + }, + { + "epoch": 4.27, + "learning_rate": 1.3227177655225338e-06, + "loss": 0.1055, + "step": 114360 + }, + { + "epoch": 4.27, + "learning_rate": 1.322544234409553e-06, + "loss": 0.0944, + "step": 114390 + }, + { + "epoch": 4.27, + "learning_rate": 1.32237077157678e-06, + "loss": 0.1249, + "step": 114420 + }, + { + "epoch": 4.27, + "learning_rate": 1.3221973769794496e-06, + "loss": 0.1084, + "step": 114450 + }, + { + "epoch": 4.27, + "learning_rate": 1.3220240505728365e-06, + "loss": 0.0998, + "step": 114480 + }, + { + "epoch": 4.27, + "learning_rate": 1.3218507923122572e-06, + "loss": 0.1065, + "step": 114510 + }, + { + "epoch": 4.27, + "learning_rate": 1.3216776021530689e-06, + "loss": 0.1051, + "step": 114540 + }, + { + "epoch": 4.27, + "learning_rate": 1.321504480050669e-06, + "loss": 0.1216, + "step": 114570 + }, + { + "epoch": 4.27, + "learning_rate": 1.3213314259604976e-06, + "loss": 0.097, + "step": 114600 + }, + { + "epoch": 4.28, + "learning_rate": 1.321158439838034e-06, + "loss": 0.1079, + "step": 114630 + }, + { + "epoch": 4.28, + "learning_rate": 1.3209855216387988e-06, + "loss": 0.108, + "step": 114660 + }, + { + "epoch": 4.28, + "learning_rate": 1.3208126713183533e-06, + "loss": 0.1599, + "step": 114690 + }, + { + "epoch": 4.28, + "learning_rate": 1.3206398888323003e-06, + "loss": 0.1108, + "step": 114720 + }, + { + "epoch": 4.28, + "learning_rate": 1.3204671741362815e-06, + "loss": 0.1027, + "step": 114750 + }, + { + "epoch": 4.28, + "learning_rate": 1.320294527185981e-06, + "loss": 0.1121, + "step": 114780 + }, + { + "epoch": 4.28, + "learning_rate": 1.3201219479371225e-06, + "loss": 0.0957, + "step": 114810 + }, + { + "epoch": 4.28, + "learning_rate": 1.3199494363454705e-06, + "loss": 0.0941, + "step": 114840 + }, + { + "epoch": 4.28, + "learning_rate": 1.319776992366829e-06, + "loss": 0.1005, + "step": 114870 + }, + { + "epoch": 4.29, + "learning_rate": 1.3196046159570445e-06, + "loss": 0.1123, + "step": 114900 + }, + { + "epoch": 4.29, + "learning_rate": 1.3194323070720016e-06, + "loss": 0.11, + "step": 114930 + }, + { + "epoch": 4.29, + "learning_rate": 1.3192600656676268e-06, + "loss": 0.1043, + "step": 114960 + }, + { + "epoch": 4.29, + "learning_rate": 1.3190878916998856e-06, + "loss": 0.1014, + "step": 114990 + }, + { + "epoch": 4.29, + "learning_rate": 1.3189157851247853e-06, + "loss": 0.1287, + "step": 115020 + }, + { + "epoch": 4.29, + "learning_rate": 1.3187437458983718e-06, + "loss": 0.1203, + "step": 115050 + }, + { + "epoch": 4.29, + "learning_rate": 1.318571773976732e-06, + "loss": 0.1252, + "step": 115080 + }, + { + "epoch": 4.29, + "learning_rate": 1.3183998693159926e-06, + "loss": 0.1076, + "step": 115110 + }, + { + "epoch": 4.29, + "learning_rate": 1.3182280318723206e-06, + "loss": 0.0897, + "step": 115140 + }, + { + "epoch": 4.3, + "learning_rate": 1.3180562616019221e-06, + "loss": 0.102, + "step": 115170 + }, + { + "epoch": 4.3, + "learning_rate": 1.3178845584610447e-06, + "loss": 0.114, + "step": 115200 + }, + { + "epoch": 4.3, + "learning_rate": 1.3177129224059746e-06, + "loss": 0.127, + "step": 115230 + }, + { + "epoch": 4.3, + "learning_rate": 1.3175413533930378e-06, + "loss": 0.1226, + "step": 115260 + }, + { + "epoch": 4.3, + "learning_rate": 1.3173698513786012e-06, + "loss": 0.0991, + "step": 115290 + }, + { + "epoch": 4.3, + "learning_rate": 1.3171984163190704e-06, + "loss": 0.1143, + "step": 115320 + }, + { + "epoch": 4.3, + "learning_rate": 1.3170270481708914e-06, + "loss": 0.1015, + "step": 115350 + }, + { + "epoch": 4.3, + "learning_rate": 1.3168557468905489e-06, + "loss": 0.1062, + "step": 115380 + }, + { + "epoch": 4.3, + "learning_rate": 1.3166845124345683e-06, + "loss": 0.1077, + "step": 115410 + }, + { + "epoch": 4.31, + "learning_rate": 1.316513344759514e-06, + "loss": 0.1142, + "step": 115440 + }, + { + "epoch": 4.31, + "learning_rate": 1.3163422438219902e-06, + "loss": 0.1156, + "step": 115470 + }, + { + "epoch": 4.31, + "learning_rate": 1.3161712095786402e-06, + "loss": 0.118, + "step": 115500 + }, + { + "epoch": 4.31, + "learning_rate": 1.3160002419861468e-06, + "loss": 0.1225, + "step": 115530 + }, + { + "epoch": 4.31, + "learning_rate": 1.3158293410012324e-06, + "loss": 0.1199, + "step": 115560 + }, + { + "epoch": 4.31, + "learning_rate": 1.3156585065806587e-06, + "loss": 0.1145, + "step": 115590 + }, + { + "epoch": 4.31, + "learning_rate": 1.3154877386812268e-06, + "loss": 0.1273, + "step": 115620 + }, + { + "epoch": 4.31, + "learning_rate": 1.315317037259776e-06, + "loss": 0.1092, + "step": 115650 + }, + { + "epoch": 4.31, + "learning_rate": 1.3151464022731864e-06, + "loss": 0.1297, + "step": 115680 + }, + { + "epoch": 4.32, + "learning_rate": 1.3149758336783764e-06, + "loss": 0.1129, + "step": 115710 + }, + { + "epoch": 4.32, + "learning_rate": 1.314805331432303e-06, + "loss": 0.098, + "step": 115740 + }, + { + "epoch": 4.32, + "learning_rate": 1.3146348954919639e-06, + "loss": 0.1029, + "step": 115770 + }, + { + "epoch": 4.32, + "learning_rate": 1.3144645258143936e-06, + "loss": 0.1019, + "step": 115800 + }, + { + "epoch": 4.32, + "learning_rate": 1.3142942223566676e-06, + "loss": 0.1293, + "step": 115830 + }, + { + "epoch": 4.32, + "learning_rate": 1.3141239850758991e-06, + "loss": 0.1258, + "step": 115860 + }, + { + "epoch": 4.32, + "learning_rate": 1.3139538139292404e-06, + "loss": 0.1157, + "step": 115890 + }, + { + "epoch": 4.32, + "learning_rate": 1.313783708873883e-06, + "loss": 0.1181, + "step": 115920 + }, + { + "epoch": 4.32, + "learning_rate": 1.3136136698670565e-06, + "loss": 0.1069, + "step": 115950 + }, + { + "epoch": 4.33, + "learning_rate": 1.3134436968660303e-06, + "loss": 0.1075, + "step": 115980 + }, + { + "epoch": 4.33, + "learning_rate": 1.3132737898281114e-06, + "loss": 0.0945, + "step": 116010 + }, + { + "epoch": 4.33, + "learning_rate": 1.3131039487106457e-06, + "loss": 0.1112, + "step": 116040 + }, + { + "epoch": 4.33, + "learning_rate": 1.3129341734710184e-06, + "loss": 0.1181, + "step": 116070 + }, + { + "epoch": 4.33, + "learning_rate": 1.3127644640666526e-06, + "loss": 0.0994, + "step": 116100 + }, + { + "epoch": 4.33, + "learning_rate": 1.3125948204550102e-06, + "loss": 0.0933, + "step": 116130 + }, + { + "epoch": 4.33, + "learning_rate": 1.312425242593591e-06, + "loss": 0.1108, + "step": 116160 + }, + { + "epoch": 4.33, + "learning_rate": 1.3122557304399341e-06, + "loss": 0.0987, + "step": 116190 + }, + { + "epoch": 4.33, + "learning_rate": 1.3120862839516165e-06, + "loss": 0.1096, + "step": 116220 + }, + { + "epoch": 4.34, + "learning_rate": 1.3119169030862539e-06, + "loss": 0.1133, + "step": 116250 + }, + { + "epoch": 4.34, + "learning_rate": 1.3117475878014992e-06, + "loss": 0.1315, + "step": 116280 + }, + { + "epoch": 4.34, + "learning_rate": 1.3115783380550451e-06, + "loss": 0.1295, + "step": 116310 + }, + { + "epoch": 4.34, + "learning_rate": 1.311409153804621e-06, + "loss": 0.0983, + "step": 116340 + }, + { + "epoch": 4.34, + "learning_rate": 1.3112400350079959e-06, + "loss": 0.1068, + "step": 116370 + }, + { + "epoch": 4.34, + "learning_rate": 1.3110709816229757e-06, + "loss": 0.1147, + "step": 116400 + }, + { + "epoch": 4.34, + "learning_rate": 1.3109019936074052e-06, + "loss": 0.1137, + "step": 116430 + }, + { + "epoch": 4.34, + "learning_rate": 1.3107330709191668e-06, + "loss": 0.1208, + "step": 116460 + }, + { + "epoch": 4.34, + "learning_rate": 1.310564213516181e-06, + "loss": 0.1401, + "step": 116490 + }, + { + "epoch": 4.35, + "learning_rate": 1.3103954213564058e-06, + "loss": 0.1, + "step": 116520 + }, + { + "epoch": 4.35, + "learning_rate": 1.310226694397838e-06, + "loss": 0.0975, + "step": 116550 + }, + { + "epoch": 4.35, + "learning_rate": 1.3100580325985118e-06, + "loss": 0.1046, + "step": 116580 + }, + { + "epoch": 4.35, + "learning_rate": 1.309889435916499e-06, + "loss": 0.1025, + "step": 116610 + }, + { + "epoch": 4.35, + "learning_rate": 1.3097209043099094e-06, + "loss": 0.0954, + "step": 116640 + }, + { + "epoch": 4.35, + "learning_rate": 1.3095524377368902e-06, + "loss": 0.1025, + "step": 116670 + }, + { + "epoch": 4.35, + "learning_rate": 1.3093840361556264e-06, + "loss": 0.1078, + "step": 116700 + }, + { + "epoch": 4.35, + "learning_rate": 1.3092156995243415e-06, + "loss": 0.1226, + "step": 116730 + }, + { + "epoch": 4.35, + "learning_rate": 1.3090474278012954e-06, + "loss": 0.1026, + "step": 116760 + }, + { + "epoch": 4.36, + "learning_rate": 1.308879220944786e-06, + "loss": 0.104, + "step": 116790 + }, + { + "epoch": 4.36, + "learning_rate": 1.3087110789131483e-06, + "loss": 0.0929, + "step": 116820 + }, + { + "epoch": 4.36, + "learning_rate": 1.3085430016647556e-06, + "loss": 0.1205, + "step": 116850 + }, + { + "epoch": 4.36, + "learning_rate": 1.308374989158018e-06, + "loss": 0.1218, + "step": 116880 + }, + { + "epoch": 4.36, + "learning_rate": 1.3082070413513832e-06, + "loss": 0.0964, + "step": 116910 + }, + { + "epoch": 4.36, + "learning_rate": 1.3080391582033357e-06, + "loss": 0.1315, + "step": 116940 + }, + { + "epoch": 4.36, + "learning_rate": 1.3078713396723986e-06, + "loss": 0.1172, + "step": 116970 + }, + { + "epoch": 4.36, + "learning_rate": 1.3077035857171309e-06, + "loss": 0.0963, + "step": 117000 + }, + { + "epoch": 4.36, + "learning_rate": 1.307535896296129e-06, + "loss": 0.1049, + "step": 117030 + }, + { + "epoch": 4.37, + "learning_rate": 1.3073682713680269e-06, + "loss": 0.1218, + "step": 117060 + }, + { + "epoch": 4.37, + "learning_rate": 1.3072007108914958e-06, + "loss": 0.0959, + "step": 117090 + }, + { + "epoch": 4.37, + "learning_rate": 1.3070332148252434e-06, + "loss": 0.105, + "step": 117120 + }, + { + "epoch": 4.37, + "learning_rate": 1.306865783128015e-06, + "loss": 0.1334, + "step": 117150 + }, + { + "epoch": 4.37, + "learning_rate": 1.3066984157585923e-06, + "loss": 0.11, + "step": 117180 + }, + { + "epoch": 4.37, + "learning_rate": 1.3065311126757948e-06, + "loss": 0.098, + "step": 117210 + }, + { + "epoch": 4.37, + "learning_rate": 1.3063694474317565e-06, + "loss": 0.133, + "step": 117240 + }, + { + "epoch": 4.37, + "learning_rate": 1.3062022706593288e-06, + "loss": 0.1123, + "step": 117270 + }, + { + "epoch": 4.37, + "learning_rate": 1.3060351580515726e-06, + "loss": 0.1021, + "step": 117300 + }, + { + "epoch": 4.38, + "learning_rate": 1.305868109567453e-06, + "loss": 0.1113, + "step": 117330 + }, + { + "epoch": 4.38, + "learning_rate": 1.3057011251659715e-06, + "loss": 0.1077, + "step": 117360 + }, + { + "epoch": 4.38, + "learning_rate": 1.3055342048061668e-06, + "loss": 0.1296, + "step": 117390 + }, + { + "epoch": 4.38, + "learning_rate": 1.305367348447114e-06, + "loss": 0.1151, + "step": 117420 + }, + { + "epoch": 4.38, + "learning_rate": 1.3052005560479244e-06, + "loss": 0.1305, + "step": 117450 + }, + { + "epoch": 4.38, + "learning_rate": 1.305033827567747e-06, + "loss": 0.1127, + "step": 117480 + }, + { + "epoch": 4.38, + "learning_rate": 1.3048671629657663e-06, + "loss": 0.1051, + "step": 117510 + }, + { + "epoch": 4.38, + "learning_rate": 1.3047005622012037e-06, + "loss": 0.117, + "step": 117540 + }, + { + "epoch": 4.38, + "learning_rate": 1.304534025233317e-06, + "loss": 0.0994, + "step": 117570 + }, + { + "epoch": 4.39, + "learning_rate": 1.3043675520214e-06, + "loss": 0.1318, + "step": 117600 + }, + { + "epoch": 4.39, + "learning_rate": 1.3042011425247836e-06, + "loss": 0.1427, + "step": 117630 + }, + { + "epoch": 4.39, + "learning_rate": 1.3040347967028342e-06, + "loss": 0.0987, + "step": 117660 + }, + { + "epoch": 4.39, + "learning_rate": 1.3038685145149548e-06, + "loss": 0.1065, + "step": 117690 + }, + { + "epoch": 4.39, + "learning_rate": 1.3037022959205856e-06, + "loss": 0.1003, + "step": 117720 + }, + { + "epoch": 4.39, + "learning_rate": 1.303536140879201e-06, + "loss": 0.1064, + "step": 117750 + }, + { + "epoch": 4.39, + "learning_rate": 1.303370049350313e-06, + "loss": 0.0985, + "step": 117780 + }, + { + "epoch": 4.39, + "learning_rate": 1.30320402129347e-06, + "loss": 0.1175, + "step": 117810 + }, + { + "epoch": 4.4, + "learning_rate": 1.3030380566682546e-06, + "loss": 0.1028, + "step": 117840 + }, + { + "epoch": 4.4, + "learning_rate": 1.302872155434287e-06, + "loss": 0.1041, + "step": 117870 + }, + { + "epoch": 4.4, + "learning_rate": 1.3027063175512236e-06, + "loss": 0.12, + "step": 117900 + }, + { + "epoch": 4.4, + "learning_rate": 1.3025405429787558e-06, + "loss": 0.1162, + "step": 117930 + }, + { + "epoch": 4.4, + "learning_rate": 1.3023748316766109e-06, + "loss": 0.1014, + "step": 117960 + }, + { + "epoch": 4.4, + "learning_rate": 1.3022091836045526e-06, + "loss": 0.1341, + "step": 117990 + }, + { + "epoch": 4.4, + "learning_rate": 1.3020435987223801e-06, + "loss": 0.1052, + "step": 118020 + }, + { + "epoch": 4.4, + "learning_rate": 1.3018780769899285e-06, + "loss": 0.0979, + "step": 118050 + }, + { + "epoch": 4.4, + "learning_rate": 1.301712618367069e-06, + "loss": 0.1356, + "step": 118080 + }, + { + "epoch": 4.41, + "learning_rate": 1.3015472228137074e-06, + "loss": 0.1122, + "step": 118110 + }, + { + "epoch": 4.41, + "learning_rate": 1.3013818902897862e-06, + "loss": 0.1188, + "step": 118140 + }, + { + "epoch": 4.41, + "learning_rate": 1.3012166207552833e-06, + "loss": 0.0868, + "step": 118170 + }, + { + "epoch": 4.41, + "learning_rate": 1.3010514141702115e-06, + "loss": 0.1188, + "step": 118200 + }, + { + "epoch": 4.41, + "learning_rate": 1.30088627049462e-06, + "loss": 0.1018, + "step": 118230 + }, + { + "epoch": 4.41, + "learning_rate": 1.3007211896885935e-06, + "loss": 0.1174, + "step": 118260 + }, + { + "epoch": 4.41, + "learning_rate": 1.3005561717122512e-06, + "loss": 0.0993, + "step": 118290 + }, + { + "epoch": 4.41, + "learning_rate": 1.3003912165257485e-06, + "loss": 0.1044, + "step": 118320 + }, + { + "epoch": 4.41, + "learning_rate": 1.300226324089276e-06, + "loss": 0.1051, + "step": 118350 + }, + { + "epoch": 4.42, + "learning_rate": 1.3000614943630596e-06, + "loss": 0.1175, + "step": 118380 + }, + { + "epoch": 4.42, + "learning_rate": 1.2998967273073604e-06, + "loss": 0.1079, + "step": 118410 + }, + { + "epoch": 4.42, + "learning_rate": 1.2997320228824746e-06, + "loss": 0.088, + "step": 118440 + }, + { + "epoch": 4.42, + "learning_rate": 1.2995673810487344e-06, + "loss": 0.1007, + "step": 118470 + }, + { + "epoch": 4.42, + "learning_rate": 1.2994028017665058e-06, + "loss": 0.1451, + "step": 118500 + }, + { + "epoch": 4.42, + "learning_rate": 1.2992382849961916e-06, + "loss": 0.1068, + "step": 118530 + }, + { + "epoch": 4.42, + "learning_rate": 1.299073830698228e-06, + "loss": 0.0986, + "step": 118560 + }, + { + "epoch": 4.42, + "learning_rate": 1.2989094388330875e-06, + "loss": 0.1178, + "step": 118590 + }, + { + "epoch": 4.42, + "learning_rate": 1.2987451093612773e-06, + "loss": 0.1121, + "step": 118620 + }, + { + "epoch": 4.43, + "learning_rate": 1.2985808422433387e-06, + "loss": 0.1215, + "step": 118650 + }, + { + "epoch": 4.43, + "learning_rate": 1.2984166374398496e-06, + "loss": 0.1104, + "step": 118680 + }, + { + "epoch": 4.43, + "learning_rate": 1.2982524949114212e-06, + "loss": 0.1247, + "step": 118710 + }, + { + "epoch": 4.43, + "learning_rate": 1.2980884146187004e-06, + "loss": 0.1251, + "step": 118740 + }, + { + "epoch": 4.43, + "learning_rate": 1.297924396522368e-06, + "loss": 0.1199, + "step": 118770 + }, + { + "epoch": 4.43, + "learning_rate": 1.2977604405831415e-06, + "loss": 0.1071, + "step": 118800 + }, + { + "epoch": 4.43, + "learning_rate": 1.297596546761771e-06, + "loss": 0.0898, + "step": 118830 + }, + { + "epoch": 4.43, + "learning_rate": 1.2974327150190424e-06, + "loss": 0.1061, + "step": 118860 + }, + { + "epoch": 4.43, + "learning_rate": 1.2972689453157762e-06, + "loss": 0.1134, + "step": 118890 + }, + { + "epoch": 4.44, + "learning_rate": 1.2971052376128267e-06, + "loss": 0.1091, + "step": 118920 + }, + { + "epoch": 4.44, + "learning_rate": 1.296947045731291e-06, + "loss": 0.1343, + "step": 118950 + }, + { + "epoch": 4.44, + "learning_rate": 1.2967834598482373e-06, + "loss": 0.1069, + "step": 118980 + }, + { + "epoch": 4.44, + "learning_rate": 1.2966199358495734e-06, + "loss": 0.1203, + "step": 119010 + }, + { + "epoch": 4.44, + "learning_rate": 1.2964564736962915e-06, + "loss": 0.1226, + "step": 119040 + }, + { + "epoch": 4.44, + "learning_rate": 1.2962930733494176e-06, + "loss": 0.1056, + "step": 119070 + }, + { + "epoch": 4.44, + "learning_rate": 1.2961297347700125e-06, + "loss": 0.1027, + "step": 119100 + }, + { + "epoch": 4.44, + "learning_rate": 1.2959664579191709e-06, + "loss": 0.0915, + "step": 119130 + }, + { + "epoch": 4.44, + "learning_rate": 1.295803242758022e-06, + "loss": 0.0957, + "step": 119160 + }, + { + "epoch": 4.45, + "learning_rate": 1.2956400892477298e-06, + "loss": 0.1198, + "step": 119190 + }, + { + "epoch": 4.45, + "learning_rate": 1.2954769973494915e-06, + "loss": 0.1063, + "step": 119220 + }, + { + "epoch": 4.45, + "learning_rate": 1.2953139670245392e-06, + "loss": 0.0955, + "step": 119250 + }, + { + "epoch": 4.45, + "learning_rate": 1.2951509982341392e-06, + "loss": 0.1139, + "step": 119280 + }, + { + "epoch": 4.45, + "learning_rate": 1.2949880909395915e-06, + "loss": 0.124, + "step": 119310 + }, + { + "epoch": 4.45, + "learning_rate": 1.2948252451022304e-06, + "loss": 0.1036, + "step": 119340 + }, + { + "epoch": 4.45, + "learning_rate": 1.2946624606834235e-06, + "loss": 0.0989, + "step": 119370 + }, + { + "epoch": 4.45, + "learning_rate": 1.294499737644574e-06, + "loss": 0.135, + "step": 119400 + }, + { + "epoch": 4.45, + "learning_rate": 1.2943370759471175e-06, + "loss": 0.1173, + "step": 119430 + }, + { + "epoch": 4.46, + "learning_rate": 1.2941744755525243e-06, + "loss": 0.1293, + "step": 119460 + }, + { + "epoch": 4.46, + "learning_rate": 1.2940119364222983e-06, + "loss": 0.1396, + "step": 119490 + }, + { + "epoch": 4.46, + "learning_rate": 1.2938494585179772e-06, + "loss": 0.1242, + "step": 119520 + }, + { + "epoch": 4.46, + "learning_rate": 1.293687041801133e-06, + "loss": 0.1183, + "step": 119550 + }, + { + "epoch": 4.46, + "learning_rate": 1.2935246862333709e-06, + "loss": 0.1215, + "step": 119580 + }, + { + "epoch": 4.46, + "learning_rate": 1.2933623917763297e-06, + "loss": 0.1166, + "step": 119610 + }, + { + "epoch": 4.46, + "learning_rate": 1.2932001583916825e-06, + "loss": 0.1092, + "step": 119640 + }, + { + "epoch": 4.46, + "learning_rate": 1.2930379860411354e-06, + "loss": 0.1093, + "step": 119670 + }, + { + "epoch": 4.46, + "learning_rate": 1.292875874686429e-06, + "loss": 0.103, + "step": 119700 + }, + { + "epoch": 4.47, + "learning_rate": 1.2927138242893364e-06, + "loss": 0.1017, + "step": 119730 + }, + { + "epoch": 4.47, + "learning_rate": 1.2925518348116648e-06, + "loss": 0.1116, + "step": 119760 + }, + { + "epoch": 4.47, + "learning_rate": 1.2923899062152547e-06, + "loss": 0.1123, + "step": 119790 + }, + { + "epoch": 4.47, + "learning_rate": 1.2922280384619804e-06, + "loss": 0.1173, + "step": 119820 + }, + { + "epoch": 4.47, + "learning_rate": 1.2920662315137495e-06, + "loss": 0.0982, + "step": 119850 + }, + { + "epoch": 4.47, + "learning_rate": 1.2919044853325025e-06, + "loss": 0.1055, + "step": 119880 + }, + { + "epoch": 4.47, + "learning_rate": 1.291742799880214e-06, + "loss": 0.0932, + "step": 119910 + }, + { + "epoch": 4.47, + "learning_rate": 1.291581175118891e-06, + "loss": 0.1237, + "step": 119940 + }, + { + "epoch": 4.47, + "learning_rate": 1.2914196110105747e-06, + "loss": 0.1194, + "step": 119970 + }, + { + "epoch": 4.48, + "learning_rate": 1.2912581075173393e-06, + "loss": 0.1114, + "step": 120000 + }, + { + "epoch": 4.48, + "learning_rate": 1.2910966646012915e-06, + "loss": 0.1114, + "step": 120030 + }, + { + "epoch": 4.48, + "learning_rate": 1.2909352822245722e-06, + "loss": 0.1029, + "step": 120060 + }, + { + "epoch": 4.48, + "learning_rate": 1.290773960349354e-06, + "loss": 0.1043, + "step": 120090 + }, + { + "epoch": 4.48, + "learning_rate": 1.2906126989378443e-06, + "loss": 0.1089, + "step": 120120 + }, + { + "epoch": 4.48, + "learning_rate": 1.2904514979522822e-06, + "loss": 0.09, + "step": 120150 + }, + { + "epoch": 4.48, + "learning_rate": 1.2902903573549407e-06, + "loss": 0.0915, + "step": 120180 + }, + { + "epoch": 4.48, + "learning_rate": 1.2901292771081256e-06, + "loss": 0.0993, + "step": 120210 + }, + { + "epoch": 4.48, + "learning_rate": 1.2899682571741744e-06, + "loss": 0.1236, + "step": 120240 + }, + { + "epoch": 4.49, + "learning_rate": 1.2898072975154596e-06, + "loss": 0.096, + "step": 120270 + }, + { + "epoch": 4.49, + "learning_rate": 1.289646398094385e-06, + "loss": 0.1118, + "step": 120300 + }, + { + "epoch": 4.49, + "learning_rate": 1.2894855588733873e-06, + "loss": 0.1015, + "step": 120330 + }, + { + "epoch": 4.49, + "learning_rate": 1.2893247798149368e-06, + "loss": 0.1416, + "step": 120360 + }, + { + "epoch": 4.49, + "learning_rate": 1.2891640608815364e-06, + "loss": 0.1075, + "step": 120390 + }, + { + "epoch": 4.49, + "learning_rate": 1.289003402035721e-06, + "loss": 0.1143, + "step": 120420 + }, + { + "epoch": 4.49, + "learning_rate": 1.2888428032400585e-06, + "loss": 0.0982, + "step": 120450 + }, + { + "epoch": 4.49, + "learning_rate": 1.2886822644571501e-06, + "loss": 0.1061, + "step": 120480 + }, + { + "epoch": 4.49, + "learning_rate": 1.2885217856496285e-06, + "loss": 0.1185, + "step": 120510 + }, + { + "epoch": 4.5, + "learning_rate": 1.2883613667801595e-06, + "loss": 0.1238, + "step": 120540 + }, + { + "epoch": 4.5, + "learning_rate": 1.288201007811442e-06, + "loss": 0.1035, + "step": 120570 + }, + { + "epoch": 4.5, + "learning_rate": 1.2880407087062064e-06, + "loss": 0.1049, + "step": 120600 + }, + { + "epoch": 4.5, + "learning_rate": 1.2878804694272156e-06, + "loss": 0.1034, + "step": 120630 + }, + { + "epoch": 4.5, + "learning_rate": 1.2877202899372663e-06, + "loss": 0.113, + "step": 120660 + }, + { + "epoch": 4.5, + "learning_rate": 1.287560170199186e-06, + "loss": 0.1191, + "step": 120690 + }, + { + "epoch": 4.5, + "learning_rate": 1.287400110175835e-06, + "loss": 0.1073, + "step": 120720 + }, + { + "epoch": 4.5, + "learning_rate": 1.2872401098301063e-06, + "loss": 0.113, + "step": 120750 + }, + { + "epoch": 4.5, + "learning_rate": 1.287080169124925e-06, + "loss": 0.0863, + "step": 120780 + }, + { + "epoch": 4.51, + "learning_rate": 1.2869202880232477e-06, + "loss": 0.113, + "step": 120810 + }, + { + "epoch": 4.51, + "learning_rate": 1.2867604664880645e-06, + "loss": 0.1062, + "step": 120840 + }, + { + "epoch": 4.51, + "learning_rate": 1.2866007044823972e-06, + "loss": 0.1116, + "step": 120870 + }, + { + "epoch": 4.51, + "learning_rate": 1.286441001969299e-06, + "loss": 0.0937, + "step": 120900 + }, + { + "epoch": 4.51, + "learning_rate": 1.2862813589118562e-06, + "loss": 0.1041, + "step": 120930 + }, + { + "epoch": 4.51, + "learning_rate": 1.286121775273186e-06, + "loss": 0.0942, + "step": 120960 + }, + { + "epoch": 4.51, + "learning_rate": 1.285962251016439e-06, + "loss": 0.1193, + "step": 120990 + }, + { + "epoch": 4.51, + "learning_rate": 1.2858027861047972e-06, + "loss": 0.0999, + "step": 121020 + }, + { + "epoch": 4.51, + "learning_rate": 1.285643380501474e-06, + "loss": 0.1127, + "step": 121050 + }, + { + "epoch": 4.52, + "learning_rate": 1.2854840341697152e-06, + "loss": 0.1086, + "step": 121080 + }, + { + "epoch": 4.52, + "learning_rate": 1.285324747072799e-06, + "loss": 0.1131, + "step": 121110 + }, + { + "epoch": 4.52, + "learning_rate": 1.2851655191740345e-06, + "loss": 0.1009, + "step": 121140 + }, + { + "epoch": 4.52, + "learning_rate": 1.285006350436763e-06, + "loss": 0.119, + "step": 121170 + }, + { + "epoch": 4.52, + "learning_rate": 1.2848525435259235e-06, + "loss": 0.0867, + "step": 121200 + }, + { + "epoch": 4.52, + "learning_rate": 1.2846934910327693e-06, + "loss": 0.1177, + "step": 121230 + }, + { + "epoch": 4.52, + "learning_rate": 1.2845344975925409e-06, + "loss": 0.1146, + "step": 121260 + }, + { + "epoch": 4.52, + "learning_rate": 1.2843755631687056e-06, + "loss": 0.1109, + "step": 121290 + }, + { + "epoch": 4.52, + "learning_rate": 1.2842166877247616e-06, + "loss": 0.1045, + "step": 121320 + }, + { + "epoch": 4.53, + "learning_rate": 1.28405787122424e-06, + "loss": 0.0967, + "step": 121350 + }, + { + "epoch": 4.53, + "learning_rate": 1.2838991136307022e-06, + "loss": 0.1022, + "step": 121380 + }, + { + "epoch": 4.53, + "learning_rate": 1.283740414907742e-06, + "loss": 0.1271, + "step": 121410 + }, + { + "epoch": 4.53, + "learning_rate": 1.2835817750189845e-06, + "loss": 0.1612, + "step": 121440 + }, + { + "epoch": 4.53, + "learning_rate": 1.2834231939280856e-06, + "loss": 0.1181, + "step": 121470 + }, + { + "epoch": 4.53, + "learning_rate": 1.2832646715987334e-06, + "loss": 0.125, + "step": 121500 + }, + { + "epoch": 4.53, + "learning_rate": 1.283106207994647e-06, + "loss": 0.113, + "step": 121530 + }, + { + "epoch": 4.53, + "learning_rate": 1.2829478030795776e-06, + "loss": 0.109, + "step": 121560 + }, + { + "epoch": 4.53, + "learning_rate": 1.282789456817306e-06, + "loss": 0.112, + "step": 121590 + }, + { + "epoch": 4.54, + "learning_rate": 1.2826311691716465e-06, + "loss": 0.1436, + "step": 121620 + }, + { + "epoch": 4.54, + "learning_rate": 1.282472940106443e-06, + "loss": 0.0931, + "step": 121650 + }, + { + "epoch": 4.54, + "learning_rate": 1.2823147695855707e-06, + "loss": 0.1049, + "step": 121680 + }, + { + "epoch": 4.54, + "learning_rate": 1.282156657572937e-06, + "loss": 0.0898, + "step": 121710 + }, + { + "epoch": 4.54, + "learning_rate": 1.2819986040324803e-06, + "loss": 0.0961, + "step": 121740 + }, + { + "epoch": 4.54, + "learning_rate": 1.2818406089281688e-06, + "loss": 0.1009, + "step": 121770 + }, + { + "epoch": 4.54, + "learning_rate": 1.2816826722240031e-06, + "loss": 0.0844, + "step": 121800 + }, + { + "epoch": 4.54, + "learning_rate": 1.2815247938840142e-06, + "loss": 0.1061, + "step": 121830 + }, + { + "epoch": 4.54, + "learning_rate": 1.2813669738722644e-06, + "loss": 0.113, + "step": 121860 + }, + { + "epoch": 4.55, + "learning_rate": 1.281209212152847e-06, + "loss": 0.098, + "step": 121890 + }, + { + "epoch": 4.55, + "learning_rate": 1.2810515086898859e-06, + "loss": 0.1026, + "step": 121920 + }, + { + "epoch": 4.55, + "learning_rate": 1.2808938634475362e-06, + "loss": 0.1044, + "step": 121950 + }, + { + "epoch": 4.55, + "learning_rate": 1.2807362763899839e-06, + "loss": 0.1047, + "step": 121980 + }, + { + "epoch": 4.55, + "learning_rate": 1.2805787474814456e-06, + "loss": 0.1011, + "step": 122010 + }, + { + "epoch": 4.55, + "learning_rate": 1.280421276686169e-06, + "loss": 0.0955, + "step": 122040 + }, + { + "epoch": 4.55, + "learning_rate": 1.2802638639684322e-06, + "loss": 0.0999, + "step": 122070 + }, + { + "epoch": 4.55, + "learning_rate": 1.280106509292544e-06, + "loss": 0.1088, + "step": 122100 + }, + { + "epoch": 4.56, + "learning_rate": 1.279949212622845e-06, + "loss": 0.1164, + "step": 122130 + }, + { + "epoch": 4.56, + "learning_rate": 1.2797919739237045e-06, + "loss": 0.0978, + "step": 122160 + }, + { + "epoch": 4.56, + "learning_rate": 1.2796347931595244e-06, + "loss": 0.1141, + "step": 122190 + }, + { + "epoch": 4.56, + "learning_rate": 1.2794776702947356e-06, + "loss": 0.1212, + "step": 122220 + }, + { + "epoch": 4.56, + "learning_rate": 1.2793206052938007e-06, + "loss": 0.1063, + "step": 122250 + }, + { + "epoch": 4.56, + "learning_rate": 1.2791635981212126e-06, + "loss": 0.1107, + "step": 122280 + }, + { + "epoch": 4.56, + "learning_rate": 1.2790066487414936e-06, + "loss": 0.1031, + "step": 122310 + }, + { + "epoch": 4.56, + "learning_rate": 1.2788497571191988e-06, + "loss": 0.1171, + "step": 122340 + }, + { + "epoch": 4.56, + "learning_rate": 1.2786929232189112e-06, + "loss": 0.1054, + "step": 122370 + }, + { + "epoch": 4.57, + "learning_rate": 1.2785361470052457e-06, + "loss": 0.1144, + "step": 122400 + }, + { + "epoch": 4.57, + "learning_rate": 1.2783794284428473e-06, + "loss": 0.0912, + "step": 122430 + }, + { + "epoch": 4.57, + "learning_rate": 1.2782227674963907e-06, + "loss": 0.0941, + "step": 122460 + }, + { + "epoch": 4.57, + "learning_rate": 1.2780661641305817e-06, + "loss": 0.1035, + "step": 122490 + }, + { + "epoch": 4.57, + "learning_rate": 1.2779096183101562e-06, + "loss": 0.1032, + "step": 122520 + }, + { + "epoch": 4.57, + "learning_rate": 1.27775312999988e-06, + "loss": 0.1078, + "step": 122550 + }, + { + "epoch": 4.57, + "learning_rate": 1.2775966991645491e-06, + "loss": 0.0942, + "step": 122580 + }, + { + "epoch": 4.57, + "learning_rate": 1.2774403257689902e-06, + "loss": 0.1285, + "step": 122610 + }, + { + "epoch": 4.57, + "learning_rate": 1.2772840097780594e-06, + "loss": 0.0951, + "step": 122640 + }, + { + "epoch": 4.58, + "learning_rate": 1.2771277511566435e-06, + "loss": 0.1057, + "step": 122670 + }, + { + "epoch": 4.58, + "learning_rate": 1.276971549869659e-06, + "loss": 0.1358, + "step": 122700 + }, + { + "epoch": 4.58, + "learning_rate": 1.2768154058820525e-06, + "loss": 0.1089, + "step": 122730 + }, + { + "epoch": 4.58, + "learning_rate": 1.2766593191588008e-06, + "loss": 0.0994, + "step": 122760 + }, + { + "epoch": 4.58, + "learning_rate": 1.2765032896649106e-06, + "loss": 0.0995, + "step": 122790 + }, + { + "epoch": 4.58, + "learning_rate": 1.276347317365418e-06, + "loss": 0.1219, + "step": 122820 + }, + { + "epoch": 4.58, + "learning_rate": 1.2761914022253899e-06, + "loss": 0.1226, + "step": 122850 + }, + { + "epoch": 4.58, + "learning_rate": 1.2760355442099224e-06, + "loss": 0.1121, + "step": 122880 + }, + { + "epoch": 4.58, + "learning_rate": 1.2758797432841417e-06, + "loss": 0.1174, + "step": 122910 + }, + { + "epoch": 4.59, + "learning_rate": 1.275723999413203e-06, + "loss": 0.117, + "step": 122940 + }, + { + "epoch": 4.59, + "learning_rate": 1.2755683125622934e-06, + "loss": 0.1104, + "step": 122970 + }, + { + "epoch": 4.59, + "learning_rate": 1.2754126826966274e-06, + "loss": 0.122, + "step": 123000 + }, + { + "epoch": 4.59, + "learning_rate": 1.2752571097814504e-06, + "loss": 0.1165, + "step": 123030 + }, + { + "epoch": 4.59, + "learning_rate": 1.2751015937820368e-06, + "loss": 0.1147, + "step": 123060 + }, + { + "epoch": 4.59, + "learning_rate": 1.2749461346636915e-06, + "loss": 0.1127, + "step": 123090 + }, + { + "epoch": 4.59, + "learning_rate": 1.2747907323917488e-06, + "loss": 0.1077, + "step": 123120 + }, + { + "epoch": 4.59, + "learning_rate": 1.2746353869315714e-06, + "loss": 0.1128, + "step": 123150 + }, + { + "epoch": 4.59, + "learning_rate": 1.274480098248553e-06, + "loss": 0.109, + "step": 123180 + }, + { + "epoch": 4.6, + "learning_rate": 1.2743248663081165e-06, + "loss": 0.1067, + "step": 123210 + }, + { + "epoch": 4.6, + "learning_rate": 1.2741696910757134e-06, + "loss": 0.1056, + "step": 123240 + }, + { + "epoch": 4.6, + "learning_rate": 1.2740145725168258e-06, + "loss": 0.1165, + "step": 123270 + }, + { + "epoch": 4.6, + "learning_rate": 1.2738595105969641e-06, + "loss": 0.1169, + "step": 123300 + }, + { + "epoch": 4.6, + "learning_rate": 1.2737045052816693e-06, + "loss": 0.1161, + "step": 123330 + }, + { + "epoch": 4.6, + "learning_rate": 1.2735495565365108e-06, + "loss": 0.1099, + "step": 123360 + }, + { + "epoch": 4.6, + "learning_rate": 1.2733946643270871e-06, + "loss": 0.1046, + "step": 123390 + }, + { + "epoch": 4.6, + "learning_rate": 1.2732398286190276e-06, + "loss": 0.11, + "step": 123420 + }, + { + "epoch": 4.6, + "learning_rate": 1.2730850493779893e-06, + "loss": 0.1129, + "step": 123450 + }, + { + "epoch": 4.61, + "learning_rate": 1.2729303265696585e-06, + "loss": 0.0978, + "step": 123480 + }, + { + "epoch": 4.61, + "learning_rate": 1.2727756601597516e-06, + "loss": 0.1122, + "step": 123510 + }, + { + "epoch": 4.61, + "learning_rate": 1.2726210501140138e-06, + "loss": 0.1107, + "step": 123540 + }, + { + "epoch": 4.61, + "learning_rate": 1.272466496398219e-06, + "loss": 0.1242, + "step": 123570 + }, + { + "epoch": 4.61, + "learning_rate": 1.2723119989781708e-06, + "loss": 0.1147, + "step": 123600 + }, + { + "epoch": 4.61, + "learning_rate": 1.2721575578197013e-06, + "loss": 0.1031, + "step": 123630 + }, + { + "epoch": 4.61, + "learning_rate": 1.2720031728886717e-06, + "loss": 0.118, + "step": 123660 + }, + { + "epoch": 4.61, + "learning_rate": 1.271848844150973e-06, + "loss": 0.1143, + "step": 123690 + }, + { + "epoch": 4.61, + "learning_rate": 1.2716945715725241e-06, + "loss": 0.1111, + "step": 123720 + }, + { + "epoch": 4.62, + "learning_rate": 1.2715403551192732e-06, + "loss": 0.0922, + "step": 123750 + }, + { + "epoch": 4.62, + "learning_rate": 1.2713861947571978e-06, + "loss": 0.1152, + "step": 123780 + }, + { + "epoch": 4.62, + "learning_rate": 1.2712320904523037e-06, + "loss": 0.1104, + "step": 123810 + }, + { + "epoch": 4.62, + "learning_rate": 1.2710780421706256e-06, + "loss": 0.1041, + "step": 123840 + }, + { + "epoch": 4.62, + "learning_rate": 1.270924049878228e-06, + "loss": 0.1147, + "step": 123870 + }, + { + "epoch": 4.62, + "learning_rate": 1.2707701135412023e-06, + "loss": 0.1253, + "step": 123900 + }, + { + "epoch": 4.62, + "learning_rate": 1.27061623312567e-06, + "loss": 0.0983, + "step": 123930 + }, + { + "epoch": 4.62, + "learning_rate": 1.2704624085977816e-06, + "loss": 0.1467, + "step": 123960 + }, + { + "epoch": 4.62, + "learning_rate": 1.2703086399237149e-06, + "loss": 0.1096, + "step": 123990 + }, + { + "epoch": 4.63, + "learning_rate": 1.2701549270696776e-06, + "loss": 0.1055, + "step": 124020 + }, + { + "epoch": 4.63, + "learning_rate": 1.270001270001905e-06, + "loss": 0.0958, + "step": 124050 + }, + { + "epoch": 4.63, + "learning_rate": 1.2698476686866623e-06, + "loss": 0.1173, + "step": 124080 + }, + { + "epoch": 4.63, + "learning_rate": 1.2696941230902421e-06, + "loss": 0.1011, + "step": 124110 + }, + { + "epoch": 4.63, + "learning_rate": 1.2695406331789656e-06, + "loss": 0.0969, + "step": 124140 + }, + { + "epoch": 4.63, + "learning_rate": 1.2693871989191832e-06, + "loss": 0.1129, + "step": 124170 + }, + { + "epoch": 4.63, + "learning_rate": 1.2692338202772736e-06, + "loss": 0.0921, + "step": 124200 + }, + { + "epoch": 4.63, + "learning_rate": 1.2690804972196432e-06, + "loss": 0.1043, + "step": 124230 + }, + { + "epoch": 4.63, + "learning_rate": 1.2689272297127274e-06, + "loss": 0.1239, + "step": 124260 + }, + { + "epoch": 4.64, + "learning_rate": 1.26877401772299e-06, + "loss": 0.1071, + "step": 124290 + }, + { + "epoch": 4.64, + "learning_rate": 1.2686208612169226e-06, + "loss": 0.106, + "step": 124320 + }, + { + "epoch": 4.64, + "learning_rate": 1.268467760161046e-06, + "loss": 0.0871, + "step": 124350 + }, + { + "epoch": 4.64, + "learning_rate": 1.2683147145219085e-06, + "loss": 0.0989, + "step": 124380 + }, + { + "epoch": 4.64, + "learning_rate": 1.268161724266087e-06, + "loss": 0.1087, + "step": 124410 + }, + { + "epoch": 4.64, + "learning_rate": 1.2680138862989871e-06, + "loss": 0.1111, + "step": 124440 + }, + { + "epoch": 4.64, + "learning_rate": 1.2678610048662932e-06, + "loss": 0.1169, + "step": 124470 + }, + { + "epoch": 4.64, + "learning_rate": 1.2677081787179258e-06, + "loss": 0.1131, + "step": 124500 + }, + { + "epoch": 4.64, + "learning_rate": 1.2675554078205732e-06, + "loss": 0.0931, + "step": 124530 + }, + { + "epoch": 4.65, + "learning_rate": 1.2674026921409524e-06, + "loss": 0.1193, + "step": 124560 + }, + { + "epoch": 4.65, + "learning_rate": 1.267250031645808e-06, + "loss": 0.1002, + "step": 124590 + }, + { + "epoch": 4.65, + "learning_rate": 1.267097426301913e-06, + "loss": 0.1363, + "step": 124620 + }, + { + "epoch": 4.65, + "learning_rate": 1.2669448760760672e-06, + "loss": 0.1128, + "step": 124650 + }, + { + "epoch": 4.65, + "learning_rate": 1.2667923809351006e-06, + "loss": 0.1391, + "step": 124680 + }, + { + "epoch": 4.65, + "learning_rate": 1.2666399408458692e-06, + "loss": 0.1102, + "step": 124710 + }, + { + "epoch": 4.65, + "learning_rate": 1.2664875557752578e-06, + "loss": 0.1066, + "step": 124740 + }, + { + "epoch": 4.65, + "learning_rate": 1.2663352256901788e-06, + "loss": 0.093, + "step": 124770 + }, + { + "epoch": 4.65, + "learning_rate": 1.2661829505575724e-06, + "loss": 0.1056, + "step": 124800 + }, + { + "epoch": 4.66, + "learning_rate": 1.266030730344407e-06, + "loss": 0.1005, + "step": 124830 + }, + { + "epoch": 4.66, + "learning_rate": 1.265878565017678e-06, + "loss": 0.1037, + "step": 124860 + }, + { + "epoch": 4.66, + "learning_rate": 1.2657264545444095e-06, + "loss": 0.1174, + "step": 124890 + }, + { + "epoch": 4.66, + "learning_rate": 1.2655743988916528e-06, + "loss": 0.1028, + "step": 124920 + }, + { + "epoch": 4.66, + "learning_rate": 1.2654223980264873e-06, + "loss": 0.1058, + "step": 124950 + }, + { + "epoch": 4.66, + "learning_rate": 1.2652704519160192e-06, + "loss": 0.1087, + "step": 124980 + }, + { + "epoch": 4.66, + "learning_rate": 1.2651185605273832e-06, + "loss": 0.1167, + "step": 125010 + }, + { + "epoch": 4.66, + "learning_rate": 1.2649667238277418e-06, + "loss": 0.1114, + "step": 125040 + }, + { + "epoch": 4.66, + "learning_rate": 1.2648149417842841e-06, + "loss": 0.088, + "step": 125070 + }, + { + "epoch": 4.67, + "learning_rate": 1.2646632143642272e-06, + "loss": 0.1024, + "step": 125100 + }, + { + "epoch": 4.67, + "learning_rate": 1.2645115415348164e-06, + "loss": 0.091, + "step": 125130 + }, + { + "epoch": 4.67, + "learning_rate": 1.2643599232633234e-06, + "loss": 0.0915, + "step": 125160 + }, + { + "epoch": 4.67, + "learning_rate": 1.2642083595170476e-06, + "loss": 0.0983, + "step": 125190 + }, + { + "epoch": 4.67, + "learning_rate": 1.2640568502633168e-06, + "loss": 0.1039, + "step": 125220 + }, + { + "epoch": 4.67, + "learning_rate": 1.263905395469485e-06, + "loss": 0.1241, + "step": 125250 + }, + { + "epoch": 4.67, + "learning_rate": 1.263753995102934e-06, + "loss": 0.113, + "step": 125280 + }, + { + "epoch": 4.67, + "learning_rate": 1.2636026491310733e-06, + "loss": 0.1267, + "step": 125310 + }, + { + "epoch": 4.67, + "learning_rate": 1.263451357521339e-06, + "loss": 0.1088, + "step": 125340 + }, + { + "epoch": 4.68, + "learning_rate": 1.2633001202411954e-06, + "loss": 0.1209, + "step": 125370 + }, + { + "epoch": 4.68, + "learning_rate": 1.2631489372581335e-06, + "loss": 0.1221, + "step": 125400 + }, + { + "epoch": 4.68, + "learning_rate": 1.2630028452896997e-06, + "loss": 0.1268, + "step": 125430 + }, + { + "epoch": 4.68, + "learning_rate": 1.2628517689961675e-06, + "loss": 0.1132, + "step": 125460 + }, + { + "epoch": 4.68, + "learning_rate": 1.2627007469034332e-06, + "loss": 0.1092, + "step": 125490 + }, + { + "epoch": 4.68, + "learning_rate": 1.2625497789790967e-06, + "loss": 0.0943, + "step": 125520 + }, + { + "epoch": 4.68, + "learning_rate": 1.2623988651907834e-06, + "loss": 0.1242, + "step": 125550 + }, + { + "epoch": 4.68, + "learning_rate": 1.2622480055061464e-06, + "loss": 0.1133, + "step": 125580 + }, + { + "epoch": 4.68, + "learning_rate": 1.262097199892866e-06, + "loss": 0.1141, + "step": 125610 + }, + { + "epoch": 4.69, + "learning_rate": 1.2619464483186494e-06, + "loss": 0.1023, + "step": 125640 + }, + { + "epoch": 4.69, + "learning_rate": 1.2617957507512305e-06, + "loss": 0.0932, + "step": 125670 + }, + { + "epoch": 4.69, + "learning_rate": 1.2616451071583708e-06, + "loss": 0.1173, + "step": 125700 + }, + { + "epoch": 4.69, + "learning_rate": 1.2614945175078577e-06, + "loss": 0.1018, + "step": 125730 + }, + { + "epoch": 4.69, + "learning_rate": 1.2613439817675066e-06, + "loss": 0.1226, + "step": 125760 + }, + { + "epoch": 4.69, + "learning_rate": 1.2611934999051594e-06, + "loss": 0.0986, + "step": 125790 + }, + { + "epoch": 4.69, + "learning_rate": 1.2610430718886849e-06, + "loss": 0.1193, + "step": 125820 + }, + { + "epoch": 4.69, + "learning_rate": 1.260892697685978e-06, + "loss": 0.0996, + "step": 125850 + }, + { + "epoch": 4.69, + "learning_rate": 1.2607423772649616e-06, + "loss": 0.0975, + "step": 125880 + }, + { + "epoch": 4.7, + "learning_rate": 1.2605921105935848e-06, + "loss": 0.1183, + "step": 125910 + }, + { + "epoch": 4.7, + "learning_rate": 1.2604418976398225e-06, + "loss": 0.1177, + "step": 125940 + }, + { + "epoch": 4.7, + "learning_rate": 1.2602917383716784e-06, + "loss": 0.1219, + "step": 125970 + }, + { + "epoch": 4.7, + "learning_rate": 1.2601416327571808e-06, + "loss": 0.1232, + "step": 126000 + }, + { + "epoch": 4.7, + "learning_rate": 1.2599915807643858e-06, + "loss": 0.108, + "step": 126030 + }, + { + "epoch": 4.7, + "learning_rate": 1.2598415823613763e-06, + "loss": 0.1135, + "step": 126060 + }, + { + "epoch": 4.7, + "learning_rate": 1.2596916375162605e-06, + "loss": 0.1191, + "step": 126090 + }, + { + "epoch": 4.7, + "learning_rate": 1.2595417461971747e-06, + "loss": 0.1177, + "step": 126120 + }, + { + "epoch": 4.7, + "learning_rate": 1.2593919083722806e-06, + "loss": 0.1073, + "step": 126150 + }, + { + "epoch": 4.71, + "learning_rate": 1.2592421240097672e-06, + "loss": 0.1122, + "step": 126180 + }, + { + "epoch": 4.71, + "learning_rate": 1.2590923930778489e-06, + "loss": 0.1189, + "step": 126210 + }, + { + "epoch": 4.71, + "learning_rate": 1.2589427155447684e-06, + "loss": 0.1024, + "step": 126240 + }, + { + "epoch": 4.71, + "learning_rate": 1.2587930913787927e-06, + "loss": 0.115, + "step": 126270 + }, + { + "epoch": 4.71, + "learning_rate": 1.2586435205482167e-06, + "loss": 0.0978, + "step": 126300 + }, + { + "epoch": 4.71, + "learning_rate": 1.258494003021361e-06, + "loss": 0.109, + "step": 126330 + }, + { + "epoch": 4.71, + "learning_rate": 1.2583445387665727e-06, + "loss": 0.1314, + "step": 126360 + }, + { + "epoch": 4.71, + "learning_rate": 1.2581951277522253e-06, + "loss": 0.1095, + "step": 126390 + }, + { + "epoch": 4.72, + "learning_rate": 1.2580457699467181e-06, + "loss": 0.1293, + "step": 126420 + }, + { + "epoch": 4.72, + "learning_rate": 1.2578964653184774e-06, + "loss": 0.1144, + "step": 126450 + }, + { + "epoch": 4.72, + "learning_rate": 1.2577472138359553e-06, + "loss": 0.1177, + "step": 126480 + }, + { + "epoch": 4.72, + "learning_rate": 1.25759801546763e-06, + "loss": 0.0867, + "step": 126510 + }, + { + "epoch": 4.72, + "learning_rate": 1.2574488701820063e-06, + "loss": 0.0993, + "step": 126540 + }, + { + "epoch": 4.72, + "learning_rate": 1.2572997779476142e-06, + "loss": 0.1046, + "step": 126570 + }, + { + "epoch": 4.72, + "learning_rate": 1.2571507387330115e-06, + "loss": 0.1035, + "step": 126600 + }, + { + "epoch": 4.72, + "learning_rate": 1.2570017525067802e-06, + "loss": 0.1182, + "step": 126630 + }, + { + "epoch": 4.72, + "learning_rate": 1.2568528192375293e-06, + "loss": 0.0898, + "step": 126660 + }, + { + "epoch": 4.73, + "learning_rate": 1.256703938893894e-06, + "loss": 0.1042, + "step": 126690 + }, + { + "epoch": 4.73, + "learning_rate": 1.2565551114445355e-06, + "loss": 0.1312, + "step": 126720 + }, + { + "epoch": 4.73, + "learning_rate": 1.25640633685814e-06, + "loss": 0.1011, + "step": 126750 + }, + { + "epoch": 4.73, + "learning_rate": 1.2562576151034208e-06, + "loss": 0.0966, + "step": 126780 + }, + { + "epoch": 4.73, + "learning_rate": 1.2561089461491165e-06, + "loss": 0.1087, + "step": 126810 + }, + { + "epoch": 4.73, + "learning_rate": 1.2559603299639922e-06, + "loss": 0.1019, + "step": 126840 + }, + { + "epoch": 4.73, + "learning_rate": 1.255811766516838e-06, + "loss": 0.1032, + "step": 126870 + }, + { + "epoch": 4.73, + "learning_rate": 1.25566325577647e-06, + "loss": 0.1446, + "step": 126900 + }, + { + "epoch": 4.73, + "learning_rate": 1.255514797711731e-06, + "loss": 0.085, + "step": 126930 + }, + { + "epoch": 4.74, + "learning_rate": 1.2553663922914887e-06, + "loss": 0.1077, + "step": 126960 + }, + { + "epoch": 4.74, + "learning_rate": 1.2552180394846366e-06, + "loss": 0.1019, + "step": 126990 + }, + { + "epoch": 4.74, + "learning_rate": 1.2550697392600945e-06, + "loss": 0.1007, + "step": 127020 + }, + { + "epoch": 4.74, + "learning_rate": 1.2549214915868075e-06, + "loss": 0.1203, + "step": 127050 + }, + { + "epoch": 4.74, + "learning_rate": 1.254773296433746e-06, + "loss": 0.1135, + "step": 127080 + }, + { + "epoch": 4.74, + "learning_rate": 1.2546251537699069e-06, + "loss": 0.0953, + "step": 127110 + }, + { + "epoch": 4.74, + "learning_rate": 1.2544770635643121e-06, + "loss": 0.1147, + "step": 127140 + }, + { + "epoch": 4.74, + "learning_rate": 1.254329025786009e-06, + "loss": 0.1283, + "step": 127170 + }, + { + "epoch": 4.74, + "learning_rate": 1.254181040404071e-06, + "loss": 0.1249, + "step": 127200 + }, + { + "epoch": 4.75, + "learning_rate": 1.2540331073875973e-06, + "loss": 0.1066, + "step": 127230 + }, + { + "epoch": 4.75, + "learning_rate": 1.2538852267057116e-06, + "loss": 0.0965, + "step": 127260 + }, + { + "epoch": 4.75, + "learning_rate": 1.2537373983275636e-06, + "loss": 0.1096, + "step": 127290 + }, + { + "epoch": 4.75, + "learning_rate": 1.2535896222223288e-06, + "loss": 0.1008, + "step": 127320 + }, + { + "epoch": 4.75, + "learning_rate": 1.2534418983592075e-06, + "loss": 0.0941, + "step": 127350 + }, + { + "epoch": 4.75, + "learning_rate": 1.2532942267074264e-06, + "loss": 0.0951, + "step": 127380 + }, + { + "epoch": 4.75, + "learning_rate": 1.2531466072362361e-06, + "loss": 0.1065, + "step": 127410 + }, + { + "epoch": 4.75, + "learning_rate": 1.2529990399149137e-06, + "loss": 0.1239, + "step": 127440 + }, + { + "epoch": 4.75, + "learning_rate": 1.2528515247127609e-06, + "loss": 0.1015, + "step": 127470 + }, + { + "epoch": 4.76, + "learning_rate": 1.2527040615991059e-06, + "loss": 0.0923, + "step": 127500 + }, + { + "epoch": 4.76, + "learning_rate": 1.2525566505433004e-06, + "loss": 0.1019, + "step": 127530 + }, + { + "epoch": 4.76, + "learning_rate": 1.2524092915147229e-06, + "loss": 0.1318, + "step": 127560 + }, + { + "epoch": 4.76, + "learning_rate": 1.252261984482776e-06, + "loss": 0.0982, + "step": 127590 + }, + { + "epoch": 4.76, + "learning_rate": 1.252114729416888e-06, + "loss": 0.1154, + "step": 127620 + }, + { + "epoch": 4.76, + "learning_rate": 1.2519675262865127e-06, + "loss": 0.1057, + "step": 127650 + }, + { + "epoch": 4.76, + "learning_rate": 1.2518203750611286e-06, + "loss": 0.1046, + "step": 127680 + }, + { + "epoch": 4.76, + "learning_rate": 1.2516732757102388e-06, + "loss": 0.1086, + "step": 127710 + }, + { + "epoch": 4.76, + "learning_rate": 1.2515262282033725e-06, + "loss": 0.1039, + "step": 127740 + }, + { + "epoch": 4.77, + "learning_rate": 1.2513792325100835e-06, + "loss": 0.1005, + "step": 127770 + }, + { + "epoch": 4.77, + "learning_rate": 1.2512322885999503e-06, + "loss": 0.1234, + "step": 127800 + }, + { + "epoch": 4.77, + "learning_rate": 1.251085396442577e-06, + "loss": 0.1053, + "step": 127830 + }, + { + "epoch": 4.77, + "learning_rate": 1.250938556007592e-06, + "loss": 0.1155, + "step": 127860 + }, + { + "epoch": 4.77, + "learning_rate": 1.2507917672646498e-06, + "loss": 0.1063, + "step": 127890 + }, + { + "epoch": 4.77, + "learning_rate": 1.2506450301834284e-06, + "loss": 0.096, + "step": 127920 + }, + { + "epoch": 4.77, + "learning_rate": 1.2504983447336312e-06, + "loss": 0.1392, + "step": 127950 + }, + { + "epoch": 4.77, + "learning_rate": 1.2503517108849869e-06, + "loss": 0.0902, + "step": 127980 + }, + { + "epoch": 4.77, + "learning_rate": 1.2502051286072488e-06, + "loss": 0.1048, + "step": 128010 + }, + { + "epoch": 4.78, + "learning_rate": 1.2500585978701952e-06, + "loss": 0.1352, + "step": 128040 + }, + { + "epoch": 4.78, + "learning_rate": 1.2499121186436284e-06, + "loss": 0.103, + "step": 128070 + }, + { + "epoch": 4.78, + "learning_rate": 1.249765690897376e-06, + "loss": 0.1082, + "step": 128100 + }, + { + "epoch": 4.78, + "learning_rate": 1.2496193146012914e-06, + "loss": 0.1334, + "step": 128130 + }, + { + "epoch": 4.78, + "learning_rate": 1.2494729897252504e-06, + "loss": 0.1062, + "step": 128160 + }, + { + "epoch": 4.78, + "learning_rate": 1.2493267162391553e-06, + "loss": 0.1236, + "step": 128190 + }, + { + "epoch": 4.78, + "learning_rate": 1.2491804941129326e-06, + "loss": 0.0906, + "step": 128220 + }, + { + "epoch": 4.78, + "learning_rate": 1.2490343233165332e-06, + "loss": 0.1046, + "step": 128250 + }, + { + "epoch": 4.78, + "learning_rate": 1.2488882038199327e-06, + "loss": 0.1001, + "step": 128280 + }, + { + "epoch": 4.79, + "learning_rate": 1.2487421355931315e-06, + "loss": 0.1148, + "step": 128310 + }, + { + "epoch": 4.79, + "learning_rate": 1.2485961186061542e-06, + "loss": 0.0994, + "step": 128340 + }, + { + "epoch": 4.79, + "learning_rate": 1.2484501528290504e-06, + "loss": 0.1428, + "step": 128370 + }, + { + "epoch": 4.79, + "learning_rate": 1.2483042382318938e-06, + "loss": 0.1143, + "step": 128400 + }, + { + "epoch": 4.79, + "learning_rate": 1.2481583747847825e-06, + "loss": 0.1171, + "step": 128430 + }, + { + "epoch": 4.79, + "learning_rate": 1.2480125624578395e-06, + "loss": 0.1124, + "step": 128460 + }, + { + "epoch": 4.79, + "learning_rate": 1.2478668012212119e-06, + "loss": 0.0794, + "step": 128490 + }, + { + "epoch": 4.79, + "learning_rate": 1.247721091045071e-06, + "loss": 0.1156, + "step": 128520 + }, + { + "epoch": 4.79, + "learning_rate": 1.2475754318996134e-06, + "loss": 0.095, + "step": 128550 + }, + { + "epoch": 4.8, + "learning_rate": 1.2474298237550587e-06, + "loss": 0.111, + "step": 128580 + }, + { + "epoch": 4.8, + "learning_rate": 1.2472842665816518e-06, + "loss": 0.0807, + "step": 128610 + }, + { + "epoch": 4.8, + "learning_rate": 1.247138760349662e-06, + "loss": 0.0867, + "step": 128640 + }, + { + "epoch": 4.8, + "learning_rate": 1.2469933050293821e-06, + "loss": 0.1201, + "step": 128670 + }, + { + "epoch": 4.8, + "learning_rate": 1.2468479005911294e-06, + "loss": 0.0993, + "step": 128700 + }, + { + "epoch": 4.8, + "learning_rate": 1.246702547005246e-06, + "loss": 0.0974, + "step": 128730 + }, + { + "epoch": 4.8, + "learning_rate": 1.2465572442420977e-06, + "loss": 0.122, + "step": 128760 + }, + { + "epoch": 4.8, + "learning_rate": 1.2464119922720743e-06, + "loss": 0.1179, + "step": 128790 + }, + { + "epoch": 4.8, + "learning_rate": 1.2462667910655897e-06, + "loss": 0.1133, + "step": 128820 + }, + { + "epoch": 4.81, + "learning_rate": 1.2461216405930831e-06, + "loss": 0.1158, + "step": 128850 + }, + { + "epoch": 4.81, + "learning_rate": 1.245976540825016e-06, + "loss": 0.1173, + "step": 128880 + }, + { + "epoch": 4.81, + "learning_rate": 1.2458314917318756e-06, + "loss": 0.1182, + "step": 128910 + }, + { + "epoch": 4.81, + "learning_rate": 1.2456864932841722e-06, + "loss": 0.0994, + "step": 128940 + }, + { + "epoch": 4.81, + "learning_rate": 1.24554154545244e-06, + "loss": 0.1251, + "step": 128970 + }, + { + "epoch": 4.81, + "learning_rate": 1.2453966482072375e-06, + "loss": 0.1237, + "step": 129000 + }, + { + "epoch": 4.81, + "learning_rate": 1.2452518015191477e-06, + "loss": 0.1108, + "step": 129030 + }, + { + "epoch": 4.81, + "learning_rate": 1.2451070053587767e-06, + "loss": 0.1188, + "step": 129060 + }, + { + "epoch": 4.81, + "learning_rate": 1.2449622596967553e-06, + "loss": 0.0938, + "step": 129090 + }, + { + "epoch": 4.82, + "learning_rate": 1.244817564503737e-06, + "loss": 0.1308, + "step": 129120 + }, + { + "epoch": 4.82, + "learning_rate": 1.2446729197504007e-06, + "loss": 0.0957, + "step": 129150 + }, + { + "epoch": 4.82, + "learning_rate": 1.2445283254074479e-06, + "loss": 0.1108, + "step": 129180 + }, + { + "epoch": 4.82, + "learning_rate": 1.2443837814456042e-06, + "loss": 0.1098, + "step": 129210 + }, + { + "epoch": 4.82, + "learning_rate": 1.24424410347837e-06, + "loss": 0.0849, + "step": 129240 + }, + { + "epoch": 4.82, + "learning_rate": 1.2440996585140674e-06, + "loss": 0.1112, + "step": 129270 + }, + { + "epoch": 4.82, + "learning_rate": 1.2439552638441673e-06, + "loss": 0.1154, + "step": 129300 + }, + { + "epoch": 4.82, + "learning_rate": 1.24381091943949e-06, + "loss": 0.1224, + "step": 129330 + }, + { + "epoch": 4.82, + "learning_rate": 1.2436666252708784e-06, + "loss": 0.1096, + "step": 129360 + }, + { + "epoch": 4.83, + "learning_rate": 1.2435223813092007e-06, + "loss": 0.0951, + "step": 129390 + }, + { + "epoch": 4.83, + "learning_rate": 1.2433781875253476e-06, + "loss": 0.1117, + "step": 129420 + }, + { + "epoch": 4.83, + "learning_rate": 1.2432340438902337e-06, + "loss": 0.1054, + "step": 129450 + }, + { + "epoch": 4.83, + "learning_rate": 1.2430899503747977e-06, + "loss": 0.114, + "step": 129480 + }, + { + "epoch": 4.83, + "learning_rate": 1.242945906950001e-06, + "loss": 0.147, + "step": 129510 + }, + { + "epoch": 4.83, + "learning_rate": 1.2428019135868293e-06, + "loss": 0.1143, + "step": 129540 + }, + { + "epoch": 4.83, + "learning_rate": 1.2426579702562912e-06, + "loss": 0.1192, + "step": 129570 + }, + { + "epoch": 4.83, + "learning_rate": 1.2425140769294192e-06, + "loss": 0.098, + "step": 129600 + }, + { + "epoch": 4.83, + "learning_rate": 1.2423702335772691e-06, + "loss": 0.0906, + "step": 129630 + }, + { + "epoch": 4.84, + "learning_rate": 1.2422264401709206e-06, + "loss": 0.0879, + "step": 129660 + }, + { + "epoch": 4.84, + "learning_rate": 1.2420826966814762e-06, + "loss": 0.1079, + "step": 129690 + }, + { + "epoch": 4.84, + "learning_rate": 1.2419390030800618e-06, + "loss": 0.0955, + "step": 129720 + }, + { + "epoch": 4.84, + "learning_rate": 1.2417953593378274e-06, + "loss": 0.0878, + "step": 129750 + }, + { + "epoch": 4.84, + "learning_rate": 1.2416517654259457e-06, + "loss": 0.1031, + "step": 129780 + }, + { + "epoch": 4.84, + "learning_rate": 1.241508221315613e-06, + "loss": 0.0887, + "step": 129810 + }, + { + "epoch": 4.84, + "learning_rate": 1.2413647269780489e-06, + "loss": 0.1279, + "step": 129840 + }, + { + "epoch": 4.84, + "learning_rate": 1.2412212823844956e-06, + "loss": 0.1016, + "step": 129870 + }, + { + "epoch": 4.84, + "learning_rate": 1.2410778875062197e-06, + "loss": 0.1015, + "step": 129900 + }, + { + "epoch": 4.85, + "learning_rate": 1.2409345423145103e-06, + "loss": 0.1133, + "step": 129930 + }, + { + "epoch": 4.85, + "learning_rate": 1.2407912467806803e-06, + "loss": 0.1121, + "step": 129960 + }, + { + "epoch": 4.85, + "learning_rate": 1.2406480008760647e-06, + "loss": 0.1025, + "step": 129990 + }, + { + "epoch": 4.85, + "learning_rate": 1.240504804572023e-06, + "loss": 0.0929, + "step": 130020 + }, + { + "epoch": 4.85, + "learning_rate": 1.2403616578399369e-06, + "loss": 0.1031, + "step": 130050 + }, + { + "epoch": 4.85, + "learning_rate": 1.2402185606512112e-06, + "loss": 0.0915, + "step": 130080 + }, + { + "epoch": 4.85, + "learning_rate": 1.2400755129772745e-06, + "loss": 0.1082, + "step": 130110 + }, + { + "epoch": 4.85, + "learning_rate": 1.2399325147895776e-06, + "loss": 0.1257, + "step": 130140 + }, + { + "epoch": 4.85, + "learning_rate": 1.2397895660595954e-06, + "loss": 0.1182, + "step": 130170 + }, + { + "epoch": 4.86, + "learning_rate": 1.2396466667588247e-06, + "loss": 0.1072, + "step": 130200 + }, + { + "epoch": 4.86, + "learning_rate": 1.2395038168587853e-06, + "loss": 0.1104, + "step": 130230 + }, + { + "epoch": 4.86, + "learning_rate": 1.239361016331022e-06, + "loss": 0.0867, + "step": 130260 + }, + { + "epoch": 4.86, + "learning_rate": 1.2392182651470993e-06, + "loss": 0.118, + "step": 130290 + }, + { + "epoch": 4.86, + "learning_rate": 1.2390755632786075e-06, + "loss": 0.1032, + "step": 130320 + }, + { + "epoch": 4.86, + "learning_rate": 1.238932910697158e-06, + "loss": 0.1065, + "step": 130350 + }, + { + "epoch": 4.86, + "learning_rate": 1.2387903073743856e-06, + "loss": 0.1119, + "step": 130380 + }, + { + "epoch": 4.86, + "learning_rate": 1.2386477532819482e-06, + "loss": 0.1183, + "step": 130410 + }, + { + "epoch": 4.86, + "learning_rate": 1.2385052483915269e-06, + "loss": 0.1116, + "step": 130440 + }, + { + "epoch": 4.87, + "learning_rate": 1.2383627926748244e-06, + "loss": 0.1086, + "step": 130470 + }, + { + "epoch": 4.87, + "learning_rate": 1.2382203861035668e-06, + "loss": 0.1153, + "step": 130500 + }, + { + "epoch": 4.87, + "learning_rate": 1.2380780286495036e-06, + "loss": 0.1252, + "step": 130530 + }, + { + "epoch": 4.87, + "learning_rate": 1.2379357202844055e-06, + "loss": 0.1114, + "step": 130560 + }, + { + "epoch": 4.87, + "learning_rate": 1.2377934609800675e-06, + "loss": 0.1164, + "step": 130590 + }, + { + "epoch": 4.87, + "learning_rate": 1.2376512507083065e-06, + "loss": 0.1095, + "step": 130620 + }, + { + "epoch": 4.87, + "learning_rate": 1.2375090894409624e-06, + "loss": 0.0931, + "step": 130650 + }, + { + "epoch": 4.87, + "learning_rate": 1.237366977149897e-06, + "loss": 0.093, + "step": 130680 + }, + { + "epoch": 4.88, + "learning_rate": 1.2372249138069957e-06, + "loss": 0.122, + "step": 130710 + }, + { + "epoch": 4.88, + "learning_rate": 1.2370828993841653e-06, + "loss": 0.1082, + "step": 130740 + }, + { + "epoch": 4.88, + "learning_rate": 1.2369409338533365e-06, + "loss": 0.1006, + "step": 130770 + }, + { + "epoch": 4.88, + "learning_rate": 1.2367990171864617e-06, + "loss": 0.0942, + "step": 130800 + }, + { + "epoch": 4.88, + "learning_rate": 1.2366571493555165e-06, + "loss": 0.1071, + "step": 130830 + }, + { + "epoch": 4.88, + "learning_rate": 1.2365153303324978e-06, + "loss": 0.1091, + "step": 130860 + }, + { + "epoch": 4.88, + "learning_rate": 1.236373560089426e-06, + "loss": 0.0992, + "step": 130890 + }, + { + "epoch": 4.88, + "learning_rate": 1.2362318385983439e-06, + "loss": 0.1205, + "step": 130920 + }, + { + "epoch": 4.88, + "learning_rate": 1.2360901658313162e-06, + "loss": 0.1319, + "step": 130950 + }, + { + "epoch": 4.89, + "learning_rate": 1.2359485417604307e-06, + "loss": 0.1027, + "step": 130980 + }, + { + "epoch": 4.89, + "learning_rate": 1.2358069663577966e-06, + "loss": 0.1099, + "step": 131010 + }, + { + "epoch": 4.89, + "learning_rate": 1.2356654395955465e-06, + "loss": 0.1503, + "step": 131040 + }, + { + "epoch": 4.89, + "learning_rate": 1.2355239614458344e-06, + "loss": 0.1108, + "step": 131070 + }, + { + "epoch": 4.89, + "learning_rate": 1.2353825318808377e-06, + "loss": 0.1128, + "step": 131100 + }, + { + "epoch": 4.89, + "learning_rate": 1.235241150872755e-06, + "loss": 0.1135, + "step": 131130 + }, + { + "epoch": 4.89, + "learning_rate": 1.2350998183938077e-06, + "loss": 0.1158, + "step": 131160 + }, + { + "epoch": 4.89, + "learning_rate": 1.2349585344162398e-06, + "loss": 0.0993, + "step": 131190 + }, + { + "epoch": 4.89, + "learning_rate": 1.2348172989123166e-06, + "loss": 0.1053, + "step": 131220 + }, + { + "epoch": 4.9, + "learning_rate": 1.2346761118543262e-06, + "loss": 0.1223, + "step": 131250 + }, + { + "epoch": 4.9, + "learning_rate": 1.2345349732145788e-06, + "loss": 0.1265, + "step": 131280 + }, + { + "epoch": 4.9, + "learning_rate": 1.2343938829654069e-06, + "loss": 0.0982, + "step": 131310 + }, + { + "epoch": 4.9, + "learning_rate": 1.2342528410791647e-06, + "loss": 0.0982, + "step": 131340 + }, + { + "epoch": 4.9, + "learning_rate": 1.234111847528229e-06, + "loss": 0.1148, + "step": 131370 + }, + { + "epoch": 4.9, + "learning_rate": 1.2339709022849983e-06, + "loss": 0.0997, + "step": 131400 + }, + { + "epoch": 4.9, + "learning_rate": 1.2338300053218935e-06, + "loss": 0.1387, + "step": 131430 + }, + { + "epoch": 4.9, + "learning_rate": 1.2336891566113572e-06, + "loss": 0.1052, + "step": 131460 + }, + { + "epoch": 4.9, + "learning_rate": 1.233548356125854e-06, + "loss": 0.1157, + "step": 131490 + }, + { + "epoch": 4.91, + "learning_rate": 1.2334076038378707e-06, + "loss": 0.1167, + "step": 131520 + }, + { + "epoch": 4.91, + "learning_rate": 1.2332668997199166e-06, + "loss": 0.0956, + "step": 131550 + }, + { + "epoch": 4.91, + "learning_rate": 1.2331262437445215e-06, + "loss": 0.1308, + "step": 131580 + }, + { + "epoch": 4.91, + "learning_rate": 1.2329856358842388e-06, + "loss": 0.1052, + "step": 131610 + }, + { + "epoch": 4.91, + "learning_rate": 1.2328450761116422e-06, + "loss": 0.1155, + "step": 131640 + }, + { + "epoch": 4.91, + "learning_rate": 1.2327045643993285e-06, + "loss": 0.1028, + "step": 131670 + }, + { + "epoch": 4.91, + "learning_rate": 1.2325641007199162e-06, + "loss": 0.0988, + "step": 131700 + }, + { + "epoch": 4.91, + "learning_rate": 1.232423685046045e-06, + "loss": 0.1303, + "step": 131730 + }, + { + "epoch": 4.91, + "learning_rate": 1.2322833173503769e-06, + "loss": 0.1042, + "step": 131760 + }, + { + "epoch": 4.92, + "learning_rate": 1.2321429976055954e-06, + "loss": 0.1129, + "step": 131790 + }, + { + "epoch": 4.92, + "learning_rate": 1.2320027257844061e-06, + "loss": 0.1206, + "step": 131820 + }, + { + "epoch": 4.92, + "learning_rate": 1.2318625018595365e-06, + "loss": 0.1058, + "step": 131850 + }, + { + "epoch": 4.92, + "learning_rate": 1.231722325803735e-06, + "loss": 0.1102, + "step": 131880 + }, + { + "epoch": 4.92, + "learning_rate": 1.2315821975897726e-06, + "loss": 0.1181, + "step": 131910 + }, + { + "epoch": 4.92, + "learning_rate": 1.2314421171904414e-06, + "loss": 0.1001, + "step": 131940 + }, + { + "epoch": 4.92, + "learning_rate": 1.2313020845785554e-06, + "loss": 0.101, + "step": 131970 + }, + { + "epoch": 4.92, + "learning_rate": 1.2311620997269502e-06, + "loss": 0.1221, + "step": 132000 + }, + { + "epoch": 4.92, + "learning_rate": 1.231022162608483e-06, + "loss": 0.0918, + "step": 132030 + }, + { + "epoch": 4.93, + "learning_rate": 1.2308822731960328e-06, + "loss": 0.1022, + "step": 132060 + }, + { + "epoch": 4.93, + "learning_rate": 1.2307424314624995e-06, + "loss": 0.1079, + "step": 132090 + }, + { + "epoch": 4.93, + "learning_rate": 1.2306026373808053e-06, + "loss": 0.1142, + "step": 132120 + }, + { + "epoch": 4.93, + "learning_rate": 1.230462890923894e-06, + "loss": 0.1201, + "step": 132150 + }, + { + "epoch": 4.93, + "learning_rate": 1.2303231920647298e-06, + "loss": 0.1014, + "step": 132180 + }, + { + "epoch": 4.93, + "learning_rate": 1.2301835407762994e-06, + "loss": 0.1395, + "step": 132210 + }, + { + "epoch": 4.93, + "learning_rate": 1.2300439370316107e-06, + "loss": 0.1087, + "step": 132240 + }, + { + "epoch": 4.93, + "learning_rate": 1.2299043808036933e-06, + "loss": 0.1153, + "step": 132270 + }, + { + "epoch": 4.93, + "learning_rate": 1.2297648720655975e-06, + "loss": 0.0962, + "step": 132300 + }, + { + "epoch": 4.94, + "learning_rate": 1.229625410790396e-06, + "loss": 0.0934, + "step": 132330 + }, + { + "epoch": 4.94, + "learning_rate": 1.2294859969511816e-06, + "loss": 0.1272, + "step": 132360 + }, + { + "epoch": 4.94, + "learning_rate": 1.2293466305210697e-06, + "loss": 0.0931, + "step": 132390 + }, + { + "epoch": 4.94, + "learning_rate": 1.229207311473196e-06, + "loss": 0.1055, + "step": 132420 + }, + { + "epoch": 4.94, + "learning_rate": 1.2290680397807183e-06, + "loss": 0.1143, + "step": 132450 + }, + { + "epoch": 4.94, + "learning_rate": 1.2289288154168154e-06, + "loss": 0.104, + "step": 132480 + }, + { + "epoch": 4.94, + "learning_rate": 1.2287896383546869e-06, + "loss": 0.1297, + "step": 132510 + }, + { + "epoch": 4.94, + "learning_rate": 1.2286505085675547e-06, + "loss": 0.1069, + "step": 132540 + }, + { + "epoch": 4.94, + "learning_rate": 1.2285114260286608e-06, + "loss": 0.1035, + "step": 132570 + }, + { + "epoch": 4.95, + "learning_rate": 1.2283723907112692e-06, + "loss": 0.0854, + "step": 132600 + }, + { + "epoch": 4.95, + "learning_rate": 1.2282334025886644e-06, + "loss": 0.117, + "step": 132630 + }, + { + "epoch": 4.95, + "learning_rate": 1.2280944616341529e-06, + "loss": 0.121, + "step": 132660 + }, + { + "epoch": 4.95, + "learning_rate": 1.2279555678210612e-06, + "loss": 0.1162, + "step": 132690 + }, + { + "epoch": 4.95, + "learning_rate": 1.2278167211227382e-06, + "loss": 0.1071, + "step": 132720 + }, + { + "epoch": 4.95, + "learning_rate": 1.2276779215125527e-06, + "loss": 0.0994, + "step": 132750 + }, + { + "epoch": 4.95, + "learning_rate": 1.2275391689638956e-06, + "loss": 0.1016, + "step": 132780 + }, + { + "epoch": 4.95, + "learning_rate": 1.2274004634501783e-06, + "loss": 0.1168, + "step": 132810 + }, + { + "epoch": 4.95, + "learning_rate": 1.2272618049448327e-06, + "loss": 0.1099, + "step": 132840 + }, + { + "epoch": 4.96, + "learning_rate": 1.2271231934213132e-06, + "loss": 0.1209, + "step": 132870 + }, + { + "epoch": 4.96, + "learning_rate": 1.2269846288530937e-06, + "loss": 0.1109, + "step": 132900 + }, + { + "epoch": 4.96, + "learning_rate": 1.2268461112136695e-06, + "loss": 0.1087, + "step": 132930 + }, + { + "epoch": 4.96, + "learning_rate": 1.2267076404765577e-06, + "loss": 0.1185, + "step": 132960 + }, + { + "epoch": 4.96, + "learning_rate": 1.2265692166152949e-06, + "loss": 0.1128, + "step": 132990 + }, + { + "epoch": 4.96, + "learning_rate": 1.2264308396034396e-06, + "loss": 0.1114, + "step": 133020 + }, + { + "epoch": 4.96, + "learning_rate": 1.226292509414571e-06, + "loss": 0.1145, + "step": 133050 + }, + { + "epoch": 4.96, + "learning_rate": 1.2261542260222886e-06, + "loss": 0.1106, + "step": 133080 + }, + { + "epoch": 4.96, + "learning_rate": 1.2260159894002136e-06, + "loss": 0.1064, + "step": 133110 + }, + { + "epoch": 4.97, + "learning_rate": 1.2258777995219873e-06, + "loss": 0.1026, + "step": 133140 + }, + { + "epoch": 4.97, + "learning_rate": 1.2257396563612719e-06, + "loss": 0.1053, + "step": 133170 + }, + { + "epoch": 4.97, + "learning_rate": 1.2256015598917514e-06, + "loss": 0.1187, + "step": 133200 + }, + { + "epoch": 4.97, + "learning_rate": 1.225463510087129e-06, + "loss": 0.1068, + "step": 133230 + }, + { + "epoch": 4.97, + "learning_rate": 1.2253255069211288e-06, + "loss": 0.0971, + "step": 133260 + }, + { + "epoch": 4.97, + "learning_rate": 1.2251875503674974e-06, + "loss": 0.0877, + "step": 133290 + }, + { + "epoch": 4.97, + "learning_rate": 1.2250496403999999e-06, + "loss": 0.1098, + "step": 133320 + }, + { + "epoch": 4.97, + "learning_rate": 1.2249117769924233e-06, + "loss": 0.1212, + "step": 133350 + }, + { + "epoch": 4.97, + "learning_rate": 1.2247739601185745e-06, + "loss": 0.1139, + "step": 133380 + }, + { + "epoch": 4.98, + "learning_rate": 1.2246361897522823e-06, + "loss": 0.1067, + "step": 133410 + }, + { + "epoch": 4.98, + "learning_rate": 1.2244984658673947e-06, + "loss": 0.1118, + "step": 133440 + }, + { + "epoch": 4.98, + "learning_rate": 1.2243607884377809e-06, + "loss": 0.0962, + "step": 133470 + }, + { + "epoch": 4.98, + "learning_rate": 1.2242231574373308e-06, + "loss": 0.1155, + "step": 133500 + }, + { + "epoch": 4.98, + "learning_rate": 1.224085572839954e-06, + "loss": 0.0926, + "step": 133530 + }, + { + "epoch": 4.98, + "learning_rate": 1.2239480346195822e-06, + "loss": 0.0914, + "step": 133560 + }, + { + "epoch": 4.98, + "learning_rate": 1.2238105427501658e-06, + "loss": 0.1052, + "step": 133590 + }, + { + "epoch": 4.98, + "learning_rate": 1.2236730972056771e-06, + "loss": 0.0977, + "step": 133620 + }, + { + "epoch": 4.98, + "learning_rate": 1.223535697960108e-06, + "loss": 0.0894, + "step": 133650 + }, + { + "epoch": 4.99, + "learning_rate": 1.2233983449874714e-06, + "loss": 0.1005, + "step": 133680 + }, + { + "epoch": 4.99, + "learning_rate": 1.2232610382618002e-06, + "loss": 0.1008, + "step": 133710 + }, + { + "epoch": 4.99, + "learning_rate": 1.2231237777571477e-06, + "loss": 0.115, + "step": 133740 + }, + { + "epoch": 4.99, + "learning_rate": 1.222986563447588e-06, + "loss": 0.1066, + "step": 133770 + }, + { + "epoch": 4.99, + "learning_rate": 1.2228493953072147e-06, + "loss": 0.1144, + "step": 133800 + }, + { + "epoch": 4.99, + "learning_rate": 1.2227122733101433e-06, + "loss": 0.0956, + "step": 133830 + }, + { + "epoch": 4.99, + "learning_rate": 1.222575197430508e-06, + "loss": 0.0942, + "step": 133860 + }, + { + "epoch": 4.99, + "learning_rate": 1.2224427345597513e-06, + "loss": 0.1127, + "step": 133890 + }, + { + "epoch": 4.99, + "learning_rate": 1.222305749302364e-06, + "loss": 0.1057, + "step": 133920 + }, + { + "epoch": 5.0, + "learning_rate": 1.222168810085799e-06, + "loss": 0.1057, + "step": 133950 + }, + { + "epoch": 5.0, + "learning_rate": 1.2220319168842711e-06, + "loss": 0.1067, + "step": 133980 + }, + { + "epoch": 5.0, + "learning_rate": 1.2218950696720162e-06, + "loss": 0.1108, + "step": 134010 + }, + { + "epoch": 5.0, + "learning_rate": 1.2217582684232896e-06, + "loss": 0.1227, + "step": 134040 + }, + { + "epoch": 5.0, + "step": 134060, + "total_flos": 1.587534895646056e+21, + "train_loss": 0.1688916235028426, + "train_runtime": 101673.2407, + "train_samples_per_second": 10.548, + "train_steps_per_second": 1.319 + } + ], + "logging_steps": 30, + "max_steps": 134060, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "total_flos": 1.587534895646056e+21, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}