{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.8816133173949616, "global_step": 42500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.687, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.6806, "step": 20 }, { "epoch": 0.0, "learning_rate": 3e-06, "loss": 0.6846, "step": 30 }, { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 0.6842, "step": 40 }, { "epoch": 0.0, "learning_rate": 5e-06, "loss": 0.66, "step": 50 }, { "epoch": 0.0, "learning_rate": 6e-06, "loss": 0.6568, "step": 60 }, { "epoch": 0.0, "learning_rate": 7.000000000000001e-06, "loss": 0.6458, "step": 70 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-06, "loss": 0.6134, "step": 80 }, { "epoch": 0.0, "learning_rate": 9e-06, "loss": 0.5791, "step": 90 }, { "epoch": 0.0, "learning_rate": 1e-05, "loss": 0.5694, "step": 100 }, { "epoch": 0.0, "learning_rate": 1.1000000000000001e-05, "loss": 0.5117, "step": 110 }, { "epoch": 0.01, "learning_rate": 1.2e-05, "loss": 0.4606, "step": 120 }, { "epoch": 0.01, "learning_rate": 1.3000000000000001e-05, "loss": 0.3947, "step": 130 }, { "epoch": 0.01, "learning_rate": 1.4000000000000001e-05, "loss": 0.3784, "step": 140 }, { "epoch": 0.01, "learning_rate": 1.5e-05, "loss": 0.3609, "step": 150 }, { "epoch": 0.01, "learning_rate": 1.6000000000000003e-05, "loss": 0.3163, "step": 160 }, { "epoch": 0.01, "learning_rate": 1.7000000000000003e-05, "loss": 0.2994, "step": 170 }, { "epoch": 0.01, "learning_rate": 1.8e-05, "loss": 0.2586, "step": 180 }, { "epoch": 0.01, "learning_rate": 1.9e-05, "loss": 0.3097, "step": 190 }, { "epoch": 0.01, "learning_rate": 2e-05, "loss": 0.301, "step": 200 }, { "epoch": 0.01, "learning_rate": 2.1e-05, "loss": 0.2829, "step": 210 }, { "epoch": 0.01, "learning_rate": 2.2000000000000003e-05, "loss": 0.2983, "step": 220 }, { "epoch": 0.01, "learning_rate": 2.3000000000000003e-05, "loss": 0.2439, "step": 230 }, { "epoch": 0.01, "learning_rate": 2.4e-05, "loss": 0.2686, "step": 240 }, { "epoch": 0.01, "learning_rate": 2.5e-05, "loss": 0.3592, "step": 250 }, { "epoch": 0.01, "learning_rate": 2.6000000000000002e-05, "loss": 0.3019, "step": 260 }, { "epoch": 0.01, "learning_rate": 2.7000000000000002e-05, "loss": 0.2524, "step": 270 }, { "epoch": 0.01, "learning_rate": 2.8000000000000003e-05, "loss": 0.2285, "step": 280 }, { "epoch": 0.01, "learning_rate": 2.9e-05, "loss": 0.2637, "step": 290 }, { "epoch": 0.01, "learning_rate": 3e-05, "loss": 0.2016, "step": 300 }, { "epoch": 0.01, "learning_rate": 3.1e-05, "loss": 0.2358, "step": 310 }, { "epoch": 0.01, "learning_rate": 3.2000000000000005e-05, "loss": 0.3402, "step": 320 }, { "epoch": 0.01, "learning_rate": 3.3e-05, "loss": 0.2562, "step": 330 }, { "epoch": 0.02, "learning_rate": 3.4000000000000007e-05, "loss": 0.2626, "step": 340 }, { "epoch": 0.02, "learning_rate": 3.5e-05, "loss": 0.243, "step": 350 }, { "epoch": 0.02, "learning_rate": 3.6e-05, "loss": 0.2327, "step": 360 }, { "epoch": 0.02, "learning_rate": 3.7e-05, "loss": 0.258, "step": 370 }, { "epoch": 0.02, "learning_rate": 3.8e-05, "loss": 0.2536, "step": 380 }, { "epoch": 0.02, "learning_rate": 3.9000000000000006e-05, "loss": 0.2657, "step": 390 }, { "epoch": 0.02, "learning_rate": 4e-05, "loss": 0.218, "step": 400 }, { "epoch": 0.02, "learning_rate": 4.1e-05, "loss": 0.2126, "step": 410 }, { "epoch": 0.02, "learning_rate": 4.2e-05, "loss": 0.2723, "step": 420 }, { "epoch": 0.02, "learning_rate": 4.3e-05, "loss": 0.193, "step": 430 }, { "epoch": 0.02, "learning_rate": 4.4000000000000006e-05, "loss": 0.2726, "step": 440 }, { "epoch": 0.02, "learning_rate": 4.5e-05, "loss": 0.2734, "step": 450 }, { "epoch": 0.02, "learning_rate": 4.600000000000001e-05, "loss": 0.2387, "step": 460 }, { "epoch": 0.02, "learning_rate": 4.7e-05, "loss": 0.2204, "step": 470 }, { "epoch": 0.02, "learning_rate": 4.8e-05, "loss": 0.1889, "step": 480 }, { "epoch": 0.02, "learning_rate": 4.9e-05, "loss": 0.2558, "step": 490 }, { "epoch": 0.02, "learning_rate": 5e-05, "loss": 0.2126, "step": 500 }, { "epoch": 0.02, "learning_rate": 4.9992566271687905e-05, "loss": 0.2394, "step": 510 }, { "epoch": 0.02, "learning_rate": 4.998513254337581e-05, "loss": 0.2793, "step": 520 }, { "epoch": 0.02, "learning_rate": 4.997769881506371e-05, "loss": 0.2972, "step": 530 }, { "epoch": 0.02, "learning_rate": 4.997026508675161e-05, "loss": 0.2031, "step": 540 }, { "epoch": 0.02, "learning_rate": 4.9962831358439515e-05, "loss": 0.264, "step": 550 }, { "epoch": 0.02, "learning_rate": 4.995539763012742e-05, "loss": 0.2923, "step": 560 }, { "epoch": 0.03, "learning_rate": 4.994796390181532e-05, "loss": 0.2861, "step": 570 }, { "epoch": 0.03, "learning_rate": 4.9940530173503216e-05, "loss": 0.2016, "step": 580 }, { "epoch": 0.03, "learning_rate": 4.9933096445191126e-05, "loss": 0.2703, "step": 590 }, { "epoch": 0.03, "learning_rate": 4.992566271687902e-05, "loss": 0.2351, "step": 600 }, { "epoch": 0.03, "learning_rate": 4.991822898856693e-05, "loss": 0.2432, "step": 610 }, { "epoch": 0.03, "learning_rate": 4.9910795260254827e-05, "loss": 0.2567, "step": 620 }, { "epoch": 0.03, "learning_rate": 4.9903361531942736e-05, "loss": 0.3241, "step": 630 }, { "epoch": 0.03, "learning_rate": 4.989592780363064e-05, "loss": 0.2396, "step": 640 }, { "epoch": 0.03, "learning_rate": 4.9888494075318534e-05, "loss": 0.2282, "step": 650 }, { "epoch": 0.03, "learning_rate": 4.9881060347006444e-05, "loss": 0.22, "step": 660 }, { "epoch": 0.03, "learning_rate": 4.987362661869434e-05, "loss": 0.216, "step": 670 }, { "epoch": 0.03, "learning_rate": 4.986619289038225e-05, "loss": 0.3001, "step": 680 }, { "epoch": 0.03, "learning_rate": 4.9858759162070145e-05, "loss": 0.1784, "step": 690 }, { "epoch": 0.03, "learning_rate": 4.985132543375805e-05, "loss": 0.1949, "step": 700 }, { "epoch": 0.03, "learning_rate": 4.984389170544595e-05, "loss": 0.2236, "step": 710 }, { "epoch": 0.03, "learning_rate": 4.983645797713385e-05, "loss": 0.2116, "step": 720 }, { "epoch": 0.03, "learning_rate": 4.9829024248821755e-05, "loss": 0.2238, "step": 730 }, { "epoch": 0.03, "learning_rate": 4.982159052050966e-05, "loss": 0.2767, "step": 740 }, { "epoch": 0.03, "learning_rate": 4.981415679219757e-05, "loss": 0.2553, "step": 750 }, { "epoch": 0.03, "learning_rate": 4.980672306388546e-05, "loss": 0.2686, "step": 760 }, { "epoch": 0.03, "learning_rate": 4.9799289335573365e-05, "loss": 0.2538, "step": 770 }, { "epoch": 0.03, "learning_rate": 4.979185560726127e-05, "loss": 0.2044, "step": 780 }, { "epoch": 0.03, "learning_rate": 4.978442187894917e-05, "loss": 0.1951, "step": 790 }, { "epoch": 0.04, "learning_rate": 4.977698815063707e-05, "loss": 0.1634, "step": 800 }, { "epoch": 0.04, "learning_rate": 4.9769554422324976e-05, "loss": 0.236, "step": 810 }, { "epoch": 0.04, "learning_rate": 4.976212069401288e-05, "loss": 0.1908, "step": 820 }, { "epoch": 0.04, "learning_rate": 4.975468696570078e-05, "loss": 0.2738, "step": 830 }, { "epoch": 0.04, "learning_rate": 4.974725323738868e-05, "loss": 0.2339, "step": 840 }, { "epoch": 0.04, "learning_rate": 4.9739819509076586e-05, "loss": 0.2441, "step": 850 }, { "epoch": 0.04, "learning_rate": 4.973238578076448e-05, "loss": 0.195, "step": 860 }, { "epoch": 0.04, "learning_rate": 4.972495205245239e-05, "loss": 0.1888, "step": 870 }, { "epoch": 0.04, "learning_rate": 4.9717518324140294e-05, "loss": 0.2359, "step": 880 }, { "epoch": 0.04, "learning_rate": 4.9710084595828196e-05, "loss": 0.1867, "step": 890 }, { "epoch": 0.04, "learning_rate": 4.97026508675161e-05, "loss": 0.2421, "step": 900 }, { "epoch": 0.04, "learning_rate": 4.9695217139203995e-05, "loss": 0.1858, "step": 910 }, { "epoch": 0.04, "learning_rate": 4.9687783410891904e-05, "loss": 0.2607, "step": 920 }, { "epoch": 0.04, "learning_rate": 4.96803496825798e-05, "loss": 0.187, "step": 930 }, { "epoch": 0.04, "learning_rate": 4.967291595426771e-05, "loss": 0.1992, "step": 940 }, { "epoch": 0.04, "learning_rate": 4.9665482225955605e-05, "loss": 0.1595, "step": 950 }, { "epoch": 0.04, "learning_rate": 4.9658048497643514e-05, "loss": 0.3007, "step": 960 }, { "epoch": 0.04, "learning_rate": 4.965061476933141e-05, "loss": 0.2044, "step": 970 }, { "epoch": 0.04, "learning_rate": 4.964318104101931e-05, "loss": 0.165, "step": 980 }, { "epoch": 0.04, "learning_rate": 4.9635747312707215e-05, "loss": 0.2566, "step": 990 }, { "epoch": 0.04, "learning_rate": 4.962831358439512e-05, "loss": 0.2033, "step": 1000 }, { "epoch": 0.04, "learning_rate": 4.962087985608303e-05, "loss": 0.2737, "step": 1010 }, { "epoch": 0.05, "learning_rate": 4.961344612777092e-05, "loss": 0.2183, "step": 1020 }, { "epoch": 0.05, "learning_rate": 4.9606012399458826e-05, "loss": 0.2284, "step": 1030 }, { "epoch": 0.05, "learning_rate": 4.959857867114673e-05, "loss": 0.2267, "step": 1040 }, { "epoch": 0.05, "learning_rate": 4.959114494283463e-05, "loss": 0.2029, "step": 1050 }, { "epoch": 0.05, "learning_rate": 4.958371121452253e-05, "loss": 0.2239, "step": 1060 }, { "epoch": 0.05, "learning_rate": 4.9576277486210436e-05, "loss": 0.2491, "step": 1070 }, { "epoch": 0.05, "learning_rate": 4.956884375789834e-05, "loss": 0.2234, "step": 1080 }, { "epoch": 0.05, "learning_rate": 4.956141002958624e-05, "loss": 0.2345, "step": 1090 }, { "epoch": 0.05, "learning_rate": 4.9553976301274144e-05, "loss": 0.234, "step": 1100 }, { "epoch": 0.05, "learning_rate": 4.9546542572962046e-05, "loss": 0.2095, "step": 1110 }, { "epoch": 0.05, "learning_rate": 4.953910884464995e-05, "loss": 0.2476, "step": 1120 }, { "epoch": 0.05, "learning_rate": 4.953167511633785e-05, "loss": 0.1759, "step": 1130 }, { "epoch": 0.05, "learning_rate": 4.9524241388025754e-05, "loss": 0.2135, "step": 1140 }, { "epoch": 0.05, "learning_rate": 4.9516807659713657e-05, "loss": 0.2745, "step": 1150 }, { "epoch": 0.05, "learning_rate": 4.950937393140156e-05, "loss": 0.1498, "step": 1160 }, { "epoch": 0.05, "learning_rate": 4.950194020308946e-05, "loss": 0.2092, "step": 1170 }, { "epoch": 0.05, "learning_rate": 4.9494506474777364e-05, "loss": 0.2188, "step": 1180 }, { "epoch": 0.05, "learning_rate": 4.948707274646526e-05, "loss": 0.2006, "step": 1190 }, { "epoch": 0.05, "learning_rate": 4.947963901815317e-05, "loss": 0.2703, "step": 1200 }, { "epoch": 0.05, "learning_rate": 4.9472205289841065e-05, "loss": 0.3083, "step": 1210 }, { "epoch": 0.05, "learning_rate": 4.9464771561528975e-05, "loss": 0.2267, "step": 1220 }, { "epoch": 0.05, "learning_rate": 4.945733783321687e-05, "loss": 0.2153, "step": 1230 }, { "epoch": 0.05, "learning_rate": 4.944990410490477e-05, "loss": 0.2014, "step": 1240 }, { "epoch": 0.06, "learning_rate": 4.944247037659268e-05, "loss": 0.2497, "step": 1250 }, { "epoch": 0.06, "learning_rate": 4.943503664828058e-05, "loss": 0.2013, "step": 1260 }, { "epoch": 0.06, "learning_rate": 4.942760291996849e-05, "loss": 0.2508, "step": 1270 }, { "epoch": 0.06, "learning_rate": 4.942016919165638e-05, "loss": 0.2829, "step": 1280 }, { "epoch": 0.06, "learning_rate": 4.941273546334429e-05, "loss": 0.1481, "step": 1290 }, { "epoch": 0.06, "learning_rate": 4.940530173503219e-05, "loss": 0.2612, "step": 1300 }, { "epoch": 0.06, "learning_rate": 4.939786800672009e-05, "loss": 0.2074, "step": 1310 }, { "epoch": 0.06, "learning_rate": 4.9390434278407994e-05, "loss": 0.2151, "step": 1320 }, { "epoch": 0.06, "learning_rate": 4.9383000550095896e-05, "loss": 0.1589, "step": 1330 }, { "epoch": 0.06, "learning_rate": 4.93755668217838e-05, "loss": 0.1702, "step": 1340 }, { "epoch": 0.06, "learning_rate": 4.93681330934717e-05, "loss": 0.2212, "step": 1350 }, { "epoch": 0.06, "learning_rate": 4.9360699365159604e-05, "loss": 0.2072, "step": 1360 }, { "epoch": 0.06, "learning_rate": 4.9353265636847506e-05, "loss": 0.213, "step": 1370 }, { "epoch": 0.06, "learning_rate": 4.934583190853541e-05, "loss": 0.2863, "step": 1380 }, { "epoch": 0.06, "learning_rate": 4.933839818022331e-05, "loss": 0.2518, "step": 1390 }, { "epoch": 0.06, "learning_rate": 4.9330964451911214e-05, "loss": 0.2125, "step": 1400 }, { "epoch": 0.06, "learning_rate": 4.932353072359912e-05, "loss": 0.1968, "step": 1410 }, { "epoch": 0.06, "learning_rate": 4.931609699528702e-05, "loss": 0.2009, "step": 1420 }, { "epoch": 0.06, "learning_rate": 4.930866326697492e-05, "loss": 0.2424, "step": 1430 }, { "epoch": 0.06, "learning_rate": 4.9301229538662825e-05, "loss": 0.2323, "step": 1440 }, { "epoch": 0.06, "learning_rate": 4.929379581035072e-05, "loss": 0.1675, "step": 1450 }, { "epoch": 0.06, "learning_rate": 4.928636208203863e-05, "loss": 0.203, "step": 1460 }, { "epoch": 0.07, "learning_rate": 4.9278928353726526e-05, "loss": 0.1722, "step": 1470 }, { "epoch": 0.07, "learning_rate": 4.9271494625414435e-05, "loss": 0.1985, "step": 1480 }, { "epoch": 0.07, "learning_rate": 4.926406089710234e-05, "loss": 0.2263, "step": 1490 }, { "epoch": 0.07, "learning_rate": 4.925662716879024e-05, "loss": 0.1868, "step": 1500 }, { "epoch": 0.07, "learning_rate": 4.924919344047814e-05, "loss": 0.1927, "step": 1510 }, { "epoch": 0.07, "learning_rate": 4.924175971216604e-05, "loss": 0.2468, "step": 1520 }, { "epoch": 0.07, "learning_rate": 4.923432598385395e-05, "loss": 0.185, "step": 1530 }, { "epoch": 0.07, "learning_rate": 4.9226892255541844e-05, "loss": 0.2038, "step": 1540 }, { "epoch": 0.07, "learning_rate": 4.921945852722975e-05, "loss": 0.1751, "step": 1550 }, { "epoch": 0.07, "learning_rate": 4.921202479891765e-05, "loss": 0.2254, "step": 1560 }, { "epoch": 0.07, "learning_rate": 4.920459107060555e-05, "loss": 0.1783, "step": 1570 }, { "epoch": 0.07, "learning_rate": 4.9197157342293454e-05, "loss": 0.202, "step": 1580 }, { "epoch": 0.07, "learning_rate": 4.9189723613981356e-05, "loss": 0.2125, "step": 1590 }, { "epoch": 0.07, "learning_rate": 4.918228988566926e-05, "loss": 0.1957, "step": 1600 }, { "epoch": 0.07, "learning_rate": 4.917485615735716e-05, "loss": 0.2552, "step": 1610 }, { "epoch": 0.07, "learning_rate": 4.916742242904507e-05, "loss": 0.2238, "step": 1620 }, { "epoch": 0.07, "learning_rate": 4.915998870073297e-05, "loss": 0.2815, "step": 1630 }, { "epoch": 0.07, "learning_rate": 4.915255497242087e-05, "loss": 0.2833, "step": 1640 }, { "epoch": 0.07, "learning_rate": 4.914512124410877e-05, "loss": 0.2153, "step": 1650 }, { "epoch": 0.07, "learning_rate": 4.9137687515796675e-05, "loss": 0.1934, "step": 1660 }, { "epoch": 0.07, "learning_rate": 4.913025378748458e-05, "loss": 0.1849, "step": 1670 }, { "epoch": 0.07, "learning_rate": 4.912282005917248e-05, "loss": 0.1726, "step": 1680 }, { "epoch": 0.07, "learning_rate": 4.911538633086038e-05, "loss": 0.1846, "step": 1690 }, { "epoch": 0.08, "learning_rate": 4.9107952602548285e-05, "loss": 0.1558, "step": 1700 }, { "epoch": 0.08, "learning_rate": 4.910051887423619e-05, "loss": 0.2177, "step": 1710 }, { "epoch": 0.08, "learning_rate": 4.909308514592409e-05, "loss": 0.1745, "step": 1720 }, { "epoch": 0.08, "learning_rate": 4.908565141761199e-05, "loss": 0.1974, "step": 1730 }, { "epoch": 0.08, "learning_rate": 4.9078217689299895e-05, "loss": 0.2085, "step": 1740 }, { "epoch": 0.08, "learning_rate": 4.90707839609878e-05, "loss": 0.2092, "step": 1750 }, { "epoch": 0.08, "learning_rate": 4.90633502326757e-05, "loss": 0.1834, "step": 1760 }, { "epoch": 0.08, "learning_rate": 4.90559165043636e-05, "loss": 0.1914, "step": 1770 }, { "epoch": 0.08, "learning_rate": 4.90484827760515e-05, "loss": 0.1843, "step": 1780 }, { "epoch": 0.08, "learning_rate": 4.904104904773941e-05, "loss": 0.2037, "step": 1790 }, { "epoch": 0.08, "learning_rate": 4.9033615319427304e-05, "loss": 0.1713, "step": 1800 }, { "epoch": 0.08, "learning_rate": 4.902618159111521e-05, "loss": 0.1875, "step": 1810 }, { "epoch": 0.08, "learning_rate": 4.901874786280311e-05, "loss": 0.2575, "step": 1820 }, { "epoch": 0.08, "learning_rate": 4.901131413449102e-05, "loss": 0.1896, "step": 1830 }, { "epoch": 0.08, "learning_rate": 4.9003880406178914e-05, "loss": 0.1888, "step": 1840 }, { "epoch": 0.08, "learning_rate": 4.899644667786682e-05, "loss": 0.2391, "step": 1850 }, { "epoch": 0.08, "learning_rate": 4.8989012949554726e-05, "loss": 0.2394, "step": 1860 }, { "epoch": 0.08, "learning_rate": 4.898157922124262e-05, "loss": 0.2168, "step": 1870 }, { "epoch": 0.08, "learning_rate": 4.897414549293053e-05, "loss": 0.2082, "step": 1880 }, { "epoch": 0.08, "learning_rate": 4.896671176461843e-05, "loss": 0.1961, "step": 1890 }, { "epoch": 0.08, "learning_rate": 4.895927803630633e-05, "loss": 0.2373, "step": 1900 }, { "epoch": 0.08, "learning_rate": 4.895184430799423e-05, "loss": 0.1806, "step": 1910 }, { "epoch": 0.09, "learning_rate": 4.8944410579682135e-05, "loss": 0.2002, "step": 1920 }, { "epoch": 0.09, "learning_rate": 4.893697685137004e-05, "loss": 0.1931, "step": 1930 }, { "epoch": 0.09, "learning_rate": 4.892954312305794e-05, "loss": 0.2051, "step": 1940 }, { "epoch": 0.09, "learning_rate": 4.892210939474584e-05, "loss": 0.2271, "step": 1950 }, { "epoch": 0.09, "learning_rate": 4.8914675666433745e-05, "loss": 0.1875, "step": 1960 }, { "epoch": 0.09, "learning_rate": 4.890724193812165e-05, "loss": 0.2059, "step": 1970 }, { "epoch": 0.09, "learning_rate": 4.889980820980955e-05, "loss": 0.2224, "step": 1980 }, { "epoch": 0.09, "learning_rate": 4.889237448149745e-05, "loss": 0.1685, "step": 1990 }, { "epoch": 0.09, "learning_rate": 4.8884940753185355e-05, "loss": 0.1726, "step": 2000 }, { "epoch": 0.09, "learning_rate": 4.887750702487326e-05, "loss": 0.209, "step": 2010 }, { "epoch": 0.09, "learning_rate": 4.887007329656116e-05, "loss": 0.1874, "step": 2020 }, { "epoch": 0.09, "learning_rate": 4.886263956824906e-05, "loss": 0.2343, "step": 2030 }, { "epoch": 0.09, "learning_rate": 4.8855205839936966e-05, "loss": 0.2186, "step": 2040 }, { "epoch": 0.09, "learning_rate": 4.884777211162487e-05, "loss": 0.2283, "step": 2050 }, { "epoch": 0.09, "learning_rate": 4.8840338383312764e-05, "loss": 0.1499, "step": 2060 }, { "epoch": 0.09, "learning_rate": 4.8832904655000673e-05, "loss": 0.2142, "step": 2070 }, { "epoch": 0.09, "learning_rate": 4.882547092668857e-05, "loss": 0.1911, "step": 2080 }, { "epoch": 0.09, "learning_rate": 4.881803719837648e-05, "loss": 0.2193, "step": 2090 }, { "epoch": 0.09, "learning_rate": 4.881060347006438e-05, "loss": 0.183, "step": 2100 }, { "epoch": 0.09, "learning_rate": 4.880316974175228e-05, "loss": 0.1663, "step": 2110 }, { "epoch": 0.09, "learning_rate": 4.8795736013440186e-05, "loss": 0.1923, "step": 2120 }, { "epoch": 0.09, "learning_rate": 4.878830228512808e-05, "loss": 0.2149, "step": 2130 }, { "epoch": 0.09, "learning_rate": 4.878086855681599e-05, "loss": 0.1499, "step": 2140 }, { "epoch": 0.1, "learning_rate": 4.877343482850389e-05, "loss": 0.245, "step": 2150 }, { "epoch": 0.1, "learning_rate": 4.87660011001918e-05, "loss": 0.197, "step": 2160 }, { "epoch": 0.1, "learning_rate": 4.875856737187969e-05, "loss": 0.2179, "step": 2170 }, { "epoch": 0.1, "learning_rate": 4.8751133643567595e-05, "loss": 0.2034, "step": 2180 }, { "epoch": 0.1, "learning_rate": 4.87436999152555e-05, "loss": 0.2402, "step": 2190 }, { "epoch": 0.1, "learning_rate": 4.87362661869434e-05, "loss": 0.2055, "step": 2200 }, { "epoch": 0.1, "learning_rate": 4.87288324586313e-05, "loss": 0.1813, "step": 2210 }, { "epoch": 0.1, "learning_rate": 4.8721398730319205e-05, "loss": 0.1743, "step": 2220 }, { "epoch": 0.1, "learning_rate": 4.8713965002007115e-05, "loss": 0.1802, "step": 2230 }, { "epoch": 0.1, "learning_rate": 4.870653127369501e-05, "loss": 0.2603, "step": 2240 }, { "epoch": 0.1, "learning_rate": 4.869909754538291e-05, "loss": 0.1839, "step": 2250 }, { "epoch": 0.1, "learning_rate": 4.8691663817070816e-05, "loss": 0.177, "step": 2260 }, { "epoch": 0.1, "learning_rate": 4.868423008875872e-05, "loss": 0.2351, "step": 2270 }, { "epoch": 0.1, "learning_rate": 4.867679636044662e-05, "loss": 0.141, "step": 2280 }, { "epoch": 0.1, "learning_rate": 4.8669362632134523e-05, "loss": 0.2108, "step": 2290 }, { "epoch": 0.1, "learning_rate": 4.8661928903822426e-05, "loss": 0.1753, "step": 2300 }, { "epoch": 0.1, "learning_rate": 4.865449517551033e-05, "loss": 0.1649, "step": 2310 }, { "epoch": 0.1, "learning_rate": 4.8647061447198224e-05, "loss": 0.1789, "step": 2320 }, { "epoch": 0.1, "learning_rate": 4.8639627718886134e-05, "loss": 0.1854, "step": 2330 }, { "epoch": 0.1, "learning_rate": 4.8632193990574036e-05, "loss": 0.2197, "step": 2340 }, { "epoch": 0.1, "learning_rate": 4.862476026226194e-05, "loss": 0.209, "step": 2350 }, { "epoch": 0.1, "learning_rate": 4.861732653394984e-05, "loss": 0.2341, "step": 2360 }, { "epoch": 0.1, "learning_rate": 4.8609892805637744e-05, "loss": 0.2378, "step": 2370 }, { "epoch": 0.11, "learning_rate": 4.860245907732565e-05, "loss": 0.2239, "step": 2380 }, { "epoch": 0.11, "learning_rate": 4.859502534901354e-05, "loss": 0.1769, "step": 2390 }, { "epoch": 0.11, "learning_rate": 4.858759162070145e-05, "loss": 0.1461, "step": 2400 }, { "epoch": 0.11, "learning_rate": 4.858015789238935e-05, "loss": 0.2117, "step": 2410 }, { "epoch": 0.11, "learning_rate": 4.857272416407726e-05, "loss": 0.1941, "step": 2420 }, { "epoch": 0.11, "learning_rate": 4.856529043576515e-05, "loss": 0.2028, "step": 2430 }, { "epoch": 0.11, "learning_rate": 4.855785670745306e-05, "loss": 0.2372, "step": 2440 }, { "epoch": 0.11, "learning_rate": 4.855042297914096e-05, "loss": 0.2225, "step": 2450 }, { "epoch": 0.11, "learning_rate": 4.854298925082886e-05, "loss": 0.2131, "step": 2460 }, { "epoch": 0.11, "learning_rate": 4.853555552251677e-05, "loss": 0.2304, "step": 2470 }, { "epoch": 0.11, "learning_rate": 4.8528121794204666e-05, "loss": 0.2115, "step": 2480 }, { "epoch": 0.11, "learning_rate": 4.8520688065892575e-05, "loss": 0.2202, "step": 2490 }, { "epoch": 0.11, "learning_rate": 4.851325433758047e-05, "loss": 0.2384, "step": 2500 }, { "epoch": 0.11, "learning_rate": 4.8505820609268373e-05, "loss": 0.1571, "step": 2510 }, { "epoch": 0.11, "learning_rate": 4.8498386880956276e-05, "loss": 0.2124, "step": 2520 }, { "epoch": 0.11, "learning_rate": 4.849095315264418e-05, "loss": 0.2251, "step": 2530 }, { "epoch": 0.11, "learning_rate": 4.848351942433208e-05, "loss": 0.1642, "step": 2540 }, { "epoch": 0.11, "learning_rate": 4.8476085696019984e-05, "loss": 0.1827, "step": 2550 }, { "epoch": 0.11, "learning_rate": 4.8468651967707886e-05, "loss": 0.244, "step": 2560 }, { "epoch": 0.11, "learning_rate": 4.846121823939579e-05, "loss": 0.1678, "step": 2570 }, { "epoch": 0.11, "learning_rate": 4.845378451108369e-05, "loss": 0.1808, "step": 2580 }, { "epoch": 0.11, "learning_rate": 4.8446350782771594e-05, "loss": 0.2398, "step": 2590 }, { "epoch": 0.12, "learning_rate": 4.84389170544595e-05, "loss": 0.1785, "step": 2600 }, { "epoch": 0.12, "learning_rate": 4.84314833261474e-05, "loss": 0.1615, "step": 2610 }, { "epoch": 0.12, "learning_rate": 4.84240495978353e-05, "loss": 0.1599, "step": 2620 }, { "epoch": 0.12, "learning_rate": 4.8416615869523204e-05, "loss": 0.27, "step": 2630 }, { "epoch": 0.12, "learning_rate": 4.840918214121111e-05, "loss": 0.154, "step": 2640 }, { "epoch": 0.12, "learning_rate": 4.840174841289901e-05, "loss": 0.174, "step": 2650 }, { "epoch": 0.12, "learning_rate": 4.839431468458691e-05, "loss": 0.241, "step": 2660 }, { "epoch": 0.12, "learning_rate": 4.838688095627481e-05, "loss": 0.2166, "step": 2670 }, { "epoch": 0.12, "learning_rate": 4.837944722796272e-05, "loss": 0.1899, "step": 2680 }, { "epoch": 0.12, "learning_rate": 4.837201349965061e-05, "loss": 0.2102, "step": 2690 }, { "epoch": 0.12, "learning_rate": 4.836457977133852e-05, "loss": 0.2238, "step": 2700 }, { "epoch": 0.12, "learning_rate": 4.8357146043026425e-05, "loss": 0.2188, "step": 2710 }, { "epoch": 0.12, "learning_rate": 4.834971231471432e-05, "loss": 0.2044, "step": 2720 }, { "epoch": 0.12, "learning_rate": 4.834227858640223e-05, "loss": 0.2296, "step": 2730 }, { "epoch": 0.12, "learning_rate": 4.8334844858090126e-05, "loss": 0.1713, "step": 2740 }, { "epoch": 0.12, "learning_rate": 4.8327411129778035e-05, "loss": 0.2424, "step": 2750 }, { "epoch": 0.12, "learning_rate": 4.831997740146593e-05, "loss": 0.1632, "step": 2760 }, { "epoch": 0.12, "learning_rate": 4.831254367315384e-05, "loss": 0.2513, "step": 2770 }, { "epoch": 0.12, "learning_rate": 4.8305109944841736e-05, "loss": 0.2334, "step": 2780 }, { "epoch": 0.12, "learning_rate": 4.829767621652964e-05, "loss": 0.1786, "step": 2790 }, { "epoch": 0.12, "learning_rate": 4.829024248821754e-05, "loss": 0.2318, "step": 2800 }, { "epoch": 0.12, "learning_rate": 4.8282808759905444e-05, "loss": 0.21, "step": 2810 }, { "epoch": 0.12, "learning_rate": 4.827537503159335e-05, "loss": 0.2108, "step": 2820 }, { "epoch": 0.13, "learning_rate": 4.826794130328125e-05, "loss": 0.1916, "step": 2830 }, { "epoch": 0.13, "learning_rate": 4.826050757496915e-05, "loss": 0.2485, "step": 2840 }, { "epoch": 0.13, "learning_rate": 4.8253073846657054e-05, "loss": 0.1844, "step": 2850 }, { "epoch": 0.13, "learning_rate": 4.824564011834496e-05, "loss": 0.1989, "step": 2860 }, { "epoch": 0.13, "learning_rate": 4.823820639003286e-05, "loss": 0.2632, "step": 2870 }, { "epoch": 0.13, "learning_rate": 4.823077266172076e-05, "loss": 0.1758, "step": 2880 }, { "epoch": 0.13, "learning_rate": 4.8223338933408665e-05, "loss": 0.1563, "step": 2890 }, { "epoch": 0.13, "learning_rate": 4.821590520509657e-05, "loss": 0.1916, "step": 2900 }, { "epoch": 0.13, "learning_rate": 4.820847147678447e-05, "loss": 0.1954, "step": 2910 }, { "epoch": 0.13, "learning_rate": 4.820103774847237e-05, "loss": 0.1793, "step": 2920 }, { "epoch": 0.13, "learning_rate": 4.819360402016027e-05, "loss": 0.1765, "step": 2930 }, { "epoch": 0.13, "learning_rate": 4.818617029184818e-05, "loss": 0.1776, "step": 2940 }, { "epoch": 0.13, "learning_rate": 4.817873656353607e-05, "loss": 0.2006, "step": 2950 }, { "epoch": 0.13, "learning_rate": 4.817130283522398e-05, "loss": 0.2525, "step": 2960 }, { "epoch": 0.13, "learning_rate": 4.8163869106911885e-05, "loss": 0.183, "step": 2970 }, { "epoch": 0.13, "learning_rate": 4.815643537859979e-05, "loss": 0.2289, "step": 2980 }, { "epoch": 0.13, "learning_rate": 4.814900165028769e-05, "loss": 0.1713, "step": 2990 }, { "epoch": 0.13, "learning_rate": 4.8141567921975586e-05, "loss": 0.2052, "step": 3000 }, { "epoch": 0.13, "learning_rate": 4.8134134193663496e-05, "loss": 0.1598, "step": 3010 }, { "epoch": 0.13, "learning_rate": 4.812670046535139e-05, "loss": 0.1768, "step": 3020 }, { "epoch": 0.13, "learning_rate": 4.81192667370393e-05, "loss": 0.1931, "step": 3030 }, { "epoch": 0.13, "learning_rate": 4.8111833008727197e-05, "loss": 0.2053, "step": 3040 }, { "epoch": 0.14, "learning_rate": 4.81043992804151e-05, "loss": 0.2703, "step": 3050 }, { "epoch": 0.14, "learning_rate": 4.8096965552103e-05, "loss": 0.2645, "step": 3060 }, { "epoch": 0.14, "learning_rate": 4.8089531823790904e-05, "loss": 0.1826, "step": 3070 }, { "epoch": 0.14, "learning_rate": 4.8082098095478814e-05, "loss": 0.1774, "step": 3080 }, { "epoch": 0.14, "learning_rate": 4.807466436716671e-05, "loss": 0.1832, "step": 3090 }, { "epoch": 0.14, "learning_rate": 4.806723063885462e-05, "loss": 0.2073, "step": 3100 }, { "epoch": 0.14, "learning_rate": 4.8059796910542515e-05, "loss": 0.1576, "step": 3110 }, { "epoch": 0.14, "learning_rate": 4.805236318223042e-05, "loss": 0.2053, "step": 3120 }, { "epoch": 0.14, "learning_rate": 4.804492945391832e-05, "loss": 0.1843, "step": 3130 }, { "epoch": 0.14, "learning_rate": 4.803749572560622e-05, "loss": 0.175, "step": 3140 }, { "epoch": 0.14, "learning_rate": 4.8030061997294125e-05, "loss": 0.2084, "step": 3150 }, { "epoch": 0.14, "learning_rate": 4.802262826898203e-05, "loss": 0.189, "step": 3160 }, { "epoch": 0.14, "learning_rate": 4.801519454066993e-05, "loss": 0.1629, "step": 3170 }, { "epoch": 0.14, "learning_rate": 4.800776081235783e-05, "loss": 0.1818, "step": 3180 }, { "epoch": 0.14, "learning_rate": 4.8000327084045735e-05, "loss": 0.2298, "step": 3190 }, { "epoch": 0.14, "learning_rate": 4.799289335573364e-05, "loss": 0.2191, "step": 3200 }, { "epoch": 0.14, "learning_rate": 4.798545962742154e-05, "loss": 0.2111, "step": 3210 }, { "epoch": 0.14, "learning_rate": 4.797802589910944e-05, "loss": 0.203, "step": 3220 }, { "epoch": 0.14, "learning_rate": 4.7970592170797346e-05, "loss": 0.2435, "step": 3230 }, { "epoch": 0.14, "learning_rate": 4.796315844248525e-05, "loss": 0.2138, "step": 3240 }, { "epoch": 0.14, "learning_rate": 4.795572471417315e-05, "loss": 0.2126, "step": 3250 }, { "epoch": 0.14, "learning_rate": 4.7948290985861047e-05, "loss": 0.1502, "step": 3260 }, { "epoch": 0.14, "learning_rate": 4.7940857257548956e-05, "loss": 0.2213, "step": 3270 }, { "epoch": 0.15, "learning_rate": 4.793342352923685e-05, "loss": 0.1981, "step": 3280 }, { "epoch": 0.15, "learning_rate": 4.792598980092476e-05, "loss": 0.1934, "step": 3290 }, { "epoch": 0.15, "learning_rate": 4.791855607261266e-05, "loss": 0.2297, "step": 3300 }, { "epoch": 0.15, "learning_rate": 4.7911122344300566e-05, "loss": 0.2176, "step": 3310 }, { "epoch": 0.15, "learning_rate": 4.790368861598847e-05, "loss": 0.1625, "step": 3320 }, { "epoch": 0.15, "learning_rate": 4.7896254887676365e-05, "loss": 0.2083, "step": 3330 }, { "epoch": 0.15, "learning_rate": 4.7888821159364274e-05, "loss": 0.2289, "step": 3340 }, { "epoch": 0.15, "learning_rate": 4.788138743105217e-05, "loss": 0.2147, "step": 3350 }, { "epoch": 0.15, "learning_rate": 4.787395370274008e-05, "loss": 0.1903, "step": 3360 }, { "epoch": 0.15, "learning_rate": 4.7866519974427975e-05, "loss": 0.211, "step": 3370 }, { "epoch": 0.15, "learning_rate": 4.785908624611588e-05, "loss": 0.203, "step": 3380 }, { "epoch": 0.15, "learning_rate": 4.785165251780378e-05, "loss": 0.1663, "step": 3390 }, { "epoch": 0.15, "learning_rate": 4.784421878949168e-05, "loss": 0.1941, "step": 3400 }, { "epoch": 0.15, "learning_rate": 4.7836785061179585e-05, "loss": 0.1613, "step": 3410 }, { "epoch": 0.15, "learning_rate": 4.782935133286749e-05, "loss": 0.1746, "step": 3420 }, { "epoch": 0.15, "learning_rate": 4.782191760455539e-05, "loss": 0.1815, "step": 3430 }, { "epoch": 0.15, "learning_rate": 4.781448387624329e-05, "loss": 0.2477, "step": 3440 }, { "epoch": 0.15, "learning_rate": 4.7807050147931196e-05, "loss": 0.1687, "step": 3450 }, { "epoch": 0.15, "learning_rate": 4.77996164196191e-05, "loss": 0.2643, "step": 3460 }, { "epoch": 0.15, "learning_rate": 4.7792182691307e-05, "loss": 0.1724, "step": 3470 }, { "epoch": 0.15, "learning_rate": 4.77847489629949e-05, "loss": 0.1636, "step": 3480 }, { "epoch": 0.15, "learning_rate": 4.7777315234682806e-05, "loss": 0.214, "step": 3490 }, { "epoch": 0.15, "learning_rate": 4.776988150637071e-05, "loss": 0.2293, "step": 3500 }, { "epoch": 0.16, "learning_rate": 4.776244777805861e-05, "loss": 0.1665, "step": 3510 }, { "epoch": 0.16, "learning_rate": 4.7755014049746514e-05, "loss": 0.1933, "step": 3520 }, { "epoch": 0.16, "learning_rate": 4.7747580321434416e-05, "loss": 0.1889, "step": 3530 }, { "epoch": 0.16, "learning_rate": 4.774014659312231e-05, "loss": 0.1924, "step": 3540 }, { "epoch": 0.16, "learning_rate": 4.773271286481022e-05, "loss": 0.1279, "step": 3550 }, { "epoch": 0.16, "learning_rate": 4.772527913649812e-05, "loss": 0.1947, "step": 3560 }, { "epoch": 0.16, "learning_rate": 4.7717845408186027e-05, "loss": 0.1765, "step": 3570 }, { "epoch": 0.16, "learning_rate": 4.771041167987393e-05, "loss": 0.1602, "step": 3580 }, { "epoch": 0.16, "learning_rate": 4.7702977951561825e-05, "loss": 0.1642, "step": 3590 }, { "epoch": 0.16, "learning_rate": 4.7695544223249734e-05, "loss": 0.1933, "step": 3600 }, { "epoch": 0.16, "learning_rate": 4.768811049493763e-05, "loss": 0.1973, "step": 3610 }, { "epoch": 0.16, "learning_rate": 4.768067676662554e-05, "loss": 0.2231, "step": 3620 }, { "epoch": 0.16, "learning_rate": 4.7673243038313435e-05, "loss": 0.2081, "step": 3630 }, { "epoch": 0.16, "learning_rate": 4.7665809310001345e-05, "loss": 0.1543, "step": 3640 }, { "epoch": 0.16, "learning_rate": 4.765837558168924e-05, "loss": 0.1711, "step": 3650 }, { "epoch": 0.16, "learning_rate": 4.765094185337714e-05, "loss": 0.2388, "step": 3660 }, { "epoch": 0.16, "learning_rate": 4.7643508125065046e-05, "loss": 0.1933, "step": 3670 }, { "epoch": 0.16, "learning_rate": 4.763607439675295e-05, "loss": 0.2586, "step": 3680 }, { "epoch": 0.16, "learning_rate": 4.762864066844086e-05, "loss": 0.1674, "step": 3690 }, { "epoch": 0.16, "learning_rate": 4.762120694012875e-05, "loss": 0.1698, "step": 3700 }, { "epoch": 0.16, "learning_rate": 4.7613773211816656e-05, "loss": 0.2268, "step": 3710 }, { "epoch": 0.16, "learning_rate": 4.760633948350456e-05, "loss": 0.2456, "step": 3720 }, { "epoch": 0.17, "learning_rate": 4.759890575519246e-05, "loss": 0.2007, "step": 3730 }, { "epoch": 0.17, "learning_rate": 4.7591472026880364e-05, "loss": 0.1754, "step": 3740 }, { "epoch": 0.17, "learning_rate": 4.7584038298568266e-05, "loss": 0.1773, "step": 3750 }, { "epoch": 0.17, "learning_rate": 4.757660457025617e-05, "loss": 0.1932, "step": 3760 }, { "epoch": 0.17, "learning_rate": 4.756917084194407e-05, "loss": 0.1461, "step": 3770 }, { "epoch": 0.17, "learning_rate": 4.7561737113631974e-05, "loss": 0.2114, "step": 3780 }, { "epoch": 0.17, "learning_rate": 4.7554303385319877e-05, "loss": 0.1476, "step": 3790 }, { "epoch": 0.17, "learning_rate": 4.754686965700777e-05, "loss": 0.1578, "step": 3800 }, { "epoch": 0.17, "learning_rate": 4.753943592869568e-05, "loss": 0.1847, "step": 3810 }, { "epoch": 0.17, "learning_rate": 4.7532002200383584e-05, "loss": 0.1762, "step": 3820 }, { "epoch": 0.17, "learning_rate": 4.752456847207149e-05, "loss": 0.203, "step": 3830 }, { "epoch": 0.17, "learning_rate": 4.751713474375939e-05, "loss": 0.2478, "step": 3840 }, { "epoch": 0.17, "learning_rate": 4.750970101544729e-05, "loss": 0.1839, "step": 3850 }, { "epoch": 0.17, "learning_rate": 4.7502267287135195e-05, "loss": 0.1712, "step": 3860 }, { "epoch": 0.17, "learning_rate": 4.749483355882309e-05, "loss": 0.1691, "step": 3870 }, { "epoch": 0.17, "learning_rate": 4.7487399830511e-05, "loss": 0.1733, "step": 3880 }, { "epoch": 0.17, "learning_rate": 4.7479966102198896e-05, "loss": 0.1747, "step": 3890 }, { "epoch": 0.17, "learning_rate": 4.7472532373886805e-05, "loss": 0.1571, "step": 3900 }, { "epoch": 0.17, "learning_rate": 4.74650986455747e-05, "loss": 0.2568, "step": 3910 }, { "epoch": 0.17, "learning_rate": 4.74576649172626e-05, "loss": 0.1446, "step": 3920 }, { "epoch": 0.17, "learning_rate": 4.7450231188950506e-05, "loss": 0.1995, "step": 3930 }, { "epoch": 0.17, "learning_rate": 4.744279746063841e-05, "loss": 0.1421, "step": 3940 }, { "epoch": 0.17, "learning_rate": 4.743536373232632e-05, "loss": 0.2051, "step": 3950 }, { "epoch": 0.18, "learning_rate": 4.7427930004014214e-05, "loss": 0.1617, "step": 3960 }, { "epoch": 0.18, "learning_rate": 4.742049627570212e-05, "loss": 0.2006, "step": 3970 }, { "epoch": 0.18, "learning_rate": 4.741306254739002e-05, "loss": 0.2343, "step": 3980 }, { "epoch": 0.18, "learning_rate": 4.740562881907792e-05, "loss": 0.2017, "step": 3990 }, { "epoch": 0.18, "learning_rate": 4.7398195090765824e-05, "loss": 0.1908, "step": 4000 }, { "epoch": 0.18, "learning_rate": 4.7390761362453726e-05, "loss": 0.2539, "step": 4010 }, { "epoch": 0.18, "learning_rate": 4.738332763414163e-05, "loss": 0.2035, "step": 4020 }, { "epoch": 0.18, "learning_rate": 4.737589390582953e-05, "loss": 0.192, "step": 4030 }, { "epoch": 0.18, "learning_rate": 4.7368460177517434e-05, "loss": 0.2101, "step": 4040 }, { "epoch": 0.18, "learning_rate": 4.736102644920534e-05, "loss": 0.2221, "step": 4050 }, { "epoch": 0.18, "learning_rate": 4.735359272089324e-05, "loss": 0.2134, "step": 4060 }, { "epoch": 0.18, "learning_rate": 4.734615899258114e-05, "loss": 0.1435, "step": 4070 }, { "epoch": 0.18, "learning_rate": 4.7338725264269045e-05, "loss": 0.2155, "step": 4080 }, { "epoch": 0.18, "learning_rate": 4.733129153595695e-05, "loss": 0.1978, "step": 4090 }, { "epoch": 0.18, "learning_rate": 4.732385780764485e-05, "loss": 0.1943, "step": 4100 }, { "epoch": 0.18, "learning_rate": 4.731642407933275e-05, "loss": 0.1801, "step": 4110 }, { "epoch": 0.18, "learning_rate": 4.7308990351020655e-05, "loss": 0.1698, "step": 4120 }, { "epoch": 0.18, "learning_rate": 4.730155662270855e-05, "loss": 0.1645, "step": 4130 }, { "epoch": 0.18, "learning_rate": 4.729412289439646e-05, "loss": 0.2261, "step": 4140 }, { "epoch": 0.18, "learning_rate": 4.7286689166084356e-05, "loss": 0.1853, "step": 4150 }, { "epoch": 0.18, "learning_rate": 4.7279255437772265e-05, "loss": 0.1799, "step": 4160 }, { "epoch": 0.18, "learning_rate": 4.727182170946016e-05, "loss": 0.1956, "step": 4170 }, { "epoch": 0.19, "learning_rate": 4.726438798114807e-05, "loss": 0.1841, "step": 4180 }, { "epoch": 0.19, "learning_rate": 4.725695425283597e-05, "loss": 0.1847, "step": 4190 }, { "epoch": 0.19, "learning_rate": 4.724952052452387e-05, "loss": 0.2544, "step": 4200 }, { "epoch": 0.19, "learning_rate": 4.724208679621178e-05, "loss": 0.199, "step": 4210 }, { "epoch": 0.19, "learning_rate": 4.7234653067899674e-05, "loss": 0.1911, "step": 4220 }, { "epoch": 0.19, "learning_rate": 4.722721933958758e-05, "loss": 0.1555, "step": 4230 }, { "epoch": 0.19, "learning_rate": 4.721978561127548e-05, "loss": 0.2374, "step": 4240 }, { "epoch": 0.19, "learning_rate": 4.721235188296338e-05, "loss": 0.2418, "step": 4250 }, { "epoch": 0.19, "learning_rate": 4.7204918154651284e-05, "loss": 0.2113, "step": 4260 }, { "epoch": 0.19, "learning_rate": 4.719748442633919e-05, "loss": 0.1936, "step": 4270 }, { "epoch": 0.19, "learning_rate": 4.719005069802709e-05, "loss": 0.1843, "step": 4280 }, { "epoch": 0.19, "learning_rate": 4.718261696971499e-05, "loss": 0.1823, "step": 4290 }, { "epoch": 0.19, "learning_rate": 4.7175183241402895e-05, "loss": 0.1758, "step": 4300 }, { "epoch": 0.19, "learning_rate": 4.71677495130908e-05, "loss": 0.1671, "step": 4310 }, { "epoch": 0.19, "learning_rate": 4.71603157847787e-05, "loss": 0.1551, "step": 4320 }, { "epoch": 0.19, "learning_rate": 4.71528820564666e-05, "loss": 0.2358, "step": 4330 }, { "epoch": 0.19, "learning_rate": 4.7145448328154505e-05, "loss": 0.1592, "step": 4340 }, { "epoch": 0.19, "learning_rate": 4.713801459984241e-05, "loss": 0.1948, "step": 4350 }, { "epoch": 0.19, "learning_rate": 4.713058087153031e-05, "loss": 0.2059, "step": 4360 }, { "epoch": 0.19, "learning_rate": 4.712314714321821e-05, "loss": 0.1999, "step": 4370 }, { "epoch": 0.19, "learning_rate": 4.7115713414906115e-05, "loss": 0.2258, "step": 4380 }, { "epoch": 0.19, "learning_rate": 4.710827968659402e-05, "loss": 0.2206, "step": 4390 }, { "epoch": 0.19, "learning_rate": 4.710084595828192e-05, "loss": 0.2226, "step": 4400 }, { "epoch": 0.2, "learning_rate": 4.7093412229969816e-05, "loss": 0.1959, "step": 4410 }, { "epoch": 0.2, "learning_rate": 4.7085978501657725e-05, "loss": 0.1913, "step": 4420 }, { "epoch": 0.2, "learning_rate": 4.707854477334563e-05, "loss": 0.1336, "step": 4430 }, { "epoch": 0.2, "learning_rate": 4.707111104503353e-05, "loss": 0.1619, "step": 4440 }, { "epoch": 0.2, "learning_rate": 4.706367731672143e-05, "loss": 0.1998, "step": 4450 }, { "epoch": 0.2, "learning_rate": 4.705624358840933e-05, "loss": 0.2151, "step": 4460 }, { "epoch": 0.2, "learning_rate": 4.704880986009724e-05, "loss": 0.2118, "step": 4470 }, { "epoch": 0.2, "learning_rate": 4.7041376131785134e-05, "loss": 0.1283, "step": 4480 }, { "epoch": 0.2, "learning_rate": 4.7033942403473044e-05, "loss": 0.1134, "step": 4490 }, { "epoch": 0.2, "learning_rate": 4.702650867516094e-05, "loss": 0.1818, "step": 4500 }, { "epoch": 0.2, "learning_rate": 4.701907494684885e-05, "loss": 0.1521, "step": 4510 }, { "epoch": 0.2, "learning_rate": 4.7011641218536744e-05, "loss": 0.1358, "step": 4520 }, { "epoch": 0.2, "learning_rate": 4.700420749022465e-05, "loss": 0.2277, "step": 4530 }, { "epoch": 0.2, "learning_rate": 4.699677376191255e-05, "loss": 0.1989, "step": 4540 }, { "epoch": 0.2, "learning_rate": 4.698934003360045e-05, "loss": 0.1648, "step": 4550 }, { "epoch": 0.2, "learning_rate": 4.698190630528836e-05, "loss": 0.1664, "step": 4560 }, { "epoch": 0.2, "learning_rate": 4.697447257697626e-05, "loss": 0.2396, "step": 4570 }, { "epoch": 0.2, "learning_rate": 4.696703884866416e-05, "loss": 0.2495, "step": 4580 }, { "epoch": 0.2, "learning_rate": 4.695960512035206e-05, "loss": 0.2068, "step": 4590 }, { "epoch": 0.2, "learning_rate": 4.6952171392039965e-05, "loss": 0.2177, "step": 4600 }, { "epoch": 0.2, "learning_rate": 4.694473766372787e-05, "loss": 0.1966, "step": 4610 }, { "epoch": 0.2, "learning_rate": 4.693730393541577e-05, "loss": 0.2035, "step": 4620 }, { "epoch": 0.2, "learning_rate": 4.692987020710367e-05, "loss": 0.1906, "step": 4630 }, { "epoch": 0.21, "learning_rate": 4.6922436478791575e-05, "loss": 0.1831, "step": 4640 }, { "epoch": 0.21, "learning_rate": 4.691500275047948e-05, "loss": 0.1612, "step": 4650 }, { "epoch": 0.21, "learning_rate": 4.690756902216738e-05, "loss": 0.2006, "step": 4660 }, { "epoch": 0.21, "learning_rate": 4.690013529385528e-05, "loss": 0.2511, "step": 4670 }, { "epoch": 0.21, "learning_rate": 4.6892701565543186e-05, "loss": 0.1666, "step": 4680 }, { "epoch": 0.21, "learning_rate": 4.688526783723109e-05, "loss": 0.2172, "step": 4690 }, { "epoch": 0.21, "learning_rate": 4.687783410891899e-05, "loss": 0.1783, "step": 4700 }, { "epoch": 0.21, "learning_rate": 4.6870400380606893e-05, "loss": 0.1952, "step": 4710 }, { "epoch": 0.21, "learning_rate": 4.6862966652294796e-05, "loss": 0.2201, "step": 4720 }, { "epoch": 0.21, "learning_rate": 4.68555329239827e-05, "loss": 0.1923, "step": 4730 }, { "epoch": 0.21, "learning_rate": 4.6848099195670594e-05, "loss": 0.1705, "step": 4740 }, { "epoch": 0.21, "learning_rate": 4.6840665467358504e-05, "loss": 0.1424, "step": 4750 }, { "epoch": 0.21, "learning_rate": 4.68332317390464e-05, "loss": 0.1996, "step": 4760 }, { "epoch": 0.21, "learning_rate": 4.682579801073431e-05, "loss": 0.2296, "step": 4770 }, { "epoch": 0.21, "learning_rate": 4.6818364282422205e-05, "loss": 0.1768, "step": 4780 }, { "epoch": 0.21, "learning_rate": 4.681093055411011e-05, "loss": 0.2053, "step": 4790 }, { "epoch": 0.21, "learning_rate": 4.680349682579802e-05, "loss": 0.1634, "step": 4800 }, { "epoch": 0.21, "learning_rate": 4.679606309748591e-05, "loss": 0.1569, "step": 4810 }, { "epoch": 0.21, "learning_rate": 4.678862936917382e-05, "loss": 0.2193, "step": 4820 }, { "epoch": 0.21, "learning_rate": 4.678119564086172e-05, "loss": 0.1778, "step": 4830 }, { "epoch": 0.21, "learning_rate": 4.677376191254963e-05, "loss": 0.1908, "step": 4840 }, { "epoch": 0.21, "learning_rate": 4.676632818423752e-05, "loss": 0.2165, "step": 4850 }, { "epoch": 0.22, "learning_rate": 4.6758894455925425e-05, "loss": 0.2005, "step": 4860 }, { "epoch": 0.22, "learning_rate": 4.675146072761333e-05, "loss": 0.205, "step": 4870 }, { "epoch": 0.22, "learning_rate": 4.674402699930123e-05, "loss": 0.2029, "step": 4880 }, { "epoch": 0.22, "learning_rate": 4.673659327098913e-05, "loss": 0.2093, "step": 4890 }, { "epoch": 0.22, "learning_rate": 4.6729159542677036e-05, "loss": 0.1745, "step": 4900 }, { "epoch": 0.22, "learning_rate": 4.672172581436494e-05, "loss": 0.14, "step": 4910 }, { "epoch": 0.22, "learning_rate": 4.671429208605284e-05, "loss": 0.2007, "step": 4920 }, { "epoch": 0.22, "learning_rate": 4.6706858357740743e-05, "loss": 0.129, "step": 4930 }, { "epoch": 0.22, "learning_rate": 4.6699424629428646e-05, "loss": 0.1493, "step": 4940 }, { "epoch": 0.22, "learning_rate": 4.669199090111655e-05, "loss": 0.2641, "step": 4950 }, { "epoch": 0.22, "learning_rate": 4.668455717280445e-05, "loss": 0.176, "step": 4960 }, { "epoch": 0.22, "learning_rate": 4.6677123444492354e-05, "loss": 0.2108, "step": 4970 }, { "epoch": 0.22, "learning_rate": 4.6669689716180256e-05, "loss": 0.1956, "step": 4980 }, { "epoch": 0.22, "learning_rate": 4.666225598786816e-05, "loss": 0.1753, "step": 4990 }, { "epoch": 0.22, "learning_rate": 4.6654822259556055e-05, "loss": 0.1822, "step": 5000 }, { "epoch": 0.22, "learning_rate": 4.6647388531243964e-05, "loss": 0.2252, "step": 5010 }, { "epoch": 0.22, "learning_rate": 4.663995480293186e-05, "loss": 0.1333, "step": 5020 }, { "epoch": 0.22, "learning_rate": 4.663252107461977e-05, "loss": 0.1664, "step": 5030 }, { "epoch": 0.22, "learning_rate": 4.662508734630767e-05, "loss": 0.2348, "step": 5040 }, { "epoch": 0.22, "learning_rate": 4.6617653617995574e-05, "loss": 0.1736, "step": 5050 }, { "epoch": 0.22, "learning_rate": 4.661021988968348e-05, "loss": 0.2118, "step": 5060 }, { "epoch": 0.22, "learning_rate": 4.660278616137137e-05, "loss": 0.2598, "step": 5070 }, { "epoch": 0.22, "learning_rate": 4.659535243305928e-05, "loss": 0.1735, "step": 5080 }, { "epoch": 0.23, "learning_rate": 4.658791870474718e-05, "loss": 0.193, "step": 5090 }, { "epoch": 0.23, "learning_rate": 4.658048497643509e-05, "loss": 0.151, "step": 5100 }, { "epoch": 0.23, "learning_rate": 4.657305124812298e-05, "loss": 0.1902, "step": 5110 }, { "epoch": 0.23, "learning_rate": 4.6565617519810886e-05, "loss": 0.1973, "step": 5120 }, { "epoch": 0.23, "learning_rate": 4.655818379149879e-05, "loss": 0.2868, "step": 5130 }, { "epoch": 0.23, "learning_rate": 4.655075006318669e-05, "loss": 0.1723, "step": 5140 }, { "epoch": 0.23, "learning_rate": 4.6543316334874593e-05, "loss": 0.2217, "step": 5150 }, { "epoch": 0.23, "learning_rate": 4.6535882606562496e-05, "loss": 0.1885, "step": 5160 }, { "epoch": 0.23, "learning_rate": 4.6528448878250405e-05, "loss": 0.1679, "step": 5170 }, { "epoch": 0.23, "learning_rate": 4.65210151499383e-05, "loss": 0.1811, "step": 5180 }, { "epoch": 0.23, "learning_rate": 4.6513581421626204e-05, "loss": 0.1931, "step": 5190 }, { "epoch": 0.23, "learning_rate": 4.6506147693314106e-05, "loss": 0.1989, "step": 5200 }, { "epoch": 0.23, "learning_rate": 4.649871396500201e-05, "loss": 0.1531, "step": 5210 }, { "epoch": 0.23, "learning_rate": 4.649128023668991e-05, "loss": 0.1949, "step": 5220 }, { "epoch": 0.23, "learning_rate": 4.6483846508377814e-05, "loss": 0.1636, "step": 5230 }, { "epoch": 0.23, "learning_rate": 4.647641278006572e-05, "loss": 0.2281, "step": 5240 }, { "epoch": 0.23, "learning_rate": 4.646897905175362e-05, "loss": 0.1615, "step": 5250 }, { "epoch": 0.23, "learning_rate": 4.646154532344152e-05, "loss": 0.2141, "step": 5260 }, { "epoch": 0.23, "learning_rate": 4.6454111595129424e-05, "loss": 0.2078, "step": 5270 }, { "epoch": 0.23, "learning_rate": 4.644667786681732e-05, "loss": 0.2003, "step": 5280 }, { "epoch": 0.23, "learning_rate": 4.643924413850523e-05, "loss": 0.2235, "step": 5290 }, { "epoch": 0.23, "learning_rate": 4.643181041019313e-05, "loss": 0.198, "step": 5300 }, { "epoch": 0.24, "learning_rate": 4.6424376681881035e-05, "loss": 0.1794, "step": 5310 }, { "epoch": 0.24, "learning_rate": 4.641694295356894e-05, "loss": 0.182, "step": 5320 }, { "epoch": 0.24, "learning_rate": 4.640950922525683e-05, "loss": 0.229, "step": 5330 }, { "epoch": 0.24, "learning_rate": 4.640207549694474e-05, "loss": 0.1439, "step": 5340 }, { "epoch": 0.24, "learning_rate": 4.639464176863264e-05, "loss": 0.1614, "step": 5350 }, { "epoch": 0.24, "learning_rate": 4.638720804032055e-05, "loss": 0.1693, "step": 5360 }, { "epoch": 0.24, "learning_rate": 4.6379774312008443e-05, "loss": 0.2107, "step": 5370 }, { "epoch": 0.24, "learning_rate": 4.637234058369635e-05, "loss": 0.1945, "step": 5380 }, { "epoch": 0.24, "learning_rate": 4.636490685538425e-05, "loss": 0.1587, "step": 5390 }, { "epoch": 0.24, "learning_rate": 4.635747312707215e-05, "loss": 0.1913, "step": 5400 }, { "epoch": 0.24, "learning_rate": 4.635003939876006e-05, "loss": 0.2093, "step": 5410 }, { "epoch": 0.24, "learning_rate": 4.6342605670447956e-05, "loss": 0.2301, "step": 5420 }, { "epoch": 0.24, "learning_rate": 4.6335171942135866e-05, "loss": 0.1478, "step": 5430 }, { "epoch": 0.24, "learning_rate": 4.632773821382376e-05, "loss": 0.1874, "step": 5440 }, { "epoch": 0.24, "learning_rate": 4.632030448551167e-05, "loss": 0.2395, "step": 5450 }, { "epoch": 0.24, "learning_rate": 4.631287075719957e-05, "loss": 0.1632, "step": 5460 }, { "epoch": 0.24, "learning_rate": 4.630543702888747e-05, "loss": 0.2397, "step": 5470 }, { "epoch": 0.24, "learning_rate": 4.629800330057537e-05, "loss": 0.1963, "step": 5480 }, { "epoch": 0.24, "learning_rate": 4.6290569572263274e-05, "loss": 0.1628, "step": 5490 }, { "epoch": 0.24, "learning_rate": 4.628313584395118e-05, "loss": 0.1814, "step": 5500 }, { "epoch": 0.24, "learning_rate": 4.627570211563908e-05, "loss": 0.1712, "step": 5510 }, { "epoch": 0.24, "learning_rate": 4.626826838732698e-05, "loss": 0.1666, "step": 5520 }, { "epoch": 0.24, "learning_rate": 4.6260834659014885e-05, "loss": 0.1819, "step": 5530 }, { "epoch": 0.25, "learning_rate": 4.625340093070279e-05, "loss": 0.2382, "step": 5540 }, { "epoch": 0.25, "learning_rate": 4.624596720239069e-05, "loss": 0.1811, "step": 5550 }, { "epoch": 0.25, "learning_rate": 4.623853347407859e-05, "loss": 0.1673, "step": 5560 }, { "epoch": 0.25, "learning_rate": 4.6231099745766495e-05, "loss": 0.1355, "step": 5570 }, { "epoch": 0.25, "learning_rate": 4.62236660174544e-05, "loss": 0.1699, "step": 5580 }, { "epoch": 0.25, "learning_rate": 4.62162322891423e-05, "loss": 0.2482, "step": 5590 }, { "epoch": 0.25, "learning_rate": 4.62087985608302e-05, "loss": 0.2124, "step": 5600 }, { "epoch": 0.25, "learning_rate": 4.62013648325181e-05, "loss": 0.1775, "step": 5610 }, { "epoch": 0.25, "learning_rate": 4.619393110420601e-05, "loss": 0.2245, "step": 5620 }, { "epoch": 0.25, "learning_rate": 4.6186497375893904e-05, "loss": 0.1317, "step": 5630 }, { "epoch": 0.25, "learning_rate": 4.617906364758181e-05, "loss": 0.1534, "step": 5640 }, { "epoch": 0.25, "learning_rate": 4.6171629919269716e-05, "loss": 0.1495, "step": 5650 }, { "epoch": 0.25, "learning_rate": 4.616419619095762e-05, "loss": 0.2115, "step": 5660 }, { "epoch": 0.25, "learning_rate": 4.615676246264552e-05, "loss": 0.1428, "step": 5670 }, { "epoch": 0.25, "learning_rate": 4.6149328734333417e-05, "loss": 0.1488, "step": 5680 }, { "epoch": 0.25, "learning_rate": 4.6141895006021326e-05, "loss": 0.2301, "step": 5690 }, { "epoch": 0.25, "learning_rate": 4.613446127770922e-05, "loss": 0.1726, "step": 5700 }, { "epoch": 0.25, "learning_rate": 4.612702754939713e-05, "loss": 0.2083, "step": 5710 }, { "epoch": 0.25, "learning_rate": 4.611959382108503e-05, "loss": 0.179, "step": 5720 }, { "epoch": 0.25, "learning_rate": 4.611216009277293e-05, "loss": 0.16, "step": 5730 }, { "epoch": 0.25, "learning_rate": 4.610472636446083e-05, "loss": 0.1489, "step": 5740 }, { "epoch": 0.25, "learning_rate": 4.6097292636148735e-05, "loss": 0.2726, "step": 5750 }, { "epoch": 0.26, "learning_rate": 4.608985890783664e-05, "loss": 0.189, "step": 5760 }, { "epoch": 0.26, "learning_rate": 4.608242517952454e-05, "loss": 0.1789, "step": 5770 }, { "epoch": 0.26, "learning_rate": 4.607499145121245e-05, "loss": 0.1753, "step": 5780 }, { "epoch": 0.26, "learning_rate": 4.6067557722900345e-05, "loss": 0.2578, "step": 5790 }, { "epoch": 0.26, "learning_rate": 4.606012399458825e-05, "loss": 0.1571, "step": 5800 }, { "epoch": 0.26, "learning_rate": 4.605269026627615e-05, "loss": 0.2205, "step": 5810 }, { "epoch": 0.26, "learning_rate": 4.604525653796405e-05, "loss": 0.189, "step": 5820 }, { "epoch": 0.26, "learning_rate": 4.6037822809651955e-05, "loss": 0.2021, "step": 5830 }, { "epoch": 0.26, "learning_rate": 4.603038908133986e-05, "loss": 0.1578, "step": 5840 }, { "epoch": 0.26, "learning_rate": 4.602295535302776e-05, "loss": 0.2302, "step": 5850 }, { "epoch": 0.26, "learning_rate": 4.601552162471566e-05, "loss": 0.2554, "step": 5860 }, { "epoch": 0.26, "learning_rate": 4.600808789640356e-05, "loss": 0.1927, "step": 5870 }, { "epoch": 0.26, "learning_rate": 4.600065416809147e-05, "loss": 0.1964, "step": 5880 }, { "epoch": 0.26, "learning_rate": 4.5993220439779364e-05, "loss": 0.1314, "step": 5890 }, { "epoch": 0.26, "learning_rate": 4.598578671146727e-05, "loss": 0.2192, "step": 5900 }, { "epoch": 0.26, "learning_rate": 4.5978352983155176e-05, "loss": 0.2489, "step": 5910 }, { "epoch": 0.26, "learning_rate": 4.597091925484308e-05, "loss": 0.152, "step": 5920 }, { "epoch": 0.26, "learning_rate": 4.596348552653098e-05, "loss": 0.1859, "step": 5930 }, { "epoch": 0.26, "learning_rate": 4.595605179821888e-05, "loss": 0.178, "step": 5940 }, { "epoch": 0.26, "learning_rate": 4.5948618069906786e-05, "loss": 0.2292, "step": 5950 }, { "epoch": 0.26, "learning_rate": 4.594118434159468e-05, "loss": 0.19, "step": 5960 }, { "epoch": 0.26, "learning_rate": 4.593375061328259e-05, "loss": 0.1743, "step": 5970 }, { "epoch": 0.26, "learning_rate": 4.592631688497049e-05, "loss": 0.1965, "step": 5980 }, { "epoch": 0.27, "learning_rate": 4.5918883156658397e-05, "loss": 0.2271, "step": 5990 }, { "epoch": 0.27, "learning_rate": 4.591144942834629e-05, "loss": 0.2326, "step": 6000 }, { "epoch": 0.27, "learning_rate": 4.5904015700034195e-05, "loss": 0.1787, "step": 6010 }, { "epoch": 0.27, "learning_rate": 4.5896581971722104e-05, "loss": 0.1402, "step": 6020 }, { "epoch": 0.27, "learning_rate": 4.588914824341e-05, "loss": 0.18, "step": 6030 }, { "epoch": 0.27, "learning_rate": 4.588171451509791e-05, "loss": 0.1585, "step": 6040 }, { "epoch": 0.27, "learning_rate": 4.5874280786785805e-05, "loss": 0.2416, "step": 6050 }, { "epoch": 0.27, "learning_rate": 4.586684705847371e-05, "loss": 0.2345, "step": 6060 }, { "epoch": 0.27, "learning_rate": 4.585941333016161e-05, "loss": 0.2101, "step": 6070 }, { "epoch": 0.27, "learning_rate": 4.585197960184951e-05, "loss": 0.2595, "step": 6080 }, { "epoch": 0.27, "learning_rate": 4.5844545873537416e-05, "loss": 0.1963, "step": 6090 }, { "epoch": 0.27, "learning_rate": 4.583711214522532e-05, "loss": 0.2147, "step": 6100 }, { "epoch": 0.27, "learning_rate": 4.582967841691322e-05, "loss": 0.1501, "step": 6110 }, { "epoch": 0.27, "learning_rate": 4.582224468860112e-05, "loss": 0.1831, "step": 6120 }, { "epoch": 0.27, "learning_rate": 4.5814810960289026e-05, "loss": 0.1613, "step": 6130 }, { "epoch": 0.27, "learning_rate": 4.580737723197693e-05, "loss": 0.2332, "step": 6140 }, { "epoch": 0.27, "learning_rate": 4.579994350366483e-05, "loss": 0.1608, "step": 6150 }, { "epoch": 0.27, "learning_rate": 4.5792509775352734e-05, "loss": 0.208, "step": 6160 }, { "epoch": 0.27, "learning_rate": 4.5785076047040636e-05, "loss": 0.2136, "step": 6170 }, { "epoch": 0.27, "learning_rate": 4.577764231872854e-05, "loss": 0.1965, "step": 6180 }, { "epoch": 0.27, "learning_rate": 4.577020859041644e-05, "loss": 0.1749, "step": 6190 }, { "epoch": 0.27, "learning_rate": 4.5762774862104344e-05, "loss": 0.1627, "step": 6200 }, { "epoch": 0.27, "learning_rate": 4.5755341133792247e-05, "loss": 0.1519, "step": 6210 }, { "epoch": 0.28, "learning_rate": 4.574790740548014e-05, "loss": 0.2044, "step": 6220 }, { "epoch": 0.28, "learning_rate": 4.574047367716805e-05, "loss": 0.157, "step": 6230 }, { "epoch": 0.28, "learning_rate": 4.573303994885595e-05, "loss": 0.1762, "step": 6240 }, { "epoch": 0.28, "learning_rate": 4.572560622054386e-05, "loss": 0.1864, "step": 6250 }, { "epoch": 0.28, "learning_rate": 4.571817249223175e-05, "loss": 0.1788, "step": 6260 }, { "epoch": 0.28, "learning_rate": 4.5710738763919655e-05, "loss": 0.18, "step": 6270 }, { "epoch": 0.28, "learning_rate": 4.5703305035607565e-05, "loss": 0.1688, "step": 6280 }, { "epoch": 0.28, "learning_rate": 4.569587130729546e-05, "loss": 0.1995, "step": 6290 }, { "epoch": 0.28, "learning_rate": 4.568843757898337e-05, "loss": 0.1566, "step": 6300 }, { "epoch": 0.28, "learning_rate": 4.5681003850671266e-05, "loss": 0.1248, "step": 6310 }, { "epoch": 0.28, "learning_rate": 4.5673570122359175e-05, "loss": 0.2147, "step": 6320 }, { "epoch": 0.28, "learning_rate": 4.566613639404707e-05, "loss": 0.1766, "step": 6330 }, { "epoch": 0.28, "learning_rate": 4.565870266573497e-05, "loss": 0.1949, "step": 6340 }, { "epoch": 0.28, "learning_rate": 4.5651268937422876e-05, "loss": 0.165, "step": 6350 }, { "epoch": 0.28, "learning_rate": 4.564383520911078e-05, "loss": 0.1473, "step": 6360 }, { "epoch": 0.28, "learning_rate": 4.563640148079868e-05, "loss": 0.1761, "step": 6370 }, { "epoch": 0.28, "learning_rate": 4.5628967752486584e-05, "loss": 0.1811, "step": 6380 }, { "epoch": 0.28, "learning_rate": 4.5621534024174486e-05, "loss": 0.208, "step": 6390 }, { "epoch": 0.28, "learning_rate": 4.561410029586239e-05, "loss": 0.2374, "step": 6400 }, { "epoch": 0.28, "learning_rate": 4.560666656755029e-05, "loss": 0.1654, "step": 6410 }, { "epoch": 0.28, "learning_rate": 4.5599232839238194e-05, "loss": 0.1996, "step": 6420 }, { "epoch": 0.28, "learning_rate": 4.5591799110926097e-05, "loss": 0.1596, "step": 6430 }, { "epoch": 0.29, "learning_rate": 4.5584365382614e-05, "loss": 0.1589, "step": 6440 }, { "epoch": 0.29, "learning_rate": 4.55769316543019e-05, "loss": 0.2543, "step": 6450 }, { "epoch": 0.29, "learning_rate": 4.5569497925989804e-05, "loss": 0.1889, "step": 6460 }, { "epoch": 0.29, "learning_rate": 4.556206419767771e-05, "loss": 0.2004, "step": 6470 }, { "epoch": 0.29, "learning_rate": 4.55546304693656e-05, "loss": 0.1637, "step": 6480 }, { "epoch": 0.29, "learning_rate": 4.554719674105351e-05, "loss": 0.1658, "step": 6490 }, { "epoch": 0.29, "learning_rate": 4.553976301274141e-05, "loss": 0.1649, "step": 6500 }, { "epoch": 0.29, "learning_rate": 4.553232928442932e-05, "loss": 0.2118, "step": 6510 }, { "epoch": 0.29, "learning_rate": 4.552489555611722e-05, "loss": 0.1735, "step": 6520 }, { "epoch": 0.29, "learning_rate": 4.551746182780512e-05, "loss": 0.1513, "step": 6530 }, { "epoch": 0.29, "learning_rate": 4.5510028099493025e-05, "loss": 0.1653, "step": 6540 }, { "epoch": 0.29, "learning_rate": 4.550259437118092e-05, "loss": 0.2427, "step": 6550 }, { "epoch": 0.29, "learning_rate": 4.549516064286883e-05, "loss": 0.2136, "step": 6560 }, { "epoch": 0.29, "learning_rate": 4.5487726914556726e-05, "loss": 0.1936, "step": 6570 }, { "epoch": 0.29, "learning_rate": 4.5480293186244635e-05, "loss": 0.1588, "step": 6580 }, { "epoch": 0.29, "learning_rate": 4.547285945793253e-05, "loss": 0.176, "step": 6590 }, { "epoch": 0.29, "learning_rate": 4.5465425729620434e-05, "loss": 0.1324, "step": 6600 }, { "epoch": 0.29, "learning_rate": 4.5457992001308336e-05, "loss": 0.1709, "step": 6610 }, { "epoch": 0.29, "learning_rate": 4.545055827299624e-05, "loss": 0.2037, "step": 6620 }, { "epoch": 0.29, "learning_rate": 4.544312454468415e-05, "loss": 0.1674, "step": 6630 }, { "epoch": 0.29, "learning_rate": 4.5435690816372044e-05, "loss": 0.151, "step": 6640 }, { "epoch": 0.29, "learning_rate": 4.542825708805995e-05, "loss": 0.2253, "step": 6650 }, { "epoch": 0.29, "learning_rate": 4.542082335974785e-05, "loss": 0.2006, "step": 6660 }, { "epoch": 0.3, "learning_rate": 4.541338963143575e-05, "loss": 0.1909, "step": 6670 }, { "epoch": 0.3, "learning_rate": 4.5405955903123654e-05, "loss": 0.2447, "step": 6680 }, { "epoch": 0.3, "learning_rate": 4.539852217481156e-05, "loss": 0.1817, "step": 6690 }, { "epoch": 0.3, "learning_rate": 4.539108844649946e-05, "loss": 0.1492, "step": 6700 }, { "epoch": 0.3, "learning_rate": 4.538365471818736e-05, "loss": 0.2458, "step": 6710 }, { "epoch": 0.3, "learning_rate": 4.5376220989875265e-05, "loss": 0.1959, "step": 6720 }, { "epoch": 0.3, "learning_rate": 4.536878726156317e-05, "loss": 0.2271, "step": 6730 }, { "epoch": 0.3, "learning_rate": 4.536135353325107e-05, "loss": 0.164, "step": 6740 }, { "epoch": 0.3, "learning_rate": 4.535391980493897e-05, "loss": 0.2078, "step": 6750 }, { "epoch": 0.3, "learning_rate": 4.5346486076626875e-05, "loss": 0.1879, "step": 6760 }, { "epoch": 0.3, "learning_rate": 4.533905234831478e-05, "loss": 0.2041, "step": 6770 }, { "epoch": 0.3, "learning_rate": 4.533161862000268e-05, "loss": 0.1384, "step": 6780 }, { "epoch": 0.3, "learning_rate": 4.532418489169058e-05, "loss": 0.1966, "step": 6790 }, { "epoch": 0.3, "learning_rate": 4.5316751163378485e-05, "loss": 0.1927, "step": 6800 }, { "epoch": 0.3, "learning_rate": 4.530931743506638e-05, "loss": 0.1994, "step": 6810 }, { "epoch": 0.3, "learning_rate": 4.530188370675429e-05, "loss": 0.1588, "step": 6820 }, { "epoch": 0.3, "learning_rate": 4.5294449978442186e-05, "loss": 0.2004, "step": 6830 }, { "epoch": 0.3, "learning_rate": 4.5287016250130095e-05, "loss": 0.1486, "step": 6840 }, { "epoch": 0.3, "learning_rate": 4.527958252181799e-05, "loss": 0.18, "step": 6850 }, { "epoch": 0.3, "learning_rate": 4.52721487935059e-05, "loss": 0.2191, "step": 6860 }, { "epoch": 0.3, "learning_rate": 4.5264715065193796e-05, "loss": 0.1661, "step": 6870 }, { "epoch": 0.3, "learning_rate": 4.52572813368817e-05, "loss": 0.1506, "step": 6880 }, { "epoch": 0.31, "learning_rate": 4.524984760856961e-05, "loss": 0.1503, "step": 6890 }, { "epoch": 0.31, "learning_rate": 4.5242413880257504e-05, "loss": 0.1736, "step": 6900 }, { "epoch": 0.31, "learning_rate": 4.5234980151945414e-05, "loss": 0.1788, "step": 6910 }, { "epoch": 0.31, "learning_rate": 4.522754642363331e-05, "loss": 0.2112, "step": 6920 }, { "epoch": 0.31, "learning_rate": 4.522011269532121e-05, "loss": 0.192, "step": 6930 }, { "epoch": 0.31, "learning_rate": 4.5212678967009115e-05, "loss": 0.1904, "step": 6940 }, { "epoch": 0.31, "learning_rate": 4.520524523869702e-05, "loss": 0.1854, "step": 6950 }, { "epoch": 0.31, "learning_rate": 4.519781151038492e-05, "loss": 0.2013, "step": 6960 }, { "epoch": 0.31, "learning_rate": 4.519037778207282e-05, "loss": 0.1996, "step": 6970 }, { "epoch": 0.31, "learning_rate": 4.5182944053760725e-05, "loss": 0.1653, "step": 6980 }, { "epoch": 0.31, "learning_rate": 4.517551032544863e-05, "loss": 0.1929, "step": 6990 }, { "epoch": 0.31, "learning_rate": 4.516807659713653e-05, "loss": 0.2288, "step": 7000 }, { "epoch": 0.31, "learning_rate": 4.516064286882443e-05, "loss": 0.1661, "step": 7010 }, { "epoch": 0.31, "learning_rate": 4.5153209140512335e-05, "loss": 0.1811, "step": 7020 }, { "epoch": 0.31, "learning_rate": 4.514577541220024e-05, "loss": 0.1812, "step": 7030 }, { "epoch": 0.31, "learning_rate": 4.513834168388814e-05, "loss": 0.1966, "step": 7040 }, { "epoch": 0.31, "learning_rate": 4.513090795557604e-05, "loss": 0.1826, "step": 7050 }, { "epoch": 0.31, "learning_rate": 4.5123474227263945e-05, "loss": 0.1712, "step": 7060 }, { "epoch": 0.31, "learning_rate": 4.511604049895185e-05, "loss": 0.164, "step": 7070 }, { "epoch": 0.31, "learning_rate": 4.510860677063975e-05, "loss": 0.1702, "step": 7080 }, { "epoch": 0.31, "learning_rate": 4.5101173042327646e-05, "loss": 0.1527, "step": 7090 }, { "epoch": 0.31, "learning_rate": 4.5093739314015556e-05, "loss": 0.1958, "step": 7100 }, { "epoch": 0.31, "learning_rate": 4.508630558570345e-05, "loss": 0.118, "step": 7110 }, { "epoch": 0.32, "learning_rate": 4.507887185739136e-05, "loss": 0.1961, "step": 7120 }, { "epoch": 0.32, "learning_rate": 4.5071438129079264e-05, "loss": 0.2165, "step": 7130 }, { "epoch": 0.32, "learning_rate": 4.506400440076716e-05, "loss": 0.1998, "step": 7140 }, { "epoch": 0.32, "learning_rate": 4.505657067245507e-05, "loss": 0.1484, "step": 7150 }, { "epoch": 0.32, "learning_rate": 4.5049136944142964e-05, "loss": 0.1947, "step": 7160 }, { "epoch": 0.32, "learning_rate": 4.5041703215830874e-05, "loss": 0.1612, "step": 7170 }, { "epoch": 0.32, "learning_rate": 4.503426948751877e-05, "loss": 0.1744, "step": 7180 }, { "epoch": 0.32, "learning_rate": 4.502683575920668e-05, "loss": 0.1693, "step": 7190 }, { "epoch": 0.32, "learning_rate": 4.5019402030894575e-05, "loss": 0.228, "step": 7200 }, { "epoch": 0.32, "learning_rate": 4.501196830258248e-05, "loss": 0.1791, "step": 7210 }, { "epoch": 0.32, "learning_rate": 4.500453457427038e-05, "loss": 0.1686, "step": 7220 }, { "epoch": 0.32, "learning_rate": 4.499710084595828e-05, "loss": 0.2372, "step": 7230 }, { "epoch": 0.32, "learning_rate": 4.4989667117646185e-05, "loss": 0.2062, "step": 7240 }, { "epoch": 0.32, "learning_rate": 4.498223338933409e-05, "loss": 0.2191, "step": 7250 }, { "epoch": 0.32, "learning_rate": 4.497479966102199e-05, "loss": 0.1079, "step": 7260 }, { "epoch": 0.32, "learning_rate": 4.496736593270989e-05, "loss": 0.2664, "step": 7270 }, { "epoch": 0.32, "learning_rate": 4.4959932204397795e-05, "loss": 0.2324, "step": 7280 }, { "epoch": 0.32, "learning_rate": 4.49524984760857e-05, "loss": 0.2144, "step": 7290 }, { "epoch": 0.32, "learning_rate": 4.49450647477736e-05, "loss": 0.2014, "step": 7300 }, { "epoch": 0.32, "learning_rate": 4.49376310194615e-05, "loss": 0.1716, "step": 7310 }, { "epoch": 0.32, "learning_rate": 4.4930197291149406e-05, "loss": 0.2299, "step": 7320 }, { "epoch": 0.32, "learning_rate": 4.492276356283731e-05, "loss": 0.1925, "step": 7330 }, { "epoch": 0.32, "learning_rate": 4.491532983452521e-05, "loss": 0.14, "step": 7340 }, { "epoch": 0.33, "learning_rate": 4.490789610621311e-05, "loss": 0.1833, "step": 7350 }, { "epoch": 0.33, "learning_rate": 4.4900462377901016e-05, "loss": 0.2224, "step": 7360 }, { "epoch": 0.33, "learning_rate": 4.489302864958892e-05, "loss": 0.1507, "step": 7370 }, { "epoch": 0.33, "learning_rate": 4.488559492127682e-05, "loss": 0.1716, "step": 7380 }, { "epoch": 0.33, "learning_rate": 4.4878161192964724e-05, "loss": 0.2199, "step": 7390 }, { "epoch": 0.33, "learning_rate": 4.4870727464652626e-05, "loss": 0.1584, "step": 7400 }, { "epoch": 0.33, "learning_rate": 4.486329373634053e-05, "loss": 0.2043, "step": 7410 }, { "epoch": 0.33, "learning_rate": 4.4855860008028425e-05, "loss": 0.1725, "step": 7420 }, { "epoch": 0.33, "learning_rate": 4.4848426279716334e-05, "loss": 0.2174, "step": 7430 }, { "epoch": 0.33, "learning_rate": 4.484099255140423e-05, "loss": 0.1567, "step": 7440 }, { "epoch": 0.33, "learning_rate": 4.483355882309214e-05, "loss": 0.1823, "step": 7450 }, { "epoch": 0.33, "learning_rate": 4.4826125094780035e-05, "loss": 0.1842, "step": 7460 }, { "epoch": 0.33, "learning_rate": 4.481869136646794e-05, "loss": 0.185, "step": 7470 }, { "epoch": 0.33, "learning_rate": 4.481125763815584e-05, "loss": 0.187, "step": 7480 }, { "epoch": 0.33, "learning_rate": 4.480382390984374e-05, "loss": 0.2067, "step": 7490 }, { "epoch": 0.33, "learning_rate": 4.479639018153165e-05, "loss": 0.1527, "step": 7500 }, { "epoch": 0.33, "learning_rate": 4.478895645321955e-05, "loss": 0.183, "step": 7510 }, { "epoch": 0.33, "learning_rate": 4.478152272490746e-05, "loss": 0.1452, "step": 7520 }, { "epoch": 0.33, "learning_rate": 4.477408899659535e-05, "loss": 0.2031, "step": 7530 }, { "epoch": 0.33, "learning_rate": 4.4766655268283256e-05, "loss": 0.1597, "step": 7540 }, { "epoch": 0.33, "learning_rate": 4.475922153997116e-05, "loss": 0.1774, "step": 7550 }, { "epoch": 0.33, "learning_rate": 4.475178781165906e-05, "loss": 0.1288, "step": 7560 }, { "epoch": 0.34, "learning_rate": 4.4744354083346963e-05, "loss": 0.1923, "step": 7570 }, { "epoch": 0.34, "learning_rate": 4.4736920355034866e-05, "loss": 0.2991, "step": 7580 }, { "epoch": 0.34, "learning_rate": 4.472948662672277e-05, "loss": 0.2241, "step": 7590 }, { "epoch": 0.34, "learning_rate": 4.472205289841067e-05, "loss": 0.1494, "step": 7600 }, { "epoch": 0.34, "learning_rate": 4.4714619170098574e-05, "loss": 0.1467, "step": 7610 }, { "epoch": 0.34, "learning_rate": 4.4707185441786476e-05, "loss": 0.1669, "step": 7620 }, { "epoch": 0.34, "learning_rate": 4.469975171347438e-05, "loss": 0.1489, "step": 7630 }, { "epoch": 0.34, "learning_rate": 4.469231798516228e-05, "loss": 0.2123, "step": 7640 }, { "epoch": 0.34, "learning_rate": 4.4684884256850184e-05, "loss": 0.3353, "step": 7650 }, { "epoch": 0.34, "learning_rate": 4.467745052853809e-05, "loss": 0.198, "step": 7660 }, { "epoch": 0.34, "learning_rate": 4.467001680022599e-05, "loss": 0.1622, "step": 7670 }, { "epoch": 0.34, "learning_rate": 4.4662583071913885e-05, "loss": 0.195, "step": 7680 }, { "epoch": 0.34, "learning_rate": 4.4655149343601794e-05, "loss": 0.1574, "step": 7690 }, { "epoch": 0.34, "learning_rate": 4.464771561528969e-05, "loss": 0.1931, "step": 7700 }, { "epoch": 0.34, "learning_rate": 4.46402818869776e-05, "loss": 0.1859, "step": 7710 }, { "epoch": 0.34, "learning_rate": 4.4632848158665495e-05, "loss": 0.1755, "step": 7720 }, { "epoch": 0.34, "learning_rate": 4.4625414430353405e-05, "loss": 0.1947, "step": 7730 }, { "epoch": 0.34, "learning_rate": 4.461798070204131e-05, "loss": 0.156, "step": 7740 }, { "epoch": 0.34, "learning_rate": 4.46105469737292e-05, "loss": 0.1542, "step": 7750 }, { "epoch": 0.34, "learning_rate": 4.460311324541711e-05, "loss": 0.1595, "step": 7760 }, { "epoch": 0.34, "learning_rate": 4.459567951710501e-05, "loss": 0.1602, "step": 7770 }, { "epoch": 0.34, "learning_rate": 4.458824578879292e-05, "loss": 0.1505, "step": 7780 }, { "epoch": 0.34, "learning_rate": 4.4580812060480813e-05, "loss": 0.2428, "step": 7790 }, { "epoch": 0.35, "learning_rate": 4.4573378332168716e-05, "loss": 0.1991, "step": 7800 }, { "epoch": 0.35, "learning_rate": 4.456594460385662e-05, "loss": 0.173, "step": 7810 }, { "epoch": 0.35, "learning_rate": 4.455851087554452e-05, "loss": 0.2147, "step": 7820 }, { "epoch": 0.35, "learning_rate": 4.4551077147232424e-05, "loss": 0.1666, "step": 7830 }, { "epoch": 0.35, "learning_rate": 4.4543643418920326e-05, "loss": 0.1638, "step": 7840 }, { "epoch": 0.35, "learning_rate": 4.453620969060823e-05, "loss": 0.2126, "step": 7850 }, { "epoch": 0.35, "learning_rate": 4.452877596229613e-05, "loss": 0.2032, "step": 7860 }, { "epoch": 0.35, "learning_rate": 4.4521342233984034e-05, "loss": 0.2458, "step": 7870 }, { "epoch": 0.35, "learning_rate": 4.451390850567194e-05, "loss": 0.1685, "step": 7880 }, { "epoch": 0.35, "learning_rate": 4.450647477735984e-05, "loss": 0.1631, "step": 7890 }, { "epoch": 0.35, "learning_rate": 4.449904104904774e-05, "loss": 0.2465, "step": 7900 }, { "epoch": 0.35, "learning_rate": 4.4491607320735644e-05, "loss": 0.1393, "step": 7910 }, { "epoch": 0.35, "learning_rate": 4.448417359242355e-05, "loss": 0.1543, "step": 7920 }, { "epoch": 0.35, "learning_rate": 4.447673986411145e-05, "loss": 0.23, "step": 7930 }, { "epoch": 0.35, "learning_rate": 4.446930613579935e-05, "loss": 0.2117, "step": 7940 }, { "epoch": 0.35, "learning_rate": 4.4461872407487255e-05, "loss": 0.1753, "step": 7950 }, { "epoch": 0.35, "learning_rate": 4.445443867917515e-05, "loss": 0.2338, "step": 7960 }, { "epoch": 0.35, "learning_rate": 4.444700495086306e-05, "loss": 0.1755, "step": 7970 }, { "epoch": 0.35, "learning_rate": 4.443957122255096e-05, "loss": 0.1682, "step": 7980 }, { "epoch": 0.35, "learning_rate": 4.4432137494238865e-05, "loss": 0.2344, "step": 7990 }, { "epoch": 0.35, "learning_rate": 4.442470376592677e-05, "loss": 0.1985, "step": 8000 }, { "epoch": 0.35, "learning_rate": 4.4417270037614663e-05, "loss": 0.1698, "step": 8010 }, { "epoch": 0.36, "learning_rate": 4.440983630930257e-05, "loss": 0.1952, "step": 8020 }, { "epoch": 0.36, "learning_rate": 4.440240258099047e-05, "loss": 0.1922, "step": 8030 }, { "epoch": 0.36, "learning_rate": 4.439496885267838e-05, "loss": 0.2079, "step": 8040 }, { "epoch": 0.36, "learning_rate": 4.4387535124366274e-05, "loss": 0.1331, "step": 8050 }, { "epoch": 0.36, "learning_rate": 4.438010139605418e-05, "loss": 0.1648, "step": 8060 }, { "epoch": 0.36, "learning_rate": 4.437266766774208e-05, "loss": 0.1743, "step": 8070 }, { "epoch": 0.36, "learning_rate": 4.436523393942998e-05, "loss": 0.1767, "step": 8080 }, { "epoch": 0.36, "learning_rate": 4.4357800211117884e-05, "loss": 0.1595, "step": 8090 }, { "epoch": 0.36, "learning_rate": 4.435036648280579e-05, "loss": 0.1824, "step": 8100 }, { "epoch": 0.36, "learning_rate": 4.4342932754493696e-05, "loss": 0.1443, "step": 8110 }, { "epoch": 0.36, "learning_rate": 4.433549902618159e-05, "loss": 0.1655, "step": 8120 }, { "epoch": 0.36, "learning_rate": 4.4328065297869494e-05, "loss": 0.2075, "step": 8130 }, { "epoch": 0.36, "learning_rate": 4.43206315695574e-05, "loss": 0.2341, "step": 8140 }, { "epoch": 0.36, "learning_rate": 4.43131978412453e-05, "loss": 0.2391, "step": 8150 }, { "epoch": 0.36, "learning_rate": 4.43057641129332e-05, "loss": 0.1916, "step": 8160 }, { "epoch": 0.36, "learning_rate": 4.4298330384621105e-05, "loss": 0.1898, "step": 8170 }, { "epoch": 0.36, "learning_rate": 4.429089665630901e-05, "loss": 0.2031, "step": 8180 }, { "epoch": 0.36, "learning_rate": 4.428346292799691e-05, "loss": 0.2379, "step": 8190 }, { "epoch": 0.36, "learning_rate": 4.427602919968481e-05, "loss": 0.1596, "step": 8200 }, { "epoch": 0.36, "learning_rate": 4.4268595471372715e-05, "loss": 0.1925, "step": 8210 }, { "epoch": 0.36, "learning_rate": 4.426116174306061e-05, "loss": 0.1989, "step": 8220 }, { "epoch": 0.36, "learning_rate": 4.425372801474852e-05, "loss": 0.1838, "step": 8230 }, { "epoch": 0.36, "learning_rate": 4.424629428643642e-05, "loss": 0.1502, "step": 8240 }, { "epoch": 0.37, "learning_rate": 4.4238860558124325e-05, "loss": 0.1823, "step": 8250 }, { "epoch": 0.37, "learning_rate": 4.423142682981223e-05, "loss": 0.1457, "step": 8260 }, { "epoch": 0.37, "learning_rate": 4.422399310150013e-05, "loss": 0.172, "step": 8270 }, { "epoch": 0.37, "learning_rate": 4.421655937318803e-05, "loss": 0.1577, "step": 8280 }, { "epoch": 0.37, "learning_rate": 4.420912564487593e-05, "loss": 0.1843, "step": 8290 }, { "epoch": 0.37, "learning_rate": 4.420169191656384e-05, "loss": 0.1189, "step": 8300 }, { "epoch": 0.37, "learning_rate": 4.4194258188251734e-05, "loss": 0.1461, "step": 8310 }, { "epoch": 0.37, "learning_rate": 4.418682445993964e-05, "loss": 0.2097, "step": 8320 }, { "epoch": 0.37, "learning_rate": 4.417939073162754e-05, "loss": 0.1927, "step": 8330 }, { "epoch": 0.37, "learning_rate": 4.417195700331544e-05, "loss": 0.2222, "step": 8340 }, { "epoch": 0.37, "learning_rate": 4.416452327500335e-05, "loss": 0.1708, "step": 8350 }, { "epoch": 0.37, "learning_rate": 4.415708954669125e-05, "loss": 0.2081, "step": 8360 }, { "epoch": 0.37, "learning_rate": 4.4149655818379156e-05, "loss": 0.2185, "step": 8370 }, { "epoch": 0.37, "learning_rate": 4.414222209006705e-05, "loss": 0.157, "step": 8380 }, { "epoch": 0.37, "learning_rate": 4.413478836175496e-05, "loss": 0.1918, "step": 8390 }, { "epoch": 0.37, "learning_rate": 4.412735463344286e-05, "loss": 0.1609, "step": 8400 }, { "epoch": 0.37, "learning_rate": 4.411992090513076e-05, "loss": 0.1991, "step": 8410 }, { "epoch": 0.37, "learning_rate": 4.411248717681866e-05, "loss": 0.1783, "step": 8420 }, { "epoch": 0.37, "learning_rate": 4.4105053448506565e-05, "loss": 0.1905, "step": 8430 }, { "epoch": 0.37, "learning_rate": 4.409761972019447e-05, "loss": 0.1632, "step": 8440 }, { "epoch": 0.37, "learning_rate": 4.409018599188237e-05, "loss": 0.2332, "step": 8450 }, { "epoch": 0.37, "learning_rate": 4.408275226357027e-05, "loss": 0.1683, "step": 8460 }, { "epoch": 0.37, "learning_rate": 4.4075318535258175e-05, "loss": 0.2352, "step": 8470 }, { "epoch": 0.38, "learning_rate": 4.406788480694608e-05, "loss": 0.1741, "step": 8480 }, { "epoch": 0.38, "learning_rate": 4.406045107863398e-05, "loss": 0.1827, "step": 8490 }, { "epoch": 0.38, "learning_rate": 4.405301735032188e-05, "loss": 0.1279, "step": 8500 }, { "epoch": 0.38, "learning_rate": 4.4045583622009786e-05, "loss": 0.2747, "step": 8510 }, { "epoch": 0.38, "learning_rate": 4.403814989369769e-05, "loss": 0.1811, "step": 8520 }, { "epoch": 0.38, "learning_rate": 4.403071616538559e-05, "loss": 0.2085, "step": 8530 }, { "epoch": 0.38, "learning_rate": 4.402328243707349e-05, "loss": 0.2108, "step": 8540 }, { "epoch": 0.38, "learning_rate": 4.401584870876139e-05, "loss": 0.1667, "step": 8550 }, { "epoch": 0.38, "learning_rate": 4.40084149804493e-05, "loss": 0.2332, "step": 8560 }, { "epoch": 0.38, "learning_rate": 4.4000981252137194e-05, "loss": 0.1803, "step": 8570 }, { "epoch": 0.38, "learning_rate": 4.3993547523825104e-05, "loss": 0.1876, "step": 8580 }, { "epoch": 0.38, "learning_rate": 4.3986113795513e-05, "loss": 0.1883, "step": 8590 }, { "epoch": 0.38, "learning_rate": 4.397868006720091e-05, "loss": 0.1625, "step": 8600 }, { "epoch": 0.38, "learning_rate": 4.397124633888881e-05, "loss": 0.2853, "step": 8610 }, { "epoch": 0.38, "learning_rate": 4.396381261057671e-05, "loss": 0.2184, "step": 8620 }, { "epoch": 0.38, "learning_rate": 4.3956378882264617e-05, "loss": 0.1837, "step": 8630 }, { "epoch": 0.38, "learning_rate": 4.394894515395251e-05, "loss": 0.1706, "step": 8640 }, { "epoch": 0.38, "learning_rate": 4.394151142564042e-05, "loss": 0.1666, "step": 8650 }, { "epoch": 0.38, "learning_rate": 4.393407769732832e-05, "loss": 0.1953, "step": 8660 }, { "epoch": 0.38, "learning_rate": 4.392664396901622e-05, "loss": 0.1688, "step": 8670 }, { "epoch": 0.38, "learning_rate": 4.391921024070412e-05, "loss": 0.2084, "step": 8680 }, { "epoch": 0.38, "learning_rate": 4.3911776512392025e-05, "loss": 0.1513, "step": 8690 }, { "epoch": 0.39, "learning_rate": 4.390434278407993e-05, "loss": 0.1933, "step": 8700 }, { "epoch": 0.39, "learning_rate": 4.389690905576783e-05, "loss": 0.1418, "step": 8710 }, { "epoch": 0.39, "learning_rate": 4.388947532745574e-05, "loss": 0.2434, "step": 8720 }, { "epoch": 0.39, "learning_rate": 4.3882041599143636e-05, "loss": 0.233, "step": 8730 }, { "epoch": 0.39, "learning_rate": 4.387460787083154e-05, "loss": 0.2053, "step": 8740 }, { "epoch": 0.39, "learning_rate": 4.386717414251944e-05, "loss": 0.1655, "step": 8750 }, { "epoch": 0.39, "learning_rate": 4.385974041420734e-05, "loss": 0.2074, "step": 8760 }, { "epoch": 0.39, "learning_rate": 4.3852306685895246e-05, "loss": 0.1876, "step": 8770 }, { "epoch": 0.39, "learning_rate": 4.384487295758315e-05, "loss": 0.2027, "step": 8780 }, { "epoch": 0.39, "learning_rate": 4.383743922927105e-05, "loss": 0.2319, "step": 8790 }, { "epoch": 0.39, "learning_rate": 4.3830005500958954e-05, "loss": 0.1578, "step": 8800 }, { "epoch": 0.39, "learning_rate": 4.3822571772646856e-05, "loss": 0.1629, "step": 8810 }, { "epoch": 0.39, "learning_rate": 4.381513804433476e-05, "loss": 0.1372, "step": 8820 }, { "epoch": 0.39, "learning_rate": 4.3807704316022655e-05, "loss": 0.1521, "step": 8830 }, { "epoch": 0.39, "learning_rate": 4.3800270587710564e-05, "loss": 0.1603, "step": 8840 }, { "epoch": 0.39, "learning_rate": 4.3792836859398467e-05, "loss": 0.2025, "step": 8850 }, { "epoch": 0.39, "learning_rate": 4.378540313108637e-05, "loss": 0.1751, "step": 8860 }, { "epoch": 0.39, "learning_rate": 4.377796940277427e-05, "loss": 0.1769, "step": 8870 }, { "epoch": 0.39, "learning_rate": 4.377053567446217e-05, "loss": 0.1906, "step": 8880 }, { "epoch": 0.39, "learning_rate": 4.376310194615008e-05, "loss": 0.151, "step": 8890 }, { "epoch": 0.39, "learning_rate": 4.375566821783797e-05, "loss": 0.175, "step": 8900 }, { "epoch": 0.39, "learning_rate": 4.374823448952588e-05, "loss": 0.1883, "step": 8910 }, { "epoch": 0.39, "learning_rate": 4.374080076121378e-05, "loss": 0.2089, "step": 8920 }, { "epoch": 0.4, "learning_rate": 4.373336703290169e-05, "loss": 0.1568, "step": 8930 }, { "epoch": 0.4, "learning_rate": 4.372593330458958e-05, "loss": 0.1726, "step": 8940 }, { "epoch": 0.4, "learning_rate": 4.3718499576277486e-05, "loss": 0.2102, "step": 8950 }, { "epoch": 0.4, "learning_rate": 4.3711065847965395e-05, "loss": 0.2081, "step": 8960 }, { "epoch": 0.4, "learning_rate": 4.370363211965329e-05, "loss": 0.2228, "step": 8970 }, { "epoch": 0.4, "learning_rate": 4.36961983913412e-05, "loss": 0.1576, "step": 8980 }, { "epoch": 0.4, "learning_rate": 4.3688764663029096e-05, "loss": 0.1117, "step": 8990 }, { "epoch": 0.4, "learning_rate": 4.3681330934717005e-05, "loss": 0.1678, "step": 9000 }, { "epoch": 0.4, "learning_rate": 4.36738972064049e-05, "loss": 0.148, "step": 9010 }, { "epoch": 0.4, "learning_rate": 4.3666463478092804e-05, "loss": 0.1882, "step": 9020 }, { "epoch": 0.4, "learning_rate": 4.3659029749780706e-05, "loss": 0.2028, "step": 9030 }, { "epoch": 0.4, "learning_rate": 4.365159602146861e-05, "loss": 0.218, "step": 9040 }, { "epoch": 0.4, "learning_rate": 4.364416229315651e-05, "loss": 0.2028, "step": 9050 }, { "epoch": 0.4, "learning_rate": 4.3636728564844414e-05, "loss": 0.1605, "step": 9060 }, { "epoch": 0.4, "learning_rate": 4.3629294836532317e-05, "loss": 0.2059, "step": 9070 }, { "epoch": 0.4, "learning_rate": 4.362186110822022e-05, "loss": 0.1732, "step": 9080 }, { "epoch": 0.4, "learning_rate": 4.361442737990812e-05, "loss": 0.2279, "step": 9090 }, { "epoch": 0.4, "learning_rate": 4.3606993651596024e-05, "loss": 0.2288, "step": 9100 }, { "epoch": 0.4, "learning_rate": 4.359955992328393e-05, "loss": 0.1731, "step": 9110 }, { "epoch": 0.4, "learning_rate": 4.359212619497183e-05, "loss": 0.1761, "step": 9120 }, { "epoch": 0.4, "learning_rate": 4.358469246665973e-05, "loss": 0.1352, "step": 9130 }, { "epoch": 0.4, "learning_rate": 4.3577258738347635e-05, "loss": 0.22, "step": 9140 }, { "epoch": 0.41, "learning_rate": 4.356982501003554e-05, "loss": 0.1494, "step": 9150 }, { "epoch": 0.41, "learning_rate": 4.356239128172343e-05, "loss": 0.2318, "step": 9160 }, { "epoch": 0.41, "learning_rate": 4.355495755341134e-05, "loss": 0.2461, "step": 9170 }, { "epoch": 0.41, "learning_rate": 4.354752382509924e-05, "loss": 0.2035, "step": 9180 }, { "epoch": 0.41, "learning_rate": 4.354009009678715e-05, "loss": 0.2033, "step": 9190 }, { "epoch": 0.41, "learning_rate": 4.353265636847504e-05, "loss": 0.1743, "step": 9200 }, { "epoch": 0.41, "learning_rate": 4.352522264016295e-05, "loss": 0.1992, "step": 9210 }, { "epoch": 0.41, "learning_rate": 4.3517788911850855e-05, "loss": 0.2013, "step": 9220 }, { "epoch": 0.41, "learning_rate": 4.351035518353875e-05, "loss": 0.1515, "step": 9230 }, { "epoch": 0.41, "learning_rate": 4.350292145522666e-05, "loss": 0.2151, "step": 9240 }, { "epoch": 0.41, "learning_rate": 4.3495487726914556e-05, "loss": 0.155, "step": 9250 }, { "epoch": 0.41, "learning_rate": 4.3488053998602466e-05, "loss": 0.2296, "step": 9260 }, { "epoch": 0.41, "learning_rate": 4.348062027029036e-05, "loss": 0.1772, "step": 9270 }, { "epoch": 0.41, "learning_rate": 4.3473186541978264e-05, "loss": 0.2149, "step": 9280 }, { "epoch": 0.41, "learning_rate": 4.3465752813666166e-05, "loss": 0.197, "step": 9290 }, { "epoch": 0.41, "learning_rate": 4.345831908535407e-05, "loss": 0.1804, "step": 9300 }, { "epoch": 0.41, "learning_rate": 4.345088535704197e-05, "loss": 0.1558, "step": 9310 }, { "epoch": 0.41, "learning_rate": 4.3443451628729874e-05, "loss": 0.176, "step": 9320 }, { "epoch": 0.41, "learning_rate": 4.3436017900417784e-05, "loss": 0.1873, "step": 9330 }, { "epoch": 0.41, "learning_rate": 4.342858417210568e-05, "loss": 0.1178, "step": 9340 }, { "epoch": 0.41, "learning_rate": 4.342115044379358e-05, "loss": 0.1926, "step": 9350 }, { "epoch": 0.41, "learning_rate": 4.3413716715481485e-05, "loss": 0.1929, "step": 9360 }, { "epoch": 0.41, "learning_rate": 4.340628298716939e-05, "loss": 0.1413, "step": 9370 }, { "epoch": 0.42, "learning_rate": 4.339884925885729e-05, "loss": 0.1512, "step": 9380 }, { "epoch": 0.42, "learning_rate": 4.339141553054519e-05, "loss": 0.1591, "step": 9390 }, { "epoch": 0.42, "learning_rate": 4.3383981802233095e-05, "loss": 0.1942, "step": 9400 }, { "epoch": 0.42, "learning_rate": 4.3376548073921e-05, "loss": 0.1819, "step": 9410 }, { "epoch": 0.42, "learning_rate": 4.33691143456089e-05, "loss": 0.1817, "step": 9420 }, { "epoch": 0.42, "learning_rate": 4.33616806172968e-05, "loss": 0.2336, "step": 9430 }, { "epoch": 0.42, "learning_rate": 4.33542468889847e-05, "loss": 0.1976, "step": 9440 }, { "epoch": 0.42, "learning_rate": 4.334681316067261e-05, "loss": 0.2018, "step": 9450 }, { "epoch": 0.42, "learning_rate": 4.333937943236051e-05, "loss": 0.2117, "step": 9460 }, { "epoch": 0.42, "learning_rate": 4.333194570404841e-05, "loss": 0.1811, "step": 9470 }, { "epoch": 0.42, "learning_rate": 4.3324511975736315e-05, "loss": 0.2271, "step": 9480 }, { "epoch": 0.42, "learning_rate": 4.331707824742421e-05, "loss": 0.1652, "step": 9490 }, { "epoch": 0.42, "learning_rate": 4.330964451911212e-05, "loss": 0.1381, "step": 9500 }, { "epoch": 0.42, "learning_rate": 4.3302210790800016e-05, "loss": 0.1873, "step": 9510 }, { "epoch": 0.42, "learning_rate": 4.3294777062487926e-05, "loss": 0.2037, "step": 9520 }, { "epoch": 0.42, "learning_rate": 4.328734333417582e-05, "loss": 0.221, "step": 9530 }, { "epoch": 0.42, "learning_rate": 4.327990960586373e-05, "loss": 0.1828, "step": 9540 }, { "epoch": 0.42, "learning_rate": 4.327247587755163e-05, "loss": 0.1559, "step": 9550 }, { "epoch": 0.42, "learning_rate": 4.326504214923953e-05, "loss": 0.1651, "step": 9560 }, { "epoch": 0.42, "learning_rate": 4.325760842092743e-05, "loss": 0.2335, "step": 9570 }, { "epoch": 0.42, "learning_rate": 4.3250174692615335e-05, "loss": 0.2048, "step": 9580 }, { "epoch": 0.42, "learning_rate": 4.3242740964303244e-05, "loss": 0.2018, "step": 9590 }, { "epoch": 0.43, "learning_rate": 4.323530723599114e-05, "loss": 0.1932, "step": 9600 }, { "epoch": 0.43, "learning_rate": 4.322787350767904e-05, "loss": 0.1778, "step": 9610 }, { "epoch": 0.43, "learning_rate": 4.3220439779366945e-05, "loss": 0.156, "step": 9620 }, { "epoch": 0.43, "learning_rate": 4.321300605105485e-05, "loss": 0.1811, "step": 9630 }, { "epoch": 0.43, "learning_rate": 4.320557232274275e-05, "loss": 0.1856, "step": 9640 }, { "epoch": 0.43, "learning_rate": 4.319813859443065e-05, "loss": 0.1459, "step": 9650 }, { "epoch": 0.43, "learning_rate": 4.3190704866118555e-05, "loss": 0.1755, "step": 9660 }, { "epoch": 0.43, "learning_rate": 4.318327113780646e-05, "loss": 0.1926, "step": 9670 }, { "epoch": 0.43, "learning_rate": 4.317583740949436e-05, "loss": 0.2518, "step": 9680 }, { "epoch": 0.43, "learning_rate": 4.316840368118226e-05, "loss": 0.177, "step": 9690 }, { "epoch": 0.43, "learning_rate": 4.3160969952870165e-05, "loss": 0.1754, "step": 9700 }, { "epoch": 0.43, "learning_rate": 4.315353622455807e-05, "loss": 0.1401, "step": 9710 }, { "epoch": 0.43, "learning_rate": 4.314610249624597e-05, "loss": 0.2021, "step": 9720 }, { "epoch": 0.43, "learning_rate": 4.313866876793387e-05, "loss": 0.1773, "step": 9730 }, { "epoch": 0.43, "learning_rate": 4.3131235039621776e-05, "loss": 0.2316, "step": 9740 }, { "epoch": 0.43, "learning_rate": 4.312380131130968e-05, "loss": 0.1687, "step": 9750 }, { "epoch": 0.43, "learning_rate": 4.311636758299758e-05, "loss": 0.2084, "step": 9760 }, { "epoch": 0.43, "learning_rate": 4.310893385468548e-05, "loss": 0.1606, "step": 9770 }, { "epoch": 0.43, "learning_rate": 4.3101500126373386e-05, "loss": 0.1924, "step": 9780 }, { "epoch": 0.43, "learning_rate": 4.309406639806128e-05, "loss": 0.2141, "step": 9790 }, { "epoch": 0.43, "learning_rate": 4.308663266974919e-05, "loss": 0.2403, "step": 9800 }, { "epoch": 0.43, "learning_rate": 4.307919894143709e-05, "loss": 0.1721, "step": 9810 }, { "epoch": 0.43, "learning_rate": 4.307176521312499e-05, "loss": 0.1882, "step": 9820 }, { "epoch": 0.44, "learning_rate": 4.30643314848129e-05, "loss": 0.208, "step": 9830 }, { "epoch": 0.44, "learning_rate": 4.3056897756500795e-05, "loss": 0.2206, "step": 9840 }, { "epoch": 0.44, "learning_rate": 4.3049464028188704e-05, "loss": 0.2175, "step": 9850 }, { "epoch": 0.44, "learning_rate": 4.30420302998766e-05, "loss": 0.1377, "step": 9860 }, { "epoch": 0.44, "learning_rate": 4.303459657156451e-05, "loss": 0.1895, "step": 9870 }, { "epoch": 0.44, "learning_rate": 4.3027162843252405e-05, "loss": 0.1664, "step": 9880 }, { "epoch": 0.44, "learning_rate": 4.301972911494031e-05, "loss": 0.1151, "step": 9890 }, { "epoch": 0.44, "learning_rate": 4.301229538662821e-05, "loss": 0.2653, "step": 9900 }, { "epoch": 0.44, "learning_rate": 4.300486165831611e-05, "loss": 0.2095, "step": 9910 }, { "epoch": 0.44, "learning_rate": 4.2997427930004015e-05, "loss": 0.1623, "step": 9920 }, { "epoch": 0.44, "learning_rate": 4.298999420169192e-05, "loss": 0.1782, "step": 9930 }, { "epoch": 0.44, "learning_rate": 4.298256047337982e-05, "loss": 0.1388, "step": 9940 }, { "epoch": 0.44, "learning_rate": 4.297512674506772e-05, "loss": 0.1769, "step": 9950 }, { "epoch": 0.44, "learning_rate": 4.2967693016755626e-05, "loss": 0.1942, "step": 9960 }, { "epoch": 0.44, "learning_rate": 4.296025928844353e-05, "loss": 0.2192, "step": 9970 }, { "epoch": 0.44, "learning_rate": 4.295282556013143e-05, "loss": 0.177, "step": 9980 }, { "epoch": 0.44, "learning_rate": 4.2945391831819333e-05, "loss": 0.1848, "step": 9990 }, { "epoch": 0.44, "learning_rate": 4.2937958103507236e-05, "loss": 0.1526, "step": 10000 }, { "epoch": 0.44, "learning_rate": 4.293052437519514e-05, "loss": 0.1877, "step": 10010 }, { "epoch": 0.44, "learning_rate": 4.292309064688304e-05, "loss": 0.2167, "step": 10020 }, { "epoch": 0.44, "learning_rate": 4.291565691857094e-05, "loss": 0.1992, "step": 10030 }, { "epoch": 0.44, "learning_rate": 4.2908223190258846e-05, "loss": 0.1591, "step": 10040 }, { "epoch": 0.44, "learning_rate": 4.290078946194674e-05, "loss": 0.1507, "step": 10050 }, { "epoch": 0.45, "learning_rate": 4.289335573363465e-05, "loss": 0.2026, "step": 10060 }, { "epoch": 0.45, "learning_rate": 4.2885922005322554e-05, "loss": 0.1905, "step": 10070 }, { "epoch": 0.45, "learning_rate": 4.287848827701046e-05, "loss": 0.1971, "step": 10080 }, { "epoch": 0.45, "learning_rate": 4.287105454869836e-05, "loss": 0.2067, "step": 10090 }, { "epoch": 0.45, "learning_rate": 4.2863620820386255e-05, "loss": 0.1914, "step": 10100 }, { "epoch": 0.45, "learning_rate": 4.2856187092074164e-05, "loss": 0.1668, "step": 10110 }, { "epoch": 0.45, "learning_rate": 4.284875336376206e-05, "loss": 0.1812, "step": 10120 }, { "epoch": 0.45, "learning_rate": 4.284131963544997e-05, "loss": 0.1857, "step": 10130 }, { "epoch": 0.45, "learning_rate": 4.2833885907137865e-05, "loss": 0.2499, "step": 10140 }, { "epoch": 0.45, "learning_rate": 4.282645217882577e-05, "loss": 0.1931, "step": 10150 }, { "epoch": 0.45, "learning_rate": 4.281901845051367e-05, "loss": 0.1974, "step": 10160 }, { "epoch": 0.45, "learning_rate": 4.281158472220157e-05, "loss": 0.1624, "step": 10170 }, { "epoch": 0.45, "learning_rate": 4.2804150993889476e-05, "loss": 0.1683, "step": 10180 }, { "epoch": 0.45, "learning_rate": 4.279671726557738e-05, "loss": 0.2189, "step": 10190 }, { "epoch": 0.45, "learning_rate": 4.278928353726529e-05, "loss": 0.1823, "step": 10200 }, { "epoch": 0.45, "learning_rate": 4.2781849808953183e-05, "loss": 0.1606, "step": 10210 }, { "epoch": 0.45, "learning_rate": 4.2774416080641086e-05, "loss": 0.1624, "step": 10220 }, { "epoch": 0.45, "learning_rate": 4.276698235232899e-05, "loss": 0.2127, "step": 10230 }, { "epoch": 0.45, "learning_rate": 4.275954862401689e-05, "loss": 0.1862, "step": 10240 }, { "epoch": 0.45, "learning_rate": 4.2752114895704794e-05, "loss": 0.1648, "step": 10250 }, { "epoch": 0.45, "learning_rate": 4.2744681167392696e-05, "loss": 0.1303, "step": 10260 }, { "epoch": 0.45, "learning_rate": 4.27372474390806e-05, "loss": 0.1737, "step": 10270 }, { "epoch": 0.46, "learning_rate": 4.27298137107685e-05, "loss": 0.1427, "step": 10280 }, { "epoch": 0.46, "learning_rate": 4.2722379982456404e-05, "loss": 0.168, "step": 10290 }, { "epoch": 0.46, "learning_rate": 4.271494625414431e-05, "loss": 0.1027, "step": 10300 }, { "epoch": 0.46, "learning_rate": 4.270751252583221e-05, "loss": 0.1964, "step": 10310 }, { "epoch": 0.46, "learning_rate": 4.270007879752011e-05, "loss": 0.1633, "step": 10320 }, { "epoch": 0.46, "learning_rate": 4.2692645069208014e-05, "loss": 0.1868, "step": 10330 }, { "epoch": 0.46, "learning_rate": 4.268521134089592e-05, "loss": 0.1565, "step": 10340 }, { "epoch": 0.46, "learning_rate": 4.267777761258382e-05, "loss": 0.1755, "step": 10350 }, { "epoch": 0.46, "learning_rate": 4.2670343884271715e-05, "loss": 0.2009, "step": 10360 }, { "epoch": 0.46, "learning_rate": 4.2662910155959625e-05, "loss": 0.1833, "step": 10370 }, { "epoch": 0.46, "learning_rate": 4.265547642764752e-05, "loss": 0.1437, "step": 10380 }, { "epoch": 0.46, "learning_rate": 4.264804269933543e-05, "loss": 0.1884, "step": 10390 }, { "epoch": 0.46, "learning_rate": 4.2640608971023326e-05, "loss": 0.1476, "step": 10400 }, { "epoch": 0.46, "learning_rate": 4.2633175242711235e-05, "loss": 0.29, "step": 10410 }, { "epoch": 0.46, "learning_rate": 4.262574151439913e-05, "loss": 0.1502, "step": 10420 }, { "epoch": 0.46, "learning_rate": 4.2618307786087033e-05, "loss": 0.1783, "step": 10430 }, { "epoch": 0.46, "learning_rate": 4.261087405777494e-05, "loss": 0.18, "step": 10440 }, { "epoch": 0.46, "learning_rate": 4.260344032946284e-05, "loss": 0.1765, "step": 10450 }, { "epoch": 0.46, "learning_rate": 4.259600660115075e-05, "loss": 0.2009, "step": 10460 }, { "epoch": 0.46, "learning_rate": 4.2588572872838644e-05, "loss": 0.1483, "step": 10470 }, { "epoch": 0.46, "learning_rate": 4.2581139144526546e-05, "loss": 0.1918, "step": 10480 }, { "epoch": 0.46, "learning_rate": 4.257370541621445e-05, "loss": 0.1858, "step": 10490 }, { "epoch": 0.46, "learning_rate": 4.256627168790235e-05, "loss": 0.1533, "step": 10500 }, { "epoch": 0.47, "learning_rate": 4.2558837959590254e-05, "loss": 0.1449, "step": 10510 }, { "epoch": 0.47, "learning_rate": 4.255140423127816e-05, "loss": 0.1625, "step": 10520 }, { "epoch": 0.47, "learning_rate": 4.254397050296606e-05, "loss": 0.1632, "step": 10530 }, { "epoch": 0.47, "learning_rate": 4.253653677465396e-05, "loss": 0.2077, "step": 10540 }, { "epoch": 0.47, "learning_rate": 4.2529103046341864e-05, "loss": 0.1443, "step": 10550 }, { "epoch": 0.47, "learning_rate": 4.252166931802977e-05, "loss": 0.1493, "step": 10560 }, { "epoch": 0.47, "learning_rate": 4.251423558971767e-05, "loss": 0.1616, "step": 10570 }, { "epoch": 0.47, "learning_rate": 4.250680186140557e-05, "loss": 0.2514, "step": 10580 }, { "epoch": 0.47, "learning_rate": 4.2499368133093475e-05, "loss": 0.1831, "step": 10590 }, { "epoch": 0.47, "learning_rate": 4.249193440478138e-05, "loss": 0.1393, "step": 10600 }, { "epoch": 0.47, "learning_rate": 4.248450067646928e-05, "loss": 0.11, "step": 10610 }, { "epoch": 0.47, "learning_rate": 4.247706694815718e-05, "loss": 0.1924, "step": 10620 }, { "epoch": 0.47, "learning_rate": 4.2469633219845085e-05, "loss": 0.1566, "step": 10630 }, { "epoch": 0.47, "learning_rate": 4.246219949153298e-05, "loss": 0.1768, "step": 10640 }, { "epoch": 0.47, "learning_rate": 4.245476576322089e-05, "loss": 0.1662, "step": 10650 }, { "epoch": 0.47, "learning_rate": 4.2447332034908786e-05, "loss": 0.1691, "step": 10660 }, { "epoch": 0.47, "learning_rate": 4.2439898306596695e-05, "loss": 0.1934, "step": 10670 }, { "epoch": 0.47, "learning_rate": 4.24324645782846e-05, "loss": 0.1282, "step": 10680 }, { "epoch": 0.47, "learning_rate": 4.2425030849972494e-05, "loss": 0.1914, "step": 10690 }, { "epoch": 0.47, "learning_rate": 4.24175971216604e-05, "loss": 0.176, "step": 10700 }, { "epoch": 0.47, "learning_rate": 4.24101633933483e-05, "loss": 0.1909, "step": 10710 }, { "epoch": 0.47, "learning_rate": 4.240272966503621e-05, "loss": 0.1449, "step": 10720 }, { "epoch": 0.48, "learning_rate": 4.2395295936724104e-05, "loss": 0.1908, "step": 10730 }, { "epoch": 0.48, "learning_rate": 4.2387862208412013e-05, "loss": 0.1826, "step": 10740 }, { "epoch": 0.48, "learning_rate": 4.238042848009991e-05, "loss": 0.2147, "step": 10750 }, { "epoch": 0.48, "learning_rate": 4.237299475178781e-05, "loss": 0.202, "step": 10760 }, { "epoch": 0.48, "learning_rate": 4.2365561023475714e-05, "loss": 0.1779, "step": 10770 }, { "epoch": 0.48, "learning_rate": 4.235812729516362e-05, "loss": 0.2102, "step": 10780 }, { "epoch": 0.48, "learning_rate": 4.235069356685152e-05, "loss": 0.1965, "step": 10790 }, { "epoch": 0.48, "learning_rate": 4.234325983853942e-05, "loss": 0.1599, "step": 10800 }, { "epoch": 0.48, "learning_rate": 4.2335826110227325e-05, "loss": 0.1417, "step": 10810 }, { "epoch": 0.48, "learning_rate": 4.232839238191523e-05, "loss": 0.2261, "step": 10820 }, { "epoch": 0.48, "learning_rate": 4.232095865360313e-05, "loss": 0.2257, "step": 10830 }, { "epoch": 0.48, "learning_rate": 4.231352492529103e-05, "loss": 0.1711, "step": 10840 }, { "epoch": 0.48, "learning_rate": 4.2306091196978935e-05, "loss": 0.1429, "step": 10850 }, { "epoch": 0.48, "learning_rate": 4.229865746866684e-05, "loss": 0.1874, "step": 10860 }, { "epoch": 0.48, "learning_rate": 4.229122374035474e-05, "loss": 0.1569, "step": 10870 }, { "epoch": 0.48, "learning_rate": 4.228379001204264e-05, "loss": 0.1293, "step": 10880 }, { "epoch": 0.48, "learning_rate": 4.2276356283730545e-05, "loss": 0.2276, "step": 10890 }, { "epoch": 0.48, "learning_rate": 4.226892255541844e-05, "loss": 0.222, "step": 10900 }, { "epoch": 0.48, "learning_rate": 4.226148882710635e-05, "loss": 0.1669, "step": 10910 }, { "epoch": 0.48, "learning_rate": 4.225405509879425e-05, "loss": 0.271, "step": 10920 }, { "epoch": 0.48, "learning_rate": 4.2246621370482156e-05, "loss": 0.1592, "step": 10930 }, { "epoch": 0.48, "learning_rate": 4.223918764217006e-05, "loss": 0.1148, "step": 10940 }, { "epoch": 0.48, "learning_rate": 4.223175391385796e-05, "loss": 0.1754, "step": 10950 }, { "epoch": 0.49, "learning_rate": 4.222432018554586e-05, "loss": 0.1576, "step": 10960 }, { "epoch": 0.49, "learning_rate": 4.221688645723376e-05, "loss": 0.2041, "step": 10970 }, { "epoch": 0.49, "learning_rate": 4.220945272892167e-05, "loss": 0.1454, "step": 10980 }, { "epoch": 0.49, "learning_rate": 4.2202019000609564e-05, "loss": 0.228, "step": 10990 }, { "epoch": 0.49, "learning_rate": 4.2194585272297474e-05, "loss": 0.1656, "step": 11000 }, { "epoch": 0.49, "learning_rate": 4.218715154398537e-05, "loss": 0.1957, "step": 11010 }, { "epoch": 0.49, "learning_rate": 4.217971781567327e-05, "loss": 0.1803, "step": 11020 }, { "epoch": 0.49, "learning_rate": 4.2172284087361175e-05, "loss": 0.1226, "step": 11030 }, { "epoch": 0.49, "learning_rate": 4.216485035904908e-05, "loss": 0.1997, "step": 11040 }, { "epoch": 0.49, "learning_rate": 4.2157416630736987e-05, "loss": 0.1402, "step": 11050 }, { "epoch": 0.49, "learning_rate": 4.214998290242488e-05, "loss": 0.2317, "step": 11060 }, { "epoch": 0.49, "learning_rate": 4.214254917411279e-05, "loss": 0.1692, "step": 11070 }, { "epoch": 0.49, "learning_rate": 4.213511544580069e-05, "loss": 0.1831, "step": 11080 }, { "epoch": 0.49, "learning_rate": 4.212768171748859e-05, "loss": 0.1345, "step": 11090 }, { "epoch": 0.49, "learning_rate": 4.212024798917649e-05, "loss": 0.1753, "step": 11100 }, { "epoch": 0.49, "learning_rate": 4.2112814260864395e-05, "loss": 0.127, "step": 11110 }, { "epoch": 0.49, "learning_rate": 4.21053805325523e-05, "loss": 0.1869, "step": 11120 }, { "epoch": 0.49, "learning_rate": 4.20979468042402e-05, "loss": 0.143, "step": 11130 }, { "epoch": 0.49, "learning_rate": 4.20905130759281e-05, "loss": 0.1862, "step": 11140 }, { "epoch": 0.49, "learning_rate": 4.2083079347616006e-05, "loss": 0.1689, "step": 11150 }, { "epoch": 0.49, "learning_rate": 4.207564561930391e-05, "loss": 0.1941, "step": 11160 }, { "epoch": 0.49, "learning_rate": 4.206821189099181e-05, "loss": 0.1832, "step": 11170 }, { "epoch": 0.49, "learning_rate": 4.206077816267971e-05, "loss": 0.221, "step": 11180 }, { "epoch": 0.5, "learning_rate": 4.2053344434367616e-05, "loss": 0.223, "step": 11190 }, { "epoch": 0.5, "learning_rate": 4.204591070605552e-05, "loss": 0.1267, "step": 11200 }, { "epoch": 0.5, "learning_rate": 4.203847697774342e-05, "loss": 0.2387, "step": 11210 }, { "epoch": 0.5, "learning_rate": 4.2031043249431324e-05, "loss": 0.1886, "step": 11220 }, { "epoch": 0.5, "learning_rate": 4.202360952111922e-05, "loss": 0.1446, "step": 11230 }, { "epoch": 0.5, "learning_rate": 4.201617579280713e-05, "loss": 0.1956, "step": 11240 }, { "epoch": 0.5, "learning_rate": 4.2008742064495025e-05, "loss": 0.2077, "step": 11250 }, { "epoch": 0.5, "learning_rate": 4.2001308336182934e-05, "loss": 0.1956, "step": 11260 }, { "epoch": 0.5, "learning_rate": 4.199387460787083e-05, "loss": 0.1965, "step": 11270 }, { "epoch": 0.5, "learning_rate": 4.198644087955874e-05, "loss": 0.1536, "step": 11280 }, { "epoch": 0.5, "learning_rate": 4.197900715124664e-05, "loss": 0.1262, "step": 11290 }, { "epoch": 0.5, "learning_rate": 4.197157342293454e-05, "loss": 0.1763, "step": 11300 }, { "epoch": 0.5, "learning_rate": 4.196413969462245e-05, "loss": 0.2091, "step": 11310 }, { "epoch": 0.5, "learning_rate": 4.195670596631034e-05, "loss": 0.1659, "step": 11320 }, { "epoch": 0.5, "learning_rate": 4.194927223799825e-05, "loss": 0.2228, "step": 11330 }, { "epoch": 0.5, "learning_rate": 4.194183850968615e-05, "loss": 0.1514, "step": 11340 }, { "epoch": 0.5, "learning_rate": 4.193440478137405e-05, "loss": 0.1927, "step": 11350 }, { "epoch": 0.5, "learning_rate": 4.192697105306195e-05, "loss": 0.1106, "step": 11360 }, { "epoch": 0.5, "learning_rate": 4.1919537324749856e-05, "loss": 0.185, "step": 11370 }, { "epoch": 0.5, "learning_rate": 4.191210359643776e-05, "loss": 0.1922, "step": 11380 }, { "epoch": 0.5, "learning_rate": 4.190466986812566e-05, "loss": 0.1806, "step": 11390 }, { "epoch": 0.5, "learning_rate": 4.189723613981356e-05, "loss": 0.1396, "step": 11400 }, { "epoch": 0.51, "learning_rate": 4.1889802411501466e-05, "loss": 0.1341, "step": 11410 }, { "epoch": 0.51, "learning_rate": 4.188236868318937e-05, "loss": 0.1837, "step": 11420 }, { "epoch": 0.51, "learning_rate": 4.187493495487727e-05, "loss": 0.1966, "step": 11430 }, { "epoch": 0.51, "learning_rate": 4.1867501226565174e-05, "loss": 0.1267, "step": 11440 }, { "epoch": 0.51, "learning_rate": 4.1860067498253076e-05, "loss": 0.1613, "step": 11450 }, { "epoch": 0.51, "learning_rate": 4.185263376994098e-05, "loss": 0.182, "step": 11460 }, { "epoch": 0.51, "learning_rate": 4.184520004162888e-05, "loss": 0.1343, "step": 11470 }, { "epoch": 0.51, "learning_rate": 4.1837766313316784e-05, "loss": 0.1839, "step": 11480 }, { "epoch": 0.51, "learning_rate": 4.1830332585004687e-05, "loss": 0.1159, "step": 11490 }, { "epoch": 0.51, "learning_rate": 4.182289885669259e-05, "loss": 0.2091, "step": 11500 }, { "epoch": 0.51, "learning_rate": 4.1815465128380485e-05, "loss": 0.1262, "step": 11510 }, { "epoch": 0.51, "learning_rate": 4.1808031400068394e-05, "loss": 0.157, "step": 11520 }, { "epoch": 0.51, "learning_rate": 4.180059767175629e-05, "loss": 0.2166, "step": 11530 }, { "epoch": 0.51, "learning_rate": 4.17931639434442e-05, "loss": 0.2015, "step": 11540 }, { "epoch": 0.51, "learning_rate": 4.17857302151321e-05, "loss": 0.1893, "step": 11550 }, { "epoch": 0.51, "learning_rate": 4.177829648682e-05, "loss": 0.1795, "step": 11560 }, { "epoch": 0.51, "learning_rate": 4.177086275850791e-05, "loss": 0.1839, "step": 11570 }, { "epoch": 0.51, "learning_rate": 4.17634290301958e-05, "loss": 0.2249, "step": 11580 }, { "epoch": 0.51, "learning_rate": 4.175599530188371e-05, "loss": 0.1665, "step": 11590 }, { "epoch": 0.51, "learning_rate": 4.174856157357161e-05, "loss": 0.1706, "step": 11600 }, { "epoch": 0.51, "learning_rate": 4.174112784525952e-05, "loss": 0.2219, "step": 11610 }, { "epoch": 0.51, "learning_rate": 4.173369411694741e-05, "loss": 0.2024, "step": 11620 }, { "epoch": 0.51, "learning_rate": 4.1726260388635316e-05, "loss": 0.1554, "step": 11630 }, { "epoch": 0.52, "learning_rate": 4.171882666032322e-05, "loss": 0.1359, "step": 11640 }, { "epoch": 0.52, "learning_rate": 4.171139293201112e-05, "loss": 0.2087, "step": 11650 }, { "epoch": 0.52, "learning_rate": 4.170395920369903e-05, "loss": 0.1365, "step": 11660 }, { "epoch": 0.52, "learning_rate": 4.1696525475386926e-05, "loss": 0.1809, "step": 11670 }, { "epoch": 0.52, "learning_rate": 4.168909174707483e-05, "loss": 0.1772, "step": 11680 }, { "epoch": 0.52, "learning_rate": 4.168165801876273e-05, "loss": 0.175, "step": 11690 }, { "epoch": 0.52, "learning_rate": 4.1674224290450634e-05, "loss": 0.2019, "step": 11700 }, { "epoch": 0.52, "learning_rate": 4.1666790562138537e-05, "loss": 0.1415, "step": 11710 }, { "epoch": 0.52, "learning_rate": 4.165935683382644e-05, "loss": 0.2348, "step": 11720 }, { "epoch": 0.52, "learning_rate": 4.165192310551434e-05, "loss": 0.2319, "step": 11730 }, { "epoch": 0.52, "learning_rate": 4.1644489377202244e-05, "loss": 0.1982, "step": 11740 }, { "epoch": 0.52, "learning_rate": 4.163705564889015e-05, "loss": 0.2195, "step": 11750 }, { "epoch": 0.52, "learning_rate": 4.162962192057805e-05, "loss": 0.2094, "step": 11760 }, { "epoch": 0.52, "learning_rate": 4.1622188192265945e-05, "loss": 0.1892, "step": 11770 }, { "epoch": 0.52, "learning_rate": 4.1614754463953855e-05, "loss": 0.1708, "step": 11780 }, { "epoch": 0.52, "learning_rate": 4.160732073564176e-05, "loss": 0.2122, "step": 11790 }, { "epoch": 0.52, "learning_rate": 4.159988700732966e-05, "loss": 0.1743, "step": 11800 }, { "epoch": 0.52, "learning_rate": 4.159245327901756e-05, "loss": 0.1784, "step": 11810 }, { "epoch": 0.52, "learning_rate": 4.1585019550705465e-05, "loss": 0.1261, "step": 11820 }, { "epoch": 0.52, "learning_rate": 4.157758582239337e-05, "loss": 0.207, "step": 11830 }, { "epoch": 0.52, "learning_rate": 4.157015209408126e-05, "loss": 0.1213, "step": 11840 }, { "epoch": 0.52, "learning_rate": 4.156271836576917e-05, "loss": 0.2179, "step": 11850 }, { "epoch": 0.53, "learning_rate": 4.155528463745707e-05, "loss": 0.1774, "step": 11860 }, { "epoch": 0.53, "learning_rate": 4.154785090914498e-05, "loss": 0.1488, "step": 11870 }, { "epoch": 0.53, "learning_rate": 4.1540417180832874e-05, "loss": 0.1256, "step": 11880 }, { "epoch": 0.53, "learning_rate": 4.1532983452520776e-05, "loss": 0.1851, "step": 11890 }, { "epoch": 0.53, "learning_rate": 4.152554972420868e-05, "loss": 0.177, "step": 11900 }, { "epoch": 0.53, "learning_rate": 4.151811599589658e-05, "loss": 0.1401, "step": 11910 }, { "epoch": 0.53, "learning_rate": 4.151068226758449e-05, "loss": 0.2625, "step": 11920 }, { "epoch": 0.53, "learning_rate": 4.1503248539272386e-05, "loss": 0.2286, "step": 11930 }, { "epoch": 0.53, "learning_rate": 4.1495814810960296e-05, "loss": 0.1781, "step": 11940 }, { "epoch": 0.53, "learning_rate": 4.148838108264819e-05, "loss": 0.18, "step": 11950 }, { "epoch": 0.53, "learning_rate": 4.1480947354336094e-05, "loss": 0.2136, "step": 11960 }, { "epoch": 0.53, "learning_rate": 4.1473513626024e-05, "loss": 0.173, "step": 11970 }, { "epoch": 0.53, "learning_rate": 4.14660798977119e-05, "loss": 0.2064, "step": 11980 }, { "epoch": 0.53, "learning_rate": 4.14586461693998e-05, "loss": 0.141, "step": 11990 }, { "epoch": 0.53, "learning_rate": 4.1451212441087705e-05, "loss": 0.2221, "step": 12000 }, { "epoch": 0.53, "learning_rate": 4.144377871277561e-05, "loss": 0.167, "step": 12010 }, { "epoch": 0.53, "learning_rate": 4.143634498446351e-05, "loss": 0.1626, "step": 12020 }, { "epoch": 0.53, "learning_rate": 4.142891125615141e-05, "loss": 0.1911, "step": 12030 }, { "epoch": 0.53, "learning_rate": 4.1421477527839315e-05, "loss": 0.1788, "step": 12040 }, { "epoch": 0.53, "learning_rate": 4.141404379952722e-05, "loss": 0.1812, "step": 12050 }, { "epoch": 0.53, "learning_rate": 4.140661007121512e-05, "loss": 0.1321, "step": 12060 }, { "epoch": 0.53, "learning_rate": 4.139917634290302e-05, "loss": 0.1724, "step": 12070 }, { "epoch": 0.53, "learning_rate": 4.1391742614590925e-05, "loss": 0.2177, "step": 12080 }, { "epoch": 0.54, "learning_rate": 4.138430888627883e-05, "loss": 0.1423, "step": 12090 }, { "epoch": 0.54, "learning_rate": 4.1376875157966724e-05, "loss": 0.1969, "step": 12100 }, { "epoch": 0.54, "learning_rate": 4.136944142965463e-05, "loss": 0.163, "step": 12110 }, { "epoch": 0.54, "learning_rate": 4.136200770134253e-05, "loss": 0.1761, "step": 12120 }, { "epoch": 0.54, "learning_rate": 4.135457397303044e-05, "loss": 0.1902, "step": 12130 }, { "epoch": 0.54, "learning_rate": 4.1347140244718334e-05, "loss": 0.1504, "step": 12140 }, { "epoch": 0.54, "learning_rate": 4.133970651640624e-05, "loss": 0.1742, "step": 12150 }, { "epoch": 0.54, "learning_rate": 4.1332272788094146e-05, "loss": 0.1715, "step": 12160 }, { "epoch": 0.54, "learning_rate": 4.132483905978204e-05, "loss": 0.1676, "step": 12170 }, { "epoch": 0.54, "learning_rate": 4.131740533146995e-05, "loss": 0.1426, "step": 12180 }, { "epoch": 0.54, "learning_rate": 4.130997160315785e-05, "loss": 0.2042, "step": 12190 }, { "epoch": 0.54, "learning_rate": 4.1302537874845756e-05, "loss": 0.1408, "step": 12200 }, { "epoch": 0.54, "learning_rate": 4.129510414653365e-05, "loss": 0.1537, "step": 12210 }, { "epoch": 0.54, "learning_rate": 4.128767041822156e-05, "loss": 0.1971, "step": 12220 }, { "epoch": 0.54, "learning_rate": 4.128023668990946e-05, "loss": 0.2049, "step": 12230 }, { "epoch": 0.54, "learning_rate": 4.127280296159736e-05, "loss": 0.1791, "step": 12240 }, { "epoch": 0.54, "learning_rate": 4.126536923328526e-05, "loss": 0.2066, "step": 12250 }, { "epoch": 0.54, "learning_rate": 4.1257935504973165e-05, "loss": 0.2098, "step": 12260 }, { "epoch": 0.54, "learning_rate": 4.1250501776661074e-05, "loss": 0.17, "step": 12270 }, { "epoch": 0.54, "learning_rate": 4.124306804834897e-05, "loss": 0.2009, "step": 12280 }, { "epoch": 0.54, "learning_rate": 4.123563432003687e-05, "loss": 0.1276, "step": 12290 }, { "epoch": 0.54, "learning_rate": 4.1228200591724775e-05, "loss": 0.1585, "step": 12300 }, { "epoch": 0.55, "learning_rate": 4.122076686341268e-05, "loss": 0.1373, "step": 12310 }, { "epoch": 0.55, "learning_rate": 4.121333313510058e-05, "loss": 0.1365, "step": 12320 }, { "epoch": 0.55, "learning_rate": 4.120589940678848e-05, "loss": 0.2086, "step": 12330 }, { "epoch": 0.55, "learning_rate": 4.1198465678476385e-05, "loss": 0.199, "step": 12340 }, { "epoch": 0.55, "learning_rate": 4.119103195016429e-05, "loss": 0.2007, "step": 12350 }, { "epoch": 0.55, "learning_rate": 4.118359822185219e-05, "loss": 0.1488, "step": 12360 }, { "epoch": 0.55, "learning_rate": 4.117616449354009e-05, "loss": 0.1868, "step": 12370 }, { "epoch": 0.55, "learning_rate": 4.116873076522799e-05, "loss": 0.1883, "step": 12380 }, { "epoch": 0.55, "learning_rate": 4.11612970369159e-05, "loss": 0.2195, "step": 12390 }, { "epoch": 0.55, "learning_rate": 4.11538633086038e-05, "loss": 0.1507, "step": 12400 }, { "epoch": 0.55, "learning_rate": 4.1146429580291704e-05, "loss": 0.1484, "step": 12410 }, { "epoch": 0.55, "learning_rate": 4.1138995851979606e-05, "loss": 0.1368, "step": 12420 }, { "epoch": 0.55, "learning_rate": 4.11315621236675e-05, "loss": 0.2015, "step": 12430 }, { "epoch": 0.55, "learning_rate": 4.112412839535541e-05, "loss": 0.163, "step": 12440 }, { "epoch": 0.55, "learning_rate": 4.111669466704331e-05, "loss": 0.1463, "step": 12450 }, { "epoch": 0.55, "learning_rate": 4.1109260938731216e-05, "loss": 0.178, "step": 12460 }, { "epoch": 0.55, "learning_rate": 4.110182721041911e-05, "loss": 0.1674, "step": 12470 }, { "epoch": 0.55, "learning_rate": 4.109439348210702e-05, "loss": 0.1539, "step": 12480 }, { "epoch": 0.55, "learning_rate": 4.108695975379492e-05, "loss": 0.1685, "step": 12490 }, { "epoch": 0.55, "learning_rate": 4.107952602548282e-05, "loss": 0.1723, "step": 12500 }, { "epoch": 0.55, "learning_rate": 4.107209229717072e-05, "loss": 0.2123, "step": 12510 }, { "epoch": 0.55, "learning_rate": 4.1064658568858625e-05, "loss": 0.2225, "step": 12520 }, { "epoch": 0.55, "learning_rate": 4.1057224840546534e-05, "loss": 0.171, "step": 12530 }, { "epoch": 0.56, "learning_rate": 4.104979111223443e-05, "loss": 0.2094, "step": 12540 }, { "epoch": 0.56, "learning_rate": 4.104235738392234e-05, "loss": 0.1791, "step": 12550 }, { "epoch": 0.56, "learning_rate": 4.1034923655610235e-05, "loss": 0.1262, "step": 12560 }, { "epoch": 0.56, "learning_rate": 4.102748992729814e-05, "loss": 0.1131, "step": 12570 }, { "epoch": 0.56, "learning_rate": 4.102005619898604e-05, "loss": 0.1647, "step": 12580 }, { "epoch": 0.56, "learning_rate": 4.101262247067394e-05, "loss": 0.1611, "step": 12590 }, { "epoch": 0.56, "learning_rate": 4.1005188742361846e-05, "loss": 0.1746, "step": 12600 }, { "epoch": 0.56, "learning_rate": 4.099775501404975e-05, "loss": 0.1884, "step": 12610 }, { "epoch": 0.56, "learning_rate": 4.099032128573765e-05, "loss": 0.1756, "step": 12620 }, { "epoch": 0.56, "learning_rate": 4.0982887557425553e-05, "loss": 0.1811, "step": 12630 }, { "epoch": 0.56, "learning_rate": 4.0975453829113456e-05, "loss": 0.1827, "step": 12640 }, { "epoch": 0.56, "learning_rate": 4.096802010080136e-05, "loss": 0.1841, "step": 12650 }, { "epoch": 0.56, "learning_rate": 4.096058637248926e-05, "loss": 0.1922, "step": 12660 }, { "epoch": 0.56, "learning_rate": 4.0953152644177164e-05, "loss": 0.1931, "step": 12670 }, { "epoch": 0.56, "learning_rate": 4.0945718915865066e-05, "loss": 0.1964, "step": 12680 }, { "epoch": 0.56, "learning_rate": 4.093828518755297e-05, "loss": 0.2106, "step": 12690 }, { "epoch": 0.56, "learning_rate": 4.093085145924087e-05, "loss": 0.1344, "step": 12700 }, { "epoch": 0.56, "learning_rate": 4.092341773092877e-05, "loss": 0.158, "step": 12710 }, { "epoch": 0.56, "learning_rate": 4.091598400261668e-05, "loss": 0.2147, "step": 12720 }, { "epoch": 0.56, "learning_rate": 4.090855027430457e-05, "loss": 0.183, "step": 12730 }, { "epoch": 0.56, "learning_rate": 4.090111654599248e-05, "loss": 0.1935, "step": 12740 }, { "epoch": 0.56, "learning_rate": 4.089368281768038e-05, "loss": 0.2264, "step": 12750 }, { "epoch": 0.56, "learning_rate": 4.088624908936829e-05, "loss": 0.2005, "step": 12760 }, { "epoch": 0.57, "learning_rate": 4.087881536105619e-05, "loss": 0.1411, "step": 12770 }, { "epoch": 0.57, "learning_rate": 4.0871381632744085e-05, "loss": 0.2612, "step": 12780 }, { "epoch": 0.57, "learning_rate": 4.0863947904431995e-05, "loss": 0.1749, "step": 12790 }, { "epoch": 0.57, "learning_rate": 4.085651417611989e-05, "loss": 0.2256, "step": 12800 }, { "epoch": 0.57, "learning_rate": 4.08490804478078e-05, "loss": 0.1945, "step": 12810 }, { "epoch": 0.57, "learning_rate": 4.0841646719495696e-05, "loss": 0.1731, "step": 12820 }, { "epoch": 0.57, "learning_rate": 4.08342129911836e-05, "loss": 0.1909, "step": 12830 }, { "epoch": 0.57, "learning_rate": 4.08267792628715e-05, "loss": 0.1731, "step": 12840 }, { "epoch": 0.57, "learning_rate": 4.0819345534559403e-05, "loss": 0.179, "step": 12850 }, { "epoch": 0.57, "learning_rate": 4.0811911806247306e-05, "loss": 0.2042, "step": 12860 }, { "epoch": 0.57, "learning_rate": 4.080447807793521e-05, "loss": 0.2059, "step": 12870 }, { "epoch": 0.57, "learning_rate": 4.079704434962311e-05, "loss": 0.1974, "step": 12880 }, { "epoch": 0.57, "learning_rate": 4.0789610621311014e-05, "loss": 0.183, "step": 12890 }, { "epoch": 0.57, "learning_rate": 4.0782176892998916e-05, "loss": 0.1429, "step": 12900 }, { "epoch": 0.57, "learning_rate": 4.077474316468682e-05, "loss": 0.1801, "step": 12910 }, { "epoch": 0.57, "learning_rate": 4.076730943637472e-05, "loss": 0.163, "step": 12920 }, { "epoch": 0.57, "learning_rate": 4.0759875708062624e-05, "loss": 0.1712, "step": 12930 }, { "epoch": 0.57, "learning_rate": 4.075244197975053e-05, "loss": 0.1615, "step": 12940 }, { "epoch": 0.57, "learning_rate": 4.074500825143843e-05, "loss": 0.2216, "step": 12950 }, { "epoch": 0.57, "learning_rate": 4.073757452312633e-05, "loss": 0.16, "step": 12960 }, { "epoch": 0.57, "learning_rate": 4.0730140794814234e-05, "loss": 0.1806, "step": 12970 }, { "epoch": 0.57, "learning_rate": 4.072270706650214e-05, "loss": 0.1751, "step": 12980 }, { "epoch": 0.58, "learning_rate": 4.071527333819003e-05, "loss": 0.1612, "step": 12990 }, { "epoch": 0.58, "learning_rate": 4.070783960987794e-05, "loss": 0.1775, "step": 13000 }, { "epoch": 0.58, "learning_rate": 4.0700405881565845e-05, "loss": 0.2432, "step": 13010 }, { "epoch": 0.58, "learning_rate": 4.069297215325375e-05, "loss": 0.1862, "step": 13020 }, { "epoch": 0.58, "learning_rate": 4.068553842494165e-05, "loss": 0.1664, "step": 13030 }, { "epoch": 0.58, "learning_rate": 4.0678104696629546e-05, "loss": 0.2177, "step": 13040 }, { "epoch": 0.58, "learning_rate": 4.0670670968317455e-05, "loss": 0.1614, "step": 13050 }, { "epoch": 0.58, "learning_rate": 4.066323724000535e-05, "loss": 0.2162, "step": 13060 }, { "epoch": 0.58, "learning_rate": 4.065580351169326e-05, "loss": 0.2288, "step": 13070 }, { "epoch": 0.58, "learning_rate": 4.0648369783381156e-05, "loss": 0.1587, "step": 13080 }, { "epoch": 0.58, "learning_rate": 4.0640936055069065e-05, "loss": 0.173, "step": 13090 }, { "epoch": 0.58, "learning_rate": 4.063350232675696e-05, "loss": 0.1758, "step": 13100 }, { "epoch": 0.58, "learning_rate": 4.0626068598444864e-05, "loss": 0.2074, "step": 13110 }, { "epoch": 0.58, "learning_rate": 4.0618634870132766e-05, "loss": 0.1971, "step": 13120 }, { "epoch": 0.58, "learning_rate": 4.061120114182067e-05, "loss": 0.1933, "step": 13130 }, { "epoch": 0.58, "learning_rate": 4.060376741350858e-05, "loss": 0.2035, "step": 13140 }, { "epoch": 0.58, "learning_rate": 4.0596333685196474e-05, "loss": 0.1915, "step": 13150 }, { "epoch": 0.58, "learning_rate": 4.058889995688438e-05, "loss": 0.1426, "step": 13160 }, { "epoch": 0.58, "learning_rate": 4.058146622857228e-05, "loss": 0.2081, "step": 13170 }, { "epoch": 0.58, "learning_rate": 4.057403250026018e-05, "loss": 0.1917, "step": 13180 }, { "epoch": 0.58, "learning_rate": 4.0566598771948084e-05, "loss": 0.1686, "step": 13190 }, { "epoch": 0.58, "learning_rate": 4.055916504363599e-05, "loss": 0.2148, "step": 13200 }, { "epoch": 0.58, "learning_rate": 4.055173131532389e-05, "loss": 0.1654, "step": 13210 }, { "epoch": 0.59, "learning_rate": 4.054429758701179e-05, "loss": 0.1273, "step": 13220 }, { "epoch": 0.59, "learning_rate": 4.0536863858699695e-05, "loss": 0.1609, "step": 13230 }, { "epoch": 0.59, "learning_rate": 4.05294301303876e-05, "loss": 0.1601, "step": 13240 }, { "epoch": 0.59, "learning_rate": 4.05219964020755e-05, "loss": 0.2126, "step": 13250 }, { "epoch": 0.59, "learning_rate": 4.05145626737634e-05, "loss": 0.2226, "step": 13260 }, { "epoch": 0.59, "learning_rate": 4.0507128945451305e-05, "loss": 0.1382, "step": 13270 }, { "epoch": 0.59, "learning_rate": 4.049969521713921e-05, "loss": 0.1883, "step": 13280 }, { "epoch": 0.59, "learning_rate": 4.049226148882711e-05, "loss": 0.1966, "step": 13290 }, { "epoch": 0.59, "learning_rate": 4.048482776051501e-05, "loss": 0.1779, "step": 13300 }, { "epoch": 0.59, "learning_rate": 4.0477394032202915e-05, "loss": 0.1478, "step": 13310 }, { "epoch": 0.59, "learning_rate": 4.046996030389081e-05, "loss": 0.2019, "step": 13320 }, { "epoch": 0.59, "learning_rate": 4.046252657557872e-05, "loss": 0.1968, "step": 13330 }, { "epoch": 0.59, "learning_rate": 4.0455092847266616e-05, "loss": 0.1879, "step": 13340 }, { "epoch": 0.59, "learning_rate": 4.0447659118954526e-05, "loss": 0.1816, "step": 13350 }, { "epoch": 0.59, "learning_rate": 4.044022539064242e-05, "loss": 0.184, "step": 13360 }, { "epoch": 0.59, "learning_rate": 4.0432791662330324e-05, "loss": 0.1695, "step": 13370 }, { "epoch": 0.59, "learning_rate": 4.0425357934018233e-05, "loss": 0.2152, "step": 13380 }, { "epoch": 0.59, "learning_rate": 4.041792420570613e-05, "loss": 0.1595, "step": 13390 }, { "epoch": 0.59, "learning_rate": 4.041049047739404e-05, "loss": 0.1546, "step": 13400 }, { "epoch": 0.59, "learning_rate": 4.0403056749081934e-05, "loss": 0.2177, "step": 13410 }, { "epoch": 0.59, "learning_rate": 4.0395623020769844e-05, "loss": 0.2422, "step": 13420 }, { "epoch": 0.59, "learning_rate": 4.038818929245774e-05, "loss": 0.1355, "step": 13430 }, { "epoch": 0.6, "learning_rate": 4.038075556414564e-05, "loss": 0.1599, "step": 13440 }, { "epoch": 0.6, "learning_rate": 4.0373321835833545e-05, "loss": 0.2513, "step": 13450 }, { "epoch": 0.6, "learning_rate": 4.036588810752145e-05, "loss": 0.1894, "step": 13460 }, { "epoch": 0.6, "learning_rate": 4.035845437920935e-05, "loss": 0.1278, "step": 13470 }, { "epoch": 0.6, "learning_rate": 4.035102065089725e-05, "loss": 0.155, "step": 13480 }, { "epoch": 0.6, "learning_rate": 4.0343586922585155e-05, "loss": 0.1679, "step": 13490 }, { "epoch": 0.6, "learning_rate": 4.033615319427306e-05, "loss": 0.1988, "step": 13500 }, { "epoch": 0.6, "learning_rate": 4.032871946596096e-05, "loss": 0.1582, "step": 13510 }, { "epoch": 0.6, "learning_rate": 4.032128573764886e-05, "loss": 0.1883, "step": 13520 }, { "epoch": 0.6, "learning_rate": 4.0313852009336765e-05, "loss": 0.1937, "step": 13530 }, { "epoch": 0.6, "learning_rate": 4.030641828102467e-05, "loss": 0.1393, "step": 13540 }, { "epoch": 0.6, "learning_rate": 4.029898455271257e-05, "loss": 0.1077, "step": 13550 }, { "epoch": 0.6, "learning_rate": 4.029155082440047e-05, "loss": 0.1836, "step": 13560 }, { "epoch": 0.6, "learning_rate": 4.0284117096088376e-05, "loss": 0.148, "step": 13570 }, { "epoch": 0.6, "learning_rate": 4.027668336777627e-05, "loss": 0.2009, "step": 13580 }, { "epoch": 0.6, "learning_rate": 4.026924963946418e-05, "loss": 0.1606, "step": 13590 }, { "epoch": 0.6, "learning_rate": 4.0261815911152077e-05, "loss": 0.2273, "step": 13600 }, { "epoch": 0.6, "learning_rate": 4.0254382182839986e-05, "loss": 0.2186, "step": 13610 }, { "epoch": 0.6, "learning_rate": 4.024694845452789e-05, "loss": 0.1569, "step": 13620 }, { "epoch": 0.6, "learning_rate": 4.023951472621579e-05, "loss": 0.2116, "step": 13630 }, { "epoch": 0.6, "learning_rate": 4.0232080997903694e-05, "loss": 0.2247, "step": 13640 }, { "epoch": 0.6, "learning_rate": 4.022464726959159e-05, "loss": 0.1533, "step": 13650 }, { "epoch": 0.6, "learning_rate": 4.02172135412795e-05, "loss": 0.1982, "step": 13660 }, { "epoch": 0.61, "learning_rate": 4.0209779812967395e-05, "loss": 0.1936, "step": 13670 }, { "epoch": 0.61, "learning_rate": 4.0202346084655304e-05, "loss": 0.1643, "step": 13680 }, { "epoch": 0.61, "learning_rate": 4.01949123563432e-05, "loss": 0.2199, "step": 13690 }, { "epoch": 0.61, "learning_rate": 4.01874786280311e-05, "loss": 0.1318, "step": 13700 }, { "epoch": 0.61, "learning_rate": 4.0180044899719005e-05, "loss": 0.2135, "step": 13710 }, { "epoch": 0.61, "learning_rate": 4.017261117140691e-05, "loss": 0.1672, "step": 13720 }, { "epoch": 0.61, "learning_rate": 4.016517744309481e-05, "loss": 0.1824, "step": 13730 }, { "epoch": 0.61, "learning_rate": 4.015774371478271e-05, "loss": 0.213, "step": 13740 }, { "epoch": 0.61, "learning_rate": 4.015030998647062e-05, "loss": 0.1086, "step": 13750 }, { "epoch": 0.61, "learning_rate": 4.014287625815852e-05, "loss": 0.1445, "step": 13760 }, { "epoch": 0.61, "learning_rate": 4.013544252984642e-05, "loss": 0.2606, "step": 13770 }, { "epoch": 0.61, "learning_rate": 4.012800880153432e-05, "loss": 0.1701, "step": 13780 }, { "epoch": 0.61, "learning_rate": 4.0120575073222226e-05, "loss": 0.1732, "step": 13790 }, { "epoch": 0.61, "learning_rate": 4.011314134491013e-05, "loss": 0.1474, "step": 13800 }, { "epoch": 0.61, "learning_rate": 4.010570761659803e-05, "loss": 0.1516, "step": 13810 }, { "epoch": 0.61, "learning_rate": 4.009827388828593e-05, "loss": 0.1552, "step": 13820 }, { "epoch": 0.61, "learning_rate": 4.0090840159973836e-05, "loss": 0.2087, "step": 13830 }, { "epoch": 0.61, "learning_rate": 4.008340643166174e-05, "loss": 0.1649, "step": 13840 }, { "epoch": 0.61, "learning_rate": 4.007597270334964e-05, "loss": 0.2025, "step": 13850 }, { "epoch": 0.61, "learning_rate": 4.006853897503754e-05, "loss": 0.2673, "step": 13860 }, { "epoch": 0.61, "learning_rate": 4.0061105246725446e-05, "loss": 0.1513, "step": 13870 }, { "epoch": 0.61, "learning_rate": 4.005367151841335e-05, "loss": 0.1874, "step": 13880 }, { "epoch": 0.61, "learning_rate": 4.004623779010125e-05, "loss": 0.1857, "step": 13890 }, { "epoch": 0.62, "learning_rate": 4.0038804061789154e-05, "loss": 0.1811, "step": 13900 }, { "epoch": 0.62, "learning_rate": 4.003137033347705e-05, "loss": 0.2069, "step": 13910 }, { "epoch": 0.62, "learning_rate": 4.002393660516496e-05, "loss": 0.1399, "step": 13920 }, { "epoch": 0.62, "learning_rate": 4.0016502876852855e-05, "loss": 0.1487, "step": 13930 }, { "epoch": 0.62, "learning_rate": 4.0009069148540764e-05, "loss": 0.1816, "step": 13940 }, { "epoch": 0.62, "learning_rate": 4.000163542022866e-05, "loss": 0.2088, "step": 13950 }, { "epoch": 0.62, "learning_rate": 3.999420169191657e-05, "loss": 0.2395, "step": 13960 }, { "epoch": 0.62, "learning_rate": 3.9986767963604465e-05, "loss": 0.1796, "step": 13970 }, { "epoch": 0.62, "learning_rate": 3.997933423529237e-05, "loss": 0.1745, "step": 13980 }, { "epoch": 0.62, "learning_rate": 3.997190050698028e-05, "loss": 0.2104, "step": 13990 }, { "epoch": 0.62, "learning_rate": 3.996446677866817e-05, "loss": 0.1316, "step": 14000 }, { "epoch": 0.62, "learning_rate": 3.995703305035608e-05, "loss": 0.1606, "step": 14010 }, { "epoch": 0.62, "learning_rate": 3.994959932204398e-05, "loss": 0.1842, "step": 14020 }, { "epoch": 0.62, "learning_rate": 3.994216559373188e-05, "loss": 0.1428, "step": 14030 }, { "epoch": 0.62, "learning_rate": 3.993473186541978e-05, "loss": 0.2148, "step": 14040 }, { "epoch": 0.62, "learning_rate": 3.9927298137107686e-05, "loss": 0.1331, "step": 14050 }, { "epoch": 0.62, "learning_rate": 3.991986440879559e-05, "loss": 0.2023, "step": 14060 }, { "epoch": 0.62, "learning_rate": 3.991243068048349e-05, "loss": 0.1662, "step": 14070 }, { "epoch": 0.62, "learning_rate": 3.9904996952171394e-05, "loss": 0.1284, "step": 14080 }, { "epoch": 0.62, "learning_rate": 3.9897563223859296e-05, "loss": 0.2006, "step": 14090 }, { "epoch": 0.62, "learning_rate": 3.98901294955472e-05, "loss": 0.1739, "step": 14100 }, { "epoch": 0.62, "learning_rate": 3.98826957672351e-05, "loss": 0.1807, "step": 14110 }, { "epoch": 0.63, "learning_rate": 3.9875262038923004e-05, "loss": 0.1737, "step": 14120 }, { "epoch": 0.63, "learning_rate": 3.9867828310610907e-05, "loss": 0.1571, "step": 14130 }, { "epoch": 0.63, "learning_rate": 3.986039458229881e-05, "loss": 0.2, "step": 14140 }, { "epoch": 0.63, "learning_rate": 3.985296085398671e-05, "loss": 0.1388, "step": 14150 }, { "epoch": 0.63, "learning_rate": 3.9845527125674614e-05, "loss": 0.1617, "step": 14160 }, { "epoch": 0.63, "learning_rate": 3.983809339736252e-05, "loss": 0.1556, "step": 14170 }, { "epoch": 0.63, "learning_rate": 3.983065966905042e-05, "loss": 0.2265, "step": 14180 }, { "epoch": 0.63, "learning_rate": 3.9823225940738315e-05, "loss": 0.1992, "step": 14190 }, { "epoch": 0.63, "learning_rate": 3.9815792212426225e-05, "loss": 0.18, "step": 14200 }, { "epoch": 0.63, "learning_rate": 3.980835848411412e-05, "loss": 0.1887, "step": 14210 }, { "epoch": 0.63, "learning_rate": 3.980092475580203e-05, "loss": 0.2241, "step": 14220 }, { "epoch": 0.63, "learning_rate": 3.979349102748993e-05, "loss": 0.2365, "step": 14230 }, { "epoch": 0.63, "learning_rate": 3.978605729917783e-05, "loss": 0.2057, "step": 14240 }, { "epoch": 0.63, "learning_rate": 3.977862357086574e-05, "loss": 0.1955, "step": 14250 }, { "epoch": 0.63, "learning_rate": 3.977118984255363e-05, "loss": 0.1765, "step": 14260 }, { "epoch": 0.63, "learning_rate": 3.976375611424154e-05, "loss": 0.1571, "step": 14270 }, { "epoch": 0.63, "learning_rate": 3.975632238592944e-05, "loss": 0.1937, "step": 14280 }, { "epoch": 0.63, "learning_rate": 3.974888865761735e-05, "loss": 0.134, "step": 14290 }, { "epoch": 0.63, "learning_rate": 3.9741454929305244e-05, "loss": 0.1776, "step": 14300 }, { "epoch": 0.63, "learning_rate": 3.9734021200993146e-05, "loss": 0.1537, "step": 14310 }, { "epoch": 0.63, "learning_rate": 3.972658747268105e-05, "loss": 0.2313, "step": 14320 }, { "epoch": 0.63, "learning_rate": 3.971915374436895e-05, "loss": 0.2078, "step": 14330 }, { "epoch": 0.63, "learning_rate": 3.9711720016056854e-05, "loss": 0.1616, "step": 14340 }, { "epoch": 0.64, "learning_rate": 3.9704286287744756e-05, "loss": 0.1798, "step": 14350 }, { "epoch": 0.64, "learning_rate": 3.969685255943266e-05, "loss": 0.1443, "step": 14360 }, { "epoch": 0.64, "learning_rate": 3.968941883112056e-05, "loss": 0.1898, "step": 14370 }, { "epoch": 0.64, "learning_rate": 3.9681985102808464e-05, "loss": 0.203, "step": 14380 }, { "epoch": 0.64, "learning_rate": 3.967455137449637e-05, "loss": 0.1466, "step": 14390 }, { "epoch": 0.64, "learning_rate": 3.966711764618427e-05, "loss": 0.1291, "step": 14400 }, { "epoch": 0.64, "learning_rate": 3.965968391787217e-05, "loss": 0.1811, "step": 14410 }, { "epoch": 0.64, "learning_rate": 3.9652250189560075e-05, "loss": 0.1675, "step": 14420 }, { "epoch": 0.64, "learning_rate": 3.964481646124798e-05, "loss": 0.2153, "step": 14430 }, { "epoch": 0.64, "learning_rate": 3.963738273293588e-05, "loss": 0.2505, "step": 14440 }, { "epoch": 0.64, "learning_rate": 3.9629949004623776e-05, "loss": 0.1653, "step": 14450 }, { "epoch": 0.64, "learning_rate": 3.9622515276311685e-05, "loss": 0.1974, "step": 14460 }, { "epoch": 0.64, "learning_rate": 3.961508154799958e-05, "loss": 0.198, "step": 14470 }, { "epoch": 0.64, "learning_rate": 3.960764781968749e-05, "loss": 0.1361, "step": 14480 }, { "epoch": 0.64, "learning_rate": 3.960021409137539e-05, "loss": 0.2049, "step": 14490 }, { "epoch": 0.64, "learning_rate": 3.9592780363063295e-05, "loss": 0.1601, "step": 14500 }, { "epoch": 0.64, "learning_rate": 3.95853466347512e-05, "loss": 0.1954, "step": 14510 }, { "epoch": 0.64, "learning_rate": 3.9577912906439094e-05, "loss": 0.2355, "step": 14520 }, { "epoch": 0.64, "learning_rate": 3.9570479178127e-05, "loss": 0.1804, "step": 14530 }, { "epoch": 0.64, "learning_rate": 3.95630454498149e-05, "loss": 0.1831, "step": 14540 }, { "epoch": 0.64, "learning_rate": 3.955561172150281e-05, "loss": 0.191, "step": 14550 }, { "epoch": 0.64, "learning_rate": 3.9548177993190704e-05, "loss": 0.1456, "step": 14560 }, { "epoch": 0.65, "learning_rate": 3.9540744264878606e-05, "loss": 0.2307, "step": 14570 }, { "epoch": 0.65, "learning_rate": 3.953331053656651e-05, "loss": 0.1738, "step": 14580 }, { "epoch": 0.65, "learning_rate": 3.952587680825441e-05, "loss": 0.1566, "step": 14590 }, { "epoch": 0.65, "learning_rate": 3.951844307994232e-05, "loss": 0.2108, "step": 14600 }, { "epoch": 0.65, "learning_rate": 3.951100935163022e-05, "loss": 0.1857, "step": 14610 }, { "epoch": 0.65, "learning_rate": 3.9503575623318126e-05, "loss": 0.1361, "step": 14620 }, { "epoch": 0.65, "learning_rate": 3.949614189500602e-05, "loss": 0.1757, "step": 14630 }, { "epoch": 0.65, "learning_rate": 3.9488708166693925e-05, "loss": 0.16, "step": 14640 }, { "epoch": 0.65, "learning_rate": 3.948127443838183e-05, "loss": 0.1148, "step": 14650 }, { "epoch": 0.65, "learning_rate": 3.947384071006973e-05, "loss": 0.2466, "step": 14660 }, { "epoch": 0.65, "learning_rate": 3.946640698175763e-05, "loss": 0.1654, "step": 14670 }, { "epoch": 0.65, "learning_rate": 3.9458973253445535e-05, "loss": 0.2074, "step": 14680 }, { "epoch": 0.65, "learning_rate": 3.945153952513344e-05, "loss": 0.1869, "step": 14690 }, { "epoch": 0.65, "learning_rate": 3.944410579682134e-05, "loss": 0.1838, "step": 14700 }, { "epoch": 0.65, "learning_rate": 3.943667206850924e-05, "loss": 0.2364, "step": 14710 }, { "epoch": 0.65, "learning_rate": 3.9429238340197145e-05, "loss": 0.1961, "step": 14720 }, { "epoch": 0.65, "learning_rate": 3.942180461188505e-05, "loss": 0.1546, "step": 14730 }, { "epoch": 0.65, "learning_rate": 3.941437088357295e-05, "loss": 0.173, "step": 14740 }, { "epoch": 0.65, "learning_rate": 3.940693715526085e-05, "loss": 0.1447, "step": 14750 }, { "epoch": 0.65, "learning_rate": 3.9399503426948755e-05, "loss": 0.1635, "step": 14760 }, { "epoch": 0.65, "learning_rate": 3.939206969863666e-05, "loss": 0.1859, "step": 14770 }, { "epoch": 0.65, "learning_rate": 3.9384635970324554e-05, "loss": 0.1443, "step": 14780 }, { "epoch": 0.65, "learning_rate": 3.937720224201246e-05, "loss": 0.1578, "step": 14790 }, { "epoch": 0.66, "learning_rate": 3.936976851370036e-05, "loss": 0.1743, "step": 14800 }, { "epoch": 0.66, "learning_rate": 3.936233478538827e-05, "loss": 0.2203, "step": 14810 }, { "epoch": 0.66, "learning_rate": 3.9354901057076164e-05, "loss": 0.175, "step": 14820 }, { "epoch": 0.66, "learning_rate": 3.9347467328764074e-05, "loss": 0.2347, "step": 14830 }, { "epoch": 0.66, "learning_rate": 3.934003360045197e-05, "loss": 0.1566, "step": 14840 }, { "epoch": 0.66, "learning_rate": 3.933259987213987e-05, "loss": 0.2075, "step": 14850 }, { "epoch": 0.66, "learning_rate": 3.932516614382778e-05, "loss": 0.1721, "step": 14860 }, { "epoch": 0.66, "learning_rate": 3.931773241551568e-05, "loss": 0.1945, "step": 14870 }, { "epoch": 0.66, "learning_rate": 3.9310298687203586e-05, "loss": 0.1996, "step": 14880 }, { "epoch": 0.66, "learning_rate": 3.930286495889148e-05, "loss": 0.2423, "step": 14890 }, { "epoch": 0.66, "learning_rate": 3.9295431230579385e-05, "loss": 0.2015, "step": 14900 }, { "epoch": 0.66, "learning_rate": 3.928799750226729e-05, "loss": 0.1663, "step": 14910 }, { "epoch": 0.66, "learning_rate": 3.928056377395519e-05, "loss": 0.146, "step": 14920 }, { "epoch": 0.66, "learning_rate": 3.927313004564309e-05, "loss": 0.1866, "step": 14930 }, { "epoch": 0.66, "learning_rate": 3.9265696317330995e-05, "loss": 0.2157, "step": 14940 }, { "epoch": 0.66, "learning_rate": 3.92582625890189e-05, "loss": 0.1808, "step": 14950 }, { "epoch": 0.66, "learning_rate": 3.92508288607068e-05, "loss": 0.2068, "step": 14960 }, { "epoch": 0.66, "learning_rate": 3.92433951323947e-05, "loss": 0.1938, "step": 14970 }, { "epoch": 0.66, "learning_rate": 3.9235961404082605e-05, "loss": 0.1634, "step": 14980 }, { "epoch": 0.66, "learning_rate": 3.922852767577051e-05, "loss": 0.1862, "step": 14990 }, { "epoch": 0.66, "learning_rate": 3.922109394745841e-05, "loss": 0.1899, "step": 15000 }, { "epoch": 0.66, "learning_rate": 3.921366021914631e-05, "loss": 0.1515, "step": 15010 }, { "epoch": 0.66, "learning_rate": 3.9206226490834216e-05, "loss": 0.1747, "step": 15020 }, { "epoch": 0.67, "learning_rate": 3.919879276252212e-05, "loss": 0.2085, "step": 15030 }, { "epoch": 0.67, "learning_rate": 3.919135903421002e-05, "loss": 0.1681, "step": 15040 }, { "epoch": 0.67, "learning_rate": 3.9183925305897924e-05, "loss": 0.1389, "step": 15050 }, { "epoch": 0.67, "learning_rate": 3.917649157758582e-05, "loss": 0.182, "step": 15060 }, { "epoch": 0.67, "learning_rate": 3.916905784927373e-05, "loss": 0.1381, "step": 15070 }, { "epoch": 0.67, "learning_rate": 3.9161624120961624e-05, "loss": 0.155, "step": 15080 }, { "epoch": 0.67, "learning_rate": 3.9154190392649534e-05, "loss": 0.1663, "step": 15090 }, { "epoch": 0.67, "learning_rate": 3.9146756664337436e-05, "loss": 0.1594, "step": 15100 }, { "epoch": 0.67, "learning_rate": 3.913932293602533e-05, "loss": 0.1702, "step": 15110 }, { "epoch": 0.67, "learning_rate": 3.913188920771324e-05, "loss": 0.2096, "step": 15120 }, { "epoch": 0.67, "learning_rate": 3.912445547940114e-05, "loss": 0.2124, "step": 15130 }, { "epoch": 0.67, "learning_rate": 3.911702175108905e-05, "loss": 0.2006, "step": 15140 }, { "epoch": 0.67, "learning_rate": 3.910958802277694e-05, "loss": 0.1881, "step": 15150 }, { "epoch": 0.67, "learning_rate": 3.910215429446485e-05, "loss": 0.1999, "step": 15160 }, { "epoch": 0.67, "learning_rate": 3.909472056615275e-05, "loss": 0.2433, "step": 15170 }, { "epoch": 0.67, "learning_rate": 3.908728683784065e-05, "loss": 0.1651, "step": 15180 }, { "epoch": 0.67, "learning_rate": 3.907985310952855e-05, "loss": 0.1613, "step": 15190 }, { "epoch": 0.67, "learning_rate": 3.9072419381216455e-05, "loss": 0.1463, "step": 15200 }, { "epoch": 0.67, "learning_rate": 3.9064985652904365e-05, "loss": 0.1863, "step": 15210 }, { "epoch": 0.67, "learning_rate": 3.905755192459226e-05, "loss": 0.2316, "step": 15220 }, { "epoch": 0.67, "learning_rate": 3.905011819628016e-05, "loss": 0.1711, "step": 15230 }, { "epoch": 0.67, "learning_rate": 3.9042684467968066e-05, "loss": 0.122, "step": 15240 }, { "epoch": 0.68, "learning_rate": 3.903525073965597e-05, "loss": 0.166, "step": 15250 }, { "epoch": 0.68, "learning_rate": 3.902781701134387e-05, "loss": 0.1268, "step": 15260 }, { "epoch": 0.68, "learning_rate": 3.9020383283031773e-05, "loss": 0.2188, "step": 15270 }, { "epoch": 0.68, "learning_rate": 3.9012949554719676e-05, "loss": 0.1867, "step": 15280 }, { "epoch": 0.68, "learning_rate": 3.900551582640758e-05, "loss": 0.1212, "step": 15290 }, { "epoch": 0.68, "learning_rate": 3.899808209809548e-05, "loss": 0.176, "step": 15300 }, { "epoch": 0.68, "learning_rate": 3.8990648369783384e-05, "loss": 0.2244, "step": 15310 }, { "epoch": 0.68, "learning_rate": 3.898321464147128e-05, "loss": 0.1978, "step": 15320 }, { "epoch": 0.68, "learning_rate": 3.897578091315919e-05, "loss": 0.1555, "step": 15330 }, { "epoch": 0.68, "learning_rate": 3.896834718484709e-05, "loss": 0.159, "step": 15340 }, { "epoch": 0.68, "learning_rate": 3.8960913456534994e-05, "loss": 0.1307, "step": 15350 }, { "epoch": 0.68, "learning_rate": 3.89534797282229e-05, "loss": 0.1686, "step": 15360 }, { "epoch": 0.68, "learning_rate": 3.89460459999108e-05, "loss": 0.175, "step": 15370 }, { "epoch": 0.68, "learning_rate": 3.89386122715987e-05, "loss": 0.196, "step": 15380 }, { "epoch": 0.68, "learning_rate": 3.89311785432866e-05, "loss": 0.1865, "step": 15390 }, { "epoch": 0.68, "learning_rate": 3.892374481497451e-05, "loss": 0.2059, "step": 15400 }, { "epoch": 0.68, "learning_rate": 3.89163110866624e-05, "loss": 0.1713, "step": 15410 }, { "epoch": 0.68, "learning_rate": 3.890887735835031e-05, "loss": 0.1615, "step": 15420 }, { "epoch": 0.68, "learning_rate": 3.890144363003821e-05, "loss": 0.1533, "step": 15430 }, { "epoch": 0.68, "learning_rate": 3.889400990172611e-05, "loss": 0.1741, "step": 15440 }, { "epoch": 0.68, "learning_rate": 3.888657617341401e-05, "loss": 0.1842, "step": 15450 }, { "epoch": 0.68, "learning_rate": 3.8879142445101916e-05, "loss": 0.15, "step": 15460 }, { "epoch": 0.68, "learning_rate": 3.8871708716789825e-05, "loss": 0.1432, "step": 15470 }, { "epoch": 0.69, "learning_rate": 3.886427498847772e-05, "loss": 0.1422, "step": 15480 }, { "epoch": 0.69, "learning_rate": 3.885684126016563e-05, "loss": 0.1829, "step": 15490 }, { "epoch": 0.69, "learning_rate": 3.8849407531853526e-05, "loss": 0.1515, "step": 15500 }, { "epoch": 0.69, "learning_rate": 3.884197380354143e-05, "loss": 0.2119, "step": 15510 }, { "epoch": 0.69, "learning_rate": 3.883454007522933e-05, "loss": 0.1384, "step": 15520 }, { "epoch": 0.69, "learning_rate": 3.8827106346917234e-05, "loss": 0.1582, "step": 15530 }, { "epoch": 0.69, "learning_rate": 3.8819672618605136e-05, "loss": 0.2138, "step": 15540 }, { "epoch": 0.69, "learning_rate": 3.881223889029304e-05, "loss": 0.2406, "step": 15550 }, { "epoch": 0.69, "learning_rate": 3.880480516198094e-05, "loss": 0.1675, "step": 15560 }, { "epoch": 0.69, "learning_rate": 3.8797371433668844e-05, "loss": 0.1816, "step": 15570 }, { "epoch": 0.69, "learning_rate": 3.878993770535675e-05, "loss": 0.1913, "step": 15580 }, { "epoch": 0.69, "learning_rate": 3.878250397704465e-05, "loss": 0.2115, "step": 15590 }, { "epoch": 0.69, "learning_rate": 3.877507024873255e-05, "loss": 0.1613, "step": 15600 }, { "epoch": 0.69, "learning_rate": 3.8767636520420454e-05, "loss": 0.1511, "step": 15610 }, { "epoch": 0.69, "learning_rate": 3.876020279210836e-05, "loss": 0.192, "step": 15620 }, { "epoch": 0.69, "learning_rate": 3.875276906379626e-05, "loss": 0.1873, "step": 15630 }, { "epoch": 0.69, "learning_rate": 3.874533533548416e-05, "loss": 0.1503, "step": 15640 }, { "epoch": 0.69, "learning_rate": 3.873790160717206e-05, "loss": 0.187, "step": 15650 }, { "epoch": 0.69, "learning_rate": 3.873046787885997e-05, "loss": 0.1747, "step": 15660 }, { "epoch": 0.69, "learning_rate": 3.872303415054786e-05, "loss": 0.1603, "step": 15670 }, { "epoch": 0.69, "learning_rate": 3.871560042223577e-05, "loss": 0.1315, "step": 15680 }, { "epoch": 0.69, "learning_rate": 3.870816669392367e-05, "loss": 0.2076, "step": 15690 }, { "epoch": 0.7, "learning_rate": 3.870073296561158e-05, "loss": 0.1288, "step": 15700 }, { "epoch": 0.7, "learning_rate": 3.869329923729948e-05, "loss": 0.2173, "step": 15710 }, { "epoch": 0.7, "learning_rate": 3.8685865508987376e-05, "loss": 0.2012, "step": 15720 }, { "epoch": 0.7, "learning_rate": 3.8678431780675285e-05, "loss": 0.2115, "step": 15730 }, { "epoch": 0.7, "learning_rate": 3.867099805236318e-05, "loss": 0.1744, "step": 15740 }, { "epoch": 0.7, "learning_rate": 3.866356432405109e-05, "loss": 0.1673, "step": 15750 }, { "epoch": 0.7, "learning_rate": 3.8656130595738986e-05, "loss": 0.1505, "step": 15760 }, { "epoch": 0.7, "learning_rate": 3.8648696867426896e-05, "loss": 0.1564, "step": 15770 }, { "epoch": 0.7, "learning_rate": 3.864126313911479e-05, "loss": 0.1759, "step": 15780 }, { "epoch": 0.7, "learning_rate": 3.8633829410802694e-05, "loss": 0.1863, "step": 15790 }, { "epoch": 0.7, "learning_rate": 3.86263956824906e-05, "loss": 0.1779, "step": 15800 }, { "epoch": 0.7, "learning_rate": 3.86189619541785e-05, "loss": 0.1925, "step": 15810 }, { "epoch": 0.7, "learning_rate": 3.86115282258664e-05, "loss": 0.2246, "step": 15820 }, { "epoch": 0.7, "learning_rate": 3.8604094497554304e-05, "loss": 0.2157, "step": 15830 }, { "epoch": 0.7, "learning_rate": 3.859666076924221e-05, "loss": 0.2142, "step": 15840 }, { "epoch": 0.7, "learning_rate": 3.858922704093011e-05, "loss": 0.148, "step": 15850 }, { "epoch": 0.7, "learning_rate": 3.858179331261801e-05, "loss": 0.1591, "step": 15860 }, { "epoch": 0.7, "learning_rate": 3.8574359584305915e-05, "loss": 0.1627, "step": 15870 }, { "epoch": 0.7, "learning_rate": 3.856692585599382e-05, "loss": 0.1916, "step": 15880 }, { "epoch": 0.7, "learning_rate": 3.855949212768172e-05, "loss": 0.1535, "step": 15890 }, { "epoch": 0.7, "learning_rate": 3.855205839936962e-05, "loss": 0.2123, "step": 15900 }, { "epoch": 0.7, "learning_rate": 3.8544624671057525e-05, "loss": 0.1603, "step": 15910 }, { "epoch": 0.7, "learning_rate": 3.853719094274543e-05, "loss": 0.2374, "step": 15920 }, { "epoch": 0.71, "learning_rate": 3.8529757214433323e-05, "loss": 0.2046, "step": 15930 }, { "epoch": 0.71, "learning_rate": 3.852232348612123e-05, "loss": 0.1366, "step": 15940 }, { "epoch": 0.71, "learning_rate": 3.8514889757809135e-05, "loss": 0.2231, "step": 15950 }, { "epoch": 0.71, "learning_rate": 3.850745602949704e-05, "loss": 0.1625, "step": 15960 }, { "epoch": 0.71, "learning_rate": 3.850002230118494e-05, "loss": 0.1942, "step": 15970 }, { "epoch": 0.71, "learning_rate": 3.849258857287284e-05, "loss": 0.1828, "step": 15980 }, { "epoch": 0.71, "learning_rate": 3.8485154844560746e-05, "loss": 0.1377, "step": 15990 }, { "epoch": 0.71, "learning_rate": 3.847772111624864e-05, "loss": 0.153, "step": 16000 }, { "epoch": 0.71, "learning_rate": 3.847028738793655e-05, "loss": 0.1481, "step": 16010 }, { "epoch": 0.71, "learning_rate": 3.846285365962445e-05, "loss": 0.1814, "step": 16020 }, { "epoch": 0.71, "learning_rate": 3.8455419931312356e-05, "loss": 0.2209, "step": 16030 }, { "epoch": 0.71, "learning_rate": 3.844798620300025e-05, "loss": 0.1763, "step": 16040 }, { "epoch": 0.71, "learning_rate": 3.8440552474688154e-05, "loss": 0.1839, "step": 16050 }, { "epoch": 0.71, "learning_rate": 3.843311874637606e-05, "loss": 0.1772, "step": 16060 }, { "epoch": 0.71, "learning_rate": 3.842568501806396e-05, "loss": 0.1603, "step": 16070 }, { "epoch": 0.71, "learning_rate": 3.841825128975187e-05, "loss": 0.1422, "step": 16080 }, { "epoch": 0.71, "learning_rate": 3.8410817561439765e-05, "loss": 0.1599, "step": 16090 }, { "epoch": 0.71, "learning_rate": 3.8403383833127674e-05, "loss": 0.1844, "step": 16100 }, { "epoch": 0.71, "learning_rate": 3.839595010481557e-05, "loss": 0.198, "step": 16110 }, { "epoch": 0.71, "learning_rate": 3.838851637650347e-05, "loss": 0.1726, "step": 16120 }, { "epoch": 0.71, "learning_rate": 3.8381082648191375e-05, "loss": 0.1359, "step": 16130 }, { "epoch": 0.71, "learning_rate": 3.837364891987928e-05, "loss": 0.1393, "step": 16140 }, { "epoch": 0.72, "learning_rate": 3.836621519156718e-05, "loss": 0.1995, "step": 16150 }, { "epoch": 0.72, "learning_rate": 3.835878146325508e-05, "loss": 0.1708, "step": 16160 }, { "epoch": 0.72, "learning_rate": 3.8351347734942985e-05, "loss": 0.1442, "step": 16170 }, { "epoch": 0.72, "learning_rate": 3.834391400663089e-05, "loss": 0.183, "step": 16180 }, { "epoch": 0.72, "learning_rate": 3.8336480278318784e-05, "loss": 0.2085, "step": 16190 }, { "epoch": 0.72, "learning_rate": 3.832904655000669e-05, "loss": 0.1921, "step": 16200 }, { "epoch": 0.72, "learning_rate": 3.8321612821694596e-05, "loss": 0.1552, "step": 16210 }, { "epoch": 0.72, "learning_rate": 3.83141790933825e-05, "loss": 0.1461, "step": 16220 }, { "epoch": 0.72, "learning_rate": 3.83067453650704e-05, "loss": 0.1943, "step": 16230 }, { "epoch": 0.72, "learning_rate": 3.82993116367583e-05, "loss": 0.1821, "step": 16240 }, { "epoch": 0.72, "learning_rate": 3.8291877908446206e-05, "loss": 0.1274, "step": 16250 }, { "epoch": 0.72, "learning_rate": 3.82844441801341e-05, "loss": 0.1939, "step": 16260 }, { "epoch": 0.72, "learning_rate": 3.827701045182201e-05, "loss": 0.1674, "step": 16270 }, { "epoch": 0.72, "learning_rate": 3.826957672350991e-05, "loss": 0.1787, "step": 16280 }, { "epoch": 0.72, "learning_rate": 3.8262142995197816e-05, "loss": 0.1931, "step": 16290 }, { "epoch": 0.72, "learning_rate": 3.825470926688571e-05, "loss": 0.1361, "step": 16300 }, { "epoch": 0.72, "learning_rate": 3.824727553857362e-05, "loss": 0.1614, "step": 16310 }, { "epoch": 0.72, "learning_rate": 3.8239841810261524e-05, "loss": 0.1769, "step": 16320 }, { "epoch": 0.72, "learning_rate": 3.823240808194942e-05, "loss": 0.1783, "step": 16330 }, { "epoch": 0.72, "learning_rate": 3.822497435363733e-05, "loss": 0.1674, "step": 16340 }, { "epoch": 0.72, "learning_rate": 3.8217540625325225e-05, "loss": 0.1891, "step": 16350 }, { "epoch": 0.72, "learning_rate": 3.8210106897013134e-05, "loss": 0.1544, "step": 16360 }, { "epoch": 0.72, "learning_rate": 3.820267316870103e-05, "loss": 0.1447, "step": 16370 }, { "epoch": 0.73, "learning_rate": 3.819523944038893e-05, "loss": 0.2382, "step": 16380 }, { "epoch": 0.73, "learning_rate": 3.8187805712076835e-05, "loss": 0.1624, "step": 16390 }, { "epoch": 0.73, "learning_rate": 3.818037198376474e-05, "loss": 0.2006, "step": 16400 }, { "epoch": 0.73, "learning_rate": 3.817293825545264e-05, "loss": 0.1946, "step": 16410 }, { "epoch": 0.73, "learning_rate": 3.816550452714054e-05, "loss": 0.1444, "step": 16420 }, { "epoch": 0.73, "learning_rate": 3.8158070798828446e-05, "loss": 0.1565, "step": 16430 }, { "epoch": 0.73, "learning_rate": 3.815063707051635e-05, "loss": 0.1934, "step": 16440 }, { "epoch": 0.73, "learning_rate": 3.814320334220425e-05, "loss": 0.2188, "step": 16450 }, { "epoch": 0.73, "learning_rate": 3.813576961389215e-05, "loss": 0.176, "step": 16460 }, { "epoch": 0.73, "learning_rate": 3.8128335885580056e-05, "loss": 0.1926, "step": 16470 }, { "epoch": 0.73, "learning_rate": 3.812090215726796e-05, "loss": 0.1556, "step": 16480 }, { "epoch": 0.73, "learning_rate": 3.811346842895586e-05, "loss": 0.1507, "step": 16490 }, { "epoch": 0.73, "learning_rate": 3.8106034700643764e-05, "loss": 0.1661, "step": 16500 }, { "epoch": 0.73, "learning_rate": 3.8098600972331666e-05, "loss": 0.1154, "step": 16510 }, { "epoch": 0.73, "learning_rate": 3.809116724401957e-05, "loss": 0.2014, "step": 16520 }, { "epoch": 0.73, "learning_rate": 3.808373351570747e-05, "loss": 0.2436, "step": 16530 }, { "epoch": 0.73, "learning_rate": 3.807629978739537e-05, "loss": 0.2163, "step": 16540 }, { "epoch": 0.73, "learning_rate": 3.8068866059083277e-05, "loss": 0.1875, "step": 16550 }, { "epoch": 0.73, "learning_rate": 3.806143233077118e-05, "loss": 0.2037, "step": 16560 }, { "epoch": 0.73, "learning_rate": 3.805399860245908e-05, "loss": 0.1432, "step": 16570 }, { "epoch": 0.73, "learning_rate": 3.8046564874146984e-05, "loss": 0.1327, "step": 16580 }, { "epoch": 0.73, "learning_rate": 3.803913114583488e-05, "loss": 0.1799, "step": 16590 }, { "epoch": 0.73, "learning_rate": 3.803169741752279e-05, "loss": 0.1937, "step": 16600 }, { "epoch": 0.74, "learning_rate": 3.8024263689210685e-05, "loss": 0.163, "step": 16610 }, { "epoch": 0.74, "learning_rate": 3.8016829960898595e-05, "loss": 0.2167, "step": 16620 }, { "epoch": 0.74, "learning_rate": 3.800939623258649e-05, "loss": 0.1855, "step": 16630 }, { "epoch": 0.74, "learning_rate": 3.80019625042744e-05, "loss": 0.1882, "step": 16640 }, { "epoch": 0.74, "learning_rate": 3.7994528775962296e-05, "loss": 0.1876, "step": 16650 }, { "epoch": 0.74, "learning_rate": 3.79870950476502e-05, "loss": 0.1726, "step": 16660 }, { "epoch": 0.74, "learning_rate": 3.79796613193381e-05, "loss": 0.1705, "step": 16670 }, { "epoch": 0.74, "learning_rate": 3.7972227591026e-05, "loss": 0.2255, "step": 16680 }, { "epoch": 0.74, "learning_rate": 3.796479386271391e-05, "loss": 0.1819, "step": 16690 }, { "epoch": 0.74, "learning_rate": 3.795736013440181e-05, "loss": 0.136, "step": 16700 }, { "epoch": 0.74, "learning_rate": 3.794992640608971e-05, "loss": 0.1702, "step": 16710 }, { "epoch": 0.74, "learning_rate": 3.7942492677777614e-05, "loss": 0.1576, "step": 16720 }, { "epoch": 0.74, "learning_rate": 3.7935058949465516e-05, "loss": 0.1629, "step": 16730 }, { "epoch": 0.74, "learning_rate": 3.792762522115342e-05, "loss": 0.2137, "step": 16740 }, { "epoch": 0.74, "learning_rate": 3.792019149284132e-05, "loss": 0.2444, "step": 16750 }, { "epoch": 0.74, "learning_rate": 3.7912757764529224e-05, "loss": 0.1903, "step": 16760 }, { "epoch": 0.74, "learning_rate": 3.7905324036217127e-05, "loss": 0.1866, "step": 16770 }, { "epoch": 0.74, "learning_rate": 3.789789030790503e-05, "loss": 0.1845, "step": 16780 }, { "epoch": 0.74, "learning_rate": 3.789045657959293e-05, "loss": 0.1824, "step": 16790 }, { "epoch": 0.74, "learning_rate": 3.788302285128083e-05, "loss": 0.1665, "step": 16800 }, { "epoch": 0.74, "learning_rate": 3.787558912296874e-05, "loss": 0.1295, "step": 16810 }, { "epoch": 0.74, "learning_rate": 3.786815539465664e-05, "loss": 0.2139, "step": 16820 }, { "epoch": 0.75, "learning_rate": 3.786072166634454e-05, "loss": 0.182, "step": 16830 }, { "epoch": 0.75, "learning_rate": 3.7853287938032445e-05, "loss": 0.1715, "step": 16840 }, { "epoch": 0.75, "learning_rate": 3.784585420972035e-05, "loss": 0.1296, "step": 16850 }, { "epoch": 0.75, "learning_rate": 3.783842048140825e-05, "loss": 0.1712, "step": 16860 }, { "epoch": 0.75, "learning_rate": 3.7830986753096146e-05, "loss": 0.1466, "step": 16870 }, { "epoch": 0.75, "learning_rate": 3.7823553024784055e-05, "loss": 0.1509, "step": 16880 }, { "epoch": 0.75, "learning_rate": 3.781611929647195e-05, "loss": 0.1392, "step": 16890 }, { "epoch": 0.75, "learning_rate": 3.780868556815986e-05, "loss": 0.1625, "step": 16900 }, { "epoch": 0.75, "learning_rate": 3.7801251839847756e-05, "loss": 0.1873, "step": 16910 }, { "epoch": 0.75, "learning_rate": 3.779381811153566e-05, "loss": 0.1728, "step": 16920 }, { "epoch": 0.75, "learning_rate": 3.778638438322357e-05, "loss": 0.1893, "step": 16930 }, { "epoch": 0.75, "learning_rate": 3.7778950654911464e-05, "loss": 0.2019, "step": 16940 }, { "epoch": 0.75, "learning_rate": 3.777151692659937e-05, "loss": 0.1736, "step": 16950 }, { "epoch": 0.75, "learning_rate": 3.776408319828727e-05, "loss": 0.1544, "step": 16960 }, { "epoch": 0.75, "learning_rate": 3.775664946997518e-05, "loss": 0.1733, "step": 16970 }, { "epoch": 0.75, "learning_rate": 3.7749215741663074e-05, "loss": 0.2452, "step": 16980 }, { "epoch": 0.75, "learning_rate": 3.7741782013350976e-05, "loss": 0.166, "step": 16990 }, { "epoch": 0.75, "learning_rate": 3.773434828503888e-05, "loss": 0.1652, "step": 17000 }, { "epoch": 0.75, "learning_rate": 3.772691455672678e-05, "loss": 0.1679, "step": 17010 }, { "epoch": 0.75, "learning_rate": 3.7719480828414684e-05, "loss": 0.189, "step": 17020 }, { "epoch": 0.75, "learning_rate": 3.771204710010259e-05, "loss": 0.1915, "step": 17030 }, { "epoch": 0.75, "learning_rate": 3.770461337179049e-05, "loss": 0.1695, "step": 17040 }, { "epoch": 0.75, "learning_rate": 3.769717964347839e-05, "loss": 0.2039, "step": 17050 }, { "epoch": 0.76, "learning_rate": 3.7689745915166295e-05, "loss": 0.194, "step": 17060 }, { "epoch": 0.76, "learning_rate": 3.76823121868542e-05, "loss": 0.1376, "step": 17070 }, { "epoch": 0.76, "learning_rate": 3.76748784585421e-05, "loss": 0.1797, "step": 17080 }, { "epoch": 0.76, "learning_rate": 3.766744473023e-05, "loss": 0.2214, "step": 17090 }, { "epoch": 0.76, "learning_rate": 3.7660011001917905e-05, "loss": 0.1726, "step": 17100 }, { "epoch": 0.76, "learning_rate": 3.765257727360581e-05, "loss": 0.182, "step": 17110 }, { "epoch": 0.76, "learning_rate": 3.764514354529371e-05, "loss": 0.1724, "step": 17120 }, { "epoch": 0.76, "learning_rate": 3.7637709816981606e-05, "loss": 0.1936, "step": 17130 }, { "epoch": 0.76, "learning_rate": 3.7630276088669515e-05, "loss": 0.1548, "step": 17140 }, { "epoch": 0.76, "learning_rate": 3.762284236035741e-05, "loss": 0.1747, "step": 17150 }, { "epoch": 0.76, "learning_rate": 3.761540863204532e-05, "loss": 0.1448, "step": 17160 }, { "epoch": 0.76, "learning_rate": 3.7607974903733216e-05, "loss": 0.1696, "step": 17170 }, { "epoch": 0.76, "learning_rate": 3.7600541175421126e-05, "loss": 0.1586, "step": 17180 }, { "epoch": 0.76, "learning_rate": 3.759310744710903e-05, "loss": 0.1775, "step": 17190 }, { "epoch": 0.76, "learning_rate": 3.7585673718796924e-05, "loss": 0.1469, "step": 17200 }, { "epoch": 0.76, "learning_rate": 3.757823999048483e-05, "loss": 0.2098, "step": 17210 }, { "epoch": 0.76, "learning_rate": 3.757080626217273e-05, "loss": 0.2723, "step": 17220 }, { "epoch": 0.76, "learning_rate": 3.756337253386064e-05, "loss": 0.1746, "step": 17230 }, { "epoch": 0.76, "learning_rate": 3.7555938805548534e-05, "loss": 0.183, "step": 17240 }, { "epoch": 0.76, "learning_rate": 3.754850507723644e-05, "loss": 0.2332, "step": 17250 }, { "epoch": 0.76, "learning_rate": 3.754107134892434e-05, "loss": 0.1237, "step": 17260 }, { "epoch": 0.76, "learning_rate": 3.753363762061224e-05, "loss": 0.16, "step": 17270 }, { "epoch": 0.77, "learning_rate": 3.7526203892300145e-05, "loss": 0.1458, "step": 17280 }, { "epoch": 0.77, "learning_rate": 3.751877016398805e-05, "loss": 0.1945, "step": 17290 }, { "epoch": 0.77, "learning_rate": 3.7511336435675956e-05, "loss": 0.1851, "step": 17300 }, { "epoch": 0.77, "learning_rate": 3.750390270736385e-05, "loss": 0.1674, "step": 17310 }, { "epoch": 0.77, "learning_rate": 3.7496468979051755e-05, "loss": 0.1898, "step": 17320 }, { "epoch": 0.77, "learning_rate": 3.748903525073966e-05, "loss": 0.2348, "step": 17330 }, { "epoch": 0.77, "learning_rate": 3.748160152242756e-05, "loss": 0.1871, "step": 17340 }, { "epoch": 0.77, "learning_rate": 3.747416779411546e-05, "loss": 0.159, "step": 17350 }, { "epoch": 0.77, "learning_rate": 3.7466734065803365e-05, "loss": 0.1506, "step": 17360 }, { "epoch": 0.77, "learning_rate": 3.745930033749127e-05, "loss": 0.1554, "step": 17370 }, { "epoch": 0.77, "learning_rate": 3.745186660917917e-05, "loss": 0.1797, "step": 17380 }, { "epoch": 0.77, "learning_rate": 3.744443288086707e-05, "loss": 0.1464, "step": 17390 }, { "epoch": 0.77, "learning_rate": 3.7436999152554975e-05, "loss": 0.1447, "step": 17400 }, { "epoch": 0.77, "learning_rate": 3.742956542424287e-05, "loss": 0.1686, "step": 17410 }, { "epoch": 0.77, "learning_rate": 3.742213169593078e-05, "loss": 0.146, "step": 17420 }, { "epoch": 0.77, "learning_rate": 3.741469796761868e-05, "loss": 0.1873, "step": 17430 }, { "epoch": 0.77, "learning_rate": 3.7407264239306586e-05, "loss": 0.1112, "step": 17440 }, { "epoch": 0.77, "learning_rate": 3.739983051099449e-05, "loss": 0.1684, "step": 17450 }, { "epoch": 0.77, "learning_rate": 3.7392396782682384e-05, "loss": 0.1792, "step": 17460 }, { "epoch": 0.77, "learning_rate": 3.7384963054370294e-05, "loss": 0.1922, "step": 17470 }, { "epoch": 0.77, "learning_rate": 3.737752932605819e-05, "loss": 0.1859, "step": 17480 }, { "epoch": 0.77, "learning_rate": 3.73700955977461e-05, "loss": 0.2012, "step": 17490 }, { "epoch": 0.77, "learning_rate": 3.7362661869433994e-05, "loss": 0.1518, "step": 17500 }, { "epoch": 0.78, "learning_rate": 3.7355228141121904e-05, "loss": 0.1629, "step": 17510 }, { "epoch": 0.78, "learning_rate": 3.73477944128098e-05, "loss": 0.1604, "step": 17520 }, { "epoch": 0.78, "learning_rate": 3.73403606844977e-05, "loss": 0.2046, "step": 17530 }, { "epoch": 0.78, "learning_rate": 3.733292695618561e-05, "loss": 0.1456, "step": 17540 }, { "epoch": 0.78, "learning_rate": 3.732549322787351e-05, "loss": 0.1761, "step": 17550 }, { "epoch": 0.78, "learning_rate": 3.731805949956142e-05, "loss": 0.1817, "step": 17560 }, { "epoch": 0.78, "learning_rate": 3.731062577124931e-05, "loss": 0.1584, "step": 17570 }, { "epoch": 0.78, "learning_rate": 3.7303192042937215e-05, "loss": 0.1975, "step": 17580 }, { "epoch": 0.78, "learning_rate": 3.729575831462512e-05, "loss": 0.15, "step": 17590 }, { "epoch": 0.78, "learning_rate": 3.728832458631302e-05, "loss": 0.1831, "step": 17600 }, { "epoch": 0.78, "learning_rate": 3.728089085800092e-05, "loss": 0.1603, "step": 17610 }, { "epoch": 0.78, "learning_rate": 3.7273457129688825e-05, "loss": 0.1702, "step": 17620 }, { "epoch": 0.78, "learning_rate": 3.726602340137673e-05, "loss": 0.2121, "step": 17630 }, { "epoch": 0.78, "learning_rate": 3.725858967306463e-05, "loss": 0.2074, "step": 17640 }, { "epoch": 0.78, "learning_rate": 3.725115594475253e-05, "loss": 0.1852, "step": 17650 }, { "epoch": 0.78, "learning_rate": 3.7243722216440436e-05, "loss": 0.1709, "step": 17660 }, { "epoch": 0.78, "learning_rate": 3.723628848812834e-05, "loss": 0.1367, "step": 17670 }, { "epoch": 0.78, "learning_rate": 3.722885475981624e-05, "loss": 0.1619, "step": 17680 }, { "epoch": 0.78, "learning_rate": 3.7221421031504144e-05, "loss": 0.1804, "step": 17690 }, { "epoch": 0.78, "learning_rate": 3.7213987303192046e-05, "loss": 0.1804, "step": 17700 }, { "epoch": 0.78, "learning_rate": 3.720655357487995e-05, "loss": 0.2461, "step": 17710 }, { "epoch": 0.78, "learning_rate": 3.719911984656785e-05, "loss": 0.1389, "step": 17720 }, { "epoch": 0.78, "learning_rate": 3.7191686118255754e-05, "loss": 0.185, "step": 17730 }, { "epoch": 0.79, "learning_rate": 3.718425238994365e-05, "loss": 0.1817, "step": 17740 }, { "epoch": 0.79, "learning_rate": 3.717681866163156e-05, "loss": 0.1793, "step": 17750 }, { "epoch": 0.79, "learning_rate": 3.7169384933319455e-05, "loss": 0.1848, "step": 17760 }, { "epoch": 0.79, "learning_rate": 3.7161951205007364e-05, "loss": 0.2006, "step": 17770 }, { "epoch": 0.79, "learning_rate": 3.715451747669526e-05, "loss": 0.1828, "step": 17780 }, { "epoch": 0.79, "learning_rate": 3.714708374838316e-05, "loss": 0.1876, "step": 17790 }, { "epoch": 0.79, "learning_rate": 3.713965002007107e-05, "loss": 0.1765, "step": 17800 }, { "epoch": 0.79, "learning_rate": 3.713221629175897e-05, "loss": 0.1625, "step": 17810 }, { "epoch": 0.79, "learning_rate": 3.712478256344688e-05, "loss": 0.1565, "step": 17820 }, { "epoch": 0.79, "learning_rate": 3.711734883513477e-05, "loss": 0.1122, "step": 17830 }, { "epoch": 0.79, "learning_rate": 3.710991510682268e-05, "loss": 0.1956, "step": 17840 }, { "epoch": 0.79, "learning_rate": 3.710248137851058e-05, "loss": 0.1547, "step": 17850 }, { "epoch": 0.79, "learning_rate": 3.709504765019848e-05, "loss": 0.199, "step": 17860 }, { "epoch": 0.79, "learning_rate": 3.708761392188638e-05, "loss": 0.1392, "step": 17870 }, { "epoch": 0.79, "learning_rate": 3.7080180193574286e-05, "loss": 0.184, "step": 17880 }, { "epoch": 0.79, "learning_rate": 3.707274646526219e-05, "loss": 0.1461, "step": 17890 }, { "epoch": 0.79, "learning_rate": 3.706531273695009e-05, "loss": 0.1669, "step": 17900 }, { "epoch": 0.79, "learning_rate": 3.7057879008637993e-05, "loss": 0.1702, "step": 17910 }, { "epoch": 0.79, "learning_rate": 3.7050445280325896e-05, "loss": 0.1537, "step": 17920 }, { "epoch": 0.79, "learning_rate": 3.70430115520138e-05, "loss": 0.2081, "step": 17930 }, { "epoch": 0.79, "learning_rate": 3.70355778237017e-05, "loss": 0.1356, "step": 17940 }, { "epoch": 0.79, "learning_rate": 3.7028144095389604e-05, "loss": 0.1889, "step": 17950 }, { "epoch": 0.8, "learning_rate": 3.7020710367077506e-05, "loss": 0.2287, "step": 17960 }, { "epoch": 0.8, "learning_rate": 3.701327663876541e-05, "loss": 0.1992, "step": 17970 }, { "epoch": 0.8, "learning_rate": 3.700584291045331e-05, "loss": 0.1901, "step": 17980 }, { "epoch": 0.8, "learning_rate": 3.6998409182141214e-05, "loss": 0.1837, "step": 17990 }, { "epoch": 0.8, "learning_rate": 3.699097545382911e-05, "loss": 0.1823, "step": 18000 }, { "epoch": 0.8, "learning_rate": 3.698354172551702e-05, "loss": 0.1512, "step": 18010 }, { "epoch": 0.8, "learning_rate": 3.6976107997204915e-05, "loss": 0.1823, "step": 18020 }, { "epoch": 0.8, "learning_rate": 3.6968674268892824e-05, "loss": 0.1816, "step": 18030 }, { "epoch": 0.8, "learning_rate": 3.696124054058073e-05, "loss": 0.1537, "step": 18040 }, { "epoch": 0.8, "learning_rate": 3.695380681226863e-05, "loss": 0.1754, "step": 18050 }, { "epoch": 0.8, "learning_rate": 3.694637308395653e-05, "loss": 0.1801, "step": 18060 }, { "epoch": 0.8, "learning_rate": 3.693893935564443e-05, "loss": 0.1886, "step": 18070 }, { "epoch": 0.8, "learning_rate": 3.693150562733234e-05, "loss": 0.1667, "step": 18080 }, { "epoch": 0.8, "learning_rate": 3.692407189902023e-05, "loss": 0.1243, "step": 18090 }, { "epoch": 0.8, "learning_rate": 3.691663817070814e-05, "loss": 0.1358, "step": 18100 }, { "epoch": 0.8, "learning_rate": 3.690920444239604e-05, "loss": 0.1924, "step": 18110 }, { "epoch": 0.8, "learning_rate": 3.690177071408394e-05, "loss": 0.2612, "step": 18120 }, { "epoch": 0.8, "learning_rate": 3.6894336985771843e-05, "loss": 0.1938, "step": 18130 }, { "epoch": 0.8, "learning_rate": 3.6886903257459746e-05, "loss": 0.2132, "step": 18140 }, { "epoch": 0.8, "learning_rate": 3.687946952914765e-05, "loss": 0.1605, "step": 18150 }, { "epoch": 0.8, "learning_rate": 3.687203580083555e-05, "loss": 0.1484, "step": 18160 }, { "epoch": 0.8, "learning_rate": 3.686460207252346e-05, "loss": 0.1778, "step": 18170 }, { "epoch": 0.8, "learning_rate": 3.6857168344211356e-05, "loss": 0.1956, "step": 18180 }, { "epoch": 0.81, "learning_rate": 3.684973461589926e-05, "loss": 0.185, "step": 18190 }, { "epoch": 0.81, "learning_rate": 3.684230088758716e-05, "loss": 0.1576, "step": 18200 }, { "epoch": 0.81, "learning_rate": 3.6834867159275064e-05, "loss": 0.1663, "step": 18210 }, { "epoch": 0.81, "learning_rate": 3.682743343096297e-05, "loss": 0.1558, "step": 18220 }, { "epoch": 0.81, "learning_rate": 3.681999970265087e-05, "loss": 0.2002, "step": 18230 }, { "epoch": 0.81, "learning_rate": 3.681256597433877e-05, "loss": 0.1327, "step": 18240 }, { "epoch": 0.81, "learning_rate": 3.6805132246026674e-05, "loss": 0.2429, "step": 18250 }, { "epoch": 0.81, "learning_rate": 3.679769851771458e-05, "loss": 0.1462, "step": 18260 }, { "epoch": 0.81, "learning_rate": 3.679026478940248e-05, "loss": 0.1836, "step": 18270 }, { "epoch": 0.81, "learning_rate": 3.678283106109038e-05, "loss": 0.1234, "step": 18280 }, { "epoch": 0.81, "learning_rate": 3.6775397332778285e-05, "loss": 0.1474, "step": 18290 }, { "epoch": 0.81, "learning_rate": 3.676796360446619e-05, "loss": 0.1757, "step": 18300 }, { "epoch": 0.81, "learning_rate": 3.676052987615409e-05, "loss": 0.1282, "step": 18310 }, { "epoch": 0.81, "learning_rate": 3.675309614784199e-05, "loss": 0.1834, "step": 18320 }, { "epoch": 0.81, "learning_rate": 3.674566241952989e-05, "loss": 0.1673, "step": 18330 }, { "epoch": 0.81, "learning_rate": 3.67382286912178e-05, "loss": 0.1127, "step": 18340 }, { "epoch": 0.81, "learning_rate": 3.6730794962905693e-05, "loss": 0.1909, "step": 18350 }, { "epoch": 0.81, "learning_rate": 3.67233612345936e-05, "loss": 0.1373, "step": 18360 }, { "epoch": 0.81, "learning_rate": 3.67159275062815e-05, "loss": 0.1647, "step": 18370 }, { "epoch": 0.81, "learning_rate": 3.670849377796941e-05, "loss": 0.2055, "step": 18380 }, { "epoch": 0.81, "learning_rate": 3.6701060049657304e-05, "loss": 0.2445, "step": 18390 }, { "epoch": 0.81, "learning_rate": 3.6693626321345206e-05, "loss": 0.1307, "step": 18400 }, { "epoch": 0.82, "learning_rate": 3.6686192593033116e-05, "loss": 0.1935, "step": 18410 }, { "epoch": 0.82, "learning_rate": 3.667875886472101e-05, "loss": 0.1723, "step": 18420 }, { "epoch": 0.82, "learning_rate": 3.667132513640892e-05, "loss": 0.1545, "step": 18430 }, { "epoch": 0.82, "learning_rate": 3.666389140809682e-05, "loss": 0.1257, "step": 18440 }, { "epoch": 0.82, "learning_rate": 3.665645767978472e-05, "loss": 0.1961, "step": 18450 }, { "epoch": 0.82, "learning_rate": 3.664902395147262e-05, "loss": 0.1733, "step": 18460 }, { "epoch": 0.82, "learning_rate": 3.6641590223160524e-05, "loss": 0.2285, "step": 18470 }, { "epoch": 0.82, "learning_rate": 3.663415649484843e-05, "loss": 0.1601, "step": 18480 }, { "epoch": 0.82, "learning_rate": 3.662672276653633e-05, "loss": 0.2179, "step": 18490 }, { "epoch": 0.82, "learning_rate": 3.661928903822423e-05, "loss": 0.1971, "step": 18500 }, { "epoch": 0.82, "learning_rate": 3.6611855309912135e-05, "loss": 0.1798, "step": 18510 }, { "epoch": 0.82, "learning_rate": 3.660442158160004e-05, "loss": 0.2057, "step": 18520 }, { "epoch": 0.82, "learning_rate": 3.659698785328794e-05, "loss": 0.2089, "step": 18530 }, { "epoch": 0.82, "learning_rate": 3.658955412497584e-05, "loss": 0.1596, "step": 18540 }, { "epoch": 0.82, "learning_rate": 3.6582120396663745e-05, "loss": 0.1648, "step": 18550 }, { "epoch": 0.82, "learning_rate": 3.657468666835165e-05, "loss": 0.1927, "step": 18560 }, { "epoch": 0.82, "learning_rate": 3.656725294003955e-05, "loss": 0.183, "step": 18570 }, { "epoch": 0.82, "learning_rate": 3.655981921172745e-05, "loss": 0.1881, "step": 18580 }, { "epoch": 0.82, "learning_rate": 3.6552385483415355e-05, "loss": 0.131, "step": 18590 }, { "epoch": 0.82, "learning_rate": 3.654495175510326e-05, "loss": 0.1466, "step": 18600 }, { "epoch": 0.82, "learning_rate": 3.6537518026791154e-05, "loss": 0.169, "step": 18610 }, { "epoch": 0.82, "learning_rate": 3.653008429847906e-05, "loss": 0.2043, "step": 18620 }, { "epoch": 0.82, "learning_rate": 3.652265057016696e-05, "loss": 0.1718, "step": 18630 }, { "epoch": 0.83, "learning_rate": 3.651521684185487e-05, "loss": 0.1813, "step": 18640 }, { "epoch": 0.83, "learning_rate": 3.650778311354277e-05, "loss": 0.157, "step": 18650 }, { "epoch": 0.83, "learning_rate": 3.650034938523067e-05, "loss": 0.1306, "step": 18660 }, { "epoch": 0.83, "learning_rate": 3.6492915656918576e-05, "loss": 0.1913, "step": 18670 }, { "epoch": 0.83, "learning_rate": 3.648548192860647e-05, "loss": 0.1343, "step": 18680 }, { "epoch": 0.83, "learning_rate": 3.647804820029438e-05, "loss": 0.1722, "step": 18690 }, { "epoch": 0.83, "learning_rate": 3.647061447198228e-05, "loss": 0.1587, "step": 18700 }, { "epoch": 0.83, "learning_rate": 3.6463180743670186e-05, "loss": 0.1481, "step": 18710 }, { "epoch": 0.83, "learning_rate": 3.645574701535808e-05, "loss": 0.1909, "step": 18720 }, { "epoch": 0.83, "learning_rate": 3.6448313287045985e-05, "loss": 0.1818, "step": 18730 }, { "epoch": 0.83, "learning_rate": 3.644087955873389e-05, "loss": 0.1803, "step": 18740 }, { "epoch": 0.83, "learning_rate": 3.643344583042179e-05, "loss": 0.1451, "step": 18750 }, { "epoch": 0.83, "learning_rate": 3.642601210210969e-05, "loss": 0.1912, "step": 18760 }, { "epoch": 0.83, "learning_rate": 3.6418578373797595e-05, "loss": 0.1878, "step": 18770 }, { "epoch": 0.83, "learning_rate": 3.6411144645485504e-05, "loss": 0.1759, "step": 18780 }, { "epoch": 0.83, "learning_rate": 3.64037109171734e-05, "loss": 0.1528, "step": 18790 }, { "epoch": 0.83, "learning_rate": 3.63962771888613e-05, "loss": 0.1628, "step": 18800 }, { "epoch": 0.83, "learning_rate": 3.6388843460549205e-05, "loss": 0.185, "step": 18810 }, { "epoch": 0.83, "learning_rate": 3.638140973223711e-05, "loss": 0.147, "step": 18820 }, { "epoch": 0.83, "learning_rate": 3.637397600392501e-05, "loss": 0.1668, "step": 18830 }, { "epoch": 0.83, "learning_rate": 3.636654227561291e-05, "loss": 0.1704, "step": 18840 }, { "epoch": 0.83, "learning_rate": 3.6359108547300816e-05, "loss": 0.1502, "step": 18850 }, { "epoch": 0.83, "learning_rate": 3.635167481898872e-05, "loss": 0.188, "step": 18860 }, { "epoch": 0.84, "learning_rate": 3.6344241090676614e-05, "loss": 0.1148, "step": 18870 }, { "epoch": 0.84, "learning_rate": 3.633680736236452e-05, "loss": 0.1785, "step": 18880 }, { "epoch": 0.84, "learning_rate": 3.6329373634052426e-05, "loss": 0.1545, "step": 18890 }, { "epoch": 0.84, "learning_rate": 3.632193990574033e-05, "loss": 0.1851, "step": 18900 }, { "epoch": 0.84, "learning_rate": 3.631450617742823e-05, "loss": 0.2196, "step": 18910 }, { "epoch": 0.84, "learning_rate": 3.6307072449116134e-05, "loss": 0.2182, "step": 18920 }, { "epoch": 0.84, "learning_rate": 3.6299638720804036e-05, "loss": 0.1466, "step": 18930 }, { "epoch": 0.84, "learning_rate": 3.629220499249193e-05, "loss": 0.16, "step": 18940 }, { "epoch": 0.84, "learning_rate": 3.628477126417984e-05, "loss": 0.1314, "step": 18950 }, { "epoch": 0.84, "learning_rate": 3.627733753586774e-05, "loss": 0.1871, "step": 18960 }, { "epoch": 0.84, "learning_rate": 3.6269903807555647e-05, "loss": 0.1611, "step": 18970 }, { "epoch": 0.84, "learning_rate": 3.626247007924354e-05, "loss": 0.1544, "step": 18980 }, { "epoch": 0.84, "learning_rate": 3.6255036350931445e-05, "loss": 0.1622, "step": 18990 }, { "epoch": 0.84, "learning_rate": 3.624760262261935e-05, "loss": 0.224, "step": 19000 }, { "epoch": 0.84, "learning_rate": 3.624016889430725e-05, "loss": 0.1996, "step": 19010 }, { "epoch": 0.84, "learning_rate": 3.623273516599516e-05, "loss": 0.1917, "step": 19020 }, { "epoch": 0.84, "learning_rate": 3.6225301437683055e-05, "loss": 0.1517, "step": 19030 }, { "epoch": 0.84, "learning_rate": 3.6217867709370965e-05, "loss": 0.1468, "step": 19040 }, { "epoch": 0.84, "learning_rate": 3.621043398105886e-05, "loss": 0.1821, "step": 19050 }, { "epoch": 0.84, "learning_rate": 3.620300025274676e-05, "loss": 0.1776, "step": 19060 }, { "epoch": 0.84, "learning_rate": 3.6195566524434666e-05, "loss": 0.2187, "step": 19070 }, { "epoch": 0.84, "learning_rate": 3.618813279612257e-05, "loss": 0.214, "step": 19080 }, { "epoch": 0.85, "learning_rate": 3.618069906781047e-05, "loss": 0.1894, "step": 19090 }, { "epoch": 0.85, "learning_rate": 3.617326533949837e-05, "loss": 0.2246, "step": 19100 }, { "epoch": 0.85, "learning_rate": 3.6165831611186276e-05, "loss": 0.1563, "step": 19110 }, { "epoch": 0.85, "learning_rate": 3.615839788287418e-05, "loss": 0.1936, "step": 19120 }, { "epoch": 0.85, "learning_rate": 3.615096415456208e-05, "loss": 0.204, "step": 19130 }, { "epoch": 0.85, "learning_rate": 3.6143530426249984e-05, "loss": 0.1727, "step": 19140 }, { "epoch": 0.85, "learning_rate": 3.6136096697937886e-05, "loss": 0.1336, "step": 19150 }, { "epoch": 0.85, "learning_rate": 3.612866296962579e-05, "loss": 0.194, "step": 19160 }, { "epoch": 0.85, "learning_rate": 3.612122924131369e-05, "loss": 0.2129, "step": 19170 }, { "epoch": 0.85, "learning_rate": 3.6113795513001594e-05, "loss": 0.17, "step": 19180 }, { "epoch": 0.85, "learning_rate": 3.6106361784689497e-05, "loss": 0.1946, "step": 19190 }, { "epoch": 0.85, "learning_rate": 3.609892805637739e-05, "loss": 0.1407, "step": 19200 }, { "epoch": 0.85, "learning_rate": 3.60914943280653e-05, "loss": 0.1528, "step": 19210 }, { "epoch": 0.85, "learning_rate": 3.60840605997532e-05, "loss": 0.1818, "step": 19220 }, { "epoch": 0.85, "learning_rate": 3.607662687144111e-05, "loss": 0.1997, "step": 19230 }, { "epoch": 0.85, "learning_rate": 3.6069193143129e-05, "loss": 0.1616, "step": 19240 }, { "epoch": 0.85, "learning_rate": 3.606175941481691e-05, "loss": 0.2097, "step": 19250 }, { "epoch": 0.85, "learning_rate": 3.6054325686504815e-05, "loss": 0.1998, "step": 19260 }, { "epoch": 0.85, "learning_rate": 3.604689195819271e-05, "loss": 0.1912, "step": 19270 }, { "epoch": 0.85, "learning_rate": 3.603945822988062e-05, "loss": 0.1473, "step": 19280 }, { "epoch": 0.85, "learning_rate": 3.6032024501568516e-05, "loss": 0.2152, "step": 19290 }, { "epoch": 0.85, "learning_rate": 3.6024590773256425e-05, "loss": 0.1736, "step": 19300 }, { "epoch": 0.85, "learning_rate": 3.601715704494432e-05, "loss": 0.1843, "step": 19310 }, { "epoch": 0.86, "learning_rate": 3.600972331663223e-05, "loss": 0.1757, "step": 19320 }, { "epoch": 0.86, "learning_rate": 3.6002289588320126e-05, "loss": 0.2041, "step": 19330 }, { "epoch": 0.86, "learning_rate": 3.599485586000803e-05, "loss": 0.1544, "step": 19340 }, { "epoch": 0.86, "learning_rate": 3.598742213169593e-05, "loss": 0.163, "step": 19350 }, { "epoch": 0.86, "learning_rate": 3.5979988403383834e-05, "loss": 0.1837, "step": 19360 }, { "epoch": 0.86, "learning_rate": 3.5972554675071736e-05, "loss": 0.1848, "step": 19370 }, { "epoch": 0.86, "learning_rate": 3.596512094675964e-05, "loss": 0.1762, "step": 19380 }, { "epoch": 0.86, "learning_rate": 3.595768721844754e-05, "loss": 0.1707, "step": 19390 }, { "epoch": 0.86, "learning_rate": 3.5950253490135444e-05, "loss": 0.1746, "step": 19400 }, { "epoch": 0.86, "learning_rate": 3.5942819761823347e-05, "loss": 0.2088, "step": 19410 }, { "epoch": 0.86, "learning_rate": 3.593538603351125e-05, "loss": 0.1934, "step": 19420 }, { "epoch": 0.86, "learning_rate": 3.592795230519915e-05, "loss": 0.1898, "step": 19430 }, { "epoch": 0.86, "learning_rate": 3.5920518576887054e-05, "loss": 0.1842, "step": 19440 }, { "epoch": 0.86, "learning_rate": 3.591308484857496e-05, "loss": 0.1432, "step": 19450 }, { "epoch": 0.86, "learning_rate": 3.590565112026286e-05, "loss": 0.2012, "step": 19460 }, { "epoch": 0.86, "learning_rate": 3.589821739195076e-05, "loss": 0.1424, "step": 19470 }, { "epoch": 0.86, "learning_rate": 3.589078366363866e-05, "loss": 0.1975, "step": 19480 }, { "epoch": 0.86, "learning_rate": 3.588334993532657e-05, "loss": 0.2077, "step": 19490 }, { "epoch": 0.86, "learning_rate": 3.587591620701446e-05, "loss": 0.15, "step": 19500 }, { "epoch": 0.86, "learning_rate": 3.586848247870237e-05, "loss": 0.1825, "step": 19510 }, { "epoch": 0.86, "learning_rate": 3.5861048750390275e-05, "loss": 0.1482, "step": 19520 }, { "epoch": 0.86, "learning_rate": 3.585361502207818e-05, "loss": 0.1503, "step": 19530 }, { "epoch": 0.87, "learning_rate": 3.584618129376608e-05, "loss": 0.2178, "step": 19540 }, { "epoch": 0.87, "learning_rate": 3.5838747565453976e-05, "loss": 0.2188, "step": 19550 }, { "epoch": 0.87, "learning_rate": 3.5831313837141885e-05, "loss": 0.1985, "step": 19560 }, { "epoch": 0.87, "learning_rate": 3.582388010882978e-05, "loss": 0.1951, "step": 19570 }, { "epoch": 0.87, "learning_rate": 3.581644638051769e-05, "loss": 0.153, "step": 19580 }, { "epoch": 0.87, "learning_rate": 3.5809012652205586e-05, "loss": 0.1523, "step": 19590 }, { "epoch": 0.87, "learning_rate": 3.580157892389349e-05, "loss": 0.1857, "step": 19600 }, { "epoch": 0.87, "learning_rate": 3.579414519558139e-05, "loss": 0.2138, "step": 19610 }, { "epoch": 0.87, "learning_rate": 3.5786711467269294e-05, "loss": 0.198, "step": 19620 }, { "epoch": 0.87, "learning_rate": 3.57792777389572e-05, "loss": 0.1276, "step": 19630 }, { "epoch": 0.87, "learning_rate": 3.57718440106451e-05, "loss": 0.1954, "step": 19640 }, { "epoch": 0.87, "learning_rate": 3.576441028233301e-05, "loss": 0.1989, "step": 19650 }, { "epoch": 0.87, "learning_rate": 3.5756976554020904e-05, "loss": 0.1647, "step": 19660 }, { "epoch": 0.87, "learning_rate": 3.574954282570881e-05, "loss": 0.1461, "step": 19670 }, { "epoch": 0.87, "learning_rate": 3.574210909739671e-05, "loss": 0.2016, "step": 19680 }, { "epoch": 0.87, "learning_rate": 3.573467536908461e-05, "loss": 0.2673, "step": 19690 }, { "epoch": 0.87, "learning_rate": 3.5727241640772515e-05, "loss": 0.1919, "step": 19700 }, { "epoch": 0.87, "learning_rate": 3.571980791246042e-05, "loss": 0.1426, "step": 19710 }, { "epoch": 0.87, "learning_rate": 3.571237418414832e-05, "loss": 0.2006, "step": 19720 }, { "epoch": 0.87, "learning_rate": 3.570494045583622e-05, "loss": 0.1479, "step": 19730 }, { "epoch": 0.87, "learning_rate": 3.5697506727524125e-05, "loss": 0.1288, "step": 19740 }, { "epoch": 0.87, "learning_rate": 3.569007299921203e-05, "loss": 0.1831, "step": 19750 }, { "epoch": 0.87, "learning_rate": 3.568263927089993e-05, "loss": 0.1801, "step": 19760 }, { "epoch": 0.88, "learning_rate": 3.567520554258783e-05, "loss": 0.2104, "step": 19770 }, { "epoch": 0.88, "learning_rate": 3.5667771814275735e-05, "loss": 0.163, "step": 19780 }, { "epoch": 0.88, "learning_rate": 3.566033808596364e-05, "loss": 0.2035, "step": 19790 }, { "epoch": 0.88, "learning_rate": 3.565290435765154e-05, "loss": 0.1717, "step": 19800 }, { "epoch": 0.88, "learning_rate": 3.5645470629339436e-05, "loss": 0.1464, "step": 19810 }, { "epoch": 0.88, "learning_rate": 3.5638036901027346e-05, "loss": 0.1406, "step": 19820 }, { "epoch": 0.88, "learning_rate": 3.563060317271524e-05, "loss": 0.2377, "step": 19830 }, { "epoch": 0.88, "learning_rate": 3.562316944440315e-05, "loss": 0.1477, "step": 19840 }, { "epoch": 0.88, "learning_rate": 3.5615735716091046e-05, "loss": 0.1597, "step": 19850 }, { "epoch": 0.88, "learning_rate": 3.5608301987778956e-05, "loss": 0.163, "step": 19860 }, { "epoch": 0.88, "learning_rate": 3.560086825946686e-05, "loss": 0.1546, "step": 19870 }, { "epoch": 0.88, "learning_rate": 3.5593434531154754e-05, "loss": 0.1832, "step": 19880 }, { "epoch": 0.88, "learning_rate": 3.5586000802842664e-05, "loss": 0.1707, "step": 19890 }, { "epoch": 0.88, "learning_rate": 3.557856707453056e-05, "loss": 0.2378, "step": 19900 }, { "epoch": 0.88, "learning_rate": 3.557113334621847e-05, "loss": 0.1197, "step": 19910 }, { "epoch": 0.88, "learning_rate": 3.5563699617906365e-05, "loss": 0.1856, "step": 19920 }, { "epoch": 0.88, "learning_rate": 3.555626588959427e-05, "loss": 0.1441, "step": 19930 }, { "epoch": 0.88, "learning_rate": 3.554883216128217e-05, "loss": 0.1594, "step": 19940 }, { "epoch": 0.88, "learning_rate": 3.554139843297007e-05, "loss": 0.21, "step": 19950 }, { "epoch": 0.88, "learning_rate": 3.5533964704657975e-05, "loss": 0.1894, "step": 19960 }, { "epoch": 0.88, "learning_rate": 3.552653097634588e-05, "loss": 0.1895, "step": 19970 }, { "epoch": 0.88, "learning_rate": 3.551909724803378e-05, "loss": 0.1739, "step": 19980 }, { "epoch": 0.89, "learning_rate": 3.551166351972168e-05, "loss": 0.1825, "step": 19990 }, { "epoch": 0.89, "learning_rate": 3.5504229791409585e-05, "loss": 0.1506, "step": 20000 }, { "epoch": 0.89, "learning_rate": 3.549679606309749e-05, "loss": 0.1467, "step": 20010 }, { "epoch": 0.89, "learning_rate": 3.548936233478539e-05, "loss": 0.1529, "step": 20020 }, { "epoch": 0.89, "learning_rate": 3.548192860647329e-05, "loss": 0.2291, "step": 20030 }, { "epoch": 0.89, "learning_rate": 3.5474494878161195e-05, "loss": 0.2038, "step": 20040 }, { "epoch": 0.89, "learning_rate": 3.54670611498491e-05, "loss": 0.1566, "step": 20050 }, { "epoch": 0.89, "learning_rate": 3.5459627421537e-05, "loss": 0.1304, "step": 20060 }, { "epoch": 0.89, "learning_rate": 3.54521936932249e-05, "loss": 0.1848, "step": 20070 }, { "epoch": 0.89, "learning_rate": 3.5444759964912806e-05, "loss": 0.1929, "step": 20080 }, { "epoch": 0.89, "learning_rate": 3.54373262366007e-05, "loss": 0.1364, "step": 20090 }, { "epoch": 0.89, "learning_rate": 3.542989250828861e-05, "loss": 0.1222, "step": 20100 }, { "epoch": 0.89, "learning_rate": 3.542245877997651e-05, "loss": 0.1805, "step": 20110 }, { "epoch": 0.89, "learning_rate": 3.5415025051664416e-05, "loss": 0.1358, "step": 20120 }, { "epoch": 0.89, "learning_rate": 3.540759132335232e-05, "loss": 0.1688, "step": 20130 }, { "epoch": 0.89, "learning_rate": 3.5400157595040214e-05, "loss": 0.1731, "step": 20140 }, { "epoch": 0.89, "learning_rate": 3.5392723866728124e-05, "loss": 0.1693, "step": 20150 }, { "epoch": 0.89, "learning_rate": 3.538529013841602e-05, "loss": 0.1629, "step": 20160 }, { "epoch": 0.89, "learning_rate": 3.537785641010393e-05, "loss": 0.1345, "step": 20170 }, { "epoch": 0.89, "learning_rate": 3.5370422681791825e-05, "loss": 0.2363, "step": 20180 }, { "epoch": 0.89, "learning_rate": 3.5362988953479734e-05, "loss": 0.2016, "step": 20190 }, { "epoch": 0.89, "learning_rate": 3.535555522516763e-05, "loss": 0.1146, "step": 20200 }, { "epoch": 0.89, "learning_rate": 3.534812149685553e-05, "loss": 0.1736, "step": 20210 }, { "epoch": 0.9, "learning_rate": 3.5340687768543435e-05, "loss": 0.1581, "step": 20220 }, { "epoch": 0.9, "learning_rate": 3.533325404023134e-05, "loss": 0.1632, "step": 20230 }, { "epoch": 0.9, "learning_rate": 3.532582031191925e-05, "loss": 0.1448, "step": 20240 }, { "epoch": 0.9, "learning_rate": 3.531838658360714e-05, "loss": 0.2599, "step": 20250 }, { "epoch": 0.9, "learning_rate": 3.5310952855295045e-05, "loss": 0.2301, "step": 20260 }, { "epoch": 0.9, "learning_rate": 3.530351912698295e-05, "loss": 0.1855, "step": 20270 }, { "epoch": 0.9, "learning_rate": 3.529608539867085e-05, "loss": 0.1503, "step": 20280 }, { "epoch": 0.9, "learning_rate": 3.528865167035875e-05, "loss": 0.1726, "step": 20290 }, { "epoch": 0.9, "learning_rate": 3.5281217942046656e-05, "loss": 0.1206, "step": 20300 }, { "epoch": 0.9, "learning_rate": 3.527378421373456e-05, "loss": 0.1642, "step": 20310 }, { "epoch": 0.9, "learning_rate": 3.526635048542246e-05, "loss": 0.1518, "step": 20320 }, { "epoch": 0.9, "learning_rate": 3.5258916757110364e-05, "loss": 0.1623, "step": 20330 }, { "epoch": 0.9, "learning_rate": 3.5251483028798266e-05, "loss": 0.1276, "step": 20340 }, { "epoch": 0.9, "learning_rate": 3.524404930048616e-05, "loss": 0.1679, "step": 20350 }, { "epoch": 0.9, "learning_rate": 3.523661557217407e-05, "loss": 0.1765, "step": 20360 }, { "epoch": 0.9, "learning_rate": 3.5229181843861974e-05, "loss": 0.1949, "step": 20370 }, { "epoch": 0.9, "learning_rate": 3.5221748115549876e-05, "loss": 0.1555, "step": 20380 }, { "epoch": 0.9, "learning_rate": 3.521431438723778e-05, "loss": 0.1585, "step": 20390 }, { "epoch": 0.9, "learning_rate": 3.520688065892568e-05, "loss": 0.2012, "step": 20400 }, { "epoch": 0.9, "learning_rate": 3.5199446930613584e-05, "loss": 0.2392, "step": 20410 }, { "epoch": 0.9, "learning_rate": 3.519201320230148e-05, "loss": 0.1692, "step": 20420 }, { "epoch": 0.9, "learning_rate": 3.518457947398939e-05, "loss": 0.1568, "step": 20430 }, { "epoch": 0.9, "learning_rate": 3.5177145745677285e-05, "loss": 0.1591, "step": 20440 }, { "epoch": 0.91, "learning_rate": 3.5169712017365194e-05, "loss": 0.1805, "step": 20450 }, { "epoch": 0.91, "learning_rate": 3.516227828905309e-05, "loss": 0.1668, "step": 20460 }, { "epoch": 0.91, "learning_rate": 3.515484456074099e-05, "loss": 0.1703, "step": 20470 }, { "epoch": 0.91, "learning_rate": 3.5147410832428895e-05, "loss": 0.1182, "step": 20480 }, { "epoch": 0.91, "learning_rate": 3.51399771041168e-05, "loss": 0.1707, "step": 20490 }, { "epoch": 0.91, "learning_rate": 3.513254337580471e-05, "loss": 0.265, "step": 20500 }, { "epoch": 0.91, "learning_rate": 3.51251096474926e-05, "loss": 0.1651, "step": 20510 }, { "epoch": 0.91, "learning_rate": 3.511767591918051e-05, "loss": 0.1637, "step": 20520 }, { "epoch": 0.91, "learning_rate": 3.511024219086841e-05, "loss": 0.1813, "step": 20530 }, { "epoch": 0.91, "learning_rate": 3.510280846255631e-05, "loss": 0.1879, "step": 20540 }, { "epoch": 0.91, "learning_rate": 3.5095374734244213e-05, "loss": 0.1566, "step": 20550 }, { "epoch": 0.91, "learning_rate": 3.5087941005932116e-05, "loss": 0.1517, "step": 20560 }, { "epoch": 0.91, "learning_rate": 3.508050727762002e-05, "loss": 0.1857, "step": 20570 }, { "epoch": 0.91, "learning_rate": 3.507307354930792e-05, "loss": 0.1479, "step": 20580 }, { "epoch": 0.91, "learning_rate": 3.5065639820995824e-05, "loss": 0.2, "step": 20590 }, { "epoch": 0.91, "learning_rate": 3.5058206092683726e-05, "loss": 0.1639, "step": 20600 }, { "epoch": 0.91, "learning_rate": 3.505077236437163e-05, "loss": 0.1489, "step": 20610 }, { "epoch": 0.91, "learning_rate": 3.504333863605953e-05, "loss": 0.1937, "step": 20620 }, { "epoch": 0.91, "learning_rate": 3.5035904907747434e-05, "loss": 0.1581, "step": 20630 }, { "epoch": 0.91, "learning_rate": 3.502847117943534e-05, "loss": 0.1845, "step": 20640 }, { "epoch": 0.91, "learning_rate": 3.502103745112324e-05, "loss": 0.1222, "step": 20650 }, { "epoch": 0.91, "learning_rate": 3.501360372281114e-05, "loss": 0.1661, "step": 20660 }, { "epoch": 0.92, "learning_rate": 3.5006169994499044e-05, "loss": 0.1912, "step": 20670 }, { "epoch": 0.92, "learning_rate": 3.499873626618694e-05, "loss": 0.1708, "step": 20680 }, { "epoch": 0.92, "learning_rate": 3.499130253787485e-05, "loss": 0.163, "step": 20690 }, { "epoch": 0.92, "learning_rate": 3.4983868809562745e-05, "loss": 0.2093, "step": 20700 }, { "epoch": 0.92, "learning_rate": 3.4976435081250655e-05, "loss": 0.1256, "step": 20710 }, { "epoch": 0.92, "learning_rate": 3.496900135293855e-05, "loss": 0.1564, "step": 20720 }, { "epoch": 0.92, "learning_rate": 3.496156762462646e-05, "loss": 0.1093, "step": 20730 }, { "epoch": 0.92, "learning_rate": 3.495413389631436e-05, "loss": 0.1565, "step": 20740 }, { "epoch": 0.92, "learning_rate": 3.494670016800226e-05, "loss": 0.1329, "step": 20750 }, { "epoch": 0.92, "learning_rate": 3.493926643969017e-05, "loss": 0.1719, "step": 20760 }, { "epoch": 0.92, "learning_rate": 3.4931832711378063e-05, "loss": 0.2113, "step": 20770 }, { "epoch": 0.92, "learning_rate": 3.492439898306597e-05, "loss": 0.1556, "step": 20780 }, { "epoch": 0.92, "learning_rate": 3.491696525475387e-05, "loss": 0.1636, "step": 20790 }, { "epoch": 0.92, "learning_rate": 3.490953152644177e-05, "loss": 0.152, "step": 20800 }, { "epoch": 0.92, "learning_rate": 3.4902097798129674e-05, "loss": 0.1574, "step": 20810 }, { "epoch": 0.92, "learning_rate": 3.4894664069817576e-05, "loss": 0.1751, "step": 20820 }, { "epoch": 0.92, "learning_rate": 3.488723034150548e-05, "loss": 0.166, "step": 20830 }, { "epoch": 0.92, "learning_rate": 3.487979661319338e-05, "loss": 0.2218, "step": 20840 }, { "epoch": 0.92, "learning_rate": 3.487236288488129e-05, "loss": 0.1262, "step": 20850 }, { "epoch": 0.92, "learning_rate": 3.486492915656919e-05, "loss": 0.1297, "step": 20860 }, { "epoch": 0.92, "learning_rate": 3.485749542825709e-05, "loss": 0.2014, "step": 20870 }, { "epoch": 0.92, "learning_rate": 3.485006169994499e-05, "loss": 0.2317, "step": 20880 }, { "epoch": 0.92, "learning_rate": 3.4842627971632894e-05, "loss": 0.1606, "step": 20890 }, { "epoch": 0.93, "learning_rate": 3.48351942433208e-05, "loss": 0.1695, "step": 20900 }, { "epoch": 0.93, "learning_rate": 3.48277605150087e-05, "loss": 0.1627, "step": 20910 }, { "epoch": 0.93, "learning_rate": 3.48203267866966e-05, "loss": 0.1828, "step": 20920 }, { "epoch": 0.93, "learning_rate": 3.4812893058384505e-05, "loss": 0.1814, "step": 20930 }, { "epoch": 0.93, "learning_rate": 3.480545933007241e-05, "loss": 0.1273, "step": 20940 }, { "epoch": 0.93, "learning_rate": 3.479802560176031e-05, "loss": 0.0842, "step": 20950 }, { "epoch": 0.93, "learning_rate": 3.4790591873448206e-05, "loss": 0.1921, "step": 20960 }, { "epoch": 0.93, "learning_rate": 3.4783158145136115e-05, "loss": 0.2187, "step": 20970 }, { "epoch": 0.93, "learning_rate": 3.477572441682402e-05, "loss": 0.151, "step": 20980 }, { "epoch": 0.93, "learning_rate": 3.476829068851192e-05, "loss": 0.1668, "step": 20990 }, { "epoch": 0.93, "learning_rate": 3.476085696019982e-05, "loss": 0.1204, "step": 21000 }, { "epoch": 0.93, "learning_rate": 3.475342323188772e-05, "loss": 0.1847, "step": 21010 }, { "epoch": 0.93, "learning_rate": 3.474598950357563e-05, "loss": 0.2097, "step": 21020 }, { "epoch": 0.93, "learning_rate": 3.4738555775263524e-05, "loss": 0.2032, "step": 21030 }, { "epoch": 0.93, "learning_rate": 3.473112204695143e-05, "loss": 0.1899, "step": 21040 }, { "epoch": 0.93, "learning_rate": 3.472368831863933e-05, "loss": 0.1531, "step": 21050 }, { "epoch": 0.93, "learning_rate": 3.471625459032724e-05, "loss": 0.1445, "step": 21060 }, { "epoch": 0.93, "learning_rate": 3.4708820862015134e-05, "loss": 0.1599, "step": 21070 }, { "epoch": 0.93, "learning_rate": 3.470138713370304e-05, "loss": 0.1504, "step": 21080 }, { "epoch": 0.93, "learning_rate": 3.469395340539094e-05, "loss": 0.2265, "step": 21090 }, { "epoch": 0.93, "learning_rate": 3.468651967707884e-05, "loss": 0.1613, "step": 21100 }, { "epoch": 0.93, "learning_rate": 3.467908594876675e-05, "loss": 0.1736, "step": 21110 }, { "epoch": 0.94, "learning_rate": 3.467165222045465e-05, "loss": 0.2156, "step": 21120 }, { "epoch": 0.94, "learning_rate": 3.466421849214255e-05, "loss": 0.1427, "step": 21130 }, { "epoch": 0.94, "learning_rate": 3.465678476383045e-05, "loss": 0.1633, "step": 21140 }, { "epoch": 0.94, "learning_rate": 3.4649351035518355e-05, "loss": 0.1782, "step": 21150 }, { "epoch": 0.94, "learning_rate": 3.464191730720626e-05, "loss": 0.1281, "step": 21160 }, { "epoch": 0.94, "learning_rate": 3.463448357889416e-05, "loss": 0.1595, "step": 21170 }, { "epoch": 0.94, "learning_rate": 3.462704985058206e-05, "loss": 0.1619, "step": 21180 }, { "epoch": 0.94, "learning_rate": 3.4619616122269965e-05, "loss": 0.1473, "step": 21190 }, { "epoch": 0.94, "learning_rate": 3.461218239395787e-05, "loss": 0.1755, "step": 21200 }, { "epoch": 0.94, "learning_rate": 3.460474866564577e-05, "loss": 0.1827, "step": 21210 }, { "epoch": 0.94, "learning_rate": 3.459731493733367e-05, "loss": 0.1749, "step": 21220 }, { "epoch": 0.94, "learning_rate": 3.4589881209021575e-05, "loss": 0.1945, "step": 21230 }, { "epoch": 0.94, "learning_rate": 3.458244748070948e-05, "loss": 0.1492, "step": 21240 }, { "epoch": 0.94, "learning_rate": 3.457501375239738e-05, "loss": 0.1474, "step": 21250 }, { "epoch": 0.94, "learning_rate": 3.456758002408528e-05, "loss": 0.1614, "step": 21260 }, { "epoch": 0.94, "learning_rate": 3.4560146295773186e-05, "loss": 0.2015, "step": 21270 }, { "epoch": 0.94, "learning_rate": 3.455271256746109e-05, "loss": 0.1238, "step": 21280 }, { "epoch": 0.94, "learning_rate": 3.4545278839148984e-05, "loss": 0.1438, "step": 21290 }, { "epoch": 0.94, "learning_rate": 3.4537845110836893e-05, "loss": 0.2032, "step": 21300 }, { "epoch": 0.94, "learning_rate": 3.453041138252479e-05, "loss": 0.1242, "step": 21310 }, { "epoch": 0.94, "learning_rate": 3.45229776542127e-05, "loss": 0.171, "step": 21320 }, { "epoch": 0.94, "learning_rate": 3.4515543925900594e-05, "loss": 0.1792, "step": 21330 }, { "epoch": 0.94, "learning_rate": 3.45081101975885e-05, "loss": 0.1499, "step": 21340 }, { "epoch": 0.95, "learning_rate": 3.4500676469276406e-05, "loss": 0.1802, "step": 21350 }, { "epoch": 0.95, "learning_rate": 3.44932427409643e-05, "loss": 0.1895, "step": 21360 }, { "epoch": 0.95, "learning_rate": 3.448580901265221e-05, "loss": 0.1448, "step": 21370 }, { "epoch": 0.95, "learning_rate": 3.447837528434011e-05, "loss": 0.1985, "step": 21380 }, { "epoch": 0.95, "learning_rate": 3.4470941556028017e-05, "loss": 0.1352, "step": 21390 }, { "epoch": 0.95, "learning_rate": 3.446350782771591e-05, "loss": 0.1313, "step": 21400 }, { "epoch": 0.95, "learning_rate": 3.4456074099403815e-05, "loss": 0.1684, "step": 21410 }, { "epoch": 0.95, "learning_rate": 3.444864037109172e-05, "loss": 0.1457, "step": 21420 }, { "epoch": 0.95, "learning_rate": 3.444120664277962e-05, "loss": 0.1242, "step": 21430 }, { "epoch": 0.95, "learning_rate": 3.443377291446752e-05, "loss": 0.1113, "step": 21440 }, { "epoch": 0.95, "learning_rate": 3.4426339186155425e-05, "loss": 0.1525, "step": 21450 }, { "epoch": 0.95, "learning_rate": 3.441890545784333e-05, "loss": 0.1954, "step": 21460 }, { "epoch": 0.95, "learning_rate": 3.441147172953123e-05, "loss": 0.184, "step": 21470 }, { "epoch": 0.95, "learning_rate": 3.440403800121913e-05, "loss": 0.1395, "step": 21480 }, { "epoch": 0.95, "learning_rate": 3.4396604272907036e-05, "loss": 0.2079, "step": 21490 }, { "epoch": 0.95, "learning_rate": 3.438917054459494e-05, "loss": 0.1861, "step": 21500 }, { "epoch": 0.95, "learning_rate": 3.438173681628284e-05, "loss": 0.2205, "step": 21510 }, { "epoch": 0.95, "learning_rate": 3.437430308797074e-05, "loss": 0.116, "step": 21520 }, { "epoch": 0.95, "learning_rate": 3.4366869359658646e-05, "loss": 0.1297, "step": 21530 }, { "epoch": 0.95, "learning_rate": 3.435943563134655e-05, "loss": 0.1878, "step": 21540 }, { "epoch": 0.95, "learning_rate": 3.4352001903034444e-05, "loss": 0.1394, "step": 21550 }, { "epoch": 0.95, "learning_rate": 3.4344568174722354e-05, "loss": 0.1725, "step": 21560 }, { "epoch": 0.95, "learning_rate": 3.433713444641025e-05, "loss": 0.1385, "step": 21570 }, { "epoch": 0.96, "learning_rate": 3.432970071809816e-05, "loss": 0.2084, "step": 21580 }, { "epoch": 0.96, "learning_rate": 3.432226698978606e-05, "loss": 0.1816, "step": 21590 }, { "epoch": 0.96, "learning_rate": 3.4314833261473964e-05, "loss": 0.1887, "step": 21600 }, { "epoch": 0.96, "learning_rate": 3.4307399533161867e-05, "loss": 0.1795, "step": 21610 }, { "epoch": 0.96, "learning_rate": 3.429996580484976e-05, "loss": 0.1507, "step": 21620 }, { "epoch": 0.96, "learning_rate": 3.429253207653767e-05, "loss": 0.1552, "step": 21630 }, { "epoch": 0.96, "learning_rate": 3.428509834822557e-05, "loss": 0.1683, "step": 21640 }, { "epoch": 0.96, "learning_rate": 3.427766461991348e-05, "loss": 0.1727, "step": 21650 }, { "epoch": 0.96, "learning_rate": 3.427023089160137e-05, "loss": 0.1811, "step": 21660 }, { "epoch": 0.96, "learning_rate": 3.4262797163289275e-05, "loss": 0.1742, "step": 21670 }, { "epoch": 0.96, "learning_rate": 3.425536343497718e-05, "loss": 0.1658, "step": 21680 }, { "epoch": 0.96, "learning_rate": 3.424792970666508e-05, "loss": 0.1591, "step": 21690 }, { "epoch": 0.96, "learning_rate": 3.424049597835298e-05, "loss": 0.2078, "step": 21700 }, { "epoch": 0.96, "learning_rate": 3.4233062250040886e-05, "loss": 0.1816, "step": 21710 }, { "epoch": 0.96, "learning_rate": 3.4225628521728795e-05, "loss": 0.1703, "step": 21720 }, { "epoch": 0.96, "learning_rate": 3.421819479341669e-05, "loss": 0.1697, "step": 21730 }, { "epoch": 0.96, "learning_rate": 3.421076106510459e-05, "loss": 0.1244, "step": 21740 }, { "epoch": 0.96, "learning_rate": 3.4203327336792496e-05, "loss": 0.1399, "step": 21750 }, { "epoch": 0.96, "learning_rate": 3.41958936084804e-05, "loss": 0.1678, "step": 21760 }, { "epoch": 0.96, "learning_rate": 3.41884598801683e-05, "loss": 0.1592, "step": 21770 }, { "epoch": 0.96, "learning_rate": 3.4181026151856204e-05, "loss": 0.1717, "step": 21780 }, { "epoch": 0.96, "learning_rate": 3.4173592423544106e-05, "loss": 0.2147, "step": 21790 }, { "epoch": 0.97, "learning_rate": 3.416615869523201e-05, "loss": 0.1448, "step": 21800 }, { "epoch": 0.97, "learning_rate": 3.415872496691991e-05, "loss": 0.1589, "step": 21810 }, { "epoch": 0.97, "learning_rate": 3.4151291238607814e-05, "loss": 0.1597, "step": 21820 }, { "epoch": 0.97, "learning_rate": 3.4143857510295717e-05, "loss": 0.211, "step": 21830 }, { "epoch": 0.97, "learning_rate": 3.413642378198362e-05, "loss": 0.1704, "step": 21840 }, { "epoch": 0.97, "learning_rate": 3.412899005367152e-05, "loss": 0.1576, "step": 21850 }, { "epoch": 0.97, "learning_rate": 3.4121556325359424e-05, "loss": 0.1723, "step": 21860 }, { "epoch": 0.97, "learning_rate": 3.411412259704733e-05, "loss": 0.2127, "step": 21870 }, { "epoch": 0.97, "learning_rate": 3.410668886873522e-05, "loss": 0.1752, "step": 21880 }, { "epoch": 0.97, "learning_rate": 3.409925514042313e-05, "loss": 0.1701, "step": 21890 }, { "epoch": 0.97, "learning_rate": 3.409182141211103e-05, "loss": 0.1552, "step": 21900 }, { "epoch": 0.97, "learning_rate": 3.408438768379894e-05, "loss": 0.2003, "step": 21910 }, { "epoch": 0.97, "learning_rate": 3.407695395548683e-05, "loss": 0.2109, "step": 21920 }, { "epoch": 0.97, "learning_rate": 3.406952022717474e-05, "loss": 0.1614, "step": 21930 }, { "epoch": 0.97, "learning_rate": 3.406208649886264e-05, "loss": 0.1553, "step": 21940 }, { "epoch": 0.97, "learning_rate": 3.405465277055054e-05, "loss": 0.168, "step": 21950 }, { "epoch": 0.97, "learning_rate": 3.404721904223845e-05, "loss": 0.1581, "step": 21960 }, { "epoch": 0.97, "learning_rate": 3.4039785313926346e-05, "loss": 0.1623, "step": 21970 }, { "epoch": 0.97, "learning_rate": 3.4032351585614255e-05, "loss": 0.1439, "step": 21980 }, { "epoch": 0.97, "learning_rate": 3.402491785730215e-05, "loss": 0.1899, "step": 21990 }, { "epoch": 0.97, "learning_rate": 3.4017484128990054e-05, "loss": 0.2052, "step": 22000 }, { "epoch": 0.97, "learning_rate": 3.4010050400677956e-05, "loss": 0.2351, "step": 22010 }, { "epoch": 0.97, "learning_rate": 3.400261667236586e-05, "loss": 0.1407, "step": 22020 }, { "epoch": 0.98, "learning_rate": 3.399518294405376e-05, "loss": 0.115, "step": 22030 }, { "epoch": 0.98, "learning_rate": 3.3987749215741664e-05, "loss": 0.2048, "step": 22040 }, { "epoch": 0.98, "learning_rate": 3.3980315487429567e-05, "loss": 0.1572, "step": 22050 }, { "epoch": 0.98, "learning_rate": 3.397288175911747e-05, "loss": 0.1256, "step": 22060 }, { "epoch": 0.98, "learning_rate": 3.396544803080537e-05, "loss": 0.2119, "step": 22070 }, { "epoch": 0.98, "learning_rate": 3.3958014302493274e-05, "loss": 0.1526, "step": 22080 }, { "epoch": 0.98, "learning_rate": 3.395058057418118e-05, "loss": 0.2002, "step": 22090 }, { "epoch": 0.98, "learning_rate": 3.394314684586908e-05, "loss": 0.1494, "step": 22100 }, { "epoch": 0.98, "learning_rate": 3.393571311755698e-05, "loss": 0.1906, "step": 22110 }, { "epoch": 0.98, "learning_rate": 3.3928279389244885e-05, "loss": 0.2292, "step": 22120 }, { "epoch": 0.98, "learning_rate": 3.392084566093279e-05, "loss": 0.178, "step": 22130 }, { "epoch": 0.98, "learning_rate": 3.391341193262069e-05, "loss": 0.1335, "step": 22140 }, { "epoch": 0.98, "learning_rate": 3.390597820430859e-05, "loss": 0.1546, "step": 22150 }, { "epoch": 0.98, "learning_rate": 3.389854447599649e-05, "loss": 0.2016, "step": 22160 }, { "epoch": 0.98, "learning_rate": 3.38911107476844e-05, "loss": 0.1536, "step": 22170 }, { "epoch": 0.98, "learning_rate": 3.388367701937229e-05, "loss": 0.1137, "step": 22180 }, { "epoch": 0.98, "learning_rate": 3.38762432910602e-05, "loss": 0.1889, "step": 22190 }, { "epoch": 0.98, "learning_rate": 3.3868809562748105e-05, "loss": 0.2044, "step": 22200 }, { "epoch": 0.98, "learning_rate": 3.3861375834436e-05, "loss": 0.2402, "step": 22210 }, { "epoch": 0.98, "learning_rate": 3.385394210612391e-05, "loss": 0.1649, "step": 22220 }, { "epoch": 0.98, "learning_rate": 3.3846508377811806e-05, "loss": 0.209, "step": 22230 }, { "epoch": 0.98, "learning_rate": 3.3839074649499716e-05, "loss": 0.2256, "step": 22240 }, { "epoch": 0.99, "learning_rate": 3.383164092118761e-05, "loss": 0.1695, "step": 22250 }, { "epoch": 0.99, "learning_rate": 3.382420719287552e-05, "loss": 0.1262, "step": 22260 }, { "epoch": 0.99, "learning_rate": 3.3816773464563416e-05, "loss": 0.1726, "step": 22270 }, { "epoch": 0.99, "learning_rate": 3.380933973625132e-05, "loss": 0.1169, "step": 22280 }, { "epoch": 0.99, "learning_rate": 3.380190600793922e-05, "loss": 0.1707, "step": 22290 }, { "epoch": 0.99, "learning_rate": 3.3794472279627124e-05, "loss": 0.1996, "step": 22300 }, { "epoch": 0.99, "learning_rate": 3.378703855131503e-05, "loss": 0.2232, "step": 22310 }, { "epoch": 0.99, "learning_rate": 3.377960482300293e-05, "loss": 0.155, "step": 22320 }, { "epoch": 0.99, "learning_rate": 3.377217109469084e-05, "loss": 0.2103, "step": 22330 }, { "epoch": 0.99, "learning_rate": 3.3764737366378735e-05, "loss": 0.2145, "step": 22340 }, { "epoch": 0.99, "learning_rate": 3.375730363806664e-05, "loss": 0.1908, "step": 22350 }, { "epoch": 0.99, "learning_rate": 3.374986990975454e-05, "loss": 0.1762, "step": 22360 }, { "epoch": 0.99, "learning_rate": 3.374243618144244e-05, "loss": 0.1318, "step": 22370 }, { "epoch": 0.99, "learning_rate": 3.3735002453130345e-05, "loss": 0.1456, "step": 22380 }, { "epoch": 0.99, "learning_rate": 3.372756872481825e-05, "loss": 0.1276, "step": 22390 }, { "epoch": 0.99, "learning_rate": 3.372013499650615e-05, "loss": 0.1678, "step": 22400 }, { "epoch": 0.99, "learning_rate": 3.371270126819405e-05, "loss": 0.1953, "step": 22410 }, { "epoch": 0.99, "learning_rate": 3.370526753988195e-05, "loss": 0.1482, "step": 22420 }, { "epoch": 0.99, "learning_rate": 3.369783381156986e-05, "loss": 0.1558, "step": 22430 }, { "epoch": 0.99, "learning_rate": 3.3690400083257754e-05, "loss": 0.1586, "step": 22440 }, { "epoch": 0.99, "learning_rate": 3.368296635494566e-05, "loss": 0.2083, "step": 22450 }, { "epoch": 0.99, "learning_rate": 3.3675532626633566e-05, "loss": 0.1728, "step": 22460 }, { "epoch": 0.99, "learning_rate": 3.366809889832147e-05, "loss": 0.1348, "step": 22470 }, { "epoch": 1.0, "learning_rate": 3.366066517000937e-05, "loss": 0.1237, "step": 22480 }, { "epoch": 1.0, "learning_rate": 3.3653231441697266e-05, "loss": 0.1692, "step": 22490 }, { "epoch": 1.0, "learning_rate": 3.3645797713385176e-05, "loss": 0.236, "step": 22500 }, { "epoch": 1.0, "learning_rate": 3.363836398507307e-05, "loss": 0.1605, "step": 22510 }, { "epoch": 1.0, "learning_rate": 3.363093025676098e-05, "loss": 0.1674, "step": 22520 }, { "epoch": 1.0, "learning_rate": 3.362349652844888e-05, "loss": 0.1877, "step": 22530 }, { "epoch": 1.0, "learning_rate": 3.3616062800136786e-05, "loss": 0.1729, "step": 22540 }, { "epoch": 1.0, "learning_rate": 3.360862907182468e-05, "loss": 0.1376, "step": 22550 }, { "epoch": 1.0, "learning_rate": 3.3601195343512585e-05, "loss": 0.1879, "step": 22560 }, { "epoch": 1.0, "learning_rate": 3.3593761615200494e-05, "loss": 0.1743, "step": 22570 }, { "epoch": 1.0, "learning_rate": 3.358632788688839e-05, "loss": 0.1603, "step": 22580 }, { "epoch": 1.0, "learning_rate": 3.35788941585763e-05, "loss": 0.1699, "step": 22590 }, { "epoch": 1.0, "learning_rate": 3.3571460430264195e-05, "loss": 0.1376, "step": 22600 }, { "epoch": 1.0, "learning_rate": 3.35640267019521e-05, "loss": 0.1383, "step": 22610 }, { "epoch": 1.0, "learning_rate": 3.355659297364e-05, "loss": 0.1432, "step": 22620 }, { "epoch": 1.0, "learning_rate": 3.35491592453279e-05, "loss": 0.1425, "step": 22630 }, { "epoch": 1.0, "learning_rate": 3.3541725517015805e-05, "loss": 0.2085, "step": 22640 }, { "epoch": 1.0, "learning_rate": 3.353429178870371e-05, "loss": 0.128, "step": 22650 }, { "epoch": 1.0, "learning_rate": 3.352685806039161e-05, "loss": 0.1358, "step": 22660 }, { "epoch": 1.0, "learning_rate": 3.351942433207951e-05, "loss": 0.1251, "step": 22670 }, { "epoch": 1.0, "learning_rate": 3.3511990603767415e-05, "loss": 0.1374, "step": 22680 }, { "epoch": 1.0, "learning_rate": 3.350455687545532e-05, "loss": 0.1689, "step": 22690 }, { "epoch": 1.01, "learning_rate": 3.349712314714322e-05, "loss": 0.1001, "step": 22700 }, { "epoch": 1.01, "learning_rate": 3.348968941883112e-05, "loss": 0.1581, "step": 22710 }, { "epoch": 1.01, "learning_rate": 3.3482255690519026e-05, "loss": 0.1598, "step": 22720 }, { "epoch": 1.01, "learning_rate": 3.347482196220693e-05, "loss": 0.1255, "step": 22730 }, { "epoch": 1.01, "learning_rate": 3.346738823389483e-05, "loss": 0.1412, "step": 22740 }, { "epoch": 1.01, "learning_rate": 3.3459954505582734e-05, "loss": 0.1709, "step": 22750 }, { "epoch": 1.01, "learning_rate": 3.3452520777270636e-05, "loss": 0.1701, "step": 22760 }, { "epoch": 1.01, "learning_rate": 3.344508704895853e-05, "loss": 0.1287, "step": 22770 }, { "epoch": 1.01, "learning_rate": 3.343765332064644e-05, "loss": 0.1491, "step": 22780 }, { "epoch": 1.01, "learning_rate": 3.343021959233434e-05, "loss": 0.1237, "step": 22790 }, { "epoch": 1.01, "learning_rate": 3.3422785864022246e-05, "loss": 0.1617, "step": 22800 }, { "epoch": 1.01, "learning_rate": 3.341535213571014e-05, "loss": 0.1074, "step": 22810 }, { "epoch": 1.01, "learning_rate": 3.3407918407398045e-05, "loss": 0.203, "step": 22820 }, { "epoch": 1.01, "learning_rate": 3.3400484679085954e-05, "loss": 0.1214, "step": 22830 }, { "epoch": 1.01, "learning_rate": 3.339305095077385e-05, "loss": 0.1153, "step": 22840 }, { "epoch": 1.01, "learning_rate": 3.338561722246176e-05, "loss": 0.1511, "step": 22850 }, { "epoch": 1.01, "learning_rate": 3.3378183494149655e-05, "loss": 0.1716, "step": 22860 }, { "epoch": 1.01, "learning_rate": 3.3370749765837564e-05, "loss": 0.1396, "step": 22870 }, { "epoch": 1.01, "learning_rate": 3.336331603752546e-05, "loss": 0.1573, "step": 22880 }, { "epoch": 1.01, "learning_rate": 3.335588230921336e-05, "loss": 0.1733, "step": 22890 }, { "epoch": 1.01, "learning_rate": 3.3348448580901265e-05, "loss": 0.1383, "step": 22900 }, { "epoch": 1.01, "learning_rate": 3.334101485258917e-05, "loss": 0.1917, "step": 22910 }, { "epoch": 1.01, "learning_rate": 3.333358112427707e-05, "loss": 0.1511, "step": 22920 }, { "epoch": 1.02, "learning_rate": 3.332614739596497e-05, "loss": 0.1627, "step": 22930 }, { "epoch": 1.02, "learning_rate": 3.3318713667652876e-05, "loss": 0.2148, "step": 22940 }, { "epoch": 1.02, "learning_rate": 3.331127993934078e-05, "loss": 0.1729, "step": 22950 }, { "epoch": 1.02, "learning_rate": 3.330384621102868e-05, "loss": 0.1765, "step": 22960 }, { "epoch": 1.02, "learning_rate": 3.3296412482716584e-05, "loss": 0.1154, "step": 22970 }, { "epoch": 1.02, "learning_rate": 3.3288978754404486e-05, "loss": 0.1176, "step": 22980 }, { "epoch": 1.02, "learning_rate": 3.328154502609239e-05, "loss": 0.1159, "step": 22990 }, { "epoch": 1.02, "learning_rate": 3.327411129778029e-05, "loss": 0.1193, "step": 23000 }, { "epoch": 1.02, "learning_rate": 3.3266677569468194e-05, "loss": 0.1291, "step": 23010 }, { "epoch": 1.02, "learning_rate": 3.3259243841156096e-05, "loss": 0.2065, "step": 23020 }, { "epoch": 1.02, "learning_rate": 3.325181011284399e-05, "loss": 0.1881, "step": 23030 }, { "epoch": 1.02, "learning_rate": 3.32443763845319e-05, "loss": 0.1307, "step": 23040 }, { "epoch": 1.02, "learning_rate": 3.32369426562198e-05, "loss": 0.1743, "step": 23050 }, { "epoch": 1.02, "learning_rate": 3.322950892790771e-05, "loss": 0.1278, "step": 23060 }, { "epoch": 1.02, "learning_rate": 3.322207519959561e-05, "loss": 0.1428, "step": 23070 }, { "epoch": 1.02, "learning_rate": 3.321464147128351e-05, "loss": 0.1014, "step": 23080 }, { "epoch": 1.02, "learning_rate": 3.3207207742971414e-05, "loss": 0.1822, "step": 23090 }, { "epoch": 1.02, "learning_rate": 3.319977401465931e-05, "loss": 0.148, "step": 23100 }, { "epoch": 1.02, "learning_rate": 3.319234028634722e-05, "loss": 0.1551, "step": 23110 }, { "epoch": 1.02, "learning_rate": 3.3184906558035115e-05, "loss": 0.1173, "step": 23120 }, { "epoch": 1.02, "learning_rate": 3.3177472829723025e-05, "loss": 0.1167, "step": 23130 }, { "epoch": 1.02, "learning_rate": 3.317003910141092e-05, "loss": 0.1758, "step": 23140 }, { "epoch": 1.02, "learning_rate": 3.316260537309882e-05, "loss": 0.1343, "step": 23150 }, { "epoch": 1.03, "learning_rate": 3.3155171644786726e-05, "loss": 0.1508, "step": 23160 }, { "epoch": 1.03, "learning_rate": 3.314773791647463e-05, "loss": 0.1394, "step": 23170 }, { "epoch": 1.03, "learning_rate": 3.314030418816254e-05, "loss": 0.1474, "step": 23180 }, { "epoch": 1.03, "learning_rate": 3.3132870459850433e-05, "loss": 0.1296, "step": 23190 }, { "epoch": 1.03, "learning_rate": 3.312543673153834e-05, "loss": 0.1539, "step": 23200 }, { "epoch": 1.03, "learning_rate": 3.311800300322624e-05, "loss": 0.132, "step": 23210 }, { "epoch": 1.03, "learning_rate": 3.311056927491414e-05, "loss": 0.1272, "step": 23220 }, { "epoch": 1.03, "learning_rate": 3.3103135546602044e-05, "loss": 0.1647, "step": 23230 }, { "epoch": 1.03, "learning_rate": 3.3095701818289946e-05, "loss": 0.2238, "step": 23240 }, { "epoch": 1.03, "learning_rate": 3.308826808997785e-05, "loss": 0.1651, "step": 23250 }, { "epoch": 1.03, "learning_rate": 3.308083436166575e-05, "loss": 0.1112, "step": 23260 }, { "epoch": 1.03, "learning_rate": 3.3073400633353654e-05, "loss": 0.1322, "step": 23270 }, { "epoch": 1.03, "learning_rate": 3.306596690504156e-05, "loss": 0.1587, "step": 23280 }, { "epoch": 1.03, "learning_rate": 3.305853317672946e-05, "loss": 0.2148, "step": 23290 }, { "epoch": 1.03, "learning_rate": 3.305109944841736e-05, "loss": 0.1324, "step": 23300 }, { "epoch": 1.03, "learning_rate": 3.3043665720105264e-05, "loss": 0.1616, "step": 23310 }, { "epoch": 1.03, "learning_rate": 3.303623199179317e-05, "loss": 0.1557, "step": 23320 }, { "epoch": 1.03, "learning_rate": 3.302879826348107e-05, "loss": 0.1168, "step": 23330 }, { "epoch": 1.03, "learning_rate": 3.302136453516897e-05, "loss": 0.1474, "step": 23340 }, { "epoch": 1.03, "learning_rate": 3.3013930806856875e-05, "loss": 0.1347, "step": 23350 }, { "epoch": 1.03, "learning_rate": 3.300649707854477e-05, "loss": 0.1286, "step": 23360 }, { "epoch": 1.03, "learning_rate": 3.299906335023268e-05, "loss": 0.1219, "step": 23370 }, { "epoch": 1.04, "learning_rate": 3.2991629621920576e-05, "loss": 0.1566, "step": 23380 }, { "epoch": 1.04, "learning_rate": 3.2984195893608485e-05, "loss": 0.1498, "step": 23390 }, { "epoch": 1.04, "learning_rate": 3.297676216529638e-05, "loss": 0.1543, "step": 23400 }, { "epoch": 1.04, "learning_rate": 3.296932843698429e-05, "loss": 0.1233, "step": 23410 }, { "epoch": 1.04, "learning_rate": 3.2961894708672186e-05, "loss": 0.169, "step": 23420 }, { "epoch": 1.04, "learning_rate": 3.295446098036009e-05, "loss": 0.1652, "step": 23430 }, { "epoch": 1.04, "learning_rate": 3.2947027252048e-05, "loss": 0.1346, "step": 23440 }, { "epoch": 1.04, "learning_rate": 3.2939593523735894e-05, "loss": 0.1613, "step": 23450 }, { "epoch": 1.04, "learning_rate": 3.29321597954238e-05, "loss": 0.1839, "step": 23460 }, { "epoch": 1.04, "learning_rate": 3.29247260671117e-05, "loss": 0.1417, "step": 23470 }, { "epoch": 1.04, "learning_rate": 3.29172923387996e-05, "loss": 0.1361, "step": 23480 }, { "epoch": 1.04, "learning_rate": 3.2909858610487504e-05, "loss": 0.1399, "step": 23490 }, { "epoch": 1.04, "learning_rate": 3.290242488217541e-05, "loss": 0.1331, "step": 23500 }, { "epoch": 1.04, "learning_rate": 3.289499115386331e-05, "loss": 0.1956, "step": 23510 }, { "epoch": 1.04, "learning_rate": 3.288755742555121e-05, "loss": 0.1762, "step": 23520 }, { "epoch": 1.04, "learning_rate": 3.2880123697239114e-05, "loss": 0.1551, "step": 23530 }, { "epoch": 1.04, "learning_rate": 3.287268996892702e-05, "loss": 0.1248, "step": 23540 }, { "epoch": 1.04, "learning_rate": 3.286525624061492e-05, "loss": 0.1196, "step": 23550 }, { "epoch": 1.04, "learning_rate": 3.285782251230282e-05, "loss": 0.1208, "step": 23560 }, { "epoch": 1.04, "learning_rate": 3.2850388783990725e-05, "loss": 0.1304, "step": 23570 }, { "epoch": 1.04, "learning_rate": 3.284295505567863e-05, "loss": 0.104, "step": 23580 }, { "epoch": 1.04, "learning_rate": 3.283552132736653e-05, "loss": 0.1548, "step": 23590 }, { "epoch": 1.04, "learning_rate": 3.282808759905443e-05, "loss": 0.1361, "step": 23600 }, { "epoch": 1.05, "learning_rate": 3.2820653870742335e-05, "loss": 0.1475, "step": 23610 }, { "epoch": 1.05, "learning_rate": 3.281322014243024e-05, "loss": 0.2199, "step": 23620 }, { "epoch": 1.05, "learning_rate": 3.280578641411814e-05, "loss": 0.1698, "step": 23630 }, { "epoch": 1.05, "learning_rate": 3.2798352685806036e-05, "loss": 0.1853, "step": 23640 }, { "epoch": 1.05, "learning_rate": 3.2790918957493945e-05, "loss": 0.1173, "step": 23650 }, { "epoch": 1.05, "learning_rate": 3.278348522918184e-05, "loss": 0.1535, "step": 23660 }, { "epoch": 1.05, "learning_rate": 3.277605150086975e-05, "loss": 0.1118, "step": 23670 }, { "epoch": 1.05, "learning_rate": 3.276861777255765e-05, "loss": 0.172, "step": 23680 }, { "epoch": 1.05, "learning_rate": 3.276118404424555e-05, "loss": 0.1453, "step": 23690 }, { "epoch": 1.05, "learning_rate": 3.275375031593346e-05, "loss": 0.1752, "step": 23700 }, { "epoch": 1.05, "learning_rate": 3.2746316587621354e-05, "loss": 0.1746, "step": 23710 }, { "epoch": 1.05, "learning_rate": 3.2738882859309263e-05, "loss": 0.1836, "step": 23720 }, { "epoch": 1.05, "learning_rate": 3.273144913099716e-05, "loss": 0.1501, "step": 23730 }, { "epoch": 1.05, "learning_rate": 3.272401540268507e-05, "loss": 0.1986, "step": 23740 }, { "epoch": 1.05, "learning_rate": 3.2716581674372964e-05, "loss": 0.0916, "step": 23750 }, { "epoch": 1.05, "learning_rate": 3.270914794606087e-05, "loss": 0.1378, "step": 23760 }, { "epoch": 1.05, "learning_rate": 3.270171421774877e-05, "loss": 0.1449, "step": 23770 }, { "epoch": 1.05, "learning_rate": 3.269428048943667e-05, "loss": 0.161, "step": 23780 }, { "epoch": 1.05, "learning_rate": 3.2686846761124575e-05, "loss": 0.1384, "step": 23790 }, { "epoch": 1.05, "learning_rate": 3.267941303281248e-05, "loss": 0.1655, "step": 23800 }, { "epoch": 1.05, "learning_rate": 3.267197930450038e-05, "loss": 0.1491, "step": 23810 }, { "epoch": 1.05, "learning_rate": 3.266454557618828e-05, "loss": 0.1399, "step": 23820 }, { "epoch": 1.06, "learning_rate": 3.2657111847876185e-05, "loss": 0.1925, "step": 23830 }, { "epoch": 1.06, "learning_rate": 3.264967811956409e-05, "loss": 0.1632, "step": 23840 }, { "epoch": 1.06, "learning_rate": 3.264224439125199e-05, "loss": 0.1427, "step": 23850 }, { "epoch": 1.06, "learning_rate": 3.263481066293989e-05, "loss": 0.1184, "step": 23860 }, { "epoch": 1.06, "learning_rate": 3.2627376934627795e-05, "loss": 0.1602, "step": 23870 }, { "epoch": 1.06, "learning_rate": 3.26199432063157e-05, "loss": 0.1054, "step": 23880 }, { "epoch": 1.06, "learning_rate": 3.26125094780036e-05, "loss": 0.147, "step": 23890 }, { "epoch": 1.06, "learning_rate": 3.2605075749691496e-05, "loss": 0.1914, "step": 23900 }, { "epoch": 1.06, "learning_rate": 3.2597642021379406e-05, "loss": 0.1365, "step": 23910 }, { "epoch": 1.06, "learning_rate": 3.259020829306731e-05, "loss": 0.1269, "step": 23920 }, { "epoch": 1.06, "learning_rate": 3.258277456475521e-05, "loss": 0.1319, "step": 23930 }, { "epoch": 1.06, "learning_rate": 3.257534083644311e-05, "loss": 0.1844, "step": 23940 }, { "epoch": 1.06, "learning_rate": 3.2567907108131016e-05, "loss": 0.1394, "step": 23950 }, { "epoch": 1.06, "learning_rate": 3.256047337981892e-05, "loss": 0.169, "step": 23960 }, { "epoch": 1.06, "learning_rate": 3.2553039651506814e-05, "loss": 0.2282, "step": 23970 }, { "epoch": 1.06, "learning_rate": 3.2545605923194724e-05, "loss": 0.1273, "step": 23980 }, { "epoch": 1.06, "learning_rate": 3.253817219488262e-05, "loss": 0.133, "step": 23990 }, { "epoch": 1.06, "learning_rate": 3.253073846657053e-05, "loss": 0.1362, "step": 24000 }, { "epoch": 1.06, "learning_rate": 3.2523304738258425e-05, "loss": 0.1593, "step": 24010 }, { "epoch": 1.06, "learning_rate": 3.251587100994633e-05, "loss": 0.1473, "step": 24020 }, { "epoch": 1.06, "learning_rate": 3.250843728163423e-05, "loss": 0.1312, "step": 24030 }, { "epoch": 1.06, "learning_rate": 3.250100355332213e-05, "loss": 0.1152, "step": 24040 }, { "epoch": 1.06, "learning_rate": 3.249356982501004e-05, "loss": 0.1981, "step": 24050 }, { "epoch": 1.07, "learning_rate": 3.248613609669794e-05, "loss": 0.0671, "step": 24060 }, { "epoch": 1.07, "learning_rate": 3.247870236838585e-05, "loss": 0.1673, "step": 24070 }, { "epoch": 1.07, "learning_rate": 3.247126864007374e-05, "loss": 0.1507, "step": 24080 }, { "epoch": 1.07, "learning_rate": 3.2463834911761645e-05, "loss": 0.1663, "step": 24090 }, { "epoch": 1.07, "learning_rate": 3.245640118344955e-05, "loss": 0.1521, "step": 24100 }, { "epoch": 1.07, "learning_rate": 3.244896745513745e-05, "loss": 0.1235, "step": 24110 }, { "epoch": 1.07, "learning_rate": 3.244153372682535e-05, "loss": 0.2276, "step": 24120 }, { "epoch": 1.07, "learning_rate": 3.2434099998513256e-05, "loss": 0.133, "step": 24130 }, { "epoch": 1.07, "learning_rate": 3.242666627020116e-05, "loss": 0.1746, "step": 24140 }, { "epoch": 1.07, "learning_rate": 3.241923254188906e-05, "loss": 0.1128, "step": 24150 }, { "epoch": 1.07, "learning_rate": 3.241179881357696e-05, "loss": 0.1737, "step": 24160 }, { "epoch": 1.07, "learning_rate": 3.2404365085264866e-05, "loss": 0.1741, "step": 24170 }, { "epoch": 1.07, "learning_rate": 3.239693135695277e-05, "loss": 0.1552, "step": 24180 }, { "epoch": 1.07, "learning_rate": 3.238949762864067e-05, "loss": 0.1949, "step": 24190 }, { "epoch": 1.07, "learning_rate": 3.2382063900328574e-05, "loss": 0.1574, "step": 24200 }, { "epoch": 1.07, "learning_rate": 3.2374630172016476e-05, "loss": 0.163, "step": 24210 }, { "epoch": 1.07, "learning_rate": 3.236719644370438e-05, "loss": 0.1161, "step": 24220 }, { "epoch": 1.07, "learning_rate": 3.2359762715392275e-05, "loss": 0.1688, "step": 24230 }, { "epoch": 1.07, "learning_rate": 3.2352328987080184e-05, "loss": 0.1118, "step": 24240 }, { "epoch": 1.07, "learning_rate": 3.234489525876808e-05, "loss": 0.1395, "step": 24250 }, { "epoch": 1.07, "learning_rate": 3.233746153045599e-05, "loss": 0.1708, "step": 24260 }, { "epoch": 1.07, "learning_rate": 3.2330027802143885e-05, "loss": 0.1728, "step": 24270 }, { "epoch": 1.07, "learning_rate": 3.2322594073831794e-05, "loss": 0.1304, "step": 24280 }, { "epoch": 1.08, "learning_rate": 3.23151603455197e-05, "loss": 0.15, "step": 24290 }, { "epoch": 1.08, "learning_rate": 3.230772661720759e-05, "loss": 0.1733, "step": 24300 }, { "epoch": 1.08, "learning_rate": 3.23002928888955e-05, "loss": 0.1568, "step": 24310 }, { "epoch": 1.08, "learning_rate": 3.22928591605834e-05, "loss": 0.132, "step": 24320 }, { "epoch": 1.08, "learning_rate": 3.228542543227131e-05, "loss": 0.1694, "step": 24330 }, { "epoch": 1.08, "learning_rate": 3.22779917039592e-05, "loss": 0.1414, "step": 24340 }, { "epoch": 1.08, "learning_rate": 3.2270557975647106e-05, "loss": 0.1125, "step": 24350 }, { "epoch": 1.08, "learning_rate": 3.226312424733501e-05, "loss": 0.1861, "step": 24360 }, { "epoch": 1.08, "learning_rate": 3.225569051902291e-05, "loss": 0.1123, "step": 24370 }, { "epoch": 1.08, "learning_rate": 3.224825679071081e-05, "loss": 0.2113, "step": 24380 }, { "epoch": 1.08, "learning_rate": 3.2240823062398716e-05, "loss": 0.1249, "step": 24390 }, { "epoch": 1.08, "learning_rate": 3.223338933408662e-05, "loss": 0.1609, "step": 24400 }, { "epoch": 1.08, "learning_rate": 3.222595560577452e-05, "loss": 0.1641, "step": 24410 }, { "epoch": 1.08, "learning_rate": 3.2218521877462424e-05, "loss": 0.1596, "step": 24420 }, { "epoch": 1.08, "learning_rate": 3.2211088149150326e-05, "loss": 0.1085, "step": 24430 }, { "epoch": 1.08, "learning_rate": 3.220365442083823e-05, "loss": 0.1112, "step": 24440 }, { "epoch": 1.08, "learning_rate": 3.219622069252613e-05, "loss": 0.1218, "step": 24450 }, { "epoch": 1.08, "learning_rate": 3.2188786964214034e-05, "loss": 0.1712, "step": 24460 }, { "epoch": 1.08, "learning_rate": 3.2181353235901937e-05, "loss": 0.1661, "step": 24470 }, { "epoch": 1.08, "learning_rate": 3.217391950758984e-05, "loss": 0.175, "step": 24480 }, { "epoch": 1.08, "learning_rate": 3.216648577927774e-05, "loss": 0.1173, "step": 24490 }, { "epoch": 1.08, "learning_rate": 3.2159052050965644e-05, "loss": 0.2439, "step": 24500 }, { "epoch": 1.09, "learning_rate": 3.215161832265354e-05, "loss": 0.1192, "step": 24510 }, { "epoch": 1.09, "learning_rate": 3.214418459434145e-05, "loss": 0.1595, "step": 24520 }, { "epoch": 1.09, "learning_rate": 3.213675086602935e-05, "loss": 0.1647, "step": 24530 }, { "epoch": 1.09, "learning_rate": 3.2129317137717255e-05, "loss": 0.129, "step": 24540 }, { "epoch": 1.09, "learning_rate": 3.212188340940516e-05, "loss": 0.1614, "step": 24550 }, { "epoch": 1.09, "learning_rate": 3.211444968109305e-05, "loss": 0.1036, "step": 24560 }, { "epoch": 1.09, "learning_rate": 3.210701595278096e-05, "loss": 0.1644, "step": 24570 }, { "epoch": 1.09, "learning_rate": 3.209958222446886e-05, "loss": 0.1836, "step": 24580 }, { "epoch": 1.09, "learning_rate": 3.209214849615677e-05, "loss": 0.1606, "step": 24590 }, { "epoch": 1.09, "learning_rate": 3.208471476784466e-05, "loss": 0.1816, "step": 24600 }, { "epoch": 1.09, "learning_rate": 3.207728103953257e-05, "loss": 0.1627, "step": 24610 }, { "epoch": 1.09, "learning_rate": 3.206984731122047e-05, "loss": 0.1452, "step": 24620 }, { "epoch": 1.09, "learning_rate": 3.206241358290837e-05, "loss": 0.1502, "step": 24630 }, { "epoch": 1.09, "learning_rate": 3.2054979854596274e-05, "loss": 0.1295, "step": 24640 }, { "epoch": 1.09, "learning_rate": 3.2047546126284176e-05, "loss": 0.1677, "step": 24650 }, { "epoch": 1.09, "learning_rate": 3.2040112397972086e-05, "loss": 0.1334, "step": 24660 }, { "epoch": 1.09, "learning_rate": 3.203267866965998e-05, "loss": 0.1629, "step": 24670 }, { "epoch": 1.09, "learning_rate": 3.2025244941347884e-05, "loss": 0.1868, "step": 24680 }, { "epoch": 1.09, "learning_rate": 3.2017811213035787e-05, "loss": 0.1583, "step": 24690 }, { "epoch": 1.09, "learning_rate": 3.201037748472369e-05, "loss": 0.1676, "step": 24700 }, { "epoch": 1.09, "learning_rate": 3.200294375641159e-05, "loss": 0.1591, "step": 24710 }, { "epoch": 1.09, "learning_rate": 3.1995510028099494e-05, "loss": 0.167, "step": 24720 }, { "epoch": 1.09, "learning_rate": 3.19880762997874e-05, "loss": 0.1286, "step": 24730 }, { "epoch": 1.1, "learning_rate": 3.19806425714753e-05, "loss": 0.2066, "step": 24740 }, { "epoch": 1.1, "learning_rate": 3.19732088431632e-05, "loss": 0.1604, "step": 24750 }, { "epoch": 1.1, "learning_rate": 3.1965775114851105e-05, "loss": 0.1282, "step": 24760 }, { "epoch": 1.1, "learning_rate": 3.1958341386539e-05, "loss": 0.2033, "step": 24770 }, { "epoch": 1.1, "learning_rate": 3.195090765822691e-05, "loss": 0.1383, "step": 24780 }, { "epoch": 1.1, "learning_rate": 3.194347392991481e-05, "loss": 0.1305, "step": 24790 }, { "epoch": 1.1, "learning_rate": 3.1936040201602715e-05, "loss": 0.1642, "step": 24800 }, { "epoch": 1.1, "learning_rate": 3.192860647329062e-05, "loss": 0.1374, "step": 24810 }, { "epoch": 1.1, "learning_rate": 3.192117274497852e-05, "loss": 0.1119, "step": 24820 }, { "epoch": 1.1, "learning_rate": 3.191373901666642e-05, "loss": 0.1053, "step": 24830 }, { "epoch": 1.1, "learning_rate": 3.190630528835432e-05, "loss": 0.1511, "step": 24840 }, { "epoch": 1.1, "learning_rate": 3.189887156004223e-05, "loss": 0.2131, "step": 24850 }, { "epoch": 1.1, "learning_rate": 3.1891437831730124e-05, "loss": 0.1544, "step": 24860 }, { "epoch": 1.1, "learning_rate": 3.188400410341803e-05, "loss": 0.144, "step": 24870 }, { "epoch": 1.1, "learning_rate": 3.187657037510593e-05, "loss": 0.1595, "step": 24880 }, { "epoch": 1.1, "learning_rate": 3.186913664679383e-05, "loss": 0.1436, "step": 24890 }, { "epoch": 1.1, "learning_rate": 3.186170291848174e-05, "loss": 0.1309, "step": 24900 }, { "epoch": 1.1, "learning_rate": 3.1854269190169636e-05, "loss": 0.1357, "step": 24910 }, { "epoch": 1.1, "learning_rate": 3.1846835461857546e-05, "loss": 0.2131, "step": 24920 }, { "epoch": 1.1, "learning_rate": 3.183940173354544e-05, "loss": 0.1345, "step": 24930 }, { "epoch": 1.1, "learning_rate": 3.183196800523335e-05, "loss": 0.1726, "step": 24940 }, { "epoch": 1.1, "learning_rate": 3.182453427692125e-05, "loss": 0.1369, "step": 24950 }, { "epoch": 1.11, "learning_rate": 3.181710054860915e-05, "loss": 0.1384, "step": 24960 }, { "epoch": 1.11, "learning_rate": 3.180966682029705e-05, "loss": 0.1623, "step": 24970 }, { "epoch": 1.11, "learning_rate": 3.1802233091984955e-05, "loss": 0.1461, "step": 24980 }, { "epoch": 1.11, "learning_rate": 3.179479936367286e-05, "loss": 0.1951, "step": 24990 }, { "epoch": 1.11, "learning_rate": 3.178736563536076e-05, "loss": 0.1171, "step": 25000 }, { "epoch": 1.11, "learning_rate": 3.177993190704866e-05, "loss": 0.1823, "step": 25010 }, { "epoch": 1.11, "learning_rate": 3.1772498178736565e-05, "loss": 0.2157, "step": 25020 }, { "epoch": 1.11, "learning_rate": 3.176506445042447e-05, "loss": 0.1523, "step": 25030 }, { "epoch": 1.11, "learning_rate": 3.175763072211237e-05, "loss": 0.1445, "step": 25040 }, { "epoch": 1.11, "learning_rate": 3.175019699380027e-05, "loss": 0.1368, "step": 25050 }, { "epoch": 1.11, "learning_rate": 3.1742763265488175e-05, "loss": 0.1674, "step": 25060 }, { "epoch": 1.11, "learning_rate": 3.173532953717608e-05, "loss": 0.1121, "step": 25070 }, { "epoch": 1.11, "learning_rate": 3.172789580886398e-05, "loss": 0.1196, "step": 25080 }, { "epoch": 1.11, "learning_rate": 3.172046208055188e-05, "loss": 0.1235, "step": 25090 }, { "epoch": 1.11, "learning_rate": 3.171302835223978e-05, "loss": 0.1588, "step": 25100 }, { "epoch": 1.11, "learning_rate": 3.170559462392769e-05, "loss": 0.1523, "step": 25110 }, { "epoch": 1.11, "learning_rate": 3.1698160895615584e-05, "loss": 0.1803, "step": 25120 }, { "epoch": 1.11, "learning_rate": 3.169072716730349e-05, "loss": 0.1624, "step": 25130 }, { "epoch": 1.11, "learning_rate": 3.1683293438991396e-05, "loss": 0.1342, "step": 25140 }, { "epoch": 1.11, "learning_rate": 3.16758597106793e-05, "loss": 0.2084, "step": 25150 }, { "epoch": 1.11, "learning_rate": 3.16684259823672e-05, "loss": 0.0932, "step": 25160 }, { "epoch": 1.11, "learning_rate": 3.16609922540551e-05, "loss": 0.2047, "step": 25170 }, { "epoch": 1.11, "learning_rate": 3.1653558525743006e-05, "loss": 0.1123, "step": 25180 }, { "epoch": 1.12, "learning_rate": 3.16461247974309e-05, "loss": 0.1248, "step": 25190 }, { "epoch": 1.12, "learning_rate": 3.163869106911881e-05, "loss": 0.1479, "step": 25200 }, { "epoch": 1.12, "learning_rate": 3.163125734080671e-05, "loss": 0.156, "step": 25210 }, { "epoch": 1.12, "learning_rate": 3.162382361249461e-05, "loss": 0.1307, "step": 25220 }, { "epoch": 1.12, "learning_rate": 3.161638988418251e-05, "loss": 0.1466, "step": 25230 }, { "epoch": 1.12, "learning_rate": 3.1608956155870415e-05, "loss": 0.1448, "step": 25240 }, { "epoch": 1.12, "learning_rate": 3.160152242755832e-05, "loss": 0.18, "step": 25250 }, { "epoch": 1.12, "learning_rate": 3.159408869924622e-05, "loss": 0.143, "step": 25260 }, { "epoch": 1.12, "learning_rate": 3.158665497093413e-05, "loss": 0.136, "step": 25270 }, { "epoch": 1.12, "learning_rate": 3.1579221242622025e-05, "loss": 0.1491, "step": 25280 }, { "epoch": 1.12, "learning_rate": 3.157178751430993e-05, "loss": 0.1244, "step": 25290 }, { "epoch": 1.12, "learning_rate": 3.156435378599783e-05, "loss": 0.1316, "step": 25300 }, { "epoch": 1.12, "learning_rate": 3.155692005768573e-05, "loss": 0.1738, "step": 25310 }, { "epoch": 1.12, "learning_rate": 3.1549486329373635e-05, "loss": 0.1544, "step": 25320 }, { "epoch": 1.12, "learning_rate": 3.154205260106154e-05, "loss": 0.1014, "step": 25330 }, { "epoch": 1.12, "learning_rate": 3.153461887274944e-05, "loss": 0.1987, "step": 25340 }, { "epoch": 1.12, "learning_rate": 3.152718514443734e-05, "loss": 0.1533, "step": 25350 }, { "epoch": 1.12, "learning_rate": 3.1519751416125246e-05, "loss": 0.1623, "step": 25360 }, { "epoch": 1.12, "learning_rate": 3.151231768781315e-05, "loss": 0.1189, "step": 25370 }, { "epoch": 1.12, "learning_rate": 3.1504883959501044e-05, "loss": 0.1291, "step": 25380 }, { "epoch": 1.12, "learning_rate": 3.1497450231188954e-05, "loss": 0.1348, "step": 25390 }, { "epoch": 1.12, "learning_rate": 3.1490016502876856e-05, "loss": 0.1387, "step": 25400 }, { "epoch": 1.12, "learning_rate": 3.148258277456476e-05, "loss": 0.1461, "step": 25410 }, { "epoch": 1.13, "learning_rate": 3.147514904625266e-05, "loss": 0.1118, "step": 25420 }, { "epoch": 1.13, "learning_rate": 3.146771531794056e-05, "loss": 0.2198, "step": 25430 }, { "epoch": 1.13, "learning_rate": 3.1460281589628466e-05, "loss": 0.1857, "step": 25440 }, { "epoch": 1.13, "learning_rate": 3.145284786131636e-05, "loss": 0.1874, "step": 25450 }, { "epoch": 1.13, "learning_rate": 3.144541413300427e-05, "loss": 0.1489, "step": 25460 }, { "epoch": 1.13, "learning_rate": 3.143798040469217e-05, "loss": 0.1292, "step": 25470 }, { "epoch": 1.13, "learning_rate": 3.143054667638008e-05, "loss": 0.1188, "step": 25480 }, { "epoch": 1.13, "learning_rate": 3.142311294806797e-05, "loss": 0.1805, "step": 25490 }, { "epoch": 1.13, "learning_rate": 3.1415679219755875e-05, "loss": 0.1584, "step": 25500 }, { "epoch": 1.13, "learning_rate": 3.1408245491443784e-05, "loss": 0.1423, "step": 25510 }, { "epoch": 1.13, "learning_rate": 3.140081176313168e-05, "loss": 0.1378, "step": 25520 }, { "epoch": 1.13, "learning_rate": 3.139337803481959e-05, "loss": 0.1886, "step": 25530 }, { "epoch": 1.13, "learning_rate": 3.1385944306507485e-05, "loss": 0.2109, "step": 25540 }, { "epoch": 1.13, "learning_rate": 3.1378510578195395e-05, "loss": 0.1365, "step": 25550 }, { "epoch": 1.13, "learning_rate": 3.137107684988329e-05, "loss": 0.1306, "step": 25560 }, { "epoch": 1.13, "learning_rate": 3.136364312157119e-05, "loss": 0.1394, "step": 25570 }, { "epoch": 1.13, "learning_rate": 3.1356209393259096e-05, "loss": 0.1392, "step": 25580 }, { "epoch": 1.13, "learning_rate": 3.1348775664947e-05, "loss": 0.2209, "step": 25590 }, { "epoch": 1.13, "learning_rate": 3.13413419366349e-05, "loss": 0.1736, "step": 25600 }, { "epoch": 1.13, "learning_rate": 3.1333908208322804e-05, "loss": 0.1587, "step": 25610 }, { "epoch": 1.13, "learning_rate": 3.1326474480010706e-05, "loss": 0.1613, "step": 25620 }, { "epoch": 1.13, "learning_rate": 3.131904075169861e-05, "loss": 0.1337, "step": 25630 }, { "epoch": 1.14, "learning_rate": 3.131160702338651e-05, "loss": 0.1015, "step": 25640 }, { "epoch": 1.14, "learning_rate": 3.1304173295074414e-05, "loss": 0.1252, "step": 25650 }, { "epoch": 1.14, "learning_rate": 3.1296739566762316e-05, "loss": 0.1589, "step": 25660 }, { "epoch": 1.14, "learning_rate": 3.128930583845022e-05, "loss": 0.1852, "step": 25670 }, { "epoch": 1.14, "learning_rate": 3.128187211013812e-05, "loss": 0.1951, "step": 25680 }, { "epoch": 1.14, "learning_rate": 3.1274438381826024e-05, "loss": 0.1477, "step": 25690 }, { "epoch": 1.14, "learning_rate": 3.126700465351393e-05, "loss": 0.1378, "step": 25700 }, { "epoch": 1.14, "learning_rate": 3.125957092520182e-05, "loss": 0.1217, "step": 25710 }, { "epoch": 1.14, "learning_rate": 3.125213719688973e-05, "loss": 0.1329, "step": 25720 }, { "epoch": 1.14, "learning_rate": 3.124470346857763e-05, "loss": 0.1735, "step": 25730 }, { "epoch": 1.14, "learning_rate": 3.123726974026554e-05, "loss": 0.1482, "step": 25740 }, { "epoch": 1.14, "learning_rate": 3.122983601195343e-05, "loss": 0.1239, "step": 25750 }, { "epoch": 1.14, "learning_rate": 3.1222402283641335e-05, "loss": 0.1642, "step": 25760 }, { "epoch": 1.14, "learning_rate": 3.1214968555329245e-05, "loss": 0.157, "step": 25770 }, { "epoch": 1.14, "learning_rate": 3.120753482701714e-05, "loss": 0.1691, "step": 25780 }, { "epoch": 1.14, "learning_rate": 3.120010109870505e-05, "loss": 0.1303, "step": 25790 }, { "epoch": 1.14, "learning_rate": 3.1192667370392946e-05, "loss": 0.1219, "step": 25800 }, { "epoch": 1.14, "learning_rate": 3.1185233642080855e-05, "loss": 0.0967, "step": 25810 }, { "epoch": 1.14, "learning_rate": 3.117779991376875e-05, "loss": 0.1488, "step": 25820 }, { "epoch": 1.14, "learning_rate": 3.1170366185456653e-05, "loss": 0.1457, "step": 25830 }, { "epoch": 1.14, "learning_rate": 3.1162932457144556e-05, "loss": 0.1801, "step": 25840 }, { "epoch": 1.14, "learning_rate": 3.115549872883246e-05, "loss": 0.172, "step": 25850 }, { "epoch": 1.14, "learning_rate": 3.114806500052036e-05, "loss": 0.1527, "step": 25860 }, { "epoch": 1.15, "learning_rate": 3.1140631272208264e-05, "loss": 0.135, "step": 25870 }, { "epoch": 1.15, "learning_rate": 3.113319754389617e-05, "loss": 0.1721, "step": 25880 }, { "epoch": 1.15, "learning_rate": 3.112576381558407e-05, "loss": 0.0981, "step": 25890 }, { "epoch": 1.15, "learning_rate": 3.111833008727197e-05, "loss": 0.1562, "step": 25900 }, { "epoch": 1.15, "learning_rate": 3.1110896358959874e-05, "loss": 0.1307, "step": 25910 }, { "epoch": 1.15, "learning_rate": 3.110346263064778e-05, "loss": 0.1829, "step": 25920 }, { "epoch": 1.15, "learning_rate": 3.109602890233568e-05, "loss": 0.1611, "step": 25930 }, { "epoch": 1.15, "learning_rate": 3.108859517402358e-05, "loss": 0.1592, "step": 25940 }, { "epoch": 1.15, "learning_rate": 3.1081161445711484e-05, "loss": 0.1375, "step": 25950 }, { "epoch": 1.15, "learning_rate": 3.107372771739939e-05, "loss": 0.151, "step": 25960 }, { "epoch": 1.15, "learning_rate": 3.106629398908728e-05, "loss": 0.1313, "step": 25970 }, { "epoch": 1.15, "learning_rate": 3.105886026077519e-05, "loss": 0.1705, "step": 25980 }, { "epoch": 1.15, "learning_rate": 3.105142653246309e-05, "loss": 0.1262, "step": 25990 }, { "epoch": 1.15, "learning_rate": 3.1043992804151e-05, "loss": 0.1111, "step": 26000 }, { "epoch": 1.15, "learning_rate": 3.10365590758389e-05, "loss": 0.1482, "step": 26010 }, { "epoch": 1.15, "learning_rate": 3.10291253475268e-05, "loss": 0.1333, "step": 26020 }, { "epoch": 1.15, "learning_rate": 3.1021691619214705e-05, "loss": 0.1575, "step": 26030 }, { "epoch": 1.15, "learning_rate": 3.10142578909026e-05, "loss": 0.121, "step": 26040 }, { "epoch": 1.15, "learning_rate": 3.100682416259051e-05, "loss": 0.157, "step": 26050 }, { "epoch": 1.15, "learning_rate": 3.0999390434278406e-05, "loss": 0.2108, "step": 26060 }, { "epoch": 1.15, "learning_rate": 3.0991956705966315e-05, "loss": 0.0916, "step": 26070 }, { "epoch": 1.15, "learning_rate": 3.098452297765421e-05, "loss": 0.1626, "step": 26080 }, { "epoch": 1.16, "learning_rate": 3.097708924934212e-05, "loss": 0.2027, "step": 26090 }, { "epoch": 1.16, "learning_rate": 3.0969655521030016e-05, "loss": 0.1645, "step": 26100 }, { "epoch": 1.16, "learning_rate": 3.096222179271792e-05, "loss": 0.1325, "step": 26110 }, { "epoch": 1.16, "learning_rate": 3.095478806440583e-05, "loss": 0.1503, "step": 26120 }, { "epoch": 1.16, "learning_rate": 3.0947354336093724e-05, "loss": 0.1235, "step": 26130 }, { "epoch": 1.16, "learning_rate": 3.0939920607781633e-05, "loss": 0.1056, "step": 26140 }, { "epoch": 1.16, "learning_rate": 3.093248687946953e-05, "loss": 0.1449, "step": 26150 }, { "epoch": 1.16, "learning_rate": 3.092505315115743e-05, "loss": 0.1191, "step": 26160 }, { "epoch": 1.16, "learning_rate": 3.0917619422845334e-05, "loss": 0.1345, "step": 26170 }, { "epoch": 1.16, "learning_rate": 3.091018569453324e-05, "loss": 0.1162, "step": 26180 }, { "epoch": 1.16, "learning_rate": 3.090275196622114e-05, "loss": 0.2079, "step": 26190 }, { "epoch": 1.16, "learning_rate": 3.089531823790904e-05, "loss": 0.1566, "step": 26200 }, { "epoch": 1.16, "learning_rate": 3.0887884509596945e-05, "loss": 0.1513, "step": 26210 }, { "epoch": 1.16, "learning_rate": 3.088045078128485e-05, "loss": 0.1511, "step": 26220 }, { "epoch": 1.16, "learning_rate": 3.087301705297275e-05, "loss": 0.1455, "step": 26230 }, { "epoch": 1.16, "learning_rate": 3.086558332466065e-05, "loss": 0.1521, "step": 26240 }, { "epoch": 1.16, "learning_rate": 3.0858149596348555e-05, "loss": 0.1975, "step": 26250 }, { "epoch": 1.16, "learning_rate": 3.085071586803646e-05, "loss": 0.1597, "step": 26260 }, { "epoch": 1.16, "learning_rate": 3.084328213972436e-05, "loss": 0.1027, "step": 26270 }, { "epoch": 1.16, "learning_rate": 3.083584841141226e-05, "loss": 0.1847, "step": 26280 }, { "epoch": 1.16, "learning_rate": 3.0828414683100165e-05, "loss": 0.173, "step": 26290 }, { "epoch": 1.16, "learning_rate": 3.082098095478807e-05, "loss": 0.1063, "step": 26300 }, { "epoch": 1.16, "learning_rate": 3.081354722647597e-05, "loss": 0.1332, "step": 26310 }, { "epoch": 1.17, "learning_rate": 3.0806113498163866e-05, "loss": 0.1234, "step": 26320 }, { "epoch": 1.17, "learning_rate": 3.0798679769851776e-05, "loss": 0.1911, "step": 26330 }, { "epoch": 1.17, "learning_rate": 3.079124604153967e-05, "loss": 0.1567, "step": 26340 }, { "epoch": 1.17, "learning_rate": 3.078381231322758e-05, "loss": 0.1398, "step": 26350 }, { "epoch": 1.17, "learning_rate": 3.077637858491548e-05, "loss": 0.1072, "step": 26360 }, { "epoch": 1.17, "learning_rate": 3.076894485660338e-05, "loss": 0.1576, "step": 26370 }, { "epoch": 1.17, "learning_rate": 3.076151112829129e-05, "loss": 0.111, "step": 26380 }, { "epoch": 1.17, "learning_rate": 3.0754077399979184e-05, "loss": 0.1455, "step": 26390 }, { "epoch": 1.17, "learning_rate": 3.0746643671667094e-05, "loss": 0.1453, "step": 26400 }, { "epoch": 1.17, "learning_rate": 3.073920994335499e-05, "loss": 0.1678, "step": 26410 }, { "epoch": 1.17, "learning_rate": 3.07317762150429e-05, "loss": 0.156, "step": 26420 }, { "epoch": 1.17, "learning_rate": 3.0724342486730795e-05, "loss": 0.1894, "step": 26430 }, { "epoch": 1.17, "learning_rate": 3.07169087584187e-05, "loss": 0.1332, "step": 26440 }, { "epoch": 1.17, "learning_rate": 3.07094750301066e-05, "loss": 0.1132, "step": 26450 }, { "epoch": 1.17, "learning_rate": 3.07020413017945e-05, "loss": 0.1977, "step": 26460 }, { "epoch": 1.17, "learning_rate": 3.0694607573482405e-05, "loss": 0.1702, "step": 26470 }, { "epoch": 1.17, "learning_rate": 3.068717384517031e-05, "loss": 0.1322, "step": 26480 }, { "epoch": 1.17, "learning_rate": 3.067974011685821e-05, "loss": 0.1805, "step": 26490 }, { "epoch": 1.17, "learning_rate": 3.067230638854611e-05, "loss": 0.194, "step": 26500 }, { "epoch": 1.17, "learning_rate": 3.0664872660234015e-05, "loss": 0.1519, "step": 26510 }, { "epoch": 1.17, "learning_rate": 3.065743893192192e-05, "loss": 0.1217, "step": 26520 }, { "epoch": 1.17, "learning_rate": 3.065000520360982e-05, "loss": 0.116, "step": 26530 }, { "epoch": 1.18, "learning_rate": 3.064257147529772e-05, "loss": 0.1968, "step": 26540 }, { "epoch": 1.18, "learning_rate": 3.0635137746985626e-05, "loss": 0.1126, "step": 26550 }, { "epoch": 1.18, "learning_rate": 3.062770401867353e-05, "loss": 0.1723, "step": 26560 }, { "epoch": 1.18, "learning_rate": 3.062027029036143e-05, "loss": 0.1471, "step": 26570 }, { "epoch": 1.18, "learning_rate": 3.0612836562049327e-05, "loss": 0.1774, "step": 26580 }, { "epoch": 1.18, "learning_rate": 3.0605402833737236e-05, "loss": 0.1577, "step": 26590 }, { "epoch": 1.18, "learning_rate": 3.059796910542513e-05, "loss": 0.166, "step": 26600 }, { "epoch": 1.18, "learning_rate": 3.059053537711304e-05, "loss": 0.1592, "step": 26610 }, { "epoch": 1.18, "learning_rate": 3.0583101648800944e-05, "loss": 0.0929, "step": 26620 }, { "epoch": 1.18, "learning_rate": 3.0575667920488846e-05, "loss": 0.1382, "step": 26630 }, { "epoch": 1.18, "learning_rate": 3.056823419217675e-05, "loss": 0.2038, "step": 26640 }, { "epoch": 1.18, "learning_rate": 3.0560800463864645e-05, "loss": 0.1305, "step": 26650 }, { "epoch": 1.18, "learning_rate": 3.0553366735552554e-05, "loss": 0.1586, "step": 26660 }, { "epoch": 1.18, "learning_rate": 3.054593300724045e-05, "loss": 0.106, "step": 26670 }, { "epoch": 1.18, "learning_rate": 3.053849927892836e-05, "loss": 0.109, "step": 26680 }, { "epoch": 1.18, "learning_rate": 3.0531065550616255e-05, "loss": 0.1824, "step": 26690 }, { "epoch": 1.18, "learning_rate": 3.052363182230416e-05, "loss": 0.1306, "step": 26700 }, { "epoch": 1.18, "learning_rate": 3.051619809399206e-05, "loss": 0.1674, "step": 26710 }, { "epoch": 1.18, "learning_rate": 3.0508764365679966e-05, "loss": 0.143, "step": 26720 }, { "epoch": 1.18, "learning_rate": 3.0501330637367865e-05, "loss": 0.155, "step": 26730 }, { "epoch": 1.18, "learning_rate": 3.0493896909055768e-05, "loss": 0.1609, "step": 26740 }, { "epoch": 1.18, "learning_rate": 3.0486463180743674e-05, "loss": 0.1644, "step": 26750 }, { "epoch": 1.18, "learning_rate": 3.0479029452431573e-05, "loss": 0.1483, "step": 26760 }, { "epoch": 1.19, "learning_rate": 3.047159572411948e-05, "loss": 0.1883, "step": 26770 }, { "epoch": 1.19, "learning_rate": 3.0464161995807378e-05, "loss": 0.1538, "step": 26780 }, { "epoch": 1.19, "learning_rate": 3.0456728267495284e-05, "loss": 0.1636, "step": 26790 }, { "epoch": 1.19, "learning_rate": 3.0449294539183183e-05, "loss": 0.1363, "step": 26800 }, { "epoch": 1.19, "learning_rate": 3.0441860810871086e-05, "loss": 0.1367, "step": 26810 }, { "epoch": 1.19, "learning_rate": 3.0434427082558985e-05, "loss": 0.1668, "step": 26820 }, { "epoch": 1.19, "learning_rate": 3.042699335424689e-05, "loss": 0.1582, "step": 26830 }, { "epoch": 1.19, "learning_rate": 3.041955962593479e-05, "loss": 0.1252, "step": 26840 }, { "epoch": 1.19, "learning_rate": 3.0412125897622696e-05, "loss": 0.1341, "step": 26850 }, { "epoch": 1.19, "learning_rate": 3.04046921693106e-05, "loss": 0.1595, "step": 26860 }, { "epoch": 1.19, "learning_rate": 3.0397258440998498e-05, "loss": 0.1379, "step": 26870 }, { "epoch": 1.19, "learning_rate": 3.0389824712686404e-05, "loss": 0.1789, "step": 26880 }, { "epoch": 1.19, "learning_rate": 3.0382390984374303e-05, "loss": 0.1406, "step": 26890 }, { "epoch": 1.19, "learning_rate": 3.037495725606221e-05, "loss": 0.1767, "step": 26900 }, { "epoch": 1.19, "learning_rate": 3.036752352775011e-05, "loss": 0.1674, "step": 26910 }, { "epoch": 1.19, "learning_rate": 3.0360089799438014e-05, "loss": 0.1269, "step": 26920 }, { "epoch": 1.19, "learning_rate": 3.0352656071125914e-05, "loss": 0.081, "step": 26930 }, { "epoch": 1.19, "learning_rate": 3.0345222342813816e-05, "loss": 0.1021, "step": 26940 }, { "epoch": 1.19, "learning_rate": 3.0337788614501715e-05, "loss": 0.1906, "step": 26950 }, { "epoch": 1.19, "learning_rate": 3.033035488618962e-05, "loss": 0.1397, "step": 26960 }, { "epoch": 1.19, "learning_rate": 3.032292115787752e-05, "loss": 0.1642, "step": 26970 }, { "epoch": 1.19, "learning_rate": 3.0315487429565426e-05, "loss": 0.1013, "step": 26980 }, { "epoch": 1.19, "learning_rate": 3.030805370125333e-05, "loss": 0.1652, "step": 26990 }, { "epoch": 1.2, "learning_rate": 3.030061997294123e-05, "loss": 0.1017, "step": 27000 }, { "epoch": 1.2, "learning_rate": 3.0293186244629134e-05, "loss": 0.1084, "step": 27010 }, { "epoch": 1.2, "learning_rate": 3.0285752516317033e-05, "loss": 0.1528, "step": 27020 }, { "epoch": 1.2, "learning_rate": 3.027831878800494e-05, "loss": 0.0875, "step": 27030 }, { "epoch": 1.2, "learning_rate": 3.027088505969284e-05, "loss": 0.1535, "step": 27040 }, { "epoch": 1.2, "learning_rate": 3.0263451331380744e-05, "loss": 0.1308, "step": 27050 }, { "epoch": 1.2, "learning_rate": 3.0256017603068644e-05, "loss": 0.1835, "step": 27060 }, { "epoch": 1.2, "learning_rate": 3.0248583874756546e-05, "loss": 0.1402, "step": 27070 }, { "epoch": 1.2, "learning_rate": 3.0241150146444445e-05, "loss": 0.1071, "step": 27080 }, { "epoch": 1.2, "learning_rate": 3.023371641813235e-05, "loss": 0.1814, "step": 27090 }, { "epoch": 1.2, "learning_rate": 3.022628268982025e-05, "loss": 0.1821, "step": 27100 }, { "epoch": 1.2, "learning_rate": 3.0218848961508157e-05, "loss": 0.1474, "step": 27110 }, { "epoch": 1.2, "learning_rate": 3.0211415233196063e-05, "loss": 0.1111, "step": 27120 }, { "epoch": 1.2, "learning_rate": 3.0203981504883962e-05, "loss": 0.1711, "step": 27130 }, { "epoch": 1.2, "learning_rate": 3.0196547776571864e-05, "loss": 0.1052, "step": 27140 }, { "epoch": 1.2, "learning_rate": 3.0189114048259763e-05, "loss": 0.1456, "step": 27150 }, { "epoch": 1.2, "learning_rate": 3.018168031994767e-05, "loss": 0.1591, "step": 27160 }, { "epoch": 1.2, "learning_rate": 3.017424659163557e-05, "loss": 0.124, "step": 27170 }, { "epoch": 1.2, "learning_rate": 3.0166812863323475e-05, "loss": 0.1256, "step": 27180 }, { "epoch": 1.2, "learning_rate": 3.0159379135011374e-05, "loss": 0.1741, "step": 27190 }, { "epoch": 1.2, "learning_rate": 3.0151945406699276e-05, "loss": 0.1483, "step": 27200 }, { "epoch": 1.2, "learning_rate": 3.014451167838718e-05, "loss": 0.1348, "step": 27210 }, { "epoch": 1.21, "learning_rate": 3.013707795007508e-05, "loss": 0.2027, "step": 27220 }, { "epoch": 1.21, "learning_rate": 3.0129644221762987e-05, "loss": 0.1567, "step": 27230 }, { "epoch": 1.21, "learning_rate": 3.0122210493450887e-05, "loss": 0.1261, "step": 27240 }, { "epoch": 1.21, "learning_rate": 3.0114776765138793e-05, "loss": 0.1223, "step": 27250 }, { "epoch": 1.21, "learning_rate": 3.0107343036826692e-05, "loss": 0.1428, "step": 27260 }, { "epoch": 1.21, "learning_rate": 3.0099909308514594e-05, "loss": 0.1256, "step": 27270 }, { "epoch": 1.21, "learning_rate": 3.0092475580202494e-05, "loss": 0.1003, "step": 27280 }, { "epoch": 1.21, "learning_rate": 3.00850418518904e-05, "loss": 0.1259, "step": 27290 }, { "epoch": 1.21, "learning_rate": 3.00776081235783e-05, "loss": 0.1305, "step": 27300 }, { "epoch": 1.21, "learning_rate": 3.0070174395266205e-05, "loss": 0.1147, "step": 27310 }, { "epoch": 1.21, "learning_rate": 3.0062740666954104e-05, "loss": 0.1636, "step": 27320 }, { "epoch": 1.21, "learning_rate": 3.005530693864201e-05, "loss": 0.1557, "step": 27330 }, { "epoch": 1.21, "learning_rate": 3.004787321032991e-05, "loss": 0.1221, "step": 27340 }, { "epoch": 1.21, "learning_rate": 3.004043948201781e-05, "loss": 0.1344, "step": 27350 }, { "epoch": 1.21, "learning_rate": 3.0033005753705718e-05, "loss": 0.138, "step": 27360 }, { "epoch": 1.21, "learning_rate": 3.0025572025393617e-05, "loss": 0.1451, "step": 27370 }, { "epoch": 1.21, "learning_rate": 3.0018138297081523e-05, "loss": 0.1763, "step": 27380 }, { "epoch": 1.21, "learning_rate": 3.0010704568769422e-05, "loss": 0.1535, "step": 27390 }, { "epoch": 1.21, "learning_rate": 3.0003270840457325e-05, "loss": 0.1546, "step": 27400 }, { "epoch": 1.21, "learning_rate": 2.9995837112145224e-05, "loss": 0.1816, "step": 27410 }, { "epoch": 1.21, "learning_rate": 2.998840338383313e-05, "loss": 0.1853, "step": 27420 }, { "epoch": 1.21, "learning_rate": 2.998096965552103e-05, "loss": 0.1627, "step": 27430 }, { "epoch": 1.21, "learning_rate": 2.9973535927208935e-05, "loss": 0.1271, "step": 27440 }, { "epoch": 1.22, "learning_rate": 2.9966102198896834e-05, "loss": 0.1263, "step": 27450 }, { "epoch": 1.22, "learning_rate": 2.995866847058474e-05, "loss": 0.1716, "step": 27460 }, { "epoch": 1.22, "learning_rate": 2.9951234742272643e-05, "loss": 0.1519, "step": 27470 }, { "epoch": 1.22, "learning_rate": 2.9943801013960542e-05, "loss": 0.1317, "step": 27480 }, { "epoch": 1.22, "learning_rate": 2.9936367285648448e-05, "loss": 0.1013, "step": 27490 }, { "epoch": 1.22, "learning_rate": 2.9928933557336347e-05, "loss": 0.2028, "step": 27500 }, { "epoch": 1.22, "learning_rate": 2.9921499829024253e-05, "loss": 0.1973, "step": 27510 }, { "epoch": 1.22, "learning_rate": 2.9914066100712152e-05, "loss": 0.1313, "step": 27520 }, { "epoch": 1.22, "learning_rate": 2.9906632372400055e-05, "loss": 0.1675, "step": 27530 }, { "epoch": 1.22, "learning_rate": 2.9899198644087957e-05, "loss": 0.1857, "step": 27540 }, { "epoch": 1.22, "learning_rate": 2.989176491577586e-05, "loss": 0.1488, "step": 27550 }, { "epoch": 1.22, "learning_rate": 2.988433118746376e-05, "loss": 0.1625, "step": 27560 }, { "epoch": 1.22, "learning_rate": 2.9876897459151665e-05, "loss": 0.1378, "step": 27570 }, { "epoch": 1.22, "learning_rate": 2.9869463730839564e-05, "loss": 0.1336, "step": 27580 }, { "epoch": 1.22, "learning_rate": 2.986203000252747e-05, "loss": 0.1663, "step": 27590 }, { "epoch": 1.22, "learning_rate": 2.9854596274215373e-05, "loss": 0.1614, "step": 27600 }, { "epoch": 1.22, "learning_rate": 2.9847162545903272e-05, "loss": 0.1668, "step": 27610 }, { "epoch": 1.22, "learning_rate": 2.9839728817591178e-05, "loss": 0.1866, "step": 27620 }, { "epoch": 1.22, "learning_rate": 2.9832295089279077e-05, "loss": 0.1175, "step": 27630 }, { "epoch": 1.22, "learning_rate": 2.9824861360966983e-05, "loss": 0.1688, "step": 27640 }, { "epoch": 1.22, "learning_rate": 2.9817427632654882e-05, "loss": 0.1099, "step": 27650 }, { "epoch": 1.22, "learning_rate": 2.9809993904342788e-05, "loss": 0.1259, "step": 27660 }, { "epoch": 1.23, "learning_rate": 2.9802560176030687e-05, "loss": 0.0747, "step": 27670 }, { "epoch": 1.23, "learning_rate": 2.979512644771859e-05, "loss": 0.1823, "step": 27680 }, { "epoch": 1.23, "learning_rate": 2.978769271940649e-05, "loss": 0.0987, "step": 27690 }, { "epoch": 1.23, "learning_rate": 2.9780258991094395e-05, "loss": 0.128, "step": 27700 }, { "epoch": 1.23, "learning_rate": 2.9772825262782294e-05, "loss": 0.1732, "step": 27710 }, { "epoch": 1.23, "learning_rate": 2.97653915344702e-05, "loss": 0.1452, "step": 27720 }, { "epoch": 1.23, "learning_rate": 2.9757957806158103e-05, "loss": 0.1089, "step": 27730 }, { "epoch": 1.23, "learning_rate": 2.9750524077846002e-05, "loss": 0.0966, "step": 27740 }, { "epoch": 1.23, "learning_rate": 2.9743090349533908e-05, "loss": 0.1711, "step": 27750 }, { "epoch": 1.23, "learning_rate": 2.9735656621221807e-05, "loss": 0.1119, "step": 27760 }, { "epoch": 1.23, "learning_rate": 2.9728222892909713e-05, "loss": 0.146, "step": 27770 }, { "epoch": 1.23, "learning_rate": 2.9720789164597612e-05, "loss": 0.108, "step": 27780 }, { "epoch": 1.23, "learning_rate": 2.971335543628552e-05, "loss": 0.1284, "step": 27790 }, { "epoch": 1.23, "learning_rate": 2.9705921707973418e-05, "loss": 0.2195, "step": 27800 }, { "epoch": 1.23, "learning_rate": 2.969848797966132e-05, "loss": 0.1669, "step": 27810 }, { "epoch": 1.23, "learning_rate": 2.969105425134922e-05, "loss": 0.172, "step": 27820 }, { "epoch": 1.23, "learning_rate": 2.9683620523037125e-05, "loss": 0.1462, "step": 27830 }, { "epoch": 1.23, "learning_rate": 2.967618679472503e-05, "loss": 0.1083, "step": 27840 }, { "epoch": 1.23, "learning_rate": 2.966875306641293e-05, "loss": 0.1051, "step": 27850 }, { "epoch": 1.23, "learning_rate": 2.9661319338100833e-05, "loss": 0.1141, "step": 27860 }, { "epoch": 1.23, "learning_rate": 2.9653885609788736e-05, "loss": 0.1023, "step": 27870 }, { "epoch": 1.23, "learning_rate": 2.9646451881476638e-05, "loss": 0.0968, "step": 27880 }, { "epoch": 1.23, "learning_rate": 2.9639018153164537e-05, "loss": 0.1392, "step": 27890 }, { "epoch": 1.24, "learning_rate": 2.9631584424852443e-05, "loss": 0.176, "step": 27900 }, { "epoch": 1.24, "learning_rate": 2.9624150696540343e-05, "loss": 0.1899, "step": 27910 }, { "epoch": 1.24, "learning_rate": 2.961671696822825e-05, "loss": 0.1402, "step": 27920 }, { "epoch": 1.24, "learning_rate": 2.9609283239916148e-05, "loss": 0.1926, "step": 27930 }, { "epoch": 1.24, "learning_rate": 2.960184951160405e-05, "loss": 0.1583, "step": 27940 }, { "epoch": 1.24, "learning_rate": 2.959441578329195e-05, "loss": 0.1736, "step": 27950 }, { "epoch": 1.24, "learning_rate": 2.9586982054979855e-05, "loss": 0.1277, "step": 27960 }, { "epoch": 1.24, "learning_rate": 2.957954832666776e-05, "loss": 0.1751, "step": 27970 }, { "epoch": 1.24, "learning_rate": 2.957211459835566e-05, "loss": 0.1664, "step": 27980 }, { "epoch": 1.24, "learning_rate": 2.9564680870043567e-05, "loss": 0.228, "step": 27990 }, { "epoch": 1.24, "learning_rate": 2.9557247141731466e-05, "loss": 0.1517, "step": 28000 }, { "epoch": 1.24, "learning_rate": 2.954981341341937e-05, "loss": 0.1651, "step": 28010 }, { "epoch": 1.24, "learning_rate": 2.9542379685107268e-05, "loss": 0.1372, "step": 28020 }, { "epoch": 1.24, "learning_rate": 2.9534945956795174e-05, "loss": 0.1618, "step": 28030 }, { "epoch": 1.24, "learning_rate": 2.9527512228483073e-05, "loss": 0.148, "step": 28040 }, { "epoch": 1.24, "learning_rate": 2.952007850017098e-05, "loss": 0.132, "step": 28050 }, { "epoch": 1.24, "learning_rate": 2.9512644771858878e-05, "loss": 0.1459, "step": 28060 }, { "epoch": 1.24, "learning_rate": 2.950521104354678e-05, "loss": 0.1457, "step": 28070 }, { "epoch": 1.24, "learning_rate": 2.9497777315234683e-05, "loss": 0.1133, "step": 28080 }, { "epoch": 1.24, "learning_rate": 2.9490343586922586e-05, "loss": 0.1786, "step": 28090 }, { "epoch": 1.24, "learning_rate": 2.948290985861049e-05, "loss": 0.1394, "step": 28100 }, { "epoch": 1.24, "learning_rate": 2.947547613029839e-05, "loss": 0.1536, "step": 28110 }, { "epoch": 1.24, "learning_rate": 2.9468042401986297e-05, "loss": 0.1471, "step": 28120 }, { "epoch": 1.25, "learning_rate": 2.9460608673674196e-05, "loss": 0.1265, "step": 28130 }, { "epoch": 1.25, "learning_rate": 2.94531749453621e-05, "loss": 0.1522, "step": 28140 }, { "epoch": 1.25, "learning_rate": 2.9445741217049998e-05, "loss": 0.1506, "step": 28150 }, { "epoch": 1.25, "learning_rate": 2.9438307488737904e-05, "loss": 0.1343, "step": 28160 }, { "epoch": 1.25, "learning_rate": 2.9430873760425803e-05, "loss": 0.1578, "step": 28170 }, { "epoch": 1.25, "learning_rate": 2.942344003211371e-05, "loss": 0.2303, "step": 28180 }, { "epoch": 1.25, "learning_rate": 2.9416006303801608e-05, "loss": 0.151, "step": 28190 }, { "epoch": 1.25, "learning_rate": 2.9408572575489514e-05, "loss": 0.1878, "step": 28200 }, { "epoch": 1.25, "learning_rate": 2.9401138847177417e-05, "loss": 0.1804, "step": 28210 }, { "epoch": 1.25, "learning_rate": 2.9393705118865316e-05, "loss": 0.1493, "step": 28220 }, { "epoch": 1.25, "learning_rate": 2.9386271390553222e-05, "loss": 0.1496, "step": 28230 }, { "epoch": 1.25, "learning_rate": 2.937883766224112e-05, "loss": 0.1679, "step": 28240 }, { "epoch": 1.25, "learning_rate": 2.9371403933929027e-05, "loss": 0.1547, "step": 28250 }, { "epoch": 1.25, "learning_rate": 2.9363970205616926e-05, "loss": 0.1191, "step": 28260 }, { "epoch": 1.25, "learning_rate": 2.935653647730483e-05, "loss": 0.1048, "step": 28270 }, { "epoch": 1.25, "learning_rate": 2.9349102748992728e-05, "loss": 0.1921, "step": 28280 }, { "epoch": 1.25, "learning_rate": 2.9341669020680634e-05, "loss": 0.165, "step": 28290 }, { "epoch": 1.25, "learning_rate": 2.9334235292368533e-05, "loss": 0.1804, "step": 28300 }, { "epoch": 1.25, "learning_rate": 2.932680156405644e-05, "loss": 0.1343, "step": 28310 }, { "epoch": 1.25, "learning_rate": 2.9319367835744338e-05, "loss": 0.1738, "step": 28320 }, { "epoch": 1.25, "learning_rate": 2.9311934107432244e-05, "loss": 0.143, "step": 28330 }, { "epoch": 1.25, "learning_rate": 2.9304500379120147e-05, "loss": 0.1312, "step": 28340 }, { "epoch": 1.26, "learning_rate": 2.9297066650808046e-05, "loss": 0.2108, "step": 28350 }, { "epoch": 1.26, "learning_rate": 2.9289632922495952e-05, "loss": 0.1524, "step": 28360 }, { "epoch": 1.26, "learning_rate": 2.928219919418385e-05, "loss": 0.1661, "step": 28370 }, { "epoch": 1.26, "learning_rate": 2.9274765465871757e-05, "loss": 0.0959, "step": 28380 }, { "epoch": 1.26, "learning_rate": 2.9267331737559656e-05, "loss": 0.1514, "step": 28390 }, { "epoch": 1.26, "learning_rate": 2.9259898009247562e-05, "loss": 0.19, "step": 28400 }, { "epoch": 1.26, "learning_rate": 2.925246428093546e-05, "loss": 0.1753, "step": 28410 }, { "epoch": 1.26, "learning_rate": 2.9245030552623364e-05, "loss": 0.114, "step": 28420 }, { "epoch": 1.26, "learning_rate": 2.9237596824311263e-05, "loss": 0.1178, "step": 28430 }, { "epoch": 1.26, "learning_rate": 2.923016309599917e-05, "loss": 0.1249, "step": 28440 }, { "epoch": 1.26, "learning_rate": 2.9222729367687075e-05, "loss": 0.1811, "step": 28450 }, { "epoch": 1.26, "learning_rate": 2.9215295639374974e-05, "loss": 0.2, "step": 28460 }, { "epoch": 1.26, "learning_rate": 2.9207861911062877e-05, "loss": 0.1776, "step": 28470 }, { "epoch": 1.26, "learning_rate": 2.9200428182750776e-05, "loss": 0.1257, "step": 28480 }, { "epoch": 1.26, "learning_rate": 2.9192994454438682e-05, "loss": 0.0987, "step": 28490 }, { "epoch": 1.26, "learning_rate": 2.918556072612658e-05, "loss": 0.1029, "step": 28500 }, { "epoch": 1.26, "learning_rate": 2.9178126997814487e-05, "loss": 0.1136, "step": 28510 }, { "epoch": 1.26, "learning_rate": 2.9170693269502386e-05, "loss": 0.1192, "step": 28520 }, { "epoch": 1.26, "learning_rate": 2.9163259541190292e-05, "loss": 0.159, "step": 28530 }, { "epoch": 1.26, "learning_rate": 2.915582581287819e-05, "loss": 0.1786, "step": 28540 }, { "epoch": 1.26, "learning_rate": 2.9148392084566094e-05, "loss": 0.1563, "step": 28550 }, { "epoch": 1.26, "learning_rate": 2.9140958356253993e-05, "loss": 0.1409, "step": 28560 }, { "epoch": 1.26, "learning_rate": 2.91335246279419e-05, "loss": 0.2141, "step": 28570 }, { "epoch": 1.27, "learning_rate": 2.9126090899629805e-05, "loss": 0.1775, "step": 28580 }, { "epoch": 1.27, "learning_rate": 2.9118657171317704e-05, "loss": 0.0956, "step": 28590 }, { "epoch": 1.27, "learning_rate": 2.9111223443005607e-05, "loss": 0.1255, "step": 28600 }, { "epoch": 1.27, "learning_rate": 2.910378971469351e-05, "loss": 0.127, "step": 28610 }, { "epoch": 1.27, "learning_rate": 2.9096355986381412e-05, "loss": 0.1323, "step": 28620 }, { "epoch": 1.27, "learning_rate": 2.908892225806931e-05, "loss": 0.1948, "step": 28630 }, { "epoch": 1.27, "learning_rate": 2.9081488529757217e-05, "loss": 0.1184, "step": 28640 }, { "epoch": 1.27, "learning_rate": 2.9074054801445117e-05, "loss": 0.1966, "step": 28650 }, { "epoch": 1.27, "learning_rate": 2.9066621073133022e-05, "loss": 0.2312, "step": 28660 }, { "epoch": 1.27, "learning_rate": 2.905918734482092e-05, "loss": 0.1325, "step": 28670 }, { "epoch": 1.27, "learning_rate": 2.9051753616508824e-05, "loss": 0.1338, "step": 28680 }, { "epoch": 1.27, "learning_rate": 2.9044319888196723e-05, "loss": 0.1895, "step": 28690 }, { "epoch": 1.27, "learning_rate": 2.903688615988463e-05, "loss": 0.1534, "step": 28700 }, { "epoch": 1.27, "learning_rate": 2.9029452431572535e-05, "loss": 0.1418, "step": 28710 }, { "epoch": 1.27, "learning_rate": 2.9022018703260435e-05, "loss": 0.1474, "step": 28720 }, { "epoch": 1.27, "learning_rate": 2.901458497494834e-05, "loss": 0.1203, "step": 28730 }, { "epoch": 1.27, "learning_rate": 2.900715124663624e-05, "loss": 0.1611, "step": 28740 }, { "epoch": 1.27, "learning_rate": 2.8999717518324142e-05, "loss": 0.1609, "step": 28750 }, { "epoch": 1.27, "learning_rate": 2.899228379001204e-05, "loss": 0.1956, "step": 28760 }, { "epoch": 1.27, "learning_rate": 2.8984850061699947e-05, "loss": 0.1295, "step": 28770 }, { "epoch": 1.27, "learning_rate": 2.8977416333387847e-05, "loss": 0.1835, "step": 28780 }, { "epoch": 1.27, "learning_rate": 2.8969982605075753e-05, "loss": 0.1553, "step": 28790 }, { "epoch": 1.28, "learning_rate": 2.8962548876763652e-05, "loss": 0.2381, "step": 28800 }, { "epoch": 1.28, "learning_rate": 2.8955115148451554e-05, "loss": 0.1798, "step": 28810 }, { "epoch": 1.28, "learning_rate": 2.894768142013946e-05, "loss": 0.1094, "step": 28820 }, { "epoch": 1.28, "learning_rate": 2.894024769182736e-05, "loss": 0.1467, "step": 28830 }, { "epoch": 1.28, "learning_rate": 2.8932813963515266e-05, "loss": 0.1326, "step": 28840 }, { "epoch": 1.28, "learning_rate": 2.8925380235203165e-05, "loss": 0.1006, "step": 28850 }, { "epoch": 1.28, "learning_rate": 2.891794650689107e-05, "loss": 0.1231, "step": 28860 }, { "epoch": 1.28, "learning_rate": 2.891051277857897e-05, "loss": 0.2157, "step": 28870 }, { "epoch": 1.28, "learning_rate": 2.8903079050266872e-05, "loss": 0.1836, "step": 28880 }, { "epoch": 1.28, "learning_rate": 2.889564532195477e-05, "loss": 0.113, "step": 28890 }, { "epoch": 1.28, "learning_rate": 2.8888211593642678e-05, "loss": 0.1742, "step": 28900 }, { "epoch": 1.28, "learning_rate": 2.8880777865330577e-05, "loss": 0.1752, "step": 28910 }, { "epoch": 1.28, "learning_rate": 2.8873344137018483e-05, "loss": 0.1503, "step": 28920 }, { "epoch": 1.28, "learning_rate": 2.8865910408706382e-05, "loss": 0.1529, "step": 28930 }, { "epoch": 1.28, "learning_rate": 2.8858476680394288e-05, "loss": 0.0983, "step": 28940 }, { "epoch": 1.28, "learning_rate": 2.885104295208219e-05, "loss": 0.1762, "step": 28950 }, { "epoch": 1.28, "learning_rate": 2.884360922377009e-05, "loss": 0.1619, "step": 28960 }, { "epoch": 1.28, "learning_rate": 2.8836175495457996e-05, "loss": 0.133, "step": 28970 }, { "epoch": 1.28, "learning_rate": 2.8828741767145895e-05, "loss": 0.173, "step": 28980 }, { "epoch": 1.28, "learning_rate": 2.88213080388338e-05, "loss": 0.182, "step": 28990 }, { "epoch": 1.28, "learning_rate": 2.88138743105217e-05, "loss": 0.1242, "step": 29000 }, { "epoch": 1.28, "learning_rate": 2.8806440582209603e-05, "loss": 0.1322, "step": 29010 }, { "epoch": 1.28, "learning_rate": 2.8799006853897502e-05, "loss": 0.1656, "step": 29020 }, { "epoch": 1.29, "learning_rate": 2.8791573125585408e-05, "loss": 0.1553, "step": 29030 }, { "epoch": 1.29, "learning_rate": 2.8784139397273307e-05, "loss": 0.1048, "step": 29040 }, { "epoch": 1.29, "learning_rate": 2.8776705668961213e-05, "loss": 0.138, "step": 29050 }, { "epoch": 1.29, "learning_rate": 2.8769271940649112e-05, "loss": 0.1399, "step": 29060 }, { "epoch": 1.29, "learning_rate": 2.8761838212337018e-05, "loss": 0.1251, "step": 29070 }, { "epoch": 1.29, "learning_rate": 2.875440448402492e-05, "loss": 0.1519, "step": 29080 }, { "epoch": 1.29, "learning_rate": 2.874697075571282e-05, "loss": 0.131, "step": 29090 }, { "epoch": 1.29, "learning_rate": 2.8739537027400726e-05, "loss": 0.2176, "step": 29100 }, { "epoch": 1.29, "learning_rate": 2.8732103299088625e-05, "loss": 0.1094, "step": 29110 }, { "epoch": 1.29, "learning_rate": 2.872466957077653e-05, "loss": 0.0992, "step": 29120 }, { "epoch": 1.29, "learning_rate": 2.871723584246443e-05, "loss": 0.1838, "step": 29130 }, { "epoch": 1.29, "learning_rate": 2.8709802114152333e-05, "loss": 0.1372, "step": 29140 }, { "epoch": 1.29, "learning_rate": 2.8702368385840235e-05, "loss": 0.1149, "step": 29150 }, { "epoch": 1.29, "learning_rate": 2.8694934657528138e-05, "loss": 0.1251, "step": 29160 }, { "epoch": 1.29, "learning_rate": 2.8687500929216037e-05, "loss": 0.1378, "step": 29170 }, { "epoch": 1.29, "learning_rate": 2.8680067200903943e-05, "loss": 0.1246, "step": 29180 }, { "epoch": 1.29, "learning_rate": 2.867263347259185e-05, "loss": 0.1309, "step": 29190 }, { "epoch": 1.29, "learning_rate": 2.8665199744279748e-05, "loss": 0.2031, "step": 29200 }, { "epoch": 1.29, "learning_rate": 2.865776601596765e-05, "loss": 0.15, "step": 29210 }, { "epoch": 1.29, "learning_rate": 2.865033228765555e-05, "loss": 0.1698, "step": 29220 }, { "epoch": 1.29, "learning_rate": 2.8642898559343456e-05, "loss": 0.1232, "step": 29230 }, { "epoch": 1.29, "learning_rate": 2.8635464831031355e-05, "loss": 0.1104, "step": 29240 }, { "epoch": 1.29, "learning_rate": 2.862803110271926e-05, "loss": 0.1249, "step": 29250 }, { "epoch": 1.3, "learning_rate": 2.862059737440716e-05, "loss": 0.2162, "step": 29260 }, { "epoch": 1.3, "learning_rate": 2.8613163646095066e-05, "loss": 0.1566, "step": 29270 }, { "epoch": 1.3, "learning_rate": 2.8605729917782965e-05, "loss": 0.1291, "step": 29280 }, { "epoch": 1.3, "learning_rate": 2.8598296189470868e-05, "loss": 0.1239, "step": 29290 }, { "epoch": 1.3, "learning_rate": 2.8590862461158767e-05, "loss": 0.1275, "step": 29300 }, { "epoch": 1.3, "learning_rate": 2.8583428732846673e-05, "loss": 0.1484, "step": 29310 }, { "epoch": 1.3, "learning_rate": 2.857599500453458e-05, "loss": 0.1869, "step": 29320 }, { "epoch": 1.3, "learning_rate": 2.856856127622248e-05, "loss": 0.1464, "step": 29330 }, { "epoch": 1.3, "learning_rate": 2.856112754791038e-05, "loss": 0.1524, "step": 29340 }, { "epoch": 1.3, "learning_rate": 2.855369381959828e-05, "loss": 0.1396, "step": 29350 }, { "epoch": 1.3, "learning_rate": 2.8546260091286186e-05, "loss": 0.1517, "step": 29360 }, { "epoch": 1.3, "learning_rate": 2.8538826362974085e-05, "loss": 0.1361, "step": 29370 }, { "epoch": 1.3, "learning_rate": 2.853139263466199e-05, "loss": 0.1123, "step": 29380 }, { "epoch": 1.3, "learning_rate": 2.852395890634989e-05, "loss": 0.1304, "step": 29390 }, { "epoch": 1.3, "learning_rate": 2.8516525178037796e-05, "loss": 0.1495, "step": 29400 }, { "epoch": 1.3, "learning_rate": 2.8509091449725696e-05, "loss": 0.1318, "step": 29410 }, { "epoch": 1.3, "learning_rate": 2.8501657721413598e-05, "loss": 0.1365, "step": 29420 }, { "epoch": 1.3, "learning_rate": 2.8494223993101504e-05, "loss": 0.1668, "step": 29430 }, { "epoch": 1.3, "learning_rate": 2.8486790264789403e-05, "loss": 0.1423, "step": 29440 }, { "epoch": 1.3, "learning_rate": 2.847935653647731e-05, "loss": 0.1917, "step": 29450 }, { "epoch": 1.3, "learning_rate": 2.847192280816521e-05, "loss": 0.103, "step": 29460 }, { "epoch": 1.3, "learning_rate": 2.846448907985311e-05, "loss": 0.1131, "step": 29470 }, { "epoch": 1.31, "learning_rate": 2.8457055351541014e-05, "loss": 0.1179, "step": 29480 }, { "epoch": 1.31, "learning_rate": 2.8449621623228916e-05, "loss": 0.1601, "step": 29490 }, { "epoch": 1.31, "learning_rate": 2.8442187894916815e-05, "loss": 0.1887, "step": 29500 }, { "epoch": 1.31, "learning_rate": 2.843475416660472e-05, "loss": 0.117, "step": 29510 }, { "epoch": 1.31, "learning_rate": 2.842732043829262e-05, "loss": 0.1479, "step": 29520 }, { "epoch": 1.31, "learning_rate": 2.8419886709980527e-05, "loss": 0.142, "step": 29530 }, { "epoch": 1.31, "learning_rate": 2.8412452981668426e-05, "loss": 0.1185, "step": 29540 }, { "epoch": 1.31, "learning_rate": 2.840501925335633e-05, "loss": 0.1911, "step": 29550 }, { "epoch": 1.31, "learning_rate": 2.8397585525044234e-05, "loss": 0.1494, "step": 29560 }, { "epoch": 1.31, "learning_rate": 2.8390151796732134e-05, "loss": 0.1559, "step": 29570 }, { "epoch": 1.31, "learning_rate": 2.838271806842004e-05, "loss": 0.1829, "step": 29580 }, { "epoch": 1.31, "learning_rate": 2.837528434010794e-05, "loss": 0.145, "step": 29590 }, { "epoch": 1.31, "learning_rate": 2.8367850611795845e-05, "loss": 0.1164, "step": 29600 }, { "epoch": 1.31, "learning_rate": 2.8360416883483744e-05, "loss": 0.1135, "step": 29610 }, { "epoch": 1.31, "learning_rate": 2.8352983155171646e-05, "loss": 0.1336, "step": 29620 }, { "epoch": 1.31, "learning_rate": 2.8345549426859546e-05, "loss": 0.1268, "step": 29630 }, { "epoch": 1.31, "learning_rate": 2.833811569854745e-05, "loss": 0.1778, "step": 29640 }, { "epoch": 1.31, "learning_rate": 2.833068197023535e-05, "loss": 0.1086, "step": 29650 }, { "epoch": 1.31, "learning_rate": 2.8323248241923257e-05, "loss": 0.1658, "step": 29660 }, { "epoch": 1.31, "learning_rate": 2.8315814513611156e-05, "loss": 0.1615, "step": 29670 }, { "epoch": 1.31, "learning_rate": 2.830838078529906e-05, "loss": 0.1557, "step": 29680 }, { "epoch": 1.31, "learning_rate": 2.8300947056986964e-05, "loss": 0.1267, "step": 29690 }, { "epoch": 1.31, "learning_rate": 2.8293513328674864e-05, "loss": 0.1803, "step": 29700 }, { "epoch": 1.32, "learning_rate": 2.828607960036277e-05, "loss": 0.1654, "step": 29710 }, { "epoch": 1.32, "learning_rate": 2.827864587205067e-05, "loss": 0.1137, "step": 29720 }, { "epoch": 1.32, "learning_rate": 2.8271212143738575e-05, "loss": 0.144, "step": 29730 }, { "epoch": 1.32, "learning_rate": 2.8263778415426474e-05, "loss": 0.2144, "step": 29740 }, { "epoch": 1.32, "learning_rate": 2.8256344687114377e-05, "loss": 0.1246, "step": 29750 }, { "epoch": 1.32, "learning_rate": 2.8248910958802276e-05, "loss": 0.1245, "step": 29760 }, { "epoch": 1.32, "learning_rate": 2.8241477230490182e-05, "loss": 0.1687, "step": 29770 }, { "epoch": 1.32, "learning_rate": 2.823404350217808e-05, "loss": 0.1618, "step": 29780 }, { "epoch": 1.32, "learning_rate": 2.8226609773865987e-05, "loss": 0.1597, "step": 29790 }, { "epoch": 1.32, "learning_rate": 2.8219176045553893e-05, "loss": 0.1182, "step": 29800 }, { "epoch": 1.32, "learning_rate": 2.8211742317241792e-05, "loss": 0.0865, "step": 29810 }, { "epoch": 1.32, "learning_rate": 2.8204308588929695e-05, "loss": 0.0989, "step": 29820 }, { "epoch": 1.32, "learning_rate": 2.8196874860617594e-05, "loss": 0.163, "step": 29830 }, { "epoch": 1.32, "learning_rate": 2.81894411323055e-05, "loss": 0.1251, "step": 29840 }, { "epoch": 1.32, "learning_rate": 2.81820074039934e-05, "loss": 0.1823, "step": 29850 }, { "epoch": 1.32, "learning_rate": 2.8174573675681305e-05, "loss": 0.1603, "step": 29860 }, { "epoch": 1.32, "learning_rate": 2.8167139947369204e-05, "loss": 0.1301, "step": 29870 }, { "epoch": 1.32, "learning_rate": 2.8159706219057107e-05, "loss": 0.1654, "step": 29880 }, { "epoch": 1.32, "learning_rate": 2.8152272490745006e-05, "loss": 0.1133, "step": 29890 }, { "epoch": 1.32, "learning_rate": 2.8144838762432912e-05, "loss": 0.1784, "step": 29900 }, { "epoch": 1.32, "learning_rate": 2.813740503412081e-05, "loss": 0.1527, "step": 29910 }, { "epoch": 1.32, "learning_rate": 2.8129971305808717e-05, "loss": 0.1273, "step": 29920 }, { "epoch": 1.33, "learning_rate": 2.8122537577496623e-05, "loss": 0.105, "step": 29930 }, { "epoch": 1.33, "learning_rate": 2.8115103849184522e-05, "loss": 0.1594, "step": 29940 }, { "epoch": 1.33, "learning_rate": 2.8107670120872425e-05, "loss": 0.1126, "step": 29950 }, { "epoch": 1.33, "learning_rate": 2.8100236392560324e-05, "loss": 0.1643, "step": 29960 }, { "epoch": 1.33, "learning_rate": 2.809280266424823e-05, "loss": 0.1141, "step": 29970 }, { "epoch": 1.33, "learning_rate": 2.808536893593613e-05, "loss": 0.1526, "step": 29980 }, { "epoch": 1.33, "learning_rate": 2.8077935207624035e-05, "loss": 0.124, "step": 29990 }, { "epoch": 1.33, "learning_rate": 2.8070501479311934e-05, "loss": 0.1584, "step": 30000 }, { "epoch": 1.33, "learning_rate": 2.806306775099984e-05, "loss": 0.1713, "step": 30010 }, { "epoch": 1.33, "learning_rate": 2.805563402268774e-05, "loss": 0.1751, "step": 30020 }, { "epoch": 1.33, "learning_rate": 2.8048200294375642e-05, "loss": 0.1216, "step": 30030 }, { "epoch": 1.33, "learning_rate": 2.804076656606354e-05, "loss": 0.1417, "step": 30040 }, { "epoch": 1.33, "learning_rate": 2.8033332837751447e-05, "loss": 0.1366, "step": 30050 }, { "epoch": 1.33, "learning_rate": 2.8025899109439353e-05, "loss": 0.1571, "step": 30060 }, { "epoch": 1.33, "learning_rate": 2.8018465381127252e-05, "loss": 0.1377, "step": 30070 }, { "epoch": 1.33, "learning_rate": 2.8011031652815155e-05, "loss": 0.1291, "step": 30080 }, { "epoch": 1.33, "learning_rate": 2.8003597924503054e-05, "loss": 0.1121, "step": 30090 }, { "epoch": 1.33, "learning_rate": 2.799616419619096e-05, "loss": 0.1911, "step": 30100 }, { "epoch": 1.33, "learning_rate": 2.798873046787886e-05, "loss": 0.2248, "step": 30110 }, { "epoch": 1.33, "learning_rate": 2.7981296739566765e-05, "loss": 0.1465, "step": 30120 }, { "epoch": 1.33, "learning_rate": 2.7973863011254664e-05, "loss": 0.1543, "step": 30130 }, { "epoch": 1.33, "learning_rate": 2.796642928294257e-05, "loss": 0.1183, "step": 30140 }, { "epoch": 1.33, "learning_rate": 2.795899555463047e-05, "loss": 0.1452, "step": 30150 }, { "epoch": 1.34, "learning_rate": 2.7951561826318372e-05, "loss": 0.1329, "step": 30160 }, { "epoch": 1.34, "learning_rate": 2.7944128098006278e-05, "loss": 0.1074, "step": 30170 }, { "epoch": 1.34, "learning_rate": 2.7936694369694177e-05, "loss": 0.1919, "step": 30180 }, { "epoch": 1.34, "learning_rate": 2.7929260641382083e-05, "loss": 0.1689, "step": 30190 }, { "epoch": 1.34, "learning_rate": 2.7921826913069982e-05, "loss": 0.2, "step": 30200 }, { "epoch": 1.34, "learning_rate": 2.7914393184757885e-05, "loss": 0.1253, "step": 30210 }, { "epoch": 1.34, "learning_rate": 2.7906959456445784e-05, "loss": 0.1472, "step": 30220 }, { "epoch": 1.34, "learning_rate": 2.789952572813369e-05, "loss": 0.1721, "step": 30230 }, { "epoch": 1.34, "learning_rate": 2.789209199982159e-05, "loss": 0.1381, "step": 30240 }, { "epoch": 1.34, "learning_rate": 2.7884658271509495e-05, "loss": 0.0811, "step": 30250 }, { "epoch": 1.34, "learning_rate": 2.7877224543197395e-05, "loss": 0.125, "step": 30260 }, { "epoch": 1.34, "learning_rate": 2.78697908148853e-05, "loss": 0.1221, "step": 30270 }, { "epoch": 1.34, "learning_rate": 2.78623570865732e-05, "loss": 0.134, "step": 30280 }, { "epoch": 1.34, "learning_rate": 2.7854923358261102e-05, "loss": 0.1521, "step": 30290 }, { "epoch": 1.34, "learning_rate": 2.7847489629949008e-05, "loss": 0.1774, "step": 30300 }, { "epoch": 1.34, "learning_rate": 2.7840055901636907e-05, "loss": 0.1585, "step": 30310 }, { "epoch": 1.34, "learning_rate": 2.7832622173324813e-05, "loss": 0.159, "step": 30320 }, { "epoch": 1.34, "learning_rate": 2.7825188445012713e-05, "loss": 0.1729, "step": 30330 }, { "epoch": 1.34, "learning_rate": 2.781775471670062e-05, "loss": 0.1558, "step": 30340 }, { "epoch": 1.34, "learning_rate": 2.7810320988388518e-05, "loss": 0.0943, "step": 30350 }, { "epoch": 1.34, "learning_rate": 2.780288726007642e-05, "loss": 0.2137, "step": 30360 }, { "epoch": 1.34, "learning_rate": 2.779545353176432e-05, "loss": 0.1246, "step": 30370 }, { "epoch": 1.35, "learning_rate": 2.7788019803452225e-05, "loss": 0.1222, "step": 30380 }, { "epoch": 1.35, "learning_rate": 2.7780586075140125e-05, "loss": 0.2231, "step": 30390 }, { "epoch": 1.35, "learning_rate": 2.777315234682803e-05, "loss": 0.1606, "step": 30400 }, { "epoch": 1.35, "learning_rate": 2.776571861851593e-05, "loss": 0.1054, "step": 30410 }, { "epoch": 1.35, "learning_rate": 2.7758284890203832e-05, "loss": 0.208, "step": 30420 }, { "epoch": 1.35, "learning_rate": 2.775085116189174e-05, "loss": 0.1259, "step": 30430 }, { "epoch": 1.35, "learning_rate": 2.7743417433579638e-05, "loss": 0.1513, "step": 30440 }, { "epoch": 1.35, "learning_rate": 2.7735983705267544e-05, "loss": 0.155, "step": 30450 }, { "epoch": 1.35, "learning_rate": 2.7728549976955443e-05, "loss": 0.1968, "step": 30460 }, { "epoch": 1.35, "learning_rate": 2.772111624864335e-05, "loss": 0.1215, "step": 30470 }, { "epoch": 1.35, "learning_rate": 2.7713682520331248e-05, "loss": 0.1343, "step": 30480 }, { "epoch": 1.35, "learning_rate": 2.770624879201915e-05, "loss": 0.1163, "step": 30490 }, { "epoch": 1.35, "learning_rate": 2.769881506370705e-05, "loss": 0.1898, "step": 30500 }, { "epoch": 1.35, "learning_rate": 2.7691381335394956e-05, "loss": 0.1705, "step": 30510 }, { "epoch": 1.35, "learning_rate": 2.7683947607082855e-05, "loss": 0.1185, "step": 30520 }, { "epoch": 1.35, "learning_rate": 2.767651387877076e-05, "loss": 0.1668, "step": 30530 }, { "epoch": 1.35, "learning_rate": 2.7669080150458663e-05, "loss": 0.1074, "step": 30540 }, { "epoch": 1.35, "learning_rate": 2.7661646422146566e-05, "loss": 0.2155, "step": 30550 }, { "epoch": 1.35, "learning_rate": 2.765421269383447e-05, "loss": 0.123, "step": 30560 }, { "epoch": 1.35, "learning_rate": 2.7646778965522368e-05, "loss": 0.1226, "step": 30570 }, { "epoch": 1.35, "learning_rate": 2.7639345237210274e-05, "loss": 0.1137, "step": 30580 }, { "epoch": 1.35, "learning_rate": 2.7631911508898173e-05, "loss": 0.1528, "step": 30590 }, { "epoch": 1.35, "learning_rate": 2.762447778058608e-05, "loss": 0.1295, "step": 30600 }, { "epoch": 1.36, "learning_rate": 2.7617044052273978e-05, "loss": 0.2394, "step": 30610 }, { "epoch": 1.36, "learning_rate": 2.760961032396188e-05, "loss": 0.1601, "step": 30620 }, { "epoch": 1.36, "learning_rate": 2.760217659564978e-05, "loss": 0.1679, "step": 30630 }, { "epoch": 1.36, "learning_rate": 2.7594742867337686e-05, "loss": 0.1367, "step": 30640 }, { "epoch": 1.36, "learning_rate": 2.7587309139025585e-05, "loss": 0.1729, "step": 30650 }, { "epoch": 1.36, "learning_rate": 2.757987541071349e-05, "loss": 0.2015, "step": 30660 }, { "epoch": 1.36, "learning_rate": 2.7572441682401397e-05, "loss": 0.1668, "step": 30670 }, { "epoch": 1.36, "learning_rate": 2.7565007954089296e-05, "loss": 0.1915, "step": 30680 }, { "epoch": 1.36, "learning_rate": 2.75575742257772e-05, "loss": 0.1512, "step": 30690 }, { "epoch": 1.36, "learning_rate": 2.7550140497465098e-05, "loss": 0.1377, "step": 30700 }, { "epoch": 1.36, "learning_rate": 2.7542706769153004e-05, "loss": 0.1612, "step": 30710 }, { "epoch": 1.36, "learning_rate": 2.7535273040840903e-05, "loss": 0.1691, "step": 30720 }, { "epoch": 1.36, "learning_rate": 2.752783931252881e-05, "loss": 0.1558, "step": 30730 }, { "epoch": 1.36, "learning_rate": 2.7520405584216708e-05, "loss": 0.1721, "step": 30740 }, { "epoch": 1.36, "learning_rate": 2.751297185590461e-05, "loss": 0.1315, "step": 30750 }, { "epoch": 1.36, "learning_rate": 2.7505538127592513e-05, "loss": 0.1414, "step": 30760 }, { "epoch": 1.36, "learning_rate": 2.7498104399280416e-05, "loss": 0.1361, "step": 30770 }, { "epoch": 1.36, "learning_rate": 2.7490670670968322e-05, "loss": 0.1368, "step": 30780 }, { "epoch": 1.36, "learning_rate": 2.748323694265622e-05, "loss": 0.134, "step": 30790 }, { "epoch": 1.36, "learning_rate": 2.7475803214344127e-05, "loss": 0.1212, "step": 30800 }, { "epoch": 1.36, "learning_rate": 2.7468369486032026e-05, "loss": 0.1131, "step": 30810 }, { "epoch": 1.36, "learning_rate": 2.746093575771993e-05, "loss": 0.1663, "step": 30820 }, { "epoch": 1.36, "learning_rate": 2.7453502029407828e-05, "loss": 0.1479, "step": 30830 }, { "epoch": 1.37, "learning_rate": 2.7446068301095734e-05, "loss": 0.1667, "step": 30840 }, { "epoch": 1.37, "learning_rate": 2.7438634572783633e-05, "loss": 0.1346, "step": 30850 }, { "epoch": 1.37, "learning_rate": 2.743120084447154e-05, "loss": 0.1295, "step": 30860 }, { "epoch": 1.37, "learning_rate": 2.742376711615944e-05, "loss": 0.2018, "step": 30870 }, { "epoch": 1.37, "learning_rate": 2.7416333387847344e-05, "loss": 0.099, "step": 30880 }, { "epoch": 1.37, "learning_rate": 2.7408899659535243e-05, "loss": 0.1465, "step": 30890 }, { "epoch": 1.37, "learning_rate": 2.7401465931223146e-05, "loss": 0.1391, "step": 30900 }, { "epoch": 1.37, "learning_rate": 2.7394032202911052e-05, "loss": 0.172, "step": 30910 }, { "epoch": 1.37, "learning_rate": 2.738659847459895e-05, "loss": 0.1546, "step": 30920 }, { "epoch": 1.37, "learning_rate": 2.7379164746286857e-05, "loss": 0.1057, "step": 30930 }, { "epoch": 1.37, "learning_rate": 2.7371731017974756e-05, "loss": 0.1202, "step": 30940 }, { "epoch": 1.37, "learning_rate": 2.736429728966266e-05, "loss": 0.1598, "step": 30950 }, { "epoch": 1.37, "learning_rate": 2.7356863561350558e-05, "loss": 0.1468, "step": 30960 }, { "epoch": 1.37, "learning_rate": 2.7349429833038464e-05, "loss": 0.2259, "step": 30970 }, { "epoch": 1.37, "learning_rate": 2.7341996104726363e-05, "loss": 0.1568, "step": 30980 }, { "epoch": 1.37, "learning_rate": 2.733456237641427e-05, "loss": 0.1786, "step": 30990 }, { "epoch": 1.37, "learning_rate": 2.732712864810217e-05, "loss": 0.1627, "step": 31000 }, { "epoch": 1.37, "learning_rate": 2.7319694919790074e-05, "loss": 0.1561, "step": 31010 }, { "epoch": 1.37, "learning_rate": 2.7312261191477974e-05, "loss": 0.1873, "step": 31020 }, { "epoch": 1.37, "learning_rate": 2.7304827463165876e-05, "loss": 0.2104, "step": 31030 }, { "epoch": 1.37, "learning_rate": 2.7297393734853782e-05, "loss": 0.107, "step": 31040 }, { "epoch": 1.37, "learning_rate": 2.728996000654168e-05, "loss": 0.096, "step": 31050 }, { "epoch": 1.38, "learning_rate": 2.7282526278229587e-05, "loss": 0.196, "step": 31060 }, { "epoch": 1.38, "learning_rate": 2.7275092549917487e-05, "loss": 0.126, "step": 31070 }, { "epoch": 1.38, "learning_rate": 2.726765882160539e-05, "loss": 0.0904, "step": 31080 }, { "epoch": 1.38, "learning_rate": 2.7260225093293292e-05, "loss": 0.1979, "step": 31090 }, { "epoch": 1.38, "learning_rate": 2.7252791364981194e-05, "loss": 0.2095, "step": 31100 }, { "epoch": 1.38, "learning_rate": 2.7245357636669093e-05, "loss": 0.1512, "step": 31110 }, { "epoch": 1.38, "learning_rate": 2.7237923908357e-05, "loss": 0.1583, "step": 31120 }, { "epoch": 1.38, "learning_rate": 2.72304901800449e-05, "loss": 0.1236, "step": 31130 }, { "epoch": 1.38, "learning_rate": 2.7223056451732805e-05, "loss": 0.1425, "step": 31140 }, { "epoch": 1.38, "learning_rate": 2.7215622723420707e-05, "loss": 0.1515, "step": 31150 }, { "epoch": 1.38, "learning_rate": 2.7208188995108606e-05, "loss": 0.1374, "step": 31160 }, { "epoch": 1.38, "learning_rate": 2.7200755266796512e-05, "loss": 0.1323, "step": 31170 }, { "epoch": 1.38, "learning_rate": 2.719332153848441e-05, "loss": 0.1654, "step": 31180 }, { "epoch": 1.38, "learning_rate": 2.7185887810172317e-05, "loss": 0.0881, "step": 31190 }, { "epoch": 1.38, "learning_rate": 2.7178454081860217e-05, "loss": 0.1594, "step": 31200 }, { "epoch": 1.38, "learning_rate": 2.7171020353548123e-05, "loss": 0.147, "step": 31210 }, { "epoch": 1.38, "learning_rate": 2.7163586625236022e-05, "loss": 0.132, "step": 31220 }, { "epoch": 1.38, "learning_rate": 2.7156152896923924e-05, "loss": 0.1484, "step": 31230 }, { "epoch": 1.38, "learning_rate": 2.7148719168611824e-05, "loss": 0.1748, "step": 31240 }, { "epoch": 1.38, "learning_rate": 2.714128544029973e-05, "loss": 0.1244, "step": 31250 }, { "epoch": 1.38, "learning_rate": 2.713385171198763e-05, "loss": 0.201, "step": 31260 }, { "epoch": 1.38, "learning_rate": 2.7126417983675535e-05, "loss": 0.1079, "step": 31270 }, { "epoch": 1.38, "learning_rate": 2.7118984255363437e-05, "loss": 0.1308, "step": 31280 }, { "epoch": 1.39, "learning_rate": 2.7111550527051337e-05, "loss": 0.2069, "step": 31290 }, { "epoch": 1.39, "learning_rate": 2.7104116798739242e-05, "loss": 0.1616, "step": 31300 }, { "epoch": 1.39, "learning_rate": 2.709668307042714e-05, "loss": 0.1753, "step": 31310 }, { "epoch": 1.39, "learning_rate": 2.7089249342115048e-05, "loss": 0.1854, "step": 31320 }, { "epoch": 1.39, "learning_rate": 2.7081815613802947e-05, "loss": 0.139, "step": 31330 }, { "epoch": 1.39, "learning_rate": 2.7074381885490853e-05, "loss": 0.1428, "step": 31340 }, { "epoch": 1.39, "learning_rate": 2.7066948157178752e-05, "loss": 0.1224, "step": 31350 }, { "epoch": 1.39, "learning_rate": 2.7059514428866655e-05, "loss": 0.1472, "step": 31360 }, { "epoch": 1.39, "learning_rate": 2.7052080700554554e-05, "loss": 0.1192, "step": 31370 }, { "epoch": 1.39, "learning_rate": 2.704464697224246e-05, "loss": 0.1153, "step": 31380 }, { "epoch": 1.39, "learning_rate": 2.703721324393036e-05, "loss": 0.135, "step": 31390 }, { "epoch": 1.39, "learning_rate": 2.7029779515618265e-05, "loss": 0.1333, "step": 31400 }, { "epoch": 1.39, "learning_rate": 2.702234578730617e-05, "loss": 0.1187, "step": 31410 }, { "epoch": 1.39, "learning_rate": 2.701491205899407e-05, "loss": 0.1685, "step": 31420 }, { "epoch": 1.39, "learning_rate": 2.7007478330681973e-05, "loss": 0.1364, "step": 31430 }, { "epoch": 1.39, "learning_rate": 2.7000044602369872e-05, "loss": 0.1539, "step": 31440 }, { "epoch": 1.39, "learning_rate": 2.6992610874057778e-05, "loss": 0.1257, "step": 31450 }, { "epoch": 1.39, "learning_rate": 2.6985177145745677e-05, "loss": 0.1427, "step": 31460 }, { "epoch": 1.39, "learning_rate": 2.6977743417433583e-05, "loss": 0.1284, "step": 31470 }, { "epoch": 1.39, "learning_rate": 2.6970309689121482e-05, "loss": 0.1433, "step": 31480 }, { "epoch": 1.39, "learning_rate": 2.6962875960809385e-05, "loss": 0.0759, "step": 31490 }, { "epoch": 1.39, "learning_rate": 2.6955442232497284e-05, "loss": 0.1825, "step": 31500 }, { "epoch": 1.4, "learning_rate": 2.694800850418519e-05, "loss": 0.1767, "step": 31510 }, { "epoch": 1.4, "learning_rate": 2.6940574775873096e-05, "loss": 0.0977, "step": 31520 }, { "epoch": 1.4, "learning_rate": 2.6933141047560995e-05, "loss": 0.1597, "step": 31530 }, { "epoch": 1.4, "learning_rate": 2.69257073192489e-05, "loss": 0.1223, "step": 31540 }, { "epoch": 1.4, "learning_rate": 2.69182735909368e-05, "loss": 0.132, "step": 31550 }, { "epoch": 1.4, "learning_rate": 2.6910839862624703e-05, "loss": 0.1957, "step": 31560 }, { "epoch": 1.4, "learning_rate": 2.6903406134312602e-05, "loss": 0.1571, "step": 31570 }, { "epoch": 1.4, "learning_rate": 2.6895972406000508e-05, "loss": 0.1138, "step": 31580 }, { "epoch": 1.4, "learning_rate": 2.6888538677688407e-05, "loss": 0.2126, "step": 31590 }, { "epoch": 1.4, "learning_rate": 2.6881104949376313e-05, "loss": 0.1669, "step": 31600 }, { "epoch": 1.4, "learning_rate": 2.6873671221064212e-05, "loss": 0.1456, "step": 31610 }, { "epoch": 1.4, "learning_rate": 2.6866237492752115e-05, "loss": 0.1687, "step": 31620 }, { "epoch": 1.4, "learning_rate": 2.6858803764440017e-05, "loss": 0.1388, "step": 31630 }, { "epoch": 1.4, "learning_rate": 2.685137003612792e-05, "loss": 0.1262, "step": 31640 }, { "epoch": 1.4, "learning_rate": 2.6843936307815826e-05, "loss": 0.1057, "step": 31650 }, { "epoch": 1.4, "learning_rate": 2.6836502579503725e-05, "loss": 0.1342, "step": 31660 }, { "epoch": 1.4, "learning_rate": 2.682906885119163e-05, "loss": 0.1888, "step": 31670 }, { "epoch": 1.4, "learning_rate": 2.682163512287953e-05, "loss": 0.1858, "step": 31680 }, { "epoch": 1.4, "learning_rate": 2.6814201394567433e-05, "loss": 0.1068, "step": 31690 }, { "epoch": 1.4, "learning_rate": 2.6806767666255332e-05, "loss": 0.1641, "step": 31700 }, { "epoch": 1.4, "learning_rate": 2.6799333937943238e-05, "loss": 0.1524, "step": 31710 }, { "epoch": 1.4, "learning_rate": 2.6791900209631137e-05, "loss": 0.1182, "step": 31720 }, { "epoch": 1.4, "learning_rate": 2.6784466481319043e-05, "loss": 0.0989, "step": 31730 }, { "epoch": 1.41, "learning_rate": 2.6777032753006942e-05, "loss": 0.1723, "step": 31740 }, { "epoch": 1.41, "learning_rate": 2.676959902469485e-05, "loss": 0.1587, "step": 31750 }, { "epoch": 1.41, "learning_rate": 2.676216529638275e-05, "loss": 0.1278, "step": 31760 }, { "epoch": 1.41, "learning_rate": 2.675473156807065e-05, "loss": 0.1498, "step": 31770 }, { "epoch": 1.41, "learning_rate": 2.6747297839758556e-05, "loss": 0.1702, "step": 31780 }, { "epoch": 1.41, "learning_rate": 2.6739864111446455e-05, "loss": 0.1265, "step": 31790 }, { "epoch": 1.41, "learning_rate": 2.673243038313436e-05, "loss": 0.185, "step": 31800 }, { "epoch": 1.41, "learning_rate": 2.672499665482226e-05, "loss": 0.172, "step": 31810 }, { "epoch": 1.41, "learning_rate": 2.6717562926510163e-05, "loss": 0.1897, "step": 31820 }, { "epoch": 1.41, "learning_rate": 2.6710129198198062e-05, "loss": 0.1689, "step": 31830 }, { "epoch": 1.41, "learning_rate": 2.6702695469885968e-05, "loss": 0.117, "step": 31840 }, { "epoch": 1.41, "learning_rate": 2.6695261741573867e-05, "loss": 0.1188, "step": 31850 }, { "epoch": 1.41, "learning_rate": 2.6687828013261773e-05, "loss": 0.1405, "step": 31860 }, { "epoch": 1.41, "learning_rate": 2.6680394284949673e-05, "loss": 0.1542, "step": 31870 }, { "epoch": 1.41, "learning_rate": 2.667296055663758e-05, "loss": 0.136, "step": 31880 }, { "epoch": 1.41, "learning_rate": 2.666552682832548e-05, "loss": 0.1252, "step": 31890 }, { "epoch": 1.41, "learning_rate": 2.665809310001338e-05, "loss": 0.1295, "step": 31900 }, { "epoch": 1.41, "learning_rate": 2.6650659371701286e-05, "loss": 0.2021, "step": 31910 }, { "epoch": 1.41, "learning_rate": 2.6643225643389185e-05, "loss": 0.1507, "step": 31920 }, { "epoch": 1.41, "learning_rate": 2.663579191507709e-05, "loss": 0.1296, "step": 31930 }, { "epoch": 1.41, "learning_rate": 2.662835818676499e-05, "loss": 0.1321, "step": 31940 }, { "epoch": 1.41, "learning_rate": 2.6620924458452897e-05, "loss": 0.1463, "step": 31950 }, { "epoch": 1.41, "learning_rate": 2.6613490730140796e-05, "loss": 0.1233, "step": 31960 }, { "epoch": 1.42, "learning_rate": 2.66060570018287e-05, "loss": 0.0903, "step": 31970 }, { "epoch": 1.42, "learning_rate": 2.6598623273516598e-05, "loss": 0.132, "step": 31980 }, { "epoch": 1.42, "learning_rate": 2.6591189545204504e-05, "loss": 0.1349, "step": 31990 }, { "epoch": 1.42, "learning_rate": 2.6583755816892403e-05, "loss": 0.1165, "step": 32000 }, { "epoch": 1.42, "learning_rate": 2.657632208858031e-05, "loss": 0.1436, "step": 32010 }, { "epoch": 1.42, "learning_rate": 2.656888836026821e-05, "loss": 0.1835, "step": 32020 }, { "epoch": 1.42, "learning_rate": 2.656145463195611e-05, "loss": 0.1228, "step": 32030 }, { "epoch": 1.42, "learning_rate": 2.6554020903644016e-05, "loss": 0.127, "step": 32040 }, { "epoch": 1.42, "learning_rate": 2.6546587175331916e-05, "loss": 0.1121, "step": 32050 }, { "epoch": 1.42, "learning_rate": 2.653915344701982e-05, "loss": 0.1612, "step": 32060 }, { "epoch": 1.42, "learning_rate": 2.653171971870772e-05, "loss": 0.1406, "step": 32070 }, { "epoch": 1.42, "learning_rate": 2.6524285990395627e-05, "loss": 0.0911, "step": 32080 }, { "epoch": 1.42, "learning_rate": 2.6516852262083526e-05, "loss": 0.1907, "step": 32090 }, { "epoch": 1.42, "learning_rate": 2.650941853377143e-05, "loss": 0.1779, "step": 32100 }, { "epoch": 1.42, "learning_rate": 2.6501984805459328e-05, "loss": 0.1272, "step": 32110 }, { "epoch": 1.42, "learning_rate": 2.6494551077147234e-05, "loss": 0.1619, "step": 32120 }, { "epoch": 1.42, "learning_rate": 2.648711734883514e-05, "loss": 0.1197, "step": 32130 }, { "epoch": 1.42, "learning_rate": 2.647968362052304e-05, "loss": 0.1762, "step": 32140 }, { "epoch": 1.42, "learning_rate": 2.647224989221094e-05, "loss": 0.112, "step": 32150 }, { "epoch": 1.42, "learning_rate": 2.6464816163898844e-05, "loss": 0.1069, "step": 32160 }, { "epoch": 1.42, "learning_rate": 2.6457382435586747e-05, "loss": 0.1581, "step": 32170 }, { "epoch": 1.42, "learning_rate": 2.6449948707274646e-05, "loss": 0.1345, "step": 32180 }, { "epoch": 1.43, "learning_rate": 2.6442514978962552e-05, "loss": 0.1486, "step": 32190 }, { "epoch": 1.43, "learning_rate": 2.643508125065045e-05, "loss": 0.1294, "step": 32200 }, { "epoch": 1.43, "learning_rate": 2.6427647522338357e-05, "loss": 0.1449, "step": 32210 }, { "epoch": 1.43, "learning_rate": 2.6420213794026256e-05, "loss": 0.1295, "step": 32220 }, { "epoch": 1.43, "learning_rate": 2.641278006571416e-05, "loss": 0.1516, "step": 32230 }, { "epoch": 1.43, "learning_rate": 2.6405346337402058e-05, "loss": 0.1954, "step": 32240 }, { "epoch": 1.43, "learning_rate": 2.6397912609089964e-05, "loss": 0.1859, "step": 32250 }, { "epoch": 1.43, "learning_rate": 2.639047888077787e-05, "loss": 0.0849, "step": 32260 }, { "epoch": 1.43, "learning_rate": 2.638304515246577e-05, "loss": 0.1443, "step": 32270 }, { "epoch": 1.43, "learning_rate": 2.6375611424153675e-05, "loss": 0.1381, "step": 32280 }, { "epoch": 1.43, "learning_rate": 2.6368177695841574e-05, "loss": 0.0827, "step": 32290 }, { "epoch": 1.43, "learning_rate": 2.6360743967529477e-05, "loss": 0.1385, "step": 32300 }, { "epoch": 1.43, "learning_rate": 2.6353310239217376e-05, "loss": 0.1884, "step": 32310 }, { "epoch": 1.43, "learning_rate": 2.6345876510905282e-05, "loss": 0.2028, "step": 32320 }, { "epoch": 1.43, "learning_rate": 2.633844278259318e-05, "loss": 0.1123, "step": 32330 }, { "epoch": 1.43, "learning_rate": 2.6331009054281087e-05, "loss": 0.1612, "step": 32340 }, { "epoch": 1.43, "learning_rate": 2.6323575325968986e-05, "loss": 0.1554, "step": 32350 }, { "epoch": 1.43, "learning_rate": 2.631614159765689e-05, "loss": 0.1829, "step": 32360 }, { "epoch": 1.43, "learning_rate": 2.630870786934479e-05, "loss": 0.1388, "step": 32370 }, { "epoch": 1.43, "learning_rate": 2.6301274141032694e-05, "loss": 0.1558, "step": 32380 }, { "epoch": 1.43, "learning_rate": 2.62938404127206e-05, "loss": 0.1305, "step": 32390 }, { "epoch": 1.43, "learning_rate": 2.62864066844085e-05, "loss": 0.1503, "step": 32400 }, { "epoch": 1.43, "learning_rate": 2.6278972956096405e-05, "loss": 0.1699, "step": 32410 }, { "epoch": 1.44, "learning_rate": 2.6271539227784304e-05, "loss": 0.1144, "step": 32420 }, { "epoch": 1.44, "learning_rate": 2.6264105499472207e-05, "loss": 0.172, "step": 32430 }, { "epoch": 1.44, "learning_rate": 2.6256671771160106e-05, "loss": 0.1231, "step": 32440 }, { "epoch": 1.44, "learning_rate": 2.6249238042848012e-05, "loss": 0.1551, "step": 32450 }, { "epoch": 1.44, "learning_rate": 2.624180431453591e-05, "loss": 0.2342, "step": 32460 }, { "epoch": 1.44, "learning_rate": 2.6234370586223817e-05, "loss": 0.1555, "step": 32470 }, { "epoch": 1.44, "learning_rate": 2.6226936857911716e-05, "loss": 0.1759, "step": 32480 }, { "epoch": 1.44, "learning_rate": 2.6219503129599622e-05, "loss": 0.1398, "step": 32490 }, { "epoch": 1.44, "learning_rate": 2.6212069401287525e-05, "loss": 0.1524, "step": 32500 }, { "epoch": 1.44, "learning_rate": 2.6204635672975424e-05, "loss": 0.154, "step": 32510 }, { "epoch": 1.44, "learning_rate": 2.619720194466333e-05, "loss": 0.1239, "step": 32520 }, { "epoch": 1.44, "learning_rate": 2.618976821635123e-05, "loss": 0.1664, "step": 32530 }, { "epoch": 1.44, "learning_rate": 2.6182334488039135e-05, "loss": 0.1718, "step": 32540 }, { "epoch": 1.44, "learning_rate": 2.6174900759727034e-05, "loss": 0.1651, "step": 32550 }, { "epoch": 1.44, "learning_rate": 2.6167467031414937e-05, "loss": 0.1001, "step": 32560 }, { "epoch": 1.44, "learning_rate": 2.6160033303102836e-05, "loss": 0.1402, "step": 32570 }, { "epoch": 1.44, "learning_rate": 2.6152599574790742e-05, "loss": 0.1386, "step": 32580 }, { "epoch": 1.44, "learning_rate": 2.614516584647864e-05, "loss": 0.127, "step": 32590 }, { "epoch": 1.44, "learning_rate": 2.6137732118166547e-05, "loss": 0.1557, "step": 32600 }, { "epoch": 1.44, "learning_rate": 2.6130298389854447e-05, "loss": 0.1206, "step": 32610 }, { "epoch": 1.44, "learning_rate": 2.6122864661542352e-05, "loss": 0.141, "step": 32620 }, { "epoch": 1.44, "learning_rate": 2.6115430933230255e-05, "loss": 0.1605, "step": 32630 }, { "epoch": 1.45, "learning_rate": 2.6107997204918154e-05, "loss": 0.1604, "step": 32640 }, { "epoch": 1.45, "learning_rate": 2.610056347660606e-05, "loss": 0.1313, "step": 32650 }, { "epoch": 1.45, "learning_rate": 2.609312974829396e-05, "loss": 0.1143, "step": 32660 }, { "epoch": 1.45, "learning_rate": 2.6085696019981865e-05, "loss": 0.1441, "step": 32670 }, { "epoch": 1.45, "learning_rate": 2.6078262291669765e-05, "loss": 0.1699, "step": 32680 }, { "epoch": 1.45, "learning_rate": 2.6070828563357667e-05, "loss": 0.2155, "step": 32690 }, { "epoch": 1.45, "learning_rate": 2.606339483504557e-05, "loss": 0.1657, "step": 32700 }, { "epoch": 1.45, "learning_rate": 2.6055961106733472e-05, "loss": 0.1874, "step": 32710 }, { "epoch": 1.45, "learning_rate": 2.604852737842137e-05, "loss": 0.1986, "step": 32720 }, { "epoch": 1.45, "learning_rate": 2.6041093650109277e-05, "loss": 0.1623, "step": 32730 }, { "epoch": 1.45, "learning_rate": 2.6033659921797183e-05, "loss": 0.105, "step": 32740 }, { "epoch": 1.45, "learning_rate": 2.6026226193485083e-05, "loss": 0.1584, "step": 32750 }, { "epoch": 1.45, "learning_rate": 2.6018792465172985e-05, "loss": 0.1187, "step": 32760 }, { "epoch": 1.45, "learning_rate": 2.6011358736860884e-05, "loss": 0.1541, "step": 32770 }, { "epoch": 1.45, "learning_rate": 2.600392500854879e-05, "loss": 0.1306, "step": 32780 }, { "epoch": 1.45, "learning_rate": 2.599649128023669e-05, "loss": 0.169, "step": 32790 }, { "epoch": 1.45, "learning_rate": 2.5989057551924596e-05, "loss": 0.1878, "step": 32800 }, { "epoch": 1.45, "learning_rate": 2.5981623823612495e-05, "loss": 0.1562, "step": 32810 }, { "epoch": 1.45, "learning_rate": 2.59741900953004e-05, "loss": 0.1106, "step": 32820 }, { "epoch": 1.45, "learning_rate": 2.59667563669883e-05, "loss": 0.1595, "step": 32830 }, { "epoch": 1.45, "learning_rate": 2.5959322638676202e-05, "loss": 0.1025, "step": 32840 }, { "epoch": 1.45, "learning_rate": 2.59518889103641e-05, "loss": 0.1374, "step": 32850 }, { "epoch": 1.45, "learning_rate": 2.5944455182052008e-05, "loss": 0.2167, "step": 32860 }, { "epoch": 1.46, "learning_rate": 2.5937021453739914e-05, "loss": 0.1805, "step": 32870 }, { "epoch": 1.46, "learning_rate": 2.5929587725427813e-05, "loss": 0.1531, "step": 32880 }, { "epoch": 1.46, "learning_rate": 2.5922153997115715e-05, "loss": 0.1592, "step": 32890 }, { "epoch": 1.46, "learning_rate": 2.5914720268803615e-05, "loss": 0.1272, "step": 32900 }, { "epoch": 1.46, "learning_rate": 2.590728654049152e-05, "loss": 0.1242, "step": 32910 }, { "epoch": 1.46, "learning_rate": 2.589985281217942e-05, "loss": 0.1646, "step": 32920 }, { "epoch": 1.46, "learning_rate": 2.5892419083867326e-05, "loss": 0.1592, "step": 32930 }, { "epoch": 1.46, "learning_rate": 2.5884985355555225e-05, "loss": 0.1403, "step": 32940 }, { "epoch": 1.46, "learning_rate": 2.587755162724313e-05, "loss": 0.1315, "step": 32950 }, { "epoch": 1.46, "learning_rate": 2.587011789893103e-05, "loss": 0.1657, "step": 32960 }, { "epoch": 1.46, "learning_rate": 2.5862684170618933e-05, "loss": 0.1648, "step": 32970 }, { "epoch": 1.46, "learning_rate": 2.5855250442306832e-05, "loss": 0.1137, "step": 32980 }, { "epoch": 1.46, "learning_rate": 2.5847816713994738e-05, "loss": 0.174, "step": 32990 }, { "epoch": 1.46, "learning_rate": 2.5840382985682644e-05, "loss": 0.146, "step": 33000 }, { "epoch": 1.46, "learning_rate": 2.5832949257370543e-05, "loss": 0.1521, "step": 33010 }, { "epoch": 1.46, "learning_rate": 2.5825515529058445e-05, "loss": 0.1428, "step": 33020 }, { "epoch": 1.46, "learning_rate": 2.5818081800746348e-05, "loss": 0.1596, "step": 33030 }, { "epoch": 1.46, "learning_rate": 2.581064807243425e-05, "loss": 0.1194, "step": 33040 }, { "epoch": 1.46, "learning_rate": 2.580321434412215e-05, "loss": 0.1523, "step": 33050 }, { "epoch": 1.46, "learning_rate": 2.5795780615810056e-05, "loss": 0.1264, "step": 33060 }, { "epoch": 1.46, "learning_rate": 2.5788346887497955e-05, "loss": 0.1659, "step": 33070 }, { "epoch": 1.46, "learning_rate": 2.578091315918586e-05, "loss": 0.1802, "step": 33080 }, { "epoch": 1.47, "learning_rate": 2.577347943087376e-05, "loss": 0.141, "step": 33090 }, { "epoch": 1.47, "learning_rate": 2.5766045702561663e-05, "loss": 0.1215, "step": 33100 }, { "epoch": 1.47, "learning_rate": 2.575861197424957e-05, "loss": 0.1057, "step": 33110 }, { "epoch": 1.47, "learning_rate": 2.5751178245937468e-05, "loss": 0.1239, "step": 33120 }, { "epoch": 1.47, "learning_rate": 2.5743744517625374e-05, "loss": 0.1957, "step": 33130 }, { "epoch": 1.47, "learning_rate": 2.5736310789313273e-05, "loss": 0.1355, "step": 33140 }, { "epoch": 1.47, "learning_rate": 2.572887706100118e-05, "loss": 0.1544, "step": 33150 }, { "epoch": 1.47, "learning_rate": 2.5721443332689078e-05, "loss": 0.1895, "step": 33160 }, { "epoch": 1.47, "learning_rate": 2.571400960437698e-05, "loss": 0.1534, "step": 33170 }, { "epoch": 1.47, "learning_rate": 2.570657587606488e-05, "loss": 0.1279, "step": 33180 }, { "epoch": 1.47, "learning_rate": 2.5699142147752786e-05, "loss": 0.1393, "step": 33190 }, { "epoch": 1.47, "learning_rate": 2.5691708419440685e-05, "loss": 0.1386, "step": 33200 }, { "epoch": 1.47, "learning_rate": 2.568427469112859e-05, "loss": 0.1379, "step": 33210 }, { "epoch": 1.47, "learning_rate": 2.567684096281649e-05, "loss": 0.1617, "step": 33220 }, { "epoch": 1.47, "learning_rate": 2.5669407234504393e-05, "loss": 0.1443, "step": 33230 }, { "epoch": 1.47, "learning_rate": 2.56619735061923e-05, "loss": 0.1519, "step": 33240 }, { "epoch": 1.47, "learning_rate": 2.5654539777880198e-05, "loss": 0.1781, "step": 33250 }, { "epoch": 1.47, "learning_rate": 2.5647106049568104e-05, "loss": 0.0905, "step": 33260 }, { "epoch": 1.47, "learning_rate": 2.5639672321256003e-05, "loss": 0.1439, "step": 33270 }, { "epoch": 1.47, "learning_rate": 2.563223859294391e-05, "loss": 0.1094, "step": 33280 }, { "epoch": 1.47, "learning_rate": 2.562480486463181e-05, "loss": 0.1183, "step": 33290 }, { "epoch": 1.47, "learning_rate": 2.561737113631971e-05, "loss": 0.1954, "step": 33300 }, { "epoch": 1.47, "learning_rate": 2.560993740800761e-05, "loss": 0.1215, "step": 33310 }, { "epoch": 1.48, "learning_rate": 2.5602503679695516e-05, "loss": 0.1371, "step": 33320 }, { "epoch": 1.48, "learning_rate": 2.5595069951383415e-05, "loss": 0.1382, "step": 33330 }, { "epoch": 1.48, "learning_rate": 2.558763622307132e-05, "loss": 0.1323, "step": 33340 }, { "epoch": 1.48, "learning_rate": 2.558020249475922e-05, "loss": 0.237, "step": 33350 }, { "epoch": 1.48, "learning_rate": 2.5572768766447126e-05, "loss": 0.1109, "step": 33360 }, { "epoch": 1.48, "learning_rate": 2.556533503813503e-05, "loss": 0.1066, "step": 33370 }, { "epoch": 1.48, "learning_rate": 2.5557901309822928e-05, "loss": 0.1784, "step": 33380 }, { "epoch": 1.48, "learning_rate": 2.5550467581510834e-05, "loss": 0.1199, "step": 33390 }, { "epoch": 1.48, "learning_rate": 2.5543033853198733e-05, "loss": 0.1605, "step": 33400 }, { "epoch": 1.48, "learning_rate": 2.553560012488664e-05, "loss": 0.2012, "step": 33410 }, { "epoch": 1.48, "learning_rate": 2.552816639657454e-05, "loss": 0.1401, "step": 33420 }, { "epoch": 1.48, "learning_rate": 2.552073266826244e-05, "loss": 0.1234, "step": 33430 }, { "epoch": 1.48, "learning_rate": 2.551329893995034e-05, "loss": 0.1435, "step": 33440 }, { "epoch": 1.48, "learning_rate": 2.5505865211638246e-05, "loss": 0.1162, "step": 33450 }, { "epoch": 1.48, "learning_rate": 2.5498431483326145e-05, "loss": 0.1675, "step": 33460 }, { "epoch": 1.48, "learning_rate": 2.549099775501405e-05, "loss": 0.2093, "step": 33470 }, { "epoch": 1.48, "learning_rate": 2.5483564026701957e-05, "loss": 0.1224, "step": 33480 }, { "epoch": 1.48, "learning_rate": 2.5476130298389857e-05, "loss": 0.0908, "step": 33490 }, { "epoch": 1.48, "learning_rate": 2.546869657007776e-05, "loss": 0.1185, "step": 33500 }, { "epoch": 1.48, "learning_rate": 2.546126284176566e-05, "loss": 0.0742, "step": 33510 }, { "epoch": 1.48, "learning_rate": 2.5453829113453564e-05, "loss": 0.2029, "step": 33520 }, { "epoch": 1.48, "learning_rate": 2.5446395385141463e-05, "loss": 0.1588, "step": 33530 }, { "epoch": 1.48, "learning_rate": 2.543896165682937e-05, "loss": 0.174, "step": 33540 }, { "epoch": 1.49, "learning_rate": 2.543152792851727e-05, "loss": 0.1087, "step": 33550 }, { "epoch": 1.49, "learning_rate": 2.5424094200205175e-05, "loss": 0.1234, "step": 33560 }, { "epoch": 1.49, "learning_rate": 2.5416660471893074e-05, "loss": 0.1371, "step": 33570 }, { "epoch": 1.49, "learning_rate": 2.5409226743580976e-05, "loss": 0.1257, "step": 33580 }, { "epoch": 1.49, "learning_rate": 2.5401793015268876e-05, "loss": 0.1198, "step": 33590 }, { "epoch": 1.49, "learning_rate": 2.539435928695678e-05, "loss": 0.1226, "step": 33600 }, { "epoch": 1.49, "learning_rate": 2.5386925558644688e-05, "loss": 0.134, "step": 33610 }, { "epoch": 1.49, "learning_rate": 2.5379491830332587e-05, "loss": 0.1621, "step": 33620 }, { "epoch": 1.49, "learning_rate": 2.537205810202049e-05, "loss": 0.208, "step": 33630 }, { "epoch": 1.49, "learning_rate": 2.536462437370839e-05, "loss": 0.1763, "step": 33640 }, { "epoch": 1.49, "learning_rate": 2.5357190645396294e-05, "loss": 0.1613, "step": 33650 }, { "epoch": 1.49, "learning_rate": 2.5349756917084194e-05, "loss": 0.1225, "step": 33660 }, { "epoch": 1.49, "learning_rate": 2.53423231887721e-05, "loss": 0.1412, "step": 33670 }, { "epoch": 1.49, "learning_rate": 2.533488946046e-05, "loss": 0.1198, "step": 33680 }, { "epoch": 1.49, "learning_rate": 2.5327455732147905e-05, "loss": 0.164, "step": 33690 }, { "epoch": 1.49, "learning_rate": 2.5320022003835804e-05, "loss": 0.1273, "step": 33700 }, { "epoch": 1.49, "learning_rate": 2.5312588275523707e-05, "loss": 0.1598, "step": 33710 }, { "epoch": 1.49, "learning_rate": 2.5305154547211613e-05, "loss": 0.1845, "step": 33720 }, { "epoch": 1.49, "learning_rate": 2.5297720818899512e-05, "loss": 0.1515, "step": 33730 }, { "epoch": 1.49, "learning_rate": 2.5290287090587418e-05, "loss": 0.1737, "step": 33740 }, { "epoch": 1.49, "learning_rate": 2.5282853362275317e-05, "loss": 0.1178, "step": 33750 }, { "epoch": 1.49, "learning_rate": 2.527541963396322e-05, "loss": 0.1331, "step": 33760 }, { "epoch": 1.5, "learning_rate": 2.5267985905651122e-05, "loss": 0.162, "step": 33770 }, { "epoch": 1.5, "learning_rate": 2.5260552177339025e-05, "loss": 0.1675, "step": 33780 }, { "epoch": 1.5, "learning_rate": 2.5253118449026924e-05, "loss": 0.1023, "step": 33790 }, { "epoch": 1.5, "learning_rate": 2.524568472071483e-05, "loss": 0.1536, "step": 33800 }, { "epoch": 1.5, "learning_rate": 2.523825099240273e-05, "loss": 0.1964, "step": 33810 }, { "epoch": 1.5, "learning_rate": 2.5230817264090635e-05, "loss": 0.1589, "step": 33820 }, { "epoch": 1.5, "learning_rate": 2.5223383535778534e-05, "loss": 0.1533, "step": 33830 }, { "epoch": 1.5, "learning_rate": 2.5215949807466437e-05, "loss": 0.1315, "step": 33840 }, { "epoch": 1.5, "learning_rate": 2.5208516079154343e-05, "loss": 0.1183, "step": 33850 }, { "epoch": 1.5, "learning_rate": 2.5201082350842242e-05, "loss": 0.1324, "step": 33860 }, { "epoch": 1.5, "learning_rate": 2.5193648622530148e-05, "loss": 0.1912, "step": 33870 }, { "epoch": 1.5, "learning_rate": 2.5186214894218047e-05, "loss": 0.1056, "step": 33880 }, { "epoch": 1.5, "learning_rate": 2.5178781165905953e-05, "loss": 0.1134, "step": 33890 }, { "epoch": 1.5, "learning_rate": 2.5171347437593852e-05, "loss": 0.1123, "step": 33900 }, { "epoch": 1.5, "learning_rate": 2.5163913709281755e-05, "loss": 0.1326, "step": 33910 }, { "epoch": 1.5, "learning_rate": 2.5156479980969654e-05, "loss": 0.137, "step": 33920 }, { "epoch": 1.5, "learning_rate": 2.514904625265756e-05, "loss": 0.1127, "step": 33930 }, { "epoch": 1.5, "learning_rate": 2.514161252434546e-05, "loss": 0.146, "step": 33940 }, { "epoch": 1.5, "learning_rate": 2.5134178796033365e-05, "loss": 0.1838, "step": 33950 }, { "epoch": 1.5, "learning_rate": 2.5126745067721264e-05, "loss": 0.1118, "step": 33960 }, { "epoch": 1.5, "learning_rate": 2.5119311339409167e-05, "loss": 0.1168, "step": 33970 }, { "epoch": 1.5, "learning_rate": 2.5111877611097073e-05, "loss": 0.1401, "step": 33980 }, { "epoch": 1.5, "learning_rate": 2.5104443882784972e-05, "loss": 0.1482, "step": 33990 }, { "epoch": 1.51, "learning_rate": 2.5097010154472878e-05, "loss": 0.1387, "step": 34000 }, { "epoch": 1.51, "learning_rate": 2.5089576426160777e-05, "loss": 0.145, "step": 34010 }, { "epoch": 1.51, "learning_rate": 2.5082142697848683e-05, "loss": 0.189, "step": 34020 }, { "epoch": 1.51, "learning_rate": 2.5074708969536582e-05, "loss": 0.1273, "step": 34030 }, { "epoch": 1.51, "learning_rate": 2.5067275241224485e-05, "loss": 0.1844, "step": 34040 }, { "epoch": 1.51, "learning_rate": 2.5059841512912384e-05, "loss": 0.1611, "step": 34050 }, { "epoch": 1.51, "learning_rate": 2.505240778460029e-05, "loss": 0.1441, "step": 34060 }, { "epoch": 1.51, "learning_rate": 2.504497405628819e-05, "loss": 0.1155, "step": 34070 }, { "epoch": 1.51, "learning_rate": 2.5037540327976095e-05, "loss": 0.1457, "step": 34080 }, { "epoch": 1.51, "learning_rate": 2.5030106599663998e-05, "loss": 0.1458, "step": 34090 }, { "epoch": 1.51, "learning_rate": 2.50226728713519e-05, "loss": 0.1807, "step": 34100 }, { "epoch": 1.51, "learning_rate": 2.5015239143039803e-05, "loss": 0.1413, "step": 34110 }, { "epoch": 1.51, "learning_rate": 2.5007805414727702e-05, "loss": 0.235, "step": 34120 }, { "epoch": 1.51, "learning_rate": 2.5000371686415608e-05, "loss": 0.1155, "step": 34130 }, { "epoch": 1.51, "learning_rate": 2.4992937958103507e-05, "loss": 0.1807, "step": 34140 }, { "epoch": 1.51, "learning_rate": 2.4985504229791413e-05, "loss": 0.1427, "step": 34150 }, { "epoch": 1.51, "learning_rate": 2.4978070501479316e-05, "loss": 0.1101, "step": 34160 }, { "epoch": 1.51, "learning_rate": 2.4970636773167215e-05, "loss": 0.1731, "step": 34170 }, { "epoch": 1.51, "learning_rate": 2.4963203044855118e-05, "loss": 0.1538, "step": 34180 }, { "epoch": 1.51, "learning_rate": 2.495576931654302e-05, "loss": 0.1202, "step": 34190 }, { "epoch": 1.51, "learning_rate": 2.4948335588230923e-05, "loss": 0.1589, "step": 34200 }, { "epoch": 1.51, "learning_rate": 2.4940901859918825e-05, "loss": 0.1306, "step": 34210 }, { "epoch": 1.52, "learning_rate": 2.4933468131606728e-05, "loss": 0.109, "step": 34220 }, { "epoch": 1.52, "learning_rate": 2.492603440329463e-05, "loss": 0.1969, "step": 34230 }, { "epoch": 1.52, "learning_rate": 2.491860067498253e-05, "loss": 0.1477, "step": 34240 }, { "epoch": 1.52, "learning_rate": 2.4911166946670432e-05, "loss": 0.1533, "step": 34250 }, { "epoch": 1.52, "learning_rate": 2.4903733218358335e-05, "loss": 0.1663, "step": 34260 }, { "epoch": 1.52, "learning_rate": 2.4896299490046237e-05, "loss": 0.119, "step": 34270 }, { "epoch": 1.52, "learning_rate": 2.4888865761734143e-05, "loss": 0.156, "step": 34280 }, { "epoch": 1.52, "learning_rate": 2.4881432033422046e-05, "loss": 0.0833, "step": 34290 }, { "epoch": 1.52, "learning_rate": 2.4873998305109945e-05, "loss": 0.1217, "step": 34300 }, { "epoch": 1.52, "learning_rate": 2.4866564576797848e-05, "loss": 0.1891, "step": 34310 }, { "epoch": 1.52, "learning_rate": 2.485913084848575e-05, "loss": 0.1454, "step": 34320 }, { "epoch": 1.52, "learning_rate": 2.4851697120173653e-05, "loss": 0.1167, "step": 34330 }, { "epoch": 1.52, "learning_rate": 2.4844263391861555e-05, "loss": 0.1035, "step": 34340 }, { "epoch": 1.52, "learning_rate": 2.4836829663549458e-05, "loss": 0.0818, "step": 34350 }, { "epoch": 1.52, "learning_rate": 2.482939593523736e-05, "loss": 0.1934, "step": 34360 }, { "epoch": 1.52, "learning_rate": 2.4821962206925263e-05, "loss": 0.1547, "step": 34370 }, { "epoch": 1.52, "learning_rate": 2.4814528478613162e-05, "loss": 0.11, "step": 34380 }, { "epoch": 1.52, "learning_rate": 2.4807094750301065e-05, "loss": 0.1496, "step": 34390 }, { "epoch": 1.52, "learning_rate": 2.479966102198897e-05, "loss": 0.126, "step": 34400 }, { "epoch": 1.52, "learning_rate": 2.4792227293676874e-05, "loss": 0.1753, "step": 34410 }, { "epoch": 1.52, "learning_rate": 2.4784793565364776e-05, "loss": 0.1081, "step": 34420 }, { "epoch": 1.52, "learning_rate": 2.477735983705268e-05, "loss": 0.1803, "step": 34430 }, { "epoch": 1.52, "learning_rate": 2.4769926108740578e-05, "loss": 0.1352, "step": 34440 }, { "epoch": 1.53, "learning_rate": 2.476249238042848e-05, "loss": 0.2, "step": 34450 }, { "epoch": 1.53, "learning_rate": 2.4755058652116383e-05, "loss": 0.1474, "step": 34460 }, { "epoch": 1.53, "learning_rate": 2.4747624923804286e-05, "loss": 0.1701, "step": 34470 }, { "epoch": 1.53, "learning_rate": 2.4740191195492188e-05, "loss": 0.1923, "step": 34480 }, { "epoch": 1.53, "learning_rate": 2.473275746718009e-05, "loss": 0.188, "step": 34490 }, { "epoch": 1.53, "learning_rate": 2.4725323738867993e-05, "loss": 0.162, "step": 34500 }, { "epoch": 1.53, "learning_rate": 2.4717890010555893e-05, "loss": 0.1622, "step": 34510 }, { "epoch": 1.53, "learning_rate": 2.47104562822438e-05, "loss": 0.1184, "step": 34520 }, { "epoch": 1.53, "learning_rate": 2.47030225539317e-05, "loss": 0.1704, "step": 34530 }, { "epoch": 1.53, "learning_rate": 2.4695588825619604e-05, "loss": 0.1508, "step": 34540 }, { "epoch": 1.53, "learning_rate": 2.4688155097307506e-05, "loss": 0.1689, "step": 34550 }, { "epoch": 1.53, "learning_rate": 2.468072136899541e-05, "loss": 0.1884, "step": 34560 }, { "epoch": 1.53, "learning_rate": 2.4673287640683308e-05, "loss": 0.151, "step": 34570 }, { "epoch": 1.53, "learning_rate": 2.466585391237121e-05, "loss": 0.1427, "step": 34580 }, { "epoch": 1.53, "learning_rate": 2.4658420184059113e-05, "loss": 0.1338, "step": 34590 }, { "epoch": 1.53, "learning_rate": 2.4650986455747016e-05, "loss": 0.1285, "step": 34600 }, { "epoch": 1.53, "learning_rate": 2.464355272743492e-05, "loss": 0.1826, "step": 34610 }, { "epoch": 1.53, "learning_rate": 2.463611899912282e-05, "loss": 0.1911, "step": 34620 }, { "epoch": 1.53, "learning_rate": 2.4628685270810724e-05, "loss": 0.1637, "step": 34630 }, { "epoch": 1.53, "learning_rate": 2.4621251542498626e-05, "loss": 0.1302, "step": 34640 }, { "epoch": 1.53, "learning_rate": 2.461381781418653e-05, "loss": 0.1598, "step": 34650 }, { "epoch": 1.53, "learning_rate": 2.460638408587443e-05, "loss": 0.1755, "step": 34660 }, { "epoch": 1.53, "learning_rate": 2.4598950357562334e-05, "loss": 0.1253, "step": 34670 }, { "epoch": 1.54, "learning_rate": 2.4591516629250236e-05, "loss": 0.201, "step": 34680 }, { "epoch": 1.54, "learning_rate": 2.458408290093814e-05, "loss": 0.1639, "step": 34690 }, { "epoch": 1.54, "learning_rate": 2.457664917262604e-05, "loss": 0.1561, "step": 34700 }, { "epoch": 1.54, "learning_rate": 2.456921544431394e-05, "loss": 0.1554, "step": 34710 }, { "epoch": 1.54, "learning_rate": 2.4561781716001843e-05, "loss": 0.1611, "step": 34720 }, { "epoch": 1.54, "learning_rate": 2.4554347987689746e-05, "loss": 0.1442, "step": 34730 }, { "epoch": 1.54, "learning_rate": 2.454691425937765e-05, "loss": 0.1897, "step": 34740 }, { "epoch": 1.54, "learning_rate": 2.453948053106555e-05, "loss": 0.1399, "step": 34750 }, { "epoch": 1.54, "learning_rate": 2.4532046802753454e-05, "loss": 0.1108, "step": 34760 }, { "epoch": 1.54, "learning_rate": 2.4524613074441356e-05, "loss": 0.1371, "step": 34770 }, { "epoch": 1.54, "learning_rate": 2.451717934612926e-05, "loss": 0.1507, "step": 34780 }, { "epoch": 1.54, "learning_rate": 2.450974561781716e-05, "loss": 0.1695, "step": 34790 }, { "epoch": 1.54, "learning_rate": 2.4502311889505064e-05, "loss": 0.1284, "step": 34800 }, { "epoch": 1.54, "learning_rate": 2.4494878161192967e-05, "loss": 0.1105, "step": 34810 }, { "epoch": 1.54, "learning_rate": 2.448744443288087e-05, "loss": 0.1552, "step": 34820 }, { "epoch": 1.54, "learning_rate": 2.4480010704568772e-05, "loss": 0.177, "step": 34830 }, { "epoch": 1.54, "learning_rate": 2.447257697625667e-05, "loss": 0.1593, "step": 34840 }, { "epoch": 1.54, "learning_rate": 2.4465143247944573e-05, "loss": 0.1774, "step": 34850 }, { "epoch": 1.54, "learning_rate": 2.4457709519632476e-05, "loss": 0.1302, "step": 34860 }, { "epoch": 1.54, "learning_rate": 2.445027579132038e-05, "loss": 0.123, "step": 34870 }, { "epoch": 1.54, "learning_rate": 2.444284206300828e-05, "loss": 0.1488, "step": 34880 }, { "epoch": 1.54, "learning_rate": 2.4435408334696187e-05, "loss": 0.1845, "step": 34890 }, { "epoch": 1.55, "learning_rate": 2.4427974606384086e-05, "loss": 0.1534, "step": 34900 }, { "epoch": 1.55, "learning_rate": 2.442054087807199e-05, "loss": 0.1282, "step": 34910 }, { "epoch": 1.55, "learning_rate": 2.441310714975989e-05, "loss": 0.1372, "step": 34920 }, { "epoch": 1.55, "learning_rate": 2.4405673421447794e-05, "loss": 0.1543, "step": 34930 }, { "epoch": 1.55, "learning_rate": 2.4398239693135697e-05, "loss": 0.1471, "step": 34940 }, { "epoch": 1.55, "learning_rate": 2.43908059648236e-05, "loss": 0.118, "step": 34950 }, { "epoch": 1.55, "learning_rate": 2.4383372236511502e-05, "loss": 0.1549, "step": 34960 }, { "epoch": 1.55, "learning_rate": 2.4375938508199404e-05, "loss": 0.121, "step": 34970 }, { "epoch": 1.55, "learning_rate": 2.4368504779887304e-05, "loss": 0.1723, "step": 34980 }, { "epoch": 1.55, "learning_rate": 2.4361071051575206e-05, "loss": 0.1227, "step": 34990 }, { "epoch": 1.55, "learning_rate": 2.435363732326311e-05, "loss": 0.1333, "step": 35000 }, { "epoch": 1.55, "learning_rate": 2.4346203594951015e-05, "loss": 0.1663, "step": 35010 }, { "epoch": 1.55, "learning_rate": 2.4338769866638917e-05, "loss": 0.1521, "step": 35020 }, { "epoch": 1.55, "learning_rate": 2.433133613832682e-05, "loss": 0.1449, "step": 35030 }, { "epoch": 1.55, "learning_rate": 2.432390241001472e-05, "loss": 0.1986, "step": 35040 }, { "epoch": 1.55, "learning_rate": 2.4316468681702622e-05, "loss": 0.1809, "step": 35050 }, { "epoch": 1.55, "learning_rate": 2.4309034953390524e-05, "loss": 0.1288, "step": 35060 }, { "epoch": 1.55, "learning_rate": 2.4301601225078427e-05, "loss": 0.1436, "step": 35070 }, { "epoch": 1.55, "learning_rate": 2.429416749676633e-05, "loss": 0.1434, "step": 35080 }, { "epoch": 1.55, "learning_rate": 2.4286733768454232e-05, "loss": 0.1354, "step": 35090 }, { "epoch": 1.55, "learning_rate": 2.4279300040142135e-05, "loss": 0.1512, "step": 35100 }, { "epoch": 1.55, "learning_rate": 2.4271866311830034e-05, "loss": 0.0919, "step": 35110 }, { "epoch": 1.55, "learning_rate": 2.4264432583517936e-05, "loss": 0.1083, "step": 35120 }, { "epoch": 1.56, "learning_rate": 2.4256998855205842e-05, "loss": 0.1843, "step": 35130 }, { "epoch": 1.56, "learning_rate": 2.4249565126893745e-05, "loss": 0.1924, "step": 35140 }, { "epoch": 1.56, "learning_rate": 2.4242131398581647e-05, "loss": 0.1512, "step": 35150 }, { "epoch": 1.56, "learning_rate": 2.423469767026955e-05, "loss": 0.1163, "step": 35160 }, { "epoch": 1.56, "learning_rate": 2.4227263941957453e-05, "loss": 0.1116, "step": 35170 }, { "epoch": 1.56, "learning_rate": 2.4219830213645352e-05, "loss": 0.1524, "step": 35180 }, { "epoch": 1.56, "learning_rate": 2.4212396485333254e-05, "loss": 0.1384, "step": 35190 }, { "epoch": 1.56, "learning_rate": 2.4204962757021157e-05, "loss": 0.1199, "step": 35200 }, { "epoch": 1.56, "learning_rate": 2.419752902870906e-05, "loss": 0.1131, "step": 35210 }, { "epoch": 1.56, "learning_rate": 2.4190095300396962e-05, "loss": 0.1457, "step": 35220 }, { "epoch": 1.56, "learning_rate": 2.4182661572084865e-05, "loss": 0.1204, "step": 35230 }, { "epoch": 1.56, "learning_rate": 2.4175227843772767e-05, "loss": 0.108, "step": 35240 }, { "epoch": 1.56, "learning_rate": 2.4167794115460667e-05, "loss": 0.1487, "step": 35250 }, { "epoch": 1.56, "learning_rate": 2.4160360387148572e-05, "loss": 0.1383, "step": 35260 }, { "epoch": 1.56, "learning_rate": 2.4152926658836475e-05, "loss": 0.188, "step": 35270 }, { "epoch": 1.56, "learning_rate": 2.4145492930524378e-05, "loss": 0.1154, "step": 35280 }, { "epoch": 1.56, "learning_rate": 2.413805920221228e-05, "loss": 0.1511, "step": 35290 }, { "epoch": 1.56, "learning_rate": 2.4130625473900183e-05, "loss": 0.1093, "step": 35300 }, { "epoch": 1.56, "learning_rate": 2.4123191745588082e-05, "loss": 0.099, "step": 35310 }, { "epoch": 1.56, "learning_rate": 2.4115758017275985e-05, "loss": 0.2606, "step": 35320 }, { "epoch": 1.56, "learning_rate": 2.4108324288963887e-05, "loss": 0.1668, "step": 35330 }, { "epoch": 1.56, "learning_rate": 2.410089056065179e-05, "loss": 0.131, "step": 35340 }, { "epoch": 1.57, "learning_rate": 2.4093456832339692e-05, "loss": 0.1331, "step": 35350 }, { "epoch": 1.57, "learning_rate": 2.4086023104027595e-05, "loss": 0.1617, "step": 35360 }, { "epoch": 1.57, "learning_rate": 2.4078589375715497e-05, "loss": 0.1177, "step": 35370 }, { "epoch": 1.57, "learning_rate": 2.40711556474034e-05, "loss": 0.0902, "step": 35380 }, { "epoch": 1.57, "learning_rate": 2.4063721919091303e-05, "loss": 0.1651, "step": 35390 }, { "epoch": 1.57, "learning_rate": 2.4056288190779205e-05, "loss": 0.1576, "step": 35400 }, { "epoch": 1.57, "learning_rate": 2.4048854462467108e-05, "loss": 0.0968, "step": 35410 }, { "epoch": 1.57, "learning_rate": 2.404142073415501e-05, "loss": 0.1548, "step": 35420 }, { "epoch": 1.57, "learning_rate": 2.4033987005842913e-05, "loss": 0.1386, "step": 35430 }, { "epoch": 1.57, "learning_rate": 2.4026553277530816e-05, "loss": 0.1142, "step": 35440 }, { "epoch": 1.57, "learning_rate": 2.4019119549218715e-05, "loss": 0.1369, "step": 35450 }, { "epoch": 1.57, "learning_rate": 2.4011685820906617e-05, "loss": 0.1407, "step": 35460 }, { "epoch": 1.57, "learning_rate": 2.400425209259452e-05, "loss": 0.185, "step": 35470 }, { "epoch": 1.57, "learning_rate": 2.3996818364282422e-05, "loss": 0.1832, "step": 35480 }, { "epoch": 1.57, "learning_rate": 2.3989384635970325e-05, "loss": 0.1357, "step": 35490 }, { "epoch": 1.57, "learning_rate": 2.398195090765823e-05, "loss": 0.1047, "step": 35500 }, { "epoch": 1.57, "learning_rate": 2.397451717934613e-05, "loss": 0.1935, "step": 35510 }, { "epoch": 1.57, "learning_rate": 2.3967083451034033e-05, "loss": 0.1061, "step": 35520 }, { "epoch": 1.57, "learning_rate": 2.3959649722721935e-05, "loss": 0.1774, "step": 35530 }, { "epoch": 1.57, "learning_rate": 2.3952215994409838e-05, "loss": 0.1075, "step": 35540 }, { "epoch": 1.57, "learning_rate": 2.394478226609774e-05, "loss": 0.1509, "step": 35550 }, { "epoch": 1.57, "learning_rate": 2.3937348537785643e-05, "loss": 0.0996, "step": 35560 }, { "epoch": 1.57, "learning_rate": 2.3929914809473546e-05, "loss": 0.1611, "step": 35570 }, { "epoch": 1.58, "learning_rate": 2.3922481081161445e-05, "loss": 0.0998, "step": 35580 }, { "epoch": 1.58, "learning_rate": 2.3915047352849347e-05, "loss": 0.1317, "step": 35590 }, { "epoch": 1.58, "learning_rate": 2.390761362453725e-05, "loss": 0.114, "step": 35600 }, { "epoch": 1.58, "learning_rate": 2.3900179896225153e-05, "loss": 0.074, "step": 35610 }, { "epoch": 1.58, "learning_rate": 2.389274616791306e-05, "loss": 0.1895, "step": 35620 }, { "epoch": 1.58, "learning_rate": 2.388531243960096e-05, "loss": 0.1103, "step": 35630 }, { "epoch": 1.58, "learning_rate": 2.387787871128886e-05, "loss": 0.1169, "step": 35640 }, { "epoch": 1.58, "learning_rate": 2.3870444982976763e-05, "loss": 0.1618, "step": 35650 }, { "epoch": 1.58, "learning_rate": 2.3863011254664665e-05, "loss": 0.1537, "step": 35660 }, { "epoch": 1.58, "learning_rate": 2.3855577526352568e-05, "loss": 0.1637, "step": 35670 }, { "epoch": 1.58, "learning_rate": 2.384814379804047e-05, "loss": 0.1615, "step": 35680 }, { "epoch": 1.58, "learning_rate": 2.3840710069728373e-05, "loss": 0.1545, "step": 35690 }, { "epoch": 1.58, "learning_rate": 2.3833276341416276e-05, "loss": 0.1743, "step": 35700 }, { "epoch": 1.58, "learning_rate": 2.382584261310418e-05, "loss": 0.1501, "step": 35710 }, { "epoch": 1.58, "learning_rate": 2.3818408884792078e-05, "loss": 0.1764, "step": 35720 }, { "epoch": 1.58, "learning_rate": 2.381097515647998e-05, "loss": 0.1236, "step": 35730 }, { "epoch": 1.58, "learning_rate": 2.3803541428167883e-05, "loss": 0.086, "step": 35740 }, { "epoch": 1.58, "learning_rate": 2.379610769985579e-05, "loss": 0.1527, "step": 35750 }, { "epoch": 1.58, "learning_rate": 2.378867397154369e-05, "loss": 0.1591, "step": 35760 }, { "epoch": 1.58, "learning_rate": 2.3781240243231594e-05, "loss": 0.1393, "step": 35770 }, { "epoch": 1.58, "learning_rate": 2.3773806514919493e-05, "loss": 0.114, "step": 35780 }, { "epoch": 1.58, "learning_rate": 2.3766372786607396e-05, "loss": 0.1817, "step": 35790 }, { "epoch": 1.58, "learning_rate": 2.3758939058295298e-05, "loss": 0.0687, "step": 35800 }, { "epoch": 1.59, "learning_rate": 2.37515053299832e-05, "loss": 0.1496, "step": 35810 }, { "epoch": 1.59, "learning_rate": 2.3744071601671103e-05, "loss": 0.1424, "step": 35820 }, { "epoch": 1.59, "learning_rate": 2.3736637873359006e-05, "loss": 0.1066, "step": 35830 }, { "epoch": 1.59, "learning_rate": 2.372920414504691e-05, "loss": 0.1715, "step": 35840 }, { "epoch": 1.59, "learning_rate": 2.3721770416734808e-05, "loss": 0.1605, "step": 35850 }, { "epoch": 1.59, "learning_rate": 2.371433668842271e-05, "loss": 0.1487, "step": 35860 }, { "epoch": 1.59, "learning_rate": 2.3706902960110616e-05, "loss": 0.1458, "step": 35870 }, { "epoch": 1.59, "learning_rate": 2.369946923179852e-05, "loss": 0.1552, "step": 35880 }, { "epoch": 1.59, "learning_rate": 2.369203550348642e-05, "loss": 0.1987, "step": 35890 }, { "epoch": 1.59, "learning_rate": 2.3684601775174324e-05, "loss": 0.1774, "step": 35900 }, { "epoch": 1.59, "learning_rate": 2.3677168046862223e-05, "loss": 0.1756, "step": 35910 }, { "epoch": 1.59, "learning_rate": 2.3669734318550126e-05, "loss": 0.1547, "step": 35920 }, { "epoch": 1.59, "learning_rate": 2.366230059023803e-05, "loss": 0.2107, "step": 35930 }, { "epoch": 1.59, "learning_rate": 2.365486686192593e-05, "loss": 0.1231, "step": 35940 }, { "epoch": 1.59, "learning_rate": 2.3647433133613834e-05, "loss": 0.1399, "step": 35950 }, { "epoch": 1.59, "learning_rate": 2.3639999405301736e-05, "loss": 0.1506, "step": 35960 }, { "epoch": 1.59, "learning_rate": 2.363256567698964e-05, "loss": 0.1234, "step": 35970 }, { "epoch": 1.59, "learning_rate": 2.362513194867754e-05, "loss": 0.1785, "step": 35980 }, { "epoch": 1.59, "learning_rate": 2.3617698220365444e-05, "loss": 0.1746, "step": 35990 }, { "epoch": 1.59, "learning_rate": 2.3610264492053346e-05, "loss": 0.1301, "step": 36000 }, { "epoch": 1.59, "learning_rate": 2.360283076374125e-05, "loss": 0.1433, "step": 36010 }, { "epoch": 1.59, "learning_rate": 2.359539703542915e-05, "loss": 0.1924, "step": 36020 }, { "epoch": 1.6, "learning_rate": 2.3587963307117054e-05, "loss": 0.1468, "step": 36030 }, { "epoch": 1.6, "learning_rate": 2.3580529578804957e-05, "loss": 0.15, "step": 36040 }, { "epoch": 1.6, "learning_rate": 2.3573095850492856e-05, "loss": 0.167, "step": 36050 }, { "epoch": 1.6, "learning_rate": 2.356566212218076e-05, "loss": 0.1413, "step": 36060 }, { "epoch": 1.6, "learning_rate": 2.355822839386866e-05, "loss": 0.1446, "step": 36070 }, { "epoch": 1.6, "learning_rate": 2.3550794665556564e-05, "loss": 0.1273, "step": 36080 }, { "epoch": 1.6, "learning_rate": 2.3543360937244466e-05, "loss": 0.1069, "step": 36090 }, { "epoch": 1.6, "learning_rate": 2.353592720893237e-05, "loss": 0.1541, "step": 36100 }, { "epoch": 1.6, "learning_rate": 2.352849348062027e-05, "loss": 0.1314, "step": 36110 }, { "epoch": 1.6, "learning_rate": 2.3521059752308174e-05, "loss": 0.1106, "step": 36120 }, { "epoch": 1.6, "learning_rate": 2.3513626023996077e-05, "loss": 0.1551, "step": 36130 }, { "epoch": 1.6, "learning_rate": 2.350619229568398e-05, "loss": 0.0987, "step": 36140 }, { "epoch": 1.6, "learning_rate": 2.3498758567371882e-05, "loss": 0.1717, "step": 36150 }, { "epoch": 1.6, "learning_rate": 2.3491324839059784e-05, "loss": 0.1017, "step": 36160 }, { "epoch": 1.6, "learning_rate": 2.3483891110747687e-05, "loss": 0.1436, "step": 36170 }, { "epoch": 1.6, "learning_rate": 2.3476457382435586e-05, "loss": 0.1336, "step": 36180 }, { "epoch": 1.6, "learning_rate": 2.346902365412349e-05, "loss": 0.1503, "step": 36190 }, { "epoch": 1.6, "learning_rate": 2.346158992581139e-05, "loss": 0.1503, "step": 36200 }, { "epoch": 1.6, "learning_rate": 2.3454156197499294e-05, "loss": 0.1203, "step": 36210 }, { "epoch": 1.6, "learning_rate": 2.3446722469187196e-05, "loss": 0.1206, "step": 36220 }, { "epoch": 1.6, "learning_rate": 2.34392887408751e-05, "loss": 0.1285, "step": 36230 }, { "epoch": 1.6, "learning_rate": 2.3431855012563e-05, "loss": 0.1152, "step": 36240 }, { "epoch": 1.6, "learning_rate": 2.3424421284250904e-05, "loss": 0.1841, "step": 36250 }, { "epoch": 1.61, "learning_rate": 2.3416987555938807e-05, "loss": 0.1042, "step": 36260 }, { "epoch": 1.61, "learning_rate": 2.340955382762671e-05, "loss": 0.1872, "step": 36270 }, { "epoch": 1.61, "learning_rate": 2.3402120099314612e-05, "loss": 0.1576, "step": 36280 }, { "epoch": 1.61, "learning_rate": 2.3394686371002514e-05, "loss": 0.1559, "step": 36290 }, { "epoch": 1.61, "learning_rate": 2.3387252642690417e-05, "loss": 0.1614, "step": 36300 }, { "epoch": 1.61, "learning_rate": 2.337981891437832e-05, "loss": 0.132, "step": 36310 }, { "epoch": 1.61, "learning_rate": 2.337238518606622e-05, "loss": 0.2252, "step": 36320 }, { "epoch": 1.61, "learning_rate": 2.336495145775412e-05, "loss": 0.1059, "step": 36330 }, { "epoch": 1.61, "learning_rate": 2.3357517729442024e-05, "loss": 0.1423, "step": 36340 }, { "epoch": 1.61, "learning_rate": 2.3350084001129927e-05, "loss": 0.1361, "step": 36350 }, { "epoch": 1.61, "learning_rate": 2.3342650272817833e-05, "loss": 0.1526, "step": 36360 }, { "epoch": 1.61, "learning_rate": 2.3335216544505735e-05, "loss": 0.1447, "step": 36370 }, { "epoch": 1.61, "learning_rate": 2.3327782816193634e-05, "loss": 0.1422, "step": 36380 }, { "epoch": 1.61, "learning_rate": 2.3320349087881537e-05, "loss": 0.159, "step": 36390 }, { "epoch": 1.61, "learning_rate": 2.331291535956944e-05, "loss": 0.1912, "step": 36400 }, { "epoch": 1.61, "learning_rate": 2.3305481631257342e-05, "loss": 0.1861, "step": 36410 }, { "epoch": 1.61, "learning_rate": 2.3298047902945245e-05, "loss": 0.1645, "step": 36420 }, { "epoch": 1.61, "learning_rate": 2.3290614174633147e-05, "loss": 0.1797, "step": 36430 }, { "epoch": 1.61, "learning_rate": 2.328318044632105e-05, "loss": 0.1409, "step": 36440 }, { "epoch": 1.61, "learning_rate": 2.327574671800895e-05, "loss": 0.1255, "step": 36450 }, { "epoch": 1.61, "learning_rate": 2.326831298969685e-05, "loss": 0.1645, "step": 36460 }, { "epoch": 1.61, "learning_rate": 2.3260879261384754e-05, "loss": 0.1588, "step": 36470 }, { "epoch": 1.62, "learning_rate": 2.325344553307266e-05, "loss": 0.1222, "step": 36480 }, { "epoch": 1.62, "learning_rate": 2.3246011804760563e-05, "loss": 0.1296, "step": 36490 }, { "epoch": 1.62, "learning_rate": 2.3238578076448465e-05, "loss": 0.1378, "step": 36500 }, { "epoch": 1.62, "learning_rate": 2.3231144348136364e-05, "loss": 0.1719, "step": 36510 }, { "epoch": 1.62, "learning_rate": 2.3223710619824267e-05, "loss": 0.1373, "step": 36520 }, { "epoch": 1.62, "learning_rate": 2.321627689151217e-05, "loss": 0.1255, "step": 36530 }, { "epoch": 1.62, "learning_rate": 2.3208843163200072e-05, "loss": 0.2024, "step": 36540 }, { "epoch": 1.62, "learning_rate": 2.3201409434887975e-05, "loss": 0.1947, "step": 36550 }, { "epoch": 1.62, "learning_rate": 2.3193975706575877e-05, "loss": 0.1252, "step": 36560 }, { "epoch": 1.62, "learning_rate": 2.318654197826378e-05, "loss": 0.1556, "step": 36570 }, { "epoch": 1.62, "learning_rate": 2.3179108249951682e-05, "loss": 0.1698, "step": 36580 }, { "epoch": 1.62, "learning_rate": 2.317167452163958e-05, "loss": 0.1791, "step": 36590 }, { "epoch": 1.62, "learning_rate": 2.3164240793327484e-05, "loss": 0.1361, "step": 36600 }, { "epoch": 1.62, "learning_rate": 2.315680706501539e-05, "loss": 0.1091, "step": 36610 }, { "epoch": 1.62, "learning_rate": 2.3149373336703293e-05, "loss": 0.1411, "step": 36620 }, { "epoch": 1.62, "learning_rate": 2.3141939608391195e-05, "loss": 0.17, "step": 36630 }, { "epoch": 1.62, "learning_rate": 2.3134505880079098e-05, "loss": 0.1352, "step": 36640 }, { "epoch": 1.62, "learning_rate": 2.3127072151766997e-05, "loss": 0.1815, "step": 36650 }, { "epoch": 1.62, "learning_rate": 2.31196384234549e-05, "loss": 0.1483, "step": 36660 }, { "epoch": 1.62, "learning_rate": 2.3112204695142802e-05, "loss": 0.1911, "step": 36670 }, { "epoch": 1.62, "learning_rate": 2.3104770966830705e-05, "loss": 0.1458, "step": 36680 }, { "epoch": 1.62, "learning_rate": 2.3097337238518607e-05, "loss": 0.1301, "step": 36690 }, { "epoch": 1.62, "learning_rate": 2.308990351020651e-05, "loss": 0.1165, "step": 36700 }, { "epoch": 1.63, "learning_rate": 2.3082469781894413e-05, "loss": 0.1493, "step": 36710 }, { "epoch": 1.63, "learning_rate": 2.3075036053582312e-05, "loss": 0.1439, "step": 36720 }, { "epoch": 1.63, "learning_rate": 2.3067602325270218e-05, "loss": 0.1175, "step": 36730 }, { "epoch": 1.63, "learning_rate": 2.306016859695812e-05, "loss": 0.0809, "step": 36740 }, { "epoch": 1.63, "learning_rate": 2.3052734868646023e-05, "loss": 0.147, "step": 36750 }, { "epoch": 1.63, "learning_rate": 2.3045301140333926e-05, "loss": 0.1503, "step": 36760 }, { "epoch": 1.63, "learning_rate": 2.3037867412021828e-05, "loss": 0.165, "step": 36770 }, { "epoch": 1.63, "learning_rate": 2.3030433683709727e-05, "loss": 0.194, "step": 36780 }, { "epoch": 1.63, "learning_rate": 2.302299995539763e-05, "loss": 0.1698, "step": 36790 }, { "epoch": 1.63, "learning_rate": 2.3015566227085532e-05, "loss": 0.1576, "step": 36800 }, { "epoch": 1.63, "learning_rate": 2.3008132498773435e-05, "loss": 0.174, "step": 36810 }, { "epoch": 1.63, "learning_rate": 2.3000698770461338e-05, "loss": 0.1641, "step": 36820 }, { "epoch": 1.63, "learning_rate": 2.299326504214924e-05, "loss": 0.1537, "step": 36830 }, { "epoch": 1.63, "learning_rate": 2.2985831313837143e-05, "loss": 0.1478, "step": 36840 }, { "epoch": 1.63, "learning_rate": 2.2978397585525045e-05, "loss": 0.1491, "step": 36850 }, { "epoch": 1.63, "learning_rate": 2.2970963857212948e-05, "loss": 0.1404, "step": 36860 }, { "epoch": 1.63, "learning_rate": 2.296353012890085e-05, "loss": 0.1979, "step": 36870 }, { "epoch": 1.63, "learning_rate": 2.2956096400588753e-05, "loss": 0.2271, "step": 36880 }, { "epoch": 1.63, "learning_rate": 2.2948662672276656e-05, "loss": 0.176, "step": 36890 }, { "epoch": 1.63, "learning_rate": 2.2941228943964558e-05, "loss": 0.1743, "step": 36900 }, { "epoch": 1.63, "learning_rate": 2.293379521565246e-05, "loss": 0.1149, "step": 36910 }, { "epoch": 1.63, "learning_rate": 2.292636148734036e-05, "loss": 0.1462, "step": 36920 }, { "epoch": 1.64, "learning_rate": 2.2918927759028263e-05, "loss": 0.0943, "step": 36930 }, { "epoch": 1.64, "learning_rate": 2.2911494030716165e-05, "loss": 0.1126, "step": 36940 }, { "epoch": 1.64, "learning_rate": 2.2904060302404068e-05, "loss": 0.1818, "step": 36950 }, { "epoch": 1.64, "learning_rate": 2.289662657409197e-05, "loss": 0.1315, "step": 36960 }, { "epoch": 1.64, "learning_rate": 2.2889192845779876e-05, "loss": 0.1447, "step": 36970 }, { "epoch": 1.64, "learning_rate": 2.2881759117467775e-05, "loss": 0.175, "step": 36980 }, { "epoch": 1.64, "learning_rate": 2.2874325389155678e-05, "loss": 0.1204, "step": 36990 }, { "epoch": 1.64, "learning_rate": 2.286689166084358e-05, "loss": 0.1856, "step": 37000 }, { "epoch": 1.64, "learning_rate": 2.2859457932531483e-05, "loss": 0.145, "step": 37010 }, { "epoch": 1.64, "learning_rate": 2.2852024204219386e-05, "loss": 0.1422, "step": 37020 }, { "epoch": 1.64, "learning_rate": 2.284459047590729e-05, "loss": 0.1443, "step": 37030 }, { "epoch": 1.64, "learning_rate": 2.283715674759519e-05, "loss": 0.1535, "step": 37040 }, { "epoch": 1.64, "learning_rate": 2.2829723019283094e-05, "loss": 0.1553, "step": 37050 }, { "epoch": 1.64, "learning_rate": 2.2822289290970993e-05, "loss": 0.1628, "step": 37060 }, { "epoch": 1.64, "learning_rate": 2.2814855562658895e-05, "loss": 0.1478, "step": 37070 }, { "epoch": 1.64, "learning_rate": 2.2807421834346798e-05, "loss": 0.1823, "step": 37080 }, { "epoch": 1.64, "learning_rate": 2.27999881060347e-05, "loss": 0.1548, "step": 37090 }, { "epoch": 1.64, "learning_rate": 2.2792554377722606e-05, "loss": 0.1125, "step": 37100 }, { "epoch": 1.64, "learning_rate": 2.278512064941051e-05, "loss": 0.1794, "step": 37110 }, { "epoch": 1.64, "learning_rate": 2.2777686921098408e-05, "loss": 0.132, "step": 37120 }, { "epoch": 1.64, "learning_rate": 2.277025319278631e-05, "loss": 0.1618, "step": 37130 }, { "epoch": 1.64, "learning_rate": 2.2762819464474213e-05, "loss": 0.1427, "step": 37140 }, { "epoch": 1.64, "learning_rate": 2.2755385736162116e-05, "loss": 0.1423, "step": 37150 }, { "epoch": 1.65, "learning_rate": 2.274795200785002e-05, "loss": 0.1587, "step": 37160 }, { "epoch": 1.65, "learning_rate": 2.274051827953792e-05, "loss": 0.1702, "step": 37170 }, { "epoch": 1.65, "learning_rate": 2.2733084551225824e-05, "loss": 0.1358, "step": 37180 }, { "epoch": 1.65, "learning_rate": 2.2725650822913723e-05, "loss": 0.0859, "step": 37190 }, { "epoch": 1.65, "learning_rate": 2.2718217094601625e-05, "loss": 0.1273, "step": 37200 }, { "epoch": 1.65, "learning_rate": 2.2710783366289528e-05, "loss": 0.0804, "step": 37210 }, { "epoch": 1.65, "learning_rate": 2.2703349637977434e-05, "loss": 0.1776, "step": 37220 }, { "epoch": 1.65, "learning_rate": 2.2695915909665337e-05, "loss": 0.1976, "step": 37230 }, { "epoch": 1.65, "learning_rate": 2.268848218135324e-05, "loss": 0.1842, "step": 37240 }, { "epoch": 1.65, "learning_rate": 2.268104845304114e-05, "loss": 0.1664, "step": 37250 }, { "epoch": 1.65, "learning_rate": 2.267361472472904e-05, "loss": 0.1628, "step": 37260 }, { "epoch": 1.65, "learning_rate": 2.2666180996416944e-05, "loss": 0.1308, "step": 37270 }, { "epoch": 1.65, "learning_rate": 2.2658747268104846e-05, "loss": 0.1461, "step": 37280 }, { "epoch": 1.65, "learning_rate": 2.265131353979275e-05, "loss": 0.1315, "step": 37290 }, { "epoch": 1.65, "learning_rate": 2.264387981148065e-05, "loss": 0.1494, "step": 37300 }, { "epoch": 1.65, "learning_rate": 2.2636446083168554e-05, "loss": 0.2113, "step": 37310 }, { "epoch": 1.65, "learning_rate": 2.2629012354856456e-05, "loss": 0.2241, "step": 37320 }, { "epoch": 1.65, "learning_rate": 2.2621578626544356e-05, "loss": 0.1398, "step": 37330 }, { "epoch": 1.65, "learning_rate": 2.261414489823226e-05, "loss": 0.1236, "step": 37340 }, { "epoch": 1.65, "learning_rate": 2.2606711169920164e-05, "loss": 0.0896, "step": 37350 }, { "epoch": 1.65, "learning_rate": 2.2599277441608067e-05, "loss": 0.1438, "step": 37360 }, { "epoch": 1.65, "learning_rate": 2.259184371329597e-05, "loss": 0.1741, "step": 37370 }, { "epoch": 1.65, "learning_rate": 2.2584409984983872e-05, "loss": 0.179, "step": 37380 }, { "epoch": 1.66, "learning_rate": 2.257697625667177e-05, "loss": 0.1261, "step": 37390 }, { "epoch": 1.66, "learning_rate": 2.2569542528359674e-05, "loss": 0.1379, "step": 37400 }, { "epoch": 1.66, "learning_rate": 2.2562108800047576e-05, "loss": 0.132, "step": 37410 }, { "epoch": 1.66, "learning_rate": 2.255467507173548e-05, "loss": 0.169, "step": 37420 }, { "epoch": 1.66, "learning_rate": 2.254724134342338e-05, "loss": 0.1755, "step": 37430 }, { "epoch": 1.66, "learning_rate": 2.2539807615111284e-05, "loss": 0.1474, "step": 37440 }, { "epoch": 1.66, "learning_rate": 2.2532373886799187e-05, "loss": 0.1632, "step": 37450 }, { "epoch": 1.66, "learning_rate": 2.252494015848709e-05, "loss": 0.1748, "step": 37460 }, { "epoch": 1.66, "learning_rate": 2.2517506430174992e-05, "loss": 0.1795, "step": 37470 }, { "epoch": 1.66, "learning_rate": 2.2510072701862894e-05, "loss": 0.1948, "step": 37480 }, { "epoch": 1.66, "learning_rate": 2.2502638973550797e-05, "loss": 0.1682, "step": 37490 }, { "epoch": 1.66, "learning_rate": 2.24952052452387e-05, "loss": 0.1855, "step": 37500 }, { "epoch": 1.66, "learning_rate": 2.2487771516926602e-05, "loss": 0.1403, "step": 37510 }, { "epoch": 1.66, "learning_rate": 2.24803377886145e-05, "loss": 0.1344, "step": 37520 }, { "epoch": 1.66, "learning_rate": 2.2472904060302404e-05, "loss": 0.1273, "step": 37530 }, { "epoch": 1.66, "learning_rate": 2.2465470331990306e-05, "loss": 0.1837, "step": 37540 }, { "epoch": 1.66, "learning_rate": 2.245803660367821e-05, "loss": 0.1326, "step": 37550 }, { "epoch": 1.66, "learning_rate": 2.245060287536611e-05, "loss": 0.1071, "step": 37560 }, { "epoch": 1.66, "learning_rate": 2.2443169147054014e-05, "loss": 0.2249, "step": 37570 }, { "epoch": 1.66, "learning_rate": 2.2435735418741917e-05, "loss": 0.1512, "step": 37580 }, { "epoch": 1.66, "learning_rate": 2.242830169042982e-05, "loss": 0.1756, "step": 37590 }, { "epoch": 1.66, "learning_rate": 2.2420867962117722e-05, "loss": 0.1484, "step": 37600 }, { "epoch": 1.67, "learning_rate": 2.2413434233805624e-05, "loss": 0.1804, "step": 37610 }, { "epoch": 1.67, "learning_rate": 2.2406000505493527e-05, "loss": 0.1178, "step": 37620 }, { "epoch": 1.67, "learning_rate": 2.239856677718143e-05, "loss": 0.1351, "step": 37630 }, { "epoch": 1.67, "learning_rate": 2.2391133048869332e-05, "loss": 0.1189, "step": 37640 }, { "epoch": 1.67, "learning_rate": 2.2383699320557235e-05, "loss": 0.1671, "step": 37650 }, { "epoch": 1.67, "learning_rate": 2.2376265592245134e-05, "loss": 0.1875, "step": 37660 }, { "epoch": 1.67, "learning_rate": 2.2368831863933037e-05, "loss": 0.1668, "step": 37670 }, { "epoch": 1.67, "learning_rate": 2.236139813562094e-05, "loss": 0.1475, "step": 37680 }, { "epoch": 1.67, "learning_rate": 2.2353964407308842e-05, "loss": 0.1092, "step": 37690 }, { "epoch": 1.67, "learning_rate": 2.2346530678996744e-05, "loss": 0.1434, "step": 37700 }, { "epoch": 1.67, "learning_rate": 2.233909695068465e-05, "loss": 0.112, "step": 37710 }, { "epoch": 1.67, "learning_rate": 2.233166322237255e-05, "loss": 0.2474, "step": 37720 }, { "epoch": 1.67, "learning_rate": 2.2324229494060452e-05, "loss": 0.144, "step": 37730 }, { "epoch": 1.67, "learning_rate": 2.2316795765748355e-05, "loss": 0.1134, "step": 37740 }, { "epoch": 1.67, "learning_rate": 2.2309362037436257e-05, "loss": 0.1532, "step": 37750 }, { "epoch": 1.67, "learning_rate": 2.230192830912416e-05, "loss": 0.1463, "step": 37760 }, { "epoch": 1.67, "learning_rate": 2.2294494580812062e-05, "loss": 0.1514, "step": 37770 }, { "epoch": 1.67, "learning_rate": 2.2287060852499965e-05, "loss": 0.1679, "step": 37780 }, { "epoch": 1.67, "learning_rate": 2.2279627124187864e-05, "loss": 0.1397, "step": 37790 }, { "epoch": 1.67, "learning_rate": 2.2272193395875767e-05, "loss": 0.1082, "step": 37800 }, { "epoch": 1.67, "learning_rate": 2.226475966756367e-05, "loss": 0.1529, "step": 37810 }, { "epoch": 1.67, "learning_rate": 2.2257325939251572e-05, "loss": 0.1315, "step": 37820 }, { "epoch": 1.67, "learning_rate": 2.2249892210939478e-05, "loss": 0.1728, "step": 37830 }, { "epoch": 1.68, "learning_rate": 2.224245848262738e-05, "loss": 0.0742, "step": 37840 }, { "epoch": 1.68, "learning_rate": 2.223502475431528e-05, "loss": 0.1193, "step": 37850 }, { "epoch": 1.68, "learning_rate": 2.2227591026003182e-05, "loss": 0.1565, "step": 37860 }, { "epoch": 1.68, "learning_rate": 2.2220157297691085e-05, "loss": 0.1461, "step": 37870 }, { "epoch": 1.68, "learning_rate": 2.2212723569378987e-05, "loss": 0.17, "step": 37880 }, { "epoch": 1.68, "learning_rate": 2.220528984106689e-05, "loss": 0.1158, "step": 37890 }, { "epoch": 1.68, "learning_rate": 2.2197856112754792e-05, "loss": 0.1554, "step": 37900 }, { "epoch": 1.68, "learning_rate": 2.2190422384442695e-05, "loss": 0.1502, "step": 37910 }, { "epoch": 1.68, "learning_rate": 2.2182988656130598e-05, "loss": 0.1373, "step": 37920 }, { "epoch": 1.68, "learning_rate": 2.2175554927818497e-05, "loss": 0.1374, "step": 37930 }, { "epoch": 1.68, "learning_rate": 2.21681211995064e-05, "loss": 0.1506, "step": 37940 }, { "epoch": 1.68, "learning_rate": 2.2160687471194305e-05, "loss": 0.1472, "step": 37950 }, { "epoch": 1.68, "learning_rate": 2.2153253742882208e-05, "loss": 0.1321, "step": 37960 }, { "epoch": 1.68, "learning_rate": 2.214582001457011e-05, "loss": 0.1379, "step": 37970 }, { "epoch": 1.68, "learning_rate": 2.2138386286258013e-05, "loss": 0.1504, "step": 37980 }, { "epoch": 1.68, "learning_rate": 2.2130952557945912e-05, "loss": 0.1524, "step": 37990 }, { "epoch": 1.68, "learning_rate": 2.2123518829633815e-05, "loss": 0.1703, "step": 38000 }, { "epoch": 1.68, "learning_rate": 2.2116085101321717e-05, "loss": 0.122, "step": 38010 }, { "epoch": 1.68, "learning_rate": 2.210865137300962e-05, "loss": 0.1715, "step": 38020 }, { "epoch": 1.68, "learning_rate": 2.2101217644697523e-05, "loss": 0.1587, "step": 38030 }, { "epoch": 1.68, "learning_rate": 2.2093783916385425e-05, "loss": 0.1053, "step": 38040 }, { "epoch": 1.68, "learning_rate": 2.2086350188073328e-05, "loss": 0.128, "step": 38050 }, { "epoch": 1.69, "learning_rate": 2.2078916459761227e-05, "loss": 0.1304, "step": 38060 }, { "epoch": 1.69, "learning_rate": 2.207148273144913e-05, "loss": 0.1327, "step": 38070 }, { "epoch": 1.69, "learning_rate": 2.2064049003137036e-05, "loss": 0.11, "step": 38080 }, { "epoch": 1.69, "learning_rate": 2.2056615274824938e-05, "loss": 0.1445, "step": 38090 }, { "epoch": 1.69, "learning_rate": 2.204918154651284e-05, "loss": 0.1451, "step": 38100 }, { "epoch": 1.69, "learning_rate": 2.2041747818200743e-05, "loss": 0.1912, "step": 38110 }, { "epoch": 1.69, "learning_rate": 2.2034314089888642e-05, "loss": 0.1768, "step": 38120 }, { "epoch": 1.69, "learning_rate": 2.2026880361576545e-05, "loss": 0.1497, "step": 38130 }, { "epoch": 1.69, "learning_rate": 2.2019446633264448e-05, "loss": 0.1722, "step": 38140 }, { "epoch": 1.69, "learning_rate": 2.201201290495235e-05, "loss": 0.1889, "step": 38150 }, { "epoch": 1.69, "learning_rate": 2.2004579176640253e-05, "loss": 0.1294, "step": 38160 }, { "epoch": 1.69, "learning_rate": 2.1997145448328155e-05, "loss": 0.1582, "step": 38170 }, { "epoch": 1.69, "learning_rate": 2.1989711720016058e-05, "loss": 0.1519, "step": 38180 }, { "epoch": 1.69, "learning_rate": 2.198227799170396e-05, "loss": 0.1564, "step": 38190 }, { "epoch": 1.69, "learning_rate": 2.1974844263391863e-05, "loss": 0.1945, "step": 38200 }, { "epoch": 1.69, "learning_rate": 2.1967410535079766e-05, "loss": 0.1481, "step": 38210 }, { "epoch": 1.69, "learning_rate": 2.1959976806767668e-05, "loss": 0.1436, "step": 38220 }, { "epoch": 1.69, "learning_rate": 2.195254307845557e-05, "loss": 0.1586, "step": 38230 }, { "epoch": 1.69, "learning_rate": 2.1945109350143473e-05, "loss": 0.1222, "step": 38240 }, { "epoch": 1.69, "learning_rate": 2.1937675621831376e-05, "loss": 0.1483, "step": 38250 }, { "epoch": 1.69, "learning_rate": 2.1930241893519275e-05, "loss": 0.1879, "step": 38260 }, { "epoch": 1.69, "learning_rate": 2.1922808165207178e-05, "loss": 0.1811, "step": 38270 }, { "epoch": 1.69, "learning_rate": 2.191537443689508e-05, "loss": 0.1553, "step": 38280 }, { "epoch": 1.7, "learning_rate": 2.1907940708582983e-05, "loss": 0.1539, "step": 38290 }, { "epoch": 1.7, "learning_rate": 2.1900506980270885e-05, "loss": 0.1965, "step": 38300 }, { "epoch": 1.7, "learning_rate": 2.1893073251958788e-05, "loss": 0.1506, "step": 38310 }, { "epoch": 1.7, "learning_rate": 2.188563952364669e-05, "loss": 0.1402, "step": 38320 }, { "epoch": 1.7, "learning_rate": 2.1878205795334593e-05, "loss": 0.1208, "step": 38330 }, { "epoch": 1.7, "learning_rate": 2.1870772067022496e-05, "loss": 0.0977, "step": 38340 }, { "epoch": 1.7, "learning_rate": 2.18633383387104e-05, "loss": 0.1206, "step": 38350 }, { "epoch": 1.7, "learning_rate": 2.18559046103983e-05, "loss": 0.0768, "step": 38360 }, { "epoch": 1.7, "learning_rate": 2.1848470882086204e-05, "loss": 0.1074, "step": 38370 }, { "epoch": 1.7, "learning_rate": 2.1841037153774106e-05, "loss": 0.1491, "step": 38380 }, { "epoch": 1.7, "learning_rate": 2.1833603425462005e-05, "loss": 0.1765, "step": 38390 }, { "epoch": 1.7, "learning_rate": 2.1826169697149908e-05, "loss": 0.1332, "step": 38400 }, { "epoch": 1.7, "learning_rate": 2.181873596883781e-05, "loss": 0.1382, "step": 38410 }, { "epoch": 1.7, "learning_rate": 2.1811302240525713e-05, "loss": 0.164, "step": 38420 }, { "epoch": 1.7, "learning_rate": 2.1803868512213616e-05, "loss": 0.1565, "step": 38430 }, { "epoch": 1.7, "learning_rate": 2.179643478390152e-05, "loss": 0.1154, "step": 38440 }, { "epoch": 1.7, "learning_rate": 2.1789001055589424e-05, "loss": 0.1681, "step": 38450 }, { "epoch": 1.7, "learning_rate": 2.1781567327277323e-05, "loss": 0.1177, "step": 38460 }, { "epoch": 1.7, "learning_rate": 2.1774133598965226e-05, "loss": 0.1562, "step": 38470 }, { "epoch": 1.7, "learning_rate": 2.176669987065313e-05, "loss": 0.1756, "step": 38480 }, { "epoch": 1.7, "learning_rate": 2.175926614234103e-05, "loss": 0.1507, "step": 38490 }, { "epoch": 1.7, "learning_rate": 2.1751832414028934e-05, "loss": 0.1297, "step": 38500 }, { "epoch": 1.7, "learning_rate": 2.1744398685716836e-05, "loss": 0.1504, "step": 38510 }, { "epoch": 1.71, "learning_rate": 2.173696495740474e-05, "loss": 0.1793, "step": 38520 }, { "epoch": 1.71, "learning_rate": 2.1729531229092638e-05, "loss": 0.1168, "step": 38530 }, { "epoch": 1.71, "learning_rate": 2.172209750078054e-05, "loss": 0.1638, "step": 38540 }, { "epoch": 1.71, "learning_rate": 2.1714663772468443e-05, "loss": 0.1336, "step": 38550 }, { "epoch": 1.71, "learning_rate": 2.1707230044156346e-05, "loss": 0.1605, "step": 38560 }, { "epoch": 1.71, "learning_rate": 2.1699796315844252e-05, "loss": 0.1709, "step": 38570 }, { "epoch": 1.71, "learning_rate": 2.1692362587532154e-05, "loss": 0.1156, "step": 38580 }, { "epoch": 1.71, "learning_rate": 2.1684928859220054e-05, "loss": 0.1392, "step": 38590 }, { "epoch": 1.71, "learning_rate": 2.1677495130907956e-05, "loss": 0.1381, "step": 38600 }, { "epoch": 1.71, "learning_rate": 2.167006140259586e-05, "loss": 0.1972, "step": 38610 }, { "epoch": 1.71, "learning_rate": 2.166262767428376e-05, "loss": 0.1139, "step": 38620 }, { "epoch": 1.71, "learning_rate": 2.1655193945971664e-05, "loss": 0.1406, "step": 38630 }, { "epoch": 1.71, "learning_rate": 2.1647760217659566e-05, "loss": 0.0876, "step": 38640 }, { "epoch": 1.71, "learning_rate": 2.164032648934747e-05, "loss": 0.1361, "step": 38650 }, { "epoch": 1.71, "learning_rate": 2.1632892761035368e-05, "loss": 0.1362, "step": 38660 }, { "epoch": 1.71, "learning_rate": 2.162545903272327e-05, "loss": 0.0901, "step": 38670 }, { "epoch": 1.71, "learning_rate": 2.1618025304411173e-05, "loss": 0.2154, "step": 38680 }, { "epoch": 1.71, "learning_rate": 2.161059157609908e-05, "loss": 0.1128, "step": 38690 }, { "epoch": 1.71, "learning_rate": 2.1603157847786982e-05, "loss": 0.1681, "step": 38700 }, { "epoch": 1.71, "learning_rate": 2.1595724119474884e-05, "loss": 0.1483, "step": 38710 }, { "epoch": 1.71, "learning_rate": 2.1588290391162787e-05, "loss": 0.1518, "step": 38720 }, { "epoch": 1.71, "learning_rate": 2.1580856662850686e-05, "loss": 0.1751, "step": 38730 }, { "epoch": 1.72, "learning_rate": 2.157342293453859e-05, "loss": 0.1421, "step": 38740 }, { "epoch": 1.72, "learning_rate": 2.156598920622649e-05, "loss": 0.2136, "step": 38750 }, { "epoch": 1.72, "learning_rate": 2.1558555477914394e-05, "loss": 0.1073, "step": 38760 }, { "epoch": 1.72, "learning_rate": 2.1551121749602297e-05, "loss": 0.1545, "step": 38770 }, { "epoch": 1.72, "learning_rate": 2.15436880212902e-05, "loss": 0.1072, "step": 38780 }, { "epoch": 1.72, "learning_rate": 2.1536254292978102e-05, "loss": 0.1504, "step": 38790 }, { "epoch": 1.72, "learning_rate": 2.1528820564666e-05, "loss": 0.1615, "step": 38800 }, { "epoch": 1.72, "learning_rate": 2.1521386836353907e-05, "loss": 0.1411, "step": 38810 }, { "epoch": 1.72, "learning_rate": 2.151395310804181e-05, "loss": 0.1535, "step": 38820 }, { "epoch": 1.72, "learning_rate": 2.1506519379729712e-05, "loss": 0.1187, "step": 38830 }, { "epoch": 1.72, "learning_rate": 2.1499085651417615e-05, "loss": 0.117, "step": 38840 }, { "epoch": 1.72, "learning_rate": 2.1491651923105517e-05, "loss": 0.1308, "step": 38850 }, { "epoch": 1.72, "learning_rate": 2.1484218194793416e-05, "loss": 0.1535, "step": 38860 }, { "epoch": 1.72, "learning_rate": 2.147678446648132e-05, "loss": 0.1399, "step": 38870 }, { "epoch": 1.72, "learning_rate": 2.146935073816922e-05, "loss": 0.1549, "step": 38880 }, { "epoch": 1.72, "learning_rate": 2.1461917009857124e-05, "loss": 0.1746, "step": 38890 }, { "epoch": 1.72, "learning_rate": 2.1454483281545027e-05, "loss": 0.1138, "step": 38900 }, { "epoch": 1.72, "learning_rate": 2.144704955323293e-05, "loss": 0.17, "step": 38910 }, { "epoch": 1.72, "learning_rate": 2.1439615824920832e-05, "loss": 0.1213, "step": 38920 }, { "epoch": 1.72, "learning_rate": 2.1432182096608734e-05, "loss": 0.1178, "step": 38930 }, { "epoch": 1.72, "learning_rate": 2.1424748368296637e-05, "loss": 0.1037, "step": 38940 }, { "epoch": 1.72, "learning_rate": 2.141731463998454e-05, "loss": 0.1546, "step": 38950 }, { "epoch": 1.72, "learning_rate": 2.1409880911672442e-05, "loss": 0.158, "step": 38960 }, { "epoch": 1.73, "learning_rate": 2.1402447183360345e-05, "loss": 0.1642, "step": 38970 }, { "epoch": 1.73, "learning_rate": 2.1395013455048247e-05, "loss": 0.2102, "step": 38980 }, { "epoch": 1.73, "learning_rate": 2.138757972673615e-05, "loss": 0.1607, "step": 38990 }, { "epoch": 1.73, "learning_rate": 2.138014599842405e-05, "loss": 0.1083, "step": 39000 }, { "epoch": 1.73, "learning_rate": 2.1372712270111952e-05, "loss": 0.1547, "step": 39010 }, { "epoch": 1.73, "learning_rate": 2.1365278541799854e-05, "loss": 0.094, "step": 39020 }, { "epoch": 1.73, "learning_rate": 2.1357844813487757e-05, "loss": 0.1685, "step": 39030 }, { "epoch": 1.73, "learning_rate": 2.135041108517566e-05, "loss": 0.1166, "step": 39040 }, { "epoch": 1.73, "learning_rate": 2.1342977356863562e-05, "loss": 0.1837, "step": 39050 }, { "epoch": 1.73, "learning_rate": 2.1335543628551465e-05, "loss": 0.156, "step": 39060 }, { "epoch": 1.73, "learning_rate": 2.1328109900239367e-05, "loss": 0.2299, "step": 39070 }, { "epoch": 1.73, "learning_rate": 2.132067617192727e-05, "loss": 0.1535, "step": 39080 }, { "epoch": 1.73, "learning_rate": 2.1313242443615172e-05, "loss": 0.1139, "step": 39090 }, { "epoch": 1.73, "learning_rate": 2.1305808715303075e-05, "loss": 0.1326, "step": 39100 }, { "epoch": 1.73, "learning_rate": 2.1298374986990977e-05, "loss": 0.1354, "step": 39110 }, { "epoch": 1.73, "learning_rate": 2.129094125867888e-05, "loss": 0.1687, "step": 39120 }, { "epoch": 1.73, "learning_rate": 2.128350753036678e-05, "loss": 0.1154, "step": 39130 }, { "epoch": 1.73, "learning_rate": 2.1276073802054682e-05, "loss": 0.1344, "step": 39140 }, { "epoch": 1.73, "learning_rate": 2.1268640073742584e-05, "loss": 0.1325, "step": 39150 }, { "epoch": 1.73, "learning_rate": 2.1261206345430487e-05, "loss": 0.1767, "step": 39160 }, { "epoch": 1.73, "learning_rate": 2.125377261711839e-05, "loss": 0.129, "step": 39170 }, { "epoch": 1.73, "learning_rate": 2.1246338888806296e-05, "loss": 0.2197, "step": 39180 }, { "epoch": 1.74, "learning_rate": 2.1238905160494195e-05, "loss": 0.1092, "step": 39190 }, { "epoch": 1.74, "learning_rate": 2.1231471432182097e-05, "loss": 0.1698, "step": 39200 }, { "epoch": 1.74, "learning_rate": 2.122403770387e-05, "loss": 0.1557, "step": 39210 }, { "epoch": 1.74, "learning_rate": 2.1216603975557902e-05, "loss": 0.1386, "step": 39220 }, { "epoch": 1.74, "learning_rate": 2.1209170247245805e-05, "loss": 0.1581, "step": 39230 }, { "epoch": 1.74, "learning_rate": 2.1201736518933708e-05, "loss": 0.1088, "step": 39240 }, { "epoch": 1.74, "learning_rate": 2.119430279062161e-05, "loss": 0.1209, "step": 39250 }, { "epoch": 1.74, "learning_rate": 2.1186869062309513e-05, "loss": 0.139, "step": 39260 }, { "epoch": 1.74, "learning_rate": 2.1179435333997412e-05, "loss": 0.1391, "step": 39270 }, { "epoch": 1.74, "learning_rate": 2.1172001605685315e-05, "loss": 0.1924, "step": 39280 }, { "epoch": 1.74, "learning_rate": 2.1164567877373217e-05, "loss": 0.1196, "step": 39290 }, { "epoch": 1.74, "learning_rate": 2.1157134149061123e-05, "loss": 0.1237, "step": 39300 }, { "epoch": 1.74, "learning_rate": 2.1149700420749026e-05, "loss": 0.1459, "step": 39310 }, { "epoch": 1.74, "learning_rate": 2.1142266692436928e-05, "loss": 0.0923, "step": 39320 }, { "epoch": 1.74, "learning_rate": 2.1134832964124827e-05, "loss": 0.1047, "step": 39330 }, { "epoch": 1.74, "learning_rate": 2.112739923581273e-05, "loss": 0.1074, "step": 39340 }, { "epoch": 1.74, "learning_rate": 2.1119965507500633e-05, "loss": 0.1017, "step": 39350 }, { "epoch": 1.74, "learning_rate": 2.1112531779188535e-05, "loss": 0.1142, "step": 39360 }, { "epoch": 1.74, "learning_rate": 2.1105098050876438e-05, "loss": 0.134, "step": 39370 }, { "epoch": 1.74, "learning_rate": 2.109766432256434e-05, "loss": 0.1603, "step": 39380 }, { "epoch": 1.74, "learning_rate": 2.1090230594252243e-05, "loss": 0.1852, "step": 39390 }, { "epoch": 1.74, "learning_rate": 2.1082796865940142e-05, "loss": 0.1572, "step": 39400 }, { "epoch": 1.74, "learning_rate": 2.1075363137628045e-05, "loss": 0.1107, "step": 39410 }, { "epoch": 1.75, "learning_rate": 2.106792940931595e-05, "loss": 0.1275, "step": 39420 }, { "epoch": 1.75, "learning_rate": 2.1060495681003853e-05, "loss": 0.1345, "step": 39430 }, { "epoch": 1.75, "learning_rate": 2.1053061952691756e-05, "loss": 0.1514, "step": 39440 }, { "epoch": 1.75, "learning_rate": 2.104562822437966e-05, "loss": 0.1601, "step": 39450 }, { "epoch": 1.75, "learning_rate": 2.1038194496067558e-05, "loss": 0.2222, "step": 39460 }, { "epoch": 1.75, "learning_rate": 2.103076076775546e-05, "loss": 0.1161, "step": 39470 }, { "epoch": 1.75, "learning_rate": 2.1023327039443363e-05, "loss": 0.149, "step": 39480 }, { "epoch": 1.75, "learning_rate": 2.1015893311131265e-05, "loss": 0.1487, "step": 39490 }, { "epoch": 1.75, "learning_rate": 2.1008459582819168e-05, "loss": 0.1446, "step": 39500 }, { "epoch": 1.75, "learning_rate": 2.100102585450707e-05, "loss": 0.137, "step": 39510 }, { "epoch": 1.75, "learning_rate": 2.0993592126194973e-05, "loss": 0.143, "step": 39520 }, { "epoch": 1.75, "learning_rate": 2.0986158397882876e-05, "loss": 0.1234, "step": 39530 }, { "epoch": 1.75, "learning_rate": 2.0978724669570775e-05, "loss": 0.112, "step": 39540 }, { "epoch": 1.75, "learning_rate": 2.097129094125868e-05, "loss": 0.1129, "step": 39550 }, { "epoch": 1.75, "learning_rate": 2.0963857212946583e-05, "loss": 0.1297, "step": 39560 }, { "epoch": 1.75, "learning_rate": 2.0956423484634486e-05, "loss": 0.1621, "step": 39570 }, { "epoch": 1.75, "learning_rate": 2.094898975632239e-05, "loss": 0.149, "step": 39580 }, { "epoch": 1.75, "learning_rate": 2.094155602801029e-05, "loss": 0.1255, "step": 39590 }, { "epoch": 1.75, "learning_rate": 2.093412229969819e-05, "loss": 0.1665, "step": 39600 }, { "epoch": 1.75, "learning_rate": 2.0926688571386093e-05, "loss": 0.1193, "step": 39610 }, { "epoch": 1.75, "learning_rate": 2.0919254843073995e-05, "loss": 0.1356, "step": 39620 }, { "epoch": 1.75, "learning_rate": 2.0911821114761898e-05, "loss": 0.1532, "step": 39630 }, { "epoch": 1.75, "learning_rate": 2.09043873864498e-05, "loss": 0.1583, "step": 39640 }, { "epoch": 1.76, "learning_rate": 2.0896953658137703e-05, "loss": 0.1153, "step": 39650 }, { "epoch": 1.76, "learning_rate": 2.0889519929825606e-05, "loss": 0.151, "step": 39660 }, { "epoch": 1.76, "learning_rate": 2.088208620151351e-05, "loss": 0.2209, "step": 39670 }, { "epoch": 1.76, "learning_rate": 2.087465247320141e-05, "loss": 0.1453, "step": 39680 }, { "epoch": 1.76, "learning_rate": 2.0867218744889314e-05, "loss": 0.1011, "step": 39690 }, { "epoch": 1.76, "learning_rate": 2.0859785016577216e-05, "loss": 0.1632, "step": 39700 }, { "epoch": 1.76, "learning_rate": 2.085235128826512e-05, "loss": 0.1383, "step": 39710 }, { "epoch": 1.76, "learning_rate": 2.084491755995302e-05, "loss": 0.095, "step": 39720 }, { "epoch": 1.76, "learning_rate": 2.083748383164092e-05, "loss": 0.1388, "step": 39730 }, { "epoch": 1.76, "learning_rate": 2.0830050103328823e-05, "loss": 0.1328, "step": 39740 }, { "epoch": 1.76, "learning_rate": 2.0822616375016726e-05, "loss": 0.1623, "step": 39750 }, { "epoch": 1.76, "learning_rate": 2.0815182646704628e-05, "loss": 0.122, "step": 39760 }, { "epoch": 1.76, "learning_rate": 2.080774891839253e-05, "loss": 0.1353, "step": 39770 }, { "epoch": 1.76, "learning_rate": 2.0800315190080433e-05, "loss": 0.1074, "step": 39780 }, { "epoch": 1.76, "learning_rate": 2.0792881461768336e-05, "loss": 0.239, "step": 39790 }, { "epoch": 1.76, "learning_rate": 2.078544773345624e-05, "loss": 0.1291, "step": 39800 }, { "epoch": 1.76, "learning_rate": 2.077801400514414e-05, "loss": 0.1527, "step": 39810 }, { "epoch": 1.76, "learning_rate": 2.0770580276832044e-05, "loss": 0.1559, "step": 39820 }, { "epoch": 1.76, "learning_rate": 2.0763146548519946e-05, "loss": 0.1396, "step": 39830 }, { "epoch": 1.76, "learning_rate": 2.075571282020785e-05, "loss": 0.144, "step": 39840 }, { "epoch": 1.76, "learning_rate": 2.074827909189575e-05, "loss": 0.1975, "step": 39850 }, { "epoch": 1.76, "learning_rate": 2.0740845363583654e-05, "loss": 0.1778, "step": 39860 }, { "epoch": 1.77, "learning_rate": 2.0733411635271553e-05, "loss": 0.1291, "step": 39870 }, { "epoch": 1.77, "learning_rate": 2.0725977906959456e-05, "loss": 0.116, "step": 39880 }, { "epoch": 1.77, "learning_rate": 2.071854417864736e-05, "loss": 0.1599, "step": 39890 }, { "epoch": 1.77, "learning_rate": 2.071111045033526e-05, "loss": 0.1859, "step": 39900 }, { "epoch": 1.77, "learning_rate": 2.0703676722023164e-05, "loss": 0.1165, "step": 39910 }, { "epoch": 1.77, "learning_rate": 2.069624299371107e-05, "loss": 0.1533, "step": 39920 }, { "epoch": 1.77, "learning_rate": 2.068880926539897e-05, "loss": 0.1517, "step": 39930 }, { "epoch": 1.77, "learning_rate": 2.068137553708687e-05, "loss": 0.1573, "step": 39940 }, { "epoch": 1.77, "learning_rate": 2.0673941808774774e-05, "loss": 0.1337, "step": 39950 }, { "epoch": 1.77, "learning_rate": 2.0666508080462676e-05, "loss": 0.227, "step": 39960 }, { "epoch": 1.77, "learning_rate": 2.065907435215058e-05, "loss": 0.1098, "step": 39970 }, { "epoch": 1.77, "learning_rate": 2.065164062383848e-05, "loss": 0.1182, "step": 39980 }, { "epoch": 1.77, "learning_rate": 2.0644206895526384e-05, "loss": 0.1267, "step": 39990 }, { "epoch": 1.77, "learning_rate": 2.0636773167214283e-05, "loss": 0.1447, "step": 40000 }, { "epoch": 1.77, "learning_rate": 2.0629339438902186e-05, "loss": 0.1662, "step": 40010 }, { "epoch": 1.77, "learning_rate": 2.062190571059009e-05, "loss": 0.1421, "step": 40020 }, { "epoch": 1.77, "learning_rate": 2.061447198227799e-05, "loss": 0.1476, "step": 40030 }, { "epoch": 1.77, "learning_rate": 2.0607038253965897e-05, "loss": 0.181, "step": 40040 }, { "epoch": 1.77, "learning_rate": 2.05996045256538e-05, "loss": 0.1519, "step": 40050 }, { "epoch": 1.77, "learning_rate": 2.05921707973417e-05, "loss": 0.1335, "step": 40060 }, { "epoch": 1.77, "learning_rate": 2.05847370690296e-05, "loss": 0.1612, "step": 40070 }, { "epoch": 1.77, "learning_rate": 2.0577303340717504e-05, "loss": 0.1466, "step": 40080 }, { "epoch": 1.77, "learning_rate": 2.0569869612405407e-05, "loss": 0.114, "step": 40090 }, { "epoch": 1.78, "learning_rate": 2.056243588409331e-05, "loss": 0.1447, "step": 40100 }, { "epoch": 1.78, "learning_rate": 2.0555002155781212e-05, "loss": 0.1929, "step": 40110 }, { "epoch": 1.78, "learning_rate": 2.0547568427469114e-05, "loss": 0.0976, "step": 40120 }, { "epoch": 1.78, "learning_rate": 2.0540134699157017e-05, "loss": 0.1583, "step": 40130 }, { "epoch": 1.78, "learning_rate": 2.0532700970844916e-05, "loss": 0.1264, "step": 40140 }, { "epoch": 1.78, "learning_rate": 2.052526724253282e-05, "loss": 0.1422, "step": 40150 }, { "epoch": 1.78, "learning_rate": 2.0517833514220725e-05, "loss": 0.1475, "step": 40160 }, { "epoch": 1.78, "learning_rate": 2.0510399785908627e-05, "loss": 0.2099, "step": 40170 }, { "epoch": 1.78, "learning_rate": 2.050296605759653e-05, "loss": 0.1323, "step": 40180 }, { "epoch": 1.78, "learning_rate": 2.0495532329284432e-05, "loss": 0.1083, "step": 40190 }, { "epoch": 1.78, "learning_rate": 2.048809860097233e-05, "loss": 0.1086, "step": 40200 }, { "epoch": 1.78, "learning_rate": 2.0480664872660234e-05, "loss": 0.1756, "step": 40210 }, { "epoch": 1.78, "learning_rate": 2.0473231144348137e-05, "loss": 0.1584, "step": 40220 }, { "epoch": 1.78, "learning_rate": 2.046579741603604e-05, "loss": 0.0976, "step": 40230 }, { "epoch": 1.78, "learning_rate": 2.0458363687723942e-05, "loss": 0.1319, "step": 40240 }, { "epoch": 1.78, "learning_rate": 2.0450929959411844e-05, "loss": 0.1174, "step": 40250 }, { "epoch": 1.78, "learning_rate": 2.0443496231099747e-05, "loss": 0.1839, "step": 40260 }, { "epoch": 1.78, "learning_rate": 2.0436062502787646e-05, "loss": 0.1578, "step": 40270 }, { "epoch": 1.78, "learning_rate": 2.0428628774475552e-05, "loss": 0.1312, "step": 40280 }, { "epoch": 1.78, "learning_rate": 2.0421195046163455e-05, "loss": 0.1684, "step": 40290 }, { "epoch": 1.78, "learning_rate": 2.0413761317851357e-05, "loss": 0.1094, "step": 40300 }, { "epoch": 1.78, "learning_rate": 2.040632758953926e-05, "loss": 0.1252, "step": 40310 }, { "epoch": 1.79, "learning_rate": 2.0398893861227163e-05, "loss": 0.165, "step": 40320 }, { "epoch": 1.79, "learning_rate": 2.0391460132915065e-05, "loss": 0.0912, "step": 40330 }, { "epoch": 1.79, "learning_rate": 2.0384026404602964e-05, "loss": 0.1105, "step": 40340 }, { "epoch": 1.79, "learning_rate": 2.0376592676290867e-05, "loss": 0.1435, "step": 40350 }, { "epoch": 1.79, "learning_rate": 2.036915894797877e-05, "loss": 0.239, "step": 40360 }, { "epoch": 1.79, "learning_rate": 2.0361725219666672e-05, "loss": 0.1576, "step": 40370 }, { "epoch": 1.79, "learning_rate": 2.0354291491354575e-05, "loss": 0.15, "step": 40380 }, { "epoch": 1.79, "learning_rate": 2.0346857763042477e-05, "loss": 0.1513, "step": 40390 }, { "epoch": 1.79, "learning_rate": 2.033942403473038e-05, "loss": 0.1466, "step": 40400 }, { "epoch": 1.79, "learning_rate": 2.0331990306418282e-05, "loss": 0.0999, "step": 40410 }, { "epoch": 1.79, "learning_rate": 2.0324556578106185e-05, "loss": 0.1326, "step": 40420 }, { "epoch": 1.79, "learning_rate": 2.0317122849794087e-05, "loss": 0.1293, "step": 40430 }, { "epoch": 1.79, "learning_rate": 2.030968912148199e-05, "loss": 0.1298, "step": 40440 }, { "epoch": 1.79, "learning_rate": 2.0302255393169893e-05, "loss": 0.1571, "step": 40450 }, { "epoch": 1.79, "learning_rate": 2.0294821664857795e-05, "loss": 0.1163, "step": 40460 }, { "epoch": 1.79, "learning_rate": 2.0287387936545694e-05, "loss": 0.1597, "step": 40470 }, { "epoch": 1.79, "learning_rate": 2.0279954208233597e-05, "loss": 0.106, "step": 40480 }, { "epoch": 1.79, "learning_rate": 2.02725204799215e-05, "loss": 0.1158, "step": 40490 }, { "epoch": 1.79, "learning_rate": 2.0265086751609402e-05, "loss": 0.1313, "step": 40500 }, { "epoch": 1.79, "learning_rate": 2.0257653023297305e-05, "loss": 0.0764, "step": 40510 }, { "epoch": 1.79, "learning_rate": 2.0250219294985207e-05, "loss": 0.1253, "step": 40520 }, { "epoch": 1.79, "learning_rate": 2.024278556667311e-05, "loss": 0.1313, "step": 40530 }, { "epoch": 1.79, "learning_rate": 2.0235351838361012e-05, "loss": 0.1436, "step": 40540 }, { "epoch": 1.8, "learning_rate": 2.0227918110048915e-05, "loss": 0.1404, "step": 40550 }, { "epoch": 1.8, "learning_rate": 2.0220484381736818e-05, "loss": 0.1868, "step": 40560 }, { "epoch": 1.8, "learning_rate": 2.021305065342472e-05, "loss": 0.1455, "step": 40570 }, { "epoch": 1.8, "learning_rate": 2.0205616925112623e-05, "loss": 0.1677, "step": 40580 }, { "epoch": 1.8, "learning_rate": 2.0198183196800525e-05, "loss": 0.1349, "step": 40590 }, { "epoch": 1.8, "learning_rate": 2.0190749468488428e-05, "loss": 0.1333, "step": 40600 }, { "epoch": 1.8, "learning_rate": 2.0183315740176327e-05, "loss": 0.1011, "step": 40610 }, { "epoch": 1.8, "learning_rate": 2.017588201186423e-05, "loss": 0.1384, "step": 40620 }, { "epoch": 1.8, "learning_rate": 2.0168448283552132e-05, "loss": 0.1413, "step": 40630 }, { "epoch": 1.8, "learning_rate": 2.0161014555240035e-05, "loss": 0.1677, "step": 40640 }, { "epoch": 1.8, "learning_rate": 2.015358082692794e-05, "loss": 0.1555, "step": 40650 }, { "epoch": 1.8, "learning_rate": 2.0146147098615843e-05, "loss": 0.1846, "step": 40660 }, { "epoch": 1.8, "learning_rate": 2.0138713370303743e-05, "loss": 0.1533, "step": 40670 }, { "epoch": 1.8, "learning_rate": 2.0131279641991645e-05, "loss": 0.2295, "step": 40680 }, { "epoch": 1.8, "learning_rate": 2.0123845913679548e-05, "loss": 0.124, "step": 40690 }, { "epoch": 1.8, "learning_rate": 2.011641218536745e-05, "loss": 0.187, "step": 40700 }, { "epoch": 1.8, "learning_rate": 2.0108978457055353e-05, "loss": 0.172, "step": 40710 }, { "epoch": 1.8, "learning_rate": 2.0101544728743256e-05, "loss": 0.1015, "step": 40720 }, { "epoch": 1.8, "learning_rate": 2.0094111000431158e-05, "loss": 0.1583, "step": 40730 }, { "epoch": 1.8, "learning_rate": 2.0086677272119057e-05, "loss": 0.2003, "step": 40740 }, { "epoch": 1.8, "learning_rate": 2.007924354380696e-05, "loss": 0.166, "step": 40750 }, { "epoch": 1.8, "learning_rate": 2.0071809815494862e-05, "loss": 0.1565, "step": 40760 }, { "epoch": 1.81, "learning_rate": 2.006437608718277e-05, "loss": 0.1749, "step": 40770 }, { "epoch": 1.81, "learning_rate": 2.005694235887067e-05, "loss": 0.1489, "step": 40780 }, { "epoch": 1.81, "learning_rate": 2.0049508630558574e-05, "loss": 0.0947, "step": 40790 }, { "epoch": 1.81, "learning_rate": 2.0042074902246473e-05, "loss": 0.1467, "step": 40800 }, { "epoch": 1.81, "learning_rate": 2.0034641173934375e-05, "loss": 0.156, "step": 40810 }, { "epoch": 1.81, "learning_rate": 2.0027207445622278e-05, "loss": 0.1416, "step": 40820 }, { "epoch": 1.81, "learning_rate": 2.001977371731018e-05, "loss": 0.1276, "step": 40830 }, { "epoch": 1.81, "learning_rate": 2.0012339988998083e-05, "loss": 0.1611, "step": 40840 }, { "epoch": 1.81, "learning_rate": 2.0004906260685986e-05, "loss": 0.1434, "step": 40850 }, { "epoch": 1.81, "learning_rate": 1.9997472532373888e-05, "loss": 0.0909, "step": 40860 }, { "epoch": 1.81, "learning_rate": 1.999003880406179e-05, "loss": 0.1452, "step": 40870 }, { "epoch": 1.81, "learning_rate": 1.998260507574969e-05, "loss": 0.0892, "step": 40880 }, { "epoch": 1.81, "learning_rate": 1.9975171347437593e-05, "loss": 0.1653, "step": 40890 }, { "epoch": 1.81, "learning_rate": 1.99677376191255e-05, "loss": 0.0776, "step": 40900 }, { "epoch": 1.81, "learning_rate": 1.99603038908134e-05, "loss": 0.1773, "step": 40910 }, { "epoch": 1.81, "learning_rate": 1.9952870162501304e-05, "loss": 0.1211, "step": 40920 }, { "epoch": 1.81, "learning_rate": 1.9945436434189206e-05, "loss": 0.1579, "step": 40930 }, { "epoch": 1.81, "learning_rate": 1.9938002705877105e-05, "loss": 0.0973, "step": 40940 }, { "epoch": 1.81, "learning_rate": 1.9930568977565008e-05, "loss": 0.1184, "step": 40950 }, { "epoch": 1.81, "learning_rate": 1.992313524925291e-05, "loss": 0.1914, "step": 40960 }, { "epoch": 1.81, "learning_rate": 1.9915701520940813e-05, "loss": 0.1134, "step": 40970 }, { "epoch": 1.81, "learning_rate": 1.9908267792628716e-05, "loss": 0.0962, "step": 40980 }, { "epoch": 1.81, "learning_rate": 1.990083406431662e-05, "loss": 0.1145, "step": 40990 }, { "epoch": 1.82, "learning_rate": 1.989340033600452e-05, "loss": 0.1235, "step": 41000 }, { "epoch": 1.82, "learning_rate": 1.988596660769242e-05, "loss": 0.1551, "step": 41010 }, { "epoch": 1.82, "learning_rate": 1.9878532879380326e-05, "loss": 0.0836, "step": 41020 }, { "epoch": 1.82, "learning_rate": 1.987109915106823e-05, "loss": 0.101, "step": 41030 }, { "epoch": 1.82, "learning_rate": 1.986366542275613e-05, "loss": 0.0867, "step": 41040 }, { "epoch": 1.82, "learning_rate": 1.9856231694444034e-05, "loss": 0.1777, "step": 41050 }, { "epoch": 1.82, "learning_rate": 1.9848797966131936e-05, "loss": 0.1829, "step": 41060 }, { "epoch": 1.82, "learning_rate": 1.9841364237819836e-05, "loss": 0.1815, "step": 41070 }, { "epoch": 1.82, "learning_rate": 1.9833930509507738e-05, "loss": 0.1267, "step": 41080 }, { "epoch": 1.82, "learning_rate": 1.982649678119564e-05, "loss": 0.134, "step": 41090 }, { "epoch": 1.82, "learning_rate": 1.9819063052883543e-05, "loss": 0.1357, "step": 41100 }, { "epoch": 1.82, "learning_rate": 1.9811629324571446e-05, "loss": 0.1018, "step": 41110 }, { "epoch": 1.82, "learning_rate": 1.980419559625935e-05, "loss": 0.0903, "step": 41120 }, { "epoch": 1.82, "learning_rate": 1.979676186794725e-05, "loss": 0.144, "step": 41130 }, { "epoch": 1.82, "learning_rate": 1.9789328139635154e-05, "loss": 0.1024, "step": 41140 }, { "epoch": 1.82, "learning_rate": 1.9781894411323056e-05, "loss": 0.1144, "step": 41150 }, { "epoch": 1.82, "learning_rate": 1.977446068301096e-05, "loss": 0.1397, "step": 41160 }, { "epoch": 1.82, "learning_rate": 1.976702695469886e-05, "loss": 0.1371, "step": 41170 }, { "epoch": 1.82, "learning_rate": 1.9759593226386764e-05, "loss": 0.1196, "step": 41180 }, { "epoch": 1.82, "learning_rate": 1.9752159498074667e-05, "loss": 0.1595, "step": 41190 }, { "epoch": 1.82, "learning_rate": 1.974472576976257e-05, "loss": 0.1317, "step": 41200 }, { "epoch": 1.82, "learning_rate": 1.973729204145047e-05, "loss": 0.1704, "step": 41210 }, { "epoch": 1.82, "learning_rate": 1.972985831313837e-05, "loss": 0.1577, "step": 41220 }, { "epoch": 1.83, "learning_rate": 1.9722424584826274e-05, "loss": 0.1747, "step": 41230 }, { "epoch": 1.83, "learning_rate": 1.9714990856514176e-05, "loss": 0.1681, "step": 41240 }, { "epoch": 1.83, "learning_rate": 1.970755712820208e-05, "loss": 0.1368, "step": 41250 }, { "epoch": 1.83, "learning_rate": 1.9700123399889985e-05, "loss": 0.118, "step": 41260 }, { "epoch": 1.83, "learning_rate": 1.9692689671577884e-05, "loss": 0.1288, "step": 41270 }, { "epoch": 1.83, "learning_rate": 1.9685255943265786e-05, "loss": 0.1717, "step": 41280 }, { "epoch": 1.83, "learning_rate": 1.967782221495369e-05, "loss": 0.0998, "step": 41290 }, { "epoch": 1.83, "learning_rate": 1.967038848664159e-05, "loss": 0.1304, "step": 41300 }, { "epoch": 1.83, "learning_rate": 1.9662954758329494e-05, "loss": 0.1401, "step": 41310 }, { "epoch": 1.83, "learning_rate": 1.9655521030017397e-05, "loss": 0.1613, "step": 41320 }, { "epoch": 1.83, "learning_rate": 1.96480873017053e-05, "loss": 0.1547, "step": 41330 }, { "epoch": 1.83, "learning_rate": 1.96406535733932e-05, "loss": 0.1541, "step": 41340 }, { "epoch": 1.83, "learning_rate": 1.96332198450811e-05, "loss": 0.1972, "step": 41350 }, { "epoch": 1.83, "learning_rate": 1.9625786116769004e-05, "loss": 0.1285, "step": 41360 }, { "epoch": 1.83, "learning_rate": 1.9618352388456906e-05, "loss": 0.1394, "step": 41370 }, { "epoch": 1.83, "learning_rate": 1.961091866014481e-05, "loss": 0.0893, "step": 41380 }, { "epoch": 1.83, "learning_rate": 1.9603484931832715e-05, "loss": 0.1566, "step": 41390 }, { "epoch": 1.83, "learning_rate": 1.9596051203520614e-05, "loss": 0.1606, "step": 41400 }, { "epoch": 1.83, "learning_rate": 1.9588617475208517e-05, "loss": 0.1507, "step": 41410 }, { "epoch": 1.83, "learning_rate": 1.958118374689642e-05, "loss": 0.1818, "step": 41420 }, { "epoch": 1.83, "learning_rate": 1.9573750018584322e-05, "loss": 0.2006, "step": 41430 }, { "epoch": 1.83, "learning_rate": 1.9566316290272224e-05, "loss": 0.2031, "step": 41440 }, { "epoch": 1.84, "learning_rate": 1.9558882561960127e-05, "loss": 0.1733, "step": 41450 }, { "epoch": 1.84, "learning_rate": 1.955144883364803e-05, "loss": 0.1178, "step": 41460 }, { "epoch": 1.84, "learning_rate": 1.9544015105335932e-05, "loss": 0.1058, "step": 41470 }, { "epoch": 1.84, "learning_rate": 1.953658137702383e-05, "loss": 0.1415, "step": 41480 }, { "epoch": 1.84, "learning_rate": 1.9529147648711734e-05, "loss": 0.1288, "step": 41490 }, { "epoch": 1.84, "learning_rate": 1.9521713920399636e-05, "loss": 0.1605, "step": 41500 }, { "epoch": 1.84, "learning_rate": 1.9514280192087542e-05, "loss": 0.1236, "step": 41510 }, { "epoch": 1.84, "learning_rate": 1.9506846463775445e-05, "loss": 0.1338, "step": 41520 }, { "epoch": 1.84, "learning_rate": 1.9499412735463348e-05, "loss": 0.192, "step": 41530 }, { "epoch": 1.84, "learning_rate": 1.9491979007151247e-05, "loss": 0.1717, "step": 41540 }, { "epoch": 1.84, "learning_rate": 1.948454527883915e-05, "loss": 0.1398, "step": 41550 }, { "epoch": 1.84, "learning_rate": 1.9477111550527052e-05, "loss": 0.1537, "step": 41560 }, { "epoch": 1.84, "learning_rate": 1.9469677822214954e-05, "loss": 0.1588, "step": 41570 }, { "epoch": 1.84, "learning_rate": 1.9462244093902857e-05, "loss": 0.1418, "step": 41580 }, { "epoch": 1.84, "learning_rate": 1.945481036559076e-05, "loss": 0.1368, "step": 41590 }, { "epoch": 1.84, "learning_rate": 1.9447376637278662e-05, "loss": 0.1272, "step": 41600 }, { "epoch": 1.84, "learning_rate": 1.943994290896656e-05, "loss": 0.1379, "step": 41610 }, { "epoch": 1.84, "learning_rate": 1.9432509180654464e-05, "loss": 0.1702, "step": 41620 }, { "epoch": 1.84, "learning_rate": 1.942507545234237e-05, "loss": 0.1286, "step": 41630 }, { "epoch": 1.84, "learning_rate": 1.9417641724030273e-05, "loss": 0.1929, "step": 41640 }, { "epoch": 1.84, "learning_rate": 1.9410207995718175e-05, "loss": 0.1345, "step": 41650 }, { "epoch": 1.84, "learning_rate": 1.9402774267406078e-05, "loss": 0.1369, "step": 41660 }, { "epoch": 1.84, "learning_rate": 1.9395340539093977e-05, "loss": 0.1619, "step": 41670 }, { "epoch": 1.85, "learning_rate": 1.938790681078188e-05, "loss": 0.156, "step": 41680 }, { "epoch": 1.85, "learning_rate": 1.9380473082469782e-05, "loss": 0.2092, "step": 41690 }, { "epoch": 1.85, "learning_rate": 1.9373039354157685e-05, "loss": 0.142, "step": 41700 }, { "epoch": 1.85, "learning_rate": 1.9365605625845587e-05, "loss": 0.1488, "step": 41710 }, { "epoch": 1.85, "learning_rate": 1.935817189753349e-05, "loss": 0.1593, "step": 41720 }, { "epoch": 1.85, "learning_rate": 1.9350738169221392e-05, "loss": 0.1026, "step": 41730 }, { "epoch": 1.85, "learning_rate": 1.9343304440909295e-05, "loss": 0.1095, "step": 41740 }, { "epoch": 1.85, "learning_rate": 1.9335870712597197e-05, "loss": 0.1281, "step": 41750 }, { "epoch": 1.85, "learning_rate": 1.93284369842851e-05, "loss": 0.1098, "step": 41760 }, { "epoch": 1.85, "learning_rate": 1.9321003255973003e-05, "loss": 0.1241, "step": 41770 }, { "epoch": 1.85, "learning_rate": 1.9313569527660905e-05, "loss": 0.2285, "step": 41780 }, { "epoch": 1.85, "learning_rate": 1.9306135799348808e-05, "loss": 0.1388, "step": 41790 }, { "epoch": 1.85, "learning_rate": 1.929870207103671e-05, "loss": 0.1776, "step": 41800 }, { "epoch": 1.85, "learning_rate": 1.929126834272461e-05, "loss": 0.1721, "step": 41810 }, { "epoch": 1.85, "learning_rate": 1.9283834614412512e-05, "loss": 0.163, "step": 41820 }, { "epoch": 1.85, "learning_rate": 1.9276400886100415e-05, "loss": 0.1452, "step": 41830 }, { "epoch": 1.85, "learning_rate": 1.9268967157788317e-05, "loss": 0.1496, "step": 41840 }, { "epoch": 1.85, "learning_rate": 1.926153342947622e-05, "loss": 0.1483, "step": 41850 }, { "epoch": 1.85, "learning_rate": 1.9254099701164122e-05, "loss": 0.1449, "step": 41860 }, { "epoch": 1.85, "learning_rate": 1.9246665972852025e-05, "loss": 0.207, "step": 41870 }, { "epoch": 1.85, "learning_rate": 1.9239232244539928e-05, "loss": 0.1199, "step": 41880 }, { "epoch": 1.85, "learning_rate": 1.923179851622783e-05, "loss": 0.0725, "step": 41890 }, { "epoch": 1.86, "learning_rate": 1.9224364787915733e-05, "loss": 0.1718, "step": 41900 }, { "epoch": 1.86, "learning_rate": 1.9216931059603635e-05, "loss": 0.1339, "step": 41910 }, { "epoch": 1.86, "learning_rate": 1.9209497331291538e-05, "loss": 0.0996, "step": 41920 }, { "epoch": 1.86, "learning_rate": 1.920206360297944e-05, "loss": 0.1852, "step": 41930 }, { "epoch": 1.86, "learning_rate": 1.9194629874667343e-05, "loss": 0.1054, "step": 41940 }, { "epoch": 1.86, "learning_rate": 1.9187196146355242e-05, "loss": 0.1407, "step": 41950 }, { "epoch": 1.86, "learning_rate": 1.9179762418043145e-05, "loss": 0.1131, "step": 41960 }, { "epoch": 1.86, "learning_rate": 1.9172328689731047e-05, "loss": 0.1326, "step": 41970 }, { "epoch": 1.86, "learning_rate": 1.916489496141895e-05, "loss": 0.1436, "step": 41980 }, { "epoch": 1.86, "learning_rate": 1.9157461233106853e-05, "loss": 0.1371, "step": 41990 }, { "epoch": 1.86, "learning_rate": 1.915002750479476e-05, "loss": 0.1359, "step": 42000 }, { "epoch": 1.86, "learning_rate": 1.9142593776482658e-05, "loss": 0.1091, "step": 42010 }, { "epoch": 1.86, "learning_rate": 1.913516004817056e-05, "loss": 0.1061, "step": 42020 }, { "epoch": 1.86, "learning_rate": 1.9127726319858463e-05, "loss": 0.204, "step": 42030 }, { "epoch": 1.86, "learning_rate": 1.9120292591546366e-05, "loss": 0.1694, "step": 42040 }, { "epoch": 1.86, "learning_rate": 1.9112858863234268e-05, "loss": 0.1731, "step": 42050 }, { "epoch": 1.86, "learning_rate": 1.910542513492217e-05, "loss": 0.1877, "step": 42060 }, { "epoch": 1.86, "learning_rate": 1.9097991406610073e-05, "loss": 0.1519, "step": 42070 }, { "epoch": 1.86, "learning_rate": 1.9090557678297972e-05, "loss": 0.2077, "step": 42080 }, { "epoch": 1.86, "learning_rate": 1.9083123949985875e-05, "loss": 0.1386, "step": 42090 }, { "epoch": 1.86, "learning_rate": 1.9075690221673778e-05, "loss": 0.1433, "step": 42100 }, { "epoch": 1.86, "learning_rate": 1.906825649336168e-05, "loss": 0.1448, "step": 42110 }, { "epoch": 1.86, "learning_rate": 1.9060822765049586e-05, "loss": 0.175, "step": 42120 }, { "epoch": 1.87, "learning_rate": 1.905338903673749e-05, "loss": 0.1178, "step": 42130 }, { "epoch": 1.87, "learning_rate": 1.9045955308425388e-05, "loss": 0.2044, "step": 42140 }, { "epoch": 1.87, "learning_rate": 1.903852158011329e-05, "loss": 0.1408, "step": 42150 }, { "epoch": 1.87, "learning_rate": 1.9031087851801193e-05, "loss": 0.1148, "step": 42160 }, { "epoch": 1.87, "learning_rate": 1.9023654123489096e-05, "loss": 0.16, "step": 42170 }, { "epoch": 1.87, "learning_rate": 1.9016220395176998e-05, "loss": 0.1084, "step": 42180 }, { "epoch": 1.87, "learning_rate": 1.90087866668649e-05, "loss": 0.163, "step": 42190 }, { "epoch": 1.87, "learning_rate": 1.9001352938552803e-05, "loss": 0.1945, "step": 42200 }, { "epoch": 1.87, "learning_rate": 1.8993919210240706e-05, "loss": 0.1465, "step": 42210 }, { "epoch": 1.87, "learning_rate": 1.8986485481928605e-05, "loss": 0.111, "step": 42220 }, { "epoch": 1.87, "learning_rate": 1.8979051753616508e-05, "loss": 0.0976, "step": 42230 }, { "epoch": 1.87, "learning_rate": 1.8971618025304414e-05, "loss": 0.1112, "step": 42240 }, { "epoch": 1.87, "learning_rate": 1.8964184296992316e-05, "loss": 0.1763, "step": 42250 }, { "epoch": 1.87, "learning_rate": 1.895675056868022e-05, "loss": 0.1569, "step": 42260 }, { "epoch": 1.87, "learning_rate": 1.894931684036812e-05, "loss": 0.1199, "step": 42270 }, { "epoch": 1.87, "learning_rate": 1.894188311205602e-05, "loss": 0.1221, "step": 42280 }, { "epoch": 1.87, "learning_rate": 1.8934449383743923e-05, "loss": 0.1206, "step": 42290 }, { "epoch": 1.87, "learning_rate": 1.8927015655431826e-05, "loss": 0.1334, "step": 42300 }, { "epoch": 1.87, "learning_rate": 1.891958192711973e-05, "loss": 0.1679, "step": 42310 }, { "epoch": 1.87, "learning_rate": 1.891214819880763e-05, "loss": 0.1302, "step": 42320 }, { "epoch": 1.87, "learning_rate": 1.8904714470495534e-05, "loss": 0.1741, "step": 42330 }, { "epoch": 1.87, "learning_rate": 1.8897280742183436e-05, "loss": 0.1175, "step": 42340 }, { "epoch": 1.87, "learning_rate": 1.8889847013871335e-05, "loss": 0.209, "step": 42350 }, { "epoch": 1.88, "learning_rate": 1.8882413285559238e-05, "loss": 0.1415, "step": 42360 }, { "epoch": 1.88, "learning_rate": 1.8874979557247144e-05, "loss": 0.1839, "step": 42370 }, { "epoch": 1.88, "learning_rate": 1.8867545828935046e-05, "loss": 0.1057, "step": 42380 }, { "epoch": 1.88, "learning_rate": 1.886011210062295e-05, "loss": 0.1267, "step": 42390 }, { "epoch": 1.88, "learning_rate": 1.885267837231085e-05, "loss": 0.167, "step": 42400 }, { "epoch": 1.88, "learning_rate": 1.884524464399875e-05, "loss": 0.1613, "step": 42410 }, { "epoch": 1.88, "learning_rate": 1.8837810915686653e-05, "loss": 0.1547, "step": 42420 }, { "epoch": 1.88, "learning_rate": 1.8830377187374556e-05, "loss": 0.1275, "step": 42430 }, { "epoch": 1.88, "learning_rate": 1.882294345906246e-05, "loss": 0.1761, "step": 42440 }, { "epoch": 1.88, "learning_rate": 1.881550973075036e-05, "loss": 0.1467, "step": 42450 }, { "epoch": 1.88, "learning_rate": 1.8808076002438264e-05, "loss": 0.161, "step": 42460 }, { "epoch": 1.88, "learning_rate": 1.8800642274126166e-05, "loss": 0.1191, "step": 42470 }, { "epoch": 1.88, "learning_rate": 1.879320854581407e-05, "loss": 0.1541, "step": 42480 }, { "epoch": 1.88, "learning_rate": 1.878577481750197e-05, "loss": 0.1342, "step": 42490 }, { "epoch": 1.88, "learning_rate": 1.8778341089189874e-05, "loss": 0.1696, "step": 42500 } ], "max_steps": 67761, "num_train_epochs": 3, "total_flos": 1.2913330651864688e+17, "trial_name": null, "trial_params": null }