diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,25516 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.8816133173949616, + "global_step": 42500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.687, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 0.6806, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.6846, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 0.6842, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 5e-06, + "loss": 0.66, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 6e-06, + "loss": 0.6568, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 7.000000000000001e-06, + "loss": 0.6458, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 0.6134, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 9e-06, + "loss": 0.5791, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 0.5694, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.5117, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 0.4606, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.3947, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.3784, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 1.5e-05, + "loss": 0.3609, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.3163, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 1.7000000000000003e-05, + "loss": 0.2994, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 1.8e-05, + "loss": 0.2586, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 1.9e-05, + "loss": 0.3097, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 0.301, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 2.1e-05, + "loss": 0.2829, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 2.2000000000000003e-05, + "loss": 0.2983, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.2439, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 0.2686, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-05, + "loss": 0.3592, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 2.6000000000000002e-05, + "loss": 0.3019, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 2.7000000000000002e-05, + "loss": 0.2524, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-05, + "loss": 0.2285, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 2.9e-05, + "loss": 0.2637, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 3e-05, + "loss": 0.2016, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 3.1e-05, + "loss": 0.2358, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.3402, + "step": 320 + }, + { + "epoch": 0.01, + "learning_rate": 3.3e-05, + "loss": 0.2562, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.2626, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 3.5e-05, + "loss": 0.243, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-05, + "loss": 0.2327, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 3.7e-05, + "loss": 0.258, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 3.8e-05, + "loss": 0.2536, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 3.9000000000000006e-05, + "loss": 0.2657, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 4e-05, + "loss": 0.218, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 4.1e-05, + "loss": 0.2126, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 4.2e-05, + "loss": 0.2723, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 4.3e-05, + "loss": 0.193, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.2726, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 4.5e-05, + "loss": 0.2734, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 4.600000000000001e-05, + "loss": 0.2387, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 4.7e-05, + "loss": 0.2204, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 0.1889, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 4.9e-05, + "loss": 0.2558, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 0.2126, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9992566271687905e-05, + "loss": 0.2394, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 4.998513254337581e-05, + "loss": 0.2793, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 4.997769881506371e-05, + "loss": 0.2972, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 4.997026508675161e-05, + "loss": 0.2031, + "step": 540 + }, + { + "epoch": 0.02, + "learning_rate": 4.9962831358439515e-05, + "loss": 0.264, + "step": 550 + }, + { + "epoch": 0.02, + "learning_rate": 4.995539763012742e-05, + "loss": 0.2923, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 4.994796390181532e-05, + "loss": 0.2861, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 4.9940530173503216e-05, + "loss": 0.2016, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 4.9933096445191126e-05, + "loss": 0.2703, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 4.992566271687902e-05, + "loss": 0.2351, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 4.991822898856693e-05, + "loss": 0.2432, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 4.9910795260254827e-05, + "loss": 0.2567, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 4.9903361531942736e-05, + "loss": 0.3241, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 4.989592780363064e-05, + "loss": 0.2396, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 4.9888494075318534e-05, + "loss": 0.2282, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 4.9881060347006444e-05, + "loss": 0.22, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 4.987362661869434e-05, + "loss": 0.216, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 4.986619289038225e-05, + "loss": 0.3001, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 4.9858759162070145e-05, + "loss": 0.1784, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 4.985132543375805e-05, + "loss": 0.1949, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 4.984389170544595e-05, + "loss": 0.2236, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 4.983645797713385e-05, + "loss": 0.2116, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 4.9829024248821755e-05, + "loss": 0.2238, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 4.982159052050966e-05, + "loss": 0.2767, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 4.981415679219757e-05, + "loss": 0.2553, + "step": 750 + }, + { + "epoch": 0.03, + "learning_rate": 4.980672306388546e-05, + "loss": 0.2686, + "step": 760 + }, + { + "epoch": 0.03, + "learning_rate": 4.9799289335573365e-05, + "loss": 0.2538, + "step": 770 + }, + { + "epoch": 0.03, + "learning_rate": 4.979185560726127e-05, + "loss": 0.2044, + "step": 780 + }, + { + "epoch": 0.03, + "learning_rate": 4.978442187894917e-05, + "loss": 0.1951, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 4.977698815063707e-05, + "loss": 0.1634, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 4.9769554422324976e-05, + "loss": 0.236, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 4.976212069401288e-05, + "loss": 0.1908, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 4.975468696570078e-05, + "loss": 0.2738, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 4.974725323738868e-05, + "loss": 0.2339, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 4.9739819509076586e-05, + "loss": 0.2441, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 4.973238578076448e-05, + "loss": 0.195, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 4.972495205245239e-05, + "loss": 0.1888, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 4.9717518324140294e-05, + "loss": 0.2359, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 4.9710084595828196e-05, + "loss": 0.1867, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 4.97026508675161e-05, + "loss": 0.2421, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 4.9695217139203995e-05, + "loss": 0.1858, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 4.9687783410891904e-05, + "loss": 0.2607, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 4.96803496825798e-05, + "loss": 0.187, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 4.967291595426771e-05, + "loss": 0.1992, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 4.9665482225955605e-05, + "loss": 0.1595, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 4.9658048497643514e-05, + "loss": 0.3007, + "step": 960 + }, + { + "epoch": 0.04, + "learning_rate": 4.965061476933141e-05, + "loss": 0.2044, + "step": 970 + }, + { + "epoch": 0.04, + "learning_rate": 4.964318104101931e-05, + "loss": 0.165, + "step": 980 + }, + { + "epoch": 0.04, + "learning_rate": 4.9635747312707215e-05, + "loss": 0.2566, + "step": 990 + }, + { + "epoch": 0.04, + "learning_rate": 4.962831358439512e-05, + "loss": 0.2033, + "step": 1000 + }, + { + "epoch": 0.04, + "learning_rate": 4.962087985608303e-05, + "loss": 0.2737, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 4.961344612777092e-05, + "loss": 0.2183, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 4.9606012399458826e-05, + "loss": 0.2284, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 4.959857867114673e-05, + "loss": 0.2267, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 4.959114494283463e-05, + "loss": 0.2029, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 4.958371121452253e-05, + "loss": 0.2239, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 4.9576277486210436e-05, + "loss": 0.2491, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 4.956884375789834e-05, + "loss": 0.2234, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 4.956141002958624e-05, + "loss": 0.2345, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 4.9553976301274144e-05, + "loss": 0.234, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9546542572962046e-05, + "loss": 0.2095, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 4.953910884464995e-05, + "loss": 0.2476, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 4.953167511633785e-05, + "loss": 0.1759, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 4.9524241388025754e-05, + "loss": 0.2135, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 4.9516807659713657e-05, + "loss": 0.2745, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 4.950937393140156e-05, + "loss": 0.1498, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 4.950194020308946e-05, + "loss": 0.2092, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 4.9494506474777364e-05, + "loss": 0.2188, + "step": 1180 + }, + { + "epoch": 0.05, + "learning_rate": 4.948707274646526e-05, + "loss": 0.2006, + "step": 1190 + }, + { + "epoch": 0.05, + "learning_rate": 4.947963901815317e-05, + "loss": 0.2703, + "step": 1200 + }, + { + "epoch": 0.05, + "learning_rate": 4.9472205289841065e-05, + "loss": 0.3083, + "step": 1210 + }, + { + "epoch": 0.05, + "learning_rate": 4.9464771561528975e-05, + "loss": 0.2267, + "step": 1220 + }, + { + "epoch": 0.05, + "learning_rate": 4.945733783321687e-05, + "loss": 0.2153, + "step": 1230 + }, + { + "epoch": 0.05, + "learning_rate": 4.944990410490477e-05, + "loss": 0.2014, + "step": 1240 + }, + { + "epoch": 0.06, + "learning_rate": 4.944247037659268e-05, + "loss": 0.2497, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 4.943503664828058e-05, + "loss": 0.2013, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 4.942760291996849e-05, + "loss": 0.2508, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 4.942016919165638e-05, + "loss": 0.2829, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 4.941273546334429e-05, + "loss": 0.1481, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 4.940530173503219e-05, + "loss": 0.2612, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 4.939786800672009e-05, + "loss": 0.2074, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 4.9390434278407994e-05, + "loss": 0.2151, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 4.9383000550095896e-05, + "loss": 0.1589, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 4.93755668217838e-05, + "loss": 0.1702, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 4.93681330934717e-05, + "loss": 0.2212, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 4.9360699365159604e-05, + "loss": 0.2072, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 4.9353265636847506e-05, + "loss": 0.213, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 4.934583190853541e-05, + "loss": 0.2863, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 4.933839818022331e-05, + "loss": 0.2518, + "step": 1390 + }, + { + "epoch": 0.06, + "learning_rate": 4.9330964451911214e-05, + "loss": 0.2125, + "step": 1400 + }, + { + "epoch": 0.06, + "learning_rate": 4.932353072359912e-05, + "loss": 0.1968, + "step": 1410 + }, + { + "epoch": 0.06, + "learning_rate": 4.931609699528702e-05, + "loss": 0.2009, + "step": 1420 + }, + { + "epoch": 0.06, + "learning_rate": 4.930866326697492e-05, + "loss": 0.2424, + "step": 1430 + }, + { + "epoch": 0.06, + "learning_rate": 4.9301229538662825e-05, + "loss": 0.2323, + "step": 1440 + }, + { + "epoch": 0.06, + "learning_rate": 4.929379581035072e-05, + "loss": 0.1675, + "step": 1450 + }, + { + "epoch": 0.06, + "learning_rate": 4.928636208203863e-05, + "loss": 0.203, + "step": 1460 + }, + { + "epoch": 0.07, + "learning_rate": 4.9278928353726526e-05, + "loss": 0.1722, + "step": 1470 + }, + { + "epoch": 0.07, + "learning_rate": 4.9271494625414435e-05, + "loss": 0.1985, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 4.926406089710234e-05, + "loss": 0.2263, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 4.925662716879024e-05, + "loss": 0.1868, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 4.924919344047814e-05, + "loss": 0.1927, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 4.924175971216604e-05, + "loss": 0.2468, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 4.923432598385395e-05, + "loss": 0.185, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 4.9226892255541844e-05, + "loss": 0.2038, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 4.921945852722975e-05, + "loss": 0.1751, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 4.921202479891765e-05, + "loss": 0.2254, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 4.920459107060555e-05, + "loss": 0.1783, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 4.9197157342293454e-05, + "loss": 0.202, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 4.9189723613981356e-05, + "loss": 0.2125, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 4.918228988566926e-05, + "loss": 0.1957, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 4.917485615735716e-05, + "loss": 0.2552, + "step": 1610 + }, + { + "epoch": 0.07, + "learning_rate": 4.916742242904507e-05, + "loss": 0.2238, + "step": 1620 + }, + { + "epoch": 0.07, + "learning_rate": 4.915998870073297e-05, + "loss": 0.2815, + "step": 1630 + }, + { + "epoch": 0.07, + "learning_rate": 4.915255497242087e-05, + "loss": 0.2833, + "step": 1640 + }, + { + "epoch": 0.07, + "learning_rate": 4.914512124410877e-05, + "loss": 0.2153, + "step": 1650 + }, + { + "epoch": 0.07, + "learning_rate": 4.9137687515796675e-05, + "loss": 0.1934, + "step": 1660 + }, + { + "epoch": 0.07, + "learning_rate": 4.913025378748458e-05, + "loss": 0.1849, + "step": 1670 + }, + { + "epoch": 0.07, + "learning_rate": 4.912282005917248e-05, + "loss": 0.1726, + "step": 1680 + }, + { + "epoch": 0.07, + "learning_rate": 4.911538633086038e-05, + "loss": 0.1846, + "step": 1690 + }, + { + "epoch": 0.08, + "learning_rate": 4.9107952602548285e-05, + "loss": 0.1558, + "step": 1700 + }, + { + "epoch": 0.08, + "learning_rate": 4.910051887423619e-05, + "loss": 0.2177, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 4.909308514592409e-05, + "loss": 0.1745, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 4.908565141761199e-05, + "loss": 0.1974, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 4.9078217689299895e-05, + "loss": 0.2085, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 4.90707839609878e-05, + "loss": 0.2092, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 4.90633502326757e-05, + "loss": 0.1834, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 4.90559165043636e-05, + "loss": 0.1914, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 4.90484827760515e-05, + "loss": 0.1843, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 4.904104904773941e-05, + "loss": 0.2037, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 4.9033615319427304e-05, + "loss": 0.1713, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 4.902618159111521e-05, + "loss": 0.1875, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 4.901874786280311e-05, + "loss": 0.2575, + "step": 1820 + }, + { + "epoch": 0.08, + "learning_rate": 4.901131413449102e-05, + "loss": 0.1896, + "step": 1830 + }, + { + "epoch": 0.08, + "learning_rate": 4.9003880406178914e-05, + "loss": 0.1888, + "step": 1840 + }, + { + "epoch": 0.08, + "learning_rate": 4.899644667786682e-05, + "loss": 0.2391, + "step": 1850 + }, + { + "epoch": 0.08, + "learning_rate": 4.8989012949554726e-05, + "loss": 0.2394, + "step": 1860 + }, + { + "epoch": 0.08, + "learning_rate": 4.898157922124262e-05, + "loss": 0.2168, + "step": 1870 + }, + { + "epoch": 0.08, + "learning_rate": 4.897414549293053e-05, + "loss": 0.2082, + "step": 1880 + }, + { + "epoch": 0.08, + "learning_rate": 4.896671176461843e-05, + "loss": 0.1961, + "step": 1890 + }, + { + "epoch": 0.08, + "learning_rate": 4.895927803630633e-05, + "loss": 0.2373, + "step": 1900 + }, + { + "epoch": 0.08, + "learning_rate": 4.895184430799423e-05, + "loss": 0.1806, + "step": 1910 + }, + { + "epoch": 0.09, + "learning_rate": 4.8944410579682135e-05, + "loss": 0.2002, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 4.893697685137004e-05, + "loss": 0.1931, + "step": 1930 + }, + { + "epoch": 0.09, + "learning_rate": 4.892954312305794e-05, + "loss": 0.2051, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 4.892210939474584e-05, + "loss": 0.2271, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 4.8914675666433745e-05, + "loss": 0.1875, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 4.890724193812165e-05, + "loss": 0.2059, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 4.889980820980955e-05, + "loss": 0.2224, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 4.889237448149745e-05, + "loss": 0.1685, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 4.8884940753185355e-05, + "loss": 0.1726, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 4.887750702487326e-05, + "loss": 0.209, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 4.887007329656116e-05, + "loss": 0.1874, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 4.886263956824906e-05, + "loss": 0.2343, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 4.8855205839936966e-05, + "loss": 0.2186, + "step": 2040 + }, + { + "epoch": 0.09, + "learning_rate": 4.884777211162487e-05, + "loss": 0.2283, + "step": 2050 + }, + { + "epoch": 0.09, + "learning_rate": 4.8840338383312764e-05, + "loss": 0.1499, + "step": 2060 + }, + { + "epoch": 0.09, + "learning_rate": 4.8832904655000673e-05, + "loss": 0.2142, + "step": 2070 + }, + { + "epoch": 0.09, + "learning_rate": 4.882547092668857e-05, + "loss": 0.1911, + "step": 2080 + }, + { + "epoch": 0.09, + "learning_rate": 4.881803719837648e-05, + "loss": 0.2193, + "step": 2090 + }, + { + "epoch": 0.09, + "learning_rate": 4.881060347006438e-05, + "loss": 0.183, + "step": 2100 + }, + { + "epoch": 0.09, + "learning_rate": 4.880316974175228e-05, + "loss": 0.1663, + "step": 2110 + }, + { + "epoch": 0.09, + "learning_rate": 4.8795736013440186e-05, + "loss": 0.1923, + "step": 2120 + }, + { + "epoch": 0.09, + "learning_rate": 4.878830228512808e-05, + "loss": 0.2149, + "step": 2130 + }, + { + "epoch": 0.09, + "learning_rate": 4.878086855681599e-05, + "loss": 0.1499, + "step": 2140 + }, + { + "epoch": 0.1, + "learning_rate": 4.877343482850389e-05, + "loss": 0.245, + "step": 2150 + }, + { + "epoch": 0.1, + "learning_rate": 4.87660011001918e-05, + "loss": 0.197, + "step": 2160 + }, + { + "epoch": 0.1, + "learning_rate": 4.875856737187969e-05, + "loss": 0.2179, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 4.8751133643567595e-05, + "loss": 0.2034, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 4.87436999152555e-05, + "loss": 0.2402, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 4.87362661869434e-05, + "loss": 0.2055, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 4.87288324586313e-05, + "loss": 0.1813, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 4.8721398730319205e-05, + "loss": 0.1743, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 4.8713965002007115e-05, + "loss": 0.1802, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 4.870653127369501e-05, + "loss": 0.2603, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 4.869909754538291e-05, + "loss": 0.1839, + "step": 2250 + }, + { + "epoch": 0.1, + "learning_rate": 4.8691663817070816e-05, + "loss": 0.177, + "step": 2260 + }, + { + "epoch": 0.1, + "learning_rate": 4.868423008875872e-05, + "loss": 0.2351, + "step": 2270 + }, + { + "epoch": 0.1, + "learning_rate": 4.867679636044662e-05, + "loss": 0.141, + "step": 2280 + }, + { + "epoch": 0.1, + "learning_rate": 4.8669362632134523e-05, + "loss": 0.2108, + "step": 2290 + }, + { + "epoch": 0.1, + "learning_rate": 4.8661928903822426e-05, + "loss": 0.1753, + "step": 2300 + }, + { + "epoch": 0.1, + "learning_rate": 4.865449517551033e-05, + "loss": 0.1649, + "step": 2310 + }, + { + "epoch": 0.1, + "learning_rate": 4.8647061447198224e-05, + "loss": 0.1789, + "step": 2320 + }, + { + "epoch": 0.1, + "learning_rate": 4.8639627718886134e-05, + "loss": 0.1854, + "step": 2330 + }, + { + "epoch": 0.1, + "learning_rate": 4.8632193990574036e-05, + "loss": 0.2197, + "step": 2340 + }, + { + "epoch": 0.1, + "learning_rate": 4.862476026226194e-05, + "loss": 0.209, + "step": 2350 + }, + { + "epoch": 0.1, + "learning_rate": 4.861732653394984e-05, + "loss": 0.2341, + "step": 2360 + }, + { + "epoch": 0.1, + "learning_rate": 4.8609892805637744e-05, + "loss": 0.2378, + "step": 2370 + }, + { + "epoch": 0.11, + "learning_rate": 4.860245907732565e-05, + "loss": 0.2239, + "step": 2380 + }, + { + "epoch": 0.11, + "learning_rate": 4.859502534901354e-05, + "loss": 0.1769, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 4.858759162070145e-05, + "loss": 0.1461, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 4.858015789238935e-05, + "loss": 0.2117, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 4.857272416407726e-05, + "loss": 0.1941, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 4.856529043576515e-05, + "loss": 0.2028, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 4.855785670745306e-05, + "loss": 0.2372, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 4.855042297914096e-05, + "loss": 0.2225, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 4.854298925082886e-05, + "loss": 0.2131, + "step": 2460 + }, + { + "epoch": 0.11, + "learning_rate": 4.853555552251677e-05, + "loss": 0.2304, + "step": 2470 + }, + { + "epoch": 0.11, + "learning_rate": 4.8528121794204666e-05, + "loss": 0.2115, + "step": 2480 + }, + { + "epoch": 0.11, + "learning_rate": 4.8520688065892575e-05, + "loss": 0.2202, + "step": 2490 + }, + { + "epoch": 0.11, + "learning_rate": 4.851325433758047e-05, + "loss": 0.2384, + "step": 2500 + }, + { + "epoch": 0.11, + "learning_rate": 4.8505820609268373e-05, + "loss": 0.1571, + "step": 2510 + }, + { + "epoch": 0.11, + "learning_rate": 4.8498386880956276e-05, + "loss": 0.2124, + "step": 2520 + }, + { + "epoch": 0.11, + "learning_rate": 4.849095315264418e-05, + "loss": 0.2251, + "step": 2530 + }, + { + "epoch": 0.11, + "learning_rate": 4.848351942433208e-05, + "loss": 0.1642, + "step": 2540 + }, + { + "epoch": 0.11, + "learning_rate": 4.8476085696019984e-05, + "loss": 0.1827, + "step": 2550 + }, + { + "epoch": 0.11, + "learning_rate": 4.8468651967707886e-05, + "loss": 0.244, + "step": 2560 + }, + { + "epoch": 0.11, + "learning_rate": 4.846121823939579e-05, + "loss": 0.1678, + "step": 2570 + }, + { + "epoch": 0.11, + "learning_rate": 4.845378451108369e-05, + "loss": 0.1808, + "step": 2580 + }, + { + "epoch": 0.11, + "learning_rate": 4.8446350782771594e-05, + "loss": 0.2398, + "step": 2590 + }, + { + "epoch": 0.12, + "learning_rate": 4.84389170544595e-05, + "loss": 0.1785, + "step": 2600 + }, + { + "epoch": 0.12, + "learning_rate": 4.84314833261474e-05, + "loss": 0.1615, + "step": 2610 + }, + { + "epoch": 0.12, + "learning_rate": 4.84240495978353e-05, + "loss": 0.1599, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 4.8416615869523204e-05, + "loss": 0.27, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 4.840918214121111e-05, + "loss": 0.154, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 4.840174841289901e-05, + "loss": 0.174, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 4.839431468458691e-05, + "loss": 0.241, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 4.838688095627481e-05, + "loss": 0.2166, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 4.837944722796272e-05, + "loss": 0.1899, + "step": 2680 + }, + { + "epoch": 0.12, + "learning_rate": 4.837201349965061e-05, + "loss": 0.2102, + "step": 2690 + }, + { + "epoch": 0.12, + "learning_rate": 4.836457977133852e-05, + "loss": 0.2238, + "step": 2700 + }, + { + "epoch": 0.12, + "learning_rate": 4.8357146043026425e-05, + "loss": 0.2188, + "step": 2710 + }, + { + "epoch": 0.12, + "learning_rate": 4.834971231471432e-05, + "loss": 0.2044, + "step": 2720 + }, + { + "epoch": 0.12, + "learning_rate": 4.834227858640223e-05, + "loss": 0.2296, + "step": 2730 + }, + { + "epoch": 0.12, + "learning_rate": 4.8334844858090126e-05, + "loss": 0.1713, + "step": 2740 + }, + { + "epoch": 0.12, + "learning_rate": 4.8327411129778035e-05, + "loss": 0.2424, + "step": 2750 + }, + { + "epoch": 0.12, + "learning_rate": 4.831997740146593e-05, + "loss": 0.1632, + "step": 2760 + }, + { + "epoch": 0.12, + "learning_rate": 4.831254367315384e-05, + "loss": 0.2513, + "step": 2770 + }, + { + "epoch": 0.12, + "learning_rate": 4.8305109944841736e-05, + "loss": 0.2334, + "step": 2780 + }, + { + "epoch": 0.12, + "learning_rate": 4.829767621652964e-05, + "loss": 0.1786, + "step": 2790 + }, + { + "epoch": 0.12, + "learning_rate": 4.829024248821754e-05, + "loss": 0.2318, + "step": 2800 + }, + { + "epoch": 0.12, + "learning_rate": 4.8282808759905444e-05, + "loss": 0.21, + "step": 2810 + }, + { + "epoch": 0.12, + "learning_rate": 4.827537503159335e-05, + "loss": 0.2108, + "step": 2820 + }, + { + "epoch": 0.13, + "learning_rate": 4.826794130328125e-05, + "loss": 0.1916, + "step": 2830 + }, + { + "epoch": 0.13, + "learning_rate": 4.826050757496915e-05, + "loss": 0.2485, + "step": 2840 + }, + { + "epoch": 0.13, + "learning_rate": 4.8253073846657054e-05, + "loss": 0.1844, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 4.824564011834496e-05, + "loss": 0.1989, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 4.823820639003286e-05, + "loss": 0.2632, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 4.823077266172076e-05, + "loss": 0.1758, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 4.8223338933408665e-05, + "loss": 0.1563, + "step": 2890 + }, + { + "epoch": 0.13, + "learning_rate": 4.821590520509657e-05, + "loss": 0.1916, + "step": 2900 + }, + { + "epoch": 0.13, + "learning_rate": 4.820847147678447e-05, + "loss": 0.1954, + "step": 2910 + }, + { + "epoch": 0.13, + "learning_rate": 4.820103774847237e-05, + "loss": 0.1793, + "step": 2920 + }, + { + "epoch": 0.13, + "learning_rate": 4.819360402016027e-05, + "loss": 0.1765, + "step": 2930 + }, + { + "epoch": 0.13, + "learning_rate": 4.818617029184818e-05, + "loss": 0.1776, + "step": 2940 + }, + { + "epoch": 0.13, + "learning_rate": 4.817873656353607e-05, + "loss": 0.2006, + "step": 2950 + }, + { + "epoch": 0.13, + "learning_rate": 4.817130283522398e-05, + "loss": 0.2525, + "step": 2960 + }, + { + "epoch": 0.13, + "learning_rate": 4.8163869106911885e-05, + "loss": 0.183, + "step": 2970 + }, + { + "epoch": 0.13, + "learning_rate": 4.815643537859979e-05, + "loss": 0.2289, + "step": 2980 + }, + { + "epoch": 0.13, + "learning_rate": 4.814900165028769e-05, + "loss": 0.1713, + "step": 2990 + }, + { + "epoch": 0.13, + "learning_rate": 4.8141567921975586e-05, + "loss": 0.2052, + "step": 3000 + }, + { + "epoch": 0.13, + "learning_rate": 4.8134134193663496e-05, + "loss": 0.1598, + "step": 3010 + }, + { + "epoch": 0.13, + "learning_rate": 4.812670046535139e-05, + "loss": 0.1768, + "step": 3020 + }, + { + "epoch": 0.13, + "learning_rate": 4.81192667370393e-05, + "loss": 0.1931, + "step": 3030 + }, + { + "epoch": 0.13, + "learning_rate": 4.8111833008727197e-05, + "loss": 0.2053, + "step": 3040 + }, + { + "epoch": 0.14, + "learning_rate": 4.81043992804151e-05, + "loss": 0.2703, + "step": 3050 + }, + { + "epoch": 0.14, + "learning_rate": 4.8096965552103e-05, + "loss": 0.2645, + "step": 3060 + }, + { + "epoch": 0.14, + "learning_rate": 4.8089531823790904e-05, + "loss": 0.1826, + "step": 3070 + }, + { + "epoch": 0.14, + "learning_rate": 4.8082098095478814e-05, + "loss": 0.1774, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 4.807466436716671e-05, + "loss": 0.1832, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 4.806723063885462e-05, + "loss": 0.2073, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 4.8059796910542515e-05, + "loss": 0.1576, + "step": 3110 + }, + { + "epoch": 0.14, + "learning_rate": 4.805236318223042e-05, + "loss": 0.2053, + "step": 3120 + }, + { + "epoch": 0.14, + "learning_rate": 4.804492945391832e-05, + "loss": 0.1843, + "step": 3130 + }, + { + "epoch": 0.14, + "learning_rate": 4.803749572560622e-05, + "loss": 0.175, + "step": 3140 + }, + { + "epoch": 0.14, + "learning_rate": 4.8030061997294125e-05, + "loss": 0.2084, + "step": 3150 + }, + { + "epoch": 0.14, + "learning_rate": 4.802262826898203e-05, + "loss": 0.189, + "step": 3160 + }, + { + "epoch": 0.14, + "learning_rate": 4.801519454066993e-05, + "loss": 0.1629, + "step": 3170 + }, + { + "epoch": 0.14, + "learning_rate": 4.800776081235783e-05, + "loss": 0.1818, + "step": 3180 + }, + { + "epoch": 0.14, + "learning_rate": 4.8000327084045735e-05, + "loss": 0.2298, + "step": 3190 + }, + { + "epoch": 0.14, + "learning_rate": 4.799289335573364e-05, + "loss": 0.2191, + "step": 3200 + }, + { + "epoch": 0.14, + "learning_rate": 4.798545962742154e-05, + "loss": 0.2111, + "step": 3210 + }, + { + "epoch": 0.14, + "learning_rate": 4.797802589910944e-05, + "loss": 0.203, + "step": 3220 + }, + { + "epoch": 0.14, + "learning_rate": 4.7970592170797346e-05, + "loss": 0.2435, + "step": 3230 + }, + { + "epoch": 0.14, + "learning_rate": 4.796315844248525e-05, + "loss": 0.2138, + "step": 3240 + }, + { + "epoch": 0.14, + "learning_rate": 4.795572471417315e-05, + "loss": 0.2126, + "step": 3250 + }, + { + "epoch": 0.14, + "learning_rate": 4.7948290985861047e-05, + "loss": 0.1502, + "step": 3260 + }, + { + "epoch": 0.14, + "learning_rate": 4.7940857257548956e-05, + "loss": 0.2213, + "step": 3270 + }, + { + "epoch": 0.15, + "learning_rate": 4.793342352923685e-05, + "loss": 0.1981, + "step": 3280 + }, + { + "epoch": 0.15, + "learning_rate": 4.792598980092476e-05, + "loss": 0.1934, + "step": 3290 + }, + { + "epoch": 0.15, + "learning_rate": 4.791855607261266e-05, + "loss": 0.2297, + "step": 3300 + }, + { + "epoch": 0.15, + "learning_rate": 4.7911122344300566e-05, + "loss": 0.2176, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 4.790368861598847e-05, + "loss": 0.1625, + "step": 3320 + }, + { + "epoch": 0.15, + "learning_rate": 4.7896254887676365e-05, + "loss": 0.2083, + "step": 3330 + }, + { + "epoch": 0.15, + "learning_rate": 4.7888821159364274e-05, + "loss": 0.2289, + "step": 3340 + }, + { + "epoch": 0.15, + "learning_rate": 4.788138743105217e-05, + "loss": 0.2147, + "step": 3350 + }, + { + "epoch": 0.15, + "learning_rate": 4.787395370274008e-05, + "loss": 0.1903, + "step": 3360 + }, + { + "epoch": 0.15, + "learning_rate": 4.7866519974427975e-05, + "loss": 0.211, + "step": 3370 + }, + { + "epoch": 0.15, + "learning_rate": 4.785908624611588e-05, + "loss": 0.203, + "step": 3380 + }, + { + "epoch": 0.15, + "learning_rate": 4.785165251780378e-05, + "loss": 0.1663, + "step": 3390 + }, + { + "epoch": 0.15, + "learning_rate": 4.784421878949168e-05, + "loss": 0.1941, + "step": 3400 + }, + { + "epoch": 0.15, + "learning_rate": 4.7836785061179585e-05, + "loss": 0.1613, + "step": 3410 + }, + { + "epoch": 0.15, + "learning_rate": 4.782935133286749e-05, + "loss": 0.1746, + "step": 3420 + }, + { + "epoch": 0.15, + "learning_rate": 4.782191760455539e-05, + "loss": 0.1815, + "step": 3430 + }, + { + "epoch": 0.15, + "learning_rate": 4.781448387624329e-05, + "loss": 0.2477, + "step": 3440 + }, + { + "epoch": 0.15, + "learning_rate": 4.7807050147931196e-05, + "loss": 0.1687, + "step": 3450 + }, + { + "epoch": 0.15, + "learning_rate": 4.77996164196191e-05, + "loss": 0.2643, + "step": 3460 + }, + { + "epoch": 0.15, + "learning_rate": 4.7792182691307e-05, + "loss": 0.1724, + "step": 3470 + }, + { + "epoch": 0.15, + "learning_rate": 4.77847489629949e-05, + "loss": 0.1636, + "step": 3480 + }, + { + "epoch": 0.15, + "learning_rate": 4.7777315234682806e-05, + "loss": 0.214, + "step": 3490 + }, + { + "epoch": 0.15, + "learning_rate": 4.776988150637071e-05, + "loss": 0.2293, + "step": 3500 + }, + { + "epoch": 0.16, + "learning_rate": 4.776244777805861e-05, + "loss": 0.1665, + "step": 3510 + }, + { + "epoch": 0.16, + "learning_rate": 4.7755014049746514e-05, + "loss": 0.1933, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 4.7747580321434416e-05, + "loss": 0.1889, + "step": 3530 + }, + { + "epoch": 0.16, + "learning_rate": 4.774014659312231e-05, + "loss": 0.1924, + "step": 3540 + }, + { + "epoch": 0.16, + "learning_rate": 4.773271286481022e-05, + "loss": 0.1279, + "step": 3550 + }, + { + "epoch": 0.16, + "learning_rate": 4.772527913649812e-05, + "loss": 0.1947, + "step": 3560 + }, + { + "epoch": 0.16, + "learning_rate": 4.7717845408186027e-05, + "loss": 0.1765, + "step": 3570 + }, + { + "epoch": 0.16, + "learning_rate": 4.771041167987393e-05, + "loss": 0.1602, + "step": 3580 + }, + { + "epoch": 0.16, + "learning_rate": 4.7702977951561825e-05, + "loss": 0.1642, + "step": 3590 + }, + { + "epoch": 0.16, + "learning_rate": 4.7695544223249734e-05, + "loss": 0.1933, + "step": 3600 + }, + { + "epoch": 0.16, + "learning_rate": 4.768811049493763e-05, + "loss": 0.1973, + "step": 3610 + }, + { + "epoch": 0.16, + "learning_rate": 4.768067676662554e-05, + "loss": 0.2231, + "step": 3620 + }, + { + "epoch": 0.16, + "learning_rate": 4.7673243038313435e-05, + "loss": 0.2081, + "step": 3630 + }, + { + "epoch": 0.16, + "learning_rate": 4.7665809310001345e-05, + "loss": 0.1543, + "step": 3640 + }, + { + "epoch": 0.16, + "learning_rate": 4.765837558168924e-05, + "loss": 0.1711, + "step": 3650 + }, + { + "epoch": 0.16, + "learning_rate": 4.765094185337714e-05, + "loss": 0.2388, + "step": 3660 + }, + { + "epoch": 0.16, + "learning_rate": 4.7643508125065046e-05, + "loss": 0.1933, + "step": 3670 + }, + { + "epoch": 0.16, + "learning_rate": 4.763607439675295e-05, + "loss": 0.2586, + "step": 3680 + }, + { + "epoch": 0.16, + "learning_rate": 4.762864066844086e-05, + "loss": 0.1674, + "step": 3690 + }, + { + "epoch": 0.16, + "learning_rate": 4.762120694012875e-05, + "loss": 0.1698, + "step": 3700 + }, + { + "epoch": 0.16, + "learning_rate": 4.7613773211816656e-05, + "loss": 0.2268, + "step": 3710 + }, + { + "epoch": 0.16, + "learning_rate": 4.760633948350456e-05, + "loss": 0.2456, + "step": 3720 + }, + { + "epoch": 0.17, + "learning_rate": 4.759890575519246e-05, + "loss": 0.2007, + "step": 3730 + }, + { + "epoch": 0.17, + "learning_rate": 4.7591472026880364e-05, + "loss": 0.1754, + "step": 3740 + }, + { + "epoch": 0.17, + "learning_rate": 4.7584038298568266e-05, + "loss": 0.1773, + "step": 3750 + }, + { + "epoch": 0.17, + "learning_rate": 4.757660457025617e-05, + "loss": 0.1932, + "step": 3760 + }, + { + "epoch": 0.17, + "learning_rate": 4.756917084194407e-05, + "loss": 0.1461, + "step": 3770 + }, + { + "epoch": 0.17, + "learning_rate": 4.7561737113631974e-05, + "loss": 0.2114, + "step": 3780 + }, + { + "epoch": 0.17, + "learning_rate": 4.7554303385319877e-05, + "loss": 0.1476, + "step": 3790 + }, + { + "epoch": 0.17, + "learning_rate": 4.754686965700777e-05, + "loss": 0.1578, + "step": 3800 + }, + { + "epoch": 0.17, + "learning_rate": 4.753943592869568e-05, + "loss": 0.1847, + "step": 3810 + }, + { + "epoch": 0.17, + "learning_rate": 4.7532002200383584e-05, + "loss": 0.1762, + "step": 3820 + }, + { + "epoch": 0.17, + "learning_rate": 4.752456847207149e-05, + "loss": 0.203, + "step": 3830 + }, + { + "epoch": 0.17, + "learning_rate": 4.751713474375939e-05, + "loss": 0.2478, + "step": 3840 + }, + { + "epoch": 0.17, + "learning_rate": 4.750970101544729e-05, + "loss": 0.1839, + "step": 3850 + }, + { + "epoch": 0.17, + "learning_rate": 4.7502267287135195e-05, + "loss": 0.1712, + "step": 3860 + }, + { + "epoch": 0.17, + "learning_rate": 4.749483355882309e-05, + "loss": 0.1691, + "step": 3870 + }, + { + "epoch": 0.17, + "learning_rate": 4.7487399830511e-05, + "loss": 0.1733, + "step": 3880 + }, + { + "epoch": 0.17, + "learning_rate": 4.7479966102198896e-05, + "loss": 0.1747, + "step": 3890 + }, + { + "epoch": 0.17, + "learning_rate": 4.7472532373886805e-05, + "loss": 0.1571, + "step": 3900 + }, + { + "epoch": 0.17, + "learning_rate": 4.74650986455747e-05, + "loss": 0.2568, + "step": 3910 + }, + { + "epoch": 0.17, + "learning_rate": 4.74576649172626e-05, + "loss": 0.1446, + "step": 3920 + }, + { + "epoch": 0.17, + "learning_rate": 4.7450231188950506e-05, + "loss": 0.1995, + "step": 3930 + }, + { + "epoch": 0.17, + "learning_rate": 4.744279746063841e-05, + "loss": 0.1421, + "step": 3940 + }, + { + "epoch": 0.17, + "learning_rate": 4.743536373232632e-05, + "loss": 0.2051, + "step": 3950 + }, + { + "epoch": 0.18, + "learning_rate": 4.7427930004014214e-05, + "loss": 0.1617, + "step": 3960 + }, + { + "epoch": 0.18, + "learning_rate": 4.742049627570212e-05, + "loss": 0.2006, + "step": 3970 + }, + { + "epoch": 0.18, + "learning_rate": 4.741306254739002e-05, + "loss": 0.2343, + "step": 3980 + }, + { + "epoch": 0.18, + "learning_rate": 4.740562881907792e-05, + "loss": 0.2017, + "step": 3990 + }, + { + "epoch": 0.18, + "learning_rate": 4.7398195090765824e-05, + "loss": 0.1908, + "step": 4000 + }, + { + "epoch": 0.18, + "learning_rate": 4.7390761362453726e-05, + "loss": 0.2539, + "step": 4010 + }, + { + "epoch": 0.18, + "learning_rate": 4.738332763414163e-05, + "loss": 0.2035, + "step": 4020 + }, + { + "epoch": 0.18, + "learning_rate": 4.737589390582953e-05, + "loss": 0.192, + "step": 4030 + }, + { + "epoch": 0.18, + "learning_rate": 4.7368460177517434e-05, + "loss": 0.2101, + "step": 4040 + }, + { + "epoch": 0.18, + "learning_rate": 4.736102644920534e-05, + "loss": 0.2221, + "step": 4050 + }, + { + "epoch": 0.18, + "learning_rate": 4.735359272089324e-05, + "loss": 0.2134, + "step": 4060 + }, + { + "epoch": 0.18, + "learning_rate": 4.734615899258114e-05, + "loss": 0.1435, + "step": 4070 + }, + { + "epoch": 0.18, + "learning_rate": 4.7338725264269045e-05, + "loss": 0.2155, + "step": 4080 + }, + { + "epoch": 0.18, + "learning_rate": 4.733129153595695e-05, + "loss": 0.1978, + "step": 4090 + }, + { + "epoch": 0.18, + "learning_rate": 4.732385780764485e-05, + "loss": 0.1943, + "step": 4100 + }, + { + "epoch": 0.18, + "learning_rate": 4.731642407933275e-05, + "loss": 0.1801, + "step": 4110 + }, + { + "epoch": 0.18, + "learning_rate": 4.7308990351020655e-05, + "loss": 0.1698, + "step": 4120 + }, + { + "epoch": 0.18, + "learning_rate": 4.730155662270855e-05, + "loss": 0.1645, + "step": 4130 + }, + { + "epoch": 0.18, + "learning_rate": 4.729412289439646e-05, + "loss": 0.2261, + "step": 4140 + }, + { + "epoch": 0.18, + "learning_rate": 4.7286689166084356e-05, + "loss": 0.1853, + "step": 4150 + }, + { + "epoch": 0.18, + "learning_rate": 4.7279255437772265e-05, + "loss": 0.1799, + "step": 4160 + }, + { + "epoch": 0.18, + "learning_rate": 4.727182170946016e-05, + "loss": 0.1956, + "step": 4170 + }, + { + "epoch": 0.19, + "learning_rate": 4.726438798114807e-05, + "loss": 0.1841, + "step": 4180 + }, + { + "epoch": 0.19, + "learning_rate": 4.725695425283597e-05, + "loss": 0.1847, + "step": 4190 + }, + { + "epoch": 0.19, + "learning_rate": 4.724952052452387e-05, + "loss": 0.2544, + "step": 4200 + }, + { + "epoch": 0.19, + "learning_rate": 4.724208679621178e-05, + "loss": 0.199, + "step": 4210 + }, + { + "epoch": 0.19, + "learning_rate": 4.7234653067899674e-05, + "loss": 0.1911, + "step": 4220 + }, + { + "epoch": 0.19, + "learning_rate": 4.722721933958758e-05, + "loss": 0.1555, + "step": 4230 + }, + { + "epoch": 0.19, + "learning_rate": 4.721978561127548e-05, + "loss": 0.2374, + "step": 4240 + }, + { + "epoch": 0.19, + "learning_rate": 4.721235188296338e-05, + "loss": 0.2418, + "step": 4250 + }, + { + "epoch": 0.19, + "learning_rate": 4.7204918154651284e-05, + "loss": 0.2113, + "step": 4260 + }, + { + "epoch": 0.19, + "learning_rate": 4.719748442633919e-05, + "loss": 0.1936, + "step": 4270 + }, + { + "epoch": 0.19, + "learning_rate": 4.719005069802709e-05, + "loss": 0.1843, + "step": 4280 + }, + { + "epoch": 0.19, + "learning_rate": 4.718261696971499e-05, + "loss": 0.1823, + "step": 4290 + }, + { + "epoch": 0.19, + "learning_rate": 4.7175183241402895e-05, + "loss": 0.1758, + "step": 4300 + }, + { + "epoch": 0.19, + "learning_rate": 4.71677495130908e-05, + "loss": 0.1671, + "step": 4310 + }, + { + "epoch": 0.19, + "learning_rate": 4.71603157847787e-05, + "loss": 0.1551, + "step": 4320 + }, + { + "epoch": 0.19, + "learning_rate": 4.71528820564666e-05, + "loss": 0.2358, + "step": 4330 + }, + { + "epoch": 0.19, + "learning_rate": 4.7145448328154505e-05, + "loss": 0.1592, + "step": 4340 + }, + { + "epoch": 0.19, + "learning_rate": 4.713801459984241e-05, + "loss": 0.1948, + "step": 4350 + }, + { + "epoch": 0.19, + "learning_rate": 4.713058087153031e-05, + "loss": 0.2059, + "step": 4360 + }, + { + "epoch": 0.19, + "learning_rate": 4.712314714321821e-05, + "loss": 0.1999, + "step": 4370 + }, + { + "epoch": 0.19, + "learning_rate": 4.7115713414906115e-05, + "loss": 0.2258, + "step": 4380 + }, + { + "epoch": 0.19, + "learning_rate": 4.710827968659402e-05, + "loss": 0.2206, + "step": 4390 + }, + { + "epoch": 0.19, + "learning_rate": 4.710084595828192e-05, + "loss": 0.2226, + "step": 4400 + }, + { + "epoch": 0.2, + "learning_rate": 4.7093412229969816e-05, + "loss": 0.1959, + "step": 4410 + }, + { + "epoch": 0.2, + "learning_rate": 4.7085978501657725e-05, + "loss": 0.1913, + "step": 4420 + }, + { + "epoch": 0.2, + "learning_rate": 4.707854477334563e-05, + "loss": 0.1336, + "step": 4430 + }, + { + "epoch": 0.2, + "learning_rate": 4.707111104503353e-05, + "loss": 0.1619, + "step": 4440 + }, + { + "epoch": 0.2, + "learning_rate": 4.706367731672143e-05, + "loss": 0.1998, + "step": 4450 + }, + { + "epoch": 0.2, + "learning_rate": 4.705624358840933e-05, + "loss": 0.2151, + "step": 4460 + }, + { + "epoch": 0.2, + "learning_rate": 4.704880986009724e-05, + "loss": 0.2118, + "step": 4470 + }, + { + "epoch": 0.2, + "learning_rate": 4.7041376131785134e-05, + "loss": 0.1283, + "step": 4480 + }, + { + "epoch": 0.2, + "learning_rate": 4.7033942403473044e-05, + "loss": 0.1134, + "step": 4490 + }, + { + "epoch": 0.2, + "learning_rate": 4.702650867516094e-05, + "loss": 0.1818, + "step": 4500 + }, + { + "epoch": 0.2, + "learning_rate": 4.701907494684885e-05, + "loss": 0.1521, + "step": 4510 + }, + { + "epoch": 0.2, + "learning_rate": 4.7011641218536744e-05, + "loss": 0.1358, + "step": 4520 + }, + { + "epoch": 0.2, + "learning_rate": 4.700420749022465e-05, + "loss": 0.2277, + "step": 4530 + }, + { + "epoch": 0.2, + "learning_rate": 4.699677376191255e-05, + "loss": 0.1989, + "step": 4540 + }, + { + "epoch": 0.2, + "learning_rate": 4.698934003360045e-05, + "loss": 0.1648, + "step": 4550 + }, + { + "epoch": 0.2, + "learning_rate": 4.698190630528836e-05, + "loss": 0.1664, + "step": 4560 + }, + { + "epoch": 0.2, + "learning_rate": 4.697447257697626e-05, + "loss": 0.2396, + "step": 4570 + }, + { + "epoch": 0.2, + "learning_rate": 4.696703884866416e-05, + "loss": 0.2495, + "step": 4580 + }, + { + "epoch": 0.2, + "learning_rate": 4.695960512035206e-05, + "loss": 0.2068, + "step": 4590 + }, + { + "epoch": 0.2, + "learning_rate": 4.6952171392039965e-05, + "loss": 0.2177, + "step": 4600 + }, + { + "epoch": 0.2, + "learning_rate": 4.694473766372787e-05, + "loss": 0.1966, + "step": 4610 + }, + { + "epoch": 0.2, + "learning_rate": 4.693730393541577e-05, + "loss": 0.2035, + "step": 4620 + }, + { + "epoch": 0.2, + "learning_rate": 4.692987020710367e-05, + "loss": 0.1906, + "step": 4630 + }, + { + "epoch": 0.21, + "learning_rate": 4.6922436478791575e-05, + "loss": 0.1831, + "step": 4640 + }, + { + "epoch": 0.21, + "learning_rate": 4.691500275047948e-05, + "loss": 0.1612, + "step": 4650 + }, + { + "epoch": 0.21, + "learning_rate": 4.690756902216738e-05, + "loss": 0.2006, + "step": 4660 + }, + { + "epoch": 0.21, + "learning_rate": 4.690013529385528e-05, + "loss": 0.2511, + "step": 4670 + }, + { + "epoch": 0.21, + "learning_rate": 4.6892701565543186e-05, + "loss": 0.1666, + "step": 4680 + }, + { + "epoch": 0.21, + "learning_rate": 4.688526783723109e-05, + "loss": 0.2172, + "step": 4690 + }, + { + "epoch": 0.21, + "learning_rate": 4.687783410891899e-05, + "loss": 0.1783, + "step": 4700 + }, + { + "epoch": 0.21, + "learning_rate": 4.6870400380606893e-05, + "loss": 0.1952, + "step": 4710 + }, + { + "epoch": 0.21, + "learning_rate": 4.6862966652294796e-05, + "loss": 0.2201, + "step": 4720 + }, + { + "epoch": 0.21, + "learning_rate": 4.68555329239827e-05, + "loss": 0.1923, + "step": 4730 + }, + { + "epoch": 0.21, + "learning_rate": 4.6848099195670594e-05, + "loss": 0.1705, + "step": 4740 + }, + { + "epoch": 0.21, + "learning_rate": 4.6840665467358504e-05, + "loss": 0.1424, + "step": 4750 + }, + { + "epoch": 0.21, + "learning_rate": 4.68332317390464e-05, + "loss": 0.1996, + "step": 4760 + }, + { + "epoch": 0.21, + "learning_rate": 4.682579801073431e-05, + "loss": 0.2296, + "step": 4770 + }, + { + "epoch": 0.21, + "learning_rate": 4.6818364282422205e-05, + "loss": 0.1768, + "step": 4780 + }, + { + "epoch": 0.21, + "learning_rate": 4.681093055411011e-05, + "loss": 0.2053, + "step": 4790 + }, + { + "epoch": 0.21, + "learning_rate": 4.680349682579802e-05, + "loss": 0.1634, + "step": 4800 + }, + { + "epoch": 0.21, + "learning_rate": 4.679606309748591e-05, + "loss": 0.1569, + "step": 4810 + }, + { + "epoch": 0.21, + "learning_rate": 4.678862936917382e-05, + "loss": 0.2193, + "step": 4820 + }, + { + "epoch": 0.21, + "learning_rate": 4.678119564086172e-05, + "loss": 0.1778, + "step": 4830 + }, + { + "epoch": 0.21, + "learning_rate": 4.677376191254963e-05, + "loss": 0.1908, + "step": 4840 + }, + { + "epoch": 0.21, + "learning_rate": 4.676632818423752e-05, + "loss": 0.2165, + "step": 4850 + }, + { + "epoch": 0.22, + "learning_rate": 4.6758894455925425e-05, + "loss": 0.2005, + "step": 4860 + }, + { + "epoch": 0.22, + "learning_rate": 4.675146072761333e-05, + "loss": 0.205, + "step": 4870 + }, + { + "epoch": 0.22, + "learning_rate": 4.674402699930123e-05, + "loss": 0.2029, + "step": 4880 + }, + { + "epoch": 0.22, + "learning_rate": 4.673659327098913e-05, + "loss": 0.2093, + "step": 4890 + }, + { + "epoch": 0.22, + "learning_rate": 4.6729159542677036e-05, + "loss": 0.1745, + "step": 4900 + }, + { + "epoch": 0.22, + "learning_rate": 4.672172581436494e-05, + "loss": 0.14, + "step": 4910 + }, + { + "epoch": 0.22, + "learning_rate": 4.671429208605284e-05, + "loss": 0.2007, + "step": 4920 + }, + { + "epoch": 0.22, + "learning_rate": 4.6706858357740743e-05, + "loss": 0.129, + "step": 4930 + }, + { + "epoch": 0.22, + "learning_rate": 4.6699424629428646e-05, + "loss": 0.1493, + "step": 4940 + }, + { + "epoch": 0.22, + "learning_rate": 4.669199090111655e-05, + "loss": 0.2641, + "step": 4950 + }, + { + "epoch": 0.22, + "learning_rate": 4.668455717280445e-05, + "loss": 0.176, + "step": 4960 + }, + { + "epoch": 0.22, + "learning_rate": 4.6677123444492354e-05, + "loss": 0.2108, + "step": 4970 + }, + { + "epoch": 0.22, + "learning_rate": 4.6669689716180256e-05, + "loss": 0.1956, + "step": 4980 + }, + { + "epoch": 0.22, + "learning_rate": 4.666225598786816e-05, + "loss": 0.1753, + "step": 4990 + }, + { + "epoch": 0.22, + "learning_rate": 4.6654822259556055e-05, + "loss": 0.1822, + "step": 5000 + }, + { + "epoch": 0.22, + "learning_rate": 4.6647388531243964e-05, + "loss": 0.2252, + "step": 5010 + }, + { + "epoch": 0.22, + "learning_rate": 4.663995480293186e-05, + "loss": 0.1333, + "step": 5020 + }, + { + "epoch": 0.22, + "learning_rate": 4.663252107461977e-05, + "loss": 0.1664, + "step": 5030 + }, + { + "epoch": 0.22, + "learning_rate": 4.662508734630767e-05, + "loss": 0.2348, + "step": 5040 + }, + { + "epoch": 0.22, + "learning_rate": 4.6617653617995574e-05, + "loss": 0.1736, + "step": 5050 + }, + { + "epoch": 0.22, + "learning_rate": 4.661021988968348e-05, + "loss": 0.2118, + "step": 5060 + }, + { + "epoch": 0.22, + "learning_rate": 4.660278616137137e-05, + "loss": 0.2598, + "step": 5070 + }, + { + "epoch": 0.22, + "learning_rate": 4.659535243305928e-05, + "loss": 0.1735, + "step": 5080 + }, + { + "epoch": 0.23, + "learning_rate": 4.658791870474718e-05, + "loss": 0.193, + "step": 5090 + }, + { + "epoch": 0.23, + "learning_rate": 4.658048497643509e-05, + "loss": 0.151, + "step": 5100 + }, + { + "epoch": 0.23, + "learning_rate": 4.657305124812298e-05, + "loss": 0.1902, + "step": 5110 + }, + { + "epoch": 0.23, + "learning_rate": 4.6565617519810886e-05, + "loss": 0.1973, + "step": 5120 + }, + { + "epoch": 0.23, + "learning_rate": 4.655818379149879e-05, + "loss": 0.2868, + "step": 5130 + }, + { + "epoch": 0.23, + "learning_rate": 4.655075006318669e-05, + "loss": 0.1723, + "step": 5140 + }, + { + "epoch": 0.23, + "learning_rate": 4.6543316334874593e-05, + "loss": 0.2217, + "step": 5150 + }, + { + "epoch": 0.23, + "learning_rate": 4.6535882606562496e-05, + "loss": 0.1885, + "step": 5160 + }, + { + "epoch": 0.23, + "learning_rate": 4.6528448878250405e-05, + "loss": 0.1679, + "step": 5170 + }, + { + "epoch": 0.23, + "learning_rate": 4.65210151499383e-05, + "loss": 0.1811, + "step": 5180 + }, + { + "epoch": 0.23, + "learning_rate": 4.6513581421626204e-05, + "loss": 0.1931, + "step": 5190 + }, + { + "epoch": 0.23, + "learning_rate": 4.6506147693314106e-05, + "loss": 0.1989, + "step": 5200 + }, + { + "epoch": 0.23, + "learning_rate": 4.649871396500201e-05, + "loss": 0.1531, + "step": 5210 + }, + { + "epoch": 0.23, + "learning_rate": 4.649128023668991e-05, + "loss": 0.1949, + "step": 5220 + }, + { + "epoch": 0.23, + "learning_rate": 4.6483846508377814e-05, + "loss": 0.1636, + "step": 5230 + }, + { + "epoch": 0.23, + "learning_rate": 4.647641278006572e-05, + "loss": 0.2281, + "step": 5240 + }, + { + "epoch": 0.23, + "learning_rate": 4.646897905175362e-05, + "loss": 0.1615, + "step": 5250 + }, + { + "epoch": 0.23, + "learning_rate": 4.646154532344152e-05, + "loss": 0.2141, + "step": 5260 + }, + { + "epoch": 0.23, + "learning_rate": 4.6454111595129424e-05, + "loss": 0.2078, + "step": 5270 + }, + { + "epoch": 0.23, + "learning_rate": 4.644667786681732e-05, + "loss": 0.2003, + "step": 5280 + }, + { + "epoch": 0.23, + "learning_rate": 4.643924413850523e-05, + "loss": 0.2235, + "step": 5290 + }, + { + "epoch": 0.23, + "learning_rate": 4.643181041019313e-05, + "loss": 0.198, + "step": 5300 + }, + { + "epoch": 0.24, + "learning_rate": 4.6424376681881035e-05, + "loss": 0.1794, + "step": 5310 + }, + { + "epoch": 0.24, + "learning_rate": 4.641694295356894e-05, + "loss": 0.182, + "step": 5320 + }, + { + "epoch": 0.24, + "learning_rate": 4.640950922525683e-05, + "loss": 0.229, + "step": 5330 + }, + { + "epoch": 0.24, + "learning_rate": 4.640207549694474e-05, + "loss": 0.1439, + "step": 5340 + }, + { + "epoch": 0.24, + "learning_rate": 4.639464176863264e-05, + "loss": 0.1614, + "step": 5350 + }, + { + "epoch": 0.24, + "learning_rate": 4.638720804032055e-05, + "loss": 0.1693, + "step": 5360 + }, + { + "epoch": 0.24, + "learning_rate": 4.6379774312008443e-05, + "loss": 0.2107, + "step": 5370 + }, + { + "epoch": 0.24, + "learning_rate": 4.637234058369635e-05, + "loss": 0.1945, + "step": 5380 + }, + { + "epoch": 0.24, + "learning_rate": 4.636490685538425e-05, + "loss": 0.1587, + "step": 5390 + }, + { + "epoch": 0.24, + "learning_rate": 4.635747312707215e-05, + "loss": 0.1913, + "step": 5400 + }, + { + "epoch": 0.24, + "learning_rate": 4.635003939876006e-05, + "loss": 0.2093, + "step": 5410 + }, + { + "epoch": 0.24, + "learning_rate": 4.6342605670447956e-05, + "loss": 0.2301, + "step": 5420 + }, + { + "epoch": 0.24, + "learning_rate": 4.6335171942135866e-05, + "loss": 0.1478, + "step": 5430 + }, + { + "epoch": 0.24, + "learning_rate": 4.632773821382376e-05, + "loss": 0.1874, + "step": 5440 + }, + { + "epoch": 0.24, + "learning_rate": 4.632030448551167e-05, + "loss": 0.2395, + "step": 5450 + }, + { + "epoch": 0.24, + "learning_rate": 4.631287075719957e-05, + "loss": 0.1632, + "step": 5460 + }, + { + "epoch": 0.24, + "learning_rate": 4.630543702888747e-05, + "loss": 0.2397, + "step": 5470 + }, + { + "epoch": 0.24, + "learning_rate": 4.629800330057537e-05, + "loss": 0.1963, + "step": 5480 + }, + { + "epoch": 0.24, + "learning_rate": 4.6290569572263274e-05, + "loss": 0.1628, + "step": 5490 + }, + { + "epoch": 0.24, + "learning_rate": 4.628313584395118e-05, + "loss": 0.1814, + "step": 5500 + }, + { + "epoch": 0.24, + "learning_rate": 4.627570211563908e-05, + "loss": 0.1712, + "step": 5510 + }, + { + "epoch": 0.24, + "learning_rate": 4.626826838732698e-05, + "loss": 0.1666, + "step": 5520 + }, + { + "epoch": 0.24, + "learning_rate": 4.6260834659014885e-05, + "loss": 0.1819, + "step": 5530 + }, + { + "epoch": 0.25, + "learning_rate": 4.625340093070279e-05, + "loss": 0.2382, + "step": 5540 + }, + { + "epoch": 0.25, + "learning_rate": 4.624596720239069e-05, + "loss": 0.1811, + "step": 5550 + }, + { + "epoch": 0.25, + "learning_rate": 4.623853347407859e-05, + "loss": 0.1673, + "step": 5560 + }, + { + "epoch": 0.25, + "learning_rate": 4.6231099745766495e-05, + "loss": 0.1355, + "step": 5570 + }, + { + "epoch": 0.25, + "learning_rate": 4.62236660174544e-05, + "loss": 0.1699, + "step": 5580 + }, + { + "epoch": 0.25, + "learning_rate": 4.62162322891423e-05, + "loss": 0.2482, + "step": 5590 + }, + { + "epoch": 0.25, + "learning_rate": 4.62087985608302e-05, + "loss": 0.2124, + "step": 5600 + }, + { + "epoch": 0.25, + "learning_rate": 4.62013648325181e-05, + "loss": 0.1775, + "step": 5610 + }, + { + "epoch": 0.25, + "learning_rate": 4.619393110420601e-05, + "loss": 0.2245, + "step": 5620 + }, + { + "epoch": 0.25, + "learning_rate": 4.6186497375893904e-05, + "loss": 0.1317, + "step": 5630 + }, + { + "epoch": 0.25, + "learning_rate": 4.617906364758181e-05, + "loss": 0.1534, + "step": 5640 + }, + { + "epoch": 0.25, + "learning_rate": 4.6171629919269716e-05, + "loss": 0.1495, + "step": 5650 + }, + { + "epoch": 0.25, + "learning_rate": 4.616419619095762e-05, + "loss": 0.2115, + "step": 5660 + }, + { + "epoch": 0.25, + "learning_rate": 4.615676246264552e-05, + "loss": 0.1428, + "step": 5670 + }, + { + "epoch": 0.25, + "learning_rate": 4.6149328734333417e-05, + "loss": 0.1488, + "step": 5680 + }, + { + "epoch": 0.25, + "learning_rate": 4.6141895006021326e-05, + "loss": 0.2301, + "step": 5690 + }, + { + "epoch": 0.25, + "learning_rate": 4.613446127770922e-05, + "loss": 0.1726, + "step": 5700 + }, + { + "epoch": 0.25, + "learning_rate": 4.612702754939713e-05, + "loss": 0.2083, + "step": 5710 + }, + { + "epoch": 0.25, + "learning_rate": 4.611959382108503e-05, + "loss": 0.179, + "step": 5720 + }, + { + "epoch": 0.25, + "learning_rate": 4.611216009277293e-05, + "loss": 0.16, + "step": 5730 + }, + { + "epoch": 0.25, + "learning_rate": 4.610472636446083e-05, + "loss": 0.1489, + "step": 5740 + }, + { + "epoch": 0.25, + "learning_rate": 4.6097292636148735e-05, + "loss": 0.2726, + "step": 5750 + }, + { + "epoch": 0.26, + "learning_rate": 4.608985890783664e-05, + "loss": 0.189, + "step": 5760 + }, + { + "epoch": 0.26, + "learning_rate": 4.608242517952454e-05, + "loss": 0.1789, + "step": 5770 + }, + { + "epoch": 0.26, + "learning_rate": 4.607499145121245e-05, + "loss": 0.1753, + "step": 5780 + }, + { + "epoch": 0.26, + "learning_rate": 4.6067557722900345e-05, + "loss": 0.2578, + "step": 5790 + }, + { + "epoch": 0.26, + "learning_rate": 4.606012399458825e-05, + "loss": 0.1571, + "step": 5800 + }, + { + "epoch": 0.26, + "learning_rate": 4.605269026627615e-05, + "loss": 0.2205, + "step": 5810 + }, + { + "epoch": 0.26, + "learning_rate": 4.604525653796405e-05, + "loss": 0.189, + "step": 5820 + }, + { + "epoch": 0.26, + "learning_rate": 4.6037822809651955e-05, + "loss": 0.2021, + "step": 5830 + }, + { + "epoch": 0.26, + "learning_rate": 4.603038908133986e-05, + "loss": 0.1578, + "step": 5840 + }, + { + "epoch": 0.26, + "learning_rate": 4.602295535302776e-05, + "loss": 0.2302, + "step": 5850 + }, + { + "epoch": 0.26, + "learning_rate": 4.601552162471566e-05, + "loss": 0.2554, + "step": 5860 + }, + { + "epoch": 0.26, + "learning_rate": 4.600808789640356e-05, + "loss": 0.1927, + "step": 5870 + }, + { + "epoch": 0.26, + "learning_rate": 4.600065416809147e-05, + "loss": 0.1964, + "step": 5880 + }, + { + "epoch": 0.26, + "learning_rate": 4.5993220439779364e-05, + "loss": 0.1314, + "step": 5890 + }, + { + "epoch": 0.26, + "learning_rate": 4.598578671146727e-05, + "loss": 0.2192, + "step": 5900 + }, + { + "epoch": 0.26, + "learning_rate": 4.5978352983155176e-05, + "loss": 0.2489, + "step": 5910 + }, + { + "epoch": 0.26, + "learning_rate": 4.597091925484308e-05, + "loss": 0.152, + "step": 5920 + }, + { + "epoch": 0.26, + "learning_rate": 4.596348552653098e-05, + "loss": 0.1859, + "step": 5930 + }, + { + "epoch": 0.26, + "learning_rate": 4.595605179821888e-05, + "loss": 0.178, + "step": 5940 + }, + { + "epoch": 0.26, + "learning_rate": 4.5948618069906786e-05, + "loss": 0.2292, + "step": 5950 + }, + { + "epoch": 0.26, + "learning_rate": 4.594118434159468e-05, + "loss": 0.19, + "step": 5960 + }, + { + "epoch": 0.26, + "learning_rate": 4.593375061328259e-05, + "loss": 0.1743, + "step": 5970 + }, + { + "epoch": 0.26, + "learning_rate": 4.592631688497049e-05, + "loss": 0.1965, + "step": 5980 + }, + { + "epoch": 0.27, + "learning_rate": 4.5918883156658397e-05, + "loss": 0.2271, + "step": 5990 + }, + { + "epoch": 0.27, + "learning_rate": 4.591144942834629e-05, + "loss": 0.2326, + "step": 6000 + }, + { + "epoch": 0.27, + "learning_rate": 4.5904015700034195e-05, + "loss": 0.1787, + "step": 6010 + }, + { + "epoch": 0.27, + "learning_rate": 4.5896581971722104e-05, + "loss": 0.1402, + "step": 6020 + }, + { + "epoch": 0.27, + "learning_rate": 4.588914824341e-05, + "loss": 0.18, + "step": 6030 + }, + { + "epoch": 0.27, + "learning_rate": 4.588171451509791e-05, + "loss": 0.1585, + "step": 6040 + }, + { + "epoch": 0.27, + "learning_rate": 4.5874280786785805e-05, + "loss": 0.2416, + "step": 6050 + }, + { + "epoch": 0.27, + "learning_rate": 4.586684705847371e-05, + "loss": 0.2345, + "step": 6060 + }, + { + "epoch": 0.27, + "learning_rate": 4.585941333016161e-05, + "loss": 0.2101, + "step": 6070 + }, + { + "epoch": 0.27, + "learning_rate": 4.585197960184951e-05, + "loss": 0.2595, + "step": 6080 + }, + { + "epoch": 0.27, + "learning_rate": 4.5844545873537416e-05, + "loss": 0.1963, + "step": 6090 + }, + { + "epoch": 0.27, + "learning_rate": 4.583711214522532e-05, + "loss": 0.2147, + "step": 6100 + }, + { + "epoch": 0.27, + "learning_rate": 4.582967841691322e-05, + "loss": 0.1501, + "step": 6110 + }, + { + "epoch": 0.27, + "learning_rate": 4.582224468860112e-05, + "loss": 0.1831, + "step": 6120 + }, + { + "epoch": 0.27, + "learning_rate": 4.5814810960289026e-05, + "loss": 0.1613, + "step": 6130 + }, + { + "epoch": 0.27, + "learning_rate": 4.580737723197693e-05, + "loss": 0.2332, + "step": 6140 + }, + { + "epoch": 0.27, + "learning_rate": 4.579994350366483e-05, + "loss": 0.1608, + "step": 6150 + }, + { + "epoch": 0.27, + "learning_rate": 4.5792509775352734e-05, + "loss": 0.208, + "step": 6160 + }, + { + "epoch": 0.27, + "learning_rate": 4.5785076047040636e-05, + "loss": 0.2136, + "step": 6170 + }, + { + "epoch": 0.27, + "learning_rate": 4.577764231872854e-05, + "loss": 0.1965, + "step": 6180 + }, + { + "epoch": 0.27, + "learning_rate": 4.577020859041644e-05, + "loss": 0.1749, + "step": 6190 + }, + { + "epoch": 0.27, + "learning_rate": 4.5762774862104344e-05, + "loss": 0.1627, + "step": 6200 + }, + { + "epoch": 0.27, + "learning_rate": 4.5755341133792247e-05, + "loss": 0.1519, + "step": 6210 + }, + { + "epoch": 0.28, + "learning_rate": 4.574790740548014e-05, + "loss": 0.2044, + "step": 6220 + }, + { + "epoch": 0.28, + "learning_rate": 4.574047367716805e-05, + "loss": 0.157, + "step": 6230 + }, + { + "epoch": 0.28, + "learning_rate": 4.573303994885595e-05, + "loss": 0.1762, + "step": 6240 + }, + { + "epoch": 0.28, + "learning_rate": 4.572560622054386e-05, + "loss": 0.1864, + "step": 6250 + }, + { + "epoch": 0.28, + "learning_rate": 4.571817249223175e-05, + "loss": 0.1788, + "step": 6260 + }, + { + "epoch": 0.28, + "learning_rate": 4.5710738763919655e-05, + "loss": 0.18, + "step": 6270 + }, + { + "epoch": 0.28, + "learning_rate": 4.5703305035607565e-05, + "loss": 0.1688, + "step": 6280 + }, + { + "epoch": 0.28, + "learning_rate": 4.569587130729546e-05, + "loss": 0.1995, + "step": 6290 + }, + { + "epoch": 0.28, + "learning_rate": 4.568843757898337e-05, + "loss": 0.1566, + "step": 6300 + }, + { + "epoch": 0.28, + "learning_rate": 4.5681003850671266e-05, + "loss": 0.1248, + "step": 6310 + }, + { + "epoch": 0.28, + "learning_rate": 4.5673570122359175e-05, + "loss": 0.2147, + "step": 6320 + }, + { + "epoch": 0.28, + "learning_rate": 4.566613639404707e-05, + "loss": 0.1766, + "step": 6330 + }, + { + "epoch": 0.28, + "learning_rate": 4.565870266573497e-05, + "loss": 0.1949, + "step": 6340 + }, + { + "epoch": 0.28, + "learning_rate": 4.5651268937422876e-05, + "loss": 0.165, + "step": 6350 + }, + { + "epoch": 0.28, + "learning_rate": 4.564383520911078e-05, + "loss": 0.1473, + "step": 6360 + }, + { + "epoch": 0.28, + "learning_rate": 4.563640148079868e-05, + "loss": 0.1761, + "step": 6370 + }, + { + "epoch": 0.28, + "learning_rate": 4.5628967752486584e-05, + "loss": 0.1811, + "step": 6380 + }, + { + "epoch": 0.28, + "learning_rate": 4.5621534024174486e-05, + "loss": 0.208, + "step": 6390 + }, + { + "epoch": 0.28, + "learning_rate": 4.561410029586239e-05, + "loss": 0.2374, + "step": 6400 + }, + { + "epoch": 0.28, + "learning_rate": 4.560666656755029e-05, + "loss": 0.1654, + "step": 6410 + }, + { + "epoch": 0.28, + "learning_rate": 4.5599232839238194e-05, + "loss": 0.1996, + "step": 6420 + }, + { + "epoch": 0.28, + "learning_rate": 4.5591799110926097e-05, + "loss": 0.1596, + "step": 6430 + }, + { + "epoch": 0.29, + "learning_rate": 4.5584365382614e-05, + "loss": 0.1589, + "step": 6440 + }, + { + "epoch": 0.29, + "learning_rate": 4.55769316543019e-05, + "loss": 0.2543, + "step": 6450 + }, + { + "epoch": 0.29, + "learning_rate": 4.5569497925989804e-05, + "loss": 0.1889, + "step": 6460 + }, + { + "epoch": 0.29, + "learning_rate": 4.556206419767771e-05, + "loss": 0.2004, + "step": 6470 + }, + { + "epoch": 0.29, + "learning_rate": 4.55546304693656e-05, + "loss": 0.1637, + "step": 6480 + }, + { + "epoch": 0.29, + "learning_rate": 4.554719674105351e-05, + "loss": 0.1658, + "step": 6490 + }, + { + "epoch": 0.29, + "learning_rate": 4.553976301274141e-05, + "loss": 0.1649, + "step": 6500 + }, + { + "epoch": 0.29, + "learning_rate": 4.553232928442932e-05, + "loss": 0.2118, + "step": 6510 + }, + { + "epoch": 0.29, + "learning_rate": 4.552489555611722e-05, + "loss": 0.1735, + "step": 6520 + }, + { + "epoch": 0.29, + "learning_rate": 4.551746182780512e-05, + "loss": 0.1513, + "step": 6530 + }, + { + "epoch": 0.29, + "learning_rate": 4.5510028099493025e-05, + "loss": 0.1653, + "step": 6540 + }, + { + "epoch": 0.29, + "learning_rate": 4.550259437118092e-05, + "loss": 0.2427, + "step": 6550 + }, + { + "epoch": 0.29, + "learning_rate": 4.549516064286883e-05, + "loss": 0.2136, + "step": 6560 + }, + { + "epoch": 0.29, + "learning_rate": 4.5487726914556726e-05, + "loss": 0.1936, + "step": 6570 + }, + { + "epoch": 0.29, + "learning_rate": 4.5480293186244635e-05, + "loss": 0.1588, + "step": 6580 + }, + { + "epoch": 0.29, + "learning_rate": 4.547285945793253e-05, + "loss": 0.176, + "step": 6590 + }, + { + "epoch": 0.29, + "learning_rate": 4.5465425729620434e-05, + "loss": 0.1324, + "step": 6600 + }, + { + "epoch": 0.29, + "learning_rate": 4.5457992001308336e-05, + "loss": 0.1709, + "step": 6610 + }, + { + "epoch": 0.29, + "learning_rate": 4.545055827299624e-05, + "loss": 0.2037, + "step": 6620 + }, + { + "epoch": 0.29, + "learning_rate": 4.544312454468415e-05, + "loss": 0.1674, + "step": 6630 + }, + { + "epoch": 0.29, + "learning_rate": 4.5435690816372044e-05, + "loss": 0.151, + "step": 6640 + }, + { + "epoch": 0.29, + "learning_rate": 4.542825708805995e-05, + "loss": 0.2253, + "step": 6650 + }, + { + "epoch": 0.29, + "learning_rate": 4.542082335974785e-05, + "loss": 0.2006, + "step": 6660 + }, + { + "epoch": 0.3, + "learning_rate": 4.541338963143575e-05, + "loss": 0.1909, + "step": 6670 + }, + { + "epoch": 0.3, + "learning_rate": 4.5405955903123654e-05, + "loss": 0.2447, + "step": 6680 + }, + { + "epoch": 0.3, + "learning_rate": 4.539852217481156e-05, + "loss": 0.1817, + "step": 6690 + }, + { + "epoch": 0.3, + "learning_rate": 4.539108844649946e-05, + "loss": 0.1492, + "step": 6700 + }, + { + "epoch": 0.3, + "learning_rate": 4.538365471818736e-05, + "loss": 0.2458, + "step": 6710 + }, + { + "epoch": 0.3, + "learning_rate": 4.5376220989875265e-05, + "loss": 0.1959, + "step": 6720 + }, + { + "epoch": 0.3, + "learning_rate": 4.536878726156317e-05, + "loss": 0.2271, + "step": 6730 + }, + { + "epoch": 0.3, + "learning_rate": 4.536135353325107e-05, + "loss": 0.164, + "step": 6740 + }, + { + "epoch": 0.3, + "learning_rate": 4.535391980493897e-05, + "loss": 0.2078, + "step": 6750 + }, + { + "epoch": 0.3, + "learning_rate": 4.5346486076626875e-05, + "loss": 0.1879, + "step": 6760 + }, + { + "epoch": 0.3, + "learning_rate": 4.533905234831478e-05, + "loss": 0.2041, + "step": 6770 + }, + { + "epoch": 0.3, + "learning_rate": 4.533161862000268e-05, + "loss": 0.1384, + "step": 6780 + }, + { + "epoch": 0.3, + "learning_rate": 4.532418489169058e-05, + "loss": 0.1966, + "step": 6790 + }, + { + "epoch": 0.3, + "learning_rate": 4.5316751163378485e-05, + "loss": 0.1927, + "step": 6800 + }, + { + "epoch": 0.3, + "learning_rate": 4.530931743506638e-05, + "loss": 0.1994, + "step": 6810 + }, + { + "epoch": 0.3, + "learning_rate": 4.530188370675429e-05, + "loss": 0.1588, + "step": 6820 + }, + { + "epoch": 0.3, + "learning_rate": 4.5294449978442186e-05, + "loss": 0.2004, + "step": 6830 + }, + { + "epoch": 0.3, + "learning_rate": 4.5287016250130095e-05, + "loss": 0.1486, + "step": 6840 + }, + { + "epoch": 0.3, + "learning_rate": 4.527958252181799e-05, + "loss": 0.18, + "step": 6850 + }, + { + "epoch": 0.3, + "learning_rate": 4.52721487935059e-05, + "loss": 0.2191, + "step": 6860 + }, + { + "epoch": 0.3, + "learning_rate": 4.5264715065193796e-05, + "loss": 0.1661, + "step": 6870 + }, + { + "epoch": 0.3, + "learning_rate": 4.52572813368817e-05, + "loss": 0.1506, + "step": 6880 + }, + { + "epoch": 0.31, + "learning_rate": 4.524984760856961e-05, + "loss": 0.1503, + "step": 6890 + }, + { + "epoch": 0.31, + "learning_rate": 4.5242413880257504e-05, + "loss": 0.1736, + "step": 6900 + }, + { + "epoch": 0.31, + "learning_rate": 4.5234980151945414e-05, + "loss": 0.1788, + "step": 6910 + }, + { + "epoch": 0.31, + "learning_rate": 4.522754642363331e-05, + "loss": 0.2112, + "step": 6920 + }, + { + "epoch": 0.31, + "learning_rate": 4.522011269532121e-05, + "loss": 0.192, + "step": 6930 + }, + { + "epoch": 0.31, + "learning_rate": 4.5212678967009115e-05, + "loss": 0.1904, + "step": 6940 + }, + { + "epoch": 0.31, + "learning_rate": 4.520524523869702e-05, + "loss": 0.1854, + "step": 6950 + }, + { + "epoch": 0.31, + "learning_rate": 4.519781151038492e-05, + "loss": 0.2013, + "step": 6960 + }, + { + "epoch": 0.31, + "learning_rate": 4.519037778207282e-05, + "loss": 0.1996, + "step": 6970 + }, + { + "epoch": 0.31, + "learning_rate": 4.5182944053760725e-05, + "loss": 0.1653, + "step": 6980 + }, + { + "epoch": 0.31, + "learning_rate": 4.517551032544863e-05, + "loss": 0.1929, + "step": 6990 + }, + { + "epoch": 0.31, + "learning_rate": 4.516807659713653e-05, + "loss": 0.2288, + "step": 7000 + }, + { + "epoch": 0.31, + "learning_rate": 4.516064286882443e-05, + "loss": 0.1661, + "step": 7010 + }, + { + "epoch": 0.31, + "learning_rate": 4.5153209140512335e-05, + "loss": 0.1811, + "step": 7020 + }, + { + "epoch": 0.31, + "learning_rate": 4.514577541220024e-05, + "loss": 0.1812, + "step": 7030 + }, + { + "epoch": 0.31, + "learning_rate": 4.513834168388814e-05, + "loss": 0.1966, + "step": 7040 + }, + { + "epoch": 0.31, + "learning_rate": 4.513090795557604e-05, + "loss": 0.1826, + "step": 7050 + }, + { + "epoch": 0.31, + "learning_rate": 4.5123474227263945e-05, + "loss": 0.1712, + "step": 7060 + }, + { + "epoch": 0.31, + "learning_rate": 4.511604049895185e-05, + "loss": 0.164, + "step": 7070 + }, + { + "epoch": 0.31, + "learning_rate": 4.510860677063975e-05, + "loss": 0.1702, + "step": 7080 + }, + { + "epoch": 0.31, + "learning_rate": 4.5101173042327646e-05, + "loss": 0.1527, + "step": 7090 + }, + { + "epoch": 0.31, + "learning_rate": 4.5093739314015556e-05, + "loss": 0.1958, + "step": 7100 + }, + { + "epoch": 0.31, + "learning_rate": 4.508630558570345e-05, + "loss": 0.118, + "step": 7110 + }, + { + "epoch": 0.32, + "learning_rate": 4.507887185739136e-05, + "loss": 0.1961, + "step": 7120 + }, + { + "epoch": 0.32, + "learning_rate": 4.5071438129079264e-05, + "loss": 0.2165, + "step": 7130 + }, + { + "epoch": 0.32, + "learning_rate": 4.506400440076716e-05, + "loss": 0.1998, + "step": 7140 + }, + { + "epoch": 0.32, + "learning_rate": 4.505657067245507e-05, + "loss": 0.1484, + "step": 7150 + }, + { + "epoch": 0.32, + "learning_rate": 4.5049136944142964e-05, + "loss": 0.1947, + "step": 7160 + }, + { + "epoch": 0.32, + "learning_rate": 4.5041703215830874e-05, + "loss": 0.1612, + "step": 7170 + }, + { + "epoch": 0.32, + "learning_rate": 4.503426948751877e-05, + "loss": 0.1744, + "step": 7180 + }, + { + "epoch": 0.32, + "learning_rate": 4.502683575920668e-05, + "loss": 0.1693, + "step": 7190 + }, + { + "epoch": 0.32, + "learning_rate": 4.5019402030894575e-05, + "loss": 0.228, + "step": 7200 + }, + { + "epoch": 0.32, + "learning_rate": 4.501196830258248e-05, + "loss": 0.1791, + "step": 7210 + }, + { + "epoch": 0.32, + "learning_rate": 4.500453457427038e-05, + "loss": 0.1686, + "step": 7220 + }, + { + "epoch": 0.32, + "learning_rate": 4.499710084595828e-05, + "loss": 0.2372, + "step": 7230 + }, + { + "epoch": 0.32, + "learning_rate": 4.4989667117646185e-05, + "loss": 0.2062, + "step": 7240 + }, + { + "epoch": 0.32, + "learning_rate": 4.498223338933409e-05, + "loss": 0.2191, + "step": 7250 + }, + { + "epoch": 0.32, + "learning_rate": 4.497479966102199e-05, + "loss": 0.1079, + "step": 7260 + }, + { + "epoch": 0.32, + "learning_rate": 4.496736593270989e-05, + "loss": 0.2664, + "step": 7270 + }, + { + "epoch": 0.32, + "learning_rate": 4.4959932204397795e-05, + "loss": 0.2324, + "step": 7280 + }, + { + "epoch": 0.32, + "learning_rate": 4.49524984760857e-05, + "loss": 0.2144, + "step": 7290 + }, + { + "epoch": 0.32, + "learning_rate": 4.49450647477736e-05, + "loss": 0.2014, + "step": 7300 + }, + { + "epoch": 0.32, + "learning_rate": 4.49376310194615e-05, + "loss": 0.1716, + "step": 7310 + }, + { + "epoch": 0.32, + "learning_rate": 4.4930197291149406e-05, + "loss": 0.2299, + "step": 7320 + }, + { + "epoch": 0.32, + "learning_rate": 4.492276356283731e-05, + "loss": 0.1925, + "step": 7330 + }, + { + "epoch": 0.32, + "learning_rate": 4.491532983452521e-05, + "loss": 0.14, + "step": 7340 + }, + { + "epoch": 0.33, + "learning_rate": 4.490789610621311e-05, + "loss": 0.1833, + "step": 7350 + }, + { + "epoch": 0.33, + "learning_rate": 4.4900462377901016e-05, + "loss": 0.2224, + "step": 7360 + }, + { + "epoch": 0.33, + "learning_rate": 4.489302864958892e-05, + "loss": 0.1507, + "step": 7370 + }, + { + "epoch": 0.33, + "learning_rate": 4.488559492127682e-05, + "loss": 0.1716, + "step": 7380 + }, + { + "epoch": 0.33, + "learning_rate": 4.4878161192964724e-05, + "loss": 0.2199, + "step": 7390 + }, + { + "epoch": 0.33, + "learning_rate": 4.4870727464652626e-05, + "loss": 0.1584, + "step": 7400 + }, + { + "epoch": 0.33, + "learning_rate": 4.486329373634053e-05, + "loss": 0.2043, + "step": 7410 + }, + { + "epoch": 0.33, + "learning_rate": 4.4855860008028425e-05, + "loss": 0.1725, + "step": 7420 + }, + { + "epoch": 0.33, + "learning_rate": 4.4848426279716334e-05, + "loss": 0.2174, + "step": 7430 + }, + { + "epoch": 0.33, + "learning_rate": 4.484099255140423e-05, + "loss": 0.1567, + "step": 7440 + }, + { + "epoch": 0.33, + "learning_rate": 4.483355882309214e-05, + "loss": 0.1823, + "step": 7450 + }, + { + "epoch": 0.33, + "learning_rate": 4.4826125094780035e-05, + "loss": 0.1842, + "step": 7460 + }, + { + "epoch": 0.33, + "learning_rate": 4.481869136646794e-05, + "loss": 0.185, + "step": 7470 + }, + { + "epoch": 0.33, + "learning_rate": 4.481125763815584e-05, + "loss": 0.187, + "step": 7480 + }, + { + "epoch": 0.33, + "learning_rate": 4.480382390984374e-05, + "loss": 0.2067, + "step": 7490 + }, + { + "epoch": 0.33, + "learning_rate": 4.479639018153165e-05, + "loss": 0.1527, + "step": 7500 + }, + { + "epoch": 0.33, + "learning_rate": 4.478895645321955e-05, + "loss": 0.183, + "step": 7510 + }, + { + "epoch": 0.33, + "learning_rate": 4.478152272490746e-05, + "loss": 0.1452, + "step": 7520 + }, + { + "epoch": 0.33, + "learning_rate": 4.477408899659535e-05, + "loss": 0.2031, + "step": 7530 + }, + { + "epoch": 0.33, + "learning_rate": 4.4766655268283256e-05, + "loss": 0.1597, + "step": 7540 + }, + { + "epoch": 0.33, + "learning_rate": 4.475922153997116e-05, + "loss": 0.1774, + "step": 7550 + }, + { + "epoch": 0.33, + "learning_rate": 4.475178781165906e-05, + "loss": 0.1288, + "step": 7560 + }, + { + "epoch": 0.34, + "learning_rate": 4.4744354083346963e-05, + "loss": 0.1923, + "step": 7570 + }, + { + "epoch": 0.34, + "learning_rate": 4.4736920355034866e-05, + "loss": 0.2991, + "step": 7580 + }, + { + "epoch": 0.34, + "learning_rate": 4.472948662672277e-05, + "loss": 0.2241, + "step": 7590 + }, + { + "epoch": 0.34, + "learning_rate": 4.472205289841067e-05, + "loss": 0.1494, + "step": 7600 + }, + { + "epoch": 0.34, + "learning_rate": 4.4714619170098574e-05, + "loss": 0.1467, + "step": 7610 + }, + { + "epoch": 0.34, + "learning_rate": 4.4707185441786476e-05, + "loss": 0.1669, + "step": 7620 + }, + { + "epoch": 0.34, + "learning_rate": 4.469975171347438e-05, + "loss": 0.1489, + "step": 7630 + }, + { + "epoch": 0.34, + "learning_rate": 4.469231798516228e-05, + "loss": 0.2123, + "step": 7640 + }, + { + "epoch": 0.34, + "learning_rate": 4.4684884256850184e-05, + "loss": 0.3353, + "step": 7650 + }, + { + "epoch": 0.34, + "learning_rate": 4.467745052853809e-05, + "loss": 0.198, + "step": 7660 + }, + { + "epoch": 0.34, + "learning_rate": 4.467001680022599e-05, + "loss": 0.1622, + "step": 7670 + }, + { + "epoch": 0.34, + "learning_rate": 4.4662583071913885e-05, + "loss": 0.195, + "step": 7680 + }, + { + "epoch": 0.34, + "learning_rate": 4.4655149343601794e-05, + "loss": 0.1574, + "step": 7690 + }, + { + "epoch": 0.34, + "learning_rate": 4.464771561528969e-05, + "loss": 0.1931, + "step": 7700 + }, + { + "epoch": 0.34, + "learning_rate": 4.46402818869776e-05, + "loss": 0.1859, + "step": 7710 + }, + { + "epoch": 0.34, + "learning_rate": 4.4632848158665495e-05, + "loss": 0.1755, + "step": 7720 + }, + { + "epoch": 0.34, + "learning_rate": 4.4625414430353405e-05, + "loss": 0.1947, + "step": 7730 + }, + { + "epoch": 0.34, + "learning_rate": 4.461798070204131e-05, + "loss": 0.156, + "step": 7740 + }, + { + "epoch": 0.34, + "learning_rate": 4.46105469737292e-05, + "loss": 0.1542, + "step": 7750 + }, + { + "epoch": 0.34, + "learning_rate": 4.460311324541711e-05, + "loss": 0.1595, + "step": 7760 + }, + { + "epoch": 0.34, + "learning_rate": 4.459567951710501e-05, + "loss": 0.1602, + "step": 7770 + }, + { + "epoch": 0.34, + "learning_rate": 4.458824578879292e-05, + "loss": 0.1505, + "step": 7780 + }, + { + "epoch": 0.34, + "learning_rate": 4.4580812060480813e-05, + "loss": 0.2428, + "step": 7790 + }, + { + "epoch": 0.35, + "learning_rate": 4.4573378332168716e-05, + "loss": 0.1991, + "step": 7800 + }, + { + "epoch": 0.35, + "learning_rate": 4.456594460385662e-05, + "loss": 0.173, + "step": 7810 + }, + { + "epoch": 0.35, + "learning_rate": 4.455851087554452e-05, + "loss": 0.2147, + "step": 7820 + }, + { + "epoch": 0.35, + "learning_rate": 4.4551077147232424e-05, + "loss": 0.1666, + "step": 7830 + }, + { + "epoch": 0.35, + "learning_rate": 4.4543643418920326e-05, + "loss": 0.1638, + "step": 7840 + }, + { + "epoch": 0.35, + "learning_rate": 4.453620969060823e-05, + "loss": 0.2126, + "step": 7850 + }, + { + "epoch": 0.35, + "learning_rate": 4.452877596229613e-05, + "loss": 0.2032, + "step": 7860 + }, + { + "epoch": 0.35, + "learning_rate": 4.4521342233984034e-05, + "loss": 0.2458, + "step": 7870 + }, + { + "epoch": 0.35, + "learning_rate": 4.451390850567194e-05, + "loss": 0.1685, + "step": 7880 + }, + { + "epoch": 0.35, + "learning_rate": 4.450647477735984e-05, + "loss": 0.1631, + "step": 7890 + }, + { + "epoch": 0.35, + "learning_rate": 4.449904104904774e-05, + "loss": 0.2465, + "step": 7900 + }, + { + "epoch": 0.35, + "learning_rate": 4.4491607320735644e-05, + "loss": 0.1393, + "step": 7910 + }, + { + "epoch": 0.35, + "learning_rate": 4.448417359242355e-05, + "loss": 0.1543, + "step": 7920 + }, + { + "epoch": 0.35, + "learning_rate": 4.447673986411145e-05, + "loss": 0.23, + "step": 7930 + }, + { + "epoch": 0.35, + "learning_rate": 4.446930613579935e-05, + "loss": 0.2117, + "step": 7940 + }, + { + "epoch": 0.35, + "learning_rate": 4.4461872407487255e-05, + "loss": 0.1753, + "step": 7950 + }, + { + "epoch": 0.35, + "learning_rate": 4.445443867917515e-05, + "loss": 0.2338, + "step": 7960 + }, + { + "epoch": 0.35, + "learning_rate": 4.444700495086306e-05, + "loss": 0.1755, + "step": 7970 + }, + { + "epoch": 0.35, + "learning_rate": 4.443957122255096e-05, + "loss": 0.1682, + "step": 7980 + }, + { + "epoch": 0.35, + "learning_rate": 4.4432137494238865e-05, + "loss": 0.2344, + "step": 7990 + }, + { + "epoch": 0.35, + "learning_rate": 4.442470376592677e-05, + "loss": 0.1985, + "step": 8000 + }, + { + "epoch": 0.35, + "learning_rate": 4.4417270037614663e-05, + "loss": 0.1698, + "step": 8010 + }, + { + "epoch": 0.36, + "learning_rate": 4.440983630930257e-05, + "loss": 0.1952, + "step": 8020 + }, + { + "epoch": 0.36, + "learning_rate": 4.440240258099047e-05, + "loss": 0.1922, + "step": 8030 + }, + { + "epoch": 0.36, + "learning_rate": 4.439496885267838e-05, + "loss": 0.2079, + "step": 8040 + }, + { + "epoch": 0.36, + "learning_rate": 4.4387535124366274e-05, + "loss": 0.1331, + "step": 8050 + }, + { + "epoch": 0.36, + "learning_rate": 4.438010139605418e-05, + "loss": 0.1648, + "step": 8060 + }, + { + "epoch": 0.36, + "learning_rate": 4.437266766774208e-05, + "loss": 0.1743, + "step": 8070 + }, + { + "epoch": 0.36, + "learning_rate": 4.436523393942998e-05, + "loss": 0.1767, + "step": 8080 + }, + { + "epoch": 0.36, + "learning_rate": 4.4357800211117884e-05, + "loss": 0.1595, + "step": 8090 + }, + { + "epoch": 0.36, + "learning_rate": 4.435036648280579e-05, + "loss": 0.1824, + "step": 8100 + }, + { + "epoch": 0.36, + "learning_rate": 4.4342932754493696e-05, + "loss": 0.1443, + "step": 8110 + }, + { + "epoch": 0.36, + "learning_rate": 4.433549902618159e-05, + "loss": 0.1655, + "step": 8120 + }, + { + "epoch": 0.36, + "learning_rate": 4.4328065297869494e-05, + "loss": 0.2075, + "step": 8130 + }, + { + "epoch": 0.36, + "learning_rate": 4.43206315695574e-05, + "loss": 0.2341, + "step": 8140 + }, + { + "epoch": 0.36, + "learning_rate": 4.43131978412453e-05, + "loss": 0.2391, + "step": 8150 + }, + { + "epoch": 0.36, + "learning_rate": 4.43057641129332e-05, + "loss": 0.1916, + "step": 8160 + }, + { + "epoch": 0.36, + "learning_rate": 4.4298330384621105e-05, + "loss": 0.1898, + "step": 8170 + }, + { + "epoch": 0.36, + "learning_rate": 4.429089665630901e-05, + "loss": 0.2031, + "step": 8180 + }, + { + "epoch": 0.36, + "learning_rate": 4.428346292799691e-05, + "loss": 0.2379, + "step": 8190 + }, + { + "epoch": 0.36, + "learning_rate": 4.427602919968481e-05, + "loss": 0.1596, + "step": 8200 + }, + { + "epoch": 0.36, + "learning_rate": 4.4268595471372715e-05, + "loss": 0.1925, + "step": 8210 + }, + { + "epoch": 0.36, + "learning_rate": 4.426116174306061e-05, + "loss": 0.1989, + "step": 8220 + }, + { + "epoch": 0.36, + "learning_rate": 4.425372801474852e-05, + "loss": 0.1838, + "step": 8230 + }, + { + "epoch": 0.36, + "learning_rate": 4.424629428643642e-05, + "loss": 0.1502, + "step": 8240 + }, + { + "epoch": 0.37, + "learning_rate": 4.4238860558124325e-05, + "loss": 0.1823, + "step": 8250 + }, + { + "epoch": 0.37, + "learning_rate": 4.423142682981223e-05, + "loss": 0.1457, + "step": 8260 + }, + { + "epoch": 0.37, + "learning_rate": 4.422399310150013e-05, + "loss": 0.172, + "step": 8270 + }, + { + "epoch": 0.37, + "learning_rate": 4.421655937318803e-05, + "loss": 0.1577, + "step": 8280 + }, + { + "epoch": 0.37, + "learning_rate": 4.420912564487593e-05, + "loss": 0.1843, + "step": 8290 + }, + { + "epoch": 0.37, + "learning_rate": 4.420169191656384e-05, + "loss": 0.1189, + "step": 8300 + }, + { + "epoch": 0.37, + "learning_rate": 4.4194258188251734e-05, + "loss": 0.1461, + "step": 8310 + }, + { + "epoch": 0.37, + "learning_rate": 4.418682445993964e-05, + "loss": 0.2097, + "step": 8320 + }, + { + "epoch": 0.37, + "learning_rate": 4.417939073162754e-05, + "loss": 0.1927, + "step": 8330 + }, + { + "epoch": 0.37, + "learning_rate": 4.417195700331544e-05, + "loss": 0.2222, + "step": 8340 + }, + { + "epoch": 0.37, + "learning_rate": 4.416452327500335e-05, + "loss": 0.1708, + "step": 8350 + }, + { + "epoch": 0.37, + "learning_rate": 4.415708954669125e-05, + "loss": 0.2081, + "step": 8360 + }, + { + "epoch": 0.37, + "learning_rate": 4.4149655818379156e-05, + "loss": 0.2185, + "step": 8370 + }, + { + "epoch": 0.37, + "learning_rate": 4.414222209006705e-05, + "loss": 0.157, + "step": 8380 + }, + { + "epoch": 0.37, + "learning_rate": 4.413478836175496e-05, + "loss": 0.1918, + "step": 8390 + }, + { + "epoch": 0.37, + "learning_rate": 4.412735463344286e-05, + "loss": 0.1609, + "step": 8400 + }, + { + "epoch": 0.37, + "learning_rate": 4.411992090513076e-05, + "loss": 0.1991, + "step": 8410 + }, + { + "epoch": 0.37, + "learning_rate": 4.411248717681866e-05, + "loss": 0.1783, + "step": 8420 + }, + { + "epoch": 0.37, + "learning_rate": 4.4105053448506565e-05, + "loss": 0.1905, + "step": 8430 + }, + { + "epoch": 0.37, + "learning_rate": 4.409761972019447e-05, + "loss": 0.1632, + "step": 8440 + }, + { + "epoch": 0.37, + "learning_rate": 4.409018599188237e-05, + "loss": 0.2332, + "step": 8450 + }, + { + "epoch": 0.37, + "learning_rate": 4.408275226357027e-05, + "loss": 0.1683, + "step": 8460 + }, + { + "epoch": 0.37, + "learning_rate": 4.4075318535258175e-05, + "loss": 0.2352, + "step": 8470 + }, + { + "epoch": 0.38, + "learning_rate": 4.406788480694608e-05, + "loss": 0.1741, + "step": 8480 + }, + { + "epoch": 0.38, + "learning_rate": 4.406045107863398e-05, + "loss": 0.1827, + "step": 8490 + }, + { + "epoch": 0.38, + "learning_rate": 4.405301735032188e-05, + "loss": 0.1279, + "step": 8500 + }, + { + "epoch": 0.38, + "learning_rate": 4.4045583622009786e-05, + "loss": 0.2747, + "step": 8510 + }, + { + "epoch": 0.38, + "learning_rate": 4.403814989369769e-05, + "loss": 0.1811, + "step": 8520 + }, + { + "epoch": 0.38, + "learning_rate": 4.403071616538559e-05, + "loss": 0.2085, + "step": 8530 + }, + { + "epoch": 0.38, + "learning_rate": 4.402328243707349e-05, + "loss": 0.2108, + "step": 8540 + }, + { + "epoch": 0.38, + "learning_rate": 4.401584870876139e-05, + "loss": 0.1667, + "step": 8550 + }, + { + "epoch": 0.38, + "learning_rate": 4.40084149804493e-05, + "loss": 0.2332, + "step": 8560 + }, + { + "epoch": 0.38, + "learning_rate": 4.4000981252137194e-05, + "loss": 0.1803, + "step": 8570 + }, + { + "epoch": 0.38, + "learning_rate": 4.3993547523825104e-05, + "loss": 0.1876, + "step": 8580 + }, + { + "epoch": 0.38, + "learning_rate": 4.3986113795513e-05, + "loss": 0.1883, + "step": 8590 + }, + { + "epoch": 0.38, + "learning_rate": 4.397868006720091e-05, + "loss": 0.1625, + "step": 8600 + }, + { + "epoch": 0.38, + "learning_rate": 4.397124633888881e-05, + "loss": 0.2853, + "step": 8610 + }, + { + "epoch": 0.38, + "learning_rate": 4.396381261057671e-05, + "loss": 0.2184, + "step": 8620 + }, + { + "epoch": 0.38, + "learning_rate": 4.3956378882264617e-05, + "loss": 0.1837, + "step": 8630 + }, + { + "epoch": 0.38, + "learning_rate": 4.394894515395251e-05, + "loss": 0.1706, + "step": 8640 + }, + { + "epoch": 0.38, + "learning_rate": 4.394151142564042e-05, + "loss": 0.1666, + "step": 8650 + }, + { + "epoch": 0.38, + "learning_rate": 4.393407769732832e-05, + "loss": 0.1953, + "step": 8660 + }, + { + "epoch": 0.38, + "learning_rate": 4.392664396901622e-05, + "loss": 0.1688, + "step": 8670 + }, + { + "epoch": 0.38, + "learning_rate": 4.391921024070412e-05, + "loss": 0.2084, + "step": 8680 + }, + { + "epoch": 0.38, + "learning_rate": 4.3911776512392025e-05, + "loss": 0.1513, + "step": 8690 + }, + { + "epoch": 0.39, + "learning_rate": 4.390434278407993e-05, + "loss": 0.1933, + "step": 8700 + }, + { + "epoch": 0.39, + "learning_rate": 4.389690905576783e-05, + "loss": 0.1418, + "step": 8710 + }, + { + "epoch": 0.39, + "learning_rate": 4.388947532745574e-05, + "loss": 0.2434, + "step": 8720 + }, + { + "epoch": 0.39, + "learning_rate": 4.3882041599143636e-05, + "loss": 0.233, + "step": 8730 + }, + { + "epoch": 0.39, + "learning_rate": 4.387460787083154e-05, + "loss": 0.2053, + "step": 8740 + }, + { + "epoch": 0.39, + "learning_rate": 4.386717414251944e-05, + "loss": 0.1655, + "step": 8750 + }, + { + "epoch": 0.39, + "learning_rate": 4.385974041420734e-05, + "loss": 0.2074, + "step": 8760 + }, + { + "epoch": 0.39, + "learning_rate": 4.3852306685895246e-05, + "loss": 0.1876, + "step": 8770 + }, + { + "epoch": 0.39, + "learning_rate": 4.384487295758315e-05, + "loss": 0.2027, + "step": 8780 + }, + { + "epoch": 0.39, + "learning_rate": 4.383743922927105e-05, + "loss": 0.2319, + "step": 8790 + }, + { + "epoch": 0.39, + "learning_rate": 4.3830005500958954e-05, + "loss": 0.1578, + "step": 8800 + }, + { + "epoch": 0.39, + "learning_rate": 4.3822571772646856e-05, + "loss": 0.1629, + "step": 8810 + }, + { + "epoch": 0.39, + "learning_rate": 4.381513804433476e-05, + "loss": 0.1372, + "step": 8820 + }, + { + "epoch": 0.39, + "learning_rate": 4.3807704316022655e-05, + "loss": 0.1521, + "step": 8830 + }, + { + "epoch": 0.39, + "learning_rate": 4.3800270587710564e-05, + "loss": 0.1603, + "step": 8840 + }, + { + "epoch": 0.39, + "learning_rate": 4.3792836859398467e-05, + "loss": 0.2025, + "step": 8850 + }, + { + "epoch": 0.39, + "learning_rate": 4.378540313108637e-05, + "loss": 0.1751, + "step": 8860 + }, + { + "epoch": 0.39, + "learning_rate": 4.377796940277427e-05, + "loss": 0.1769, + "step": 8870 + }, + { + "epoch": 0.39, + "learning_rate": 4.377053567446217e-05, + "loss": 0.1906, + "step": 8880 + }, + { + "epoch": 0.39, + "learning_rate": 4.376310194615008e-05, + "loss": 0.151, + "step": 8890 + }, + { + "epoch": 0.39, + "learning_rate": 4.375566821783797e-05, + "loss": 0.175, + "step": 8900 + }, + { + "epoch": 0.39, + "learning_rate": 4.374823448952588e-05, + "loss": 0.1883, + "step": 8910 + }, + { + "epoch": 0.39, + "learning_rate": 4.374080076121378e-05, + "loss": 0.2089, + "step": 8920 + }, + { + "epoch": 0.4, + "learning_rate": 4.373336703290169e-05, + "loss": 0.1568, + "step": 8930 + }, + { + "epoch": 0.4, + "learning_rate": 4.372593330458958e-05, + "loss": 0.1726, + "step": 8940 + }, + { + "epoch": 0.4, + "learning_rate": 4.3718499576277486e-05, + "loss": 0.2102, + "step": 8950 + }, + { + "epoch": 0.4, + "learning_rate": 4.3711065847965395e-05, + "loss": 0.2081, + "step": 8960 + }, + { + "epoch": 0.4, + "learning_rate": 4.370363211965329e-05, + "loss": 0.2228, + "step": 8970 + }, + { + "epoch": 0.4, + "learning_rate": 4.36961983913412e-05, + "loss": 0.1576, + "step": 8980 + }, + { + "epoch": 0.4, + "learning_rate": 4.3688764663029096e-05, + "loss": 0.1117, + "step": 8990 + }, + { + "epoch": 0.4, + "learning_rate": 4.3681330934717005e-05, + "loss": 0.1678, + "step": 9000 + }, + { + "epoch": 0.4, + "learning_rate": 4.36738972064049e-05, + "loss": 0.148, + "step": 9010 + }, + { + "epoch": 0.4, + "learning_rate": 4.3666463478092804e-05, + "loss": 0.1882, + "step": 9020 + }, + { + "epoch": 0.4, + "learning_rate": 4.3659029749780706e-05, + "loss": 0.2028, + "step": 9030 + }, + { + "epoch": 0.4, + "learning_rate": 4.365159602146861e-05, + "loss": 0.218, + "step": 9040 + }, + { + "epoch": 0.4, + "learning_rate": 4.364416229315651e-05, + "loss": 0.2028, + "step": 9050 + }, + { + "epoch": 0.4, + "learning_rate": 4.3636728564844414e-05, + "loss": 0.1605, + "step": 9060 + }, + { + "epoch": 0.4, + "learning_rate": 4.3629294836532317e-05, + "loss": 0.2059, + "step": 9070 + }, + { + "epoch": 0.4, + "learning_rate": 4.362186110822022e-05, + "loss": 0.1732, + "step": 9080 + }, + { + "epoch": 0.4, + "learning_rate": 4.361442737990812e-05, + "loss": 0.2279, + "step": 9090 + }, + { + "epoch": 0.4, + "learning_rate": 4.3606993651596024e-05, + "loss": 0.2288, + "step": 9100 + }, + { + "epoch": 0.4, + "learning_rate": 4.359955992328393e-05, + "loss": 0.1731, + "step": 9110 + }, + { + "epoch": 0.4, + "learning_rate": 4.359212619497183e-05, + "loss": 0.1761, + "step": 9120 + }, + { + "epoch": 0.4, + "learning_rate": 4.358469246665973e-05, + "loss": 0.1352, + "step": 9130 + }, + { + "epoch": 0.4, + "learning_rate": 4.3577258738347635e-05, + "loss": 0.22, + "step": 9140 + }, + { + "epoch": 0.41, + "learning_rate": 4.356982501003554e-05, + "loss": 0.1494, + "step": 9150 + }, + { + "epoch": 0.41, + "learning_rate": 4.356239128172343e-05, + "loss": 0.2318, + "step": 9160 + }, + { + "epoch": 0.41, + "learning_rate": 4.355495755341134e-05, + "loss": 0.2461, + "step": 9170 + }, + { + "epoch": 0.41, + "learning_rate": 4.354752382509924e-05, + "loss": 0.2035, + "step": 9180 + }, + { + "epoch": 0.41, + "learning_rate": 4.354009009678715e-05, + "loss": 0.2033, + "step": 9190 + }, + { + "epoch": 0.41, + "learning_rate": 4.353265636847504e-05, + "loss": 0.1743, + "step": 9200 + }, + { + "epoch": 0.41, + "learning_rate": 4.352522264016295e-05, + "loss": 0.1992, + "step": 9210 + }, + { + "epoch": 0.41, + "learning_rate": 4.3517788911850855e-05, + "loss": 0.2013, + "step": 9220 + }, + { + "epoch": 0.41, + "learning_rate": 4.351035518353875e-05, + "loss": 0.1515, + "step": 9230 + }, + { + "epoch": 0.41, + "learning_rate": 4.350292145522666e-05, + "loss": 0.2151, + "step": 9240 + }, + { + "epoch": 0.41, + "learning_rate": 4.3495487726914556e-05, + "loss": 0.155, + "step": 9250 + }, + { + "epoch": 0.41, + "learning_rate": 4.3488053998602466e-05, + "loss": 0.2296, + "step": 9260 + }, + { + "epoch": 0.41, + "learning_rate": 4.348062027029036e-05, + "loss": 0.1772, + "step": 9270 + }, + { + "epoch": 0.41, + "learning_rate": 4.3473186541978264e-05, + "loss": 0.2149, + "step": 9280 + }, + { + "epoch": 0.41, + "learning_rate": 4.3465752813666166e-05, + "loss": 0.197, + "step": 9290 + }, + { + "epoch": 0.41, + "learning_rate": 4.345831908535407e-05, + "loss": 0.1804, + "step": 9300 + }, + { + "epoch": 0.41, + "learning_rate": 4.345088535704197e-05, + "loss": 0.1558, + "step": 9310 + }, + { + "epoch": 0.41, + "learning_rate": 4.3443451628729874e-05, + "loss": 0.176, + "step": 9320 + }, + { + "epoch": 0.41, + "learning_rate": 4.3436017900417784e-05, + "loss": 0.1873, + "step": 9330 + }, + { + "epoch": 0.41, + "learning_rate": 4.342858417210568e-05, + "loss": 0.1178, + "step": 9340 + }, + { + "epoch": 0.41, + "learning_rate": 4.342115044379358e-05, + "loss": 0.1926, + "step": 9350 + }, + { + "epoch": 0.41, + "learning_rate": 4.3413716715481485e-05, + "loss": 0.1929, + "step": 9360 + }, + { + "epoch": 0.41, + "learning_rate": 4.340628298716939e-05, + "loss": 0.1413, + "step": 9370 + }, + { + "epoch": 0.42, + "learning_rate": 4.339884925885729e-05, + "loss": 0.1512, + "step": 9380 + }, + { + "epoch": 0.42, + "learning_rate": 4.339141553054519e-05, + "loss": 0.1591, + "step": 9390 + }, + { + "epoch": 0.42, + "learning_rate": 4.3383981802233095e-05, + "loss": 0.1942, + "step": 9400 + }, + { + "epoch": 0.42, + "learning_rate": 4.3376548073921e-05, + "loss": 0.1819, + "step": 9410 + }, + { + "epoch": 0.42, + "learning_rate": 4.33691143456089e-05, + "loss": 0.1817, + "step": 9420 + }, + { + "epoch": 0.42, + "learning_rate": 4.33616806172968e-05, + "loss": 0.2336, + "step": 9430 + }, + { + "epoch": 0.42, + "learning_rate": 4.33542468889847e-05, + "loss": 0.1976, + "step": 9440 + }, + { + "epoch": 0.42, + "learning_rate": 4.334681316067261e-05, + "loss": 0.2018, + "step": 9450 + }, + { + "epoch": 0.42, + "learning_rate": 4.333937943236051e-05, + "loss": 0.2117, + "step": 9460 + }, + { + "epoch": 0.42, + "learning_rate": 4.333194570404841e-05, + "loss": 0.1811, + "step": 9470 + }, + { + "epoch": 0.42, + "learning_rate": 4.3324511975736315e-05, + "loss": 0.2271, + "step": 9480 + }, + { + "epoch": 0.42, + "learning_rate": 4.331707824742421e-05, + "loss": 0.1652, + "step": 9490 + }, + { + "epoch": 0.42, + "learning_rate": 4.330964451911212e-05, + "loss": 0.1381, + "step": 9500 + }, + { + "epoch": 0.42, + "learning_rate": 4.3302210790800016e-05, + "loss": 0.1873, + "step": 9510 + }, + { + "epoch": 0.42, + "learning_rate": 4.3294777062487926e-05, + "loss": 0.2037, + "step": 9520 + }, + { + "epoch": 0.42, + "learning_rate": 4.328734333417582e-05, + "loss": 0.221, + "step": 9530 + }, + { + "epoch": 0.42, + "learning_rate": 4.327990960586373e-05, + "loss": 0.1828, + "step": 9540 + }, + { + "epoch": 0.42, + "learning_rate": 4.327247587755163e-05, + "loss": 0.1559, + "step": 9550 + }, + { + "epoch": 0.42, + "learning_rate": 4.326504214923953e-05, + "loss": 0.1651, + "step": 9560 + }, + { + "epoch": 0.42, + "learning_rate": 4.325760842092743e-05, + "loss": 0.2335, + "step": 9570 + }, + { + "epoch": 0.42, + "learning_rate": 4.3250174692615335e-05, + "loss": 0.2048, + "step": 9580 + }, + { + "epoch": 0.42, + "learning_rate": 4.3242740964303244e-05, + "loss": 0.2018, + "step": 9590 + }, + { + "epoch": 0.43, + "learning_rate": 4.323530723599114e-05, + "loss": 0.1932, + "step": 9600 + }, + { + "epoch": 0.43, + "learning_rate": 4.322787350767904e-05, + "loss": 0.1778, + "step": 9610 + }, + { + "epoch": 0.43, + "learning_rate": 4.3220439779366945e-05, + "loss": 0.156, + "step": 9620 + }, + { + "epoch": 0.43, + "learning_rate": 4.321300605105485e-05, + "loss": 0.1811, + "step": 9630 + }, + { + "epoch": 0.43, + "learning_rate": 4.320557232274275e-05, + "loss": 0.1856, + "step": 9640 + }, + { + "epoch": 0.43, + "learning_rate": 4.319813859443065e-05, + "loss": 0.1459, + "step": 9650 + }, + { + "epoch": 0.43, + "learning_rate": 4.3190704866118555e-05, + "loss": 0.1755, + "step": 9660 + }, + { + "epoch": 0.43, + "learning_rate": 4.318327113780646e-05, + "loss": 0.1926, + "step": 9670 + }, + { + "epoch": 0.43, + "learning_rate": 4.317583740949436e-05, + "loss": 0.2518, + "step": 9680 + }, + { + "epoch": 0.43, + "learning_rate": 4.316840368118226e-05, + "loss": 0.177, + "step": 9690 + }, + { + "epoch": 0.43, + "learning_rate": 4.3160969952870165e-05, + "loss": 0.1754, + "step": 9700 + }, + { + "epoch": 0.43, + "learning_rate": 4.315353622455807e-05, + "loss": 0.1401, + "step": 9710 + }, + { + "epoch": 0.43, + "learning_rate": 4.314610249624597e-05, + "loss": 0.2021, + "step": 9720 + }, + { + "epoch": 0.43, + "learning_rate": 4.313866876793387e-05, + "loss": 0.1773, + "step": 9730 + }, + { + "epoch": 0.43, + "learning_rate": 4.3131235039621776e-05, + "loss": 0.2316, + "step": 9740 + }, + { + "epoch": 0.43, + "learning_rate": 4.312380131130968e-05, + "loss": 0.1687, + "step": 9750 + }, + { + "epoch": 0.43, + "learning_rate": 4.311636758299758e-05, + "loss": 0.2084, + "step": 9760 + }, + { + "epoch": 0.43, + "learning_rate": 4.310893385468548e-05, + "loss": 0.1606, + "step": 9770 + }, + { + "epoch": 0.43, + "learning_rate": 4.3101500126373386e-05, + "loss": 0.1924, + "step": 9780 + }, + { + "epoch": 0.43, + "learning_rate": 4.309406639806128e-05, + "loss": 0.2141, + "step": 9790 + }, + { + "epoch": 0.43, + "learning_rate": 4.308663266974919e-05, + "loss": 0.2403, + "step": 9800 + }, + { + "epoch": 0.43, + "learning_rate": 4.307919894143709e-05, + "loss": 0.1721, + "step": 9810 + }, + { + "epoch": 0.43, + "learning_rate": 4.307176521312499e-05, + "loss": 0.1882, + "step": 9820 + }, + { + "epoch": 0.44, + "learning_rate": 4.30643314848129e-05, + "loss": 0.208, + "step": 9830 + }, + { + "epoch": 0.44, + "learning_rate": 4.3056897756500795e-05, + "loss": 0.2206, + "step": 9840 + }, + { + "epoch": 0.44, + "learning_rate": 4.3049464028188704e-05, + "loss": 0.2175, + "step": 9850 + }, + { + "epoch": 0.44, + "learning_rate": 4.30420302998766e-05, + "loss": 0.1377, + "step": 9860 + }, + { + "epoch": 0.44, + "learning_rate": 4.303459657156451e-05, + "loss": 0.1895, + "step": 9870 + }, + { + "epoch": 0.44, + "learning_rate": 4.3027162843252405e-05, + "loss": 0.1664, + "step": 9880 + }, + { + "epoch": 0.44, + "learning_rate": 4.301972911494031e-05, + "loss": 0.1151, + "step": 9890 + }, + { + "epoch": 0.44, + "learning_rate": 4.301229538662821e-05, + "loss": 0.2653, + "step": 9900 + }, + { + "epoch": 0.44, + "learning_rate": 4.300486165831611e-05, + "loss": 0.2095, + "step": 9910 + }, + { + "epoch": 0.44, + "learning_rate": 4.2997427930004015e-05, + "loss": 0.1623, + "step": 9920 + }, + { + "epoch": 0.44, + "learning_rate": 4.298999420169192e-05, + "loss": 0.1782, + "step": 9930 + }, + { + "epoch": 0.44, + "learning_rate": 4.298256047337982e-05, + "loss": 0.1388, + "step": 9940 + }, + { + "epoch": 0.44, + "learning_rate": 4.297512674506772e-05, + "loss": 0.1769, + "step": 9950 + }, + { + "epoch": 0.44, + "learning_rate": 4.2967693016755626e-05, + "loss": 0.1942, + "step": 9960 + }, + { + "epoch": 0.44, + "learning_rate": 4.296025928844353e-05, + "loss": 0.2192, + "step": 9970 + }, + { + "epoch": 0.44, + "learning_rate": 4.295282556013143e-05, + "loss": 0.177, + "step": 9980 + }, + { + "epoch": 0.44, + "learning_rate": 4.2945391831819333e-05, + "loss": 0.1848, + "step": 9990 + }, + { + "epoch": 0.44, + "learning_rate": 4.2937958103507236e-05, + "loss": 0.1526, + "step": 10000 + }, + { + "epoch": 0.44, + "learning_rate": 4.293052437519514e-05, + "loss": 0.1877, + "step": 10010 + }, + { + "epoch": 0.44, + "learning_rate": 4.292309064688304e-05, + "loss": 0.2167, + "step": 10020 + }, + { + "epoch": 0.44, + "learning_rate": 4.291565691857094e-05, + "loss": 0.1992, + "step": 10030 + }, + { + "epoch": 0.44, + "learning_rate": 4.2908223190258846e-05, + "loss": 0.1591, + "step": 10040 + }, + { + "epoch": 0.44, + "learning_rate": 4.290078946194674e-05, + "loss": 0.1507, + "step": 10050 + }, + { + "epoch": 0.45, + "learning_rate": 4.289335573363465e-05, + "loss": 0.2026, + "step": 10060 + }, + { + "epoch": 0.45, + "learning_rate": 4.2885922005322554e-05, + "loss": 0.1905, + "step": 10070 + }, + { + "epoch": 0.45, + "learning_rate": 4.287848827701046e-05, + "loss": 0.1971, + "step": 10080 + }, + { + "epoch": 0.45, + "learning_rate": 4.287105454869836e-05, + "loss": 0.2067, + "step": 10090 + }, + { + "epoch": 0.45, + "learning_rate": 4.2863620820386255e-05, + "loss": 0.1914, + "step": 10100 + }, + { + "epoch": 0.45, + "learning_rate": 4.2856187092074164e-05, + "loss": 0.1668, + "step": 10110 + }, + { + "epoch": 0.45, + "learning_rate": 4.284875336376206e-05, + "loss": 0.1812, + "step": 10120 + }, + { + "epoch": 0.45, + "learning_rate": 4.284131963544997e-05, + "loss": 0.1857, + "step": 10130 + }, + { + "epoch": 0.45, + "learning_rate": 4.2833885907137865e-05, + "loss": 0.2499, + "step": 10140 + }, + { + "epoch": 0.45, + "learning_rate": 4.282645217882577e-05, + "loss": 0.1931, + "step": 10150 + }, + { + "epoch": 0.45, + "learning_rate": 4.281901845051367e-05, + "loss": 0.1974, + "step": 10160 + }, + { + "epoch": 0.45, + "learning_rate": 4.281158472220157e-05, + "loss": 0.1624, + "step": 10170 + }, + { + "epoch": 0.45, + "learning_rate": 4.2804150993889476e-05, + "loss": 0.1683, + "step": 10180 + }, + { + "epoch": 0.45, + "learning_rate": 4.279671726557738e-05, + "loss": 0.2189, + "step": 10190 + }, + { + "epoch": 0.45, + "learning_rate": 4.278928353726529e-05, + "loss": 0.1823, + "step": 10200 + }, + { + "epoch": 0.45, + "learning_rate": 4.2781849808953183e-05, + "loss": 0.1606, + "step": 10210 + }, + { + "epoch": 0.45, + "learning_rate": 4.2774416080641086e-05, + "loss": 0.1624, + "step": 10220 + }, + { + "epoch": 0.45, + "learning_rate": 4.276698235232899e-05, + "loss": 0.2127, + "step": 10230 + }, + { + "epoch": 0.45, + "learning_rate": 4.275954862401689e-05, + "loss": 0.1862, + "step": 10240 + }, + { + "epoch": 0.45, + "learning_rate": 4.2752114895704794e-05, + "loss": 0.1648, + "step": 10250 + }, + { + "epoch": 0.45, + "learning_rate": 4.2744681167392696e-05, + "loss": 0.1303, + "step": 10260 + }, + { + "epoch": 0.45, + "learning_rate": 4.27372474390806e-05, + "loss": 0.1737, + "step": 10270 + }, + { + "epoch": 0.46, + "learning_rate": 4.27298137107685e-05, + "loss": 0.1427, + "step": 10280 + }, + { + "epoch": 0.46, + "learning_rate": 4.2722379982456404e-05, + "loss": 0.168, + "step": 10290 + }, + { + "epoch": 0.46, + "learning_rate": 4.271494625414431e-05, + "loss": 0.1027, + "step": 10300 + }, + { + "epoch": 0.46, + "learning_rate": 4.270751252583221e-05, + "loss": 0.1964, + "step": 10310 + }, + { + "epoch": 0.46, + "learning_rate": 4.270007879752011e-05, + "loss": 0.1633, + "step": 10320 + }, + { + "epoch": 0.46, + "learning_rate": 4.2692645069208014e-05, + "loss": 0.1868, + "step": 10330 + }, + { + "epoch": 0.46, + "learning_rate": 4.268521134089592e-05, + "loss": 0.1565, + "step": 10340 + }, + { + "epoch": 0.46, + "learning_rate": 4.267777761258382e-05, + "loss": 0.1755, + "step": 10350 + }, + { + "epoch": 0.46, + "learning_rate": 4.2670343884271715e-05, + "loss": 0.2009, + "step": 10360 + }, + { + "epoch": 0.46, + "learning_rate": 4.2662910155959625e-05, + "loss": 0.1833, + "step": 10370 + }, + { + "epoch": 0.46, + "learning_rate": 4.265547642764752e-05, + "loss": 0.1437, + "step": 10380 + }, + { + "epoch": 0.46, + "learning_rate": 4.264804269933543e-05, + "loss": 0.1884, + "step": 10390 + }, + { + "epoch": 0.46, + "learning_rate": 4.2640608971023326e-05, + "loss": 0.1476, + "step": 10400 + }, + { + "epoch": 0.46, + "learning_rate": 4.2633175242711235e-05, + "loss": 0.29, + "step": 10410 + }, + { + "epoch": 0.46, + "learning_rate": 4.262574151439913e-05, + "loss": 0.1502, + "step": 10420 + }, + { + "epoch": 0.46, + "learning_rate": 4.2618307786087033e-05, + "loss": 0.1783, + "step": 10430 + }, + { + "epoch": 0.46, + "learning_rate": 4.261087405777494e-05, + "loss": 0.18, + "step": 10440 + }, + { + "epoch": 0.46, + "learning_rate": 4.260344032946284e-05, + "loss": 0.1765, + "step": 10450 + }, + { + "epoch": 0.46, + "learning_rate": 4.259600660115075e-05, + "loss": 0.2009, + "step": 10460 + }, + { + "epoch": 0.46, + "learning_rate": 4.2588572872838644e-05, + "loss": 0.1483, + "step": 10470 + }, + { + "epoch": 0.46, + "learning_rate": 4.2581139144526546e-05, + "loss": 0.1918, + "step": 10480 + }, + { + "epoch": 0.46, + "learning_rate": 4.257370541621445e-05, + "loss": 0.1858, + "step": 10490 + }, + { + "epoch": 0.46, + "learning_rate": 4.256627168790235e-05, + "loss": 0.1533, + "step": 10500 + }, + { + "epoch": 0.47, + "learning_rate": 4.2558837959590254e-05, + "loss": 0.1449, + "step": 10510 + }, + { + "epoch": 0.47, + "learning_rate": 4.255140423127816e-05, + "loss": 0.1625, + "step": 10520 + }, + { + "epoch": 0.47, + "learning_rate": 4.254397050296606e-05, + "loss": 0.1632, + "step": 10530 + }, + { + "epoch": 0.47, + "learning_rate": 4.253653677465396e-05, + "loss": 0.2077, + "step": 10540 + }, + { + "epoch": 0.47, + "learning_rate": 4.2529103046341864e-05, + "loss": 0.1443, + "step": 10550 + }, + { + "epoch": 0.47, + "learning_rate": 4.252166931802977e-05, + "loss": 0.1493, + "step": 10560 + }, + { + "epoch": 0.47, + "learning_rate": 4.251423558971767e-05, + "loss": 0.1616, + "step": 10570 + }, + { + "epoch": 0.47, + "learning_rate": 4.250680186140557e-05, + "loss": 0.2514, + "step": 10580 + }, + { + "epoch": 0.47, + "learning_rate": 4.2499368133093475e-05, + "loss": 0.1831, + "step": 10590 + }, + { + "epoch": 0.47, + "learning_rate": 4.249193440478138e-05, + "loss": 0.1393, + "step": 10600 + }, + { + "epoch": 0.47, + "learning_rate": 4.248450067646928e-05, + "loss": 0.11, + "step": 10610 + }, + { + "epoch": 0.47, + "learning_rate": 4.247706694815718e-05, + "loss": 0.1924, + "step": 10620 + }, + { + "epoch": 0.47, + "learning_rate": 4.2469633219845085e-05, + "loss": 0.1566, + "step": 10630 + }, + { + "epoch": 0.47, + "learning_rate": 4.246219949153298e-05, + "loss": 0.1768, + "step": 10640 + }, + { + "epoch": 0.47, + "learning_rate": 4.245476576322089e-05, + "loss": 0.1662, + "step": 10650 + }, + { + "epoch": 0.47, + "learning_rate": 4.2447332034908786e-05, + "loss": 0.1691, + "step": 10660 + }, + { + "epoch": 0.47, + "learning_rate": 4.2439898306596695e-05, + "loss": 0.1934, + "step": 10670 + }, + { + "epoch": 0.47, + "learning_rate": 4.24324645782846e-05, + "loss": 0.1282, + "step": 10680 + }, + { + "epoch": 0.47, + "learning_rate": 4.2425030849972494e-05, + "loss": 0.1914, + "step": 10690 + }, + { + "epoch": 0.47, + "learning_rate": 4.24175971216604e-05, + "loss": 0.176, + "step": 10700 + }, + { + "epoch": 0.47, + "learning_rate": 4.24101633933483e-05, + "loss": 0.1909, + "step": 10710 + }, + { + "epoch": 0.47, + "learning_rate": 4.240272966503621e-05, + "loss": 0.1449, + "step": 10720 + }, + { + "epoch": 0.48, + "learning_rate": 4.2395295936724104e-05, + "loss": 0.1908, + "step": 10730 + }, + { + "epoch": 0.48, + "learning_rate": 4.2387862208412013e-05, + "loss": 0.1826, + "step": 10740 + }, + { + "epoch": 0.48, + "learning_rate": 4.238042848009991e-05, + "loss": 0.2147, + "step": 10750 + }, + { + "epoch": 0.48, + "learning_rate": 4.237299475178781e-05, + "loss": 0.202, + "step": 10760 + }, + { + "epoch": 0.48, + "learning_rate": 4.2365561023475714e-05, + "loss": 0.1779, + "step": 10770 + }, + { + "epoch": 0.48, + "learning_rate": 4.235812729516362e-05, + "loss": 0.2102, + "step": 10780 + }, + { + "epoch": 0.48, + "learning_rate": 4.235069356685152e-05, + "loss": 0.1965, + "step": 10790 + }, + { + "epoch": 0.48, + "learning_rate": 4.234325983853942e-05, + "loss": 0.1599, + "step": 10800 + }, + { + "epoch": 0.48, + "learning_rate": 4.2335826110227325e-05, + "loss": 0.1417, + "step": 10810 + }, + { + "epoch": 0.48, + "learning_rate": 4.232839238191523e-05, + "loss": 0.2261, + "step": 10820 + }, + { + "epoch": 0.48, + "learning_rate": 4.232095865360313e-05, + "loss": 0.2257, + "step": 10830 + }, + { + "epoch": 0.48, + "learning_rate": 4.231352492529103e-05, + "loss": 0.1711, + "step": 10840 + }, + { + "epoch": 0.48, + "learning_rate": 4.2306091196978935e-05, + "loss": 0.1429, + "step": 10850 + }, + { + "epoch": 0.48, + "learning_rate": 4.229865746866684e-05, + "loss": 0.1874, + "step": 10860 + }, + { + "epoch": 0.48, + "learning_rate": 4.229122374035474e-05, + "loss": 0.1569, + "step": 10870 + }, + { + "epoch": 0.48, + "learning_rate": 4.228379001204264e-05, + "loss": 0.1293, + "step": 10880 + }, + { + "epoch": 0.48, + "learning_rate": 4.2276356283730545e-05, + "loss": 0.2276, + "step": 10890 + }, + { + "epoch": 0.48, + "learning_rate": 4.226892255541844e-05, + "loss": 0.222, + "step": 10900 + }, + { + "epoch": 0.48, + "learning_rate": 4.226148882710635e-05, + "loss": 0.1669, + "step": 10910 + }, + { + "epoch": 0.48, + "learning_rate": 4.225405509879425e-05, + "loss": 0.271, + "step": 10920 + }, + { + "epoch": 0.48, + "learning_rate": 4.2246621370482156e-05, + "loss": 0.1592, + "step": 10930 + }, + { + "epoch": 0.48, + "learning_rate": 4.223918764217006e-05, + "loss": 0.1148, + "step": 10940 + }, + { + "epoch": 0.48, + "learning_rate": 4.223175391385796e-05, + "loss": 0.1754, + "step": 10950 + }, + { + "epoch": 0.49, + "learning_rate": 4.222432018554586e-05, + "loss": 0.1576, + "step": 10960 + }, + { + "epoch": 0.49, + "learning_rate": 4.221688645723376e-05, + "loss": 0.2041, + "step": 10970 + }, + { + "epoch": 0.49, + "learning_rate": 4.220945272892167e-05, + "loss": 0.1454, + "step": 10980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2202019000609564e-05, + "loss": 0.228, + "step": 10990 + }, + { + "epoch": 0.49, + "learning_rate": 4.2194585272297474e-05, + "loss": 0.1656, + "step": 11000 + }, + { + "epoch": 0.49, + "learning_rate": 4.218715154398537e-05, + "loss": 0.1957, + "step": 11010 + }, + { + "epoch": 0.49, + "learning_rate": 4.217971781567327e-05, + "loss": 0.1803, + "step": 11020 + }, + { + "epoch": 0.49, + "learning_rate": 4.2172284087361175e-05, + "loss": 0.1226, + "step": 11030 + }, + { + "epoch": 0.49, + "learning_rate": 4.216485035904908e-05, + "loss": 0.1997, + "step": 11040 + }, + { + "epoch": 0.49, + "learning_rate": 4.2157416630736987e-05, + "loss": 0.1402, + "step": 11050 + }, + { + "epoch": 0.49, + "learning_rate": 4.214998290242488e-05, + "loss": 0.2317, + "step": 11060 + }, + { + "epoch": 0.49, + "learning_rate": 4.214254917411279e-05, + "loss": 0.1692, + "step": 11070 + }, + { + "epoch": 0.49, + "learning_rate": 4.213511544580069e-05, + "loss": 0.1831, + "step": 11080 + }, + { + "epoch": 0.49, + "learning_rate": 4.212768171748859e-05, + "loss": 0.1345, + "step": 11090 + }, + { + "epoch": 0.49, + "learning_rate": 4.212024798917649e-05, + "loss": 0.1753, + "step": 11100 + }, + { + "epoch": 0.49, + "learning_rate": 4.2112814260864395e-05, + "loss": 0.127, + "step": 11110 + }, + { + "epoch": 0.49, + "learning_rate": 4.21053805325523e-05, + "loss": 0.1869, + "step": 11120 + }, + { + "epoch": 0.49, + "learning_rate": 4.20979468042402e-05, + "loss": 0.143, + "step": 11130 + }, + { + "epoch": 0.49, + "learning_rate": 4.20905130759281e-05, + "loss": 0.1862, + "step": 11140 + }, + { + "epoch": 0.49, + "learning_rate": 4.2083079347616006e-05, + "loss": 0.1689, + "step": 11150 + }, + { + "epoch": 0.49, + "learning_rate": 4.207564561930391e-05, + "loss": 0.1941, + "step": 11160 + }, + { + "epoch": 0.49, + "learning_rate": 4.206821189099181e-05, + "loss": 0.1832, + "step": 11170 + }, + { + "epoch": 0.49, + "learning_rate": 4.206077816267971e-05, + "loss": 0.221, + "step": 11180 + }, + { + "epoch": 0.5, + "learning_rate": 4.2053344434367616e-05, + "loss": 0.223, + "step": 11190 + }, + { + "epoch": 0.5, + "learning_rate": 4.204591070605552e-05, + "loss": 0.1267, + "step": 11200 + }, + { + "epoch": 0.5, + "learning_rate": 4.203847697774342e-05, + "loss": 0.2387, + "step": 11210 + }, + { + "epoch": 0.5, + "learning_rate": 4.2031043249431324e-05, + "loss": 0.1886, + "step": 11220 + }, + { + "epoch": 0.5, + "learning_rate": 4.202360952111922e-05, + "loss": 0.1446, + "step": 11230 + }, + { + "epoch": 0.5, + "learning_rate": 4.201617579280713e-05, + "loss": 0.1956, + "step": 11240 + }, + { + "epoch": 0.5, + "learning_rate": 4.2008742064495025e-05, + "loss": 0.2077, + "step": 11250 + }, + { + "epoch": 0.5, + "learning_rate": 4.2001308336182934e-05, + "loss": 0.1956, + "step": 11260 + }, + { + "epoch": 0.5, + "learning_rate": 4.199387460787083e-05, + "loss": 0.1965, + "step": 11270 + }, + { + "epoch": 0.5, + "learning_rate": 4.198644087955874e-05, + "loss": 0.1536, + "step": 11280 + }, + { + "epoch": 0.5, + "learning_rate": 4.197900715124664e-05, + "loss": 0.1262, + "step": 11290 + }, + { + "epoch": 0.5, + "learning_rate": 4.197157342293454e-05, + "loss": 0.1763, + "step": 11300 + }, + { + "epoch": 0.5, + "learning_rate": 4.196413969462245e-05, + "loss": 0.2091, + "step": 11310 + }, + { + "epoch": 0.5, + "learning_rate": 4.195670596631034e-05, + "loss": 0.1659, + "step": 11320 + }, + { + "epoch": 0.5, + "learning_rate": 4.194927223799825e-05, + "loss": 0.2228, + "step": 11330 + }, + { + "epoch": 0.5, + "learning_rate": 4.194183850968615e-05, + "loss": 0.1514, + "step": 11340 + }, + { + "epoch": 0.5, + "learning_rate": 4.193440478137405e-05, + "loss": 0.1927, + "step": 11350 + }, + { + "epoch": 0.5, + "learning_rate": 4.192697105306195e-05, + "loss": 0.1106, + "step": 11360 + }, + { + "epoch": 0.5, + "learning_rate": 4.1919537324749856e-05, + "loss": 0.185, + "step": 11370 + }, + { + "epoch": 0.5, + "learning_rate": 4.191210359643776e-05, + "loss": 0.1922, + "step": 11380 + }, + { + "epoch": 0.5, + "learning_rate": 4.190466986812566e-05, + "loss": 0.1806, + "step": 11390 + }, + { + "epoch": 0.5, + "learning_rate": 4.189723613981356e-05, + "loss": 0.1396, + "step": 11400 + }, + { + "epoch": 0.51, + "learning_rate": 4.1889802411501466e-05, + "loss": 0.1341, + "step": 11410 + }, + { + "epoch": 0.51, + "learning_rate": 4.188236868318937e-05, + "loss": 0.1837, + "step": 11420 + }, + { + "epoch": 0.51, + "learning_rate": 4.187493495487727e-05, + "loss": 0.1966, + "step": 11430 + }, + { + "epoch": 0.51, + "learning_rate": 4.1867501226565174e-05, + "loss": 0.1267, + "step": 11440 + }, + { + "epoch": 0.51, + "learning_rate": 4.1860067498253076e-05, + "loss": 0.1613, + "step": 11450 + }, + { + "epoch": 0.51, + "learning_rate": 4.185263376994098e-05, + "loss": 0.182, + "step": 11460 + }, + { + "epoch": 0.51, + "learning_rate": 4.184520004162888e-05, + "loss": 0.1343, + "step": 11470 + }, + { + "epoch": 0.51, + "learning_rate": 4.1837766313316784e-05, + "loss": 0.1839, + "step": 11480 + }, + { + "epoch": 0.51, + "learning_rate": 4.1830332585004687e-05, + "loss": 0.1159, + "step": 11490 + }, + { + "epoch": 0.51, + "learning_rate": 4.182289885669259e-05, + "loss": 0.2091, + "step": 11500 + }, + { + "epoch": 0.51, + "learning_rate": 4.1815465128380485e-05, + "loss": 0.1262, + "step": 11510 + }, + { + "epoch": 0.51, + "learning_rate": 4.1808031400068394e-05, + "loss": 0.157, + "step": 11520 + }, + { + "epoch": 0.51, + "learning_rate": 4.180059767175629e-05, + "loss": 0.2166, + "step": 11530 + }, + { + "epoch": 0.51, + "learning_rate": 4.17931639434442e-05, + "loss": 0.2015, + "step": 11540 + }, + { + "epoch": 0.51, + "learning_rate": 4.17857302151321e-05, + "loss": 0.1893, + "step": 11550 + }, + { + "epoch": 0.51, + "learning_rate": 4.177829648682e-05, + "loss": 0.1795, + "step": 11560 + }, + { + "epoch": 0.51, + "learning_rate": 4.177086275850791e-05, + "loss": 0.1839, + "step": 11570 + }, + { + "epoch": 0.51, + "learning_rate": 4.17634290301958e-05, + "loss": 0.2249, + "step": 11580 + }, + { + "epoch": 0.51, + "learning_rate": 4.175599530188371e-05, + "loss": 0.1665, + "step": 11590 + }, + { + "epoch": 0.51, + "learning_rate": 4.174856157357161e-05, + "loss": 0.1706, + "step": 11600 + }, + { + "epoch": 0.51, + "learning_rate": 4.174112784525952e-05, + "loss": 0.2219, + "step": 11610 + }, + { + "epoch": 0.51, + "learning_rate": 4.173369411694741e-05, + "loss": 0.2024, + "step": 11620 + }, + { + "epoch": 0.51, + "learning_rate": 4.1726260388635316e-05, + "loss": 0.1554, + "step": 11630 + }, + { + "epoch": 0.52, + "learning_rate": 4.171882666032322e-05, + "loss": 0.1359, + "step": 11640 + }, + { + "epoch": 0.52, + "learning_rate": 4.171139293201112e-05, + "loss": 0.2087, + "step": 11650 + }, + { + "epoch": 0.52, + "learning_rate": 4.170395920369903e-05, + "loss": 0.1365, + "step": 11660 + }, + { + "epoch": 0.52, + "learning_rate": 4.1696525475386926e-05, + "loss": 0.1809, + "step": 11670 + }, + { + "epoch": 0.52, + "learning_rate": 4.168909174707483e-05, + "loss": 0.1772, + "step": 11680 + }, + { + "epoch": 0.52, + "learning_rate": 4.168165801876273e-05, + "loss": 0.175, + "step": 11690 + }, + { + "epoch": 0.52, + "learning_rate": 4.1674224290450634e-05, + "loss": 0.2019, + "step": 11700 + }, + { + "epoch": 0.52, + "learning_rate": 4.1666790562138537e-05, + "loss": 0.1415, + "step": 11710 + }, + { + "epoch": 0.52, + "learning_rate": 4.165935683382644e-05, + "loss": 0.2348, + "step": 11720 + }, + { + "epoch": 0.52, + "learning_rate": 4.165192310551434e-05, + "loss": 0.2319, + "step": 11730 + }, + { + "epoch": 0.52, + "learning_rate": 4.1644489377202244e-05, + "loss": 0.1982, + "step": 11740 + }, + { + "epoch": 0.52, + "learning_rate": 4.163705564889015e-05, + "loss": 0.2195, + "step": 11750 + }, + { + "epoch": 0.52, + "learning_rate": 4.162962192057805e-05, + "loss": 0.2094, + "step": 11760 + }, + { + "epoch": 0.52, + "learning_rate": 4.1622188192265945e-05, + "loss": 0.1892, + "step": 11770 + }, + { + "epoch": 0.52, + "learning_rate": 4.1614754463953855e-05, + "loss": 0.1708, + "step": 11780 + }, + { + "epoch": 0.52, + "learning_rate": 4.160732073564176e-05, + "loss": 0.2122, + "step": 11790 + }, + { + "epoch": 0.52, + "learning_rate": 4.159988700732966e-05, + "loss": 0.1743, + "step": 11800 + }, + { + "epoch": 0.52, + "learning_rate": 4.159245327901756e-05, + "loss": 0.1784, + "step": 11810 + }, + { + "epoch": 0.52, + "learning_rate": 4.1585019550705465e-05, + "loss": 0.1261, + "step": 11820 + }, + { + "epoch": 0.52, + "learning_rate": 4.157758582239337e-05, + "loss": 0.207, + "step": 11830 + }, + { + "epoch": 0.52, + "learning_rate": 4.157015209408126e-05, + "loss": 0.1213, + "step": 11840 + }, + { + "epoch": 0.52, + "learning_rate": 4.156271836576917e-05, + "loss": 0.2179, + "step": 11850 + }, + { + "epoch": 0.53, + "learning_rate": 4.155528463745707e-05, + "loss": 0.1774, + "step": 11860 + }, + { + "epoch": 0.53, + "learning_rate": 4.154785090914498e-05, + "loss": 0.1488, + "step": 11870 + }, + { + "epoch": 0.53, + "learning_rate": 4.1540417180832874e-05, + "loss": 0.1256, + "step": 11880 + }, + { + "epoch": 0.53, + "learning_rate": 4.1532983452520776e-05, + "loss": 0.1851, + "step": 11890 + }, + { + "epoch": 0.53, + "learning_rate": 4.152554972420868e-05, + "loss": 0.177, + "step": 11900 + }, + { + "epoch": 0.53, + "learning_rate": 4.151811599589658e-05, + "loss": 0.1401, + "step": 11910 + }, + { + "epoch": 0.53, + "learning_rate": 4.151068226758449e-05, + "loss": 0.2625, + "step": 11920 + }, + { + "epoch": 0.53, + "learning_rate": 4.1503248539272386e-05, + "loss": 0.2286, + "step": 11930 + }, + { + "epoch": 0.53, + "learning_rate": 4.1495814810960296e-05, + "loss": 0.1781, + "step": 11940 + }, + { + "epoch": 0.53, + "learning_rate": 4.148838108264819e-05, + "loss": 0.18, + "step": 11950 + }, + { + "epoch": 0.53, + "learning_rate": 4.1480947354336094e-05, + "loss": 0.2136, + "step": 11960 + }, + { + "epoch": 0.53, + "learning_rate": 4.1473513626024e-05, + "loss": 0.173, + "step": 11970 + }, + { + "epoch": 0.53, + "learning_rate": 4.14660798977119e-05, + "loss": 0.2064, + "step": 11980 + }, + { + "epoch": 0.53, + "learning_rate": 4.14586461693998e-05, + "loss": 0.141, + "step": 11990 + }, + { + "epoch": 0.53, + "learning_rate": 4.1451212441087705e-05, + "loss": 0.2221, + "step": 12000 + }, + { + "epoch": 0.53, + "learning_rate": 4.144377871277561e-05, + "loss": 0.167, + "step": 12010 + }, + { + "epoch": 0.53, + "learning_rate": 4.143634498446351e-05, + "loss": 0.1626, + "step": 12020 + }, + { + "epoch": 0.53, + "learning_rate": 4.142891125615141e-05, + "loss": 0.1911, + "step": 12030 + }, + { + "epoch": 0.53, + "learning_rate": 4.1421477527839315e-05, + "loss": 0.1788, + "step": 12040 + }, + { + "epoch": 0.53, + "learning_rate": 4.141404379952722e-05, + "loss": 0.1812, + "step": 12050 + }, + { + "epoch": 0.53, + "learning_rate": 4.140661007121512e-05, + "loss": 0.1321, + "step": 12060 + }, + { + "epoch": 0.53, + "learning_rate": 4.139917634290302e-05, + "loss": 0.1724, + "step": 12070 + }, + { + "epoch": 0.53, + "learning_rate": 4.1391742614590925e-05, + "loss": 0.2177, + "step": 12080 + }, + { + "epoch": 0.54, + "learning_rate": 4.138430888627883e-05, + "loss": 0.1423, + "step": 12090 + }, + { + "epoch": 0.54, + "learning_rate": 4.1376875157966724e-05, + "loss": 0.1969, + "step": 12100 + }, + { + "epoch": 0.54, + "learning_rate": 4.136944142965463e-05, + "loss": 0.163, + "step": 12110 + }, + { + "epoch": 0.54, + "learning_rate": 4.136200770134253e-05, + "loss": 0.1761, + "step": 12120 + }, + { + "epoch": 0.54, + "learning_rate": 4.135457397303044e-05, + "loss": 0.1902, + "step": 12130 + }, + { + "epoch": 0.54, + "learning_rate": 4.1347140244718334e-05, + "loss": 0.1504, + "step": 12140 + }, + { + "epoch": 0.54, + "learning_rate": 4.133970651640624e-05, + "loss": 0.1742, + "step": 12150 + }, + { + "epoch": 0.54, + "learning_rate": 4.1332272788094146e-05, + "loss": 0.1715, + "step": 12160 + }, + { + "epoch": 0.54, + "learning_rate": 4.132483905978204e-05, + "loss": 0.1676, + "step": 12170 + }, + { + "epoch": 0.54, + "learning_rate": 4.131740533146995e-05, + "loss": 0.1426, + "step": 12180 + }, + { + "epoch": 0.54, + "learning_rate": 4.130997160315785e-05, + "loss": 0.2042, + "step": 12190 + }, + { + "epoch": 0.54, + "learning_rate": 4.1302537874845756e-05, + "loss": 0.1408, + "step": 12200 + }, + { + "epoch": 0.54, + "learning_rate": 4.129510414653365e-05, + "loss": 0.1537, + "step": 12210 + }, + { + "epoch": 0.54, + "learning_rate": 4.128767041822156e-05, + "loss": 0.1971, + "step": 12220 + }, + { + "epoch": 0.54, + "learning_rate": 4.128023668990946e-05, + "loss": 0.2049, + "step": 12230 + }, + { + "epoch": 0.54, + "learning_rate": 4.127280296159736e-05, + "loss": 0.1791, + "step": 12240 + }, + { + "epoch": 0.54, + "learning_rate": 4.126536923328526e-05, + "loss": 0.2066, + "step": 12250 + }, + { + "epoch": 0.54, + "learning_rate": 4.1257935504973165e-05, + "loss": 0.2098, + "step": 12260 + }, + { + "epoch": 0.54, + "learning_rate": 4.1250501776661074e-05, + "loss": 0.17, + "step": 12270 + }, + { + "epoch": 0.54, + "learning_rate": 4.124306804834897e-05, + "loss": 0.2009, + "step": 12280 + }, + { + "epoch": 0.54, + "learning_rate": 4.123563432003687e-05, + "loss": 0.1276, + "step": 12290 + }, + { + "epoch": 0.54, + "learning_rate": 4.1228200591724775e-05, + "loss": 0.1585, + "step": 12300 + }, + { + "epoch": 0.55, + "learning_rate": 4.122076686341268e-05, + "loss": 0.1373, + "step": 12310 + }, + { + "epoch": 0.55, + "learning_rate": 4.121333313510058e-05, + "loss": 0.1365, + "step": 12320 + }, + { + "epoch": 0.55, + "learning_rate": 4.120589940678848e-05, + "loss": 0.2086, + "step": 12330 + }, + { + "epoch": 0.55, + "learning_rate": 4.1198465678476385e-05, + "loss": 0.199, + "step": 12340 + }, + { + "epoch": 0.55, + "learning_rate": 4.119103195016429e-05, + "loss": 0.2007, + "step": 12350 + }, + { + "epoch": 0.55, + "learning_rate": 4.118359822185219e-05, + "loss": 0.1488, + "step": 12360 + }, + { + "epoch": 0.55, + "learning_rate": 4.117616449354009e-05, + "loss": 0.1868, + "step": 12370 + }, + { + "epoch": 0.55, + "learning_rate": 4.116873076522799e-05, + "loss": 0.1883, + "step": 12380 + }, + { + "epoch": 0.55, + "learning_rate": 4.11612970369159e-05, + "loss": 0.2195, + "step": 12390 + }, + { + "epoch": 0.55, + "learning_rate": 4.11538633086038e-05, + "loss": 0.1507, + "step": 12400 + }, + { + "epoch": 0.55, + "learning_rate": 4.1146429580291704e-05, + "loss": 0.1484, + "step": 12410 + }, + { + "epoch": 0.55, + "learning_rate": 4.1138995851979606e-05, + "loss": 0.1368, + "step": 12420 + }, + { + "epoch": 0.55, + "learning_rate": 4.11315621236675e-05, + "loss": 0.2015, + "step": 12430 + }, + { + "epoch": 0.55, + "learning_rate": 4.112412839535541e-05, + "loss": 0.163, + "step": 12440 + }, + { + "epoch": 0.55, + "learning_rate": 4.111669466704331e-05, + "loss": 0.1463, + "step": 12450 + }, + { + "epoch": 0.55, + "learning_rate": 4.1109260938731216e-05, + "loss": 0.178, + "step": 12460 + }, + { + "epoch": 0.55, + "learning_rate": 4.110182721041911e-05, + "loss": 0.1674, + "step": 12470 + }, + { + "epoch": 0.55, + "learning_rate": 4.109439348210702e-05, + "loss": 0.1539, + "step": 12480 + }, + { + "epoch": 0.55, + "learning_rate": 4.108695975379492e-05, + "loss": 0.1685, + "step": 12490 + }, + { + "epoch": 0.55, + "learning_rate": 4.107952602548282e-05, + "loss": 0.1723, + "step": 12500 + }, + { + "epoch": 0.55, + "learning_rate": 4.107209229717072e-05, + "loss": 0.2123, + "step": 12510 + }, + { + "epoch": 0.55, + "learning_rate": 4.1064658568858625e-05, + "loss": 0.2225, + "step": 12520 + }, + { + "epoch": 0.55, + "learning_rate": 4.1057224840546534e-05, + "loss": 0.171, + "step": 12530 + }, + { + "epoch": 0.56, + "learning_rate": 4.104979111223443e-05, + "loss": 0.2094, + "step": 12540 + }, + { + "epoch": 0.56, + "learning_rate": 4.104235738392234e-05, + "loss": 0.1791, + "step": 12550 + }, + { + "epoch": 0.56, + "learning_rate": 4.1034923655610235e-05, + "loss": 0.1262, + "step": 12560 + }, + { + "epoch": 0.56, + "learning_rate": 4.102748992729814e-05, + "loss": 0.1131, + "step": 12570 + }, + { + "epoch": 0.56, + "learning_rate": 4.102005619898604e-05, + "loss": 0.1647, + "step": 12580 + }, + { + "epoch": 0.56, + "learning_rate": 4.101262247067394e-05, + "loss": 0.1611, + "step": 12590 + }, + { + "epoch": 0.56, + "learning_rate": 4.1005188742361846e-05, + "loss": 0.1746, + "step": 12600 + }, + { + "epoch": 0.56, + "learning_rate": 4.099775501404975e-05, + "loss": 0.1884, + "step": 12610 + }, + { + "epoch": 0.56, + "learning_rate": 4.099032128573765e-05, + "loss": 0.1756, + "step": 12620 + }, + { + "epoch": 0.56, + "learning_rate": 4.0982887557425553e-05, + "loss": 0.1811, + "step": 12630 + }, + { + "epoch": 0.56, + "learning_rate": 4.0975453829113456e-05, + "loss": 0.1827, + "step": 12640 + }, + { + "epoch": 0.56, + "learning_rate": 4.096802010080136e-05, + "loss": 0.1841, + "step": 12650 + }, + { + "epoch": 0.56, + "learning_rate": 4.096058637248926e-05, + "loss": 0.1922, + "step": 12660 + }, + { + "epoch": 0.56, + "learning_rate": 4.0953152644177164e-05, + "loss": 0.1931, + "step": 12670 + }, + { + "epoch": 0.56, + "learning_rate": 4.0945718915865066e-05, + "loss": 0.1964, + "step": 12680 + }, + { + "epoch": 0.56, + "learning_rate": 4.093828518755297e-05, + "loss": 0.2106, + "step": 12690 + }, + { + "epoch": 0.56, + "learning_rate": 4.093085145924087e-05, + "loss": 0.1344, + "step": 12700 + }, + { + "epoch": 0.56, + "learning_rate": 4.092341773092877e-05, + "loss": 0.158, + "step": 12710 + }, + { + "epoch": 0.56, + "learning_rate": 4.091598400261668e-05, + "loss": 0.2147, + "step": 12720 + }, + { + "epoch": 0.56, + "learning_rate": 4.090855027430457e-05, + "loss": 0.183, + "step": 12730 + }, + { + "epoch": 0.56, + "learning_rate": 4.090111654599248e-05, + "loss": 0.1935, + "step": 12740 + }, + { + "epoch": 0.56, + "learning_rate": 4.089368281768038e-05, + "loss": 0.2264, + "step": 12750 + }, + { + "epoch": 0.56, + "learning_rate": 4.088624908936829e-05, + "loss": 0.2005, + "step": 12760 + }, + { + "epoch": 0.57, + "learning_rate": 4.087881536105619e-05, + "loss": 0.1411, + "step": 12770 + }, + { + "epoch": 0.57, + "learning_rate": 4.0871381632744085e-05, + "loss": 0.2612, + "step": 12780 + }, + { + "epoch": 0.57, + "learning_rate": 4.0863947904431995e-05, + "loss": 0.1749, + "step": 12790 + }, + { + "epoch": 0.57, + "learning_rate": 4.085651417611989e-05, + "loss": 0.2256, + "step": 12800 + }, + { + "epoch": 0.57, + "learning_rate": 4.08490804478078e-05, + "loss": 0.1945, + "step": 12810 + }, + { + "epoch": 0.57, + "learning_rate": 4.0841646719495696e-05, + "loss": 0.1731, + "step": 12820 + }, + { + "epoch": 0.57, + "learning_rate": 4.08342129911836e-05, + "loss": 0.1909, + "step": 12830 + }, + { + "epoch": 0.57, + "learning_rate": 4.08267792628715e-05, + "loss": 0.1731, + "step": 12840 + }, + { + "epoch": 0.57, + "learning_rate": 4.0819345534559403e-05, + "loss": 0.179, + "step": 12850 + }, + { + "epoch": 0.57, + "learning_rate": 4.0811911806247306e-05, + "loss": 0.2042, + "step": 12860 + }, + { + "epoch": 0.57, + "learning_rate": 4.080447807793521e-05, + "loss": 0.2059, + "step": 12870 + }, + { + "epoch": 0.57, + "learning_rate": 4.079704434962311e-05, + "loss": 0.1974, + "step": 12880 + }, + { + "epoch": 0.57, + "learning_rate": 4.0789610621311014e-05, + "loss": 0.183, + "step": 12890 + }, + { + "epoch": 0.57, + "learning_rate": 4.0782176892998916e-05, + "loss": 0.1429, + "step": 12900 + }, + { + "epoch": 0.57, + "learning_rate": 4.077474316468682e-05, + "loss": 0.1801, + "step": 12910 + }, + { + "epoch": 0.57, + "learning_rate": 4.076730943637472e-05, + "loss": 0.163, + "step": 12920 + }, + { + "epoch": 0.57, + "learning_rate": 4.0759875708062624e-05, + "loss": 0.1712, + "step": 12930 + }, + { + "epoch": 0.57, + "learning_rate": 4.075244197975053e-05, + "loss": 0.1615, + "step": 12940 + }, + { + "epoch": 0.57, + "learning_rate": 4.074500825143843e-05, + "loss": 0.2216, + "step": 12950 + }, + { + "epoch": 0.57, + "learning_rate": 4.073757452312633e-05, + "loss": 0.16, + "step": 12960 + }, + { + "epoch": 0.57, + "learning_rate": 4.0730140794814234e-05, + "loss": 0.1806, + "step": 12970 + }, + { + "epoch": 0.57, + "learning_rate": 4.072270706650214e-05, + "loss": 0.1751, + "step": 12980 + }, + { + "epoch": 0.58, + "learning_rate": 4.071527333819003e-05, + "loss": 0.1612, + "step": 12990 + }, + { + "epoch": 0.58, + "learning_rate": 4.070783960987794e-05, + "loss": 0.1775, + "step": 13000 + }, + { + "epoch": 0.58, + "learning_rate": 4.0700405881565845e-05, + "loss": 0.2432, + "step": 13010 + }, + { + "epoch": 0.58, + "learning_rate": 4.069297215325375e-05, + "loss": 0.1862, + "step": 13020 + }, + { + "epoch": 0.58, + "learning_rate": 4.068553842494165e-05, + "loss": 0.1664, + "step": 13030 + }, + { + "epoch": 0.58, + "learning_rate": 4.0678104696629546e-05, + "loss": 0.2177, + "step": 13040 + }, + { + "epoch": 0.58, + "learning_rate": 4.0670670968317455e-05, + "loss": 0.1614, + "step": 13050 + }, + { + "epoch": 0.58, + "learning_rate": 4.066323724000535e-05, + "loss": 0.2162, + "step": 13060 + }, + { + "epoch": 0.58, + "learning_rate": 4.065580351169326e-05, + "loss": 0.2288, + "step": 13070 + }, + { + "epoch": 0.58, + "learning_rate": 4.0648369783381156e-05, + "loss": 0.1587, + "step": 13080 + }, + { + "epoch": 0.58, + "learning_rate": 4.0640936055069065e-05, + "loss": 0.173, + "step": 13090 + }, + { + "epoch": 0.58, + "learning_rate": 4.063350232675696e-05, + "loss": 0.1758, + "step": 13100 + }, + { + "epoch": 0.58, + "learning_rate": 4.0626068598444864e-05, + "loss": 0.2074, + "step": 13110 + }, + { + "epoch": 0.58, + "learning_rate": 4.0618634870132766e-05, + "loss": 0.1971, + "step": 13120 + }, + { + "epoch": 0.58, + "learning_rate": 4.061120114182067e-05, + "loss": 0.1933, + "step": 13130 + }, + { + "epoch": 0.58, + "learning_rate": 4.060376741350858e-05, + "loss": 0.2035, + "step": 13140 + }, + { + "epoch": 0.58, + "learning_rate": 4.0596333685196474e-05, + "loss": 0.1915, + "step": 13150 + }, + { + "epoch": 0.58, + "learning_rate": 4.058889995688438e-05, + "loss": 0.1426, + "step": 13160 + }, + { + "epoch": 0.58, + "learning_rate": 4.058146622857228e-05, + "loss": 0.2081, + "step": 13170 + }, + { + "epoch": 0.58, + "learning_rate": 4.057403250026018e-05, + "loss": 0.1917, + "step": 13180 + }, + { + "epoch": 0.58, + "learning_rate": 4.0566598771948084e-05, + "loss": 0.1686, + "step": 13190 + }, + { + "epoch": 0.58, + "learning_rate": 4.055916504363599e-05, + "loss": 0.2148, + "step": 13200 + }, + { + "epoch": 0.58, + "learning_rate": 4.055173131532389e-05, + "loss": 0.1654, + "step": 13210 + }, + { + "epoch": 0.59, + "learning_rate": 4.054429758701179e-05, + "loss": 0.1273, + "step": 13220 + }, + { + "epoch": 0.59, + "learning_rate": 4.0536863858699695e-05, + "loss": 0.1609, + "step": 13230 + }, + { + "epoch": 0.59, + "learning_rate": 4.05294301303876e-05, + "loss": 0.1601, + "step": 13240 + }, + { + "epoch": 0.59, + "learning_rate": 4.05219964020755e-05, + "loss": 0.2126, + "step": 13250 + }, + { + "epoch": 0.59, + "learning_rate": 4.05145626737634e-05, + "loss": 0.2226, + "step": 13260 + }, + { + "epoch": 0.59, + "learning_rate": 4.0507128945451305e-05, + "loss": 0.1382, + "step": 13270 + }, + { + "epoch": 0.59, + "learning_rate": 4.049969521713921e-05, + "loss": 0.1883, + "step": 13280 + }, + { + "epoch": 0.59, + "learning_rate": 4.049226148882711e-05, + "loss": 0.1966, + "step": 13290 + }, + { + "epoch": 0.59, + "learning_rate": 4.048482776051501e-05, + "loss": 0.1779, + "step": 13300 + }, + { + "epoch": 0.59, + "learning_rate": 4.0477394032202915e-05, + "loss": 0.1478, + "step": 13310 + }, + { + "epoch": 0.59, + "learning_rate": 4.046996030389081e-05, + "loss": 0.2019, + "step": 13320 + }, + { + "epoch": 0.59, + "learning_rate": 4.046252657557872e-05, + "loss": 0.1968, + "step": 13330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0455092847266616e-05, + "loss": 0.1879, + "step": 13340 + }, + { + "epoch": 0.59, + "learning_rate": 4.0447659118954526e-05, + "loss": 0.1816, + "step": 13350 + }, + { + "epoch": 0.59, + "learning_rate": 4.044022539064242e-05, + "loss": 0.184, + "step": 13360 + }, + { + "epoch": 0.59, + "learning_rate": 4.0432791662330324e-05, + "loss": 0.1695, + "step": 13370 + }, + { + "epoch": 0.59, + "learning_rate": 4.0425357934018233e-05, + "loss": 0.2152, + "step": 13380 + }, + { + "epoch": 0.59, + "learning_rate": 4.041792420570613e-05, + "loss": 0.1595, + "step": 13390 + }, + { + "epoch": 0.59, + "learning_rate": 4.041049047739404e-05, + "loss": 0.1546, + "step": 13400 + }, + { + "epoch": 0.59, + "learning_rate": 4.0403056749081934e-05, + "loss": 0.2177, + "step": 13410 + }, + { + "epoch": 0.59, + "learning_rate": 4.0395623020769844e-05, + "loss": 0.2422, + "step": 13420 + }, + { + "epoch": 0.59, + "learning_rate": 4.038818929245774e-05, + "loss": 0.1355, + "step": 13430 + }, + { + "epoch": 0.6, + "learning_rate": 4.038075556414564e-05, + "loss": 0.1599, + "step": 13440 + }, + { + "epoch": 0.6, + "learning_rate": 4.0373321835833545e-05, + "loss": 0.2513, + "step": 13450 + }, + { + "epoch": 0.6, + "learning_rate": 4.036588810752145e-05, + "loss": 0.1894, + "step": 13460 + }, + { + "epoch": 0.6, + "learning_rate": 4.035845437920935e-05, + "loss": 0.1278, + "step": 13470 + }, + { + "epoch": 0.6, + "learning_rate": 4.035102065089725e-05, + "loss": 0.155, + "step": 13480 + }, + { + "epoch": 0.6, + "learning_rate": 4.0343586922585155e-05, + "loss": 0.1679, + "step": 13490 + }, + { + "epoch": 0.6, + "learning_rate": 4.033615319427306e-05, + "loss": 0.1988, + "step": 13500 + }, + { + "epoch": 0.6, + "learning_rate": 4.032871946596096e-05, + "loss": 0.1582, + "step": 13510 + }, + { + "epoch": 0.6, + "learning_rate": 4.032128573764886e-05, + "loss": 0.1883, + "step": 13520 + }, + { + "epoch": 0.6, + "learning_rate": 4.0313852009336765e-05, + "loss": 0.1937, + "step": 13530 + }, + { + "epoch": 0.6, + "learning_rate": 4.030641828102467e-05, + "loss": 0.1393, + "step": 13540 + }, + { + "epoch": 0.6, + "learning_rate": 4.029898455271257e-05, + "loss": 0.1077, + "step": 13550 + }, + { + "epoch": 0.6, + "learning_rate": 4.029155082440047e-05, + "loss": 0.1836, + "step": 13560 + }, + { + "epoch": 0.6, + "learning_rate": 4.0284117096088376e-05, + "loss": 0.148, + "step": 13570 + }, + { + "epoch": 0.6, + "learning_rate": 4.027668336777627e-05, + "loss": 0.2009, + "step": 13580 + }, + { + "epoch": 0.6, + "learning_rate": 4.026924963946418e-05, + "loss": 0.1606, + "step": 13590 + }, + { + "epoch": 0.6, + "learning_rate": 4.0261815911152077e-05, + "loss": 0.2273, + "step": 13600 + }, + { + "epoch": 0.6, + "learning_rate": 4.0254382182839986e-05, + "loss": 0.2186, + "step": 13610 + }, + { + "epoch": 0.6, + "learning_rate": 4.024694845452789e-05, + "loss": 0.1569, + "step": 13620 + }, + { + "epoch": 0.6, + "learning_rate": 4.023951472621579e-05, + "loss": 0.2116, + "step": 13630 + }, + { + "epoch": 0.6, + "learning_rate": 4.0232080997903694e-05, + "loss": 0.2247, + "step": 13640 + }, + { + "epoch": 0.6, + "learning_rate": 4.022464726959159e-05, + "loss": 0.1533, + "step": 13650 + }, + { + "epoch": 0.6, + "learning_rate": 4.02172135412795e-05, + "loss": 0.1982, + "step": 13660 + }, + { + "epoch": 0.61, + "learning_rate": 4.0209779812967395e-05, + "loss": 0.1936, + "step": 13670 + }, + { + "epoch": 0.61, + "learning_rate": 4.0202346084655304e-05, + "loss": 0.1643, + "step": 13680 + }, + { + "epoch": 0.61, + "learning_rate": 4.01949123563432e-05, + "loss": 0.2199, + "step": 13690 + }, + { + "epoch": 0.61, + "learning_rate": 4.01874786280311e-05, + "loss": 0.1318, + "step": 13700 + }, + { + "epoch": 0.61, + "learning_rate": 4.0180044899719005e-05, + "loss": 0.2135, + "step": 13710 + }, + { + "epoch": 0.61, + "learning_rate": 4.017261117140691e-05, + "loss": 0.1672, + "step": 13720 + }, + { + "epoch": 0.61, + "learning_rate": 4.016517744309481e-05, + "loss": 0.1824, + "step": 13730 + }, + { + "epoch": 0.61, + "learning_rate": 4.015774371478271e-05, + "loss": 0.213, + "step": 13740 + }, + { + "epoch": 0.61, + "learning_rate": 4.015030998647062e-05, + "loss": 0.1086, + "step": 13750 + }, + { + "epoch": 0.61, + "learning_rate": 4.014287625815852e-05, + "loss": 0.1445, + "step": 13760 + }, + { + "epoch": 0.61, + "learning_rate": 4.013544252984642e-05, + "loss": 0.2606, + "step": 13770 + }, + { + "epoch": 0.61, + "learning_rate": 4.012800880153432e-05, + "loss": 0.1701, + "step": 13780 + }, + { + "epoch": 0.61, + "learning_rate": 4.0120575073222226e-05, + "loss": 0.1732, + "step": 13790 + }, + { + "epoch": 0.61, + "learning_rate": 4.011314134491013e-05, + "loss": 0.1474, + "step": 13800 + }, + { + "epoch": 0.61, + "learning_rate": 4.010570761659803e-05, + "loss": 0.1516, + "step": 13810 + }, + { + "epoch": 0.61, + "learning_rate": 4.009827388828593e-05, + "loss": 0.1552, + "step": 13820 + }, + { + "epoch": 0.61, + "learning_rate": 4.0090840159973836e-05, + "loss": 0.2087, + "step": 13830 + }, + { + "epoch": 0.61, + "learning_rate": 4.008340643166174e-05, + "loss": 0.1649, + "step": 13840 + }, + { + "epoch": 0.61, + "learning_rate": 4.007597270334964e-05, + "loss": 0.2025, + "step": 13850 + }, + { + "epoch": 0.61, + "learning_rate": 4.006853897503754e-05, + "loss": 0.2673, + "step": 13860 + }, + { + "epoch": 0.61, + "learning_rate": 4.0061105246725446e-05, + "loss": 0.1513, + "step": 13870 + }, + { + "epoch": 0.61, + "learning_rate": 4.005367151841335e-05, + "loss": 0.1874, + "step": 13880 + }, + { + "epoch": 0.61, + "learning_rate": 4.004623779010125e-05, + "loss": 0.1857, + "step": 13890 + }, + { + "epoch": 0.62, + "learning_rate": 4.0038804061789154e-05, + "loss": 0.1811, + "step": 13900 + }, + { + "epoch": 0.62, + "learning_rate": 4.003137033347705e-05, + "loss": 0.2069, + "step": 13910 + }, + { + "epoch": 0.62, + "learning_rate": 4.002393660516496e-05, + "loss": 0.1399, + "step": 13920 + }, + { + "epoch": 0.62, + "learning_rate": 4.0016502876852855e-05, + "loss": 0.1487, + "step": 13930 + }, + { + "epoch": 0.62, + "learning_rate": 4.0009069148540764e-05, + "loss": 0.1816, + "step": 13940 + }, + { + "epoch": 0.62, + "learning_rate": 4.000163542022866e-05, + "loss": 0.2088, + "step": 13950 + }, + { + "epoch": 0.62, + "learning_rate": 3.999420169191657e-05, + "loss": 0.2395, + "step": 13960 + }, + { + "epoch": 0.62, + "learning_rate": 3.9986767963604465e-05, + "loss": 0.1796, + "step": 13970 + }, + { + "epoch": 0.62, + "learning_rate": 3.997933423529237e-05, + "loss": 0.1745, + "step": 13980 + }, + { + "epoch": 0.62, + "learning_rate": 3.997190050698028e-05, + "loss": 0.2104, + "step": 13990 + }, + { + "epoch": 0.62, + "learning_rate": 3.996446677866817e-05, + "loss": 0.1316, + "step": 14000 + }, + { + "epoch": 0.62, + "learning_rate": 3.995703305035608e-05, + "loss": 0.1606, + "step": 14010 + }, + { + "epoch": 0.62, + "learning_rate": 3.994959932204398e-05, + "loss": 0.1842, + "step": 14020 + }, + { + "epoch": 0.62, + "learning_rate": 3.994216559373188e-05, + "loss": 0.1428, + "step": 14030 + }, + { + "epoch": 0.62, + "learning_rate": 3.993473186541978e-05, + "loss": 0.2148, + "step": 14040 + }, + { + "epoch": 0.62, + "learning_rate": 3.9927298137107686e-05, + "loss": 0.1331, + "step": 14050 + }, + { + "epoch": 0.62, + "learning_rate": 3.991986440879559e-05, + "loss": 0.2023, + "step": 14060 + }, + { + "epoch": 0.62, + "learning_rate": 3.991243068048349e-05, + "loss": 0.1662, + "step": 14070 + }, + { + "epoch": 0.62, + "learning_rate": 3.9904996952171394e-05, + "loss": 0.1284, + "step": 14080 + }, + { + "epoch": 0.62, + "learning_rate": 3.9897563223859296e-05, + "loss": 0.2006, + "step": 14090 + }, + { + "epoch": 0.62, + "learning_rate": 3.98901294955472e-05, + "loss": 0.1739, + "step": 14100 + }, + { + "epoch": 0.62, + "learning_rate": 3.98826957672351e-05, + "loss": 0.1807, + "step": 14110 + }, + { + "epoch": 0.63, + "learning_rate": 3.9875262038923004e-05, + "loss": 0.1737, + "step": 14120 + }, + { + "epoch": 0.63, + "learning_rate": 3.9867828310610907e-05, + "loss": 0.1571, + "step": 14130 + }, + { + "epoch": 0.63, + "learning_rate": 3.986039458229881e-05, + "loss": 0.2, + "step": 14140 + }, + { + "epoch": 0.63, + "learning_rate": 3.985296085398671e-05, + "loss": 0.1388, + "step": 14150 + }, + { + "epoch": 0.63, + "learning_rate": 3.9845527125674614e-05, + "loss": 0.1617, + "step": 14160 + }, + { + "epoch": 0.63, + "learning_rate": 3.983809339736252e-05, + "loss": 0.1556, + "step": 14170 + }, + { + "epoch": 0.63, + "learning_rate": 3.983065966905042e-05, + "loss": 0.2265, + "step": 14180 + }, + { + "epoch": 0.63, + "learning_rate": 3.9823225940738315e-05, + "loss": 0.1992, + "step": 14190 + }, + { + "epoch": 0.63, + "learning_rate": 3.9815792212426225e-05, + "loss": 0.18, + "step": 14200 + }, + { + "epoch": 0.63, + "learning_rate": 3.980835848411412e-05, + "loss": 0.1887, + "step": 14210 + }, + { + "epoch": 0.63, + "learning_rate": 3.980092475580203e-05, + "loss": 0.2241, + "step": 14220 + }, + { + "epoch": 0.63, + "learning_rate": 3.979349102748993e-05, + "loss": 0.2365, + "step": 14230 + }, + { + "epoch": 0.63, + "learning_rate": 3.978605729917783e-05, + "loss": 0.2057, + "step": 14240 + }, + { + "epoch": 0.63, + "learning_rate": 3.977862357086574e-05, + "loss": 0.1955, + "step": 14250 + }, + { + "epoch": 0.63, + "learning_rate": 3.977118984255363e-05, + "loss": 0.1765, + "step": 14260 + }, + { + "epoch": 0.63, + "learning_rate": 3.976375611424154e-05, + "loss": 0.1571, + "step": 14270 + }, + { + "epoch": 0.63, + "learning_rate": 3.975632238592944e-05, + "loss": 0.1937, + "step": 14280 + }, + { + "epoch": 0.63, + "learning_rate": 3.974888865761735e-05, + "loss": 0.134, + "step": 14290 + }, + { + "epoch": 0.63, + "learning_rate": 3.9741454929305244e-05, + "loss": 0.1776, + "step": 14300 + }, + { + "epoch": 0.63, + "learning_rate": 3.9734021200993146e-05, + "loss": 0.1537, + "step": 14310 + }, + { + "epoch": 0.63, + "learning_rate": 3.972658747268105e-05, + "loss": 0.2313, + "step": 14320 + }, + { + "epoch": 0.63, + "learning_rate": 3.971915374436895e-05, + "loss": 0.2078, + "step": 14330 + }, + { + "epoch": 0.63, + "learning_rate": 3.9711720016056854e-05, + "loss": 0.1616, + "step": 14340 + }, + { + "epoch": 0.64, + "learning_rate": 3.9704286287744756e-05, + "loss": 0.1798, + "step": 14350 + }, + { + "epoch": 0.64, + "learning_rate": 3.969685255943266e-05, + "loss": 0.1443, + "step": 14360 + }, + { + "epoch": 0.64, + "learning_rate": 3.968941883112056e-05, + "loss": 0.1898, + "step": 14370 + }, + { + "epoch": 0.64, + "learning_rate": 3.9681985102808464e-05, + "loss": 0.203, + "step": 14380 + }, + { + "epoch": 0.64, + "learning_rate": 3.967455137449637e-05, + "loss": 0.1466, + "step": 14390 + }, + { + "epoch": 0.64, + "learning_rate": 3.966711764618427e-05, + "loss": 0.1291, + "step": 14400 + }, + { + "epoch": 0.64, + "learning_rate": 3.965968391787217e-05, + "loss": 0.1811, + "step": 14410 + }, + { + "epoch": 0.64, + "learning_rate": 3.9652250189560075e-05, + "loss": 0.1675, + "step": 14420 + }, + { + "epoch": 0.64, + "learning_rate": 3.964481646124798e-05, + "loss": 0.2153, + "step": 14430 + }, + { + "epoch": 0.64, + "learning_rate": 3.963738273293588e-05, + "loss": 0.2505, + "step": 14440 + }, + { + "epoch": 0.64, + "learning_rate": 3.9629949004623776e-05, + "loss": 0.1653, + "step": 14450 + }, + { + "epoch": 0.64, + "learning_rate": 3.9622515276311685e-05, + "loss": 0.1974, + "step": 14460 + }, + { + "epoch": 0.64, + "learning_rate": 3.961508154799958e-05, + "loss": 0.198, + "step": 14470 + }, + { + "epoch": 0.64, + "learning_rate": 3.960764781968749e-05, + "loss": 0.1361, + "step": 14480 + }, + { + "epoch": 0.64, + "learning_rate": 3.960021409137539e-05, + "loss": 0.2049, + "step": 14490 + }, + { + "epoch": 0.64, + "learning_rate": 3.9592780363063295e-05, + "loss": 0.1601, + "step": 14500 + }, + { + "epoch": 0.64, + "learning_rate": 3.95853466347512e-05, + "loss": 0.1954, + "step": 14510 + }, + { + "epoch": 0.64, + "learning_rate": 3.9577912906439094e-05, + "loss": 0.2355, + "step": 14520 + }, + { + "epoch": 0.64, + "learning_rate": 3.9570479178127e-05, + "loss": 0.1804, + "step": 14530 + }, + { + "epoch": 0.64, + "learning_rate": 3.95630454498149e-05, + "loss": 0.1831, + "step": 14540 + }, + { + "epoch": 0.64, + "learning_rate": 3.955561172150281e-05, + "loss": 0.191, + "step": 14550 + }, + { + "epoch": 0.64, + "learning_rate": 3.9548177993190704e-05, + "loss": 0.1456, + "step": 14560 + }, + { + "epoch": 0.65, + "learning_rate": 3.9540744264878606e-05, + "loss": 0.2307, + "step": 14570 + }, + { + "epoch": 0.65, + "learning_rate": 3.953331053656651e-05, + "loss": 0.1738, + "step": 14580 + }, + { + "epoch": 0.65, + "learning_rate": 3.952587680825441e-05, + "loss": 0.1566, + "step": 14590 + }, + { + "epoch": 0.65, + "learning_rate": 3.951844307994232e-05, + "loss": 0.2108, + "step": 14600 + }, + { + "epoch": 0.65, + "learning_rate": 3.951100935163022e-05, + "loss": 0.1857, + "step": 14610 + }, + { + "epoch": 0.65, + "learning_rate": 3.9503575623318126e-05, + "loss": 0.1361, + "step": 14620 + }, + { + "epoch": 0.65, + "learning_rate": 3.949614189500602e-05, + "loss": 0.1757, + "step": 14630 + }, + { + "epoch": 0.65, + "learning_rate": 3.9488708166693925e-05, + "loss": 0.16, + "step": 14640 + }, + { + "epoch": 0.65, + "learning_rate": 3.948127443838183e-05, + "loss": 0.1148, + "step": 14650 + }, + { + "epoch": 0.65, + "learning_rate": 3.947384071006973e-05, + "loss": 0.2466, + "step": 14660 + }, + { + "epoch": 0.65, + "learning_rate": 3.946640698175763e-05, + "loss": 0.1654, + "step": 14670 + }, + { + "epoch": 0.65, + "learning_rate": 3.9458973253445535e-05, + "loss": 0.2074, + "step": 14680 + }, + { + "epoch": 0.65, + "learning_rate": 3.945153952513344e-05, + "loss": 0.1869, + "step": 14690 + }, + { + "epoch": 0.65, + "learning_rate": 3.944410579682134e-05, + "loss": 0.1838, + "step": 14700 + }, + { + "epoch": 0.65, + "learning_rate": 3.943667206850924e-05, + "loss": 0.2364, + "step": 14710 + }, + { + "epoch": 0.65, + "learning_rate": 3.9429238340197145e-05, + "loss": 0.1961, + "step": 14720 + }, + { + "epoch": 0.65, + "learning_rate": 3.942180461188505e-05, + "loss": 0.1546, + "step": 14730 + }, + { + "epoch": 0.65, + "learning_rate": 3.941437088357295e-05, + "loss": 0.173, + "step": 14740 + }, + { + "epoch": 0.65, + "learning_rate": 3.940693715526085e-05, + "loss": 0.1447, + "step": 14750 + }, + { + "epoch": 0.65, + "learning_rate": 3.9399503426948755e-05, + "loss": 0.1635, + "step": 14760 + }, + { + "epoch": 0.65, + "learning_rate": 3.939206969863666e-05, + "loss": 0.1859, + "step": 14770 + }, + { + "epoch": 0.65, + "learning_rate": 3.9384635970324554e-05, + "loss": 0.1443, + "step": 14780 + }, + { + "epoch": 0.65, + "learning_rate": 3.937720224201246e-05, + "loss": 0.1578, + "step": 14790 + }, + { + "epoch": 0.66, + "learning_rate": 3.936976851370036e-05, + "loss": 0.1743, + "step": 14800 + }, + { + "epoch": 0.66, + "learning_rate": 3.936233478538827e-05, + "loss": 0.2203, + "step": 14810 + }, + { + "epoch": 0.66, + "learning_rate": 3.9354901057076164e-05, + "loss": 0.175, + "step": 14820 + }, + { + "epoch": 0.66, + "learning_rate": 3.9347467328764074e-05, + "loss": 0.2347, + "step": 14830 + }, + { + "epoch": 0.66, + "learning_rate": 3.934003360045197e-05, + "loss": 0.1566, + "step": 14840 + }, + { + "epoch": 0.66, + "learning_rate": 3.933259987213987e-05, + "loss": 0.2075, + "step": 14850 + }, + { + "epoch": 0.66, + "learning_rate": 3.932516614382778e-05, + "loss": 0.1721, + "step": 14860 + }, + { + "epoch": 0.66, + "learning_rate": 3.931773241551568e-05, + "loss": 0.1945, + "step": 14870 + }, + { + "epoch": 0.66, + "learning_rate": 3.9310298687203586e-05, + "loss": 0.1996, + "step": 14880 + }, + { + "epoch": 0.66, + "learning_rate": 3.930286495889148e-05, + "loss": 0.2423, + "step": 14890 + }, + { + "epoch": 0.66, + "learning_rate": 3.9295431230579385e-05, + "loss": 0.2015, + "step": 14900 + }, + { + "epoch": 0.66, + "learning_rate": 3.928799750226729e-05, + "loss": 0.1663, + "step": 14910 + }, + { + "epoch": 0.66, + "learning_rate": 3.928056377395519e-05, + "loss": 0.146, + "step": 14920 + }, + { + "epoch": 0.66, + "learning_rate": 3.927313004564309e-05, + "loss": 0.1866, + "step": 14930 + }, + { + "epoch": 0.66, + "learning_rate": 3.9265696317330995e-05, + "loss": 0.2157, + "step": 14940 + }, + { + "epoch": 0.66, + "learning_rate": 3.92582625890189e-05, + "loss": 0.1808, + "step": 14950 + }, + { + "epoch": 0.66, + "learning_rate": 3.92508288607068e-05, + "loss": 0.2068, + "step": 14960 + }, + { + "epoch": 0.66, + "learning_rate": 3.92433951323947e-05, + "loss": 0.1938, + "step": 14970 + }, + { + "epoch": 0.66, + "learning_rate": 3.9235961404082605e-05, + "loss": 0.1634, + "step": 14980 + }, + { + "epoch": 0.66, + "learning_rate": 3.922852767577051e-05, + "loss": 0.1862, + "step": 14990 + }, + { + "epoch": 0.66, + "learning_rate": 3.922109394745841e-05, + "loss": 0.1899, + "step": 15000 + }, + { + "epoch": 0.66, + "learning_rate": 3.921366021914631e-05, + "loss": 0.1515, + "step": 15010 + }, + { + "epoch": 0.66, + "learning_rate": 3.9206226490834216e-05, + "loss": 0.1747, + "step": 15020 + }, + { + "epoch": 0.67, + "learning_rate": 3.919879276252212e-05, + "loss": 0.2085, + "step": 15030 + }, + { + "epoch": 0.67, + "learning_rate": 3.919135903421002e-05, + "loss": 0.1681, + "step": 15040 + }, + { + "epoch": 0.67, + "learning_rate": 3.9183925305897924e-05, + "loss": 0.1389, + "step": 15050 + }, + { + "epoch": 0.67, + "learning_rate": 3.917649157758582e-05, + "loss": 0.182, + "step": 15060 + }, + { + "epoch": 0.67, + "learning_rate": 3.916905784927373e-05, + "loss": 0.1381, + "step": 15070 + }, + { + "epoch": 0.67, + "learning_rate": 3.9161624120961624e-05, + "loss": 0.155, + "step": 15080 + }, + { + "epoch": 0.67, + "learning_rate": 3.9154190392649534e-05, + "loss": 0.1663, + "step": 15090 + }, + { + "epoch": 0.67, + "learning_rate": 3.9146756664337436e-05, + "loss": 0.1594, + "step": 15100 + }, + { + "epoch": 0.67, + "learning_rate": 3.913932293602533e-05, + "loss": 0.1702, + "step": 15110 + }, + { + "epoch": 0.67, + "learning_rate": 3.913188920771324e-05, + "loss": 0.2096, + "step": 15120 + }, + { + "epoch": 0.67, + "learning_rate": 3.912445547940114e-05, + "loss": 0.2124, + "step": 15130 + }, + { + "epoch": 0.67, + "learning_rate": 3.911702175108905e-05, + "loss": 0.2006, + "step": 15140 + }, + { + "epoch": 0.67, + "learning_rate": 3.910958802277694e-05, + "loss": 0.1881, + "step": 15150 + }, + { + "epoch": 0.67, + "learning_rate": 3.910215429446485e-05, + "loss": 0.1999, + "step": 15160 + }, + { + "epoch": 0.67, + "learning_rate": 3.909472056615275e-05, + "loss": 0.2433, + "step": 15170 + }, + { + "epoch": 0.67, + "learning_rate": 3.908728683784065e-05, + "loss": 0.1651, + "step": 15180 + }, + { + "epoch": 0.67, + "learning_rate": 3.907985310952855e-05, + "loss": 0.1613, + "step": 15190 + }, + { + "epoch": 0.67, + "learning_rate": 3.9072419381216455e-05, + "loss": 0.1463, + "step": 15200 + }, + { + "epoch": 0.67, + "learning_rate": 3.9064985652904365e-05, + "loss": 0.1863, + "step": 15210 + }, + { + "epoch": 0.67, + "learning_rate": 3.905755192459226e-05, + "loss": 0.2316, + "step": 15220 + }, + { + "epoch": 0.67, + "learning_rate": 3.905011819628016e-05, + "loss": 0.1711, + "step": 15230 + }, + { + "epoch": 0.67, + "learning_rate": 3.9042684467968066e-05, + "loss": 0.122, + "step": 15240 + }, + { + "epoch": 0.68, + "learning_rate": 3.903525073965597e-05, + "loss": 0.166, + "step": 15250 + }, + { + "epoch": 0.68, + "learning_rate": 3.902781701134387e-05, + "loss": 0.1268, + "step": 15260 + }, + { + "epoch": 0.68, + "learning_rate": 3.9020383283031773e-05, + "loss": 0.2188, + "step": 15270 + }, + { + "epoch": 0.68, + "learning_rate": 3.9012949554719676e-05, + "loss": 0.1867, + "step": 15280 + }, + { + "epoch": 0.68, + "learning_rate": 3.900551582640758e-05, + "loss": 0.1212, + "step": 15290 + }, + { + "epoch": 0.68, + "learning_rate": 3.899808209809548e-05, + "loss": 0.176, + "step": 15300 + }, + { + "epoch": 0.68, + "learning_rate": 3.8990648369783384e-05, + "loss": 0.2244, + "step": 15310 + }, + { + "epoch": 0.68, + "learning_rate": 3.898321464147128e-05, + "loss": 0.1978, + "step": 15320 + }, + { + "epoch": 0.68, + "learning_rate": 3.897578091315919e-05, + "loss": 0.1555, + "step": 15330 + }, + { + "epoch": 0.68, + "learning_rate": 3.896834718484709e-05, + "loss": 0.159, + "step": 15340 + }, + { + "epoch": 0.68, + "learning_rate": 3.8960913456534994e-05, + "loss": 0.1307, + "step": 15350 + }, + { + "epoch": 0.68, + "learning_rate": 3.89534797282229e-05, + "loss": 0.1686, + "step": 15360 + }, + { + "epoch": 0.68, + "learning_rate": 3.89460459999108e-05, + "loss": 0.175, + "step": 15370 + }, + { + "epoch": 0.68, + "learning_rate": 3.89386122715987e-05, + "loss": 0.196, + "step": 15380 + }, + { + "epoch": 0.68, + "learning_rate": 3.89311785432866e-05, + "loss": 0.1865, + "step": 15390 + }, + { + "epoch": 0.68, + "learning_rate": 3.892374481497451e-05, + "loss": 0.2059, + "step": 15400 + }, + { + "epoch": 0.68, + "learning_rate": 3.89163110866624e-05, + "loss": 0.1713, + "step": 15410 + }, + { + "epoch": 0.68, + "learning_rate": 3.890887735835031e-05, + "loss": 0.1615, + "step": 15420 + }, + { + "epoch": 0.68, + "learning_rate": 3.890144363003821e-05, + "loss": 0.1533, + "step": 15430 + }, + { + "epoch": 0.68, + "learning_rate": 3.889400990172611e-05, + "loss": 0.1741, + "step": 15440 + }, + { + "epoch": 0.68, + "learning_rate": 3.888657617341401e-05, + "loss": 0.1842, + "step": 15450 + }, + { + "epoch": 0.68, + "learning_rate": 3.8879142445101916e-05, + "loss": 0.15, + "step": 15460 + }, + { + "epoch": 0.68, + "learning_rate": 3.8871708716789825e-05, + "loss": 0.1432, + "step": 15470 + }, + { + "epoch": 0.69, + "learning_rate": 3.886427498847772e-05, + "loss": 0.1422, + "step": 15480 + }, + { + "epoch": 0.69, + "learning_rate": 3.885684126016563e-05, + "loss": 0.1829, + "step": 15490 + }, + { + "epoch": 0.69, + "learning_rate": 3.8849407531853526e-05, + "loss": 0.1515, + "step": 15500 + }, + { + "epoch": 0.69, + "learning_rate": 3.884197380354143e-05, + "loss": 0.2119, + "step": 15510 + }, + { + "epoch": 0.69, + "learning_rate": 3.883454007522933e-05, + "loss": 0.1384, + "step": 15520 + }, + { + "epoch": 0.69, + "learning_rate": 3.8827106346917234e-05, + "loss": 0.1582, + "step": 15530 + }, + { + "epoch": 0.69, + "learning_rate": 3.8819672618605136e-05, + "loss": 0.2138, + "step": 15540 + }, + { + "epoch": 0.69, + "learning_rate": 3.881223889029304e-05, + "loss": 0.2406, + "step": 15550 + }, + { + "epoch": 0.69, + "learning_rate": 3.880480516198094e-05, + "loss": 0.1675, + "step": 15560 + }, + { + "epoch": 0.69, + "learning_rate": 3.8797371433668844e-05, + "loss": 0.1816, + "step": 15570 + }, + { + "epoch": 0.69, + "learning_rate": 3.878993770535675e-05, + "loss": 0.1913, + "step": 15580 + }, + { + "epoch": 0.69, + "learning_rate": 3.878250397704465e-05, + "loss": 0.2115, + "step": 15590 + }, + { + "epoch": 0.69, + "learning_rate": 3.877507024873255e-05, + "loss": 0.1613, + "step": 15600 + }, + { + "epoch": 0.69, + "learning_rate": 3.8767636520420454e-05, + "loss": 0.1511, + "step": 15610 + }, + { + "epoch": 0.69, + "learning_rate": 3.876020279210836e-05, + "loss": 0.192, + "step": 15620 + }, + { + "epoch": 0.69, + "learning_rate": 3.875276906379626e-05, + "loss": 0.1873, + "step": 15630 + }, + { + "epoch": 0.69, + "learning_rate": 3.874533533548416e-05, + "loss": 0.1503, + "step": 15640 + }, + { + "epoch": 0.69, + "learning_rate": 3.873790160717206e-05, + "loss": 0.187, + "step": 15650 + }, + { + "epoch": 0.69, + "learning_rate": 3.873046787885997e-05, + "loss": 0.1747, + "step": 15660 + }, + { + "epoch": 0.69, + "learning_rate": 3.872303415054786e-05, + "loss": 0.1603, + "step": 15670 + }, + { + "epoch": 0.69, + "learning_rate": 3.871560042223577e-05, + "loss": 0.1315, + "step": 15680 + }, + { + "epoch": 0.69, + "learning_rate": 3.870816669392367e-05, + "loss": 0.2076, + "step": 15690 + }, + { + "epoch": 0.7, + "learning_rate": 3.870073296561158e-05, + "loss": 0.1288, + "step": 15700 + }, + { + "epoch": 0.7, + "learning_rate": 3.869329923729948e-05, + "loss": 0.2173, + "step": 15710 + }, + { + "epoch": 0.7, + "learning_rate": 3.8685865508987376e-05, + "loss": 0.2012, + "step": 15720 + }, + { + "epoch": 0.7, + "learning_rate": 3.8678431780675285e-05, + "loss": 0.2115, + "step": 15730 + }, + { + "epoch": 0.7, + "learning_rate": 3.867099805236318e-05, + "loss": 0.1744, + "step": 15740 + }, + { + "epoch": 0.7, + "learning_rate": 3.866356432405109e-05, + "loss": 0.1673, + "step": 15750 + }, + { + "epoch": 0.7, + "learning_rate": 3.8656130595738986e-05, + "loss": 0.1505, + "step": 15760 + }, + { + "epoch": 0.7, + "learning_rate": 3.8648696867426896e-05, + "loss": 0.1564, + "step": 15770 + }, + { + "epoch": 0.7, + "learning_rate": 3.864126313911479e-05, + "loss": 0.1759, + "step": 15780 + }, + { + "epoch": 0.7, + "learning_rate": 3.8633829410802694e-05, + "loss": 0.1863, + "step": 15790 + }, + { + "epoch": 0.7, + "learning_rate": 3.86263956824906e-05, + "loss": 0.1779, + "step": 15800 + }, + { + "epoch": 0.7, + "learning_rate": 3.86189619541785e-05, + "loss": 0.1925, + "step": 15810 + }, + { + "epoch": 0.7, + "learning_rate": 3.86115282258664e-05, + "loss": 0.2246, + "step": 15820 + }, + { + "epoch": 0.7, + "learning_rate": 3.8604094497554304e-05, + "loss": 0.2157, + "step": 15830 + }, + { + "epoch": 0.7, + "learning_rate": 3.859666076924221e-05, + "loss": 0.2142, + "step": 15840 + }, + { + "epoch": 0.7, + "learning_rate": 3.858922704093011e-05, + "loss": 0.148, + "step": 15850 + }, + { + "epoch": 0.7, + "learning_rate": 3.858179331261801e-05, + "loss": 0.1591, + "step": 15860 + }, + { + "epoch": 0.7, + "learning_rate": 3.8574359584305915e-05, + "loss": 0.1627, + "step": 15870 + }, + { + "epoch": 0.7, + "learning_rate": 3.856692585599382e-05, + "loss": 0.1916, + "step": 15880 + }, + { + "epoch": 0.7, + "learning_rate": 3.855949212768172e-05, + "loss": 0.1535, + "step": 15890 + }, + { + "epoch": 0.7, + "learning_rate": 3.855205839936962e-05, + "loss": 0.2123, + "step": 15900 + }, + { + "epoch": 0.7, + "learning_rate": 3.8544624671057525e-05, + "loss": 0.1603, + "step": 15910 + }, + { + "epoch": 0.7, + "learning_rate": 3.853719094274543e-05, + "loss": 0.2374, + "step": 15920 + }, + { + "epoch": 0.71, + "learning_rate": 3.8529757214433323e-05, + "loss": 0.2046, + "step": 15930 + }, + { + "epoch": 0.71, + "learning_rate": 3.852232348612123e-05, + "loss": 0.1366, + "step": 15940 + }, + { + "epoch": 0.71, + "learning_rate": 3.8514889757809135e-05, + "loss": 0.2231, + "step": 15950 + }, + { + "epoch": 0.71, + "learning_rate": 3.850745602949704e-05, + "loss": 0.1625, + "step": 15960 + }, + { + "epoch": 0.71, + "learning_rate": 3.850002230118494e-05, + "loss": 0.1942, + "step": 15970 + }, + { + "epoch": 0.71, + "learning_rate": 3.849258857287284e-05, + "loss": 0.1828, + "step": 15980 + }, + { + "epoch": 0.71, + "learning_rate": 3.8485154844560746e-05, + "loss": 0.1377, + "step": 15990 + }, + { + "epoch": 0.71, + "learning_rate": 3.847772111624864e-05, + "loss": 0.153, + "step": 16000 + }, + { + "epoch": 0.71, + "learning_rate": 3.847028738793655e-05, + "loss": 0.1481, + "step": 16010 + }, + { + "epoch": 0.71, + "learning_rate": 3.846285365962445e-05, + "loss": 0.1814, + "step": 16020 + }, + { + "epoch": 0.71, + "learning_rate": 3.8455419931312356e-05, + "loss": 0.2209, + "step": 16030 + }, + { + "epoch": 0.71, + "learning_rate": 3.844798620300025e-05, + "loss": 0.1763, + "step": 16040 + }, + { + "epoch": 0.71, + "learning_rate": 3.8440552474688154e-05, + "loss": 0.1839, + "step": 16050 + }, + { + "epoch": 0.71, + "learning_rate": 3.843311874637606e-05, + "loss": 0.1772, + "step": 16060 + }, + { + "epoch": 0.71, + "learning_rate": 3.842568501806396e-05, + "loss": 0.1603, + "step": 16070 + }, + { + "epoch": 0.71, + "learning_rate": 3.841825128975187e-05, + "loss": 0.1422, + "step": 16080 + }, + { + "epoch": 0.71, + "learning_rate": 3.8410817561439765e-05, + "loss": 0.1599, + "step": 16090 + }, + { + "epoch": 0.71, + "learning_rate": 3.8403383833127674e-05, + "loss": 0.1844, + "step": 16100 + }, + { + "epoch": 0.71, + "learning_rate": 3.839595010481557e-05, + "loss": 0.198, + "step": 16110 + }, + { + "epoch": 0.71, + "learning_rate": 3.838851637650347e-05, + "loss": 0.1726, + "step": 16120 + }, + { + "epoch": 0.71, + "learning_rate": 3.8381082648191375e-05, + "loss": 0.1359, + "step": 16130 + }, + { + "epoch": 0.71, + "learning_rate": 3.837364891987928e-05, + "loss": 0.1393, + "step": 16140 + }, + { + "epoch": 0.72, + "learning_rate": 3.836621519156718e-05, + "loss": 0.1995, + "step": 16150 + }, + { + "epoch": 0.72, + "learning_rate": 3.835878146325508e-05, + "loss": 0.1708, + "step": 16160 + }, + { + "epoch": 0.72, + "learning_rate": 3.8351347734942985e-05, + "loss": 0.1442, + "step": 16170 + }, + { + "epoch": 0.72, + "learning_rate": 3.834391400663089e-05, + "loss": 0.183, + "step": 16180 + }, + { + "epoch": 0.72, + "learning_rate": 3.8336480278318784e-05, + "loss": 0.2085, + "step": 16190 + }, + { + "epoch": 0.72, + "learning_rate": 3.832904655000669e-05, + "loss": 0.1921, + "step": 16200 + }, + { + "epoch": 0.72, + "learning_rate": 3.8321612821694596e-05, + "loss": 0.1552, + "step": 16210 + }, + { + "epoch": 0.72, + "learning_rate": 3.83141790933825e-05, + "loss": 0.1461, + "step": 16220 + }, + { + "epoch": 0.72, + "learning_rate": 3.83067453650704e-05, + "loss": 0.1943, + "step": 16230 + }, + { + "epoch": 0.72, + "learning_rate": 3.82993116367583e-05, + "loss": 0.1821, + "step": 16240 + }, + { + "epoch": 0.72, + "learning_rate": 3.8291877908446206e-05, + "loss": 0.1274, + "step": 16250 + }, + { + "epoch": 0.72, + "learning_rate": 3.82844441801341e-05, + "loss": 0.1939, + "step": 16260 + }, + { + "epoch": 0.72, + "learning_rate": 3.827701045182201e-05, + "loss": 0.1674, + "step": 16270 + }, + { + "epoch": 0.72, + "learning_rate": 3.826957672350991e-05, + "loss": 0.1787, + "step": 16280 + }, + { + "epoch": 0.72, + "learning_rate": 3.8262142995197816e-05, + "loss": 0.1931, + "step": 16290 + }, + { + "epoch": 0.72, + "learning_rate": 3.825470926688571e-05, + "loss": 0.1361, + "step": 16300 + }, + { + "epoch": 0.72, + "learning_rate": 3.824727553857362e-05, + "loss": 0.1614, + "step": 16310 + }, + { + "epoch": 0.72, + "learning_rate": 3.8239841810261524e-05, + "loss": 0.1769, + "step": 16320 + }, + { + "epoch": 0.72, + "learning_rate": 3.823240808194942e-05, + "loss": 0.1783, + "step": 16330 + }, + { + "epoch": 0.72, + "learning_rate": 3.822497435363733e-05, + "loss": 0.1674, + "step": 16340 + }, + { + "epoch": 0.72, + "learning_rate": 3.8217540625325225e-05, + "loss": 0.1891, + "step": 16350 + }, + { + "epoch": 0.72, + "learning_rate": 3.8210106897013134e-05, + "loss": 0.1544, + "step": 16360 + }, + { + "epoch": 0.72, + "learning_rate": 3.820267316870103e-05, + "loss": 0.1447, + "step": 16370 + }, + { + "epoch": 0.73, + "learning_rate": 3.819523944038893e-05, + "loss": 0.2382, + "step": 16380 + }, + { + "epoch": 0.73, + "learning_rate": 3.8187805712076835e-05, + "loss": 0.1624, + "step": 16390 + }, + { + "epoch": 0.73, + "learning_rate": 3.818037198376474e-05, + "loss": 0.2006, + "step": 16400 + }, + { + "epoch": 0.73, + "learning_rate": 3.817293825545264e-05, + "loss": 0.1946, + "step": 16410 + }, + { + "epoch": 0.73, + "learning_rate": 3.816550452714054e-05, + "loss": 0.1444, + "step": 16420 + }, + { + "epoch": 0.73, + "learning_rate": 3.8158070798828446e-05, + "loss": 0.1565, + "step": 16430 + }, + { + "epoch": 0.73, + "learning_rate": 3.815063707051635e-05, + "loss": 0.1934, + "step": 16440 + }, + { + "epoch": 0.73, + "learning_rate": 3.814320334220425e-05, + "loss": 0.2188, + "step": 16450 + }, + { + "epoch": 0.73, + "learning_rate": 3.813576961389215e-05, + "loss": 0.176, + "step": 16460 + }, + { + "epoch": 0.73, + "learning_rate": 3.8128335885580056e-05, + "loss": 0.1926, + "step": 16470 + }, + { + "epoch": 0.73, + "learning_rate": 3.812090215726796e-05, + "loss": 0.1556, + "step": 16480 + }, + { + "epoch": 0.73, + "learning_rate": 3.811346842895586e-05, + "loss": 0.1507, + "step": 16490 + }, + { + "epoch": 0.73, + "learning_rate": 3.8106034700643764e-05, + "loss": 0.1661, + "step": 16500 + }, + { + "epoch": 0.73, + "learning_rate": 3.8098600972331666e-05, + "loss": 0.1154, + "step": 16510 + }, + { + "epoch": 0.73, + "learning_rate": 3.809116724401957e-05, + "loss": 0.2014, + "step": 16520 + }, + { + "epoch": 0.73, + "learning_rate": 3.808373351570747e-05, + "loss": 0.2436, + "step": 16530 + }, + { + "epoch": 0.73, + "learning_rate": 3.807629978739537e-05, + "loss": 0.2163, + "step": 16540 + }, + { + "epoch": 0.73, + "learning_rate": 3.8068866059083277e-05, + "loss": 0.1875, + "step": 16550 + }, + { + "epoch": 0.73, + "learning_rate": 3.806143233077118e-05, + "loss": 0.2037, + "step": 16560 + }, + { + "epoch": 0.73, + "learning_rate": 3.805399860245908e-05, + "loss": 0.1432, + "step": 16570 + }, + { + "epoch": 0.73, + "learning_rate": 3.8046564874146984e-05, + "loss": 0.1327, + "step": 16580 + }, + { + "epoch": 0.73, + "learning_rate": 3.803913114583488e-05, + "loss": 0.1799, + "step": 16590 + }, + { + "epoch": 0.73, + "learning_rate": 3.803169741752279e-05, + "loss": 0.1937, + "step": 16600 + }, + { + "epoch": 0.74, + "learning_rate": 3.8024263689210685e-05, + "loss": 0.163, + "step": 16610 + }, + { + "epoch": 0.74, + "learning_rate": 3.8016829960898595e-05, + "loss": 0.2167, + "step": 16620 + }, + { + "epoch": 0.74, + "learning_rate": 3.800939623258649e-05, + "loss": 0.1855, + "step": 16630 + }, + { + "epoch": 0.74, + "learning_rate": 3.80019625042744e-05, + "loss": 0.1882, + "step": 16640 + }, + { + "epoch": 0.74, + "learning_rate": 3.7994528775962296e-05, + "loss": 0.1876, + "step": 16650 + }, + { + "epoch": 0.74, + "learning_rate": 3.79870950476502e-05, + "loss": 0.1726, + "step": 16660 + }, + { + "epoch": 0.74, + "learning_rate": 3.79796613193381e-05, + "loss": 0.1705, + "step": 16670 + }, + { + "epoch": 0.74, + "learning_rate": 3.7972227591026e-05, + "loss": 0.2255, + "step": 16680 + }, + { + "epoch": 0.74, + "learning_rate": 3.796479386271391e-05, + "loss": 0.1819, + "step": 16690 + }, + { + "epoch": 0.74, + "learning_rate": 3.795736013440181e-05, + "loss": 0.136, + "step": 16700 + }, + { + "epoch": 0.74, + "learning_rate": 3.794992640608971e-05, + "loss": 0.1702, + "step": 16710 + }, + { + "epoch": 0.74, + "learning_rate": 3.7942492677777614e-05, + "loss": 0.1576, + "step": 16720 + }, + { + "epoch": 0.74, + "learning_rate": 3.7935058949465516e-05, + "loss": 0.1629, + "step": 16730 + }, + { + "epoch": 0.74, + "learning_rate": 3.792762522115342e-05, + "loss": 0.2137, + "step": 16740 + }, + { + "epoch": 0.74, + "learning_rate": 3.792019149284132e-05, + "loss": 0.2444, + "step": 16750 + }, + { + "epoch": 0.74, + "learning_rate": 3.7912757764529224e-05, + "loss": 0.1903, + "step": 16760 + }, + { + "epoch": 0.74, + "learning_rate": 3.7905324036217127e-05, + "loss": 0.1866, + "step": 16770 + }, + { + "epoch": 0.74, + "learning_rate": 3.789789030790503e-05, + "loss": 0.1845, + "step": 16780 + }, + { + "epoch": 0.74, + "learning_rate": 3.789045657959293e-05, + "loss": 0.1824, + "step": 16790 + }, + { + "epoch": 0.74, + "learning_rate": 3.788302285128083e-05, + "loss": 0.1665, + "step": 16800 + }, + { + "epoch": 0.74, + "learning_rate": 3.787558912296874e-05, + "loss": 0.1295, + "step": 16810 + }, + { + "epoch": 0.74, + "learning_rate": 3.786815539465664e-05, + "loss": 0.2139, + "step": 16820 + }, + { + "epoch": 0.75, + "learning_rate": 3.786072166634454e-05, + "loss": 0.182, + "step": 16830 + }, + { + "epoch": 0.75, + "learning_rate": 3.7853287938032445e-05, + "loss": 0.1715, + "step": 16840 + }, + { + "epoch": 0.75, + "learning_rate": 3.784585420972035e-05, + "loss": 0.1296, + "step": 16850 + }, + { + "epoch": 0.75, + "learning_rate": 3.783842048140825e-05, + "loss": 0.1712, + "step": 16860 + }, + { + "epoch": 0.75, + "learning_rate": 3.7830986753096146e-05, + "loss": 0.1466, + "step": 16870 + }, + { + "epoch": 0.75, + "learning_rate": 3.7823553024784055e-05, + "loss": 0.1509, + "step": 16880 + }, + { + "epoch": 0.75, + "learning_rate": 3.781611929647195e-05, + "loss": 0.1392, + "step": 16890 + }, + { + "epoch": 0.75, + "learning_rate": 3.780868556815986e-05, + "loss": 0.1625, + "step": 16900 + }, + { + "epoch": 0.75, + "learning_rate": 3.7801251839847756e-05, + "loss": 0.1873, + "step": 16910 + }, + { + "epoch": 0.75, + "learning_rate": 3.779381811153566e-05, + "loss": 0.1728, + "step": 16920 + }, + { + "epoch": 0.75, + "learning_rate": 3.778638438322357e-05, + "loss": 0.1893, + "step": 16930 + }, + { + "epoch": 0.75, + "learning_rate": 3.7778950654911464e-05, + "loss": 0.2019, + "step": 16940 + }, + { + "epoch": 0.75, + "learning_rate": 3.777151692659937e-05, + "loss": 0.1736, + "step": 16950 + }, + { + "epoch": 0.75, + "learning_rate": 3.776408319828727e-05, + "loss": 0.1544, + "step": 16960 + }, + { + "epoch": 0.75, + "learning_rate": 3.775664946997518e-05, + "loss": 0.1733, + "step": 16970 + }, + { + "epoch": 0.75, + "learning_rate": 3.7749215741663074e-05, + "loss": 0.2452, + "step": 16980 + }, + { + "epoch": 0.75, + "learning_rate": 3.7741782013350976e-05, + "loss": 0.166, + "step": 16990 + }, + { + "epoch": 0.75, + "learning_rate": 3.773434828503888e-05, + "loss": 0.1652, + "step": 17000 + }, + { + "epoch": 0.75, + "learning_rate": 3.772691455672678e-05, + "loss": 0.1679, + "step": 17010 + }, + { + "epoch": 0.75, + "learning_rate": 3.7719480828414684e-05, + "loss": 0.189, + "step": 17020 + }, + { + "epoch": 0.75, + "learning_rate": 3.771204710010259e-05, + "loss": 0.1915, + "step": 17030 + }, + { + "epoch": 0.75, + "learning_rate": 3.770461337179049e-05, + "loss": 0.1695, + "step": 17040 + }, + { + "epoch": 0.75, + "learning_rate": 3.769717964347839e-05, + "loss": 0.2039, + "step": 17050 + }, + { + "epoch": 0.76, + "learning_rate": 3.7689745915166295e-05, + "loss": 0.194, + "step": 17060 + }, + { + "epoch": 0.76, + "learning_rate": 3.76823121868542e-05, + "loss": 0.1376, + "step": 17070 + }, + { + "epoch": 0.76, + "learning_rate": 3.76748784585421e-05, + "loss": 0.1797, + "step": 17080 + }, + { + "epoch": 0.76, + "learning_rate": 3.766744473023e-05, + "loss": 0.2214, + "step": 17090 + }, + { + "epoch": 0.76, + "learning_rate": 3.7660011001917905e-05, + "loss": 0.1726, + "step": 17100 + }, + { + "epoch": 0.76, + "learning_rate": 3.765257727360581e-05, + "loss": 0.182, + "step": 17110 + }, + { + "epoch": 0.76, + "learning_rate": 3.764514354529371e-05, + "loss": 0.1724, + "step": 17120 + }, + { + "epoch": 0.76, + "learning_rate": 3.7637709816981606e-05, + "loss": 0.1936, + "step": 17130 + }, + { + "epoch": 0.76, + "learning_rate": 3.7630276088669515e-05, + "loss": 0.1548, + "step": 17140 + }, + { + "epoch": 0.76, + "learning_rate": 3.762284236035741e-05, + "loss": 0.1747, + "step": 17150 + }, + { + "epoch": 0.76, + "learning_rate": 3.761540863204532e-05, + "loss": 0.1448, + "step": 17160 + }, + { + "epoch": 0.76, + "learning_rate": 3.7607974903733216e-05, + "loss": 0.1696, + "step": 17170 + }, + { + "epoch": 0.76, + "learning_rate": 3.7600541175421126e-05, + "loss": 0.1586, + "step": 17180 + }, + { + "epoch": 0.76, + "learning_rate": 3.759310744710903e-05, + "loss": 0.1775, + "step": 17190 + }, + { + "epoch": 0.76, + "learning_rate": 3.7585673718796924e-05, + "loss": 0.1469, + "step": 17200 + }, + { + "epoch": 0.76, + "learning_rate": 3.757823999048483e-05, + "loss": 0.2098, + "step": 17210 + }, + { + "epoch": 0.76, + "learning_rate": 3.757080626217273e-05, + "loss": 0.2723, + "step": 17220 + }, + { + "epoch": 0.76, + "learning_rate": 3.756337253386064e-05, + "loss": 0.1746, + "step": 17230 + }, + { + "epoch": 0.76, + "learning_rate": 3.7555938805548534e-05, + "loss": 0.183, + "step": 17240 + }, + { + "epoch": 0.76, + "learning_rate": 3.754850507723644e-05, + "loss": 0.2332, + "step": 17250 + }, + { + "epoch": 0.76, + "learning_rate": 3.754107134892434e-05, + "loss": 0.1237, + "step": 17260 + }, + { + "epoch": 0.76, + "learning_rate": 3.753363762061224e-05, + "loss": 0.16, + "step": 17270 + }, + { + "epoch": 0.77, + "learning_rate": 3.7526203892300145e-05, + "loss": 0.1458, + "step": 17280 + }, + { + "epoch": 0.77, + "learning_rate": 3.751877016398805e-05, + "loss": 0.1945, + "step": 17290 + }, + { + "epoch": 0.77, + "learning_rate": 3.7511336435675956e-05, + "loss": 0.1851, + "step": 17300 + }, + { + "epoch": 0.77, + "learning_rate": 3.750390270736385e-05, + "loss": 0.1674, + "step": 17310 + }, + { + "epoch": 0.77, + "learning_rate": 3.7496468979051755e-05, + "loss": 0.1898, + "step": 17320 + }, + { + "epoch": 0.77, + "learning_rate": 3.748903525073966e-05, + "loss": 0.2348, + "step": 17330 + }, + { + "epoch": 0.77, + "learning_rate": 3.748160152242756e-05, + "loss": 0.1871, + "step": 17340 + }, + { + "epoch": 0.77, + "learning_rate": 3.747416779411546e-05, + "loss": 0.159, + "step": 17350 + }, + { + "epoch": 0.77, + "learning_rate": 3.7466734065803365e-05, + "loss": 0.1506, + "step": 17360 + }, + { + "epoch": 0.77, + "learning_rate": 3.745930033749127e-05, + "loss": 0.1554, + "step": 17370 + }, + { + "epoch": 0.77, + "learning_rate": 3.745186660917917e-05, + "loss": 0.1797, + "step": 17380 + }, + { + "epoch": 0.77, + "learning_rate": 3.744443288086707e-05, + "loss": 0.1464, + "step": 17390 + }, + { + "epoch": 0.77, + "learning_rate": 3.7436999152554975e-05, + "loss": 0.1447, + "step": 17400 + }, + { + "epoch": 0.77, + "learning_rate": 3.742956542424287e-05, + "loss": 0.1686, + "step": 17410 + }, + { + "epoch": 0.77, + "learning_rate": 3.742213169593078e-05, + "loss": 0.146, + "step": 17420 + }, + { + "epoch": 0.77, + "learning_rate": 3.741469796761868e-05, + "loss": 0.1873, + "step": 17430 + }, + { + "epoch": 0.77, + "learning_rate": 3.7407264239306586e-05, + "loss": 0.1112, + "step": 17440 + }, + { + "epoch": 0.77, + "learning_rate": 3.739983051099449e-05, + "loss": 0.1684, + "step": 17450 + }, + { + "epoch": 0.77, + "learning_rate": 3.7392396782682384e-05, + "loss": 0.1792, + "step": 17460 + }, + { + "epoch": 0.77, + "learning_rate": 3.7384963054370294e-05, + "loss": 0.1922, + "step": 17470 + }, + { + "epoch": 0.77, + "learning_rate": 3.737752932605819e-05, + "loss": 0.1859, + "step": 17480 + }, + { + "epoch": 0.77, + "learning_rate": 3.73700955977461e-05, + "loss": 0.2012, + "step": 17490 + }, + { + "epoch": 0.77, + "learning_rate": 3.7362661869433994e-05, + "loss": 0.1518, + "step": 17500 + }, + { + "epoch": 0.78, + "learning_rate": 3.7355228141121904e-05, + "loss": 0.1629, + "step": 17510 + }, + { + "epoch": 0.78, + "learning_rate": 3.73477944128098e-05, + "loss": 0.1604, + "step": 17520 + }, + { + "epoch": 0.78, + "learning_rate": 3.73403606844977e-05, + "loss": 0.2046, + "step": 17530 + }, + { + "epoch": 0.78, + "learning_rate": 3.733292695618561e-05, + "loss": 0.1456, + "step": 17540 + }, + { + "epoch": 0.78, + "learning_rate": 3.732549322787351e-05, + "loss": 0.1761, + "step": 17550 + }, + { + "epoch": 0.78, + "learning_rate": 3.731805949956142e-05, + "loss": 0.1817, + "step": 17560 + }, + { + "epoch": 0.78, + "learning_rate": 3.731062577124931e-05, + "loss": 0.1584, + "step": 17570 + }, + { + "epoch": 0.78, + "learning_rate": 3.7303192042937215e-05, + "loss": 0.1975, + "step": 17580 + }, + { + "epoch": 0.78, + "learning_rate": 3.729575831462512e-05, + "loss": 0.15, + "step": 17590 + }, + { + "epoch": 0.78, + "learning_rate": 3.728832458631302e-05, + "loss": 0.1831, + "step": 17600 + }, + { + "epoch": 0.78, + "learning_rate": 3.728089085800092e-05, + "loss": 0.1603, + "step": 17610 + }, + { + "epoch": 0.78, + "learning_rate": 3.7273457129688825e-05, + "loss": 0.1702, + "step": 17620 + }, + { + "epoch": 0.78, + "learning_rate": 3.726602340137673e-05, + "loss": 0.2121, + "step": 17630 + }, + { + "epoch": 0.78, + "learning_rate": 3.725858967306463e-05, + "loss": 0.2074, + "step": 17640 + }, + { + "epoch": 0.78, + "learning_rate": 3.725115594475253e-05, + "loss": 0.1852, + "step": 17650 + }, + { + "epoch": 0.78, + "learning_rate": 3.7243722216440436e-05, + "loss": 0.1709, + "step": 17660 + }, + { + "epoch": 0.78, + "learning_rate": 3.723628848812834e-05, + "loss": 0.1367, + "step": 17670 + }, + { + "epoch": 0.78, + "learning_rate": 3.722885475981624e-05, + "loss": 0.1619, + "step": 17680 + }, + { + "epoch": 0.78, + "learning_rate": 3.7221421031504144e-05, + "loss": 0.1804, + "step": 17690 + }, + { + "epoch": 0.78, + "learning_rate": 3.7213987303192046e-05, + "loss": 0.1804, + "step": 17700 + }, + { + "epoch": 0.78, + "learning_rate": 3.720655357487995e-05, + "loss": 0.2461, + "step": 17710 + }, + { + "epoch": 0.78, + "learning_rate": 3.719911984656785e-05, + "loss": 0.1389, + "step": 17720 + }, + { + "epoch": 0.78, + "learning_rate": 3.7191686118255754e-05, + "loss": 0.185, + "step": 17730 + }, + { + "epoch": 0.79, + "learning_rate": 3.718425238994365e-05, + "loss": 0.1817, + "step": 17740 + }, + { + "epoch": 0.79, + "learning_rate": 3.717681866163156e-05, + "loss": 0.1793, + "step": 17750 + }, + { + "epoch": 0.79, + "learning_rate": 3.7169384933319455e-05, + "loss": 0.1848, + "step": 17760 + }, + { + "epoch": 0.79, + "learning_rate": 3.7161951205007364e-05, + "loss": 0.2006, + "step": 17770 + }, + { + "epoch": 0.79, + "learning_rate": 3.715451747669526e-05, + "loss": 0.1828, + "step": 17780 + }, + { + "epoch": 0.79, + "learning_rate": 3.714708374838316e-05, + "loss": 0.1876, + "step": 17790 + }, + { + "epoch": 0.79, + "learning_rate": 3.713965002007107e-05, + "loss": 0.1765, + "step": 17800 + }, + { + "epoch": 0.79, + "learning_rate": 3.713221629175897e-05, + "loss": 0.1625, + "step": 17810 + }, + { + "epoch": 0.79, + "learning_rate": 3.712478256344688e-05, + "loss": 0.1565, + "step": 17820 + }, + { + "epoch": 0.79, + "learning_rate": 3.711734883513477e-05, + "loss": 0.1122, + "step": 17830 + }, + { + "epoch": 0.79, + "learning_rate": 3.710991510682268e-05, + "loss": 0.1956, + "step": 17840 + }, + { + "epoch": 0.79, + "learning_rate": 3.710248137851058e-05, + "loss": 0.1547, + "step": 17850 + }, + { + "epoch": 0.79, + "learning_rate": 3.709504765019848e-05, + "loss": 0.199, + "step": 17860 + }, + { + "epoch": 0.79, + "learning_rate": 3.708761392188638e-05, + "loss": 0.1392, + "step": 17870 + }, + { + "epoch": 0.79, + "learning_rate": 3.7080180193574286e-05, + "loss": 0.184, + "step": 17880 + }, + { + "epoch": 0.79, + "learning_rate": 3.707274646526219e-05, + "loss": 0.1461, + "step": 17890 + }, + { + "epoch": 0.79, + "learning_rate": 3.706531273695009e-05, + "loss": 0.1669, + "step": 17900 + }, + { + "epoch": 0.79, + "learning_rate": 3.7057879008637993e-05, + "loss": 0.1702, + "step": 17910 + }, + { + "epoch": 0.79, + "learning_rate": 3.7050445280325896e-05, + "loss": 0.1537, + "step": 17920 + }, + { + "epoch": 0.79, + "learning_rate": 3.70430115520138e-05, + "loss": 0.2081, + "step": 17930 + }, + { + "epoch": 0.79, + "learning_rate": 3.70355778237017e-05, + "loss": 0.1356, + "step": 17940 + }, + { + "epoch": 0.79, + "learning_rate": 3.7028144095389604e-05, + "loss": 0.1889, + "step": 17950 + }, + { + "epoch": 0.8, + "learning_rate": 3.7020710367077506e-05, + "loss": 0.2287, + "step": 17960 + }, + { + "epoch": 0.8, + "learning_rate": 3.701327663876541e-05, + "loss": 0.1992, + "step": 17970 + }, + { + "epoch": 0.8, + "learning_rate": 3.700584291045331e-05, + "loss": 0.1901, + "step": 17980 + }, + { + "epoch": 0.8, + "learning_rate": 3.6998409182141214e-05, + "loss": 0.1837, + "step": 17990 + }, + { + "epoch": 0.8, + "learning_rate": 3.699097545382911e-05, + "loss": 0.1823, + "step": 18000 + }, + { + "epoch": 0.8, + "learning_rate": 3.698354172551702e-05, + "loss": 0.1512, + "step": 18010 + }, + { + "epoch": 0.8, + "learning_rate": 3.6976107997204915e-05, + "loss": 0.1823, + "step": 18020 + }, + { + "epoch": 0.8, + "learning_rate": 3.6968674268892824e-05, + "loss": 0.1816, + "step": 18030 + }, + { + "epoch": 0.8, + "learning_rate": 3.696124054058073e-05, + "loss": 0.1537, + "step": 18040 + }, + { + "epoch": 0.8, + "learning_rate": 3.695380681226863e-05, + "loss": 0.1754, + "step": 18050 + }, + { + "epoch": 0.8, + "learning_rate": 3.694637308395653e-05, + "loss": 0.1801, + "step": 18060 + }, + { + "epoch": 0.8, + "learning_rate": 3.693893935564443e-05, + "loss": 0.1886, + "step": 18070 + }, + { + "epoch": 0.8, + "learning_rate": 3.693150562733234e-05, + "loss": 0.1667, + "step": 18080 + }, + { + "epoch": 0.8, + "learning_rate": 3.692407189902023e-05, + "loss": 0.1243, + "step": 18090 + }, + { + "epoch": 0.8, + "learning_rate": 3.691663817070814e-05, + "loss": 0.1358, + "step": 18100 + }, + { + "epoch": 0.8, + "learning_rate": 3.690920444239604e-05, + "loss": 0.1924, + "step": 18110 + }, + { + "epoch": 0.8, + "learning_rate": 3.690177071408394e-05, + "loss": 0.2612, + "step": 18120 + }, + { + "epoch": 0.8, + "learning_rate": 3.6894336985771843e-05, + "loss": 0.1938, + "step": 18130 + }, + { + "epoch": 0.8, + "learning_rate": 3.6886903257459746e-05, + "loss": 0.2132, + "step": 18140 + }, + { + "epoch": 0.8, + "learning_rate": 3.687946952914765e-05, + "loss": 0.1605, + "step": 18150 + }, + { + "epoch": 0.8, + "learning_rate": 3.687203580083555e-05, + "loss": 0.1484, + "step": 18160 + }, + { + "epoch": 0.8, + "learning_rate": 3.686460207252346e-05, + "loss": 0.1778, + "step": 18170 + }, + { + "epoch": 0.8, + "learning_rate": 3.6857168344211356e-05, + "loss": 0.1956, + "step": 18180 + }, + { + "epoch": 0.81, + "learning_rate": 3.684973461589926e-05, + "loss": 0.185, + "step": 18190 + }, + { + "epoch": 0.81, + "learning_rate": 3.684230088758716e-05, + "loss": 0.1576, + "step": 18200 + }, + { + "epoch": 0.81, + "learning_rate": 3.6834867159275064e-05, + "loss": 0.1663, + "step": 18210 + }, + { + "epoch": 0.81, + "learning_rate": 3.682743343096297e-05, + "loss": 0.1558, + "step": 18220 + }, + { + "epoch": 0.81, + "learning_rate": 3.681999970265087e-05, + "loss": 0.2002, + "step": 18230 + }, + { + "epoch": 0.81, + "learning_rate": 3.681256597433877e-05, + "loss": 0.1327, + "step": 18240 + }, + { + "epoch": 0.81, + "learning_rate": 3.6805132246026674e-05, + "loss": 0.2429, + "step": 18250 + }, + { + "epoch": 0.81, + "learning_rate": 3.679769851771458e-05, + "loss": 0.1462, + "step": 18260 + }, + { + "epoch": 0.81, + "learning_rate": 3.679026478940248e-05, + "loss": 0.1836, + "step": 18270 + }, + { + "epoch": 0.81, + "learning_rate": 3.678283106109038e-05, + "loss": 0.1234, + "step": 18280 + }, + { + "epoch": 0.81, + "learning_rate": 3.6775397332778285e-05, + "loss": 0.1474, + "step": 18290 + }, + { + "epoch": 0.81, + "learning_rate": 3.676796360446619e-05, + "loss": 0.1757, + "step": 18300 + }, + { + "epoch": 0.81, + "learning_rate": 3.676052987615409e-05, + "loss": 0.1282, + "step": 18310 + }, + { + "epoch": 0.81, + "learning_rate": 3.675309614784199e-05, + "loss": 0.1834, + "step": 18320 + }, + { + "epoch": 0.81, + "learning_rate": 3.674566241952989e-05, + "loss": 0.1673, + "step": 18330 + }, + { + "epoch": 0.81, + "learning_rate": 3.67382286912178e-05, + "loss": 0.1127, + "step": 18340 + }, + { + "epoch": 0.81, + "learning_rate": 3.6730794962905693e-05, + "loss": 0.1909, + "step": 18350 + }, + { + "epoch": 0.81, + "learning_rate": 3.67233612345936e-05, + "loss": 0.1373, + "step": 18360 + }, + { + "epoch": 0.81, + "learning_rate": 3.67159275062815e-05, + "loss": 0.1647, + "step": 18370 + }, + { + "epoch": 0.81, + "learning_rate": 3.670849377796941e-05, + "loss": 0.2055, + "step": 18380 + }, + { + "epoch": 0.81, + "learning_rate": 3.6701060049657304e-05, + "loss": 0.2445, + "step": 18390 + }, + { + "epoch": 0.81, + "learning_rate": 3.6693626321345206e-05, + "loss": 0.1307, + "step": 18400 + }, + { + "epoch": 0.82, + "learning_rate": 3.6686192593033116e-05, + "loss": 0.1935, + "step": 18410 + }, + { + "epoch": 0.82, + "learning_rate": 3.667875886472101e-05, + "loss": 0.1723, + "step": 18420 + }, + { + "epoch": 0.82, + "learning_rate": 3.667132513640892e-05, + "loss": 0.1545, + "step": 18430 + }, + { + "epoch": 0.82, + "learning_rate": 3.666389140809682e-05, + "loss": 0.1257, + "step": 18440 + }, + { + "epoch": 0.82, + "learning_rate": 3.665645767978472e-05, + "loss": 0.1961, + "step": 18450 + }, + { + "epoch": 0.82, + "learning_rate": 3.664902395147262e-05, + "loss": 0.1733, + "step": 18460 + }, + { + "epoch": 0.82, + "learning_rate": 3.6641590223160524e-05, + "loss": 0.2285, + "step": 18470 + }, + { + "epoch": 0.82, + "learning_rate": 3.663415649484843e-05, + "loss": 0.1601, + "step": 18480 + }, + { + "epoch": 0.82, + "learning_rate": 3.662672276653633e-05, + "loss": 0.2179, + "step": 18490 + }, + { + "epoch": 0.82, + "learning_rate": 3.661928903822423e-05, + "loss": 0.1971, + "step": 18500 + }, + { + "epoch": 0.82, + "learning_rate": 3.6611855309912135e-05, + "loss": 0.1798, + "step": 18510 + }, + { + "epoch": 0.82, + "learning_rate": 3.660442158160004e-05, + "loss": 0.2057, + "step": 18520 + }, + { + "epoch": 0.82, + "learning_rate": 3.659698785328794e-05, + "loss": 0.2089, + "step": 18530 + }, + { + "epoch": 0.82, + "learning_rate": 3.658955412497584e-05, + "loss": 0.1596, + "step": 18540 + }, + { + "epoch": 0.82, + "learning_rate": 3.6582120396663745e-05, + "loss": 0.1648, + "step": 18550 + }, + { + "epoch": 0.82, + "learning_rate": 3.657468666835165e-05, + "loss": 0.1927, + "step": 18560 + }, + { + "epoch": 0.82, + "learning_rate": 3.656725294003955e-05, + "loss": 0.183, + "step": 18570 + }, + { + "epoch": 0.82, + "learning_rate": 3.655981921172745e-05, + "loss": 0.1881, + "step": 18580 + }, + { + "epoch": 0.82, + "learning_rate": 3.6552385483415355e-05, + "loss": 0.131, + "step": 18590 + }, + { + "epoch": 0.82, + "learning_rate": 3.654495175510326e-05, + "loss": 0.1466, + "step": 18600 + }, + { + "epoch": 0.82, + "learning_rate": 3.6537518026791154e-05, + "loss": 0.169, + "step": 18610 + }, + { + "epoch": 0.82, + "learning_rate": 3.653008429847906e-05, + "loss": 0.2043, + "step": 18620 + }, + { + "epoch": 0.82, + "learning_rate": 3.652265057016696e-05, + "loss": 0.1718, + "step": 18630 + }, + { + "epoch": 0.83, + "learning_rate": 3.651521684185487e-05, + "loss": 0.1813, + "step": 18640 + }, + { + "epoch": 0.83, + "learning_rate": 3.650778311354277e-05, + "loss": 0.157, + "step": 18650 + }, + { + "epoch": 0.83, + "learning_rate": 3.650034938523067e-05, + "loss": 0.1306, + "step": 18660 + }, + { + "epoch": 0.83, + "learning_rate": 3.6492915656918576e-05, + "loss": 0.1913, + "step": 18670 + }, + { + "epoch": 0.83, + "learning_rate": 3.648548192860647e-05, + "loss": 0.1343, + "step": 18680 + }, + { + "epoch": 0.83, + "learning_rate": 3.647804820029438e-05, + "loss": 0.1722, + "step": 18690 + }, + { + "epoch": 0.83, + "learning_rate": 3.647061447198228e-05, + "loss": 0.1587, + "step": 18700 + }, + { + "epoch": 0.83, + "learning_rate": 3.6463180743670186e-05, + "loss": 0.1481, + "step": 18710 + }, + { + "epoch": 0.83, + "learning_rate": 3.645574701535808e-05, + "loss": 0.1909, + "step": 18720 + }, + { + "epoch": 0.83, + "learning_rate": 3.6448313287045985e-05, + "loss": 0.1818, + "step": 18730 + }, + { + "epoch": 0.83, + "learning_rate": 3.644087955873389e-05, + "loss": 0.1803, + "step": 18740 + }, + { + "epoch": 0.83, + "learning_rate": 3.643344583042179e-05, + "loss": 0.1451, + "step": 18750 + }, + { + "epoch": 0.83, + "learning_rate": 3.642601210210969e-05, + "loss": 0.1912, + "step": 18760 + }, + { + "epoch": 0.83, + "learning_rate": 3.6418578373797595e-05, + "loss": 0.1878, + "step": 18770 + }, + { + "epoch": 0.83, + "learning_rate": 3.6411144645485504e-05, + "loss": 0.1759, + "step": 18780 + }, + { + "epoch": 0.83, + "learning_rate": 3.64037109171734e-05, + "loss": 0.1528, + "step": 18790 + }, + { + "epoch": 0.83, + "learning_rate": 3.63962771888613e-05, + "loss": 0.1628, + "step": 18800 + }, + { + "epoch": 0.83, + "learning_rate": 3.6388843460549205e-05, + "loss": 0.185, + "step": 18810 + }, + { + "epoch": 0.83, + "learning_rate": 3.638140973223711e-05, + "loss": 0.147, + "step": 18820 + }, + { + "epoch": 0.83, + "learning_rate": 3.637397600392501e-05, + "loss": 0.1668, + "step": 18830 + }, + { + "epoch": 0.83, + "learning_rate": 3.636654227561291e-05, + "loss": 0.1704, + "step": 18840 + }, + { + "epoch": 0.83, + "learning_rate": 3.6359108547300816e-05, + "loss": 0.1502, + "step": 18850 + }, + { + "epoch": 0.83, + "learning_rate": 3.635167481898872e-05, + "loss": 0.188, + "step": 18860 + }, + { + "epoch": 0.84, + "learning_rate": 3.6344241090676614e-05, + "loss": 0.1148, + "step": 18870 + }, + { + "epoch": 0.84, + "learning_rate": 3.633680736236452e-05, + "loss": 0.1785, + "step": 18880 + }, + { + "epoch": 0.84, + "learning_rate": 3.6329373634052426e-05, + "loss": 0.1545, + "step": 18890 + }, + { + "epoch": 0.84, + "learning_rate": 3.632193990574033e-05, + "loss": 0.1851, + "step": 18900 + }, + { + "epoch": 0.84, + "learning_rate": 3.631450617742823e-05, + "loss": 0.2196, + "step": 18910 + }, + { + "epoch": 0.84, + "learning_rate": 3.6307072449116134e-05, + "loss": 0.2182, + "step": 18920 + }, + { + "epoch": 0.84, + "learning_rate": 3.6299638720804036e-05, + "loss": 0.1466, + "step": 18930 + }, + { + "epoch": 0.84, + "learning_rate": 3.629220499249193e-05, + "loss": 0.16, + "step": 18940 + }, + { + "epoch": 0.84, + "learning_rate": 3.628477126417984e-05, + "loss": 0.1314, + "step": 18950 + }, + { + "epoch": 0.84, + "learning_rate": 3.627733753586774e-05, + "loss": 0.1871, + "step": 18960 + }, + { + "epoch": 0.84, + "learning_rate": 3.6269903807555647e-05, + "loss": 0.1611, + "step": 18970 + }, + { + "epoch": 0.84, + "learning_rate": 3.626247007924354e-05, + "loss": 0.1544, + "step": 18980 + }, + { + "epoch": 0.84, + "learning_rate": 3.6255036350931445e-05, + "loss": 0.1622, + "step": 18990 + }, + { + "epoch": 0.84, + "learning_rate": 3.624760262261935e-05, + "loss": 0.224, + "step": 19000 + }, + { + "epoch": 0.84, + "learning_rate": 3.624016889430725e-05, + "loss": 0.1996, + "step": 19010 + }, + { + "epoch": 0.84, + "learning_rate": 3.623273516599516e-05, + "loss": 0.1917, + "step": 19020 + }, + { + "epoch": 0.84, + "learning_rate": 3.6225301437683055e-05, + "loss": 0.1517, + "step": 19030 + }, + { + "epoch": 0.84, + "learning_rate": 3.6217867709370965e-05, + "loss": 0.1468, + "step": 19040 + }, + { + "epoch": 0.84, + "learning_rate": 3.621043398105886e-05, + "loss": 0.1821, + "step": 19050 + }, + { + "epoch": 0.84, + "learning_rate": 3.620300025274676e-05, + "loss": 0.1776, + "step": 19060 + }, + { + "epoch": 0.84, + "learning_rate": 3.6195566524434666e-05, + "loss": 0.2187, + "step": 19070 + }, + { + "epoch": 0.84, + "learning_rate": 3.618813279612257e-05, + "loss": 0.214, + "step": 19080 + }, + { + "epoch": 0.85, + "learning_rate": 3.618069906781047e-05, + "loss": 0.1894, + "step": 19090 + }, + { + "epoch": 0.85, + "learning_rate": 3.617326533949837e-05, + "loss": 0.2246, + "step": 19100 + }, + { + "epoch": 0.85, + "learning_rate": 3.6165831611186276e-05, + "loss": 0.1563, + "step": 19110 + }, + { + "epoch": 0.85, + "learning_rate": 3.615839788287418e-05, + "loss": 0.1936, + "step": 19120 + }, + { + "epoch": 0.85, + "learning_rate": 3.615096415456208e-05, + "loss": 0.204, + "step": 19130 + }, + { + "epoch": 0.85, + "learning_rate": 3.6143530426249984e-05, + "loss": 0.1727, + "step": 19140 + }, + { + "epoch": 0.85, + "learning_rate": 3.6136096697937886e-05, + "loss": 0.1336, + "step": 19150 + }, + { + "epoch": 0.85, + "learning_rate": 3.612866296962579e-05, + "loss": 0.194, + "step": 19160 + }, + { + "epoch": 0.85, + "learning_rate": 3.612122924131369e-05, + "loss": 0.2129, + "step": 19170 + }, + { + "epoch": 0.85, + "learning_rate": 3.6113795513001594e-05, + "loss": 0.17, + "step": 19180 + }, + { + "epoch": 0.85, + "learning_rate": 3.6106361784689497e-05, + "loss": 0.1946, + "step": 19190 + }, + { + "epoch": 0.85, + "learning_rate": 3.609892805637739e-05, + "loss": 0.1407, + "step": 19200 + }, + { + "epoch": 0.85, + "learning_rate": 3.60914943280653e-05, + "loss": 0.1528, + "step": 19210 + }, + { + "epoch": 0.85, + "learning_rate": 3.60840605997532e-05, + "loss": 0.1818, + "step": 19220 + }, + { + "epoch": 0.85, + "learning_rate": 3.607662687144111e-05, + "loss": 0.1997, + "step": 19230 + }, + { + "epoch": 0.85, + "learning_rate": 3.6069193143129e-05, + "loss": 0.1616, + "step": 19240 + }, + { + "epoch": 0.85, + "learning_rate": 3.606175941481691e-05, + "loss": 0.2097, + "step": 19250 + }, + { + "epoch": 0.85, + "learning_rate": 3.6054325686504815e-05, + "loss": 0.1998, + "step": 19260 + }, + { + "epoch": 0.85, + "learning_rate": 3.604689195819271e-05, + "loss": 0.1912, + "step": 19270 + }, + { + "epoch": 0.85, + "learning_rate": 3.603945822988062e-05, + "loss": 0.1473, + "step": 19280 + }, + { + "epoch": 0.85, + "learning_rate": 3.6032024501568516e-05, + "loss": 0.2152, + "step": 19290 + }, + { + "epoch": 0.85, + "learning_rate": 3.6024590773256425e-05, + "loss": 0.1736, + "step": 19300 + }, + { + "epoch": 0.85, + "learning_rate": 3.601715704494432e-05, + "loss": 0.1843, + "step": 19310 + }, + { + "epoch": 0.86, + "learning_rate": 3.600972331663223e-05, + "loss": 0.1757, + "step": 19320 + }, + { + "epoch": 0.86, + "learning_rate": 3.6002289588320126e-05, + "loss": 0.2041, + "step": 19330 + }, + { + "epoch": 0.86, + "learning_rate": 3.599485586000803e-05, + "loss": 0.1544, + "step": 19340 + }, + { + "epoch": 0.86, + "learning_rate": 3.598742213169593e-05, + "loss": 0.163, + "step": 19350 + }, + { + "epoch": 0.86, + "learning_rate": 3.5979988403383834e-05, + "loss": 0.1837, + "step": 19360 + }, + { + "epoch": 0.86, + "learning_rate": 3.5972554675071736e-05, + "loss": 0.1848, + "step": 19370 + }, + { + "epoch": 0.86, + "learning_rate": 3.596512094675964e-05, + "loss": 0.1762, + "step": 19380 + }, + { + "epoch": 0.86, + "learning_rate": 3.595768721844754e-05, + "loss": 0.1707, + "step": 19390 + }, + { + "epoch": 0.86, + "learning_rate": 3.5950253490135444e-05, + "loss": 0.1746, + "step": 19400 + }, + { + "epoch": 0.86, + "learning_rate": 3.5942819761823347e-05, + "loss": 0.2088, + "step": 19410 + }, + { + "epoch": 0.86, + "learning_rate": 3.593538603351125e-05, + "loss": 0.1934, + "step": 19420 + }, + { + "epoch": 0.86, + "learning_rate": 3.592795230519915e-05, + "loss": 0.1898, + "step": 19430 + }, + { + "epoch": 0.86, + "learning_rate": 3.5920518576887054e-05, + "loss": 0.1842, + "step": 19440 + }, + { + "epoch": 0.86, + "learning_rate": 3.591308484857496e-05, + "loss": 0.1432, + "step": 19450 + }, + { + "epoch": 0.86, + "learning_rate": 3.590565112026286e-05, + "loss": 0.2012, + "step": 19460 + }, + { + "epoch": 0.86, + "learning_rate": 3.589821739195076e-05, + "loss": 0.1424, + "step": 19470 + }, + { + "epoch": 0.86, + "learning_rate": 3.589078366363866e-05, + "loss": 0.1975, + "step": 19480 + }, + { + "epoch": 0.86, + "learning_rate": 3.588334993532657e-05, + "loss": 0.2077, + "step": 19490 + }, + { + "epoch": 0.86, + "learning_rate": 3.587591620701446e-05, + "loss": 0.15, + "step": 19500 + }, + { + "epoch": 0.86, + "learning_rate": 3.586848247870237e-05, + "loss": 0.1825, + "step": 19510 + }, + { + "epoch": 0.86, + "learning_rate": 3.5861048750390275e-05, + "loss": 0.1482, + "step": 19520 + }, + { + "epoch": 0.86, + "learning_rate": 3.585361502207818e-05, + "loss": 0.1503, + "step": 19530 + }, + { + "epoch": 0.87, + "learning_rate": 3.584618129376608e-05, + "loss": 0.2178, + "step": 19540 + }, + { + "epoch": 0.87, + "learning_rate": 3.5838747565453976e-05, + "loss": 0.2188, + "step": 19550 + }, + { + "epoch": 0.87, + "learning_rate": 3.5831313837141885e-05, + "loss": 0.1985, + "step": 19560 + }, + { + "epoch": 0.87, + "learning_rate": 3.582388010882978e-05, + "loss": 0.1951, + "step": 19570 + }, + { + "epoch": 0.87, + "learning_rate": 3.581644638051769e-05, + "loss": 0.153, + "step": 19580 + }, + { + "epoch": 0.87, + "learning_rate": 3.5809012652205586e-05, + "loss": 0.1523, + "step": 19590 + }, + { + "epoch": 0.87, + "learning_rate": 3.580157892389349e-05, + "loss": 0.1857, + "step": 19600 + }, + { + "epoch": 0.87, + "learning_rate": 3.579414519558139e-05, + "loss": 0.2138, + "step": 19610 + }, + { + "epoch": 0.87, + "learning_rate": 3.5786711467269294e-05, + "loss": 0.198, + "step": 19620 + }, + { + "epoch": 0.87, + "learning_rate": 3.57792777389572e-05, + "loss": 0.1276, + "step": 19630 + }, + { + "epoch": 0.87, + "learning_rate": 3.57718440106451e-05, + "loss": 0.1954, + "step": 19640 + }, + { + "epoch": 0.87, + "learning_rate": 3.576441028233301e-05, + "loss": 0.1989, + "step": 19650 + }, + { + "epoch": 0.87, + "learning_rate": 3.5756976554020904e-05, + "loss": 0.1647, + "step": 19660 + }, + { + "epoch": 0.87, + "learning_rate": 3.574954282570881e-05, + "loss": 0.1461, + "step": 19670 + }, + { + "epoch": 0.87, + "learning_rate": 3.574210909739671e-05, + "loss": 0.2016, + "step": 19680 + }, + { + "epoch": 0.87, + "learning_rate": 3.573467536908461e-05, + "loss": 0.2673, + "step": 19690 + }, + { + "epoch": 0.87, + "learning_rate": 3.5727241640772515e-05, + "loss": 0.1919, + "step": 19700 + }, + { + "epoch": 0.87, + "learning_rate": 3.571980791246042e-05, + "loss": 0.1426, + "step": 19710 + }, + { + "epoch": 0.87, + "learning_rate": 3.571237418414832e-05, + "loss": 0.2006, + "step": 19720 + }, + { + "epoch": 0.87, + "learning_rate": 3.570494045583622e-05, + "loss": 0.1479, + "step": 19730 + }, + { + "epoch": 0.87, + "learning_rate": 3.5697506727524125e-05, + "loss": 0.1288, + "step": 19740 + }, + { + "epoch": 0.87, + "learning_rate": 3.569007299921203e-05, + "loss": 0.1831, + "step": 19750 + }, + { + "epoch": 0.87, + "learning_rate": 3.568263927089993e-05, + "loss": 0.1801, + "step": 19760 + }, + { + "epoch": 0.88, + "learning_rate": 3.567520554258783e-05, + "loss": 0.2104, + "step": 19770 + }, + { + "epoch": 0.88, + "learning_rate": 3.5667771814275735e-05, + "loss": 0.163, + "step": 19780 + }, + { + "epoch": 0.88, + "learning_rate": 3.566033808596364e-05, + "loss": 0.2035, + "step": 19790 + }, + { + "epoch": 0.88, + "learning_rate": 3.565290435765154e-05, + "loss": 0.1717, + "step": 19800 + }, + { + "epoch": 0.88, + "learning_rate": 3.5645470629339436e-05, + "loss": 0.1464, + "step": 19810 + }, + { + "epoch": 0.88, + "learning_rate": 3.5638036901027346e-05, + "loss": 0.1406, + "step": 19820 + }, + { + "epoch": 0.88, + "learning_rate": 3.563060317271524e-05, + "loss": 0.2377, + "step": 19830 + }, + { + "epoch": 0.88, + "learning_rate": 3.562316944440315e-05, + "loss": 0.1477, + "step": 19840 + }, + { + "epoch": 0.88, + "learning_rate": 3.5615735716091046e-05, + "loss": 0.1597, + "step": 19850 + }, + { + "epoch": 0.88, + "learning_rate": 3.5608301987778956e-05, + "loss": 0.163, + "step": 19860 + }, + { + "epoch": 0.88, + "learning_rate": 3.560086825946686e-05, + "loss": 0.1546, + "step": 19870 + }, + { + "epoch": 0.88, + "learning_rate": 3.5593434531154754e-05, + "loss": 0.1832, + "step": 19880 + }, + { + "epoch": 0.88, + "learning_rate": 3.5586000802842664e-05, + "loss": 0.1707, + "step": 19890 + }, + { + "epoch": 0.88, + "learning_rate": 3.557856707453056e-05, + "loss": 0.2378, + "step": 19900 + }, + { + "epoch": 0.88, + "learning_rate": 3.557113334621847e-05, + "loss": 0.1197, + "step": 19910 + }, + { + "epoch": 0.88, + "learning_rate": 3.5563699617906365e-05, + "loss": 0.1856, + "step": 19920 + }, + { + "epoch": 0.88, + "learning_rate": 3.555626588959427e-05, + "loss": 0.1441, + "step": 19930 + }, + { + "epoch": 0.88, + "learning_rate": 3.554883216128217e-05, + "loss": 0.1594, + "step": 19940 + }, + { + "epoch": 0.88, + "learning_rate": 3.554139843297007e-05, + "loss": 0.21, + "step": 19950 + }, + { + "epoch": 0.88, + "learning_rate": 3.5533964704657975e-05, + "loss": 0.1894, + "step": 19960 + }, + { + "epoch": 0.88, + "learning_rate": 3.552653097634588e-05, + "loss": 0.1895, + "step": 19970 + }, + { + "epoch": 0.88, + "learning_rate": 3.551909724803378e-05, + "loss": 0.1739, + "step": 19980 + }, + { + "epoch": 0.89, + "learning_rate": 3.551166351972168e-05, + "loss": 0.1825, + "step": 19990 + }, + { + "epoch": 0.89, + "learning_rate": 3.5504229791409585e-05, + "loss": 0.1506, + "step": 20000 + }, + { + "epoch": 0.89, + "learning_rate": 3.549679606309749e-05, + "loss": 0.1467, + "step": 20010 + }, + { + "epoch": 0.89, + "learning_rate": 3.548936233478539e-05, + "loss": 0.1529, + "step": 20020 + }, + { + "epoch": 0.89, + "learning_rate": 3.548192860647329e-05, + "loss": 0.2291, + "step": 20030 + }, + { + "epoch": 0.89, + "learning_rate": 3.5474494878161195e-05, + "loss": 0.2038, + "step": 20040 + }, + { + "epoch": 0.89, + "learning_rate": 3.54670611498491e-05, + "loss": 0.1566, + "step": 20050 + }, + { + "epoch": 0.89, + "learning_rate": 3.5459627421537e-05, + "loss": 0.1304, + "step": 20060 + }, + { + "epoch": 0.89, + "learning_rate": 3.54521936932249e-05, + "loss": 0.1848, + "step": 20070 + }, + { + "epoch": 0.89, + "learning_rate": 3.5444759964912806e-05, + "loss": 0.1929, + "step": 20080 + }, + { + "epoch": 0.89, + "learning_rate": 3.54373262366007e-05, + "loss": 0.1364, + "step": 20090 + }, + { + "epoch": 0.89, + "learning_rate": 3.542989250828861e-05, + "loss": 0.1222, + "step": 20100 + }, + { + "epoch": 0.89, + "learning_rate": 3.542245877997651e-05, + "loss": 0.1805, + "step": 20110 + }, + { + "epoch": 0.89, + "learning_rate": 3.5415025051664416e-05, + "loss": 0.1358, + "step": 20120 + }, + { + "epoch": 0.89, + "learning_rate": 3.540759132335232e-05, + "loss": 0.1688, + "step": 20130 + }, + { + "epoch": 0.89, + "learning_rate": 3.5400157595040214e-05, + "loss": 0.1731, + "step": 20140 + }, + { + "epoch": 0.89, + "learning_rate": 3.5392723866728124e-05, + "loss": 0.1693, + "step": 20150 + }, + { + "epoch": 0.89, + "learning_rate": 3.538529013841602e-05, + "loss": 0.1629, + "step": 20160 + }, + { + "epoch": 0.89, + "learning_rate": 3.537785641010393e-05, + "loss": 0.1345, + "step": 20170 + }, + { + "epoch": 0.89, + "learning_rate": 3.5370422681791825e-05, + "loss": 0.2363, + "step": 20180 + }, + { + "epoch": 0.89, + "learning_rate": 3.5362988953479734e-05, + "loss": 0.2016, + "step": 20190 + }, + { + "epoch": 0.89, + "learning_rate": 3.535555522516763e-05, + "loss": 0.1146, + "step": 20200 + }, + { + "epoch": 0.89, + "learning_rate": 3.534812149685553e-05, + "loss": 0.1736, + "step": 20210 + }, + { + "epoch": 0.9, + "learning_rate": 3.5340687768543435e-05, + "loss": 0.1581, + "step": 20220 + }, + { + "epoch": 0.9, + "learning_rate": 3.533325404023134e-05, + "loss": 0.1632, + "step": 20230 + }, + { + "epoch": 0.9, + "learning_rate": 3.532582031191925e-05, + "loss": 0.1448, + "step": 20240 + }, + { + "epoch": 0.9, + "learning_rate": 3.531838658360714e-05, + "loss": 0.2599, + "step": 20250 + }, + { + "epoch": 0.9, + "learning_rate": 3.5310952855295045e-05, + "loss": 0.2301, + "step": 20260 + }, + { + "epoch": 0.9, + "learning_rate": 3.530351912698295e-05, + "loss": 0.1855, + "step": 20270 + }, + { + "epoch": 0.9, + "learning_rate": 3.529608539867085e-05, + "loss": 0.1503, + "step": 20280 + }, + { + "epoch": 0.9, + "learning_rate": 3.528865167035875e-05, + "loss": 0.1726, + "step": 20290 + }, + { + "epoch": 0.9, + "learning_rate": 3.5281217942046656e-05, + "loss": 0.1206, + "step": 20300 + }, + { + "epoch": 0.9, + "learning_rate": 3.527378421373456e-05, + "loss": 0.1642, + "step": 20310 + }, + { + "epoch": 0.9, + "learning_rate": 3.526635048542246e-05, + "loss": 0.1518, + "step": 20320 + }, + { + "epoch": 0.9, + "learning_rate": 3.5258916757110364e-05, + "loss": 0.1623, + "step": 20330 + }, + { + "epoch": 0.9, + "learning_rate": 3.5251483028798266e-05, + "loss": 0.1276, + "step": 20340 + }, + { + "epoch": 0.9, + "learning_rate": 3.524404930048616e-05, + "loss": 0.1679, + "step": 20350 + }, + { + "epoch": 0.9, + "learning_rate": 3.523661557217407e-05, + "loss": 0.1765, + "step": 20360 + }, + { + "epoch": 0.9, + "learning_rate": 3.5229181843861974e-05, + "loss": 0.1949, + "step": 20370 + }, + { + "epoch": 0.9, + "learning_rate": 3.5221748115549876e-05, + "loss": 0.1555, + "step": 20380 + }, + { + "epoch": 0.9, + "learning_rate": 3.521431438723778e-05, + "loss": 0.1585, + "step": 20390 + }, + { + "epoch": 0.9, + "learning_rate": 3.520688065892568e-05, + "loss": 0.2012, + "step": 20400 + }, + { + "epoch": 0.9, + "learning_rate": 3.5199446930613584e-05, + "loss": 0.2392, + "step": 20410 + }, + { + "epoch": 0.9, + "learning_rate": 3.519201320230148e-05, + "loss": 0.1692, + "step": 20420 + }, + { + "epoch": 0.9, + "learning_rate": 3.518457947398939e-05, + "loss": 0.1568, + "step": 20430 + }, + { + "epoch": 0.9, + "learning_rate": 3.5177145745677285e-05, + "loss": 0.1591, + "step": 20440 + }, + { + "epoch": 0.91, + "learning_rate": 3.5169712017365194e-05, + "loss": 0.1805, + "step": 20450 + }, + { + "epoch": 0.91, + "learning_rate": 3.516227828905309e-05, + "loss": 0.1668, + "step": 20460 + }, + { + "epoch": 0.91, + "learning_rate": 3.515484456074099e-05, + "loss": 0.1703, + "step": 20470 + }, + { + "epoch": 0.91, + "learning_rate": 3.5147410832428895e-05, + "loss": 0.1182, + "step": 20480 + }, + { + "epoch": 0.91, + "learning_rate": 3.51399771041168e-05, + "loss": 0.1707, + "step": 20490 + }, + { + "epoch": 0.91, + "learning_rate": 3.513254337580471e-05, + "loss": 0.265, + "step": 20500 + }, + { + "epoch": 0.91, + "learning_rate": 3.51251096474926e-05, + "loss": 0.1651, + "step": 20510 + }, + { + "epoch": 0.91, + "learning_rate": 3.511767591918051e-05, + "loss": 0.1637, + "step": 20520 + }, + { + "epoch": 0.91, + "learning_rate": 3.511024219086841e-05, + "loss": 0.1813, + "step": 20530 + }, + { + "epoch": 0.91, + "learning_rate": 3.510280846255631e-05, + "loss": 0.1879, + "step": 20540 + }, + { + "epoch": 0.91, + "learning_rate": 3.5095374734244213e-05, + "loss": 0.1566, + "step": 20550 + }, + { + "epoch": 0.91, + "learning_rate": 3.5087941005932116e-05, + "loss": 0.1517, + "step": 20560 + }, + { + "epoch": 0.91, + "learning_rate": 3.508050727762002e-05, + "loss": 0.1857, + "step": 20570 + }, + { + "epoch": 0.91, + "learning_rate": 3.507307354930792e-05, + "loss": 0.1479, + "step": 20580 + }, + { + "epoch": 0.91, + "learning_rate": 3.5065639820995824e-05, + "loss": 0.2, + "step": 20590 + }, + { + "epoch": 0.91, + "learning_rate": 3.5058206092683726e-05, + "loss": 0.1639, + "step": 20600 + }, + { + "epoch": 0.91, + "learning_rate": 3.505077236437163e-05, + "loss": 0.1489, + "step": 20610 + }, + { + "epoch": 0.91, + "learning_rate": 3.504333863605953e-05, + "loss": 0.1937, + "step": 20620 + }, + { + "epoch": 0.91, + "learning_rate": 3.5035904907747434e-05, + "loss": 0.1581, + "step": 20630 + }, + { + "epoch": 0.91, + "learning_rate": 3.502847117943534e-05, + "loss": 0.1845, + "step": 20640 + }, + { + "epoch": 0.91, + "learning_rate": 3.502103745112324e-05, + "loss": 0.1222, + "step": 20650 + }, + { + "epoch": 0.91, + "learning_rate": 3.501360372281114e-05, + "loss": 0.1661, + "step": 20660 + }, + { + "epoch": 0.92, + "learning_rate": 3.5006169994499044e-05, + "loss": 0.1912, + "step": 20670 + }, + { + "epoch": 0.92, + "learning_rate": 3.499873626618694e-05, + "loss": 0.1708, + "step": 20680 + }, + { + "epoch": 0.92, + "learning_rate": 3.499130253787485e-05, + "loss": 0.163, + "step": 20690 + }, + { + "epoch": 0.92, + "learning_rate": 3.4983868809562745e-05, + "loss": 0.2093, + "step": 20700 + }, + { + "epoch": 0.92, + "learning_rate": 3.4976435081250655e-05, + "loss": 0.1256, + "step": 20710 + }, + { + "epoch": 0.92, + "learning_rate": 3.496900135293855e-05, + "loss": 0.1564, + "step": 20720 + }, + { + "epoch": 0.92, + "learning_rate": 3.496156762462646e-05, + "loss": 0.1093, + "step": 20730 + }, + { + "epoch": 0.92, + "learning_rate": 3.495413389631436e-05, + "loss": 0.1565, + "step": 20740 + }, + { + "epoch": 0.92, + "learning_rate": 3.494670016800226e-05, + "loss": 0.1329, + "step": 20750 + }, + { + "epoch": 0.92, + "learning_rate": 3.493926643969017e-05, + "loss": 0.1719, + "step": 20760 + }, + { + "epoch": 0.92, + "learning_rate": 3.4931832711378063e-05, + "loss": 0.2113, + "step": 20770 + }, + { + "epoch": 0.92, + "learning_rate": 3.492439898306597e-05, + "loss": 0.1556, + "step": 20780 + }, + { + "epoch": 0.92, + "learning_rate": 3.491696525475387e-05, + "loss": 0.1636, + "step": 20790 + }, + { + "epoch": 0.92, + "learning_rate": 3.490953152644177e-05, + "loss": 0.152, + "step": 20800 + }, + { + "epoch": 0.92, + "learning_rate": 3.4902097798129674e-05, + "loss": 0.1574, + "step": 20810 + }, + { + "epoch": 0.92, + "learning_rate": 3.4894664069817576e-05, + "loss": 0.1751, + "step": 20820 + }, + { + "epoch": 0.92, + "learning_rate": 3.488723034150548e-05, + "loss": 0.166, + "step": 20830 + }, + { + "epoch": 0.92, + "learning_rate": 3.487979661319338e-05, + "loss": 0.2218, + "step": 20840 + }, + { + "epoch": 0.92, + "learning_rate": 3.487236288488129e-05, + "loss": 0.1262, + "step": 20850 + }, + { + "epoch": 0.92, + "learning_rate": 3.486492915656919e-05, + "loss": 0.1297, + "step": 20860 + }, + { + "epoch": 0.92, + "learning_rate": 3.485749542825709e-05, + "loss": 0.2014, + "step": 20870 + }, + { + "epoch": 0.92, + "learning_rate": 3.485006169994499e-05, + "loss": 0.2317, + "step": 20880 + }, + { + "epoch": 0.92, + "learning_rate": 3.4842627971632894e-05, + "loss": 0.1606, + "step": 20890 + }, + { + "epoch": 0.93, + "learning_rate": 3.48351942433208e-05, + "loss": 0.1695, + "step": 20900 + }, + { + "epoch": 0.93, + "learning_rate": 3.48277605150087e-05, + "loss": 0.1627, + "step": 20910 + }, + { + "epoch": 0.93, + "learning_rate": 3.48203267866966e-05, + "loss": 0.1828, + "step": 20920 + }, + { + "epoch": 0.93, + "learning_rate": 3.4812893058384505e-05, + "loss": 0.1814, + "step": 20930 + }, + { + "epoch": 0.93, + "learning_rate": 3.480545933007241e-05, + "loss": 0.1273, + "step": 20940 + }, + { + "epoch": 0.93, + "learning_rate": 3.479802560176031e-05, + "loss": 0.0842, + "step": 20950 + }, + { + "epoch": 0.93, + "learning_rate": 3.4790591873448206e-05, + "loss": 0.1921, + "step": 20960 + }, + { + "epoch": 0.93, + "learning_rate": 3.4783158145136115e-05, + "loss": 0.2187, + "step": 20970 + }, + { + "epoch": 0.93, + "learning_rate": 3.477572441682402e-05, + "loss": 0.151, + "step": 20980 + }, + { + "epoch": 0.93, + "learning_rate": 3.476829068851192e-05, + "loss": 0.1668, + "step": 20990 + }, + { + "epoch": 0.93, + "learning_rate": 3.476085696019982e-05, + "loss": 0.1204, + "step": 21000 + }, + { + "epoch": 0.93, + "learning_rate": 3.475342323188772e-05, + "loss": 0.1847, + "step": 21010 + }, + { + "epoch": 0.93, + "learning_rate": 3.474598950357563e-05, + "loss": 0.2097, + "step": 21020 + }, + { + "epoch": 0.93, + "learning_rate": 3.4738555775263524e-05, + "loss": 0.2032, + "step": 21030 + }, + { + "epoch": 0.93, + "learning_rate": 3.473112204695143e-05, + "loss": 0.1899, + "step": 21040 + }, + { + "epoch": 0.93, + "learning_rate": 3.472368831863933e-05, + "loss": 0.1531, + "step": 21050 + }, + { + "epoch": 0.93, + "learning_rate": 3.471625459032724e-05, + "loss": 0.1445, + "step": 21060 + }, + { + "epoch": 0.93, + "learning_rate": 3.4708820862015134e-05, + "loss": 0.1599, + "step": 21070 + }, + { + "epoch": 0.93, + "learning_rate": 3.470138713370304e-05, + "loss": 0.1504, + "step": 21080 + }, + { + "epoch": 0.93, + "learning_rate": 3.469395340539094e-05, + "loss": 0.2265, + "step": 21090 + }, + { + "epoch": 0.93, + "learning_rate": 3.468651967707884e-05, + "loss": 0.1613, + "step": 21100 + }, + { + "epoch": 0.93, + "learning_rate": 3.467908594876675e-05, + "loss": 0.1736, + "step": 21110 + }, + { + "epoch": 0.94, + "learning_rate": 3.467165222045465e-05, + "loss": 0.2156, + "step": 21120 + }, + { + "epoch": 0.94, + "learning_rate": 3.466421849214255e-05, + "loss": 0.1427, + "step": 21130 + }, + { + "epoch": 0.94, + "learning_rate": 3.465678476383045e-05, + "loss": 0.1633, + "step": 21140 + }, + { + "epoch": 0.94, + "learning_rate": 3.4649351035518355e-05, + "loss": 0.1782, + "step": 21150 + }, + { + "epoch": 0.94, + "learning_rate": 3.464191730720626e-05, + "loss": 0.1281, + "step": 21160 + }, + { + "epoch": 0.94, + "learning_rate": 3.463448357889416e-05, + "loss": 0.1595, + "step": 21170 + }, + { + "epoch": 0.94, + "learning_rate": 3.462704985058206e-05, + "loss": 0.1619, + "step": 21180 + }, + { + "epoch": 0.94, + "learning_rate": 3.4619616122269965e-05, + "loss": 0.1473, + "step": 21190 + }, + { + "epoch": 0.94, + "learning_rate": 3.461218239395787e-05, + "loss": 0.1755, + "step": 21200 + }, + { + "epoch": 0.94, + "learning_rate": 3.460474866564577e-05, + "loss": 0.1827, + "step": 21210 + }, + { + "epoch": 0.94, + "learning_rate": 3.459731493733367e-05, + "loss": 0.1749, + "step": 21220 + }, + { + "epoch": 0.94, + "learning_rate": 3.4589881209021575e-05, + "loss": 0.1945, + "step": 21230 + }, + { + "epoch": 0.94, + "learning_rate": 3.458244748070948e-05, + "loss": 0.1492, + "step": 21240 + }, + { + "epoch": 0.94, + "learning_rate": 3.457501375239738e-05, + "loss": 0.1474, + "step": 21250 + }, + { + "epoch": 0.94, + "learning_rate": 3.456758002408528e-05, + "loss": 0.1614, + "step": 21260 + }, + { + "epoch": 0.94, + "learning_rate": 3.4560146295773186e-05, + "loss": 0.2015, + "step": 21270 + }, + { + "epoch": 0.94, + "learning_rate": 3.455271256746109e-05, + "loss": 0.1238, + "step": 21280 + }, + { + "epoch": 0.94, + "learning_rate": 3.4545278839148984e-05, + "loss": 0.1438, + "step": 21290 + }, + { + "epoch": 0.94, + "learning_rate": 3.4537845110836893e-05, + "loss": 0.2032, + "step": 21300 + }, + { + "epoch": 0.94, + "learning_rate": 3.453041138252479e-05, + "loss": 0.1242, + "step": 21310 + }, + { + "epoch": 0.94, + "learning_rate": 3.45229776542127e-05, + "loss": 0.171, + "step": 21320 + }, + { + "epoch": 0.94, + "learning_rate": 3.4515543925900594e-05, + "loss": 0.1792, + "step": 21330 + }, + { + "epoch": 0.94, + "learning_rate": 3.45081101975885e-05, + "loss": 0.1499, + "step": 21340 + }, + { + "epoch": 0.95, + "learning_rate": 3.4500676469276406e-05, + "loss": 0.1802, + "step": 21350 + }, + { + "epoch": 0.95, + "learning_rate": 3.44932427409643e-05, + "loss": 0.1895, + "step": 21360 + }, + { + "epoch": 0.95, + "learning_rate": 3.448580901265221e-05, + "loss": 0.1448, + "step": 21370 + }, + { + "epoch": 0.95, + "learning_rate": 3.447837528434011e-05, + "loss": 0.1985, + "step": 21380 + }, + { + "epoch": 0.95, + "learning_rate": 3.4470941556028017e-05, + "loss": 0.1352, + "step": 21390 + }, + { + "epoch": 0.95, + "learning_rate": 3.446350782771591e-05, + "loss": 0.1313, + "step": 21400 + }, + { + "epoch": 0.95, + "learning_rate": 3.4456074099403815e-05, + "loss": 0.1684, + "step": 21410 + }, + { + "epoch": 0.95, + "learning_rate": 3.444864037109172e-05, + "loss": 0.1457, + "step": 21420 + }, + { + "epoch": 0.95, + "learning_rate": 3.444120664277962e-05, + "loss": 0.1242, + "step": 21430 + }, + { + "epoch": 0.95, + "learning_rate": 3.443377291446752e-05, + "loss": 0.1113, + "step": 21440 + }, + { + "epoch": 0.95, + "learning_rate": 3.4426339186155425e-05, + "loss": 0.1525, + "step": 21450 + }, + { + "epoch": 0.95, + "learning_rate": 3.441890545784333e-05, + "loss": 0.1954, + "step": 21460 + }, + { + "epoch": 0.95, + "learning_rate": 3.441147172953123e-05, + "loss": 0.184, + "step": 21470 + }, + { + "epoch": 0.95, + "learning_rate": 3.440403800121913e-05, + "loss": 0.1395, + "step": 21480 + }, + { + "epoch": 0.95, + "learning_rate": 3.4396604272907036e-05, + "loss": 0.2079, + "step": 21490 + }, + { + "epoch": 0.95, + "learning_rate": 3.438917054459494e-05, + "loss": 0.1861, + "step": 21500 + }, + { + "epoch": 0.95, + "learning_rate": 3.438173681628284e-05, + "loss": 0.2205, + "step": 21510 + }, + { + "epoch": 0.95, + "learning_rate": 3.437430308797074e-05, + "loss": 0.116, + "step": 21520 + }, + { + "epoch": 0.95, + "learning_rate": 3.4366869359658646e-05, + "loss": 0.1297, + "step": 21530 + }, + { + "epoch": 0.95, + "learning_rate": 3.435943563134655e-05, + "loss": 0.1878, + "step": 21540 + }, + { + "epoch": 0.95, + "learning_rate": 3.4352001903034444e-05, + "loss": 0.1394, + "step": 21550 + }, + { + "epoch": 0.95, + "learning_rate": 3.4344568174722354e-05, + "loss": 0.1725, + "step": 21560 + }, + { + "epoch": 0.95, + "learning_rate": 3.433713444641025e-05, + "loss": 0.1385, + "step": 21570 + }, + { + "epoch": 0.96, + "learning_rate": 3.432970071809816e-05, + "loss": 0.2084, + "step": 21580 + }, + { + "epoch": 0.96, + "learning_rate": 3.432226698978606e-05, + "loss": 0.1816, + "step": 21590 + }, + { + "epoch": 0.96, + "learning_rate": 3.4314833261473964e-05, + "loss": 0.1887, + "step": 21600 + }, + { + "epoch": 0.96, + "learning_rate": 3.4307399533161867e-05, + "loss": 0.1795, + "step": 21610 + }, + { + "epoch": 0.96, + "learning_rate": 3.429996580484976e-05, + "loss": 0.1507, + "step": 21620 + }, + { + "epoch": 0.96, + "learning_rate": 3.429253207653767e-05, + "loss": 0.1552, + "step": 21630 + }, + { + "epoch": 0.96, + "learning_rate": 3.428509834822557e-05, + "loss": 0.1683, + "step": 21640 + }, + { + "epoch": 0.96, + "learning_rate": 3.427766461991348e-05, + "loss": 0.1727, + "step": 21650 + }, + { + "epoch": 0.96, + "learning_rate": 3.427023089160137e-05, + "loss": 0.1811, + "step": 21660 + }, + { + "epoch": 0.96, + "learning_rate": 3.4262797163289275e-05, + "loss": 0.1742, + "step": 21670 + }, + { + "epoch": 0.96, + "learning_rate": 3.425536343497718e-05, + "loss": 0.1658, + "step": 21680 + }, + { + "epoch": 0.96, + "learning_rate": 3.424792970666508e-05, + "loss": 0.1591, + "step": 21690 + }, + { + "epoch": 0.96, + "learning_rate": 3.424049597835298e-05, + "loss": 0.2078, + "step": 21700 + }, + { + "epoch": 0.96, + "learning_rate": 3.4233062250040886e-05, + "loss": 0.1816, + "step": 21710 + }, + { + "epoch": 0.96, + "learning_rate": 3.4225628521728795e-05, + "loss": 0.1703, + "step": 21720 + }, + { + "epoch": 0.96, + "learning_rate": 3.421819479341669e-05, + "loss": 0.1697, + "step": 21730 + }, + { + "epoch": 0.96, + "learning_rate": 3.421076106510459e-05, + "loss": 0.1244, + "step": 21740 + }, + { + "epoch": 0.96, + "learning_rate": 3.4203327336792496e-05, + "loss": 0.1399, + "step": 21750 + }, + { + "epoch": 0.96, + "learning_rate": 3.41958936084804e-05, + "loss": 0.1678, + "step": 21760 + }, + { + "epoch": 0.96, + "learning_rate": 3.41884598801683e-05, + "loss": 0.1592, + "step": 21770 + }, + { + "epoch": 0.96, + "learning_rate": 3.4181026151856204e-05, + "loss": 0.1717, + "step": 21780 + }, + { + "epoch": 0.96, + "learning_rate": 3.4173592423544106e-05, + "loss": 0.2147, + "step": 21790 + }, + { + "epoch": 0.97, + "learning_rate": 3.416615869523201e-05, + "loss": 0.1448, + "step": 21800 + }, + { + "epoch": 0.97, + "learning_rate": 3.415872496691991e-05, + "loss": 0.1589, + "step": 21810 + }, + { + "epoch": 0.97, + "learning_rate": 3.4151291238607814e-05, + "loss": 0.1597, + "step": 21820 + }, + { + "epoch": 0.97, + "learning_rate": 3.4143857510295717e-05, + "loss": 0.211, + "step": 21830 + }, + { + "epoch": 0.97, + "learning_rate": 3.413642378198362e-05, + "loss": 0.1704, + "step": 21840 + }, + { + "epoch": 0.97, + "learning_rate": 3.412899005367152e-05, + "loss": 0.1576, + "step": 21850 + }, + { + "epoch": 0.97, + "learning_rate": 3.4121556325359424e-05, + "loss": 0.1723, + "step": 21860 + }, + { + "epoch": 0.97, + "learning_rate": 3.411412259704733e-05, + "loss": 0.2127, + "step": 21870 + }, + { + "epoch": 0.97, + "learning_rate": 3.410668886873522e-05, + "loss": 0.1752, + "step": 21880 + }, + { + "epoch": 0.97, + "learning_rate": 3.409925514042313e-05, + "loss": 0.1701, + "step": 21890 + }, + { + "epoch": 0.97, + "learning_rate": 3.409182141211103e-05, + "loss": 0.1552, + "step": 21900 + }, + { + "epoch": 0.97, + "learning_rate": 3.408438768379894e-05, + "loss": 0.2003, + "step": 21910 + }, + { + "epoch": 0.97, + "learning_rate": 3.407695395548683e-05, + "loss": 0.2109, + "step": 21920 + }, + { + "epoch": 0.97, + "learning_rate": 3.406952022717474e-05, + "loss": 0.1614, + "step": 21930 + }, + { + "epoch": 0.97, + "learning_rate": 3.406208649886264e-05, + "loss": 0.1553, + "step": 21940 + }, + { + "epoch": 0.97, + "learning_rate": 3.405465277055054e-05, + "loss": 0.168, + "step": 21950 + }, + { + "epoch": 0.97, + "learning_rate": 3.404721904223845e-05, + "loss": 0.1581, + "step": 21960 + }, + { + "epoch": 0.97, + "learning_rate": 3.4039785313926346e-05, + "loss": 0.1623, + "step": 21970 + }, + { + "epoch": 0.97, + "learning_rate": 3.4032351585614255e-05, + "loss": 0.1439, + "step": 21980 + }, + { + "epoch": 0.97, + "learning_rate": 3.402491785730215e-05, + "loss": 0.1899, + "step": 21990 + }, + { + "epoch": 0.97, + "learning_rate": 3.4017484128990054e-05, + "loss": 0.2052, + "step": 22000 + }, + { + "epoch": 0.97, + "learning_rate": 3.4010050400677956e-05, + "loss": 0.2351, + "step": 22010 + }, + { + "epoch": 0.97, + "learning_rate": 3.400261667236586e-05, + "loss": 0.1407, + "step": 22020 + }, + { + "epoch": 0.98, + "learning_rate": 3.399518294405376e-05, + "loss": 0.115, + "step": 22030 + }, + { + "epoch": 0.98, + "learning_rate": 3.3987749215741664e-05, + "loss": 0.2048, + "step": 22040 + }, + { + "epoch": 0.98, + "learning_rate": 3.3980315487429567e-05, + "loss": 0.1572, + "step": 22050 + }, + { + "epoch": 0.98, + "learning_rate": 3.397288175911747e-05, + "loss": 0.1256, + "step": 22060 + }, + { + "epoch": 0.98, + "learning_rate": 3.396544803080537e-05, + "loss": 0.2119, + "step": 22070 + }, + { + "epoch": 0.98, + "learning_rate": 3.3958014302493274e-05, + "loss": 0.1526, + "step": 22080 + }, + { + "epoch": 0.98, + "learning_rate": 3.395058057418118e-05, + "loss": 0.2002, + "step": 22090 + }, + { + "epoch": 0.98, + "learning_rate": 3.394314684586908e-05, + "loss": 0.1494, + "step": 22100 + }, + { + "epoch": 0.98, + "learning_rate": 3.393571311755698e-05, + "loss": 0.1906, + "step": 22110 + }, + { + "epoch": 0.98, + "learning_rate": 3.3928279389244885e-05, + "loss": 0.2292, + "step": 22120 + }, + { + "epoch": 0.98, + "learning_rate": 3.392084566093279e-05, + "loss": 0.178, + "step": 22130 + }, + { + "epoch": 0.98, + "learning_rate": 3.391341193262069e-05, + "loss": 0.1335, + "step": 22140 + }, + { + "epoch": 0.98, + "learning_rate": 3.390597820430859e-05, + "loss": 0.1546, + "step": 22150 + }, + { + "epoch": 0.98, + "learning_rate": 3.389854447599649e-05, + "loss": 0.2016, + "step": 22160 + }, + { + "epoch": 0.98, + "learning_rate": 3.38911107476844e-05, + "loss": 0.1536, + "step": 22170 + }, + { + "epoch": 0.98, + "learning_rate": 3.388367701937229e-05, + "loss": 0.1137, + "step": 22180 + }, + { + "epoch": 0.98, + "learning_rate": 3.38762432910602e-05, + "loss": 0.1889, + "step": 22190 + }, + { + "epoch": 0.98, + "learning_rate": 3.3868809562748105e-05, + "loss": 0.2044, + "step": 22200 + }, + { + "epoch": 0.98, + "learning_rate": 3.3861375834436e-05, + "loss": 0.2402, + "step": 22210 + }, + { + "epoch": 0.98, + "learning_rate": 3.385394210612391e-05, + "loss": 0.1649, + "step": 22220 + }, + { + "epoch": 0.98, + "learning_rate": 3.3846508377811806e-05, + "loss": 0.209, + "step": 22230 + }, + { + "epoch": 0.98, + "learning_rate": 3.3839074649499716e-05, + "loss": 0.2256, + "step": 22240 + }, + { + "epoch": 0.99, + "learning_rate": 3.383164092118761e-05, + "loss": 0.1695, + "step": 22250 + }, + { + "epoch": 0.99, + "learning_rate": 3.382420719287552e-05, + "loss": 0.1262, + "step": 22260 + }, + { + "epoch": 0.99, + "learning_rate": 3.3816773464563416e-05, + "loss": 0.1726, + "step": 22270 + }, + { + "epoch": 0.99, + "learning_rate": 3.380933973625132e-05, + "loss": 0.1169, + "step": 22280 + }, + { + "epoch": 0.99, + "learning_rate": 3.380190600793922e-05, + "loss": 0.1707, + "step": 22290 + }, + { + "epoch": 0.99, + "learning_rate": 3.3794472279627124e-05, + "loss": 0.1996, + "step": 22300 + }, + { + "epoch": 0.99, + "learning_rate": 3.378703855131503e-05, + "loss": 0.2232, + "step": 22310 + }, + { + "epoch": 0.99, + "learning_rate": 3.377960482300293e-05, + "loss": 0.155, + "step": 22320 + }, + { + "epoch": 0.99, + "learning_rate": 3.377217109469084e-05, + "loss": 0.2103, + "step": 22330 + }, + { + "epoch": 0.99, + "learning_rate": 3.3764737366378735e-05, + "loss": 0.2145, + "step": 22340 + }, + { + "epoch": 0.99, + "learning_rate": 3.375730363806664e-05, + "loss": 0.1908, + "step": 22350 + }, + { + "epoch": 0.99, + "learning_rate": 3.374986990975454e-05, + "loss": 0.1762, + "step": 22360 + }, + { + "epoch": 0.99, + "learning_rate": 3.374243618144244e-05, + "loss": 0.1318, + "step": 22370 + }, + { + "epoch": 0.99, + "learning_rate": 3.3735002453130345e-05, + "loss": 0.1456, + "step": 22380 + }, + { + "epoch": 0.99, + "learning_rate": 3.372756872481825e-05, + "loss": 0.1276, + "step": 22390 + }, + { + "epoch": 0.99, + "learning_rate": 3.372013499650615e-05, + "loss": 0.1678, + "step": 22400 + }, + { + "epoch": 0.99, + "learning_rate": 3.371270126819405e-05, + "loss": 0.1953, + "step": 22410 + }, + { + "epoch": 0.99, + "learning_rate": 3.370526753988195e-05, + "loss": 0.1482, + "step": 22420 + }, + { + "epoch": 0.99, + "learning_rate": 3.369783381156986e-05, + "loss": 0.1558, + "step": 22430 + }, + { + "epoch": 0.99, + "learning_rate": 3.3690400083257754e-05, + "loss": 0.1586, + "step": 22440 + }, + { + "epoch": 0.99, + "learning_rate": 3.368296635494566e-05, + "loss": 0.2083, + "step": 22450 + }, + { + "epoch": 0.99, + "learning_rate": 3.3675532626633566e-05, + "loss": 0.1728, + "step": 22460 + }, + { + "epoch": 0.99, + "learning_rate": 3.366809889832147e-05, + "loss": 0.1348, + "step": 22470 + }, + { + "epoch": 1.0, + "learning_rate": 3.366066517000937e-05, + "loss": 0.1237, + "step": 22480 + }, + { + "epoch": 1.0, + "learning_rate": 3.3653231441697266e-05, + "loss": 0.1692, + "step": 22490 + }, + { + "epoch": 1.0, + "learning_rate": 3.3645797713385176e-05, + "loss": 0.236, + "step": 22500 + }, + { + "epoch": 1.0, + "learning_rate": 3.363836398507307e-05, + "loss": 0.1605, + "step": 22510 + }, + { + "epoch": 1.0, + "learning_rate": 3.363093025676098e-05, + "loss": 0.1674, + "step": 22520 + }, + { + "epoch": 1.0, + "learning_rate": 3.362349652844888e-05, + "loss": 0.1877, + "step": 22530 + }, + { + "epoch": 1.0, + "learning_rate": 3.3616062800136786e-05, + "loss": 0.1729, + "step": 22540 + }, + { + "epoch": 1.0, + "learning_rate": 3.360862907182468e-05, + "loss": 0.1376, + "step": 22550 + }, + { + "epoch": 1.0, + "learning_rate": 3.3601195343512585e-05, + "loss": 0.1879, + "step": 22560 + }, + { + "epoch": 1.0, + "learning_rate": 3.3593761615200494e-05, + "loss": 0.1743, + "step": 22570 + }, + { + "epoch": 1.0, + "learning_rate": 3.358632788688839e-05, + "loss": 0.1603, + "step": 22580 + }, + { + "epoch": 1.0, + "learning_rate": 3.35788941585763e-05, + "loss": 0.1699, + "step": 22590 + }, + { + "epoch": 1.0, + "learning_rate": 3.3571460430264195e-05, + "loss": 0.1376, + "step": 22600 + }, + { + "epoch": 1.0, + "learning_rate": 3.35640267019521e-05, + "loss": 0.1383, + "step": 22610 + }, + { + "epoch": 1.0, + "learning_rate": 3.355659297364e-05, + "loss": 0.1432, + "step": 22620 + }, + { + "epoch": 1.0, + "learning_rate": 3.35491592453279e-05, + "loss": 0.1425, + "step": 22630 + }, + { + "epoch": 1.0, + "learning_rate": 3.3541725517015805e-05, + "loss": 0.2085, + "step": 22640 + }, + { + "epoch": 1.0, + "learning_rate": 3.353429178870371e-05, + "loss": 0.128, + "step": 22650 + }, + { + "epoch": 1.0, + "learning_rate": 3.352685806039161e-05, + "loss": 0.1358, + "step": 22660 + }, + { + "epoch": 1.0, + "learning_rate": 3.351942433207951e-05, + "loss": 0.1251, + "step": 22670 + }, + { + "epoch": 1.0, + "learning_rate": 3.3511990603767415e-05, + "loss": 0.1374, + "step": 22680 + }, + { + "epoch": 1.0, + "learning_rate": 3.350455687545532e-05, + "loss": 0.1689, + "step": 22690 + }, + { + "epoch": 1.01, + "learning_rate": 3.349712314714322e-05, + "loss": 0.1001, + "step": 22700 + }, + { + "epoch": 1.01, + "learning_rate": 3.348968941883112e-05, + "loss": 0.1581, + "step": 22710 + }, + { + "epoch": 1.01, + "learning_rate": 3.3482255690519026e-05, + "loss": 0.1598, + "step": 22720 + }, + { + "epoch": 1.01, + "learning_rate": 3.347482196220693e-05, + "loss": 0.1255, + "step": 22730 + }, + { + "epoch": 1.01, + "learning_rate": 3.346738823389483e-05, + "loss": 0.1412, + "step": 22740 + }, + { + "epoch": 1.01, + "learning_rate": 3.3459954505582734e-05, + "loss": 0.1709, + "step": 22750 + }, + { + "epoch": 1.01, + "learning_rate": 3.3452520777270636e-05, + "loss": 0.1701, + "step": 22760 + }, + { + "epoch": 1.01, + "learning_rate": 3.344508704895853e-05, + "loss": 0.1287, + "step": 22770 + }, + { + "epoch": 1.01, + "learning_rate": 3.343765332064644e-05, + "loss": 0.1491, + "step": 22780 + }, + { + "epoch": 1.01, + "learning_rate": 3.343021959233434e-05, + "loss": 0.1237, + "step": 22790 + }, + { + "epoch": 1.01, + "learning_rate": 3.3422785864022246e-05, + "loss": 0.1617, + "step": 22800 + }, + { + "epoch": 1.01, + "learning_rate": 3.341535213571014e-05, + "loss": 0.1074, + "step": 22810 + }, + { + "epoch": 1.01, + "learning_rate": 3.3407918407398045e-05, + "loss": 0.203, + "step": 22820 + }, + { + "epoch": 1.01, + "learning_rate": 3.3400484679085954e-05, + "loss": 0.1214, + "step": 22830 + }, + { + "epoch": 1.01, + "learning_rate": 3.339305095077385e-05, + "loss": 0.1153, + "step": 22840 + }, + { + "epoch": 1.01, + "learning_rate": 3.338561722246176e-05, + "loss": 0.1511, + "step": 22850 + }, + { + "epoch": 1.01, + "learning_rate": 3.3378183494149655e-05, + "loss": 0.1716, + "step": 22860 + }, + { + "epoch": 1.01, + "learning_rate": 3.3370749765837564e-05, + "loss": 0.1396, + "step": 22870 + }, + { + "epoch": 1.01, + "learning_rate": 3.336331603752546e-05, + "loss": 0.1573, + "step": 22880 + }, + { + "epoch": 1.01, + "learning_rate": 3.335588230921336e-05, + "loss": 0.1733, + "step": 22890 + }, + { + "epoch": 1.01, + "learning_rate": 3.3348448580901265e-05, + "loss": 0.1383, + "step": 22900 + }, + { + "epoch": 1.01, + "learning_rate": 3.334101485258917e-05, + "loss": 0.1917, + "step": 22910 + }, + { + "epoch": 1.01, + "learning_rate": 3.333358112427707e-05, + "loss": 0.1511, + "step": 22920 + }, + { + "epoch": 1.02, + "learning_rate": 3.332614739596497e-05, + "loss": 0.1627, + "step": 22930 + }, + { + "epoch": 1.02, + "learning_rate": 3.3318713667652876e-05, + "loss": 0.2148, + "step": 22940 + }, + { + "epoch": 1.02, + "learning_rate": 3.331127993934078e-05, + "loss": 0.1729, + "step": 22950 + }, + { + "epoch": 1.02, + "learning_rate": 3.330384621102868e-05, + "loss": 0.1765, + "step": 22960 + }, + { + "epoch": 1.02, + "learning_rate": 3.3296412482716584e-05, + "loss": 0.1154, + "step": 22970 + }, + { + "epoch": 1.02, + "learning_rate": 3.3288978754404486e-05, + "loss": 0.1176, + "step": 22980 + }, + { + "epoch": 1.02, + "learning_rate": 3.328154502609239e-05, + "loss": 0.1159, + "step": 22990 + }, + { + "epoch": 1.02, + "learning_rate": 3.327411129778029e-05, + "loss": 0.1193, + "step": 23000 + }, + { + "epoch": 1.02, + "learning_rate": 3.3266677569468194e-05, + "loss": 0.1291, + "step": 23010 + }, + { + "epoch": 1.02, + "learning_rate": 3.3259243841156096e-05, + "loss": 0.2065, + "step": 23020 + }, + { + "epoch": 1.02, + "learning_rate": 3.325181011284399e-05, + "loss": 0.1881, + "step": 23030 + }, + { + "epoch": 1.02, + "learning_rate": 3.32443763845319e-05, + "loss": 0.1307, + "step": 23040 + }, + { + "epoch": 1.02, + "learning_rate": 3.32369426562198e-05, + "loss": 0.1743, + "step": 23050 + }, + { + "epoch": 1.02, + "learning_rate": 3.322950892790771e-05, + "loss": 0.1278, + "step": 23060 + }, + { + "epoch": 1.02, + "learning_rate": 3.322207519959561e-05, + "loss": 0.1428, + "step": 23070 + }, + { + "epoch": 1.02, + "learning_rate": 3.321464147128351e-05, + "loss": 0.1014, + "step": 23080 + }, + { + "epoch": 1.02, + "learning_rate": 3.3207207742971414e-05, + "loss": 0.1822, + "step": 23090 + }, + { + "epoch": 1.02, + "learning_rate": 3.319977401465931e-05, + "loss": 0.148, + "step": 23100 + }, + { + "epoch": 1.02, + "learning_rate": 3.319234028634722e-05, + "loss": 0.1551, + "step": 23110 + }, + { + "epoch": 1.02, + "learning_rate": 3.3184906558035115e-05, + "loss": 0.1173, + "step": 23120 + }, + { + "epoch": 1.02, + "learning_rate": 3.3177472829723025e-05, + "loss": 0.1167, + "step": 23130 + }, + { + "epoch": 1.02, + "learning_rate": 3.317003910141092e-05, + "loss": 0.1758, + "step": 23140 + }, + { + "epoch": 1.02, + "learning_rate": 3.316260537309882e-05, + "loss": 0.1343, + "step": 23150 + }, + { + "epoch": 1.03, + "learning_rate": 3.3155171644786726e-05, + "loss": 0.1508, + "step": 23160 + }, + { + "epoch": 1.03, + "learning_rate": 3.314773791647463e-05, + "loss": 0.1394, + "step": 23170 + }, + { + "epoch": 1.03, + "learning_rate": 3.314030418816254e-05, + "loss": 0.1474, + "step": 23180 + }, + { + "epoch": 1.03, + "learning_rate": 3.3132870459850433e-05, + "loss": 0.1296, + "step": 23190 + }, + { + "epoch": 1.03, + "learning_rate": 3.312543673153834e-05, + "loss": 0.1539, + "step": 23200 + }, + { + "epoch": 1.03, + "learning_rate": 3.311800300322624e-05, + "loss": 0.132, + "step": 23210 + }, + { + "epoch": 1.03, + "learning_rate": 3.311056927491414e-05, + "loss": 0.1272, + "step": 23220 + }, + { + "epoch": 1.03, + "learning_rate": 3.3103135546602044e-05, + "loss": 0.1647, + "step": 23230 + }, + { + "epoch": 1.03, + "learning_rate": 3.3095701818289946e-05, + "loss": 0.2238, + "step": 23240 + }, + { + "epoch": 1.03, + "learning_rate": 3.308826808997785e-05, + "loss": 0.1651, + "step": 23250 + }, + { + "epoch": 1.03, + "learning_rate": 3.308083436166575e-05, + "loss": 0.1112, + "step": 23260 + }, + { + "epoch": 1.03, + "learning_rate": 3.3073400633353654e-05, + "loss": 0.1322, + "step": 23270 + }, + { + "epoch": 1.03, + "learning_rate": 3.306596690504156e-05, + "loss": 0.1587, + "step": 23280 + }, + { + "epoch": 1.03, + "learning_rate": 3.305853317672946e-05, + "loss": 0.2148, + "step": 23290 + }, + { + "epoch": 1.03, + "learning_rate": 3.305109944841736e-05, + "loss": 0.1324, + "step": 23300 + }, + { + "epoch": 1.03, + "learning_rate": 3.3043665720105264e-05, + "loss": 0.1616, + "step": 23310 + }, + { + "epoch": 1.03, + "learning_rate": 3.303623199179317e-05, + "loss": 0.1557, + "step": 23320 + }, + { + "epoch": 1.03, + "learning_rate": 3.302879826348107e-05, + "loss": 0.1168, + "step": 23330 + }, + { + "epoch": 1.03, + "learning_rate": 3.302136453516897e-05, + "loss": 0.1474, + "step": 23340 + }, + { + "epoch": 1.03, + "learning_rate": 3.3013930806856875e-05, + "loss": 0.1347, + "step": 23350 + }, + { + "epoch": 1.03, + "learning_rate": 3.300649707854477e-05, + "loss": 0.1286, + "step": 23360 + }, + { + "epoch": 1.03, + "learning_rate": 3.299906335023268e-05, + "loss": 0.1219, + "step": 23370 + }, + { + "epoch": 1.04, + "learning_rate": 3.2991629621920576e-05, + "loss": 0.1566, + "step": 23380 + }, + { + "epoch": 1.04, + "learning_rate": 3.2984195893608485e-05, + "loss": 0.1498, + "step": 23390 + }, + { + "epoch": 1.04, + "learning_rate": 3.297676216529638e-05, + "loss": 0.1543, + "step": 23400 + }, + { + "epoch": 1.04, + "learning_rate": 3.296932843698429e-05, + "loss": 0.1233, + "step": 23410 + }, + { + "epoch": 1.04, + "learning_rate": 3.2961894708672186e-05, + "loss": 0.169, + "step": 23420 + }, + { + "epoch": 1.04, + "learning_rate": 3.295446098036009e-05, + "loss": 0.1652, + "step": 23430 + }, + { + "epoch": 1.04, + "learning_rate": 3.2947027252048e-05, + "loss": 0.1346, + "step": 23440 + }, + { + "epoch": 1.04, + "learning_rate": 3.2939593523735894e-05, + "loss": 0.1613, + "step": 23450 + }, + { + "epoch": 1.04, + "learning_rate": 3.29321597954238e-05, + "loss": 0.1839, + "step": 23460 + }, + { + "epoch": 1.04, + "learning_rate": 3.29247260671117e-05, + "loss": 0.1417, + "step": 23470 + }, + { + "epoch": 1.04, + "learning_rate": 3.29172923387996e-05, + "loss": 0.1361, + "step": 23480 + }, + { + "epoch": 1.04, + "learning_rate": 3.2909858610487504e-05, + "loss": 0.1399, + "step": 23490 + }, + { + "epoch": 1.04, + "learning_rate": 3.290242488217541e-05, + "loss": 0.1331, + "step": 23500 + }, + { + "epoch": 1.04, + "learning_rate": 3.289499115386331e-05, + "loss": 0.1956, + "step": 23510 + }, + { + "epoch": 1.04, + "learning_rate": 3.288755742555121e-05, + "loss": 0.1762, + "step": 23520 + }, + { + "epoch": 1.04, + "learning_rate": 3.2880123697239114e-05, + "loss": 0.1551, + "step": 23530 + }, + { + "epoch": 1.04, + "learning_rate": 3.287268996892702e-05, + "loss": 0.1248, + "step": 23540 + }, + { + "epoch": 1.04, + "learning_rate": 3.286525624061492e-05, + "loss": 0.1196, + "step": 23550 + }, + { + "epoch": 1.04, + "learning_rate": 3.285782251230282e-05, + "loss": 0.1208, + "step": 23560 + }, + { + "epoch": 1.04, + "learning_rate": 3.2850388783990725e-05, + "loss": 0.1304, + "step": 23570 + }, + { + "epoch": 1.04, + "learning_rate": 3.284295505567863e-05, + "loss": 0.104, + "step": 23580 + }, + { + "epoch": 1.04, + "learning_rate": 3.283552132736653e-05, + "loss": 0.1548, + "step": 23590 + }, + { + "epoch": 1.04, + "learning_rate": 3.282808759905443e-05, + "loss": 0.1361, + "step": 23600 + }, + { + "epoch": 1.05, + "learning_rate": 3.2820653870742335e-05, + "loss": 0.1475, + "step": 23610 + }, + { + "epoch": 1.05, + "learning_rate": 3.281322014243024e-05, + "loss": 0.2199, + "step": 23620 + }, + { + "epoch": 1.05, + "learning_rate": 3.280578641411814e-05, + "loss": 0.1698, + "step": 23630 + }, + { + "epoch": 1.05, + "learning_rate": 3.2798352685806036e-05, + "loss": 0.1853, + "step": 23640 + }, + { + "epoch": 1.05, + "learning_rate": 3.2790918957493945e-05, + "loss": 0.1173, + "step": 23650 + }, + { + "epoch": 1.05, + "learning_rate": 3.278348522918184e-05, + "loss": 0.1535, + "step": 23660 + }, + { + "epoch": 1.05, + "learning_rate": 3.277605150086975e-05, + "loss": 0.1118, + "step": 23670 + }, + { + "epoch": 1.05, + "learning_rate": 3.276861777255765e-05, + "loss": 0.172, + "step": 23680 + }, + { + "epoch": 1.05, + "learning_rate": 3.276118404424555e-05, + "loss": 0.1453, + "step": 23690 + }, + { + "epoch": 1.05, + "learning_rate": 3.275375031593346e-05, + "loss": 0.1752, + "step": 23700 + }, + { + "epoch": 1.05, + "learning_rate": 3.2746316587621354e-05, + "loss": 0.1746, + "step": 23710 + }, + { + "epoch": 1.05, + "learning_rate": 3.2738882859309263e-05, + "loss": 0.1836, + "step": 23720 + }, + { + "epoch": 1.05, + "learning_rate": 3.273144913099716e-05, + "loss": 0.1501, + "step": 23730 + }, + { + "epoch": 1.05, + "learning_rate": 3.272401540268507e-05, + "loss": 0.1986, + "step": 23740 + }, + { + "epoch": 1.05, + "learning_rate": 3.2716581674372964e-05, + "loss": 0.0916, + "step": 23750 + }, + { + "epoch": 1.05, + "learning_rate": 3.270914794606087e-05, + "loss": 0.1378, + "step": 23760 + }, + { + "epoch": 1.05, + "learning_rate": 3.270171421774877e-05, + "loss": 0.1449, + "step": 23770 + }, + { + "epoch": 1.05, + "learning_rate": 3.269428048943667e-05, + "loss": 0.161, + "step": 23780 + }, + { + "epoch": 1.05, + "learning_rate": 3.2686846761124575e-05, + "loss": 0.1384, + "step": 23790 + }, + { + "epoch": 1.05, + "learning_rate": 3.267941303281248e-05, + "loss": 0.1655, + "step": 23800 + }, + { + "epoch": 1.05, + "learning_rate": 3.267197930450038e-05, + "loss": 0.1491, + "step": 23810 + }, + { + "epoch": 1.05, + "learning_rate": 3.266454557618828e-05, + "loss": 0.1399, + "step": 23820 + }, + { + "epoch": 1.06, + "learning_rate": 3.2657111847876185e-05, + "loss": 0.1925, + "step": 23830 + }, + { + "epoch": 1.06, + "learning_rate": 3.264967811956409e-05, + "loss": 0.1632, + "step": 23840 + }, + { + "epoch": 1.06, + "learning_rate": 3.264224439125199e-05, + "loss": 0.1427, + "step": 23850 + }, + { + "epoch": 1.06, + "learning_rate": 3.263481066293989e-05, + "loss": 0.1184, + "step": 23860 + }, + { + "epoch": 1.06, + "learning_rate": 3.2627376934627795e-05, + "loss": 0.1602, + "step": 23870 + }, + { + "epoch": 1.06, + "learning_rate": 3.26199432063157e-05, + "loss": 0.1054, + "step": 23880 + }, + { + "epoch": 1.06, + "learning_rate": 3.26125094780036e-05, + "loss": 0.147, + "step": 23890 + }, + { + "epoch": 1.06, + "learning_rate": 3.2605075749691496e-05, + "loss": 0.1914, + "step": 23900 + }, + { + "epoch": 1.06, + "learning_rate": 3.2597642021379406e-05, + "loss": 0.1365, + "step": 23910 + }, + { + "epoch": 1.06, + "learning_rate": 3.259020829306731e-05, + "loss": 0.1269, + "step": 23920 + }, + { + "epoch": 1.06, + "learning_rate": 3.258277456475521e-05, + "loss": 0.1319, + "step": 23930 + }, + { + "epoch": 1.06, + "learning_rate": 3.257534083644311e-05, + "loss": 0.1844, + "step": 23940 + }, + { + "epoch": 1.06, + "learning_rate": 3.2567907108131016e-05, + "loss": 0.1394, + "step": 23950 + }, + { + "epoch": 1.06, + "learning_rate": 3.256047337981892e-05, + "loss": 0.169, + "step": 23960 + }, + { + "epoch": 1.06, + "learning_rate": 3.2553039651506814e-05, + "loss": 0.2282, + "step": 23970 + }, + { + "epoch": 1.06, + "learning_rate": 3.2545605923194724e-05, + "loss": 0.1273, + "step": 23980 + }, + { + "epoch": 1.06, + "learning_rate": 3.253817219488262e-05, + "loss": 0.133, + "step": 23990 + }, + { + "epoch": 1.06, + "learning_rate": 3.253073846657053e-05, + "loss": 0.1362, + "step": 24000 + }, + { + "epoch": 1.06, + "learning_rate": 3.2523304738258425e-05, + "loss": 0.1593, + "step": 24010 + }, + { + "epoch": 1.06, + "learning_rate": 3.251587100994633e-05, + "loss": 0.1473, + "step": 24020 + }, + { + "epoch": 1.06, + "learning_rate": 3.250843728163423e-05, + "loss": 0.1312, + "step": 24030 + }, + { + "epoch": 1.06, + "learning_rate": 3.250100355332213e-05, + "loss": 0.1152, + "step": 24040 + }, + { + "epoch": 1.06, + "learning_rate": 3.249356982501004e-05, + "loss": 0.1981, + "step": 24050 + }, + { + "epoch": 1.07, + "learning_rate": 3.248613609669794e-05, + "loss": 0.0671, + "step": 24060 + }, + { + "epoch": 1.07, + "learning_rate": 3.247870236838585e-05, + "loss": 0.1673, + "step": 24070 + }, + { + "epoch": 1.07, + "learning_rate": 3.247126864007374e-05, + "loss": 0.1507, + "step": 24080 + }, + { + "epoch": 1.07, + "learning_rate": 3.2463834911761645e-05, + "loss": 0.1663, + "step": 24090 + }, + { + "epoch": 1.07, + "learning_rate": 3.245640118344955e-05, + "loss": 0.1521, + "step": 24100 + }, + { + "epoch": 1.07, + "learning_rate": 3.244896745513745e-05, + "loss": 0.1235, + "step": 24110 + }, + { + "epoch": 1.07, + "learning_rate": 3.244153372682535e-05, + "loss": 0.2276, + "step": 24120 + }, + { + "epoch": 1.07, + "learning_rate": 3.2434099998513256e-05, + "loss": 0.133, + "step": 24130 + }, + { + "epoch": 1.07, + "learning_rate": 3.242666627020116e-05, + "loss": 0.1746, + "step": 24140 + }, + { + "epoch": 1.07, + "learning_rate": 3.241923254188906e-05, + "loss": 0.1128, + "step": 24150 + }, + { + "epoch": 1.07, + "learning_rate": 3.241179881357696e-05, + "loss": 0.1737, + "step": 24160 + }, + { + "epoch": 1.07, + "learning_rate": 3.2404365085264866e-05, + "loss": 0.1741, + "step": 24170 + }, + { + "epoch": 1.07, + "learning_rate": 3.239693135695277e-05, + "loss": 0.1552, + "step": 24180 + }, + { + "epoch": 1.07, + "learning_rate": 3.238949762864067e-05, + "loss": 0.1949, + "step": 24190 + }, + { + "epoch": 1.07, + "learning_rate": 3.2382063900328574e-05, + "loss": 0.1574, + "step": 24200 + }, + { + "epoch": 1.07, + "learning_rate": 3.2374630172016476e-05, + "loss": 0.163, + "step": 24210 + }, + { + "epoch": 1.07, + "learning_rate": 3.236719644370438e-05, + "loss": 0.1161, + "step": 24220 + }, + { + "epoch": 1.07, + "learning_rate": 3.2359762715392275e-05, + "loss": 0.1688, + "step": 24230 + }, + { + "epoch": 1.07, + "learning_rate": 3.2352328987080184e-05, + "loss": 0.1118, + "step": 24240 + }, + { + "epoch": 1.07, + "learning_rate": 3.234489525876808e-05, + "loss": 0.1395, + "step": 24250 + }, + { + "epoch": 1.07, + "learning_rate": 3.233746153045599e-05, + "loss": 0.1708, + "step": 24260 + }, + { + "epoch": 1.07, + "learning_rate": 3.2330027802143885e-05, + "loss": 0.1728, + "step": 24270 + }, + { + "epoch": 1.07, + "learning_rate": 3.2322594073831794e-05, + "loss": 0.1304, + "step": 24280 + }, + { + "epoch": 1.08, + "learning_rate": 3.23151603455197e-05, + "loss": 0.15, + "step": 24290 + }, + { + "epoch": 1.08, + "learning_rate": 3.230772661720759e-05, + "loss": 0.1733, + "step": 24300 + }, + { + "epoch": 1.08, + "learning_rate": 3.23002928888955e-05, + "loss": 0.1568, + "step": 24310 + }, + { + "epoch": 1.08, + "learning_rate": 3.22928591605834e-05, + "loss": 0.132, + "step": 24320 + }, + { + "epoch": 1.08, + "learning_rate": 3.228542543227131e-05, + "loss": 0.1694, + "step": 24330 + }, + { + "epoch": 1.08, + "learning_rate": 3.22779917039592e-05, + "loss": 0.1414, + "step": 24340 + }, + { + "epoch": 1.08, + "learning_rate": 3.2270557975647106e-05, + "loss": 0.1125, + "step": 24350 + }, + { + "epoch": 1.08, + "learning_rate": 3.226312424733501e-05, + "loss": 0.1861, + "step": 24360 + }, + { + "epoch": 1.08, + "learning_rate": 3.225569051902291e-05, + "loss": 0.1123, + "step": 24370 + }, + { + "epoch": 1.08, + "learning_rate": 3.224825679071081e-05, + "loss": 0.2113, + "step": 24380 + }, + { + "epoch": 1.08, + "learning_rate": 3.2240823062398716e-05, + "loss": 0.1249, + "step": 24390 + }, + { + "epoch": 1.08, + "learning_rate": 3.223338933408662e-05, + "loss": 0.1609, + "step": 24400 + }, + { + "epoch": 1.08, + "learning_rate": 3.222595560577452e-05, + "loss": 0.1641, + "step": 24410 + }, + { + "epoch": 1.08, + "learning_rate": 3.2218521877462424e-05, + "loss": 0.1596, + "step": 24420 + }, + { + "epoch": 1.08, + "learning_rate": 3.2211088149150326e-05, + "loss": 0.1085, + "step": 24430 + }, + { + "epoch": 1.08, + "learning_rate": 3.220365442083823e-05, + "loss": 0.1112, + "step": 24440 + }, + { + "epoch": 1.08, + "learning_rate": 3.219622069252613e-05, + "loss": 0.1218, + "step": 24450 + }, + { + "epoch": 1.08, + "learning_rate": 3.2188786964214034e-05, + "loss": 0.1712, + "step": 24460 + }, + { + "epoch": 1.08, + "learning_rate": 3.2181353235901937e-05, + "loss": 0.1661, + "step": 24470 + }, + { + "epoch": 1.08, + "learning_rate": 3.217391950758984e-05, + "loss": 0.175, + "step": 24480 + }, + { + "epoch": 1.08, + "learning_rate": 3.216648577927774e-05, + "loss": 0.1173, + "step": 24490 + }, + { + "epoch": 1.08, + "learning_rate": 3.2159052050965644e-05, + "loss": 0.2439, + "step": 24500 + }, + { + "epoch": 1.09, + "learning_rate": 3.215161832265354e-05, + "loss": 0.1192, + "step": 24510 + }, + { + "epoch": 1.09, + "learning_rate": 3.214418459434145e-05, + "loss": 0.1595, + "step": 24520 + }, + { + "epoch": 1.09, + "learning_rate": 3.213675086602935e-05, + "loss": 0.1647, + "step": 24530 + }, + { + "epoch": 1.09, + "learning_rate": 3.2129317137717255e-05, + "loss": 0.129, + "step": 24540 + }, + { + "epoch": 1.09, + "learning_rate": 3.212188340940516e-05, + "loss": 0.1614, + "step": 24550 + }, + { + "epoch": 1.09, + "learning_rate": 3.211444968109305e-05, + "loss": 0.1036, + "step": 24560 + }, + { + "epoch": 1.09, + "learning_rate": 3.210701595278096e-05, + "loss": 0.1644, + "step": 24570 + }, + { + "epoch": 1.09, + "learning_rate": 3.209958222446886e-05, + "loss": 0.1836, + "step": 24580 + }, + { + "epoch": 1.09, + "learning_rate": 3.209214849615677e-05, + "loss": 0.1606, + "step": 24590 + }, + { + "epoch": 1.09, + "learning_rate": 3.208471476784466e-05, + "loss": 0.1816, + "step": 24600 + }, + { + "epoch": 1.09, + "learning_rate": 3.207728103953257e-05, + "loss": 0.1627, + "step": 24610 + }, + { + "epoch": 1.09, + "learning_rate": 3.206984731122047e-05, + "loss": 0.1452, + "step": 24620 + }, + { + "epoch": 1.09, + "learning_rate": 3.206241358290837e-05, + "loss": 0.1502, + "step": 24630 + }, + { + "epoch": 1.09, + "learning_rate": 3.2054979854596274e-05, + "loss": 0.1295, + "step": 24640 + }, + { + "epoch": 1.09, + "learning_rate": 3.2047546126284176e-05, + "loss": 0.1677, + "step": 24650 + }, + { + "epoch": 1.09, + "learning_rate": 3.2040112397972086e-05, + "loss": 0.1334, + "step": 24660 + }, + { + "epoch": 1.09, + "learning_rate": 3.203267866965998e-05, + "loss": 0.1629, + "step": 24670 + }, + { + "epoch": 1.09, + "learning_rate": 3.2025244941347884e-05, + "loss": 0.1868, + "step": 24680 + }, + { + "epoch": 1.09, + "learning_rate": 3.2017811213035787e-05, + "loss": 0.1583, + "step": 24690 + }, + { + "epoch": 1.09, + "learning_rate": 3.201037748472369e-05, + "loss": 0.1676, + "step": 24700 + }, + { + "epoch": 1.09, + "learning_rate": 3.200294375641159e-05, + "loss": 0.1591, + "step": 24710 + }, + { + "epoch": 1.09, + "learning_rate": 3.1995510028099494e-05, + "loss": 0.167, + "step": 24720 + }, + { + "epoch": 1.09, + "learning_rate": 3.19880762997874e-05, + "loss": 0.1286, + "step": 24730 + }, + { + "epoch": 1.1, + "learning_rate": 3.19806425714753e-05, + "loss": 0.2066, + "step": 24740 + }, + { + "epoch": 1.1, + "learning_rate": 3.19732088431632e-05, + "loss": 0.1604, + "step": 24750 + }, + { + "epoch": 1.1, + "learning_rate": 3.1965775114851105e-05, + "loss": 0.1282, + "step": 24760 + }, + { + "epoch": 1.1, + "learning_rate": 3.1958341386539e-05, + "loss": 0.2033, + "step": 24770 + }, + { + "epoch": 1.1, + "learning_rate": 3.195090765822691e-05, + "loss": 0.1383, + "step": 24780 + }, + { + "epoch": 1.1, + "learning_rate": 3.194347392991481e-05, + "loss": 0.1305, + "step": 24790 + }, + { + "epoch": 1.1, + "learning_rate": 3.1936040201602715e-05, + "loss": 0.1642, + "step": 24800 + }, + { + "epoch": 1.1, + "learning_rate": 3.192860647329062e-05, + "loss": 0.1374, + "step": 24810 + }, + { + "epoch": 1.1, + "learning_rate": 3.192117274497852e-05, + "loss": 0.1119, + "step": 24820 + }, + { + "epoch": 1.1, + "learning_rate": 3.191373901666642e-05, + "loss": 0.1053, + "step": 24830 + }, + { + "epoch": 1.1, + "learning_rate": 3.190630528835432e-05, + "loss": 0.1511, + "step": 24840 + }, + { + "epoch": 1.1, + "learning_rate": 3.189887156004223e-05, + "loss": 0.2131, + "step": 24850 + }, + { + "epoch": 1.1, + "learning_rate": 3.1891437831730124e-05, + "loss": 0.1544, + "step": 24860 + }, + { + "epoch": 1.1, + "learning_rate": 3.188400410341803e-05, + "loss": 0.144, + "step": 24870 + }, + { + "epoch": 1.1, + "learning_rate": 3.187657037510593e-05, + "loss": 0.1595, + "step": 24880 + }, + { + "epoch": 1.1, + "learning_rate": 3.186913664679383e-05, + "loss": 0.1436, + "step": 24890 + }, + { + "epoch": 1.1, + "learning_rate": 3.186170291848174e-05, + "loss": 0.1309, + "step": 24900 + }, + { + "epoch": 1.1, + "learning_rate": 3.1854269190169636e-05, + "loss": 0.1357, + "step": 24910 + }, + { + "epoch": 1.1, + "learning_rate": 3.1846835461857546e-05, + "loss": 0.2131, + "step": 24920 + }, + { + "epoch": 1.1, + "learning_rate": 3.183940173354544e-05, + "loss": 0.1345, + "step": 24930 + }, + { + "epoch": 1.1, + "learning_rate": 3.183196800523335e-05, + "loss": 0.1726, + "step": 24940 + }, + { + "epoch": 1.1, + "learning_rate": 3.182453427692125e-05, + "loss": 0.1369, + "step": 24950 + }, + { + "epoch": 1.11, + "learning_rate": 3.181710054860915e-05, + "loss": 0.1384, + "step": 24960 + }, + { + "epoch": 1.11, + "learning_rate": 3.180966682029705e-05, + "loss": 0.1623, + "step": 24970 + }, + { + "epoch": 1.11, + "learning_rate": 3.1802233091984955e-05, + "loss": 0.1461, + "step": 24980 + }, + { + "epoch": 1.11, + "learning_rate": 3.179479936367286e-05, + "loss": 0.1951, + "step": 24990 + }, + { + "epoch": 1.11, + "learning_rate": 3.178736563536076e-05, + "loss": 0.1171, + "step": 25000 + }, + { + "epoch": 1.11, + "learning_rate": 3.177993190704866e-05, + "loss": 0.1823, + "step": 25010 + }, + { + "epoch": 1.11, + "learning_rate": 3.1772498178736565e-05, + "loss": 0.2157, + "step": 25020 + }, + { + "epoch": 1.11, + "learning_rate": 3.176506445042447e-05, + "loss": 0.1523, + "step": 25030 + }, + { + "epoch": 1.11, + "learning_rate": 3.175763072211237e-05, + "loss": 0.1445, + "step": 25040 + }, + { + "epoch": 1.11, + "learning_rate": 3.175019699380027e-05, + "loss": 0.1368, + "step": 25050 + }, + { + "epoch": 1.11, + "learning_rate": 3.1742763265488175e-05, + "loss": 0.1674, + "step": 25060 + }, + { + "epoch": 1.11, + "learning_rate": 3.173532953717608e-05, + "loss": 0.1121, + "step": 25070 + }, + { + "epoch": 1.11, + "learning_rate": 3.172789580886398e-05, + "loss": 0.1196, + "step": 25080 + }, + { + "epoch": 1.11, + "learning_rate": 3.172046208055188e-05, + "loss": 0.1235, + "step": 25090 + }, + { + "epoch": 1.11, + "learning_rate": 3.171302835223978e-05, + "loss": 0.1588, + "step": 25100 + }, + { + "epoch": 1.11, + "learning_rate": 3.170559462392769e-05, + "loss": 0.1523, + "step": 25110 + }, + { + "epoch": 1.11, + "learning_rate": 3.1698160895615584e-05, + "loss": 0.1803, + "step": 25120 + }, + { + "epoch": 1.11, + "learning_rate": 3.169072716730349e-05, + "loss": 0.1624, + "step": 25130 + }, + { + "epoch": 1.11, + "learning_rate": 3.1683293438991396e-05, + "loss": 0.1342, + "step": 25140 + }, + { + "epoch": 1.11, + "learning_rate": 3.16758597106793e-05, + "loss": 0.2084, + "step": 25150 + }, + { + "epoch": 1.11, + "learning_rate": 3.16684259823672e-05, + "loss": 0.0932, + "step": 25160 + }, + { + "epoch": 1.11, + "learning_rate": 3.16609922540551e-05, + "loss": 0.2047, + "step": 25170 + }, + { + "epoch": 1.11, + "learning_rate": 3.1653558525743006e-05, + "loss": 0.1123, + "step": 25180 + }, + { + "epoch": 1.12, + "learning_rate": 3.16461247974309e-05, + "loss": 0.1248, + "step": 25190 + }, + { + "epoch": 1.12, + "learning_rate": 3.163869106911881e-05, + "loss": 0.1479, + "step": 25200 + }, + { + "epoch": 1.12, + "learning_rate": 3.163125734080671e-05, + "loss": 0.156, + "step": 25210 + }, + { + "epoch": 1.12, + "learning_rate": 3.162382361249461e-05, + "loss": 0.1307, + "step": 25220 + }, + { + "epoch": 1.12, + "learning_rate": 3.161638988418251e-05, + "loss": 0.1466, + "step": 25230 + }, + { + "epoch": 1.12, + "learning_rate": 3.1608956155870415e-05, + "loss": 0.1448, + "step": 25240 + }, + { + "epoch": 1.12, + "learning_rate": 3.160152242755832e-05, + "loss": 0.18, + "step": 25250 + }, + { + "epoch": 1.12, + "learning_rate": 3.159408869924622e-05, + "loss": 0.143, + "step": 25260 + }, + { + "epoch": 1.12, + "learning_rate": 3.158665497093413e-05, + "loss": 0.136, + "step": 25270 + }, + { + "epoch": 1.12, + "learning_rate": 3.1579221242622025e-05, + "loss": 0.1491, + "step": 25280 + }, + { + "epoch": 1.12, + "learning_rate": 3.157178751430993e-05, + "loss": 0.1244, + "step": 25290 + }, + { + "epoch": 1.12, + "learning_rate": 3.156435378599783e-05, + "loss": 0.1316, + "step": 25300 + }, + { + "epoch": 1.12, + "learning_rate": 3.155692005768573e-05, + "loss": 0.1738, + "step": 25310 + }, + { + "epoch": 1.12, + "learning_rate": 3.1549486329373635e-05, + "loss": 0.1544, + "step": 25320 + }, + { + "epoch": 1.12, + "learning_rate": 3.154205260106154e-05, + "loss": 0.1014, + "step": 25330 + }, + { + "epoch": 1.12, + "learning_rate": 3.153461887274944e-05, + "loss": 0.1987, + "step": 25340 + }, + { + "epoch": 1.12, + "learning_rate": 3.152718514443734e-05, + "loss": 0.1533, + "step": 25350 + }, + { + "epoch": 1.12, + "learning_rate": 3.1519751416125246e-05, + "loss": 0.1623, + "step": 25360 + }, + { + "epoch": 1.12, + "learning_rate": 3.151231768781315e-05, + "loss": 0.1189, + "step": 25370 + }, + { + "epoch": 1.12, + "learning_rate": 3.1504883959501044e-05, + "loss": 0.1291, + "step": 25380 + }, + { + "epoch": 1.12, + "learning_rate": 3.1497450231188954e-05, + "loss": 0.1348, + "step": 25390 + }, + { + "epoch": 1.12, + "learning_rate": 3.1490016502876856e-05, + "loss": 0.1387, + "step": 25400 + }, + { + "epoch": 1.12, + "learning_rate": 3.148258277456476e-05, + "loss": 0.1461, + "step": 25410 + }, + { + "epoch": 1.13, + "learning_rate": 3.147514904625266e-05, + "loss": 0.1118, + "step": 25420 + }, + { + "epoch": 1.13, + "learning_rate": 3.146771531794056e-05, + "loss": 0.2198, + "step": 25430 + }, + { + "epoch": 1.13, + "learning_rate": 3.1460281589628466e-05, + "loss": 0.1857, + "step": 25440 + }, + { + "epoch": 1.13, + "learning_rate": 3.145284786131636e-05, + "loss": 0.1874, + "step": 25450 + }, + { + "epoch": 1.13, + "learning_rate": 3.144541413300427e-05, + "loss": 0.1489, + "step": 25460 + }, + { + "epoch": 1.13, + "learning_rate": 3.143798040469217e-05, + "loss": 0.1292, + "step": 25470 + }, + { + "epoch": 1.13, + "learning_rate": 3.143054667638008e-05, + "loss": 0.1188, + "step": 25480 + }, + { + "epoch": 1.13, + "learning_rate": 3.142311294806797e-05, + "loss": 0.1805, + "step": 25490 + }, + { + "epoch": 1.13, + "learning_rate": 3.1415679219755875e-05, + "loss": 0.1584, + "step": 25500 + }, + { + "epoch": 1.13, + "learning_rate": 3.1408245491443784e-05, + "loss": 0.1423, + "step": 25510 + }, + { + "epoch": 1.13, + "learning_rate": 3.140081176313168e-05, + "loss": 0.1378, + "step": 25520 + }, + { + "epoch": 1.13, + "learning_rate": 3.139337803481959e-05, + "loss": 0.1886, + "step": 25530 + }, + { + "epoch": 1.13, + "learning_rate": 3.1385944306507485e-05, + "loss": 0.2109, + "step": 25540 + }, + { + "epoch": 1.13, + "learning_rate": 3.1378510578195395e-05, + "loss": 0.1365, + "step": 25550 + }, + { + "epoch": 1.13, + "learning_rate": 3.137107684988329e-05, + "loss": 0.1306, + "step": 25560 + }, + { + "epoch": 1.13, + "learning_rate": 3.136364312157119e-05, + "loss": 0.1394, + "step": 25570 + }, + { + "epoch": 1.13, + "learning_rate": 3.1356209393259096e-05, + "loss": 0.1392, + "step": 25580 + }, + { + "epoch": 1.13, + "learning_rate": 3.1348775664947e-05, + "loss": 0.2209, + "step": 25590 + }, + { + "epoch": 1.13, + "learning_rate": 3.13413419366349e-05, + "loss": 0.1736, + "step": 25600 + }, + { + "epoch": 1.13, + "learning_rate": 3.1333908208322804e-05, + "loss": 0.1587, + "step": 25610 + }, + { + "epoch": 1.13, + "learning_rate": 3.1326474480010706e-05, + "loss": 0.1613, + "step": 25620 + }, + { + "epoch": 1.13, + "learning_rate": 3.131904075169861e-05, + "loss": 0.1337, + "step": 25630 + }, + { + "epoch": 1.14, + "learning_rate": 3.131160702338651e-05, + "loss": 0.1015, + "step": 25640 + }, + { + "epoch": 1.14, + "learning_rate": 3.1304173295074414e-05, + "loss": 0.1252, + "step": 25650 + }, + { + "epoch": 1.14, + "learning_rate": 3.1296739566762316e-05, + "loss": 0.1589, + "step": 25660 + }, + { + "epoch": 1.14, + "learning_rate": 3.128930583845022e-05, + "loss": 0.1852, + "step": 25670 + }, + { + "epoch": 1.14, + "learning_rate": 3.128187211013812e-05, + "loss": 0.1951, + "step": 25680 + }, + { + "epoch": 1.14, + "learning_rate": 3.1274438381826024e-05, + "loss": 0.1477, + "step": 25690 + }, + { + "epoch": 1.14, + "learning_rate": 3.126700465351393e-05, + "loss": 0.1378, + "step": 25700 + }, + { + "epoch": 1.14, + "learning_rate": 3.125957092520182e-05, + "loss": 0.1217, + "step": 25710 + }, + { + "epoch": 1.14, + "learning_rate": 3.125213719688973e-05, + "loss": 0.1329, + "step": 25720 + }, + { + "epoch": 1.14, + "learning_rate": 3.124470346857763e-05, + "loss": 0.1735, + "step": 25730 + }, + { + "epoch": 1.14, + "learning_rate": 3.123726974026554e-05, + "loss": 0.1482, + "step": 25740 + }, + { + "epoch": 1.14, + "learning_rate": 3.122983601195343e-05, + "loss": 0.1239, + "step": 25750 + }, + { + "epoch": 1.14, + "learning_rate": 3.1222402283641335e-05, + "loss": 0.1642, + "step": 25760 + }, + { + "epoch": 1.14, + "learning_rate": 3.1214968555329245e-05, + "loss": 0.157, + "step": 25770 + }, + { + "epoch": 1.14, + "learning_rate": 3.120753482701714e-05, + "loss": 0.1691, + "step": 25780 + }, + { + "epoch": 1.14, + "learning_rate": 3.120010109870505e-05, + "loss": 0.1303, + "step": 25790 + }, + { + "epoch": 1.14, + "learning_rate": 3.1192667370392946e-05, + "loss": 0.1219, + "step": 25800 + }, + { + "epoch": 1.14, + "learning_rate": 3.1185233642080855e-05, + "loss": 0.0967, + "step": 25810 + }, + { + "epoch": 1.14, + "learning_rate": 3.117779991376875e-05, + "loss": 0.1488, + "step": 25820 + }, + { + "epoch": 1.14, + "learning_rate": 3.1170366185456653e-05, + "loss": 0.1457, + "step": 25830 + }, + { + "epoch": 1.14, + "learning_rate": 3.1162932457144556e-05, + "loss": 0.1801, + "step": 25840 + }, + { + "epoch": 1.14, + "learning_rate": 3.115549872883246e-05, + "loss": 0.172, + "step": 25850 + }, + { + "epoch": 1.14, + "learning_rate": 3.114806500052036e-05, + "loss": 0.1527, + "step": 25860 + }, + { + "epoch": 1.15, + "learning_rate": 3.1140631272208264e-05, + "loss": 0.135, + "step": 25870 + }, + { + "epoch": 1.15, + "learning_rate": 3.113319754389617e-05, + "loss": 0.1721, + "step": 25880 + }, + { + "epoch": 1.15, + "learning_rate": 3.112576381558407e-05, + "loss": 0.0981, + "step": 25890 + }, + { + "epoch": 1.15, + "learning_rate": 3.111833008727197e-05, + "loss": 0.1562, + "step": 25900 + }, + { + "epoch": 1.15, + "learning_rate": 3.1110896358959874e-05, + "loss": 0.1307, + "step": 25910 + }, + { + "epoch": 1.15, + "learning_rate": 3.110346263064778e-05, + "loss": 0.1829, + "step": 25920 + }, + { + "epoch": 1.15, + "learning_rate": 3.109602890233568e-05, + "loss": 0.1611, + "step": 25930 + }, + { + "epoch": 1.15, + "learning_rate": 3.108859517402358e-05, + "loss": 0.1592, + "step": 25940 + }, + { + "epoch": 1.15, + "learning_rate": 3.1081161445711484e-05, + "loss": 0.1375, + "step": 25950 + }, + { + "epoch": 1.15, + "learning_rate": 3.107372771739939e-05, + "loss": 0.151, + "step": 25960 + }, + { + "epoch": 1.15, + "learning_rate": 3.106629398908728e-05, + "loss": 0.1313, + "step": 25970 + }, + { + "epoch": 1.15, + "learning_rate": 3.105886026077519e-05, + "loss": 0.1705, + "step": 25980 + }, + { + "epoch": 1.15, + "learning_rate": 3.105142653246309e-05, + "loss": 0.1262, + "step": 25990 + }, + { + "epoch": 1.15, + "learning_rate": 3.1043992804151e-05, + "loss": 0.1111, + "step": 26000 + }, + { + "epoch": 1.15, + "learning_rate": 3.10365590758389e-05, + "loss": 0.1482, + "step": 26010 + }, + { + "epoch": 1.15, + "learning_rate": 3.10291253475268e-05, + "loss": 0.1333, + "step": 26020 + }, + { + "epoch": 1.15, + "learning_rate": 3.1021691619214705e-05, + "loss": 0.1575, + "step": 26030 + }, + { + "epoch": 1.15, + "learning_rate": 3.10142578909026e-05, + "loss": 0.121, + "step": 26040 + }, + { + "epoch": 1.15, + "learning_rate": 3.100682416259051e-05, + "loss": 0.157, + "step": 26050 + }, + { + "epoch": 1.15, + "learning_rate": 3.0999390434278406e-05, + "loss": 0.2108, + "step": 26060 + }, + { + "epoch": 1.15, + "learning_rate": 3.0991956705966315e-05, + "loss": 0.0916, + "step": 26070 + }, + { + "epoch": 1.15, + "learning_rate": 3.098452297765421e-05, + "loss": 0.1626, + "step": 26080 + }, + { + "epoch": 1.16, + "learning_rate": 3.097708924934212e-05, + "loss": 0.2027, + "step": 26090 + }, + { + "epoch": 1.16, + "learning_rate": 3.0969655521030016e-05, + "loss": 0.1645, + "step": 26100 + }, + { + "epoch": 1.16, + "learning_rate": 3.096222179271792e-05, + "loss": 0.1325, + "step": 26110 + }, + { + "epoch": 1.16, + "learning_rate": 3.095478806440583e-05, + "loss": 0.1503, + "step": 26120 + }, + { + "epoch": 1.16, + "learning_rate": 3.0947354336093724e-05, + "loss": 0.1235, + "step": 26130 + }, + { + "epoch": 1.16, + "learning_rate": 3.0939920607781633e-05, + "loss": 0.1056, + "step": 26140 + }, + { + "epoch": 1.16, + "learning_rate": 3.093248687946953e-05, + "loss": 0.1449, + "step": 26150 + }, + { + "epoch": 1.16, + "learning_rate": 3.092505315115743e-05, + "loss": 0.1191, + "step": 26160 + }, + { + "epoch": 1.16, + "learning_rate": 3.0917619422845334e-05, + "loss": 0.1345, + "step": 26170 + }, + { + "epoch": 1.16, + "learning_rate": 3.091018569453324e-05, + "loss": 0.1162, + "step": 26180 + }, + { + "epoch": 1.16, + "learning_rate": 3.090275196622114e-05, + "loss": 0.2079, + "step": 26190 + }, + { + "epoch": 1.16, + "learning_rate": 3.089531823790904e-05, + "loss": 0.1566, + "step": 26200 + }, + { + "epoch": 1.16, + "learning_rate": 3.0887884509596945e-05, + "loss": 0.1513, + "step": 26210 + }, + { + "epoch": 1.16, + "learning_rate": 3.088045078128485e-05, + "loss": 0.1511, + "step": 26220 + }, + { + "epoch": 1.16, + "learning_rate": 3.087301705297275e-05, + "loss": 0.1455, + "step": 26230 + }, + { + "epoch": 1.16, + "learning_rate": 3.086558332466065e-05, + "loss": 0.1521, + "step": 26240 + }, + { + "epoch": 1.16, + "learning_rate": 3.0858149596348555e-05, + "loss": 0.1975, + "step": 26250 + }, + { + "epoch": 1.16, + "learning_rate": 3.085071586803646e-05, + "loss": 0.1597, + "step": 26260 + }, + { + "epoch": 1.16, + "learning_rate": 3.084328213972436e-05, + "loss": 0.1027, + "step": 26270 + }, + { + "epoch": 1.16, + "learning_rate": 3.083584841141226e-05, + "loss": 0.1847, + "step": 26280 + }, + { + "epoch": 1.16, + "learning_rate": 3.0828414683100165e-05, + "loss": 0.173, + "step": 26290 + }, + { + "epoch": 1.16, + "learning_rate": 3.082098095478807e-05, + "loss": 0.1063, + "step": 26300 + }, + { + "epoch": 1.16, + "learning_rate": 3.081354722647597e-05, + "loss": 0.1332, + "step": 26310 + }, + { + "epoch": 1.17, + "learning_rate": 3.0806113498163866e-05, + "loss": 0.1234, + "step": 26320 + }, + { + "epoch": 1.17, + "learning_rate": 3.0798679769851776e-05, + "loss": 0.1911, + "step": 26330 + }, + { + "epoch": 1.17, + "learning_rate": 3.079124604153967e-05, + "loss": 0.1567, + "step": 26340 + }, + { + "epoch": 1.17, + "learning_rate": 3.078381231322758e-05, + "loss": 0.1398, + "step": 26350 + }, + { + "epoch": 1.17, + "learning_rate": 3.077637858491548e-05, + "loss": 0.1072, + "step": 26360 + }, + { + "epoch": 1.17, + "learning_rate": 3.076894485660338e-05, + "loss": 0.1576, + "step": 26370 + }, + { + "epoch": 1.17, + "learning_rate": 3.076151112829129e-05, + "loss": 0.111, + "step": 26380 + }, + { + "epoch": 1.17, + "learning_rate": 3.0754077399979184e-05, + "loss": 0.1455, + "step": 26390 + }, + { + "epoch": 1.17, + "learning_rate": 3.0746643671667094e-05, + "loss": 0.1453, + "step": 26400 + }, + { + "epoch": 1.17, + "learning_rate": 3.073920994335499e-05, + "loss": 0.1678, + "step": 26410 + }, + { + "epoch": 1.17, + "learning_rate": 3.07317762150429e-05, + "loss": 0.156, + "step": 26420 + }, + { + "epoch": 1.17, + "learning_rate": 3.0724342486730795e-05, + "loss": 0.1894, + "step": 26430 + }, + { + "epoch": 1.17, + "learning_rate": 3.07169087584187e-05, + "loss": 0.1332, + "step": 26440 + }, + { + "epoch": 1.17, + "learning_rate": 3.07094750301066e-05, + "loss": 0.1132, + "step": 26450 + }, + { + "epoch": 1.17, + "learning_rate": 3.07020413017945e-05, + "loss": 0.1977, + "step": 26460 + }, + { + "epoch": 1.17, + "learning_rate": 3.0694607573482405e-05, + "loss": 0.1702, + "step": 26470 + }, + { + "epoch": 1.17, + "learning_rate": 3.068717384517031e-05, + "loss": 0.1322, + "step": 26480 + }, + { + "epoch": 1.17, + "learning_rate": 3.067974011685821e-05, + "loss": 0.1805, + "step": 26490 + }, + { + "epoch": 1.17, + "learning_rate": 3.067230638854611e-05, + "loss": 0.194, + "step": 26500 + }, + { + "epoch": 1.17, + "learning_rate": 3.0664872660234015e-05, + "loss": 0.1519, + "step": 26510 + }, + { + "epoch": 1.17, + "learning_rate": 3.065743893192192e-05, + "loss": 0.1217, + "step": 26520 + }, + { + "epoch": 1.17, + "learning_rate": 3.065000520360982e-05, + "loss": 0.116, + "step": 26530 + }, + { + "epoch": 1.18, + "learning_rate": 3.064257147529772e-05, + "loss": 0.1968, + "step": 26540 + }, + { + "epoch": 1.18, + "learning_rate": 3.0635137746985626e-05, + "loss": 0.1126, + "step": 26550 + }, + { + "epoch": 1.18, + "learning_rate": 3.062770401867353e-05, + "loss": 0.1723, + "step": 26560 + }, + { + "epoch": 1.18, + "learning_rate": 3.062027029036143e-05, + "loss": 0.1471, + "step": 26570 + }, + { + "epoch": 1.18, + "learning_rate": 3.0612836562049327e-05, + "loss": 0.1774, + "step": 26580 + }, + { + "epoch": 1.18, + "learning_rate": 3.0605402833737236e-05, + "loss": 0.1577, + "step": 26590 + }, + { + "epoch": 1.18, + "learning_rate": 3.059796910542513e-05, + "loss": 0.166, + "step": 26600 + }, + { + "epoch": 1.18, + "learning_rate": 3.059053537711304e-05, + "loss": 0.1592, + "step": 26610 + }, + { + "epoch": 1.18, + "learning_rate": 3.0583101648800944e-05, + "loss": 0.0929, + "step": 26620 + }, + { + "epoch": 1.18, + "learning_rate": 3.0575667920488846e-05, + "loss": 0.1382, + "step": 26630 + }, + { + "epoch": 1.18, + "learning_rate": 3.056823419217675e-05, + "loss": 0.2038, + "step": 26640 + }, + { + "epoch": 1.18, + "learning_rate": 3.0560800463864645e-05, + "loss": 0.1305, + "step": 26650 + }, + { + "epoch": 1.18, + "learning_rate": 3.0553366735552554e-05, + "loss": 0.1586, + "step": 26660 + }, + { + "epoch": 1.18, + "learning_rate": 3.054593300724045e-05, + "loss": 0.106, + "step": 26670 + }, + { + "epoch": 1.18, + "learning_rate": 3.053849927892836e-05, + "loss": 0.109, + "step": 26680 + }, + { + "epoch": 1.18, + "learning_rate": 3.0531065550616255e-05, + "loss": 0.1824, + "step": 26690 + }, + { + "epoch": 1.18, + "learning_rate": 3.052363182230416e-05, + "loss": 0.1306, + "step": 26700 + }, + { + "epoch": 1.18, + "learning_rate": 3.051619809399206e-05, + "loss": 0.1674, + "step": 26710 + }, + { + "epoch": 1.18, + "learning_rate": 3.0508764365679966e-05, + "loss": 0.143, + "step": 26720 + }, + { + "epoch": 1.18, + "learning_rate": 3.0501330637367865e-05, + "loss": 0.155, + "step": 26730 + }, + { + "epoch": 1.18, + "learning_rate": 3.0493896909055768e-05, + "loss": 0.1609, + "step": 26740 + }, + { + "epoch": 1.18, + "learning_rate": 3.0486463180743674e-05, + "loss": 0.1644, + "step": 26750 + }, + { + "epoch": 1.18, + "learning_rate": 3.0479029452431573e-05, + "loss": 0.1483, + "step": 26760 + }, + { + "epoch": 1.19, + "learning_rate": 3.047159572411948e-05, + "loss": 0.1883, + "step": 26770 + }, + { + "epoch": 1.19, + "learning_rate": 3.0464161995807378e-05, + "loss": 0.1538, + "step": 26780 + }, + { + "epoch": 1.19, + "learning_rate": 3.0456728267495284e-05, + "loss": 0.1636, + "step": 26790 + }, + { + "epoch": 1.19, + "learning_rate": 3.0449294539183183e-05, + "loss": 0.1363, + "step": 26800 + }, + { + "epoch": 1.19, + "learning_rate": 3.0441860810871086e-05, + "loss": 0.1367, + "step": 26810 + }, + { + "epoch": 1.19, + "learning_rate": 3.0434427082558985e-05, + "loss": 0.1668, + "step": 26820 + }, + { + "epoch": 1.19, + "learning_rate": 3.042699335424689e-05, + "loss": 0.1582, + "step": 26830 + }, + { + "epoch": 1.19, + "learning_rate": 3.041955962593479e-05, + "loss": 0.1252, + "step": 26840 + }, + { + "epoch": 1.19, + "learning_rate": 3.0412125897622696e-05, + "loss": 0.1341, + "step": 26850 + }, + { + "epoch": 1.19, + "learning_rate": 3.04046921693106e-05, + "loss": 0.1595, + "step": 26860 + }, + { + "epoch": 1.19, + "learning_rate": 3.0397258440998498e-05, + "loss": 0.1379, + "step": 26870 + }, + { + "epoch": 1.19, + "learning_rate": 3.0389824712686404e-05, + "loss": 0.1789, + "step": 26880 + }, + { + "epoch": 1.19, + "learning_rate": 3.0382390984374303e-05, + "loss": 0.1406, + "step": 26890 + }, + { + "epoch": 1.19, + "learning_rate": 3.037495725606221e-05, + "loss": 0.1767, + "step": 26900 + }, + { + "epoch": 1.19, + "learning_rate": 3.036752352775011e-05, + "loss": 0.1674, + "step": 26910 + }, + { + "epoch": 1.19, + "learning_rate": 3.0360089799438014e-05, + "loss": 0.1269, + "step": 26920 + }, + { + "epoch": 1.19, + "learning_rate": 3.0352656071125914e-05, + "loss": 0.081, + "step": 26930 + }, + { + "epoch": 1.19, + "learning_rate": 3.0345222342813816e-05, + "loss": 0.1021, + "step": 26940 + }, + { + "epoch": 1.19, + "learning_rate": 3.0337788614501715e-05, + "loss": 0.1906, + "step": 26950 + }, + { + "epoch": 1.19, + "learning_rate": 3.033035488618962e-05, + "loss": 0.1397, + "step": 26960 + }, + { + "epoch": 1.19, + "learning_rate": 3.032292115787752e-05, + "loss": 0.1642, + "step": 26970 + }, + { + "epoch": 1.19, + "learning_rate": 3.0315487429565426e-05, + "loss": 0.1013, + "step": 26980 + }, + { + "epoch": 1.19, + "learning_rate": 3.030805370125333e-05, + "loss": 0.1652, + "step": 26990 + }, + { + "epoch": 1.2, + "learning_rate": 3.030061997294123e-05, + "loss": 0.1017, + "step": 27000 + }, + { + "epoch": 1.2, + "learning_rate": 3.0293186244629134e-05, + "loss": 0.1084, + "step": 27010 + }, + { + "epoch": 1.2, + "learning_rate": 3.0285752516317033e-05, + "loss": 0.1528, + "step": 27020 + }, + { + "epoch": 1.2, + "learning_rate": 3.027831878800494e-05, + "loss": 0.0875, + "step": 27030 + }, + { + "epoch": 1.2, + "learning_rate": 3.027088505969284e-05, + "loss": 0.1535, + "step": 27040 + }, + { + "epoch": 1.2, + "learning_rate": 3.0263451331380744e-05, + "loss": 0.1308, + "step": 27050 + }, + { + "epoch": 1.2, + "learning_rate": 3.0256017603068644e-05, + "loss": 0.1835, + "step": 27060 + }, + { + "epoch": 1.2, + "learning_rate": 3.0248583874756546e-05, + "loss": 0.1402, + "step": 27070 + }, + { + "epoch": 1.2, + "learning_rate": 3.0241150146444445e-05, + "loss": 0.1071, + "step": 27080 + }, + { + "epoch": 1.2, + "learning_rate": 3.023371641813235e-05, + "loss": 0.1814, + "step": 27090 + }, + { + "epoch": 1.2, + "learning_rate": 3.022628268982025e-05, + "loss": 0.1821, + "step": 27100 + }, + { + "epoch": 1.2, + "learning_rate": 3.0218848961508157e-05, + "loss": 0.1474, + "step": 27110 + }, + { + "epoch": 1.2, + "learning_rate": 3.0211415233196063e-05, + "loss": 0.1111, + "step": 27120 + }, + { + "epoch": 1.2, + "learning_rate": 3.0203981504883962e-05, + "loss": 0.1711, + "step": 27130 + }, + { + "epoch": 1.2, + "learning_rate": 3.0196547776571864e-05, + "loss": 0.1052, + "step": 27140 + }, + { + "epoch": 1.2, + "learning_rate": 3.0189114048259763e-05, + "loss": 0.1456, + "step": 27150 + }, + { + "epoch": 1.2, + "learning_rate": 3.018168031994767e-05, + "loss": 0.1591, + "step": 27160 + }, + { + "epoch": 1.2, + "learning_rate": 3.017424659163557e-05, + "loss": 0.124, + "step": 27170 + }, + { + "epoch": 1.2, + "learning_rate": 3.0166812863323475e-05, + "loss": 0.1256, + "step": 27180 + }, + { + "epoch": 1.2, + "learning_rate": 3.0159379135011374e-05, + "loss": 0.1741, + "step": 27190 + }, + { + "epoch": 1.2, + "learning_rate": 3.0151945406699276e-05, + "loss": 0.1483, + "step": 27200 + }, + { + "epoch": 1.2, + "learning_rate": 3.014451167838718e-05, + "loss": 0.1348, + "step": 27210 + }, + { + "epoch": 1.21, + "learning_rate": 3.013707795007508e-05, + "loss": 0.2027, + "step": 27220 + }, + { + "epoch": 1.21, + "learning_rate": 3.0129644221762987e-05, + "loss": 0.1567, + "step": 27230 + }, + { + "epoch": 1.21, + "learning_rate": 3.0122210493450887e-05, + "loss": 0.1261, + "step": 27240 + }, + { + "epoch": 1.21, + "learning_rate": 3.0114776765138793e-05, + "loss": 0.1223, + "step": 27250 + }, + { + "epoch": 1.21, + "learning_rate": 3.0107343036826692e-05, + "loss": 0.1428, + "step": 27260 + }, + { + "epoch": 1.21, + "learning_rate": 3.0099909308514594e-05, + "loss": 0.1256, + "step": 27270 + }, + { + "epoch": 1.21, + "learning_rate": 3.0092475580202494e-05, + "loss": 0.1003, + "step": 27280 + }, + { + "epoch": 1.21, + "learning_rate": 3.00850418518904e-05, + "loss": 0.1259, + "step": 27290 + }, + { + "epoch": 1.21, + "learning_rate": 3.00776081235783e-05, + "loss": 0.1305, + "step": 27300 + }, + { + "epoch": 1.21, + "learning_rate": 3.0070174395266205e-05, + "loss": 0.1147, + "step": 27310 + }, + { + "epoch": 1.21, + "learning_rate": 3.0062740666954104e-05, + "loss": 0.1636, + "step": 27320 + }, + { + "epoch": 1.21, + "learning_rate": 3.005530693864201e-05, + "loss": 0.1557, + "step": 27330 + }, + { + "epoch": 1.21, + "learning_rate": 3.004787321032991e-05, + "loss": 0.1221, + "step": 27340 + }, + { + "epoch": 1.21, + "learning_rate": 3.004043948201781e-05, + "loss": 0.1344, + "step": 27350 + }, + { + "epoch": 1.21, + "learning_rate": 3.0033005753705718e-05, + "loss": 0.138, + "step": 27360 + }, + { + "epoch": 1.21, + "learning_rate": 3.0025572025393617e-05, + "loss": 0.1451, + "step": 27370 + }, + { + "epoch": 1.21, + "learning_rate": 3.0018138297081523e-05, + "loss": 0.1763, + "step": 27380 + }, + { + "epoch": 1.21, + "learning_rate": 3.0010704568769422e-05, + "loss": 0.1535, + "step": 27390 + }, + { + "epoch": 1.21, + "learning_rate": 3.0003270840457325e-05, + "loss": 0.1546, + "step": 27400 + }, + { + "epoch": 1.21, + "learning_rate": 2.9995837112145224e-05, + "loss": 0.1816, + "step": 27410 + }, + { + "epoch": 1.21, + "learning_rate": 2.998840338383313e-05, + "loss": 0.1853, + "step": 27420 + }, + { + "epoch": 1.21, + "learning_rate": 2.998096965552103e-05, + "loss": 0.1627, + "step": 27430 + }, + { + "epoch": 1.21, + "learning_rate": 2.9973535927208935e-05, + "loss": 0.1271, + "step": 27440 + }, + { + "epoch": 1.22, + "learning_rate": 2.9966102198896834e-05, + "loss": 0.1263, + "step": 27450 + }, + { + "epoch": 1.22, + "learning_rate": 2.995866847058474e-05, + "loss": 0.1716, + "step": 27460 + }, + { + "epoch": 1.22, + "learning_rate": 2.9951234742272643e-05, + "loss": 0.1519, + "step": 27470 + }, + { + "epoch": 1.22, + "learning_rate": 2.9943801013960542e-05, + "loss": 0.1317, + "step": 27480 + }, + { + "epoch": 1.22, + "learning_rate": 2.9936367285648448e-05, + "loss": 0.1013, + "step": 27490 + }, + { + "epoch": 1.22, + "learning_rate": 2.9928933557336347e-05, + "loss": 0.2028, + "step": 27500 + }, + { + "epoch": 1.22, + "learning_rate": 2.9921499829024253e-05, + "loss": 0.1973, + "step": 27510 + }, + { + "epoch": 1.22, + "learning_rate": 2.9914066100712152e-05, + "loss": 0.1313, + "step": 27520 + }, + { + "epoch": 1.22, + "learning_rate": 2.9906632372400055e-05, + "loss": 0.1675, + "step": 27530 + }, + { + "epoch": 1.22, + "learning_rate": 2.9899198644087957e-05, + "loss": 0.1857, + "step": 27540 + }, + { + "epoch": 1.22, + "learning_rate": 2.989176491577586e-05, + "loss": 0.1488, + "step": 27550 + }, + { + "epoch": 1.22, + "learning_rate": 2.988433118746376e-05, + "loss": 0.1625, + "step": 27560 + }, + { + "epoch": 1.22, + "learning_rate": 2.9876897459151665e-05, + "loss": 0.1378, + "step": 27570 + }, + { + "epoch": 1.22, + "learning_rate": 2.9869463730839564e-05, + "loss": 0.1336, + "step": 27580 + }, + { + "epoch": 1.22, + "learning_rate": 2.986203000252747e-05, + "loss": 0.1663, + "step": 27590 + }, + { + "epoch": 1.22, + "learning_rate": 2.9854596274215373e-05, + "loss": 0.1614, + "step": 27600 + }, + { + "epoch": 1.22, + "learning_rate": 2.9847162545903272e-05, + "loss": 0.1668, + "step": 27610 + }, + { + "epoch": 1.22, + "learning_rate": 2.9839728817591178e-05, + "loss": 0.1866, + "step": 27620 + }, + { + "epoch": 1.22, + "learning_rate": 2.9832295089279077e-05, + "loss": 0.1175, + "step": 27630 + }, + { + "epoch": 1.22, + "learning_rate": 2.9824861360966983e-05, + "loss": 0.1688, + "step": 27640 + }, + { + "epoch": 1.22, + "learning_rate": 2.9817427632654882e-05, + "loss": 0.1099, + "step": 27650 + }, + { + "epoch": 1.22, + "learning_rate": 2.9809993904342788e-05, + "loss": 0.1259, + "step": 27660 + }, + { + "epoch": 1.23, + "learning_rate": 2.9802560176030687e-05, + "loss": 0.0747, + "step": 27670 + }, + { + "epoch": 1.23, + "learning_rate": 2.979512644771859e-05, + "loss": 0.1823, + "step": 27680 + }, + { + "epoch": 1.23, + "learning_rate": 2.978769271940649e-05, + "loss": 0.0987, + "step": 27690 + }, + { + "epoch": 1.23, + "learning_rate": 2.9780258991094395e-05, + "loss": 0.128, + "step": 27700 + }, + { + "epoch": 1.23, + "learning_rate": 2.9772825262782294e-05, + "loss": 0.1732, + "step": 27710 + }, + { + "epoch": 1.23, + "learning_rate": 2.97653915344702e-05, + "loss": 0.1452, + "step": 27720 + }, + { + "epoch": 1.23, + "learning_rate": 2.9757957806158103e-05, + "loss": 0.1089, + "step": 27730 + }, + { + "epoch": 1.23, + "learning_rate": 2.9750524077846002e-05, + "loss": 0.0966, + "step": 27740 + }, + { + "epoch": 1.23, + "learning_rate": 2.9743090349533908e-05, + "loss": 0.1711, + "step": 27750 + }, + { + "epoch": 1.23, + "learning_rate": 2.9735656621221807e-05, + "loss": 0.1119, + "step": 27760 + }, + { + "epoch": 1.23, + "learning_rate": 2.9728222892909713e-05, + "loss": 0.146, + "step": 27770 + }, + { + "epoch": 1.23, + "learning_rate": 2.9720789164597612e-05, + "loss": 0.108, + "step": 27780 + }, + { + "epoch": 1.23, + "learning_rate": 2.971335543628552e-05, + "loss": 0.1284, + "step": 27790 + }, + { + "epoch": 1.23, + "learning_rate": 2.9705921707973418e-05, + "loss": 0.2195, + "step": 27800 + }, + { + "epoch": 1.23, + "learning_rate": 2.969848797966132e-05, + "loss": 0.1669, + "step": 27810 + }, + { + "epoch": 1.23, + "learning_rate": 2.969105425134922e-05, + "loss": 0.172, + "step": 27820 + }, + { + "epoch": 1.23, + "learning_rate": 2.9683620523037125e-05, + "loss": 0.1462, + "step": 27830 + }, + { + "epoch": 1.23, + "learning_rate": 2.967618679472503e-05, + "loss": 0.1083, + "step": 27840 + }, + { + "epoch": 1.23, + "learning_rate": 2.966875306641293e-05, + "loss": 0.1051, + "step": 27850 + }, + { + "epoch": 1.23, + "learning_rate": 2.9661319338100833e-05, + "loss": 0.1141, + "step": 27860 + }, + { + "epoch": 1.23, + "learning_rate": 2.9653885609788736e-05, + "loss": 0.1023, + "step": 27870 + }, + { + "epoch": 1.23, + "learning_rate": 2.9646451881476638e-05, + "loss": 0.0968, + "step": 27880 + }, + { + "epoch": 1.23, + "learning_rate": 2.9639018153164537e-05, + "loss": 0.1392, + "step": 27890 + }, + { + "epoch": 1.24, + "learning_rate": 2.9631584424852443e-05, + "loss": 0.176, + "step": 27900 + }, + { + "epoch": 1.24, + "learning_rate": 2.9624150696540343e-05, + "loss": 0.1899, + "step": 27910 + }, + { + "epoch": 1.24, + "learning_rate": 2.961671696822825e-05, + "loss": 0.1402, + "step": 27920 + }, + { + "epoch": 1.24, + "learning_rate": 2.9609283239916148e-05, + "loss": 0.1926, + "step": 27930 + }, + { + "epoch": 1.24, + "learning_rate": 2.960184951160405e-05, + "loss": 0.1583, + "step": 27940 + }, + { + "epoch": 1.24, + "learning_rate": 2.959441578329195e-05, + "loss": 0.1736, + "step": 27950 + }, + { + "epoch": 1.24, + "learning_rate": 2.9586982054979855e-05, + "loss": 0.1277, + "step": 27960 + }, + { + "epoch": 1.24, + "learning_rate": 2.957954832666776e-05, + "loss": 0.1751, + "step": 27970 + }, + { + "epoch": 1.24, + "learning_rate": 2.957211459835566e-05, + "loss": 0.1664, + "step": 27980 + }, + { + "epoch": 1.24, + "learning_rate": 2.9564680870043567e-05, + "loss": 0.228, + "step": 27990 + }, + { + "epoch": 1.24, + "learning_rate": 2.9557247141731466e-05, + "loss": 0.1517, + "step": 28000 + }, + { + "epoch": 1.24, + "learning_rate": 2.954981341341937e-05, + "loss": 0.1651, + "step": 28010 + }, + { + "epoch": 1.24, + "learning_rate": 2.9542379685107268e-05, + "loss": 0.1372, + "step": 28020 + }, + { + "epoch": 1.24, + "learning_rate": 2.9534945956795174e-05, + "loss": 0.1618, + "step": 28030 + }, + { + "epoch": 1.24, + "learning_rate": 2.9527512228483073e-05, + "loss": 0.148, + "step": 28040 + }, + { + "epoch": 1.24, + "learning_rate": 2.952007850017098e-05, + "loss": 0.132, + "step": 28050 + }, + { + "epoch": 1.24, + "learning_rate": 2.9512644771858878e-05, + "loss": 0.1459, + "step": 28060 + }, + { + "epoch": 1.24, + "learning_rate": 2.950521104354678e-05, + "loss": 0.1457, + "step": 28070 + }, + { + "epoch": 1.24, + "learning_rate": 2.9497777315234683e-05, + "loss": 0.1133, + "step": 28080 + }, + { + "epoch": 1.24, + "learning_rate": 2.9490343586922586e-05, + "loss": 0.1786, + "step": 28090 + }, + { + "epoch": 1.24, + "learning_rate": 2.948290985861049e-05, + "loss": 0.1394, + "step": 28100 + }, + { + "epoch": 1.24, + "learning_rate": 2.947547613029839e-05, + "loss": 0.1536, + "step": 28110 + }, + { + "epoch": 1.24, + "learning_rate": 2.9468042401986297e-05, + "loss": 0.1471, + "step": 28120 + }, + { + "epoch": 1.25, + "learning_rate": 2.9460608673674196e-05, + "loss": 0.1265, + "step": 28130 + }, + { + "epoch": 1.25, + "learning_rate": 2.94531749453621e-05, + "loss": 0.1522, + "step": 28140 + }, + { + "epoch": 1.25, + "learning_rate": 2.9445741217049998e-05, + "loss": 0.1506, + "step": 28150 + }, + { + "epoch": 1.25, + "learning_rate": 2.9438307488737904e-05, + "loss": 0.1343, + "step": 28160 + }, + { + "epoch": 1.25, + "learning_rate": 2.9430873760425803e-05, + "loss": 0.1578, + "step": 28170 + }, + { + "epoch": 1.25, + "learning_rate": 2.942344003211371e-05, + "loss": 0.2303, + "step": 28180 + }, + { + "epoch": 1.25, + "learning_rate": 2.9416006303801608e-05, + "loss": 0.151, + "step": 28190 + }, + { + "epoch": 1.25, + "learning_rate": 2.9408572575489514e-05, + "loss": 0.1878, + "step": 28200 + }, + { + "epoch": 1.25, + "learning_rate": 2.9401138847177417e-05, + "loss": 0.1804, + "step": 28210 + }, + { + "epoch": 1.25, + "learning_rate": 2.9393705118865316e-05, + "loss": 0.1493, + "step": 28220 + }, + { + "epoch": 1.25, + "learning_rate": 2.9386271390553222e-05, + "loss": 0.1496, + "step": 28230 + }, + { + "epoch": 1.25, + "learning_rate": 2.937883766224112e-05, + "loss": 0.1679, + "step": 28240 + }, + { + "epoch": 1.25, + "learning_rate": 2.9371403933929027e-05, + "loss": 0.1547, + "step": 28250 + }, + { + "epoch": 1.25, + "learning_rate": 2.9363970205616926e-05, + "loss": 0.1191, + "step": 28260 + }, + { + "epoch": 1.25, + "learning_rate": 2.935653647730483e-05, + "loss": 0.1048, + "step": 28270 + }, + { + "epoch": 1.25, + "learning_rate": 2.9349102748992728e-05, + "loss": 0.1921, + "step": 28280 + }, + { + "epoch": 1.25, + "learning_rate": 2.9341669020680634e-05, + "loss": 0.165, + "step": 28290 + }, + { + "epoch": 1.25, + "learning_rate": 2.9334235292368533e-05, + "loss": 0.1804, + "step": 28300 + }, + { + "epoch": 1.25, + "learning_rate": 2.932680156405644e-05, + "loss": 0.1343, + "step": 28310 + }, + { + "epoch": 1.25, + "learning_rate": 2.9319367835744338e-05, + "loss": 0.1738, + "step": 28320 + }, + { + "epoch": 1.25, + "learning_rate": 2.9311934107432244e-05, + "loss": 0.143, + "step": 28330 + }, + { + "epoch": 1.25, + "learning_rate": 2.9304500379120147e-05, + "loss": 0.1312, + "step": 28340 + }, + { + "epoch": 1.26, + "learning_rate": 2.9297066650808046e-05, + "loss": 0.2108, + "step": 28350 + }, + { + "epoch": 1.26, + "learning_rate": 2.9289632922495952e-05, + "loss": 0.1524, + "step": 28360 + }, + { + "epoch": 1.26, + "learning_rate": 2.928219919418385e-05, + "loss": 0.1661, + "step": 28370 + }, + { + "epoch": 1.26, + "learning_rate": 2.9274765465871757e-05, + "loss": 0.0959, + "step": 28380 + }, + { + "epoch": 1.26, + "learning_rate": 2.9267331737559656e-05, + "loss": 0.1514, + "step": 28390 + }, + { + "epoch": 1.26, + "learning_rate": 2.9259898009247562e-05, + "loss": 0.19, + "step": 28400 + }, + { + "epoch": 1.26, + "learning_rate": 2.925246428093546e-05, + "loss": 0.1753, + "step": 28410 + }, + { + "epoch": 1.26, + "learning_rate": 2.9245030552623364e-05, + "loss": 0.114, + "step": 28420 + }, + { + "epoch": 1.26, + "learning_rate": 2.9237596824311263e-05, + "loss": 0.1178, + "step": 28430 + }, + { + "epoch": 1.26, + "learning_rate": 2.923016309599917e-05, + "loss": 0.1249, + "step": 28440 + }, + { + "epoch": 1.26, + "learning_rate": 2.9222729367687075e-05, + "loss": 0.1811, + "step": 28450 + }, + { + "epoch": 1.26, + "learning_rate": 2.9215295639374974e-05, + "loss": 0.2, + "step": 28460 + }, + { + "epoch": 1.26, + "learning_rate": 2.9207861911062877e-05, + "loss": 0.1776, + "step": 28470 + }, + { + "epoch": 1.26, + "learning_rate": 2.9200428182750776e-05, + "loss": 0.1257, + "step": 28480 + }, + { + "epoch": 1.26, + "learning_rate": 2.9192994454438682e-05, + "loss": 0.0987, + "step": 28490 + }, + { + "epoch": 1.26, + "learning_rate": 2.918556072612658e-05, + "loss": 0.1029, + "step": 28500 + }, + { + "epoch": 1.26, + "learning_rate": 2.9178126997814487e-05, + "loss": 0.1136, + "step": 28510 + }, + { + "epoch": 1.26, + "learning_rate": 2.9170693269502386e-05, + "loss": 0.1192, + "step": 28520 + }, + { + "epoch": 1.26, + "learning_rate": 2.9163259541190292e-05, + "loss": 0.159, + "step": 28530 + }, + { + "epoch": 1.26, + "learning_rate": 2.915582581287819e-05, + "loss": 0.1786, + "step": 28540 + }, + { + "epoch": 1.26, + "learning_rate": 2.9148392084566094e-05, + "loss": 0.1563, + "step": 28550 + }, + { + "epoch": 1.26, + "learning_rate": 2.9140958356253993e-05, + "loss": 0.1409, + "step": 28560 + }, + { + "epoch": 1.26, + "learning_rate": 2.91335246279419e-05, + "loss": 0.2141, + "step": 28570 + }, + { + "epoch": 1.27, + "learning_rate": 2.9126090899629805e-05, + "loss": 0.1775, + "step": 28580 + }, + { + "epoch": 1.27, + "learning_rate": 2.9118657171317704e-05, + "loss": 0.0956, + "step": 28590 + }, + { + "epoch": 1.27, + "learning_rate": 2.9111223443005607e-05, + "loss": 0.1255, + "step": 28600 + }, + { + "epoch": 1.27, + "learning_rate": 2.910378971469351e-05, + "loss": 0.127, + "step": 28610 + }, + { + "epoch": 1.27, + "learning_rate": 2.9096355986381412e-05, + "loss": 0.1323, + "step": 28620 + }, + { + "epoch": 1.27, + "learning_rate": 2.908892225806931e-05, + "loss": 0.1948, + "step": 28630 + }, + { + "epoch": 1.27, + "learning_rate": 2.9081488529757217e-05, + "loss": 0.1184, + "step": 28640 + }, + { + "epoch": 1.27, + "learning_rate": 2.9074054801445117e-05, + "loss": 0.1966, + "step": 28650 + }, + { + "epoch": 1.27, + "learning_rate": 2.9066621073133022e-05, + "loss": 0.2312, + "step": 28660 + }, + { + "epoch": 1.27, + "learning_rate": 2.905918734482092e-05, + "loss": 0.1325, + "step": 28670 + }, + { + "epoch": 1.27, + "learning_rate": 2.9051753616508824e-05, + "loss": 0.1338, + "step": 28680 + }, + { + "epoch": 1.27, + "learning_rate": 2.9044319888196723e-05, + "loss": 0.1895, + "step": 28690 + }, + { + "epoch": 1.27, + "learning_rate": 2.903688615988463e-05, + "loss": 0.1534, + "step": 28700 + }, + { + "epoch": 1.27, + "learning_rate": 2.9029452431572535e-05, + "loss": 0.1418, + "step": 28710 + }, + { + "epoch": 1.27, + "learning_rate": 2.9022018703260435e-05, + "loss": 0.1474, + "step": 28720 + }, + { + "epoch": 1.27, + "learning_rate": 2.901458497494834e-05, + "loss": 0.1203, + "step": 28730 + }, + { + "epoch": 1.27, + "learning_rate": 2.900715124663624e-05, + "loss": 0.1611, + "step": 28740 + }, + { + "epoch": 1.27, + "learning_rate": 2.8999717518324142e-05, + "loss": 0.1609, + "step": 28750 + }, + { + "epoch": 1.27, + "learning_rate": 2.899228379001204e-05, + "loss": 0.1956, + "step": 28760 + }, + { + "epoch": 1.27, + "learning_rate": 2.8984850061699947e-05, + "loss": 0.1295, + "step": 28770 + }, + { + "epoch": 1.27, + "learning_rate": 2.8977416333387847e-05, + "loss": 0.1835, + "step": 28780 + }, + { + "epoch": 1.27, + "learning_rate": 2.8969982605075753e-05, + "loss": 0.1553, + "step": 28790 + }, + { + "epoch": 1.28, + "learning_rate": 2.8962548876763652e-05, + "loss": 0.2381, + "step": 28800 + }, + { + "epoch": 1.28, + "learning_rate": 2.8955115148451554e-05, + "loss": 0.1798, + "step": 28810 + }, + { + "epoch": 1.28, + "learning_rate": 2.894768142013946e-05, + "loss": 0.1094, + "step": 28820 + }, + { + "epoch": 1.28, + "learning_rate": 2.894024769182736e-05, + "loss": 0.1467, + "step": 28830 + }, + { + "epoch": 1.28, + "learning_rate": 2.8932813963515266e-05, + "loss": 0.1326, + "step": 28840 + }, + { + "epoch": 1.28, + "learning_rate": 2.8925380235203165e-05, + "loss": 0.1006, + "step": 28850 + }, + { + "epoch": 1.28, + "learning_rate": 2.891794650689107e-05, + "loss": 0.1231, + "step": 28860 + }, + { + "epoch": 1.28, + "learning_rate": 2.891051277857897e-05, + "loss": 0.2157, + "step": 28870 + }, + { + "epoch": 1.28, + "learning_rate": 2.8903079050266872e-05, + "loss": 0.1836, + "step": 28880 + }, + { + "epoch": 1.28, + "learning_rate": 2.889564532195477e-05, + "loss": 0.113, + "step": 28890 + }, + { + "epoch": 1.28, + "learning_rate": 2.8888211593642678e-05, + "loss": 0.1742, + "step": 28900 + }, + { + "epoch": 1.28, + "learning_rate": 2.8880777865330577e-05, + "loss": 0.1752, + "step": 28910 + }, + { + "epoch": 1.28, + "learning_rate": 2.8873344137018483e-05, + "loss": 0.1503, + "step": 28920 + }, + { + "epoch": 1.28, + "learning_rate": 2.8865910408706382e-05, + "loss": 0.1529, + "step": 28930 + }, + { + "epoch": 1.28, + "learning_rate": 2.8858476680394288e-05, + "loss": 0.0983, + "step": 28940 + }, + { + "epoch": 1.28, + "learning_rate": 2.885104295208219e-05, + "loss": 0.1762, + "step": 28950 + }, + { + "epoch": 1.28, + "learning_rate": 2.884360922377009e-05, + "loss": 0.1619, + "step": 28960 + }, + { + "epoch": 1.28, + "learning_rate": 2.8836175495457996e-05, + "loss": 0.133, + "step": 28970 + }, + { + "epoch": 1.28, + "learning_rate": 2.8828741767145895e-05, + "loss": 0.173, + "step": 28980 + }, + { + "epoch": 1.28, + "learning_rate": 2.88213080388338e-05, + "loss": 0.182, + "step": 28990 + }, + { + "epoch": 1.28, + "learning_rate": 2.88138743105217e-05, + "loss": 0.1242, + "step": 29000 + }, + { + "epoch": 1.28, + "learning_rate": 2.8806440582209603e-05, + "loss": 0.1322, + "step": 29010 + }, + { + "epoch": 1.28, + "learning_rate": 2.8799006853897502e-05, + "loss": 0.1656, + "step": 29020 + }, + { + "epoch": 1.29, + "learning_rate": 2.8791573125585408e-05, + "loss": 0.1553, + "step": 29030 + }, + { + "epoch": 1.29, + "learning_rate": 2.8784139397273307e-05, + "loss": 0.1048, + "step": 29040 + }, + { + "epoch": 1.29, + "learning_rate": 2.8776705668961213e-05, + "loss": 0.138, + "step": 29050 + }, + { + "epoch": 1.29, + "learning_rate": 2.8769271940649112e-05, + "loss": 0.1399, + "step": 29060 + }, + { + "epoch": 1.29, + "learning_rate": 2.8761838212337018e-05, + "loss": 0.1251, + "step": 29070 + }, + { + "epoch": 1.29, + "learning_rate": 2.875440448402492e-05, + "loss": 0.1519, + "step": 29080 + }, + { + "epoch": 1.29, + "learning_rate": 2.874697075571282e-05, + "loss": 0.131, + "step": 29090 + }, + { + "epoch": 1.29, + "learning_rate": 2.8739537027400726e-05, + "loss": 0.2176, + "step": 29100 + }, + { + "epoch": 1.29, + "learning_rate": 2.8732103299088625e-05, + "loss": 0.1094, + "step": 29110 + }, + { + "epoch": 1.29, + "learning_rate": 2.872466957077653e-05, + "loss": 0.0992, + "step": 29120 + }, + { + "epoch": 1.29, + "learning_rate": 2.871723584246443e-05, + "loss": 0.1838, + "step": 29130 + }, + { + "epoch": 1.29, + "learning_rate": 2.8709802114152333e-05, + "loss": 0.1372, + "step": 29140 + }, + { + "epoch": 1.29, + "learning_rate": 2.8702368385840235e-05, + "loss": 0.1149, + "step": 29150 + }, + { + "epoch": 1.29, + "learning_rate": 2.8694934657528138e-05, + "loss": 0.1251, + "step": 29160 + }, + { + "epoch": 1.29, + "learning_rate": 2.8687500929216037e-05, + "loss": 0.1378, + "step": 29170 + }, + { + "epoch": 1.29, + "learning_rate": 2.8680067200903943e-05, + "loss": 0.1246, + "step": 29180 + }, + { + "epoch": 1.29, + "learning_rate": 2.867263347259185e-05, + "loss": 0.1309, + "step": 29190 + }, + { + "epoch": 1.29, + "learning_rate": 2.8665199744279748e-05, + "loss": 0.2031, + "step": 29200 + }, + { + "epoch": 1.29, + "learning_rate": 2.865776601596765e-05, + "loss": 0.15, + "step": 29210 + }, + { + "epoch": 1.29, + "learning_rate": 2.865033228765555e-05, + "loss": 0.1698, + "step": 29220 + }, + { + "epoch": 1.29, + "learning_rate": 2.8642898559343456e-05, + "loss": 0.1232, + "step": 29230 + }, + { + "epoch": 1.29, + "learning_rate": 2.8635464831031355e-05, + "loss": 0.1104, + "step": 29240 + }, + { + "epoch": 1.29, + "learning_rate": 2.862803110271926e-05, + "loss": 0.1249, + "step": 29250 + }, + { + "epoch": 1.3, + "learning_rate": 2.862059737440716e-05, + "loss": 0.2162, + "step": 29260 + }, + { + "epoch": 1.3, + "learning_rate": 2.8613163646095066e-05, + "loss": 0.1566, + "step": 29270 + }, + { + "epoch": 1.3, + "learning_rate": 2.8605729917782965e-05, + "loss": 0.1291, + "step": 29280 + }, + { + "epoch": 1.3, + "learning_rate": 2.8598296189470868e-05, + "loss": 0.1239, + "step": 29290 + }, + { + "epoch": 1.3, + "learning_rate": 2.8590862461158767e-05, + "loss": 0.1275, + "step": 29300 + }, + { + "epoch": 1.3, + "learning_rate": 2.8583428732846673e-05, + "loss": 0.1484, + "step": 29310 + }, + { + "epoch": 1.3, + "learning_rate": 2.857599500453458e-05, + "loss": 0.1869, + "step": 29320 + }, + { + "epoch": 1.3, + "learning_rate": 2.856856127622248e-05, + "loss": 0.1464, + "step": 29330 + }, + { + "epoch": 1.3, + "learning_rate": 2.856112754791038e-05, + "loss": 0.1524, + "step": 29340 + }, + { + "epoch": 1.3, + "learning_rate": 2.855369381959828e-05, + "loss": 0.1396, + "step": 29350 + }, + { + "epoch": 1.3, + "learning_rate": 2.8546260091286186e-05, + "loss": 0.1517, + "step": 29360 + }, + { + "epoch": 1.3, + "learning_rate": 2.8538826362974085e-05, + "loss": 0.1361, + "step": 29370 + }, + { + "epoch": 1.3, + "learning_rate": 2.853139263466199e-05, + "loss": 0.1123, + "step": 29380 + }, + { + "epoch": 1.3, + "learning_rate": 2.852395890634989e-05, + "loss": 0.1304, + "step": 29390 + }, + { + "epoch": 1.3, + "learning_rate": 2.8516525178037796e-05, + "loss": 0.1495, + "step": 29400 + }, + { + "epoch": 1.3, + "learning_rate": 2.8509091449725696e-05, + "loss": 0.1318, + "step": 29410 + }, + { + "epoch": 1.3, + "learning_rate": 2.8501657721413598e-05, + "loss": 0.1365, + "step": 29420 + }, + { + "epoch": 1.3, + "learning_rate": 2.8494223993101504e-05, + "loss": 0.1668, + "step": 29430 + }, + { + "epoch": 1.3, + "learning_rate": 2.8486790264789403e-05, + "loss": 0.1423, + "step": 29440 + }, + { + "epoch": 1.3, + "learning_rate": 2.847935653647731e-05, + "loss": 0.1917, + "step": 29450 + }, + { + "epoch": 1.3, + "learning_rate": 2.847192280816521e-05, + "loss": 0.103, + "step": 29460 + }, + { + "epoch": 1.3, + "learning_rate": 2.846448907985311e-05, + "loss": 0.1131, + "step": 29470 + }, + { + "epoch": 1.31, + "learning_rate": 2.8457055351541014e-05, + "loss": 0.1179, + "step": 29480 + }, + { + "epoch": 1.31, + "learning_rate": 2.8449621623228916e-05, + "loss": 0.1601, + "step": 29490 + }, + { + "epoch": 1.31, + "learning_rate": 2.8442187894916815e-05, + "loss": 0.1887, + "step": 29500 + }, + { + "epoch": 1.31, + "learning_rate": 2.843475416660472e-05, + "loss": 0.117, + "step": 29510 + }, + { + "epoch": 1.31, + "learning_rate": 2.842732043829262e-05, + "loss": 0.1479, + "step": 29520 + }, + { + "epoch": 1.31, + "learning_rate": 2.8419886709980527e-05, + "loss": 0.142, + "step": 29530 + }, + { + "epoch": 1.31, + "learning_rate": 2.8412452981668426e-05, + "loss": 0.1185, + "step": 29540 + }, + { + "epoch": 1.31, + "learning_rate": 2.840501925335633e-05, + "loss": 0.1911, + "step": 29550 + }, + { + "epoch": 1.31, + "learning_rate": 2.8397585525044234e-05, + "loss": 0.1494, + "step": 29560 + }, + { + "epoch": 1.31, + "learning_rate": 2.8390151796732134e-05, + "loss": 0.1559, + "step": 29570 + }, + { + "epoch": 1.31, + "learning_rate": 2.838271806842004e-05, + "loss": 0.1829, + "step": 29580 + }, + { + "epoch": 1.31, + "learning_rate": 2.837528434010794e-05, + "loss": 0.145, + "step": 29590 + }, + { + "epoch": 1.31, + "learning_rate": 2.8367850611795845e-05, + "loss": 0.1164, + "step": 29600 + }, + { + "epoch": 1.31, + "learning_rate": 2.8360416883483744e-05, + "loss": 0.1135, + "step": 29610 + }, + { + "epoch": 1.31, + "learning_rate": 2.8352983155171646e-05, + "loss": 0.1336, + "step": 29620 + }, + { + "epoch": 1.31, + "learning_rate": 2.8345549426859546e-05, + "loss": 0.1268, + "step": 29630 + }, + { + "epoch": 1.31, + "learning_rate": 2.833811569854745e-05, + "loss": 0.1778, + "step": 29640 + }, + { + "epoch": 1.31, + "learning_rate": 2.833068197023535e-05, + "loss": 0.1086, + "step": 29650 + }, + { + "epoch": 1.31, + "learning_rate": 2.8323248241923257e-05, + "loss": 0.1658, + "step": 29660 + }, + { + "epoch": 1.31, + "learning_rate": 2.8315814513611156e-05, + "loss": 0.1615, + "step": 29670 + }, + { + "epoch": 1.31, + "learning_rate": 2.830838078529906e-05, + "loss": 0.1557, + "step": 29680 + }, + { + "epoch": 1.31, + "learning_rate": 2.8300947056986964e-05, + "loss": 0.1267, + "step": 29690 + }, + { + "epoch": 1.31, + "learning_rate": 2.8293513328674864e-05, + "loss": 0.1803, + "step": 29700 + }, + { + "epoch": 1.32, + "learning_rate": 2.828607960036277e-05, + "loss": 0.1654, + "step": 29710 + }, + { + "epoch": 1.32, + "learning_rate": 2.827864587205067e-05, + "loss": 0.1137, + "step": 29720 + }, + { + "epoch": 1.32, + "learning_rate": 2.8271212143738575e-05, + "loss": 0.144, + "step": 29730 + }, + { + "epoch": 1.32, + "learning_rate": 2.8263778415426474e-05, + "loss": 0.2144, + "step": 29740 + }, + { + "epoch": 1.32, + "learning_rate": 2.8256344687114377e-05, + "loss": 0.1246, + "step": 29750 + }, + { + "epoch": 1.32, + "learning_rate": 2.8248910958802276e-05, + "loss": 0.1245, + "step": 29760 + }, + { + "epoch": 1.32, + "learning_rate": 2.8241477230490182e-05, + "loss": 0.1687, + "step": 29770 + }, + { + "epoch": 1.32, + "learning_rate": 2.823404350217808e-05, + "loss": 0.1618, + "step": 29780 + }, + { + "epoch": 1.32, + "learning_rate": 2.8226609773865987e-05, + "loss": 0.1597, + "step": 29790 + }, + { + "epoch": 1.32, + "learning_rate": 2.8219176045553893e-05, + "loss": 0.1182, + "step": 29800 + }, + { + "epoch": 1.32, + "learning_rate": 2.8211742317241792e-05, + "loss": 0.0865, + "step": 29810 + }, + { + "epoch": 1.32, + "learning_rate": 2.8204308588929695e-05, + "loss": 0.0989, + "step": 29820 + }, + { + "epoch": 1.32, + "learning_rate": 2.8196874860617594e-05, + "loss": 0.163, + "step": 29830 + }, + { + "epoch": 1.32, + "learning_rate": 2.81894411323055e-05, + "loss": 0.1251, + "step": 29840 + }, + { + "epoch": 1.32, + "learning_rate": 2.81820074039934e-05, + "loss": 0.1823, + "step": 29850 + }, + { + "epoch": 1.32, + "learning_rate": 2.8174573675681305e-05, + "loss": 0.1603, + "step": 29860 + }, + { + "epoch": 1.32, + "learning_rate": 2.8167139947369204e-05, + "loss": 0.1301, + "step": 29870 + }, + { + "epoch": 1.32, + "learning_rate": 2.8159706219057107e-05, + "loss": 0.1654, + "step": 29880 + }, + { + "epoch": 1.32, + "learning_rate": 2.8152272490745006e-05, + "loss": 0.1133, + "step": 29890 + }, + { + "epoch": 1.32, + "learning_rate": 2.8144838762432912e-05, + "loss": 0.1784, + "step": 29900 + }, + { + "epoch": 1.32, + "learning_rate": 2.813740503412081e-05, + "loss": 0.1527, + "step": 29910 + }, + { + "epoch": 1.32, + "learning_rate": 2.8129971305808717e-05, + "loss": 0.1273, + "step": 29920 + }, + { + "epoch": 1.33, + "learning_rate": 2.8122537577496623e-05, + "loss": 0.105, + "step": 29930 + }, + { + "epoch": 1.33, + "learning_rate": 2.8115103849184522e-05, + "loss": 0.1594, + "step": 29940 + }, + { + "epoch": 1.33, + "learning_rate": 2.8107670120872425e-05, + "loss": 0.1126, + "step": 29950 + }, + { + "epoch": 1.33, + "learning_rate": 2.8100236392560324e-05, + "loss": 0.1643, + "step": 29960 + }, + { + "epoch": 1.33, + "learning_rate": 2.809280266424823e-05, + "loss": 0.1141, + "step": 29970 + }, + { + "epoch": 1.33, + "learning_rate": 2.808536893593613e-05, + "loss": 0.1526, + "step": 29980 + }, + { + "epoch": 1.33, + "learning_rate": 2.8077935207624035e-05, + "loss": 0.124, + "step": 29990 + }, + { + "epoch": 1.33, + "learning_rate": 2.8070501479311934e-05, + "loss": 0.1584, + "step": 30000 + }, + { + "epoch": 1.33, + "learning_rate": 2.806306775099984e-05, + "loss": 0.1713, + "step": 30010 + }, + { + "epoch": 1.33, + "learning_rate": 2.805563402268774e-05, + "loss": 0.1751, + "step": 30020 + }, + { + "epoch": 1.33, + "learning_rate": 2.8048200294375642e-05, + "loss": 0.1216, + "step": 30030 + }, + { + "epoch": 1.33, + "learning_rate": 2.804076656606354e-05, + "loss": 0.1417, + "step": 30040 + }, + { + "epoch": 1.33, + "learning_rate": 2.8033332837751447e-05, + "loss": 0.1366, + "step": 30050 + }, + { + "epoch": 1.33, + "learning_rate": 2.8025899109439353e-05, + "loss": 0.1571, + "step": 30060 + }, + { + "epoch": 1.33, + "learning_rate": 2.8018465381127252e-05, + "loss": 0.1377, + "step": 30070 + }, + { + "epoch": 1.33, + "learning_rate": 2.8011031652815155e-05, + "loss": 0.1291, + "step": 30080 + }, + { + "epoch": 1.33, + "learning_rate": 2.8003597924503054e-05, + "loss": 0.1121, + "step": 30090 + }, + { + "epoch": 1.33, + "learning_rate": 2.799616419619096e-05, + "loss": 0.1911, + "step": 30100 + }, + { + "epoch": 1.33, + "learning_rate": 2.798873046787886e-05, + "loss": 0.2248, + "step": 30110 + }, + { + "epoch": 1.33, + "learning_rate": 2.7981296739566765e-05, + "loss": 0.1465, + "step": 30120 + }, + { + "epoch": 1.33, + "learning_rate": 2.7973863011254664e-05, + "loss": 0.1543, + "step": 30130 + }, + { + "epoch": 1.33, + "learning_rate": 2.796642928294257e-05, + "loss": 0.1183, + "step": 30140 + }, + { + "epoch": 1.33, + "learning_rate": 2.795899555463047e-05, + "loss": 0.1452, + "step": 30150 + }, + { + "epoch": 1.34, + "learning_rate": 2.7951561826318372e-05, + "loss": 0.1329, + "step": 30160 + }, + { + "epoch": 1.34, + "learning_rate": 2.7944128098006278e-05, + "loss": 0.1074, + "step": 30170 + }, + { + "epoch": 1.34, + "learning_rate": 2.7936694369694177e-05, + "loss": 0.1919, + "step": 30180 + }, + { + "epoch": 1.34, + "learning_rate": 2.7929260641382083e-05, + "loss": 0.1689, + "step": 30190 + }, + { + "epoch": 1.34, + "learning_rate": 2.7921826913069982e-05, + "loss": 0.2, + "step": 30200 + }, + { + "epoch": 1.34, + "learning_rate": 2.7914393184757885e-05, + "loss": 0.1253, + "step": 30210 + }, + { + "epoch": 1.34, + "learning_rate": 2.7906959456445784e-05, + "loss": 0.1472, + "step": 30220 + }, + { + "epoch": 1.34, + "learning_rate": 2.789952572813369e-05, + "loss": 0.1721, + "step": 30230 + }, + { + "epoch": 1.34, + "learning_rate": 2.789209199982159e-05, + "loss": 0.1381, + "step": 30240 + }, + { + "epoch": 1.34, + "learning_rate": 2.7884658271509495e-05, + "loss": 0.0811, + "step": 30250 + }, + { + "epoch": 1.34, + "learning_rate": 2.7877224543197395e-05, + "loss": 0.125, + "step": 30260 + }, + { + "epoch": 1.34, + "learning_rate": 2.78697908148853e-05, + "loss": 0.1221, + "step": 30270 + }, + { + "epoch": 1.34, + "learning_rate": 2.78623570865732e-05, + "loss": 0.134, + "step": 30280 + }, + { + "epoch": 1.34, + "learning_rate": 2.7854923358261102e-05, + "loss": 0.1521, + "step": 30290 + }, + { + "epoch": 1.34, + "learning_rate": 2.7847489629949008e-05, + "loss": 0.1774, + "step": 30300 + }, + { + "epoch": 1.34, + "learning_rate": 2.7840055901636907e-05, + "loss": 0.1585, + "step": 30310 + }, + { + "epoch": 1.34, + "learning_rate": 2.7832622173324813e-05, + "loss": 0.159, + "step": 30320 + }, + { + "epoch": 1.34, + "learning_rate": 2.7825188445012713e-05, + "loss": 0.1729, + "step": 30330 + }, + { + "epoch": 1.34, + "learning_rate": 2.781775471670062e-05, + "loss": 0.1558, + "step": 30340 + }, + { + "epoch": 1.34, + "learning_rate": 2.7810320988388518e-05, + "loss": 0.0943, + "step": 30350 + }, + { + "epoch": 1.34, + "learning_rate": 2.780288726007642e-05, + "loss": 0.2137, + "step": 30360 + }, + { + "epoch": 1.34, + "learning_rate": 2.779545353176432e-05, + "loss": 0.1246, + "step": 30370 + }, + { + "epoch": 1.35, + "learning_rate": 2.7788019803452225e-05, + "loss": 0.1222, + "step": 30380 + }, + { + "epoch": 1.35, + "learning_rate": 2.7780586075140125e-05, + "loss": 0.2231, + "step": 30390 + }, + { + "epoch": 1.35, + "learning_rate": 2.777315234682803e-05, + "loss": 0.1606, + "step": 30400 + }, + { + "epoch": 1.35, + "learning_rate": 2.776571861851593e-05, + "loss": 0.1054, + "step": 30410 + }, + { + "epoch": 1.35, + "learning_rate": 2.7758284890203832e-05, + "loss": 0.208, + "step": 30420 + }, + { + "epoch": 1.35, + "learning_rate": 2.775085116189174e-05, + "loss": 0.1259, + "step": 30430 + }, + { + "epoch": 1.35, + "learning_rate": 2.7743417433579638e-05, + "loss": 0.1513, + "step": 30440 + }, + { + "epoch": 1.35, + "learning_rate": 2.7735983705267544e-05, + "loss": 0.155, + "step": 30450 + }, + { + "epoch": 1.35, + "learning_rate": 2.7728549976955443e-05, + "loss": 0.1968, + "step": 30460 + }, + { + "epoch": 1.35, + "learning_rate": 2.772111624864335e-05, + "loss": 0.1215, + "step": 30470 + }, + { + "epoch": 1.35, + "learning_rate": 2.7713682520331248e-05, + "loss": 0.1343, + "step": 30480 + }, + { + "epoch": 1.35, + "learning_rate": 2.770624879201915e-05, + "loss": 0.1163, + "step": 30490 + }, + { + "epoch": 1.35, + "learning_rate": 2.769881506370705e-05, + "loss": 0.1898, + "step": 30500 + }, + { + "epoch": 1.35, + "learning_rate": 2.7691381335394956e-05, + "loss": 0.1705, + "step": 30510 + }, + { + "epoch": 1.35, + "learning_rate": 2.7683947607082855e-05, + "loss": 0.1185, + "step": 30520 + }, + { + "epoch": 1.35, + "learning_rate": 2.767651387877076e-05, + "loss": 0.1668, + "step": 30530 + }, + { + "epoch": 1.35, + "learning_rate": 2.7669080150458663e-05, + "loss": 0.1074, + "step": 30540 + }, + { + "epoch": 1.35, + "learning_rate": 2.7661646422146566e-05, + "loss": 0.2155, + "step": 30550 + }, + { + "epoch": 1.35, + "learning_rate": 2.765421269383447e-05, + "loss": 0.123, + "step": 30560 + }, + { + "epoch": 1.35, + "learning_rate": 2.7646778965522368e-05, + "loss": 0.1226, + "step": 30570 + }, + { + "epoch": 1.35, + "learning_rate": 2.7639345237210274e-05, + "loss": 0.1137, + "step": 30580 + }, + { + "epoch": 1.35, + "learning_rate": 2.7631911508898173e-05, + "loss": 0.1528, + "step": 30590 + }, + { + "epoch": 1.35, + "learning_rate": 2.762447778058608e-05, + "loss": 0.1295, + "step": 30600 + }, + { + "epoch": 1.36, + "learning_rate": 2.7617044052273978e-05, + "loss": 0.2394, + "step": 30610 + }, + { + "epoch": 1.36, + "learning_rate": 2.760961032396188e-05, + "loss": 0.1601, + "step": 30620 + }, + { + "epoch": 1.36, + "learning_rate": 2.760217659564978e-05, + "loss": 0.1679, + "step": 30630 + }, + { + "epoch": 1.36, + "learning_rate": 2.7594742867337686e-05, + "loss": 0.1367, + "step": 30640 + }, + { + "epoch": 1.36, + "learning_rate": 2.7587309139025585e-05, + "loss": 0.1729, + "step": 30650 + }, + { + "epoch": 1.36, + "learning_rate": 2.757987541071349e-05, + "loss": 0.2015, + "step": 30660 + }, + { + "epoch": 1.36, + "learning_rate": 2.7572441682401397e-05, + "loss": 0.1668, + "step": 30670 + }, + { + "epoch": 1.36, + "learning_rate": 2.7565007954089296e-05, + "loss": 0.1915, + "step": 30680 + }, + { + "epoch": 1.36, + "learning_rate": 2.75575742257772e-05, + "loss": 0.1512, + "step": 30690 + }, + { + "epoch": 1.36, + "learning_rate": 2.7550140497465098e-05, + "loss": 0.1377, + "step": 30700 + }, + { + "epoch": 1.36, + "learning_rate": 2.7542706769153004e-05, + "loss": 0.1612, + "step": 30710 + }, + { + "epoch": 1.36, + "learning_rate": 2.7535273040840903e-05, + "loss": 0.1691, + "step": 30720 + }, + { + "epoch": 1.36, + "learning_rate": 2.752783931252881e-05, + "loss": 0.1558, + "step": 30730 + }, + { + "epoch": 1.36, + "learning_rate": 2.7520405584216708e-05, + "loss": 0.1721, + "step": 30740 + }, + { + "epoch": 1.36, + "learning_rate": 2.751297185590461e-05, + "loss": 0.1315, + "step": 30750 + }, + { + "epoch": 1.36, + "learning_rate": 2.7505538127592513e-05, + "loss": 0.1414, + "step": 30760 + }, + { + "epoch": 1.36, + "learning_rate": 2.7498104399280416e-05, + "loss": 0.1361, + "step": 30770 + }, + { + "epoch": 1.36, + "learning_rate": 2.7490670670968322e-05, + "loss": 0.1368, + "step": 30780 + }, + { + "epoch": 1.36, + "learning_rate": 2.748323694265622e-05, + "loss": 0.134, + "step": 30790 + }, + { + "epoch": 1.36, + "learning_rate": 2.7475803214344127e-05, + "loss": 0.1212, + "step": 30800 + }, + { + "epoch": 1.36, + "learning_rate": 2.7468369486032026e-05, + "loss": 0.1131, + "step": 30810 + }, + { + "epoch": 1.36, + "learning_rate": 2.746093575771993e-05, + "loss": 0.1663, + "step": 30820 + }, + { + "epoch": 1.36, + "learning_rate": 2.7453502029407828e-05, + "loss": 0.1479, + "step": 30830 + }, + { + "epoch": 1.37, + "learning_rate": 2.7446068301095734e-05, + "loss": 0.1667, + "step": 30840 + }, + { + "epoch": 1.37, + "learning_rate": 2.7438634572783633e-05, + "loss": 0.1346, + "step": 30850 + }, + { + "epoch": 1.37, + "learning_rate": 2.743120084447154e-05, + "loss": 0.1295, + "step": 30860 + }, + { + "epoch": 1.37, + "learning_rate": 2.742376711615944e-05, + "loss": 0.2018, + "step": 30870 + }, + { + "epoch": 1.37, + "learning_rate": 2.7416333387847344e-05, + "loss": 0.099, + "step": 30880 + }, + { + "epoch": 1.37, + "learning_rate": 2.7408899659535243e-05, + "loss": 0.1465, + "step": 30890 + }, + { + "epoch": 1.37, + "learning_rate": 2.7401465931223146e-05, + "loss": 0.1391, + "step": 30900 + }, + { + "epoch": 1.37, + "learning_rate": 2.7394032202911052e-05, + "loss": 0.172, + "step": 30910 + }, + { + "epoch": 1.37, + "learning_rate": 2.738659847459895e-05, + "loss": 0.1546, + "step": 30920 + }, + { + "epoch": 1.37, + "learning_rate": 2.7379164746286857e-05, + "loss": 0.1057, + "step": 30930 + }, + { + "epoch": 1.37, + "learning_rate": 2.7371731017974756e-05, + "loss": 0.1202, + "step": 30940 + }, + { + "epoch": 1.37, + "learning_rate": 2.736429728966266e-05, + "loss": 0.1598, + "step": 30950 + }, + { + "epoch": 1.37, + "learning_rate": 2.7356863561350558e-05, + "loss": 0.1468, + "step": 30960 + }, + { + "epoch": 1.37, + "learning_rate": 2.7349429833038464e-05, + "loss": 0.2259, + "step": 30970 + }, + { + "epoch": 1.37, + "learning_rate": 2.7341996104726363e-05, + "loss": 0.1568, + "step": 30980 + }, + { + "epoch": 1.37, + "learning_rate": 2.733456237641427e-05, + "loss": 0.1786, + "step": 30990 + }, + { + "epoch": 1.37, + "learning_rate": 2.732712864810217e-05, + "loss": 0.1627, + "step": 31000 + }, + { + "epoch": 1.37, + "learning_rate": 2.7319694919790074e-05, + "loss": 0.1561, + "step": 31010 + }, + { + "epoch": 1.37, + "learning_rate": 2.7312261191477974e-05, + "loss": 0.1873, + "step": 31020 + }, + { + "epoch": 1.37, + "learning_rate": 2.7304827463165876e-05, + "loss": 0.2104, + "step": 31030 + }, + { + "epoch": 1.37, + "learning_rate": 2.7297393734853782e-05, + "loss": 0.107, + "step": 31040 + }, + { + "epoch": 1.37, + "learning_rate": 2.728996000654168e-05, + "loss": 0.096, + "step": 31050 + }, + { + "epoch": 1.38, + "learning_rate": 2.7282526278229587e-05, + "loss": 0.196, + "step": 31060 + }, + { + "epoch": 1.38, + "learning_rate": 2.7275092549917487e-05, + "loss": 0.126, + "step": 31070 + }, + { + "epoch": 1.38, + "learning_rate": 2.726765882160539e-05, + "loss": 0.0904, + "step": 31080 + }, + { + "epoch": 1.38, + "learning_rate": 2.7260225093293292e-05, + "loss": 0.1979, + "step": 31090 + }, + { + "epoch": 1.38, + "learning_rate": 2.7252791364981194e-05, + "loss": 0.2095, + "step": 31100 + }, + { + "epoch": 1.38, + "learning_rate": 2.7245357636669093e-05, + "loss": 0.1512, + "step": 31110 + }, + { + "epoch": 1.38, + "learning_rate": 2.7237923908357e-05, + "loss": 0.1583, + "step": 31120 + }, + { + "epoch": 1.38, + "learning_rate": 2.72304901800449e-05, + "loss": 0.1236, + "step": 31130 + }, + { + "epoch": 1.38, + "learning_rate": 2.7223056451732805e-05, + "loss": 0.1425, + "step": 31140 + }, + { + "epoch": 1.38, + "learning_rate": 2.7215622723420707e-05, + "loss": 0.1515, + "step": 31150 + }, + { + "epoch": 1.38, + "learning_rate": 2.7208188995108606e-05, + "loss": 0.1374, + "step": 31160 + }, + { + "epoch": 1.38, + "learning_rate": 2.7200755266796512e-05, + "loss": 0.1323, + "step": 31170 + }, + { + "epoch": 1.38, + "learning_rate": 2.719332153848441e-05, + "loss": 0.1654, + "step": 31180 + }, + { + "epoch": 1.38, + "learning_rate": 2.7185887810172317e-05, + "loss": 0.0881, + "step": 31190 + }, + { + "epoch": 1.38, + "learning_rate": 2.7178454081860217e-05, + "loss": 0.1594, + "step": 31200 + }, + { + "epoch": 1.38, + "learning_rate": 2.7171020353548123e-05, + "loss": 0.147, + "step": 31210 + }, + { + "epoch": 1.38, + "learning_rate": 2.7163586625236022e-05, + "loss": 0.132, + "step": 31220 + }, + { + "epoch": 1.38, + "learning_rate": 2.7156152896923924e-05, + "loss": 0.1484, + "step": 31230 + }, + { + "epoch": 1.38, + "learning_rate": 2.7148719168611824e-05, + "loss": 0.1748, + "step": 31240 + }, + { + "epoch": 1.38, + "learning_rate": 2.714128544029973e-05, + "loss": 0.1244, + "step": 31250 + }, + { + "epoch": 1.38, + "learning_rate": 2.713385171198763e-05, + "loss": 0.201, + "step": 31260 + }, + { + "epoch": 1.38, + "learning_rate": 2.7126417983675535e-05, + "loss": 0.1079, + "step": 31270 + }, + { + "epoch": 1.38, + "learning_rate": 2.7118984255363437e-05, + "loss": 0.1308, + "step": 31280 + }, + { + "epoch": 1.39, + "learning_rate": 2.7111550527051337e-05, + "loss": 0.2069, + "step": 31290 + }, + { + "epoch": 1.39, + "learning_rate": 2.7104116798739242e-05, + "loss": 0.1616, + "step": 31300 + }, + { + "epoch": 1.39, + "learning_rate": 2.709668307042714e-05, + "loss": 0.1753, + "step": 31310 + }, + { + "epoch": 1.39, + "learning_rate": 2.7089249342115048e-05, + "loss": 0.1854, + "step": 31320 + }, + { + "epoch": 1.39, + "learning_rate": 2.7081815613802947e-05, + "loss": 0.139, + "step": 31330 + }, + { + "epoch": 1.39, + "learning_rate": 2.7074381885490853e-05, + "loss": 0.1428, + "step": 31340 + }, + { + "epoch": 1.39, + "learning_rate": 2.7066948157178752e-05, + "loss": 0.1224, + "step": 31350 + }, + { + "epoch": 1.39, + "learning_rate": 2.7059514428866655e-05, + "loss": 0.1472, + "step": 31360 + }, + { + "epoch": 1.39, + "learning_rate": 2.7052080700554554e-05, + "loss": 0.1192, + "step": 31370 + }, + { + "epoch": 1.39, + "learning_rate": 2.704464697224246e-05, + "loss": 0.1153, + "step": 31380 + }, + { + "epoch": 1.39, + "learning_rate": 2.703721324393036e-05, + "loss": 0.135, + "step": 31390 + }, + { + "epoch": 1.39, + "learning_rate": 2.7029779515618265e-05, + "loss": 0.1333, + "step": 31400 + }, + { + "epoch": 1.39, + "learning_rate": 2.702234578730617e-05, + "loss": 0.1187, + "step": 31410 + }, + { + "epoch": 1.39, + "learning_rate": 2.701491205899407e-05, + "loss": 0.1685, + "step": 31420 + }, + { + "epoch": 1.39, + "learning_rate": 2.7007478330681973e-05, + "loss": 0.1364, + "step": 31430 + }, + { + "epoch": 1.39, + "learning_rate": 2.7000044602369872e-05, + "loss": 0.1539, + "step": 31440 + }, + { + "epoch": 1.39, + "learning_rate": 2.6992610874057778e-05, + "loss": 0.1257, + "step": 31450 + }, + { + "epoch": 1.39, + "learning_rate": 2.6985177145745677e-05, + "loss": 0.1427, + "step": 31460 + }, + { + "epoch": 1.39, + "learning_rate": 2.6977743417433583e-05, + "loss": 0.1284, + "step": 31470 + }, + { + "epoch": 1.39, + "learning_rate": 2.6970309689121482e-05, + "loss": 0.1433, + "step": 31480 + }, + { + "epoch": 1.39, + "learning_rate": 2.6962875960809385e-05, + "loss": 0.0759, + "step": 31490 + }, + { + "epoch": 1.39, + "learning_rate": 2.6955442232497284e-05, + "loss": 0.1825, + "step": 31500 + }, + { + "epoch": 1.4, + "learning_rate": 2.694800850418519e-05, + "loss": 0.1767, + "step": 31510 + }, + { + "epoch": 1.4, + "learning_rate": 2.6940574775873096e-05, + "loss": 0.0977, + "step": 31520 + }, + { + "epoch": 1.4, + "learning_rate": 2.6933141047560995e-05, + "loss": 0.1597, + "step": 31530 + }, + { + "epoch": 1.4, + "learning_rate": 2.69257073192489e-05, + "loss": 0.1223, + "step": 31540 + }, + { + "epoch": 1.4, + "learning_rate": 2.69182735909368e-05, + "loss": 0.132, + "step": 31550 + }, + { + "epoch": 1.4, + "learning_rate": 2.6910839862624703e-05, + "loss": 0.1957, + "step": 31560 + }, + { + "epoch": 1.4, + "learning_rate": 2.6903406134312602e-05, + "loss": 0.1571, + "step": 31570 + }, + { + "epoch": 1.4, + "learning_rate": 2.6895972406000508e-05, + "loss": 0.1138, + "step": 31580 + }, + { + "epoch": 1.4, + "learning_rate": 2.6888538677688407e-05, + "loss": 0.2126, + "step": 31590 + }, + { + "epoch": 1.4, + "learning_rate": 2.6881104949376313e-05, + "loss": 0.1669, + "step": 31600 + }, + { + "epoch": 1.4, + "learning_rate": 2.6873671221064212e-05, + "loss": 0.1456, + "step": 31610 + }, + { + "epoch": 1.4, + "learning_rate": 2.6866237492752115e-05, + "loss": 0.1687, + "step": 31620 + }, + { + "epoch": 1.4, + "learning_rate": 2.6858803764440017e-05, + "loss": 0.1388, + "step": 31630 + }, + { + "epoch": 1.4, + "learning_rate": 2.685137003612792e-05, + "loss": 0.1262, + "step": 31640 + }, + { + "epoch": 1.4, + "learning_rate": 2.6843936307815826e-05, + "loss": 0.1057, + "step": 31650 + }, + { + "epoch": 1.4, + "learning_rate": 2.6836502579503725e-05, + "loss": 0.1342, + "step": 31660 + }, + { + "epoch": 1.4, + "learning_rate": 2.682906885119163e-05, + "loss": 0.1888, + "step": 31670 + }, + { + "epoch": 1.4, + "learning_rate": 2.682163512287953e-05, + "loss": 0.1858, + "step": 31680 + }, + { + "epoch": 1.4, + "learning_rate": 2.6814201394567433e-05, + "loss": 0.1068, + "step": 31690 + }, + { + "epoch": 1.4, + "learning_rate": 2.6806767666255332e-05, + "loss": 0.1641, + "step": 31700 + }, + { + "epoch": 1.4, + "learning_rate": 2.6799333937943238e-05, + "loss": 0.1524, + "step": 31710 + }, + { + "epoch": 1.4, + "learning_rate": 2.6791900209631137e-05, + "loss": 0.1182, + "step": 31720 + }, + { + "epoch": 1.4, + "learning_rate": 2.6784466481319043e-05, + "loss": 0.0989, + "step": 31730 + }, + { + "epoch": 1.41, + "learning_rate": 2.6777032753006942e-05, + "loss": 0.1723, + "step": 31740 + }, + { + "epoch": 1.41, + "learning_rate": 2.676959902469485e-05, + "loss": 0.1587, + "step": 31750 + }, + { + "epoch": 1.41, + "learning_rate": 2.676216529638275e-05, + "loss": 0.1278, + "step": 31760 + }, + { + "epoch": 1.41, + "learning_rate": 2.675473156807065e-05, + "loss": 0.1498, + "step": 31770 + }, + { + "epoch": 1.41, + "learning_rate": 2.6747297839758556e-05, + "loss": 0.1702, + "step": 31780 + }, + { + "epoch": 1.41, + "learning_rate": 2.6739864111446455e-05, + "loss": 0.1265, + "step": 31790 + }, + { + "epoch": 1.41, + "learning_rate": 2.673243038313436e-05, + "loss": 0.185, + "step": 31800 + }, + { + "epoch": 1.41, + "learning_rate": 2.672499665482226e-05, + "loss": 0.172, + "step": 31810 + }, + { + "epoch": 1.41, + "learning_rate": 2.6717562926510163e-05, + "loss": 0.1897, + "step": 31820 + }, + { + "epoch": 1.41, + "learning_rate": 2.6710129198198062e-05, + "loss": 0.1689, + "step": 31830 + }, + { + "epoch": 1.41, + "learning_rate": 2.6702695469885968e-05, + "loss": 0.117, + "step": 31840 + }, + { + "epoch": 1.41, + "learning_rate": 2.6695261741573867e-05, + "loss": 0.1188, + "step": 31850 + }, + { + "epoch": 1.41, + "learning_rate": 2.6687828013261773e-05, + "loss": 0.1405, + "step": 31860 + }, + { + "epoch": 1.41, + "learning_rate": 2.6680394284949673e-05, + "loss": 0.1542, + "step": 31870 + }, + { + "epoch": 1.41, + "learning_rate": 2.667296055663758e-05, + "loss": 0.136, + "step": 31880 + }, + { + "epoch": 1.41, + "learning_rate": 2.666552682832548e-05, + "loss": 0.1252, + "step": 31890 + }, + { + "epoch": 1.41, + "learning_rate": 2.665809310001338e-05, + "loss": 0.1295, + "step": 31900 + }, + { + "epoch": 1.41, + "learning_rate": 2.6650659371701286e-05, + "loss": 0.2021, + "step": 31910 + }, + { + "epoch": 1.41, + "learning_rate": 2.6643225643389185e-05, + "loss": 0.1507, + "step": 31920 + }, + { + "epoch": 1.41, + "learning_rate": 2.663579191507709e-05, + "loss": 0.1296, + "step": 31930 + }, + { + "epoch": 1.41, + "learning_rate": 2.662835818676499e-05, + "loss": 0.1321, + "step": 31940 + }, + { + "epoch": 1.41, + "learning_rate": 2.6620924458452897e-05, + "loss": 0.1463, + "step": 31950 + }, + { + "epoch": 1.41, + "learning_rate": 2.6613490730140796e-05, + "loss": 0.1233, + "step": 31960 + }, + { + "epoch": 1.42, + "learning_rate": 2.66060570018287e-05, + "loss": 0.0903, + "step": 31970 + }, + { + "epoch": 1.42, + "learning_rate": 2.6598623273516598e-05, + "loss": 0.132, + "step": 31980 + }, + { + "epoch": 1.42, + "learning_rate": 2.6591189545204504e-05, + "loss": 0.1349, + "step": 31990 + }, + { + "epoch": 1.42, + "learning_rate": 2.6583755816892403e-05, + "loss": 0.1165, + "step": 32000 + }, + { + "epoch": 1.42, + "learning_rate": 2.657632208858031e-05, + "loss": 0.1436, + "step": 32010 + }, + { + "epoch": 1.42, + "learning_rate": 2.656888836026821e-05, + "loss": 0.1835, + "step": 32020 + }, + { + "epoch": 1.42, + "learning_rate": 2.656145463195611e-05, + "loss": 0.1228, + "step": 32030 + }, + { + "epoch": 1.42, + "learning_rate": 2.6554020903644016e-05, + "loss": 0.127, + "step": 32040 + }, + { + "epoch": 1.42, + "learning_rate": 2.6546587175331916e-05, + "loss": 0.1121, + "step": 32050 + }, + { + "epoch": 1.42, + "learning_rate": 2.653915344701982e-05, + "loss": 0.1612, + "step": 32060 + }, + { + "epoch": 1.42, + "learning_rate": 2.653171971870772e-05, + "loss": 0.1406, + "step": 32070 + }, + { + "epoch": 1.42, + "learning_rate": 2.6524285990395627e-05, + "loss": 0.0911, + "step": 32080 + }, + { + "epoch": 1.42, + "learning_rate": 2.6516852262083526e-05, + "loss": 0.1907, + "step": 32090 + }, + { + "epoch": 1.42, + "learning_rate": 2.650941853377143e-05, + "loss": 0.1779, + "step": 32100 + }, + { + "epoch": 1.42, + "learning_rate": 2.6501984805459328e-05, + "loss": 0.1272, + "step": 32110 + }, + { + "epoch": 1.42, + "learning_rate": 2.6494551077147234e-05, + "loss": 0.1619, + "step": 32120 + }, + { + "epoch": 1.42, + "learning_rate": 2.648711734883514e-05, + "loss": 0.1197, + "step": 32130 + }, + { + "epoch": 1.42, + "learning_rate": 2.647968362052304e-05, + "loss": 0.1762, + "step": 32140 + }, + { + "epoch": 1.42, + "learning_rate": 2.647224989221094e-05, + "loss": 0.112, + "step": 32150 + }, + { + "epoch": 1.42, + "learning_rate": 2.6464816163898844e-05, + "loss": 0.1069, + "step": 32160 + }, + { + "epoch": 1.42, + "learning_rate": 2.6457382435586747e-05, + "loss": 0.1581, + "step": 32170 + }, + { + "epoch": 1.42, + "learning_rate": 2.6449948707274646e-05, + "loss": 0.1345, + "step": 32180 + }, + { + "epoch": 1.43, + "learning_rate": 2.6442514978962552e-05, + "loss": 0.1486, + "step": 32190 + }, + { + "epoch": 1.43, + "learning_rate": 2.643508125065045e-05, + "loss": 0.1294, + "step": 32200 + }, + { + "epoch": 1.43, + "learning_rate": 2.6427647522338357e-05, + "loss": 0.1449, + "step": 32210 + }, + { + "epoch": 1.43, + "learning_rate": 2.6420213794026256e-05, + "loss": 0.1295, + "step": 32220 + }, + { + "epoch": 1.43, + "learning_rate": 2.641278006571416e-05, + "loss": 0.1516, + "step": 32230 + }, + { + "epoch": 1.43, + "learning_rate": 2.6405346337402058e-05, + "loss": 0.1954, + "step": 32240 + }, + { + "epoch": 1.43, + "learning_rate": 2.6397912609089964e-05, + "loss": 0.1859, + "step": 32250 + }, + { + "epoch": 1.43, + "learning_rate": 2.639047888077787e-05, + "loss": 0.0849, + "step": 32260 + }, + { + "epoch": 1.43, + "learning_rate": 2.638304515246577e-05, + "loss": 0.1443, + "step": 32270 + }, + { + "epoch": 1.43, + "learning_rate": 2.6375611424153675e-05, + "loss": 0.1381, + "step": 32280 + }, + { + "epoch": 1.43, + "learning_rate": 2.6368177695841574e-05, + "loss": 0.0827, + "step": 32290 + }, + { + "epoch": 1.43, + "learning_rate": 2.6360743967529477e-05, + "loss": 0.1385, + "step": 32300 + }, + { + "epoch": 1.43, + "learning_rate": 2.6353310239217376e-05, + "loss": 0.1884, + "step": 32310 + }, + { + "epoch": 1.43, + "learning_rate": 2.6345876510905282e-05, + "loss": 0.2028, + "step": 32320 + }, + { + "epoch": 1.43, + "learning_rate": 2.633844278259318e-05, + "loss": 0.1123, + "step": 32330 + }, + { + "epoch": 1.43, + "learning_rate": 2.6331009054281087e-05, + "loss": 0.1612, + "step": 32340 + }, + { + "epoch": 1.43, + "learning_rate": 2.6323575325968986e-05, + "loss": 0.1554, + "step": 32350 + }, + { + "epoch": 1.43, + "learning_rate": 2.631614159765689e-05, + "loss": 0.1829, + "step": 32360 + }, + { + "epoch": 1.43, + "learning_rate": 2.630870786934479e-05, + "loss": 0.1388, + "step": 32370 + }, + { + "epoch": 1.43, + "learning_rate": 2.6301274141032694e-05, + "loss": 0.1558, + "step": 32380 + }, + { + "epoch": 1.43, + "learning_rate": 2.62938404127206e-05, + "loss": 0.1305, + "step": 32390 + }, + { + "epoch": 1.43, + "learning_rate": 2.62864066844085e-05, + "loss": 0.1503, + "step": 32400 + }, + { + "epoch": 1.43, + "learning_rate": 2.6278972956096405e-05, + "loss": 0.1699, + "step": 32410 + }, + { + "epoch": 1.44, + "learning_rate": 2.6271539227784304e-05, + "loss": 0.1144, + "step": 32420 + }, + { + "epoch": 1.44, + "learning_rate": 2.6264105499472207e-05, + "loss": 0.172, + "step": 32430 + }, + { + "epoch": 1.44, + "learning_rate": 2.6256671771160106e-05, + "loss": 0.1231, + "step": 32440 + }, + { + "epoch": 1.44, + "learning_rate": 2.6249238042848012e-05, + "loss": 0.1551, + "step": 32450 + }, + { + "epoch": 1.44, + "learning_rate": 2.624180431453591e-05, + "loss": 0.2342, + "step": 32460 + }, + { + "epoch": 1.44, + "learning_rate": 2.6234370586223817e-05, + "loss": 0.1555, + "step": 32470 + }, + { + "epoch": 1.44, + "learning_rate": 2.6226936857911716e-05, + "loss": 0.1759, + "step": 32480 + }, + { + "epoch": 1.44, + "learning_rate": 2.6219503129599622e-05, + "loss": 0.1398, + "step": 32490 + }, + { + "epoch": 1.44, + "learning_rate": 2.6212069401287525e-05, + "loss": 0.1524, + "step": 32500 + }, + { + "epoch": 1.44, + "learning_rate": 2.6204635672975424e-05, + "loss": 0.154, + "step": 32510 + }, + { + "epoch": 1.44, + "learning_rate": 2.619720194466333e-05, + "loss": 0.1239, + "step": 32520 + }, + { + "epoch": 1.44, + "learning_rate": 2.618976821635123e-05, + "loss": 0.1664, + "step": 32530 + }, + { + "epoch": 1.44, + "learning_rate": 2.6182334488039135e-05, + "loss": 0.1718, + "step": 32540 + }, + { + "epoch": 1.44, + "learning_rate": 2.6174900759727034e-05, + "loss": 0.1651, + "step": 32550 + }, + { + "epoch": 1.44, + "learning_rate": 2.6167467031414937e-05, + "loss": 0.1001, + "step": 32560 + }, + { + "epoch": 1.44, + "learning_rate": 2.6160033303102836e-05, + "loss": 0.1402, + "step": 32570 + }, + { + "epoch": 1.44, + "learning_rate": 2.6152599574790742e-05, + "loss": 0.1386, + "step": 32580 + }, + { + "epoch": 1.44, + "learning_rate": 2.614516584647864e-05, + "loss": 0.127, + "step": 32590 + }, + { + "epoch": 1.44, + "learning_rate": 2.6137732118166547e-05, + "loss": 0.1557, + "step": 32600 + }, + { + "epoch": 1.44, + "learning_rate": 2.6130298389854447e-05, + "loss": 0.1206, + "step": 32610 + }, + { + "epoch": 1.44, + "learning_rate": 2.6122864661542352e-05, + "loss": 0.141, + "step": 32620 + }, + { + "epoch": 1.44, + "learning_rate": 2.6115430933230255e-05, + "loss": 0.1605, + "step": 32630 + }, + { + "epoch": 1.45, + "learning_rate": 2.6107997204918154e-05, + "loss": 0.1604, + "step": 32640 + }, + { + "epoch": 1.45, + "learning_rate": 2.610056347660606e-05, + "loss": 0.1313, + "step": 32650 + }, + { + "epoch": 1.45, + "learning_rate": 2.609312974829396e-05, + "loss": 0.1143, + "step": 32660 + }, + { + "epoch": 1.45, + "learning_rate": 2.6085696019981865e-05, + "loss": 0.1441, + "step": 32670 + }, + { + "epoch": 1.45, + "learning_rate": 2.6078262291669765e-05, + "loss": 0.1699, + "step": 32680 + }, + { + "epoch": 1.45, + "learning_rate": 2.6070828563357667e-05, + "loss": 0.2155, + "step": 32690 + }, + { + "epoch": 1.45, + "learning_rate": 2.606339483504557e-05, + "loss": 0.1657, + "step": 32700 + }, + { + "epoch": 1.45, + "learning_rate": 2.6055961106733472e-05, + "loss": 0.1874, + "step": 32710 + }, + { + "epoch": 1.45, + "learning_rate": 2.604852737842137e-05, + "loss": 0.1986, + "step": 32720 + }, + { + "epoch": 1.45, + "learning_rate": 2.6041093650109277e-05, + "loss": 0.1623, + "step": 32730 + }, + { + "epoch": 1.45, + "learning_rate": 2.6033659921797183e-05, + "loss": 0.105, + "step": 32740 + }, + { + "epoch": 1.45, + "learning_rate": 2.6026226193485083e-05, + "loss": 0.1584, + "step": 32750 + }, + { + "epoch": 1.45, + "learning_rate": 2.6018792465172985e-05, + "loss": 0.1187, + "step": 32760 + }, + { + "epoch": 1.45, + "learning_rate": 2.6011358736860884e-05, + "loss": 0.1541, + "step": 32770 + }, + { + "epoch": 1.45, + "learning_rate": 2.600392500854879e-05, + "loss": 0.1306, + "step": 32780 + }, + { + "epoch": 1.45, + "learning_rate": 2.599649128023669e-05, + "loss": 0.169, + "step": 32790 + }, + { + "epoch": 1.45, + "learning_rate": 2.5989057551924596e-05, + "loss": 0.1878, + "step": 32800 + }, + { + "epoch": 1.45, + "learning_rate": 2.5981623823612495e-05, + "loss": 0.1562, + "step": 32810 + }, + { + "epoch": 1.45, + "learning_rate": 2.59741900953004e-05, + "loss": 0.1106, + "step": 32820 + }, + { + "epoch": 1.45, + "learning_rate": 2.59667563669883e-05, + "loss": 0.1595, + "step": 32830 + }, + { + "epoch": 1.45, + "learning_rate": 2.5959322638676202e-05, + "loss": 0.1025, + "step": 32840 + }, + { + "epoch": 1.45, + "learning_rate": 2.59518889103641e-05, + "loss": 0.1374, + "step": 32850 + }, + { + "epoch": 1.45, + "learning_rate": 2.5944455182052008e-05, + "loss": 0.2167, + "step": 32860 + }, + { + "epoch": 1.46, + "learning_rate": 2.5937021453739914e-05, + "loss": 0.1805, + "step": 32870 + }, + { + "epoch": 1.46, + "learning_rate": 2.5929587725427813e-05, + "loss": 0.1531, + "step": 32880 + }, + { + "epoch": 1.46, + "learning_rate": 2.5922153997115715e-05, + "loss": 0.1592, + "step": 32890 + }, + { + "epoch": 1.46, + "learning_rate": 2.5914720268803615e-05, + "loss": 0.1272, + "step": 32900 + }, + { + "epoch": 1.46, + "learning_rate": 2.590728654049152e-05, + "loss": 0.1242, + "step": 32910 + }, + { + "epoch": 1.46, + "learning_rate": 2.589985281217942e-05, + "loss": 0.1646, + "step": 32920 + }, + { + "epoch": 1.46, + "learning_rate": 2.5892419083867326e-05, + "loss": 0.1592, + "step": 32930 + }, + { + "epoch": 1.46, + "learning_rate": 2.5884985355555225e-05, + "loss": 0.1403, + "step": 32940 + }, + { + "epoch": 1.46, + "learning_rate": 2.587755162724313e-05, + "loss": 0.1315, + "step": 32950 + }, + { + "epoch": 1.46, + "learning_rate": 2.587011789893103e-05, + "loss": 0.1657, + "step": 32960 + }, + { + "epoch": 1.46, + "learning_rate": 2.5862684170618933e-05, + "loss": 0.1648, + "step": 32970 + }, + { + "epoch": 1.46, + "learning_rate": 2.5855250442306832e-05, + "loss": 0.1137, + "step": 32980 + }, + { + "epoch": 1.46, + "learning_rate": 2.5847816713994738e-05, + "loss": 0.174, + "step": 32990 + }, + { + "epoch": 1.46, + "learning_rate": 2.5840382985682644e-05, + "loss": 0.146, + "step": 33000 + }, + { + "epoch": 1.46, + "learning_rate": 2.5832949257370543e-05, + "loss": 0.1521, + "step": 33010 + }, + { + "epoch": 1.46, + "learning_rate": 2.5825515529058445e-05, + "loss": 0.1428, + "step": 33020 + }, + { + "epoch": 1.46, + "learning_rate": 2.5818081800746348e-05, + "loss": 0.1596, + "step": 33030 + }, + { + "epoch": 1.46, + "learning_rate": 2.581064807243425e-05, + "loss": 0.1194, + "step": 33040 + }, + { + "epoch": 1.46, + "learning_rate": 2.580321434412215e-05, + "loss": 0.1523, + "step": 33050 + }, + { + "epoch": 1.46, + "learning_rate": 2.5795780615810056e-05, + "loss": 0.1264, + "step": 33060 + }, + { + "epoch": 1.46, + "learning_rate": 2.5788346887497955e-05, + "loss": 0.1659, + "step": 33070 + }, + { + "epoch": 1.46, + "learning_rate": 2.578091315918586e-05, + "loss": 0.1802, + "step": 33080 + }, + { + "epoch": 1.47, + "learning_rate": 2.577347943087376e-05, + "loss": 0.141, + "step": 33090 + }, + { + "epoch": 1.47, + "learning_rate": 2.5766045702561663e-05, + "loss": 0.1215, + "step": 33100 + }, + { + "epoch": 1.47, + "learning_rate": 2.575861197424957e-05, + "loss": 0.1057, + "step": 33110 + }, + { + "epoch": 1.47, + "learning_rate": 2.5751178245937468e-05, + "loss": 0.1239, + "step": 33120 + }, + { + "epoch": 1.47, + "learning_rate": 2.5743744517625374e-05, + "loss": 0.1957, + "step": 33130 + }, + { + "epoch": 1.47, + "learning_rate": 2.5736310789313273e-05, + "loss": 0.1355, + "step": 33140 + }, + { + "epoch": 1.47, + "learning_rate": 2.572887706100118e-05, + "loss": 0.1544, + "step": 33150 + }, + { + "epoch": 1.47, + "learning_rate": 2.5721443332689078e-05, + "loss": 0.1895, + "step": 33160 + }, + { + "epoch": 1.47, + "learning_rate": 2.571400960437698e-05, + "loss": 0.1534, + "step": 33170 + }, + { + "epoch": 1.47, + "learning_rate": 2.570657587606488e-05, + "loss": 0.1279, + "step": 33180 + }, + { + "epoch": 1.47, + "learning_rate": 2.5699142147752786e-05, + "loss": 0.1393, + "step": 33190 + }, + { + "epoch": 1.47, + "learning_rate": 2.5691708419440685e-05, + "loss": 0.1386, + "step": 33200 + }, + { + "epoch": 1.47, + "learning_rate": 2.568427469112859e-05, + "loss": 0.1379, + "step": 33210 + }, + { + "epoch": 1.47, + "learning_rate": 2.567684096281649e-05, + "loss": 0.1617, + "step": 33220 + }, + { + "epoch": 1.47, + "learning_rate": 2.5669407234504393e-05, + "loss": 0.1443, + "step": 33230 + }, + { + "epoch": 1.47, + "learning_rate": 2.56619735061923e-05, + "loss": 0.1519, + "step": 33240 + }, + { + "epoch": 1.47, + "learning_rate": 2.5654539777880198e-05, + "loss": 0.1781, + "step": 33250 + }, + { + "epoch": 1.47, + "learning_rate": 2.5647106049568104e-05, + "loss": 0.0905, + "step": 33260 + }, + { + "epoch": 1.47, + "learning_rate": 2.5639672321256003e-05, + "loss": 0.1439, + "step": 33270 + }, + { + "epoch": 1.47, + "learning_rate": 2.563223859294391e-05, + "loss": 0.1094, + "step": 33280 + }, + { + "epoch": 1.47, + "learning_rate": 2.562480486463181e-05, + "loss": 0.1183, + "step": 33290 + }, + { + "epoch": 1.47, + "learning_rate": 2.561737113631971e-05, + "loss": 0.1954, + "step": 33300 + }, + { + "epoch": 1.47, + "learning_rate": 2.560993740800761e-05, + "loss": 0.1215, + "step": 33310 + }, + { + "epoch": 1.48, + "learning_rate": 2.5602503679695516e-05, + "loss": 0.1371, + "step": 33320 + }, + { + "epoch": 1.48, + "learning_rate": 2.5595069951383415e-05, + "loss": 0.1382, + "step": 33330 + }, + { + "epoch": 1.48, + "learning_rate": 2.558763622307132e-05, + "loss": 0.1323, + "step": 33340 + }, + { + "epoch": 1.48, + "learning_rate": 2.558020249475922e-05, + "loss": 0.237, + "step": 33350 + }, + { + "epoch": 1.48, + "learning_rate": 2.5572768766447126e-05, + "loss": 0.1109, + "step": 33360 + }, + { + "epoch": 1.48, + "learning_rate": 2.556533503813503e-05, + "loss": 0.1066, + "step": 33370 + }, + { + "epoch": 1.48, + "learning_rate": 2.5557901309822928e-05, + "loss": 0.1784, + "step": 33380 + }, + { + "epoch": 1.48, + "learning_rate": 2.5550467581510834e-05, + "loss": 0.1199, + "step": 33390 + }, + { + "epoch": 1.48, + "learning_rate": 2.5543033853198733e-05, + "loss": 0.1605, + "step": 33400 + }, + { + "epoch": 1.48, + "learning_rate": 2.553560012488664e-05, + "loss": 0.2012, + "step": 33410 + }, + { + "epoch": 1.48, + "learning_rate": 2.552816639657454e-05, + "loss": 0.1401, + "step": 33420 + }, + { + "epoch": 1.48, + "learning_rate": 2.552073266826244e-05, + "loss": 0.1234, + "step": 33430 + }, + { + "epoch": 1.48, + "learning_rate": 2.551329893995034e-05, + "loss": 0.1435, + "step": 33440 + }, + { + "epoch": 1.48, + "learning_rate": 2.5505865211638246e-05, + "loss": 0.1162, + "step": 33450 + }, + { + "epoch": 1.48, + "learning_rate": 2.5498431483326145e-05, + "loss": 0.1675, + "step": 33460 + }, + { + "epoch": 1.48, + "learning_rate": 2.549099775501405e-05, + "loss": 0.2093, + "step": 33470 + }, + { + "epoch": 1.48, + "learning_rate": 2.5483564026701957e-05, + "loss": 0.1224, + "step": 33480 + }, + { + "epoch": 1.48, + "learning_rate": 2.5476130298389857e-05, + "loss": 0.0908, + "step": 33490 + }, + { + "epoch": 1.48, + "learning_rate": 2.546869657007776e-05, + "loss": 0.1185, + "step": 33500 + }, + { + "epoch": 1.48, + "learning_rate": 2.546126284176566e-05, + "loss": 0.0742, + "step": 33510 + }, + { + "epoch": 1.48, + "learning_rate": 2.5453829113453564e-05, + "loss": 0.2029, + "step": 33520 + }, + { + "epoch": 1.48, + "learning_rate": 2.5446395385141463e-05, + "loss": 0.1588, + "step": 33530 + }, + { + "epoch": 1.48, + "learning_rate": 2.543896165682937e-05, + "loss": 0.174, + "step": 33540 + }, + { + "epoch": 1.49, + "learning_rate": 2.543152792851727e-05, + "loss": 0.1087, + "step": 33550 + }, + { + "epoch": 1.49, + "learning_rate": 2.5424094200205175e-05, + "loss": 0.1234, + "step": 33560 + }, + { + "epoch": 1.49, + "learning_rate": 2.5416660471893074e-05, + "loss": 0.1371, + "step": 33570 + }, + { + "epoch": 1.49, + "learning_rate": 2.5409226743580976e-05, + "loss": 0.1257, + "step": 33580 + }, + { + "epoch": 1.49, + "learning_rate": 2.5401793015268876e-05, + "loss": 0.1198, + "step": 33590 + }, + { + "epoch": 1.49, + "learning_rate": 2.539435928695678e-05, + "loss": 0.1226, + "step": 33600 + }, + { + "epoch": 1.49, + "learning_rate": 2.5386925558644688e-05, + "loss": 0.134, + "step": 33610 + }, + { + "epoch": 1.49, + "learning_rate": 2.5379491830332587e-05, + "loss": 0.1621, + "step": 33620 + }, + { + "epoch": 1.49, + "learning_rate": 2.537205810202049e-05, + "loss": 0.208, + "step": 33630 + }, + { + "epoch": 1.49, + "learning_rate": 2.536462437370839e-05, + "loss": 0.1763, + "step": 33640 + }, + { + "epoch": 1.49, + "learning_rate": 2.5357190645396294e-05, + "loss": 0.1613, + "step": 33650 + }, + { + "epoch": 1.49, + "learning_rate": 2.5349756917084194e-05, + "loss": 0.1225, + "step": 33660 + }, + { + "epoch": 1.49, + "learning_rate": 2.53423231887721e-05, + "loss": 0.1412, + "step": 33670 + }, + { + "epoch": 1.49, + "learning_rate": 2.533488946046e-05, + "loss": 0.1198, + "step": 33680 + }, + { + "epoch": 1.49, + "learning_rate": 2.5327455732147905e-05, + "loss": 0.164, + "step": 33690 + }, + { + "epoch": 1.49, + "learning_rate": 2.5320022003835804e-05, + "loss": 0.1273, + "step": 33700 + }, + { + "epoch": 1.49, + "learning_rate": 2.5312588275523707e-05, + "loss": 0.1598, + "step": 33710 + }, + { + "epoch": 1.49, + "learning_rate": 2.5305154547211613e-05, + "loss": 0.1845, + "step": 33720 + }, + { + "epoch": 1.49, + "learning_rate": 2.5297720818899512e-05, + "loss": 0.1515, + "step": 33730 + }, + { + "epoch": 1.49, + "learning_rate": 2.5290287090587418e-05, + "loss": 0.1737, + "step": 33740 + }, + { + "epoch": 1.49, + "learning_rate": 2.5282853362275317e-05, + "loss": 0.1178, + "step": 33750 + }, + { + "epoch": 1.49, + "learning_rate": 2.527541963396322e-05, + "loss": 0.1331, + "step": 33760 + }, + { + "epoch": 1.5, + "learning_rate": 2.5267985905651122e-05, + "loss": 0.162, + "step": 33770 + }, + { + "epoch": 1.5, + "learning_rate": 2.5260552177339025e-05, + "loss": 0.1675, + "step": 33780 + }, + { + "epoch": 1.5, + "learning_rate": 2.5253118449026924e-05, + "loss": 0.1023, + "step": 33790 + }, + { + "epoch": 1.5, + "learning_rate": 2.524568472071483e-05, + "loss": 0.1536, + "step": 33800 + }, + { + "epoch": 1.5, + "learning_rate": 2.523825099240273e-05, + "loss": 0.1964, + "step": 33810 + }, + { + "epoch": 1.5, + "learning_rate": 2.5230817264090635e-05, + "loss": 0.1589, + "step": 33820 + }, + { + "epoch": 1.5, + "learning_rate": 2.5223383535778534e-05, + "loss": 0.1533, + "step": 33830 + }, + { + "epoch": 1.5, + "learning_rate": 2.5215949807466437e-05, + "loss": 0.1315, + "step": 33840 + }, + { + "epoch": 1.5, + "learning_rate": 2.5208516079154343e-05, + "loss": 0.1183, + "step": 33850 + }, + { + "epoch": 1.5, + "learning_rate": 2.5201082350842242e-05, + "loss": 0.1324, + "step": 33860 + }, + { + "epoch": 1.5, + "learning_rate": 2.5193648622530148e-05, + "loss": 0.1912, + "step": 33870 + }, + { + "epoch": 1.5, + "learning_rate": 2.5186214894218047e-05, + "loss": 0.1056, + "step": 33880 + }, + { + "epoch": 1.5, + "learning_rate": 2.5178781165905953e-05, + "loss": 0.1134, + "step": 33890 + }, + { + "epoch": 1.5, + "learning_rate": 2.5171347437593852e-05, + "loss": 0.1123, + "step": 33900 + }, + { + "epoch": 1.5, + "learning_rate": 2.5163913709281755e-05, + "loss": 0.1326, + "step": 33910 + }, + { + "epoch": 1.5, + "learning_rate": 2.5156479980969654e-05, + "loss": 0.137, + "step": 33920 + }, + { + "epoch": 1.5, + "learning_rate": 2.514904625265756e-05, + "loss": 0.1127, + "step": 33930 + }, + { + "epoch": 1.5, + "learning_rate": 2.514161252434546e-05, + "loss": 0.146, + "step": 33940 + }, + { + "epoch": 1.5, + "learning_rate": 2.5134178796033365e-05, + "loss": 0.1838, + "step": 33950 + }, + { + "epoch": 1.5, + "learning_rate": 2.5126745067721264e-05, + "loss": 0.1118, + "step": 33960 + }, + { + "epoch": 1.5, + "learning_rate": 2.5119311339409167e-05, + "loss": 0.1168, + "step": 33970 + }, + { + "epoch": 1.5, + "learning_rate": 2.5111877611097073e-05, + "loss": 0.1401, + "step": 33980 + }, + { + "epoch": 1.5, + "learning_rate": 2.5104443882784972e-05, + "loss": 0.1482, + "step": 33990 + }, + { + "epoch": 1.51, + "learning_rate": 2.5097010154472878e-05, + "loss": 0.1387, + "step": 34000 + }, + { + "epoch": 1.51, + "learning_rate": 2.5089576426160777e-05, + "loss": 0.145, + "step": 34010 + }, + { + "epoch": 1.51, + "learning_rate": 2.5082142697848683e-05, + "loss": 0.189, + "step": 34020 + }, + { + "epoch": 1.51, + "learning_rate": 2.5074708969536582e-05, + "loss": 0.1273, + "step": 34030 + }, + { + "epoch": 1.51, + "learning_rate": 2.5067275241224485e-05, + "loss": 0.1844, + "step": 34040 + }, + { + "epoch": 1.51, + "learning_rate": 2.5059841512912384e-05, + "loss": 0.1611, + "step": 34050 + }, + { + "epoch": 1.51, + "learning_rate": 2.505240778460029e-05, + "loss": 0.1441, + "step": 34060 + }, + { + "epoch": 1.51, + "learning_rate": 2.504497405628819e-05, + "loss": 0.1155, + "step": 34070 + }, + { + "epoch": 1.51, + "learning_rate": 2.5037540327976095e-05, + "loss": 0.1457, + "step": 34080 + }, + { + "epoch": 1.51, + "learning_rate": 2.5030106599663998e-05, + "loss": 0.1458, + "step": 34090 + }, + { + "epoch": 1.51, + "learning_rate": 2.50226728713519e-05, + "loss": 0.1807, + "step": 34100 + }, + { + "epoch": 1.51, + "learning_rate": 2.5015239143039803e-05, + "loss": 0.1413, + "step": 34110 + }, + { + "epoch": 1.51, + "learning_rate": 2.5007805414727702e-05, + "loss": 0.235, + "step": 34120 + }, + { + "epoch": 1.51, + "learning_rate": 2.5000371686415608e-05, + "loss": 0.1155, + "step": 34130 + }, + { + "epoch": 1.51, + "learning_rate": 2.4992937958103507e-05, + "loss": 0.1807, + "step": 34140 + }, + { + "epoch": 1.51, + "learning_rate": 2.4985504229791413e-05, + "loss": 0.1427, + "step": 34150 + }, + { + "epoch": 1.51, + "learning_rate": 2.4978070501479316e-05, + "loss": 0.1101, + "step": 34160 + }, + { + "epoch": 1.51, + "learning_rate": 2.4970636773167215e-05, + "loss": 0.1731, + "step": 34170 + }, + { + "epoch": 1.51, + "learning_rate": 2.4963203044855118e-05, + "loss": 0.1538, + "step": 34180 + }, + { + "epoch": 1.51, + "learning_rate": 2.495576931654302e-05, + "loss": 0.1202, + "step": 34190 + }, + { + "epoch": 1.51, + "learning_rate": 2.4948335588230923e-05, + "loss": 0.1589, + "step": 34200 + }, + { + "epoch": 1.51, + "learning_rate": 2.4940901859918825e-05, + "loss": 0.1306, + "step": 34210 + }, + { + "epoch": 1.52, + "learning_rate": 2.4933468131606728e-05, + "loss": 0.109, + "step": 34220 + }, + { + "epoch": 1.52, + "learning_rate": 2.492603440329463e-05, + "loss": 0.1969, + "step": 34230 + }, + { + "epoch": 1.52, + "learning_rate": 2.491860067498253e-05, + "loss": 0.1477, + "step": 34240 + }, + { + "epoch": 1.52, + "learning_rate": 2.4911166946670432e-05, + "loss": 0.1533, + "step": 34250 + }, + { + "epoch": 1.52, + "learning_rate": 2.4903733218358335e-05, + "loss": 0.1663, + "step": 34260 + }, + { + "epoch": 1.52, + "learning_rate": 2.4896299490046237e-05, + "loss": 0.119, + "step": 34270 + }, + { + "epoch": 1.52, + "learning_rate": 2.4888865761734143e-05, + "loss": 0.156, + "step": 34280 + }, + { + "epoch": 1.52, + "learning_rate": 2.4881432033422046e-05, + "loss": 0.0833, + "step": 34290 + }, + { + "epoch": 1.52, + "learning_rate": 2.4873998305109945e-05, + "loss": 0.1217, + "step": 34300 + }, + { + "epoch": 1.52, + "learning_rate": 2.4866564576797848e-05, + "loss": 0.1891, + "step": 34310 + }, + { + "epoch": 1.52, + "learning_rate": 2.485913084848575e-05, + "loss": 0.1454, + "step": 34320 + }, + { + "epoch": 1.52, + "learning_rate": 2.4851697120173653e-05, + "loss": 0.1167, + "step": 34330 + }, + { + "epoch": 1.52, + "learning_rate": 2.4844263391861555e-05, + "loss": 0.1035, + "step": 34340 + }, + { + "epoch": 1.52, + "learning_rate": 2.4836829663549458e-05, + "loss": 0.0818, + "step": 34350 + }, + { + "epoch": 1.52, + "learning_rate": 2.482939593523736e-05, + "loss": 0.1934, + "step": 34360 + }, + { + "epoch": 1.52, + "learning_rate": 2.4821962206925263e-05, + "loss": 0.1547, + "step": 34370 + }, + { + "epoch": 1.52, + "learning_rate": 2.4814528478613162e-05, + "loss": 0.11, + "step": 34380 + }, + { + "epoch": 1.52, + "learning_rate": 2.4807094750301065e-05, + "loss": 0.1496, + "step": 34390 + }, + { + "epoch": 1.52, + "learning_rate": 2.479966102198897e-05, + "loss": 0.126, + "step": 34400 + }, + { + "epoch": 1.52, + "learning_rate": 2.4792227293676874e-05, + "loss": 0.1753, + "step": 34410 + }, + { + "epoch": 1.52, + "learning_rate": 2.4784793565364776e-05, + "loss": 0.1081, + "step": 34420 + }, + { + "epoch": 1.52, + "learning_rate": 2.477735983705268e-05, + "loss": 0.1803, + "step": 34430 + }, + { + "epoch": 1.52, + "learning_rate": 2.4769926108740578e-05, + "loss": 0.1352, + "step": 34440 + }, + { + "epoch": 1.53, + "learning_rate": 2.476249238042848e-05, + "loss": 0.2, + "step": 34450 + }, + { + "epoch": 1.53, + "learning_rate": 2.4755058652116383e-05, + "loss": 0.1474, + "step": 34460 + }, + { + "epoch": 1.53, + "learning_rate": 2.4747624923804286e-05, + "loss": 0.1701, + "step": 34470 + }, + { + "epoch": 1.53, + "learning_rate": 2.4740191195492188e-05, + "loss": 0.1923, + "step": 34480 + }, + { + "epoch": 1.53, + "learning_rate": 2.473275746718009e-05, + "loss": 0.188, + "step": 34490 + }, + { + "epoch": 1.53, + "learning_rate": 2.4725323738867993e-05, + "loss": 0.162, + "step": 34500 + }, + { + "epoch": 1.53, + "learning_rate": 2.4717890010555893e-05, + "loss": 0.1622, + "step": 34510 + }, + { + "epoch": 1.53, + "learning_rate": 2.47104562822438e-05, + "loss": 0.1184, + "step": 34520 + }, + { + "epoch": 1.53, + "learning_rate": 2.47030225539317e-05, + "loss": 0.1704, + "step": 34530 + }, + { + "epoch": 1.53, + "learning_rate": 2.4695588825619604e-05, + "loss": 0.1508, + "step": 34540 + }, + { + "epoch": 1.53, + "learning_rate": 2.4688155097307506e-05, + "loss": 0.1689, + "step": 34550 + }, + { + "epoch": 1.53, + "learning_rate": 2.468072136899541e-05, + "loss": 0.1884, + "step": 34560 + }, + { + "epoch": 1.53, + "learning_rate": 2.4673287640683308e-05, + "loss": 0.151, + "step": 34570 + }, + { + "epoch": 1.53, + "learning_rate": 2.466585391237121e-05, + "loss": 0.1427, + "step": 34580 + }, + { + "epoch": 1.53, + "learning_rate": 2.4658420184059113e-05, + "loss": 0.1338, + "step": 34590 + }, + { + "epoch": 1.53, + "learning_rate": 2.4650986455747016e-05, + "loss": 0.1285, + "step": 34600 + }, + { + "epoch": 1.53, + "learning_rate": 2.464355272743492e-05, + "loss": 0.1826, + "step": 34610 + }, + { + "epoch": 1.53, + "learning_rate": 2.463611899912282e-05, + "loss": 0.1911, + "step": 34620 + }, + { + "epoch": 1.53, + "learning_rate": 2.4628685270810724e-05, + "loss": 0.1637, + "step": 34630 + }, + { + "epoch": 1.53, + "learning_rate": 2.4621251542498626e-05, + "loss": 0.1302, + "step": 34640 + }, + { + "epoch": 1.53, + "learning_rate": 2.461381781418653e-05, + "loss": 0.1598, + "step": 34650 + }, + { + "epoch": 1.53, + "learning_rate": 2.460638408587443e-05, + "loss": 0.1755, + "step": 34660 + }, + { + "epoch": 1.53, + "learning_rate": 2.4598950357562334e-05, + "loss": 0.1253, + "step": 34670 + }, + { + "epoch": 1.54, + "learning_rate": 2.4591516629250236e-05, + "loss": 0.201, + "step": 34680 + }, + { + "epoch": 1.54, + "learning_rate": 2.458408290093814e-05, + "loss": 0.1639, + "step": 34690 + }, + { + "epoch": 1.54, + "learning_rate": 2.457664917262604e-05, + "loss": 0.1561, + "step": 34700 + }, + { + "epoch": 1.54, + "learning_rate": 2.456921544431394e-05, + "loss": 0.1554, + "step": 34710 + }, + { + "epoch": 1.54, + "learning_rate": 2.4561781716001843e-05, + "loss": 0.1611, + "step": 34720 + }, + { + "epoch": 1.54, + "learning_rate": 2.4554347987689746e-05, + "loss": 0.1442, + "step": 34730 + }, + { + "epoch": 1.54, + "learning_rate": 2.454691425937765e-05, + "loss": 0.1897, + "step": 34740 + }, + { + "epoch": 1.54, + "learning_rate": 2.453948053106555e-05, + "loss": 0.1399, + "step": 34750 + }, + { + "epoch": 1.54, + "learning_rate": 2.4532046802753454e-05, + "loss": 0.1108, + "step": 34760 + }, + { + "epoch": 1.54, + "learning_rate": 2.4524613074441356e-05, + "loss": 0.1371, + "step": 34770 + }, + { + "epoch": 1.54, + "learning_rate": 2.451717934612926e-05, + "loss": 0.1507, + "step": 34780 + }, + { + "epoch": 1.54, + "learning_rate": 2.450974561781716e-05, + "loss": 0.1695, + "step": 34790 + }, + { + "epoch": 1.54, + "learning_rate": 2.4502311889505064e-05, + "loss": 0.1284, + "step": 34800 + }, + { + "epoch": 1.54, + "learning_rate": 2.4494878161192967e-05, + "loss": 0.1105, + "step": 34810 + }, + { + "epoch": 1.54, + "learning_rate": 2.448744443288087e-05, + "loss": 0.1552, + "step": 34820 + }, + { + "epoch": 1.54, + "learning_rate": 2.4480010704568772e-05, + "loss": 0.177, + "step": 34830 + }, + { + "epoch": 1.54, + "learning_rate": 2.447257697625667e-05, + "loss": 0.1593, + "step": 34840 + }, + { + "epoch": 1.54, + "learning_rate": 2.4465143247944573e-05, + "loss": 0.1774, + "step": 34850 + }, + { + "epoch": 1.54, + "learning_rate": 2.4457709519632476e-05, + "loss": 0.1302, + "step": 34860 + }, + { + "epoch": 1.54, + "learning_rate": 2.445027579132038e-05, + "loss": 0.123, + "step": 34870 + }, + { + "epoch": 1.54, + "learning_rate": 2.444284206300828e-05, + "loss": 0.1488, + "step": 34880 + }, + { + "epoch": 1.54, + "learning_rate": 2.4435408334696187e-05, + "loss": 0.1845, + "step": 34890 + }, + { + "epoch": 1.55, + "learning_rate": 2.4427974606384086e-05, + "loss": 0.1534, + "step": 34900 + }, + { + "epoch": 1.55, + "learning_rate": 2.442054087807199e-05, + "loss": 0.1282, + "step": 34910 + }, + { + "epoch": 1.55, + "learning_rate": 2.441310714975989e-05, + "loss": 0.1372, + "step": 34920 + }, + { + "epoch": 1.55, + "learning_rate": 2.4405673421447794e-05, + "loss": 0.1543, + "step": 34930 + }, + { + "epoch": 1.55, + "learning_rate": 2.4398239693135697e-05, + "loss": 0.1471, + "step": 34940 + }, + { + "epoch": 1.55, + "learning_rate": 2.43908059648236e-05, + "loss": 0.118, + "step": 34950 + }, + { + "epoch": 1.55, + "learning_rate": 2.4383372236511502e-05, + "loss": 0.1549, + "step": 34960 + }, + { + "epoch": 1.55, + "learning_rate": 2.4375938508199404e-05, + "loss": 0.121, + "step": 34970 + }, + { + "epoch": 1.55, + "learning_rate": 2.4368504779887304e-05, + "loss": 0.1723, + "step": 34980 + }, + { + "epoch": 1.55, + "learning_rate": 2.4361071051575206e-05, + "loss": 0.1227, + "step": 34990 + }, + { + "epoch": 1.55, + "learning_rate": 2.435363732326311e-05, + "loss": 0.1333, + "step": 35000 + }, + { + "epoch": 1.55, + "learning_rate": 2.4346203594951015e-05, + "loss": 0.1663, + "step": 35010 + }, + { + "epoch": 1.55, + "learning_rate": 2.4338769866638917e-05, + "loss": 0.1521, + "step": 35020 + }, + { + "epoch": 1.55, + "learning_rate": 2.433133613832682e-05, + "loss": 0.1449, + "step": 35030 + }, + { + "epoch": 1.55, + "learning_rate": 2.432390241001472e-05, + "loss": 0.1986, + "step": 35040 + }, + { + "epoch": 1.55, + "learning_rate": 2.4316468681702622e-05, + "loss": 0.1809, + "step": 35050 + }, + { + "epoch": 1.55, + "learning_rate": 2.4309034953390524e-05, + "loss": 0.1288, + "step": 35060 + }, + { + "epoch": 1.55, + "learning_rate": 2.4301601225078427e-05, + "loss": 0.1436, + "step": 35070 + }, + { + "epoch": 1.55, + "learning_rate": 2.429416749676633e-05, + "loss": 0.1434, + "step": 35080 + }, + { + "epoch": 1.55, + "learning_rate": 2.4286733768454232e-05, + "loss": 0.1354, + "step": 35090 + }, + { + "epoch": 1.55, + "learning_rate": 2.4279300040142135e-05, + "loss": 0.1512, + "step": 35100 + }, + { + "epoch": 1.55, + "learning_rate": 2.4271866311830034e-05, + "loss": 0.0919, + "step": 35110 + }, + { + "epoch": 1.55, + "learning_rate": 2.4264432583517936e-05, + "loss": 0.1083, + "step": 35120 + }, + { + "epoch": 1.56, + "learning_rate": 2.4256998855205842e-05, + "loss": 0.1843, + "step": 35130 + }, + { + "epoch": 1.56, + "learning_rate": 2.4249565126893745e-05, + "loss": 0.1924, + "step": 35140 + }, + { + "epoch": 1.56, + "learning_rate": 2.4242131398581647e-05, + "loss": 0.1512, + "step": 35150 + }, + { + "epoch": 1.56, + "learning_rate": 2.423469767026955e-05, + "loss": 0.1163, + "step": 35160 + }, + { + "epoch": 1.56, + "learning_rate": 2.4227263941957453e-05, + "loss": 0.1116, + "step": 35170 + }, + { + "epoch": 1.56, + "learning_rate": 2.4219830213645352e-05, + "loss": 0.1524, + "step": 35180 + }, + { + "epoch": 1.56, + "learning_rate": 2.4212396485333254e-05, + "loss": 0.1384, + "step": 35190 + }, + { + "epoch": 1.56, + "learning_rate": 2.4204962757021157e-05, + "loss": 0.1199, + "step": 35200 + }, + { + "epoch": 1.56, + "learning_rate": 2.419752902870906e-05, + "loss": 0.1131, + "step": 35210 + }, + { + "epoch": 1.56, + "learning_rate": 2.4190095300396962e-05, + "loss": 0.1457, + "step": 35220 + }, + { + "epoch": 1.56, + "learning_rate": 2.4182661572084865e-05, + "loss": 0.1204, + "step": 35230 + }, + { + "epoch": 1.56, + "learning_rate": 2.4175227843772767e-05, + "loss": 0.108, + "step": 35240 + }, + { + "epoch": 1.56, + "learning_rate": 2.4167794115460667e-05, + "loss": 0.1487, + "step": 35250 + }, + { + "epoch": 1.56, + "learning_rate": 2.4160360387148572e-05, + "loss": 0.1383, + "step": 35260 + }, + { + "epoch": 1.56, + "learning_rate": 2.4152926658836475e-05, + "loss": 0.188, + "step": 35270 + }, + { + "epoch": 1.56, + "learning_rate": 2.4145492930524378e-05, + "loss": 0.1154, + "step": 35280 + }, + { + "epoch": 1.56, + "learning_rate": 2.413805920221228e-05, + "loss": 0.1511, + "step": 35290 + }, + { + "epoch": 1.56, + "learning_rate": 2.4130625473900183e-05, + "loss": 0.1093, + "step": 35300 + }, + { + "epoch": 1.56, + "learning_rate": 2.4123191745588082e-05, + "loss": 0.099, + "step": 35310 + }, + { + "epoch": 1.56, + "learning_rate": 2.4115758017275985e-05, + "loss": 0.2606, + "step": 35320 + }, + { + "epoch": 1.56, + "learning_rate": 2.4108324288963887e-05, + "loss": 0.1668, + "step": 35330 + }, + { + "epoch": 1.56, + "learning_rate": 2.410089056065179e-05, + "loss": 0.131, + "step": 35340 + }, + { + "epoch": 1.57, + "learning_rate": 2.4093456832339692e-05, + "loss": 0.1331, + "step": 35350 + }, + { + "epoch": 1.57, + "learning_rate": 2.4086023104027595e-05, + "loss": 0.1617, + "step": 35360 + }, + { + "epoch": 1.57, + "learning_rate": 2.4078589375715497e-05, + "loss": 0.1177, + "step": 35370 + }, + { + "epoch": 1.57, + "learning_rate": 2.40711556474034e-05, + "loss": 0.0902, + "step": 35380 + }, + { + "epoch": 1.57, + "learning_rate": 2.4063721919091303e-05, + "loss": 0.1651, + "step": 35390 + }, + { + "epoch": 1.57, + "learning_rate": 2.4056288190779205e-05, + "loss": 0.1576, + "step": 35400 + }, + { + "epoch": 1.57, + "learning_rate": 2.4048854462467108e-05, + "loss": 0.0968, + "step": 35410 + }, + { + "epoch": 1.57, + "learning_rate": 2.404142073415501e-05, + "loss": 0.1548, + "step": 35420 + }, + { + "epoch": 1.57, + "learning_rate": 2.4033987005842913e-05, + "loss": 0.1386, + "step": 35430 + }, + { + "epoch": 1.57, + "learning_rate": 2.4026553277530816e-05, + "loss": 0.1142, + "step": 35440 + }, + { + "epoch": 1.57, + "learning_rate": 2.4019119549218715e-05, + "loss": 0.1369, + "step": 35450 + }, + { + "epoch": 1.57, + "learning_rate": 2.4011685820906617e-05, + "loss": 0.1407, + "step": 35460 + }, + { + "epoch": 1.57, + "learning_rate": 2.400425209259452e-05, + "loss": 0.185, + "step": 35470 + }, + { + "epoch": 1.57, + "learning_rate": 2.3996818364282422e-05, + "loss": 0.1832, + "step": 35480 + }, + { + "epoch": 1.57, + "learning_rate": 2.3989384635970325e-05, + "loss": 0.1357, + "step": 35490 + }, + { + "epoch": 1.57, + "learning_rate": 2.398195090765823e-05, + "loss": 0.1047, + "step": 35500 + }, + { + "epoch": 1.57, + "learning_rate": 2.397451717934613e-05, + "loss": 0.1935, + "step": 35510 + }, + { + "epoch": 1.57, + "learning_rate": 2.3967083451034033e-05, + "loss": 0.1061, + "step": 35520 + }, + { + "epoch": 1.57, + "learning_rate": 2.3959649722721935e-05, + "loss": 0.1774, + "step": 35530 + }, + { + "epoch": 1.57, + "learning_rate": 2.3952215994409838e-05, + "loss": 0.1075, + "step": 35540 + }, + { + "epoch": 1.57, + "learning_rate": 2.394478226609774e-05, + "loss": 0.1509, + "step": 35550 + }, + { + "epoch": 1.57, + "learning_rate": 2.3937348537785643e-05, + "loss": 0.0996, + "step": 35560 + }, + { + "epoch": 1.57, + "learning_rate": 2.3929914809473546e-05, + "loss": 0.1611, + "step": 35570 + }, + { + "epoch": 1.58, + "learning_rate": 2.3922481081161445e-05, + "loss": 0.0998, + "step": 35580 + }, + { + "epoch": 1.58, + "learning_rate": 2.3915047352849347e-05, + "loss": 0.1317, + "step": 35590 + }, + { + "epoch": 1.58, + "learning_rate": 2.390761362453725e-05, + "loss": 0.114, + "step": 35600 + }, + { + "epoch": 1.58, + "learning_rate": 2.3900179896225153e-05, + "loss": 0.074, + "step": 35610 + }, + { + "epoch": 1.58, + "learning_rate": 2.389274616791306e-05, + "loss": 0.1895, + "step": 35620 + }, + { + "epoch": 1.58, + "learning_rate": 2.388531243960096e-05, + "loss": 0.1103, + "step": 35630 + }, + { + "epoch": 1.58, + "learning_rate": 2.387787871128886e-05, + "loss": 0.1169, + "step": 35640 + }, + { + "epoch": 1.58, + "learning_rate": 2.3870444982976763e-05, + "loss": 0.1618, + "step": 35650 + }, + { + "epoch": 1.58, + "learning_rate": 2.3863011254664665e-05, + "loss": 0.1537, + "step": 35660 + }, + { + "epoch": 1.58, + "learning_rate": 2.3855577526352568e-05, + "loss": 0.1637, + "step": 35670 + }, + { + "epoch": 1.58, + "learning_rate": 2.384814379804047e-05, + "loss": 0.1615, + "step": 35680 + }, + { + "epoch": 1.58, + "learning_rate": 2.3840710069728373e-05, + "loss": 0.1545, + "step": 35690 + }, + { + "epoch": 1.58, + "learning_rate": 2.3833276341416276e-05, + "loss": 0.1743, + "step": 35700 + }, + { + "epoch": 1.58, + "learning_rate": 2.382584261310418e-05, + "loss": 0.1501, + "step": 35710 + }, + { + "epoch": 1.58, + "learning_rate": 2.3818408884792078e-05, + "loss": 0.1764, + "step": 35720 + }, + { + "epoch": 1.58, + "learning_rate": 2.381097515647998e-05, + "loss": 0.1236, + "step": 35730 + }, + { + "epoch": 1.58, + "learning_rate": 2.3803541428167883e-05, + "loss": 0.086, + "step": 35740 + }, + { + "epoch": 1.58, + "learning_rate": 2.379610769985579e-05, + "loss": 0.1527, + "step": 35750 + }, + { + "epoch": 1.58, + "learning_rate": 2.378867397154369e-05, + "loss": 0.1591, + "step": 35760 + }, + { + "epoch": 1.58, + "learning_rate": 2.3781240243231594e-05, + "loss": 0.1393, + "step": 35770 + }, + { + "epoch": 1.58, + "learning_rate": 2.3773806514919493e-05, + "loss": 0.114, + "step": 35780 + }, + { + "epoch": 1.58, + "learning_rate": 2.3766372786607396e-05, + "loss": 0.1817, + "step": 35790 + }, + { + "epoch": 1.58, + "learning_rate": 2.3758939058295298e-05, + "loss": 0.0687, + "step": 35800 + }, + { + "epoch": 1.59, + "learning_rate": 2.37515053299832e-05, + "loss": 0.1496, + "step": 35810 + }, + { + "epoch": 1.59, + "learning_rate": 2.3744071601671103e-05, + "loss": 0.1424, + "step": 35820 + }, + { + "epoch": 1.59, + "learning_rate": 2.3736637873359006e-05, + "loss": 0.1066, + "step": 35830 + }, + { + "epoch": 1.59, + "learning_rate": 2.372920414504691e-05, + "loss": 0.1715, + "step": 35840 + }, + { + "epoch": 1.59, + "learning_rate": 2.3721770416734808e-05, + "loss": 0.1605, + "step": 35850 + }, + { + "epoch": 1.59, + "learning_rate": 2.371433668842271e-05, + "loss": 0.1487, + "step": 35860 + }, + { + "epoch": 1.59, + "learning_rate": 2.3706902960110616e-05, + "loss": 0.1458, + "step": 35870 + }, + { + "epoch": 1.59, + "learning_rate": 2.369946923179852e-05, + "loss": 0.1552, + "step": 35880 + }, + { + "epoch": 1.59, + "learning_rate": 2.369203550348642e-05, + "loss": 0.1987, + "step": 35890 + }, + { + "epoch": 1.59, + "learning_rate": 2.3684601775174324e-05, + "loss": 0.1774, + "step": 35900 + }, + { + "epoch": 1.59, + "learning_rate": 2.3677168046862223e-05, + "loss": 0.1756, + "step": 35910 + }, + { + "epoch": 1.59, + "learning_rate": 2.3669734318550126e-05, + "loss": 0.1547, + "step": 35920 + }, + { + "epoch": 1.59, + "learning_rate": 2.366230059023803e-05, + "loss": 0.2107, + "step": 35930 + }, + { + "epoch": 1.59, + "learning_rate": 2.365486686192593e-05, + "loss": 0.1231, + "step": 35940 + }, + { + "epoch": 1.59, + "learning_rate": 2.3647433133613834e-05, + "loss": 0.1399, + "step": 35950 + }, + { + "epoch": 1.59, + "learning_rate": 2.3639999405301736e-05, + "loss": 0.1506, + "step": 35960 + }, + { + "epoch": 1.59, + "learning_rate": 2.363256567698964e-05, + "loss": 0.1234, + "step": 35970 + }, + { + "epoch": 1.59, + "learning_rate": 2.362513194867754e-05, + "loss": 0.1785, + "step": 35980 + }, + { + "epoch": 1.59, + "learning_rate": 2.3617698220365444e-05, + "loss": 0.1746, + "step": 35990 + }, + { + "epoch": 1.59, + "learning_rate": 2.3610264492053346e-05, + "loss": 0.1301, + "step": 36000 + }, + { + "epoch": 1.59, + "learning_rate": 2.360283076374125e-05, + "loss": 0.1433, + "step": 36010 + }, + { + "epoch": 1.59, + "learning_rate": 2.359539703542915e-05, + "loss": 0.1924, + "step": 36020 + }, + { + "epoch": 1.6, + "learning_rate": 2.3587963307117054e-05, + "loss": 0.1468, + "step": 36030 + }, + { + "epoch": 1.6, + "learning_rate": 2.3580529578804957e-05, + "loss": 0.15, + "step": 36040 + }, + { + "epoch": 1.6, + "learning_rate": 2.3573095850492856e-05, + "loss": 0.167, + "step": 36050 + }, + { + "epoch": 1.6, + "learning_rate": 2.356566212218076e-05, + "loss": 0.1413, + "step": 36060 + }, + { + "epoch": 1.6, + "learning_rate": 2.355822839386866e-05, + "loss": 0.1446, + "step": 36070 + }, + { + "epoch": 1.6, + "learning_rate": 2.3550794665556564e-05, + "loss": 0.1273, + "step": 36080 + }, + { + "epoch": 1.6, + "learning_rate": 2.3543360937244466e-05, + "loss": 0.1069, + "step": 36090 + }, + { + "epoch": 1.6, + "learning_rate": 2.353592720893237e-05, + "loss": 0.1541, + "step": 36100 + }, + { + "epoch": 1.6, + "learning_rate": 2.352849348062027e-05, + "loss": 0.1314, + "step": 36110 + }, + { + "epoch": 1.6, + "learning_rate": 2.3521059752308174e-05, + "loss": 0.1106, + "step": 36120 + }, + { + "epoch": 1.6, + "learning_rate": 2.3513626023996077e-05, + "loss": 0.1551, + "step": 36130 + }, + { + "epoch": 1.6, + "learning_rate": 2.350619229568398e-05, + "loss": 0.0987, + "step": 36140 + }, + { + "epoch": 1.6, + "learning_rate": 2.3498758567371882e-05, + "loss": 0.1717, + "step": 36150 + }, + { + "epoch": 1.6, + "learning_rate": 2.3491324839059784e-05, + "loss": 0.1017, + "step": 36160 + }, + { + "epoch": 1.6, + "learning_rate": 2.3483891110747687e-05, + "loss": 0.1436, + "step": 36170 + }, + { + "epoch": 1.6, + "learning_rate": 2.3476457382435586e-05, + "loss": 0.1336, + "step": 36180 + }, + { + "epoch": 1.6, + "learning_rate": 2.346902365412349e-05, + "loss": 0.1503, + "step": 36190 + }, + { + "epoch": 1.6, + "learning_rate": 2.346158992581139e-05, + "loss": 0.1503, + "step": 36200 + }, + { + "epoch": 1.6, + "learning_rate": 2.3454156197499294e-05, + "loss": 0.1203, + "step": 36210 + }, + { + "epoch": 1.6, + "learning_rate": 2.3446722469187196e-05, + "loss": 0.1206, + "step": 36220 + }, + { + "epoch": 1.6, + "learning_rate": 2.34392887408751e-05, + "loss": 0.1285, + "step": 36230 + }, + { + "epoch": 1.6, + "learning_rate": 2.3431855012563e-05, + "loss": 0.1152, + "step": 36240 + }, + { + "epoch": 1.6, + "learning_rate": 2.3424421284250904e-05, + "loss": 0.1841, + "step": 36250 + }, + { + "epoch": 1.61, + "learning_rate": 2.3416987555938807e-05, + "loss": 0.1042, + "step": 36260 + }, + { + "epoch": 1.61, + "learning_rate": 2.340955382762671e-05, + "loss": 0.1872, + "step": 36270 + }, + { + "epoch": 1.61, + "learning_rate": 2.3402120099314612e-05, + "loss": 0.1576, + "step": 36280 + }, + { + "epoch": 1.61, + "learning_rate": 2.3394686371002514e-05, + "loss": 0.1559, + "step": 36290 + }, + { + "epoch": 1.61, + "learning_rate": 2.3387252642690417e-05, + "loss": 0.1614, + "step": 36300 + }, + { + "epoch": 1.61, + "learning_rate": 2.337981891437832e-05, + "loss": 0.132, + "step": 36310 + }, + { + "epoch": 1.61, + "learning_rate": 2.337238518606622e-05, + "loss": 0.2252, + "step": 36320 + }, + { + "epoch": 1.61, + "learning_rate": 2.336495145775412e-05, + "loss": 0.1059, + "step": 36330 + }, + { + "epoch": 1.61, + "learning_rate": 2.3357517729442024e-05, + "loss": 0.1423, + "step": 36340 + }, + { + "epoch": 1.61, + "learning_rate": 2.3350084001129927e-05, + "loss": 0.1361, + "step": 36350 + }, + { + "epoch": 1.61, + "learning_rate": 2.3342650272817833e-05, + "loss": 0.1526, + "step": 36360 + }, + { + "epoch": 1.61, + "learning_rate": 2.3335216544505735e-05, + "loss": 0.1447, + "step": 36370 + }, + { + "epoch": 1.61, + "learning_rate": 2.3327782816193634e-05, + "loss": 0.1422, + "step": 36380 + }, + { + "epoch": 1.61, + "learning_rate": 2.3320349087881537e-05, + "loss": 0.159, + "step": 36390 + }, + { + "epoch": 1.61, + "learning_rate": 2.331291535956944e-05, + "loss": 0.1912, + "step": 36400 + }, + { + "epoch": 1.61, + "learning_rate": 2.3305481631257342e-05, + "loss": 0.1861, + "step": 36410 + }, + { + "epoch": 1.61, + "learning_rate": 2.3298047902945245e-05, + "loss": 0.1645, + "step": 36420 + }, + { + "epoch": 1.61, + "learning_rate": 2.3290614174633147e-05, + "loss": 0.1797, + "step": 36430 + }, + { + "epoch": 1.61, + "learning_rate": 2.328318044632105e-05, + "loss": 0.1409, + "step": 36440 + }, + { + "epoch": 1.61, + "learning_rate": 2.327574671800895e-05, + "loss": 0.1255, + "step": 36450 + }, + { + "epoch": 1.61, + "learning_rate": 2.326831298969685e-05, + "loss": 0.1645, + "step": 36460 + }, + { + "epoch": 1.61, + "learning_rate": 2.3260879261384754e-05, + "loss": 0.1588, + "step": 36470 + }, + { + "epoch": 1.62, + "learning_rate": 2.325344553307266e-05, + "loss": 0.1222, + "step": 36480 + }, + { + "epoch": 1.62, + "learning_rate": 2.3246011804760563e-05, + "loss": 0.1296, + "step": 36490 + }, + { + "epoch": 1.62, + "learning_rate": 2.3238578076448465e-05, + "loss": 0.1378, + "step": 36500 + }, + { + "epoch": 1.62, + "learning_rate": 2.3231144348136364e-05, + "loss": 0.1719, + "step": 36510 + }, + { + "epoch": 1.62, + "learning_rate": 2.3223710619824267e-05, + "loss": 0.1373, + "step": 36520 + }, + { + "epoch": 1.62, + "learning_rate": 2.321627689151217e-05, + "loss": 0.1255, + "step": 36530 + }, + { + "epoch": 1.62, + "learning_rate": 2.3208843163200072e-05, + "loss": 0.2024, + "step": 36540 + }, + { + "epoch": 1.62, + "learning_rate": 2.3201409434887975e-05, + "loss": 0.1947, + "step": 36550 + }, + { + "epoch": 1.62, + "learning_rate": 2.3193975706575877e-05, + "loss": 0.1252, + "step": 36560 + }, + { + "epoch": 1.62, + "learning_rate": 2.318654197826378e-05, + "loss": 0.1556, + "step": 36570 + }, + { + "epoch": 1.62, + "learning_rate": 2.3179108249951682e-05, + "loss": 0.1698, + "step": 36580 + }, + { + "epoch": 1.62, + "learning_rate": 2.317167452163958e-05, + "loss": 0.1791, + "step": 36590 + }, + { + "epoch": 1.62, + "learning_rate": 2.3164240793327484e-05, + "loss": 0.1361, + "step": 36600 + }, + { + "epoch": 1.62, + "learning_rate": 2.315680706501539e-05, + "loss": 0.1091, + "step": 36610 + }, + { + "epoch": 1.62, + "learning_rate": 2.3149373336703293e-05, + "loss": 0.1411, + "step": 36620 + }, + { + "epoch": 1.62, + "learning_rate": 2.3141939608391195e-05, + "loss": 0.17, + "step": 36630 + }, + { + "epoch": 1.62, + "learning_rate": 2.3134505880079098e-05, + "loss": 0.1352, + "step": 36640 + }, + { + "epoch": 1.62, + "learning_rate": 2.3127072151766997e-05, + "loss": 0.1815, + "step": 36650 + }, + { + "epoch": 1.62, + "learning_rate": 2.31196384234549e-05, + "loss": 0.1483, + "step": 36660 + }, + { + "epoch": 1.62, + "learning_rate": 2.3112204695142802e-05, + "loss": 0.1911, + "step": 36670 + }, + { + "epoch": 1.62, + "learning_rate": 2.3104770966830705e-05, + "loss": 0.1458, + "step": 36680 + }, + { + "epoch": 1.62, + "learning_rate": 2.3097337238518607e-05, + "loss": 0.1301, + "step": 36690 + }, + { + "epoch": 1.62, + "learning_rate": 2.308990351020651e-05, + "loss": 0.1165, + "step": 36700 + }, + { + "epoch": 1.63, + "learning_rate": 2.3082469781894413e-05, + "loss": 0.1493, + "step": 36710 + }, + { + "epoch": 1.63, + "learning_rate": 2.3075036053582312e-05, + "loss": 0.1439, + "step": 36720 + }, + { + "epoch": 1.63, + "learning_rate": 2.3067602325270218e-05, + "loss": 0.1175, + "step": 36730 + }, + { + "epoch": 1.63, + "learning_rate": 2.306016859695812e-05, + "loss": 0.0809, + "step": 36740 + }, + { + "epoch": 1.63, + "learning_rate": 2.3052734868646023e-05, + "loss": 0.147, + "step": 36750 + }, + { + "epoch": 1.63, + "learning_rate": 2.3045301140333926e-05, + "loss": 0.1503, + "step": 36760 + }, + { + "epoch": 1.63, + "learning_rate": 2.3037867412021828e-05, + "loss": 0.165, + "step": 36770 + }, + { + "epoch": 1.63, + "learning_rate": 2.3030433683709727e-05, + "loss": 0.194, + "step": 36780 + }, + { + "epoch": 1.63, + "learning_rate": 2.302299995539763e-05, + "loss": 0.1698, + "step": 36790 + }, + { + "epoch": 1.63, + "learning_rate": 2.3015566227085532e-05, + "loss": 0.1576, + "step": 36800 + }, + { + "epoch": 1.63, + "learning_rate": 2.3008132498773435e-05, + "loss": 0.174, + "step": 36810 + }, + { + "epoch": 1.63, + "learning_rate": 2.3000698770461338e-05, + "loss": 0.1641, + "step": 36820 + }, + { + "epoch": 1.63, + "learning_rate": 2.299326504214924e-05, + "loss": 0.1537, + "step": 36830 + }, + { + "epoch": 1.63, + "learning_rate": 2.2985831313837143e-05, + "loss": 0.1478, + "step": 36840 + }, + { + "epoch": 1.63, + "learning_rate": 2.2978397585525045e-05, + "loss": 0.1491, + "step": 36850 + }, + { + "epoch": 1.63, + "learning_rate": 2.2970963857212948e-05, + "loss": 0.1404, + "step": 36860 + }, + { + "epoch": 1.63, + "learning_rate": 2.296353012890085e-05, + "loss": 0.1979, + "step": 36870 + }, + { + "epoch": 1.63, + "learning_rate": 2.2956096400588753e-05, + "loss": 0.2271, + "step": 36880 + }, + { + "epoch": 1.63, + "learning_rate": 2.2948662672276656e-05, + "loss": 0.176, + "step": 36890 + }, + { + "epoch": 1.63, + "learning_rate": 2.2941228943964558e-05, + "loss": 0.1743, + "step": 36900 + }, + { + "epoch": 1.63, + "learning_rate": 2.293379521565246e-05, + "loss": 0.1149, + "step": 36910 + }, + { + "epoch": 1.63, + "learning_rate": 2.292636148734036e-05, + "loss": 0.1462, + "step": 36920 + }, + { + "epoch": 1.64, + "learning_rate": 2.2918927759028263e-05, + "loss": 0.0943, + "step": 36930 + }, + { + "epoch": 1.64, + "learning_rate": 2.2911494030716165e-05, + "loss": 0.1126, + "step": 36940 + }, + { + "epoch": 1.64, + "learning_rate": 2.2904060302404068e-05, + "loss": 0.1818, + "step": 36950 + }, + { + "epoch": 1.64, + "learning_rate": 2.289662657409197e-05, + "loss": 0.1315, + "step": 36960 + }, + { + "epoch": 1.64, + "learning_rate": 2.2889192845779876e-05, + "loss": 0.1447, + "step": 36970 + }, + { + "epoch": 1.64, + "learning_rate": 2.2881759117467775e-05, + "loss": 0.175, + "step": 36980 + }, + { + "epoch": 1.64, + "learning_rate": 2.2874325389155678e-05, + "loss": 0.1204, + "step": 36990 + }, + { + "epoch": 1.64, + "learning_rate": 2.286689166084358e-05, + "loss": 0.1856, + "step": 37000 + }, + { + "epoch": 1.64, + "learning_rate": 2.2859457932531483e-05, + "loss": 0.145, + "step": 37010 + }, + { + "epoch": 1.64, + "learning_rate": 2.2852024204219386e-05, + "loss": 0.1422, + "step": 37020 + }, + { + "epoch": 1.64, + "learning_rate": 2.284459047590729e-05, + "loss": 0.1443, + "step": 37030 + }, + { + "epoch": 1.64, + "learning_rate": 2.283715674759519e-05, + "loss": 0.1535, + "step": 37040 + }, + { + "epoch": 1.64, + "learning_rate": 2.2829723019283094e-05, + "loss": 0.1553, + "step": 37050 + }, + { + "epoch": 1.64, + "learning_rate": 2.2822289290970993e-05, + "loss": 0.1628, + "step": 37060 + }, + { + "epoch": 1.64, + "learning_rate": 2.2814855562658895e-05, + "loss": 0.1478, + "step": 37070 + }, + { + "epoch": 1.64, + "learning_rate": 2.2807421834346798e-05, + "loss": 0.1823, + "step": 37080 + }, + { + "epoch": 1.64, + "learning_rate": 2.27999881060347e-05, + "loss": 0.1548, + "step": 37090 + }, + { + "epoch": 1.64, + "learning_rate": 2.2792554377722606e-05, + "loss": 0.1125, + "step": 37100 + }, + { + "epoch": 1.64, + "learning_rate": 2.278512064941051e-05, + "loss": 0.1794, + "step": 37110 + }, + { + "epoch": 1.64, + "learning_rate": 2.2777686921098408e-05, + "loss": 0.132, + "step": 37120 + }, + { + "epoch": 1.64, + "learning_rate": 2.277025319278631e-05, + "loss": 0.1618, + "step": 37130 + }, + { + "epoch": 1.64, + "learning_rate": 2.2762819464474213e-05, + "loss": 0.1427, + "step": 37140 + }, + { + "epoch": 1.64, + "learning_rate": 2.2755385736162116e-05, + "loss": 0.1423, + "step": 37150 + }, + { + "epoch": 1.65, + "learning_rate": 2.274795200785002e-05, + "loss": 0.1587, + "step": 37160 + }, + { + "epoch": 1.65, + "learning_rate": 2.274051827953792e-05, + "loss": 0.1702, + "step": 37170 + }, + { + "epoch": 1.65, + "learning_rate": 2.2733084551225824e-05, + "loss": 0.1358, + "step": 37180 + }, + { + "epoch": 1.65, + "learning_rate": 2.2725650822913723e-05, + "loss": 0.0859, + "step": 37190 + }, + { + "epoch": 1.65, + "learning_rate": 2.2718217094601625e-05, + "loss": 0.1273, + "step": 37200 + }, + { + "epoch": 1.65, + "learning_rate": 2.2710783366289528e-05, + "loss": 0.0804, + "step": 37210 + }, + { + "epoch": 1.65, + "learning_rate": 2.2703349637977434e-05, + "loss": 0.1776, + "step": 37220 + }, + { + "epoch": 1.65, + "learning_rate": 2.2695915909665337e-05, + "loss": 0.1976, + "step": 37230 + }, + { + "epoch": 1.65, + "learning_rate": 2.268848218135324e-05, + "loss": 0.1842, + "step": 37240 + }, + { + "epoch": 1.65, + "learning_rate": 2.268104845304114e-05, + "loss": 0.1664, + "step": 37250 + }, + { + "epoch": 1.65, + "learning_rate": 2.267361472472904e-05, + "loss": 0.1628, + "step": 37260 + }, + { + "epoch": 1.65, + "learning_rate": 2.2666180996416944e-05, + "loss": 0.1308, + "step": 37270 + }, + { + "epoch": 1.65, + "learning_rate": 2.2658747268104846e-05, + "loss": 0.1461, + "step": 37280 + }, + { + "epoch": 1.65, + "learning_rate": 2.265131353979275e-05, + "loss": 0.1315, + "step": 37290 + }, + { + "epoch": 1.65, + "learning_rate": 2.264387981148065e-05, + "loss": 0.1494, + "step": 37300 + }, + { + "epoch": 1.65, + "learning_rate": 2.2636446083168554e-05, + "loss": 0.2113, + "step": 37310 + }, + { + "epoch": 1.65, + "learning_rate": 2.2629012354856456e-05, + "loss": 0.2241, + "step": 37320 + }, + { + "epoch": 1.65, + "learning_rate": 2.2621578626544356e-05, + "loss": 0.1398, + "step": 37330 + }, + { + "epoch": 1.65, + "learning_rate": 2.261414489823226e-05, + "loss": 0.1236, + "step": 37340 + }, + { + "epoch": 1.65, + "learning_rate": 2.2606711169920164e-05, + "loss": 0.0896, + "step": 37350 + }, + { + "epoch": 1.65, + "learning_rate": 2.2599277441608067e-05, + "loss": 0.1438, + "step": 37360 + }, + { + "epoch": 1.65, + "learning_rate": 2.259184371329597e-05, + "loss": 0.1741, + "step": 37370 + }, + { + "epoch": 1.65, + "learning_rate": 2.2584409984983872e-05, + "loss": 0.179, + "step": 37380 + }, + { + "epoch": 1.66, + "learning_rate": 2.257697625667177e-05, + "loss": 0.1261, + "step": 37390 + }, + { + "epoch": 1.66, + "learning_rate": 2.2569542528359674e-05, + "loss": 0.1379, + "step": 37400 + }, + { + "epoch": 1.66, + "learning_rate": 2.2562108800047576e-05, + "loss": 0.132, + "step": 37410 + }, + { + "epoch": 1.66, + "learning_rate": 2.255467507173548e-05, + "loss": 0.169, + "step": 37420 + }, + { + "epoch": 1.66, + "learning_rate": 2.254724134342338e-05, + "loss": 0.1755, + "step": 37430 + }, + { + "epoch": 1.66, + "learning_rate": 2.2539807615111284e-05, + "loss": 0.1474, + "step": 37440 + }, + { + "epoch": 1.66, + "learning_rate": 2.2532373886799187e-05, + "loss": 0.1632, + "step": 37450 + }, + { + "epoch": 1.66, + "learning_rate": 2.252494015848709e-05, + "loss": 0.1748, + "step": 37460 + }, + { + "epoch": 1.66, + "learning_rate": 2.2517506430174992e-05, + "loss": 0.1795, + "step": 37470 + }, + { + "epoch": 1.66, + "learning_rate": 2.2510072701862894e-05, + "loss": 0.1948, + "step": 37480 + }, + { + "epoch": 1.66, + "learning_rate": 2.2502638973550797e-05, + "loss": 0.1682, + "step": 37490 + }, + { + "epoch": 1.66, + "learning_rate": 2.24952052452387e-05, + "loss": 0.1855, + "step": 37500 + }, + { + "epoch": 1.66, + "learning_rate": 2.2487771516926602e-05, + "loss": 0.1403, + "step": 37510 + }, + { + "epoch": 1.66, + "learning_rate": 2.24803377886145e-05, + "loss": 0.1344, + "step": 37520 + }, + { + "epoch": 1.66, + "learning_rate": 2.2472904060302404e-05, + "loss": 0.1273, + "step": 37530 + }, + { + "epoch": 1.66, + "learning_rate": 2.2465470331990306e-05, + "loss": 0.1837, + "step": 37540 + }, + { + "epoch": 1.66, + "learning_rate": 2.245803660367821e-05, + "loss": 0.1326, + "step": 37550 + }, + { + "epoch": 1.66, + "learning_rate": 2.245060287536611e-05, + "loss": 0.1071, + "step": 37560 + }, + { + "epoch": 1.66, + "learning_rate": 2.2443169147054014e-05, + "loss": 0.2249, + "step": 37570 + }, + { + "epoch": 1.66, + "learning_rate": 2.2435735418741917e-05, + "loss": 0.1512, + "step": 37580 + }, + { + "epoch": 1.66, + "learning_rate": 2.242830169042982e-05, + "loss": 0.1756, + "step": 37590 + }, + { + "epoch": 1.66, + "learning_rate": 2.2420867962117722e-05, + "loss": 0.1484, + "step": 37600 + }, + { + "epoch": 1.67, + "learning_rate": 2.2413434233805624e-05, + "loss": 0.1804, + "step": 37610 + }, + { + "epoch": 1.67, + "learning_rate": 2.2406000505493527e-05, + "loss": 0.1178, + "step": 37620 + }, + { + "epoch": 1.67, + "learning_rate": 2.239856677718143e-05, + "loss": 0.1351, + "step": 37630 + }, + { + "epoch": 1.67, + "learning_rate": 2.2391133048869332e-05, + "loss": 0.1189, + "step": 37640 + }, + { + "epoch": 1.67, + "learning_rate": 2.2383699320557235e-05, + "loss": 0.1671, + "step": 37650 + }, + { + "epoch": 1.67, + "learning_rate": 2.2376265592245134e-05, + "loss": 0.1875, + "step": 37660 + }, + { + "epoch": 1.67, + "learning_rate": 2.2368831863933037e-05, + "loss": 0.1668, + "step": 37670 + }, + { + "epoch": 1.67, + "learning_rate": 2.236139813562094e-05, + "loss": 0.1475, + "step": 37680 + }, + { + "epoch": 1.67, + "learning_rate": 2.2353964407308842e-05, + "loss": 0.1092, + "step": 37690 + }, + { + "epoch": 1.67, + "learning_rate": 2.2346530678996744e-05, + "loss": 0.1434, + "step": 37700 + }, + { + "epoch": 1.67, + "learning_rate": 2.233909695068465e-05, + "loss": 0.112, + "step": 37710 + }, + { + "epoch": 1.67, + "learning_rate": 2.233166322237255e-05, + "loss": 0.2474, + "step": 37720 + }, + { + "epoch": 1.67, + "learning_rate": 2.2324229494060452e-05, + "loss": 0.144, + "step": 37730 + }, + { + "epoch": 1.67, + "learning_rate": 2.2316795765748355e-05, + "loss": 0.1134, + "step": 37740 + }, + { + "epoch": 1.67, + "learning_rate": 2.2309362037436257e-05, + "loss": 0.1532, + "step": 37750 + }, + { + "epoch": 1.67, + "learning_rate": 2.230192830912416e-05, + "loss": 0.1463, + "step": 37760 + }, + { + "epoch": 1.67, + "learning_rate": 2.2294494580812062e-05, + "loss": 0.1514, + "step": 37770 + }, + { + "epoch": 1.67, + "learning_rate": 2.2287060852499965e-05, + "loss": 0.1679, + "step": 37780 + }, + { + "epoch": 1.67, + "learning_rate": 2.2279627124187864e-05, + "loss": 0.1397, + "step": 37790 + }, + { + "epoch": 1.67, + "learning_rate": 2.2272193395875767e-05, + "loss": 0.1082, + "step": 37800 + }, + { + "epoch": 1.67, + "learning_rate": 2.226475966756367e-05, + "loss": 0.1529, + "step": 37810 + }, + { + "epoch": 1.67, + "learning_rate": 2.2257325939251572e-05, + "loss": 0.1315, + "step": 37820 + }, + { + "epoch": 1.67, + "learning_rate": 2.2249892210939478e-05, + "loss": 0.1728, + "step": 37830 + }, + { + "epoch": 1.68, + "learning_rate": 2.224245848262738e-05, + "loss": 0.0742, + "step": 37840 + }, + { + "epoch": 1.68, + "learning_rate": 2.223502475431528e-05, + "loss": 0.1193, + "step": 37850 + }, + { + "epoch": 1.68, + "learning_rate": 2.2227591026003182e-05, + "loss": 0.1565, + "step": 37860 + }, + { + "epoch": 1.68, + "learning_rate": 2.2220157297691085e-05, + "loss": 0.1461, + "step": 37870 + }, + { + "epoch": 1.68, + "learning_rate": 2.2212723569378987e-05, + "loss": 0.17, + "step": 37880 + }, + { + "epoch": 1.68, + "learning_rate": 2.220528984106689e-05, + "loss": 0.1158, + "step": 37890 + }, + { + "epoch": 1.68, + "learning_rate": 2.2197856112754792e-05, + "loss": 0.1554, + "step": 37900 + }, + { + "epoch": 1.68, + "learning_rate": 2.2190422384442695e-05, + "loss": 0.1502, + "step": 37910 + }, + { + "epoch": 1.68, + "learning_rate": 2.2182988656130598e-05, + "loss": 0.1373, + "step": 37920 + }, + { + "epoch": 1.68, + "learning_rate": 2.2175554927818497e-05, + "loss": 0.1374, + "step": 37930 + }, + { + "epoch": 1.68, + "learning_rate": 2.21681211995064e-05, + "loss": 0.1506, + "step": 37940 + }, + { + "epoch": 1.68, + "learning_rate": 2.2160687471194305e-05, + "loss": 0.1472, + "step": 37950 + }, + { + "epoch": 1.68, + "learning_rate": 2.2153253742882208e-05, + "loss": 0.1321, + "step": 37960 + }, + { + "epoch": 1.68, + "learning_rate": 2.214582001457011e-05, + "loss": 0.1379, + "step": 37970 + }, + { + "epoch": 1.68, + "learning_rate": 2.2138386286258013e-05, + "loss": 0.1504, + "step": 37980 + }, + { + "epoch": 1.68, + "learning_rate": 2.2130952557945912e-05, + "loss": 0.1524, + "step": 37990 + }, + { + "epoch": 1.68, + "learning_rate": 2.2123518829633815e-05, + "loss": 0.1703, + "step": 38000 + }, + { + "epoch": 1.68, + "learning_rate": 2.2116085101321717e-05, + "loss": 0.122, + "step": 38010 + }, + { + "epoch": 1.68, + "learning_rate": 2.210865137300962e-05, + "loss": 0.1715, + "step": 38020 + }, + { + "epoch": 1.68, + "learning_rate": 2.2101217644697523e-05, + "loss": 0.1587, + "step": 38030 + }, + { + "epoch": 1.68, + "learning_rate": 2.2093783916385425e-05, + "loss": 0.1053, + "step": 38040 + }, + { + "epoch": 1.68, + "learning_rate": 2.2086350188073328e-05, + "loss": 0.128, + "step": 38050 + }, + { + "epoch": 1.69, + "learning_rate": 2.2078916459761227e-05, + "loss": 0.1304, + "step": 38060 + }, + { + "epoch": 1.69, + "learning_rate": 2.207148273144913e-05, + "loss": 0.1327, + "step": 38070 + }, + { + "epoch": 1.69, + "learning_rate": 2.2064049003137036e-05, + "loss": 0.11, + "step": 38080 + }, + { + "epoch": 1.69, + "learning_rate": 2.2056615274824938e-05, + "loss": 0.1445, + "step": 38090 + }, + { + "epoch": 1.69, + "learning_rate": 2.204918154651284e-05, + "loss": 0.1451, + "step": 38100 + }, + { + "epoch": 1.69, + "learning_rate": 2.2041747818200743e-05, + "loss": 0.1912, + "step": 38110 + }, + { + "epoch": 1.69, + "learning_rate": 2.2034314089888642e-05, + "loss": 0.1768, + "step": 38120 + }, + { + "epoch": 1.69, + "learning_rate": 2.2026880361576545e-05, + "loss": 0.1497, + "step": 38130 + }, + { + "epoch": 1.69, + "learning_rate": 2.2019446633264448e-05, + "loss": 0.1722, + "step": 38140 + }, + { + "epoch": 1.69, + "learning_rate": 2.201201290495235e-05, + "loss": 0.1889, + "step": 38150 + }, + { + "epoch": 1.69, + "learning_rate": 2.2004579176640253e-05, + "loss": 0.1294, + "step": 38160 + }, + { + "epoch": 1.69, + "learning_rate": 2.1997145448328155e-05, + "loss": 0.1582, + "step": 38170 + }, + { + "epoch": 1.69, + "learning_rate": 2.1989711720016058e-05, + "loss": 0.1519, + "step": 38180 + }, + { + "epoch": 1.69, + "learning_rate": 2.198227799170396e-05, + "loss": 0.1564, + "step": 38190 + }, + { + "epoch": 1.69, + "learning_rate": 2.1974844263391863e-05, + "loss": 0.1945, + "step": 38200 + }, + { + "epoch": 1.69, + "learning_rate": 2.1967410535079766e-05, + "loss": 0.1481, + "step": 38210 + }, + { + "epoch": 1.69, + "learning_rate": 2.1959976806767668e-05, + "loss": 0.1436, + "step": 38220 + }, + { + "epoch": 1.69, + "learning_rate": 2.195254307845557e-05, + "loss": 0.1586, + "step": 38230 + }, + { + "epoch": 1.69, + "learning_rate": 2.1945109350143473e-05, + "loss": 0.1222, + "step": 38240 + }, + { + "epoch": 1.69, + "learning_rate": 2.1937675621831376e-05, + "loss": 0.1483, + "step": 38250 + }, + { + "epoch": 1.69, + "learning_rate": 2.1930241893519275e-05, + "loss": 0.1879, + "step": 38260 + }, + { + "epoch": 1.69, + "learning_rate": 2.1922808165207178e-05, + "loss": 0.1811, + "step": 38270 + }, + { + "epoch": 1.69, + "learning_rate": 2.191537443689508e-05, + "loss": 0.1553, + "step": 38280 + }, + { + "epoch": 1.7, + "learning_rate": 2.1907940708582983e-05, + "loss": 0.1539, + "step": 38290 + }, + { + "epoch": 1.7, + "learning_rate": 2.1900506980270885e-05, + "loss": 0.1965, + "step": 38300 + }, + { + "epoch": 1.7, + "learning_rate": 2.1893073251958788e-05, + "loss": 0.1506, + "step": 38310 + }, + { + "epoch": 1.7, + "learning_rate": 2.188563952364669e-05, + "loss": 0.1402, + "step": 38320 + }, + { + "epoch": 1.7, + "learning_rate": 2.1878205795334593e-05, + "loss": 0.1208, + "step": 38330 + }, + { + "epoch": 1.7, + "learning_rate": 2.1870772067022496e-05, + "loss": 0.0977, + "step": 38340 + }, + { + "epoch": 1.7, + "learning_rate": 2.18633383387104e-05, + "loss": 0.1206, + "step": 38350 + }, + { + "epoch": 1.7, + "learning_rate": 2.18559046103983e-05, + "loss": 0.0768, + "step": 38360 + }, + { + "epoch": 1.7, + "learning_rate": 2.1848470882086204e-05, + "loss": 0.1074, + "step": 38370 + }, + { + "epoch": 1.7, + "learning_rate": 2.1841037153774106e-05, + "loss": 0.1491, + "step": 38380 + }, + { + "epoch": 1.7, + "learning_rate": 2.1833603425462005e-05, + "loss": 0.1765, + "step": 38390 + }, + { + "epoch": 1.7, + "learning_rate": 2.1826169697149908e-05, + "loss": 0.1332, + "step": 38400 + }, + { + "epoch": 1.7, + "learning_rate": 2.181873596883781e-05, + "loss": 0.1382, + "step": 38410 + }, + { + "epoch": 1.7, + "learning_rate": 2.1811302240525713e-05, + "loss": 0.164, + "step": 38420 + }, + { + "epoch": 1.7, + "learning_rate": 2.1803868512213616e-05, + "loss": 0.1565, + "step": 38430 + }, + { + "epoch": 1.7, + "learning_rate": 2.179643478390152e-05, + "loss": 0.1154, + "step": 38440 + }, + { + "epoch": 1.7, + "learning_rate": 2.1789001055589424e-05, + "loss": 0.1681, + "step": 38450 + }, + { + "epoch": 1.7, + "learning_rate": 2.1781567327277323e-05, + "loss": 0.1177, + "step": 38460 + }, + { + "epoch": 1.7, + "learning_rate": 2.1774133598965226e-05, + "loss": 0.1562, + "step": 38470 + }, + { + "epoch": 1.7, + "learning_rate": 2.176669987065313e-05, + "loss": 0.1756, + "step": 38480 + }, + { + "epoch": 1.7, + "learning_rate": 2.175926614234103e-05, + "loss": 0.1507, + "step": 38490 + }, + { + "epoch": 1.7, + "learning_rate": 2.1751832414028934e-05, + "loss": 0.1297, + "step": 38500 + }, + { + "epoch": 1.7, + "learning_rate": 2.1744398685716836e-05, + "loss": 0.1504, + "step": 38510 + }, + { + "epoch": 1.71, + "learning_rate": 2.173696495740474e-05, + "loss": 0.1793, + "step": 38520 + }, + { + "epoch": 1.71, + "learning_rate": 2.1729531229092638e-05, + "loss": 0.1168, + "step": 38530 + }, + { + "epoch": 1.71, + "learning_rate": 2.172209750078054e-05, + "loss": 0.1638, + "step": 38540 + }, + { + "epoch": 1.71, + "learning_rate": 2.1714663772468443e-05, + "loss": 0.1336, + "step": 38550 + }, + { + "epoch": 1.71, + "learning_rate": 2.1707230044156346e-05, + "loss": 0.1605, + "step": 38560 + }, + { + "epoch": 1.71, + "learning_rate": 2.1699796315844252e-05, + "loss": 0.1709, + "step": 38570 + }, + { + "epoch": 1.71, + "learning_rate": 2.1692362587532154e-05, + "loss": 0.1156, + "step": 38580 + }, + { + "epoch": 1.71, + "learning_rate": 2.1684928859220054e-05, + "loss": 0.1392, + "step": 38590 + }, + { + "epoch": 1.71, + "learning_rate": 2.1677495130907956e-05, + "loss": 0.1381, + "step": 38600 + }, + { + "epoch": 1.71, + "learning_rate": 2.167006140259586e-05, + "loss": 0.1972, + "step": 38610 + }, + { + "epoch": 1.71, + "learning_rate": 2.166262767428376e-05, + "loss": 0.1139, + "step": 38620 + }, + { + "epoch": 1.71, + "learning_rate": 2.1655193945971664e-05, + "loss": 0.1406, + "step": 38630 + }, + { + "epoch": 1.71, + "learning_rate": 2.1647760217659566e-05, + "loss": 0.0876, + "step": 38640 + }, + { + "epoch": 1.71, + "learning_rate": 2.164032648934747e-05, + "loss": 0.1361, + "step": 38650 + }, + { + "epoch": 1.71, + "learning_rate": 2.1632892761035368e-05, + "loss": 0.1362, + "step": 38660 + }, + { + "epoch": 1.71, + "learning_rate": 2.162545903272327e-05, + "loss": 0.0901, + "step": 38670 + }, + { + "epoch": 1.71, + "learning_rate": 2.1618025304411173e-05, + "loss": 0.2154, + "step": 38680 + }, + { + "epoch": 1.71, + "learning_rate": 2.161059157609908e-05, + "loss": 0.1128, + "step": 38690 + }, + { + "epoch": 1.71, + "learning_rate": 2.1603157847786982e-05, + "loss": 0.1681, + "step": 38700 + }, + { + "epoch": 1.71, + "learning_rate": 2.1595724119474884e-05, + "loss": 0.1483, + "step": 38710 + }, + { + "epoch": 1.71, + "learning_rate": 2.1588290391162787e-05, + "loss": 0.1518, + "step": 38720 + }, + { + "epoch": 1.71, + "learning_rate": 2.1580856662850686e-05, + "loss": 0.1751, + "step": 38730 + }, + { + "epoch": 1.72, + "learning_rate": 2.157342293453859e-05, + "loss": 0.1421, + "step": 38740 + }, + { + "epoch": 1.72, + "learning_rate": 2.156598920622649e-05, + "loss": 0.2136, + "step": 38750 + }, + { + "epoch": 1.72, + "learning_rate": 2.1558555477914394e-05, + "loss": 0.1073, + "step": 38760 + }, + { + "epoch": 1.72, + "learning_rate": 2.1551121749602297e-05, + "loss": 0.1545, + "step": 38770 + }, + { + "epoch": 1.72, + "learning_rate": 2.15436880212902e-05, + "loss": 0.1072, + "step": 38780 + }, + { + "epoch": 1.72, + "learning_rate": 2.1536254292978102e-05, + "loss": 0.1504, + "step": 38790 + }, + { + "epoch": 1.72, + "learning_rate": 2.1528820564666e-05, + "loss": 0.1615, + "step": 38800 + }, + { + "epoch": 1.72, + "learning_rate": 2.1521386836353907e-05, + "loss": 0.1411, + "step": 38810 + }, + { + "epoch": 1.72, + "learning_rate": 2.151395310804181e-05, + "loss": 0.1535, + "step": 38820 + }, + { + "epoch": 1.72, + "learning_rate": 2.1506519379729712e-05, + "loss": 0.1187, + "step": 38830 + }, + { + "epoch": 1.72, + "learning_rate": 2.1499085651417615e-05, + "loss": 0.117, + "step": 38840 + }, + { + "epoch": 1.72, + "learning_rate": 2.1491651923105517e-05, + "loss": 0.1308, + "step": 38850 + }, + { + "epoch": 1.72, + "learning_rate": 2.1484218194793416e-05, + "loss": 0.1535, + "step": 38860 + }, + { + "epoch": 1.72, + "learning_rate": 2.147678446648132e-05, + "loss": 0.1399, + "step": 38870 + }, + { + "epoch": 1.72, + "learning_rate": 2.146935073816922e-05, + "loss": 0.1549, + "step": 38880 + }, + { + "epoch": 1.72, + "learning_rate": 2.1461917009857124e-05, + "loss": 0.1746, + "step": 38890 + }, + { + "epoch": 1.72, + "learning_rate": 2.1454483281545027e-05, + "loss": 0.1138, + "step": 38900 + }, + { + "epoch": 1.72, + "learning_rate": 2.144704955323293e-05, + "loss": 0.17, + "step": 38910 + }, + { + "epoch": 1.72, + "learning_rate": 2.1439615824920832e-05, + "loss": 0.1213, + "step": 38920 + }, + { + "epoch": 1.72, + "learning_rate": 2.1432182096608734e-05, + "loss": 0.1178, + "step": 38930 + }, + { + "epoch": 1.72, + "learning_rate": 2.1424748368296637e-05, + "loss": 0.1037, + "step": 38940 + }, + { + "epoch": 1.72, + "learning_rate": 2.141731463998454e-05, + "loss": 0.1546, + "step": 38950 + }, + { + "epoch": 1.72, + "learning_rate": 2.1409880911672442e-05, + "loss": 0.158, + "step": 38960 + }, + { + "epoch": 1.73, + "learning_rate": 2.1402447183360345e-05, + "loss": 0.1642, + "step": 38970 + }, + { + "epoch": 1.73, + "learning_rate": 2.1395013455048247e-05, + "loss": 0.2102, + "step": 38980 + }, + { + "epoch": 1.73, + "learning_rate": 2.138757972673615e-05, + "loss": 0.1607, + "step": 38990 + }, + { + "epoch": 1.73, + "learning_rate": 2.138014599842405e-05, + "loss": 0.1083, + "step": 39000 + }, + { + "epoch": 1.73, + "learning_rate": 2.1372712270111952e-05, + "loss": 0.1547, + "step": 39010 + }, + { + "epoch": 1.73, + "learning_rate": 2.1365278541799854e-05, + "loss": 0.094, + "step": 39020 + }, + { + "epoch": 1.73, + "learning_rate": 2.1357844813487757e-05, + "loss": 0.1685, + "step": 39030 + }, + { + "epoch": 1.73, + "learning_rate": 2.135041108517566e-05, + "loss": 0.1166, + "step": 39040 + }, + { + "epoch": 1.73, + "learning_rate": 2.1342977356863562e-05, + "loss": 0.1837, + "step": 39050 + }, + { + "epoch": 1.73, + "learning_rate": 2.1335543628551465e-05, + "loss": 0.156, + "step": 39060 + }, + { + "epoch": 1.73, + "learning_rate": 2.1328109900239367e-05, + "loss": 0.2299, + "step": 39070 + }, + { + "epoch": 1.73, + "learning_rate": 2.132067617192727e-05, + "loss": 0.1535, + "step": 39080 + }, + { + "epoch": 1.73, + "learning_rate": 2.1313242443615172e-05, + "loss": 0.1139, + "step": 39090 + }, + { + "epoch": 1.73, + "learning_rate": 2.1305808715303075e-05, + "loss": 0.1326, + "step": 39100 + }, + { + "epoch": 1.73, + "learning_rate": 2.1298374986990977e-05, + "loss": 0.1354, + "step": 39110 + }, + { + "epoch": 1.73, + "learning_rate": 2.129094125867888e-05, + "loss": 0.1687, + "step": 39120 + }, + { + "epoch": 1.73, + "learning_rate": 2.128350753036678e-05, + "loss": 0.1154, + "step": 39130 + }, + { + "epoch": 1.73, + "learning_rate": 2.1276073802054682e-05, + "loss": 0.1344, + "step": 39140 + }, + { + "epoch": 1.73, + "learning_rate": 2.1268640073742584e-05, + "loss": 0.1325, + "step": 39150 + }, + { + "epoch": 1.73, + "learning_rate": 2.1261206345430487e-05, + "loss": 0.1767, + "step": 39160 + }, + { + "epoch": 1.73, + "learning_rate": 2.125377261711839e-05, + "loss": 0.129, + "step": 39170 + }, + { + "epoch": 1.73, + "learning_rate": 2.1246338888806296e-05, + "loss": 0.2197, + "step": 39180 + }, + { + "epoch": 1.74, + "learning_rate": 2.1238905160494195e-05, + "loss": 0.1092, + "step": 39190 + }, + { + "epoch": 1.74, + "learning_rate": 2.1231471432182097e-05, + "loss": 0.1698, + "step": 39200 + }, + { + "epoch": 1.74, + "learning_rate": 2.122403770387e-05, + "loss": 0.1557, + "step": 39210 + }, + { + "epoch": 1.74, + "learning_rate": 2.1216603975557902e-05, + "loss": 0.1386, + "step": 39220 + }, + { + "epoch": 1.74, + "learning_rate": 2.1209170247245805e-05, + "loss": 0.1581, + "step": 39230 + }, + { + "epoch": 1.74, + "learning_rate": 2.1201736518933708e-05, + "loss": 0.1088, + "step": 39240 + }, + { + "epoch": 1.74, + "learning_rate": 2.119430279062161e-05, + "loss": 0.1209, + "step": 39250 + }, + { + "epoch": 1.74, + "learning_rate": 2.1186869062309513e-05, + "loss": 0.139, + "step": 39260 + }, + { + "epoch": 1.74, + "learning_rate": 2.1179435333997412e-05, + "loss": 0.1391, + "step": 39270 + }, + { + "epoch": 1.74, + "learning_rate": 2.1172001605685315e-05, + "loss": 0.1924, + "step": 39280 + }, + { + "epoch": 1.74, + "learning_rate": 2.1164567877373217e-05, + "loss": 0.1196, + "step": 39290 + }, + { + "epoch": 1.74, + "learning_rate": 2.1157134149061123e-05, + "loss": 0.1237, + "step": 39300 + }, + { + "epoch": 1.74, + "learning_rate": 2.1149700420749026e-05, + "loss": 0.1459, + "step": 39310 + }, + { + "epoch": 1.74, + "learning_rate": 2.1142266692436928e-05, + "loss": 0.0923, + "step": 39320 + }, + { + "epoch": 1.74, + "learning_rate": 2.1134832964124827e-05, + "loss": 0.1047, + "step": 39330 + }, + { + "epoch": 1.74, + "learning_rate": 2.112739923581273e-05, + "loss": 0.1074, + "step": 39340 + }, + { + "epoch": 1.74, + "learning_rate": 2.1119965507500633e-05, + "loss": 0.1017, + "step": 39350 + }, + { + "epoch": 1.74, + "learning_rate": 2.1112531779188535e-05, + "loss": 0.1142, + "step": 39360 + }, + { + "epoch": 1.74, + "learning_rate": 2.1105098050876438e-05, + "loss": 0.134, + "step": 39370 + }, + { + "epoch": 1.74, + "learning_rate": 2.109766432256434e-05, + "loss": 0.1603, + "step": 39380 + }, + { + "epoch": 1.74, + "learning_rate": 2.1090230594252243e-05, + "loss": 0.1852, + "step": 39390 + }, + { + "epoch": 1.74, + "learning_rate": 2.1082796865940142e-05, + "loss": 0.1572, + "step": 39400 + }, + { + "epoch": 1.74, + "learning_rate": 2.1075363137628045e-05, + "loss": 0.1107, + "step": 39410 + }, + { + "epoch": 1.75, + "learning_rate": 2.106792940931595e-05, + "loss": 0.1275, + "step": 39420 + }, + { + "epoch": 1.75, + "learning_rate": 2.1060495681003853e-05, + "loss": 0.1345, + "step": 39430 + }, + { + "epoch": 1.75, + "learning_rate": 2.1053061952691756e-05, + "loss": 0.1514, + "step": 39440 + }, + { + "epoch": 1.75, + "learning_rate": 2.104562822437966e-05, + "loss": 0.1601, + "step": 39450 + }, + { + "epoch": 1.75, + "learning_rate": 2.1038194496067558e-05, + "loss": 0.2222, + "step": 39460 + }, + { + "epoch": 1.75, + "learning_rate": 2.103076076775546e-05, + "loss": 0.1161, + "step": 39470 + }, + { + "epoch": 1.75, + "learning_rate": 2.1023327039443363e-05, + "loss": 0.149, + "step": 39480 + }, + { + "epoch": 1.75, + "learning_rate": 2.1015893311131265e-05, + "loss": 0.1487, + "step": 39490 + }, + { + "epoch": 1.75, + "learning_rate": 2.1008459582819168e-05, + "loss": 0.1446, + "step": 39500 + }, + { + "epoch": 1.75, + "learning_rate": 2.100102585450707e-05, + "loss": 0.137, + "step": 39510 + }, + { + "epoch": 1.75, + "learning_rate": 2.0993592126194973e-05, + "loss": 0.143, + "step": 39520 + }, + { + "epoch": 1.75, + "learning_rate": 2.0986158397882876e-05, + "loss": 0.1234, + "step": 39530 + }, + { + "epoch": 1.75, + "learning_rate": 2.0978724669570775e-05, + "loss": 0.112, + "step": 39540 + }, + { + "epoch": 1.75, + "learning_rate": 2.097129094125868e-05, + "loss": 0.1129, + "step": 39550 + }, + { + "epoch": 1.75, + "learning_rate": 2.0963857212946583e-05, + "loss": 0.1297, + "step": 39560 + }, + { + "epoch": 1.75, + "learning_rate": 2.0956423484634486e-05, + "loss": 0.1621, + "step": 39570 + }, + { + "epoch": 1.75, + "learning_rate": 2.094898975632239e-05, + "loss": 0.149, + "step": 39580 + }, + { + "epoch": 1.75, + "learning_rate": 2.094155602801029e-05, + "loss": 0.1255, + "step": 39590 + }, + { + "epoch": 1.75, + "learning_rate": 2.093412229969819e-05, + "loss": 0.1665, + "step": 39600 + }, + { + "epoch": 1.75, + "learning_rate": 2.0926688571386093e-05, + "loss": 0.1193, + "step": 39610 + }, + { + "epoch": 1.75, + "learning_rate": 2.0919254843073995e-05, + "loss": 0.1356, + "step": 39620 + }, + { + "epoch": 1.75, + "learning_rate": 2.0911821114761898e-05, + "loss": 0.1532, + "step": 39630 + }, + { + "epoch": 1.75, + "learning_rate": 2.09043873864498e-05, + "loss": 0.1583, + "step": 39640 + }, + { + "epoch": 1.76, + "learning_rate": 2.0896953658137703e-05, + "loss": 0.1153, + "step": 39650 + }, + { + "epoch": 1.76, + "learning_rate": 2.0889519929825606e-05, + "loss": 0.151, + "step": 39660 + }, + { + "epoch": 1.76, + "learning_rate": 2.088208620151351e-05, + "loss": 0.2209, + "step": 39670 + }, + { + "epoch": 1.76, + "learning_rate": 2.087465247320141e-05, + "loss": 0.1453, + "step": 39680 + }, + { + "epoch": 1.76, + "learning_rate": 2.0867218744889314e-05, + "loss": 0.1011, + "step": 39690 + }, + { + "epoch": 1.76, + "learning_rate": 2.0859785016577216e-05, + "loss": 0.1632, + "step": 39700 + }, + { + "epoch": 1.76, + "learning_rate": 2.085235128826512e-05, + "loss": 0.1383, + "step": 39710 + }, + { + "epoch": 1.76, + "learning_rate": 2.084491755995302e-05, + "loss": 0.095, + "step": 39720 + }, + { + "epoch": 1.76, + "learning_rate": 2.083748383164092e-05, + "loss": 0.1388, + "step": 39730 + }, + { + "epoch": 1.76, + "learning_rate": 2.0830050103328823e-05, + "loss": 0.1328, + "step": 39740 + }, + { + "epoch": 1.76, + "learning_rate": 2.0822616375016726e-05, + "loss": 0.1623, + "step": 39750 + }, + { + "epoch": 1.76, + "learning_rate": 2.0815182646704628e-05, + "loss": 0.122, + "step": 39760 + }, + { + "epoch": 1.76, + "learning_rate": 2.080774891839253e-05, + "loss": 0.1353, + "step": 39770 + }, + { + "epoch": 1.76, + "learning_rate": 2.0800315190080433e-05, + "loss": 0.1074, + "step": 39780 + }, + { + "epoch": 1.76, + "learning_rate": 2.0792881461768336e-05, + "loss": 0.239, + "step": 39790 + }, + { + "epoch": 1.76, + "learning_rate": 2.078544773345624e-05, + "loss": 0.1291, + "step": 39800 + }, + { + "epoch": 1.76, + "learning_rate": 2.077801400514414e-05, + "loss": 0.1527, + "step": 39810 + }, + { + "epoch": 1.76, + "learning_rate": 2.0770580276832044e-05, + "loss": 0.1559, + "step": 39820 + }, + { + "epoch": 1.76, + "learning_rate": 2.0763146548519946e-05, + "loss": 0.1396, + "step": 39830 + }, + { + "epoch": 1.76, + "learning_rate": 2.075571282020785e-05, + "loss": 0.144, + "step": 39840 + }, + { + "epoch": 1.76, + "learning_rate": 2.074827909189575e-05, + "loss": 0.1975, + "step": 39850 + }, + { + "epoch": 1.76, + "learning_rate": 2.0740845363583654e-05, + "loss": 0.1778, + "step": 39860 + }, + { + "epoch": 1.77, + "learning_rate": 2.0733411635271553e-05, + "loss": 0.1291, + "step": 39870 + }, + { + "epoch": 1.77, + "learning_rate": 2.0725977906959456e-05, + "loss": 0.116, + "step": 39880 + }, + { + "epoch": 1.77, + "learning_rate": 2.071854417864736e-05, + "loss": 0.1599, + "step": 39890 + }, + { + "epoch": 1.77, + "learning_rate": 2.071111045033526e-05, + "loss": 0.1859, + "step": 39900 + }, + { + "epoch": 1.77, + "learning_rate": 2.0703676722023164e-05, + "loss": 0.1165, + "step": 39910 + }, + { + "epoch": 1.77, + "learning_rate": 2.069624299371107e-05, + "loss": 0.1533, + "step": 39920 + }, + { + "epoch": 1.77, + "learning_rate": 2.068880926539897e-05, + "loss": 0.1517, + "step": 39930 + }, + { + "epoch": 1.77, + "learning_rate": 2.068137553708687e-05, + "loss": 0.1573, + "step": 39940 + }, + { + "epoch": 1.77, + "learning_rate": 2.0673941808774774e-05, + "loss": 0.1337, + "step": 39950 + }, + { + "epoch": 1.77, + "learning_rate": 2.0666508080462676e-05, + "loss": 0.227, + "step": 39960 + }, + { + "epoch": 1.77, + "learning_rate": 2.065907435215058e-05, + "loss": 0.1098, + "step": 39970 + }, + { + "epoch": 1.77, + "learning_rate": 2.065164062383848e-05, + "loss": 0.1182, + "step": 39980 + }, + { + "epoch": 1.77, + "learning_rate": 2.0644206895526384e-05, + "loss": 0.1267, + "step": 39990 + }, + { + "epoch": 1.77, + "learning_rate": 2.0636773167214283e-05, + "loss": 0.1447, + "step": 40000 + }, + { + "epoch": 1.77, + "learning_rate": 2.0629339438902186e-05, + "loss": 0.1662, + "step": 40010 + }, + { + "epoch": 1.77, + "learning_rate": 2.062190571059009e-05, + "loss": 0.1421, + "step": 40020 + }, + { + "epoch": 1.77, + "learning_rate": 2.061447198227799e-05, + "loss": 0.1476, + "step": 40030 + }, + { + "epoch": 1.77, + "learning_rate": 2.0607038253965897e-05, + "loss": 0.181, + "step": 40040 + }, + { + "epoch": 1.77, + "learning_rate": 2.05996045256538e-05, + "loss": 0.1519, + "step": 40050 + }, + { + "epoch": 1.77, + "learning_rate": 2.05921707973417e-05, + "loss": 0.1335, + "step": 40060 + }, + { + "epoch": 1.77, + "learning_rate": 2.05847370690296e-05, + "loss": 0.1612, + "step": 40070 + }, + { + "epoch": 1.77, + "learning_rate": 2.0577303340717504e-05, + "loss": 0.1466, + "step": 40080 + }, + { + "epoch": 1.77, + "learning_rate": 2.0569869612405407e-05, + "loss": 0.114, + "step": 40090 + }, + { + "epoch": 1.78, + "learning_rate": 2.056243588409331e-05, + "loss": 0.1447, + "step": 40100 + }, + { + "epoch": 1.78, + "learning_rate": 2.0555002155781212e-05, + "loss": 0.1929, + "step": 40110 + }, + { + "epoch": 1.78, + "learning_rate": 2.0547568427469114e-05, + "loss": 0.0976, + "step": 40120 + }, + { + "epoch": 1.78, + "learning_rate": 2.0540134699157017e-05, + "loss": 0.1583, + "step": 40130 + }, + { + "epoch": 1.78, + "learning_rate": 2.0532700970844916e-05, + "loss": 0.1264, + "step": 40140 + }, + { + "epoch": 1.78, + "learning_rate": 2.052526724253282e-05, + "loss": 0.1422, + "step": 40150 + }, + { + "epoch": 1.78, + "learning_rate": 2.0517833514220725e-05, + "loss": 0.1475, + "step": 40160 + }, + { + "epoch": 1.78, + "learning_rate": 2.0510399785908627e-05, + "loss": 0.2099, + "step": 40170 + }, + { + "epoch": 1.78, + "learning_rate": 2.050296605759653e-05, + "loss": 0.1323, + "step": 40180 + }, + { + "epoch": 1.78, + "learning_rate": 2.0495532329284432e-05, + "loss": 0.1083, + "step": 40190 + }, + { + "epoch": 1.78, + "learning_rate": 2.048809860097233e-05, + "loss": 0.1086, + "step": 40200 + }, + { + "epoch": 1.78, + "learning_rate": 2.0480664872660234e-05, + "loss": 0.1756, + "step": 40210 + }, + { + "epoch": 1.78, + "learning_rate": 2.0473231144348137e-05, + "loss": 0.1584, + "step": 40220 + }, + { + "epoch": 1.78, + "learning_rate": 2.046579741603604e-05, + "loss": 0.0976, + "step": 40230 + }, + { + "epoch": 1.78, + "learning_rate": 2.0458363687723942e-05, + "loss": 0.1319, + "step": 40240 + }, + { + "epoch": 1.78, + "learning_rate": 2.0450929959411844e-05, + "loss": 0.1174, + "step": 40250 + }, + { + "epoch": 1.78, + "learning_rate": 2.0443496231099747e-05, + "loss": 0.1839, + "step": 40260 + }, + { + "epoch": 1.78, + "learning_rate": 2.0436062502787646e-05, + "loss": 0.1578, + "step": 40270 + }, + { + "epoch": 1.78, + "learning_rate": 2.0428628774475552e-05, + "loss": 0.1312, + "step": 40280 + }, + { + "epoch": 1.78, + "learning_rate": 2.0421195046163455e-05, + "loss": 0.1684, + "step": 40290 + }, + { + "epoch": 1.78, + "learning_rate": 2.0413761317851357e-05, + "loss": 0.1094, + "step": 40300 + }, + { + "epoch": 1.78, + "learning_rate": 2.040632758953926e-05, + "loss": 0.1252, + "step": 40310 + }, + { + "epoch": 1.79, + "learning_rate": 2.0398893861227163e-05, + "loss": 0.165, + "step": 40320 + }, + { + "epoch": 1.79, + "learning_rate": 2.0391460132915065e-05, + "loss": 0.0912, + "step": 40330 + }, + { + "epoch": 1.79, + "learning_rate": 2.0384026404602964e-05, + "loss": 0.1105, + "step": 40340 + }, + { + "epoch": 1.79, + "learning_rate": 2.0376592676290867e-05, + "loss": 0.1435, + "step": 40350 + }, + { + "epoch": 1.79, + "learning_rate": 2.036915894797877e-05, + "loss": 0.239, + "step": 40360 + }, + { + "epoch": 1.79, + "learning_rate": 2.0361725219666672e-05, + "loss": 0.1576, + "step": 40370 + }, + { + "epoch": 1.79, + "learning_rate": 2.0354291491354575e-05, + "loss": 0.15, + "step": 40380 + }, + { + "epoch": 1.79, + "learning_rate": 2.0346857763042477e-05, + "loss": 0.1513, + "step": 40390 + }, + { + "epoch": 1.79, + "learning_rate": 2.033942403473038e-05, + "loss": 0.1466, + "step": 40400 + }, + { + "epoch": 1.79, + "learning_rate": 2.0331990306418282e-05, + "loss": 0.0999, + "step": 40410 + }, + { + "epoch": 1.79, + "learning_rate": 2.0324556578106185e-05, + "loss": 0.1326, + "step": 40420 + }, + { + "epoch": 1.79, + "learning_rate": 2.0317122849794087e-05, + "loss": 0.1293, + "step": 40430 + }, + { + "epoch": 1.79, + "learning_rate": 2.030968912148199e-05, + "loss": 0.1298, + "step": 40440 + }, + { + "epoch": 1.79, + "learning_rate": 2.0302255393169893e-05, + "loss": 0.1571, + "step": 40450 + }, + { + "epoch": 1.79, + "learning_rate": 2.0294821664857795e-05, + "loss": 0.1163, + "step": 40460 + }, + { + "epoch": 1.79, + "learning_rate": 2.0287387936545694e-05, + "loss": 0.1597, + "step": 40470 + }, + { + "epoch": 1.79, + "learning_rate": 2.0279954208233597e-05, + "loss": 0.106, + "step": 40480 + }, + { + "epoch": 1.79, + "learning_rate": 2.02725204799215e-05, + "loss": 0.1158, + "step": 40490 + }, + { + "epoch": 1.79, + "learning_rate": 2.0265086751609402e-05, + "loss": 0.1313, + "step": 40500 + }, + { + "epoch": 1.79, + "learning_rate": 2.0257653023297305e-05, + "loss": 0.0764, + "step": 40510 + }, + { + "epoch": 1.79, + "learning_rate": 2.0250219294985207e-05, + "loss": 0.1253, + "step": 40520 + }, + { + "epoch": 1.79, + "learning_rate": 2.024278556667311e-05, + "loss": 0.1313, + "step": 40530 + }, + { + "epoch": 1.79, + "learning_rate": 2.0235351838361012e-05, + "loss": 0.1436, + "step": 40540 + }, + { + "epoch": 1.8, + "learning_rate": 2.0227918110048915e-05, + "loss": 0.1404, + "step": 40550 + }, + { + "epoch": 1.8, + "learning_rate": 2.0220484381736818e-05, + "loss": 0.1868, + "step": 40560 + }, + { + "epoch": 1.8, + "learning_rate": 2.021305065342472e-05, + "loss": 0.1455, + "step": 40570 + }, + { + "epoch": 1.8, + "learning_rate": 2.0205616925112623e-05, + "loss": 0.1677, + "step": 40580 + }, + { + "epoch": 1.8, + "learning_rate": 2.0198183196800525e-05, + "loss": 0.1349, + "step": 40590 + }, + { + "epoch": 1.8, + "learning_rate": 2.0190749468488428e-05, + "loss": 0.1333, + "step": 40600 + }, + { + "epoch": 1.8, + "learning_rate": 2.0183315740176327e-05, + "loss": 0.1011, + "step": 40610 + }, + { + "epoch": 1.8, + "learning_rate": 2.017588201186423e-05, + "loss": 0.1384, + "step": 40620 + }, + { + "epoch": 1.8, + "learning_rate": 2.0168448283552132e-05, + "loss": 0.1413, + "step": 40630 + }, + { + "epoch": 1.8, + "learning_rate": 2.0161014555240035e-05, + "loss": 0.1677, + "step": 40640 + }, + { + "epoch": 1.8, + "learning_rate": 2.015358082692794e-05, + "loss": 0.1555, + "step": 40650 + }, + { + "epoch": 1.8, + "learning_rate": 2.0146147098615843e-05, + "loss": 0.1846, + "step": 40660 + }, + { + "epoch": 1.8, + "learning_rate": 2.0138713370303743e-05, + "loss": 0.1533, + "step": 40670 + }, + { + "epoch": 1.8, + "learning_rate": 2.0131279641991645e-05, + "loss": 0.2295, + "step": 40680 + }, + { + "epoch": 1.8, + "learning_rate": 2.0123845913679548e-05, + "loss": 0.124, + "step": 40690 + }, + { + "epoch": 1.8, + "learning_rate": 2.011641218536745e-05, + "loss": 0.187, + "step": 40700 + }, + { + "epoch": 1.8, + "learning_rate": 2.0108978457055353e-05, + "loss": 0.172, + "step": 40710 + }, + { + "epoch": 1.8, + "learning_rate": 2.0101544728743256e-05, + "loss": 0.1015, + "step": 40720 + }, + { + "epoch": 1.8, + "learning_rate": 2.0094111000431158e-05, + "loss": 0.1583, + "step": 40730 + }, + { + "epoch": 1.8, + "learning_rate": 2.0086677272119057e-05, + "loss": 0.2003, + "step": 40740 + }, + { + "epoch": 1.8, + "learning_rate": 2.007924354380696e-05, + "loss": 0.166, + "step": 40750 + }, + { + "epoch": 1.8, + "learning_rate": 2.0071809815494862e-05, + "loss": 0.1565, + "step": 40760 + }, + { + "epoch": 1.81, + "learning_rate": 2.006437608718277e-05, + "loss": 0.1749, + "step": 40770 + }, + { + "epoch": 1.81, + "learning_rate": 2.005694235887067e-05, + "loss": 0.1489, + "step": 40780 + }, + { + "epoch": 1.81, + "learning_rate": 2.0049508630558574e-05, + "loss": 0.0947, + "step": 40790 + }, + { + "epoch": 1.81, + "learning_rate": 2.0042074902246473e-05, + "loss": 0.1467, + "step": 40800 + }, + { + "epoch": 1.81, + "learning_rate": 2.0034641173934375e-05, + "loss": 0.156, + "step": 40810 + }, + { + "epoch": 1.81, + "learning_rate": 2.0027207445622278e-05, + "loss": 0.1416, + "step": 40820 + }, + { + "epoch": 1.81, + "learning_rate": 2.001977371731018e-05, + "loss": 0.1276, + "step": 40830 + }, + { + "epoch": 1.81, + "learning_rate": 2.0012339988998083e-05, + "loss": 0.1611, + "step": 40840 + }, + { + "epoch": 1.81, + "learning_rate": 2.0004906260685986e-05, + "loss": 0.1434, + "step": 40850 + }, + { + "epoch": 1.81, + "learning_rate": 1.9997472532373888e-05, + "loss": 0.0909, + "step": 40860 + }, + { + "epoch": 1.81, + "learning_rate": 1.999003880406179e-05, + "loss": 0.1452, + "step": 40870 + }, + { + "epoch": 1.81, + "learning_rate": 1.998260507574969e-05, + "loss": 0.0892, + "step": 40880 + }, + { + "epoch": 1.81, + "learning_rate": 1.9975171347437593e-05, + "loss": 0.1653, + "step": 40890 + }, + { + "epoch": 1.81, + "learning_rate": 1.99677376191255e-05, + "loss": 0.0776, + "step": 40900 + }, + { + "epoch": 1.81, + "learning_rate": 1.99603038908134e-05, + "loss": 0.1773, + "step": 40910 + }, + { + "epoch": 1.81, + "learning_rate": 1.9952870162501304e-05, + "loss": 0.1211, + "step": 40920 + }, + { + "epoch": 1.81, + "learning_rate": 1.9945436434189206e-05, + "loss": 0.1579, + "step": 40930 + }, + { + "epoch": 1.81, + "learning_rate": 1.9938002705877105e-05, + "loss": 0.0973, + "step": 40940 + }, + { + "epoch": 1.81, + "learning_rate": 1.9930568977565008e-05, + "loss": 0.1184, + "step": 40950 + }, + { + "epoch": 1.81, + "learning_rate": 1.992313524925291e-05, + "loss": 0.1914, + "step": 40960 + }, + { + "epoch": 1.81, + "learning_rate": 1.9915701520940813e-05, + "loss": 0.1134, + "step": 40970 + }, + { + "epoch": 1.81, + "learning_rate": 1.9908267792628716e-05, + "loss": 0.0962, + "step": 40980 + }, + { + "epoch": 1.81, + "learning_rate": 1.990083406431662e-05, + "loss": 0.1145, + "step": 40990 + }, + { + "epoch": 1.82, + "learning_rate": 1.989340033600452e-05, + "loss": 0.1235, + "step": 41000 + }, + { + "epoch": 1.82, + "learning_rate": 1.988596660769242e-05, + "loss": 0.1551, + "step": 41010 + }, + { + "epoch": 1.82, + "learning_rate": 1.9878532879380326e-05, + "loss": 0.0836, + "step": 41020 + }, + { + "epoch": 1.82, + "learning_rate": 1.987109915106823e-05, + "loss": 0.101, + "step": 41030 + }, + { + "epoch": 1.82, + "learning_rate": 1.986366542275613e-05, + "loss": 0.0867, + "step": 41040 + }, + { + "epoch": 1.82, + "learning_rate": 1.9856231694444034e-05, + "loss": 0.1777, + "step": 41050 + }, + { + "epoch": 1.82, + "learning_rate": 1.9848797966131936e-05, + "loss": 0.1829, + "step": 41060 + }, + { + "epoch": 1.82, + "learning_rate": 1.9841364237819836e-05, + "loss": 0.1815, + "step": 41070 + }, + { + "epoch": 1.82, + "learning_rate": 1.9833930509507738e-05, + "loss": 0.1267, + "step": 41080 + }, + { + "epoch": 1.82, + "learning_rate": 1.982649678119564e-05, + "loss": 0.134, + "step": 41090 + }, + { + "epoch": 1.82, + "learning_rate": 1.9819063052883543e-05, + "loss": 0.1357, + "step": 41100 + }, + { + "epoch": 1.82, + "learning_rate": 1.9811629324571446e-05, + "loss": 0.1018, + "step": 41110 + }, + { + "epoch": 1.82, + "learning_rate": 1.980419559625935e-05, + "loss": 0.0903, + "step": 41120 + }, + { + "epoch": 1.82, + "learning_rate": 1.979676186794725e-05, + "loss": 0.144, + "step": 41130 + }, + { + "epoch": 1.82, + "learning_rate": 1.9789328139635154e-05, + "loss": 0.1024, + "step": 41140 + }, + { + "epoch": 1.82, + "learning_rate": 1.9781894411323056e-05, + "loss": 0.1144, + "step": 41150 + }, + { + "epoch": 1.82, + "learning_rate": 1.977446068301096e-05, + "loss": 0.1397, + "step": 41160 + }, + { + "epoch": 1.82, + "learning_rate": 1.976702695469886e-05, + "loss": 0.1371, + "step": 41170 + }, + { + "epoch": 1.82, + "learning_rate": 1.9759593226386764e-05, + "loss": 0.1196, + "step": 41180 + }, + { + "epoch": 1.82, + "learning_rate": 1.9752159498074667e-05, + "loss": 0.1595, + "step": 41190 + }, + { + "epoch": 1.82, + "learning_rate": 1.974472576976257e-05, + "loss": 0.1317, + "step": 41200 + }, + { + "epoch": 1.82, + "learning_rate": 1.973729204145047e-05, + "loss": 0.1704, + "step": 41210 + }, + { + "epoch": 1.82, + "learning_rate": 1.972985831313837e-05, + "loss": 0.1577, + "step": 41220 + }, + { + "epoch": 1.83, + "learning_rate": 1.9722424584826274e-05, + "loss": 0.1747, + "step": 41230 + }, + { + "epoch": 1.83, + "learning_rate": 1.9714990856514176e-05, + "loss": 0.1681, + "step": 41240 + }, + { + "epoch": 1.83, + "learning_rate": 1.970755712820208e-05, + "loss": 0.1368, + "step": 41250 + }, + { + "epoch": 1.83, + "learning_rate": 1.9700123399889985e-05, + "loss": 0.118, + "step": 41260 + }, + { + "epoch": 1.83, + "learning_rate": 1.9692689671577884e-05, + "loss": 0.1288, + "step": 41270 + }, + { + "epoch": 1.83, + "learning_rate": 1.9685255943265786e-05, + "loss": 0.1717, + "step": 41280 + }, + { + "epoch": 1.83, + "learning_rate": 1.967782221495369e-05, + "loss": 0.0998, + "step": 41290 + }, + { + "epoch": 1.83, + "learning_rate": 1.967038848664159e-05, + "loss": 0.1304, + "step": 41300 + }, + { + "epoch": 1.83, + "learning_rate": 1.9662954758329494e-05, + "loss": 0.1401, + "step": 41310 + }, + { + "epoch": 1.83, + "learning_rate": 1.9655521030017397e-05, + "loss": 0.1613, + "step": 41320 + }, + { + "epoch": 1.83, + "learning_rate": 1.96480873017053e-05, + "loss": 0.1547, + "step": 41330 + }, + { + "epoch": 1.83, + "learning_rate": 1.96406535733932e-05, + "loss": 0.1541, + "step": 41340 + }, + { + "epoch": 1.83, + "learning_rate": 1.96332198450811e-05, + "loss": 0.1972, + "step": 41350 + }, + { + "epoch": 1.83, + "learning_rate": 1.9625786116769004e-05, + "loss": 0.1285, + "step": 41360 + }, + { + "epoch": 1.83, + "learning_rate": 1.9618352388456906e-05, + "loss": 0.1394, + "step": 41370 + }, + { + "epoch": 1.83, + "learning_rate": 1.961091866014481e-05, + "loss": 0.0893, + "step": 41380 + }, + { + "epoch": 1.83, + "learning_rate": 1.9603484931832715e-05, + "loss": 0.1566, + "step": 41390 + }, + { + "epoch": 1.83, + "learning_rate": 1.9596051203520614e-05, + "loss": 0.1606, + "step": 41400 + }, + { + "epoch": 1.83, + "learning_rate": 1.9588617475208517e-05, + "loss": 0.1507, + "step": 41410 + }, + { + "epoch": 1.83, + "learning_rate": 1.958118374689642e-05, + "loss": 0.1818, + "step": 41420 + }, + { + "epoch": 1.83, + "learning_rate": 1.9573750018584322e-05, + "loss": 0.2006, + "step": 41430 + }, + { + "epoch": 1.83, + "learning_rate": 1.9566316290272224e-05, + "loss": 0.2031, + "step": 41440 + }, + { + "epoch": 1.84, + "learning_rate": 1.9558882561960127e-05, + "loss": 0.1733, + "step": 41450 + }, + { + "epoch": 1.84, + "learning_rate": 1.955144883364803e-05, + "loss": 0.1178, + "step": 41460 + }, + { + "epoch": 1.84, + "learning_rate": 1.9544015105335932e-05, + "loss": 0.1058, + "step": 41470 + }, + { + "epoch": 1.84, + "learning_rate": 1.953658137702383e-05, + "loss": 0.1415, + "step": 41480 + }, + { + "epoch": 1.84, + "learning_rate": 1.9529147648711734e-05, + "loss": 0.1288, + "step": 41490 + }, + { + "epoch": 1.84, + "learning_rate": 1.9521713920399636e-05, + "loss": 0.1605, + "step": 41500 + }, + { + "epoch": 1.84, + "learning_rate": 1.9514280192087542e-05, + "loss": 0.1236, + "step": 41510 + }, + { + "epoch": 1.84, + "learning_rate": 1.9506846463775445e-05, + "loss": 0.1338, + "step": 41520 + }, + { + "epoch": 1.84, + "learning_rate": 1.9499412735463348e-05, + "loss": 0.192, + "step": 41530 + }, + { + "epoch": 1.84, + "learning_rate": 1.9491979007151247e-05, + "loss": 0.1717, + "step": 41540 + }, + { + "epoch": 1.84, + "learning_rate": 1.948454527883915e-05, + "loss": 0.1398, + "step": 41550 + }, + { + "epoch": 1.84, + "learning_rate": 1.9477111550527052e-05, + "loss": 0.1537, + "step": 41560 + }, + { + "epoch": 1.84, + "learning_rate": 1.9469677822214954e-05, + "loss": 0.1588, + "step": 41570 + }, + { + "epoch": 1.84, + "learning_rate": 1.9462244093902857e-05, + "loss": 0.1418, + "step": 41580 + }, + { + "epoch": 1.84, + "learning_rate": 1.945481036559076e-05, + "loss": 0.1368, + "step": 41590 + }, + { + "epoch": 1.84, + "learning_rate": 1.9447376637278662e-05, + "loss": 0.1272, + "step": 41600 + }, + { + "epoch": 1.84, + "learning_rate": 1.943994290896656e-05, + "loss": 0.1379, + "step": 41610 + }, + { + "epoch": 1.84, + "learning_rate": 1.9432509180654464e-05, + "loss": 0.1702, + "step": 41620 + }, + { + "epoch": 1.84, + "learning_rate": 1.942507545234237e-05, + "loss": 0.1286, + "step": 41630 + }, + { + "epoch": 1.84, + "learning_rate": 1.9417641724030273e-05, + "loss": 0.1929, + "step": 41640 + }, + { + "epoch": 1.84, + "learning_rate": 1.9410207995718175e-05, + "loss": 0.1345, + "step": 41650 + }, + { + "epoch": 1.84, + "learning_rate": 1.9402774267406078e-05, + "loss": 0.1369, + "step": 41660 + }, + { + "epoch": 1.84, + "learning_rate": 1.9395340539093977e-05, + "loss": 0.1619, + "step": 41670 + }, + { + "epoch": 1.85, + "learning_rate": 1.938790681078188e-05, + "loss": 0.156, + "step": 41680 + }, + { + "epoch": 1.85, + "learning_rate": 1.9380473082469782e-05, + "loss": 0.2092, + "step": 41690 + }, + { + "epoch": 1.85, + "learning_rate": 1.9373039354157685e-05, + "loss": 0.142, + "step": 41700 + }, + { + "epoch": 1.85, + "learning_rate": 1.9365605625845587e-05, + "loss": 0.1488, + "step": 41710 + }, + { + "epoch": 1.85, + "learning_rate": 1.935817189753349e-05, + "loss": 0.1593, + "step": 41720 + }, + { + "epoch": 1.85, + "learning_rate": 1.9350738169221392e-05, + "loss": 0.1026, + "step": 41730 + }, + { + "epoch": 1.85, + "learning_rate": 1.9343304440909295e-05, + "loss": 0.1095, + "step": 41740 + }, + { + "epoch": 1.85, + "learning_rate": 1.9335870712597197e-05, + "loss": 0.1281, + "step": 41750 + }, + { + "epoch": 1.85, + "learning_rate": 1.93284369842851e-05, + "loss": 0.1098, + "step": 41760 + }, + { + "epoch": 1.85, + "learning_rate": 1.9321003255973003e-05, + "loss": 0.1241, + "step": 41770 + }, + { + "epoch": 1.85, + "learning_rate": 1.9313569527660905e-05, + "loss": 0.2285, + "step": 41780 + }, + { + "epoch": 1.85, + "learning_rate": 1.9306135799348808e-05, + "loss": 0.1388, + "step": 41790 + }, + { + "epoch": 1.85, + "learning_rate": 1.929870207103671e-05, + "loss": 0.1776, + "step": 41800 + }, + { + "epoch": 1.85, + "learning_rate": 1.929126834272461e-05, + "loss": 0.1721, + "step": 41810 + }, + { + "epoch": 1.85, + "learning_rate": 1.9283834614412512e-05, + "loss": 0.163, + "step": 41820 + }, + { + "epoch": 1.85, + "learning_rate": 1.9276400886100415e-05, + "loss": 0.1452, + "step": 41830 + }, + { + "epoch": 1.85, + "learning_rate": 1.9268967157788317e-05, + "loss": 0.1496, + "step": 41840 + }, + { + "epoch": 1.85, + "learning_rate": 1.926153342947622e-05, + "loss": 0.1483, + "step": 41850 + }, + { + "epoch": 1.85, + "learning_rate": 1.9254099701164122e-05, + "loss": 0.1449, + "step": 41860 + }, + { + "epoch": 1.85, + "learning_rate": 1.9246665972852025e-05, + "loss": 0.207, + "step": 41870 + }, + { + "epoch": 1.85, + "learning_rate": 1.9239232244539928e-05, + "loss": 0.1199, + "step": 41880 + }, + { + "epoch": 1.85, + "learning_rate": 1.923179851622783e-05, + "loss": 0.0725, + "step": 41890 + }, + { + "epoch": 1.86, + "learning_rate": 1.9224364787915733e-05, + "loss": 0.1718, + "step": 41900 + }, + { + "epoch": 1.86, + "learning_rate": 1.9216931059603635e-05, + "loss": 0.1339, + "step": 41910 + }, + { + "epoch": 1.86, + "learning_rate": 1.9209497331291538e-05, + "loss": 0.0996, + "step": 41920 + }, + { + "epoch": 1.86, + "learning_rate": 1.920206360297944e-05, + "loss": 0.1852, + "step": 41930 + }, + { + "epoch": 1.86, + "learning_rate": 1.9194629874667343e-05, + "loss": 0.1054, + "step": 41940 + }, + { + "epoch": 1.86, + "learning_rate": 1.9187196146355242e-05, + "loss": 0.1407, + "step": 41950 + }, + { + "epoch": 1.86, + "learning_rate": 1.9179762418043145e-05, + "loss": 0.1131, + "step": 41960 + }, + { + "epoch": 1.86, + "learning_rate": 1.9172328689731047e-05, + "loss": 0.1326, + "step": 41970 + }, + { + "epoch": 1.86, + "learning_rate": 1.916489496141895e-05, + "loss": 0.1436, + "step": 41980 + }, + { + "epoch": 1.86, + "learning_rate": 1.9157461233106853e-05, + "loss": 0.1371, + "step": 41990 + }, + { + "epoch": 1.86, + "learning_rate": 1.915002750479476e-05, + "loss": 0.1359, + "step": 42000 + }, + { + "epoch": 1.86, + "learning_rate": 1.9142593776482658e-05, + "loss": 0.1091, + "step": 42010 + }, + { + "epoch": 1.86, + "learning_rate": 1.913516004817056e-05, + "loss": 0.1061, + "step": 42020 + }, + { + "epoch": 1.86, + "learning_rate": 1.9127726319858463e-05, + "loss": 0.204, + "step": 42030 + }, + { + "epoch": 1.86, + "learning_rate": 1.9120292591546366e-05, + "loss": 0.1694, + "step": 42040 + }, + { + "epoch": 1.86, + "learning_rate": 1.9112858863234268e-05, + "loss": 0.1731, + "step": 42050 + }, + { + "epoch": 1.86, + "learning_rate": 1.910542513492217e-05, + "loss": 0.1877, + "step": 42060 + }, + { + "epoch": 1.86, + "learning_rate": 1.9097991406610073e-05, + "loss": 0.1519, + "step": 42070 + }, + { + "epoch": 1.86, + "learning_rate": 1.9090557678297972e-05, + "loss": 0.2077, + "step": 42080 + }, + { + "epoch": 1.86, + "learning_rate": 1.9083123949985875e-05, + "loss": 0.1386, + "step": 42090 + }, + { + "epoch": 1.86, + "learning_rate": 1.9075690221673778e-05, + "loss": 0.1433, + "step": 42100 + }, + { + "epoch": 1.86, + "learning_rate": 1.906825649336168e-05, + "loss": 0.1448, + "step": 42110 + }, + { + "epoch": 1.86, + "learning_rate": 1.9060822765049586e-05, + "loss": 0.175, + "step": 42120 + }, + { + "epoch": 1.87, + "learning_rate": 1.905338903673749e-05, + "loss": 0.1178, + "step": 42130 + }, + { + "epoch": 1.87, + "learning_rate": 1.9045955308425388e-05, + "loss": 0.2044, + "step": 42140 + }, + { + "epoch": 1.87, + "learning_rate": 1.903852158011329e-05, + "loss": 0.1408, + "step": 42150 + }, + { + "epoch": 1.87, + "learning_rate": 1.9031087851801193e-05, + "loss": 0.1148, + "step": 42160 + }, + { + "epoch": 1.87, + "learning_rate": 1.9023654123489096e-05, + "loss": 0.16, + "step": 42170 + }, + { + "epoch": 1.87, + "learning_rate": 1.9016220395176998e-05, + "loss": 0.1084, + "step": 42180 + }, + { + "epoch": 1.87, + "learning_rate": 1.90087866668649e-05, + "loss": 0.163, + "step": 42190 + }, + { + "epoch": 1.87, + "learning_rate": 1.9001352938552803e-05, + "loss": 0.1945, + "step": 42200 + }, + { + "epoch": 1.87, + "learning_rate": 1.8993919210240706e-05, + "loss": 0.1465, + "step": 42210 + }, + { + "epoch": 1.87, + "learning_rate": 1.8986485481928605e-05, + "loss": 0.111, + "step": 42220 + }, + { + "epoch": 1.87, + "learning_rate": 1.8979051753616508e-05, + "loss": 0.0976, + "step": 42230 + }, + { + "epoch": 1.87, + "learning_rate": 1.8971618025304414e-05, + "loss": 0.1112, + "step": 42240 + }, + { + "epoch": 1.87, + "learning_rate": 1.8964184296992316e-05, + "loss": 0.1763, + "step": 42250 + }, + { + "epoch": 1.87, + "learning_rate": 1.895675056868022e-05, + "loss": 0.1569, + "step": 42260 + }, + { + "epoch": 1.87, + "learning_rate": 1.894931684036812e-05, + "loss": 0.1199, + "step": 42270 + }, + { + "epoch": 1.87, + "learning_rate": 1.894188311205602e-05, + "loss": 0.1221, + "step": 42280 + }, + { + "epoch": 1.87, + "learning_rate": 1.8934449383743923e-05, + "loss": 0.1206, + "step": 42290 + }, + { + "epoch": 1.87, + "learning_rate": 1.8927015655431826e-05, + "loss": 0.1334, + "step": 42300 + }, + { + "epoch": 1.87, + "learning_rate": 1.891958192711973e-05, + "loss": 0.1679, + "step": 42310 + }, + { + "epoch": 1.87, + "learning_rate": 1.891214819880763e-05, + "loss": 0.1302, + "step": 42320 + }, + { + "epoch": 1.87, + "learning_rate": 1.8904714470495534e-05, + "loss": 0.1741, + "step": 42330 + }, + { + "epoch": 1.87, + "learning_rate": 1.8897280742183436e-05, + "loss": 0.1175, + "step": 42340 + }, + { + "epoch": 1.87, + "learning_rate": 1.8889847013871335e-05, + "loss": 0.209, + "step": 42350 + }, + { + "epoch": 1.88, + "learning_rate": 1.8882413285559238e-05, + "loss": 0.1415, + "step": 42360 + }, + { + "epoch": 1.88, + "learning_rate": 1.8874979557247144e-05, + "loss": 0.1839, + "step": 42370 + }, + { + "epoch": 1.88, + "learning_rate": 1.8867545828935046e-05, + "loss": 0.1057, + "step": 42380 + }, + { + "epoch": 1.88, + "learning_rate": 1.886011210062295e-05, + "loss": 0.1267, + "step": 42390 + }, + { + "epoch": 1.88, + "learning_rate": 1.885267837231085e-05, + "loss": 0.167, + "step": 42400 + }, + { + "epoch": 1.88, + "learning_rate": 1.884524464399875e-05, + "loss": 0.1613, + "step": 42410 + }, + { + "epoch": 1.88, + "learning_rate": 1.8837810915686653e-05, + "loss": 0.1547, + "step": 42420 + }, + { + "epoch": 1.88, + "learning_rate": 1.8830377187374556e-05, + "loss": 0.1275, + "step": 42430 + }, + { + "epoch": 1.88, + "learning_rate": 1.882294345906246e-05, + "loss": 0.1761, + "step": 42440 + }, + { + "epoch": 1.88, + "learning_rate": 1.881550973075036e-05, + "loss": 0.1467, + "step": 42450 + }, + { + "epoch": 1.88, + "learning_rate": 1.8808076002438264e-05, + "loss": 0.161, + "step": 42460 + }, + { + "epoch": 1.88, + "learning_rate": 1.8800642274126166e-05, + "loss": 0.1191, + "step": 42470 + }, + { + "epoch": 1.88, + "learning_rate": 1.879320854581407e-05, + "loss": 0.1541, + "step": 42480 + }, + { + "epoch": 1.88, + "learning_rate": 1.878577481750197e-05, + "loss": 0.1342, + "step": 42490 + }, + { + "epoch": 1.88, + "learning_rate": 1.8778341089189874e-05, + "loss": 0.1696, + "step": 42500 + } + ], + "max_steps": 67761, + "num_train_epochs": 3, + "total_flos": 1.2913330651864688e+17, + "trial_name": null, + "trial_params": null +}