{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.44236188478930466, "eval_steps": 500, "global_step": 40500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00019996132455658605, "loss": 42.0911, "step": 50 }, { "epoch": 0.0, "learning_rate": 0.0001999226491131721, "loss": 38.7852, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.00019988397366975814, "loss": 31.5332, "step": 150 }, { "epoch": 0.0, "learning_rate": 0.00019984529822634418, "loss": 25.7456, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.0001998066227829302, "loss": 20.5779, "step": 250 }, { "epoch": 0.0, "learning_rate": 0.00019976794733951624, "loss": 17.4712, "step": 300 }, { "epoch": 0.0, "learning_rate": 0.00019972927189610228, "loss": 14.2287, "step": 350 }, { "epoch": 0.0, "learning_rate": 0.00019969059645268835, "loss": 12.748, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.0001996519210092744, "loss": 11.9603, "step": 450 }, { "epoch": 0.0, "learning_rate": 0.00019961324556586044, "loss": 12.6114, "step": 500 }, { "epoch": 0.0, "learning_rate": 0.00019957457012244646, "loss": 11.2424, "step": 550 }, { "epoch": 0.0, "learning_rate": 0.0001995358946790325, "loss": 10.4185, "step": 600 }, { "epoch": 0.01, "learning_rate": 0.00019949721923561854, "loss": 10.6495, "step": 650 }, { "epoch": 0.01, "learning_rate": 0.00019945854379220458, "loss": 8.6583, "step": 700 }, { "epoch": 0.01, "learning_rate": 0.00019941986834879063, "loss": 7.9045, "step": 750 }, { "epoch": 0.01, "learning_rate": 0.00019938119290537667, "loss": 7.5867, "step": 800 }, { "epoch": 0.01, "learning_rate": 0.00019934251746196271, "loss": 6.344, "step": 850 }, { "epoch": 0.01, "learning_rate": 0.00019930384201854876, "loss": 7.0004, "step": 900 }, { "epoch": 0.01, "learning_rate": 0.0001992651665751348, "loss": 6.0177, "step": 950 }, { "epoch": 0.01, "learning_rate": 0.00019922649113172082, "loss": 6.4546, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.00019918781568830686, "loss": 4.9639, "step": 1050 }, { "epoch": 0.01, "learning_rate": 0.0001991491402448929, "loss": 4.5082, "step": 1100 }, { "epoch": 0.01, "learning_rate": 0.00019911046480147897, "loss": 4.9607, "step": 1150 }, { "epoch": 0.01, "learning_rate": 0.00019907178935806501, "loss": 4.6557, "step": 1200 }, { "epoch": 0.01, "learning_rate": 0.00019903311391465106, "loss": 3.7942, "step": 1250 }, { "epoch": 0.01, "learning_rate": 0.0001989944384712371, "loss": 3.3466, "step": 1300 }, { "epoch": 0.01, "learning_rate": 0.00019895576302782312, "loss": 4.2555, "step": 1350 }, { "epoch": 0.01, "learning_rate": 0.00019891708758440916, "loss": 3.7983, "step": 1400 }, { "epoch": 0.01, "learning_rate": 0.0001988784121409952, "loss": 3.6397, "step": 1450 }, { "epoch": 0.01, "learning_rate": 0.00019883973669758125, "loss": 3.2206, "step": 1500 }, { "epoch": 0.01, "learning_rate": 0.0001988010612541673, "loss": 2.9513, "step": 1550 }, { "epoch": 0.01, "learning_rate": 0.00019876238581075333, "loss": 3.443, "step": 1600 }, { "epoch": 0.01, "learning_rate": 0.00019872371036733938, "loss": 2.6171, "step": 1650 }, { "epoch": 0.01, "learning_rate": 0.00019868503492392542, "loss": 2.6626, "step": 1700 }, { "epoch": 0.01, "learning_rate": 0.00019864635948051143, "loss": 3.2079, "step": 1750 }, { "epoch": 0.01, "learning_rate": 0.00019860768403709748, "loss": 2.679, "step": 1800 }, { "epoch": 0.01, "learning_rate": 0.00019856900859368352, "loss": 3.2509, "step": 1850 }, { "epoch": 0.01, "learning_rate": 0.0001985303331502696, "loss": 2.3529, "step": 1900 }, { "epoch": 0.02, "learning_rate": 0.00019849165770685563, "loss": 2.3721, "step": 1950 }, { "epoch": 0.02, "learning_rate": 0.00019845298226344168, "loss": 2.7719, "step": 2000 }, { "epoch": 0.02, "learning_rate": 0.00019841430682002772, "loss": 2.3059, "step": 2050 }, { "epoch": 0.02, "learning_rate": 0.00019837563137661374, "loss": 2.9214, "step": 2100 }, { "epoch": 0.02, "learning_rate": 0.00019833695593319978, "loss": 2.4541, "step": 2150 }, { "epoch": 0.02, "learning_rate": 0.00019829828048978582, "loss": 2.3267, "step": 2200 }, { "epoch": 0.02, "learning_rate": 0.00019825960504637186, "loss": 2.1945, "step": 2250 }, { "epoch": 0.02, "learning_rate": 0.0001982209296029579, "loss": 2.3966, "step": 2300 }, { "epoch": 0.02, "learning_rate": 0.00019818225415954395, "loss": 2.5349, "step": 2350 }, { "epoch": 0.02, "learning_rate": 0.00019814357871613, "loss": 2.0588, "step": 2400 }, { "epoch": 0.02, "learning_rate": 0.00019810490327271604, "loss": 3.1209, "step": 2450 }, { "epoch": 0.02, "learning_rate": 0.00019806622782930205, "loss": 2.3281, "step": 2500 }, { "epoch": 0.02, "learning_rate": 0.0001980275523858881, "loss": 2.0749, "step": 2550 }, { "epoch": 0.02, "learning_rate": 0.00019798887694247414, "loss": 2.1665, "step": 2600 }, { "epoch": 0.02, "learning_rate": 0.0001979502014990602, "loss": 2.5256, "step": 2650 }, { "epoch": 0.02, "learning_rate": 0.00019791152605564625, "loss": 2.3435, "step": 2700 }, { "epoch": 0.02, "learning_rate": 0.0001978728506122323, "loss": 2.2333, "step": 2750 }, { "epoch": 0.02, "learning_rate": 0.00019783417516881834, "loss": 1.9695, "step": 2800 }, { "epoch": 0.02, "learning_rate": 0.00019779549972540435, "loss": 2.3046, "step": 2850 }, { "epoch": 0.02, "learning_rate": 0.0001977568242819904, "loss": 2.1951, "step": 2900 }, { "epoch": 0.02, "learning_rate": 0.00019771814883857644, "loss": 2.2141, "step": 2950 }, { "epoch": 0.02, "learning_rate": 0.00019767947339516248, "loss": 2.3285, "step": 3000 }, { "epoch": 0.02, "learning_rate": 0.00019764079795174853, "loss": 1.9263, "step": 3050 }, { "epoch": 0.02, "learning_rate": 0.00019760212250833457, "loss": 2.4391, "step": 3100 }, { "epoch": 0.02, "learning_rate": 0.0001975634470649206, "loss": 2.2386, "step": 3150 }, { "epoch": 0.02, "learning_rate": 0.00019752477162150665, "loss": 1.9979, "step": 3200 }, { "epoch": 0.03, "learning_rate": 0.00019748609617809267, "loss": 2.2926, "step": 3250 }, { "epoch": 0.03, "learning_rate": 0.0001974474207346787, "loss": 2.0263, "step": 3300 }, { "epoch": 0.03, "learning_rate": 0.00019740874529126476, "loss": 2.3533, "step": 3350 }, { "epoch": 0.03, "learning_rate": 0.00019737006984785083, "loss": 2.0248, "step": 3400 }, { "epoch": 0.03, "learning_rate": 0.00019733139440443687, "loss": 1.5322, "step": 3450 }, { "epoch": 0.03, "learning_rate": 0.0001972927189610229, "loss": 1.2563, "step": 3500 }, { "epoch": 0.03, "learning_rate": 0.00019725404351760896, "loss": 1.2361, "step": 3550 }, { "epoch": 0.03, "learning_rate": 0.00019721536807419497, "loss": 1.3821, "step": 3600 }, { "epoch": 0.03, "learning_rate": 0.00019717669263078101, "loss": 1.0988, "step": 3650 }, { "epoch": 0.03, "learning_rate": 0.00019713801718736706, "loss": 1.2244, "step": 3700 }, { "epoch": 0.03, "learning_rate": 0.0001970993417439531, "loss": 0.9095, "step": 3750 }, { "epoch": 0.03, "learning_rate": 0.00019706066630053914, "loss": 1.2458, "step": 3800 }, { "epoch": 0.03, "learning_rate": 0.0001970219908571252, "loss": 1.1168, "step": 3850 }, { "epoch": 0.03, "learning_rate": 0.00019698331541371123, "loss": 0.7974, "step": 3900 }, { "epoch": 0.03, "learning_rate": 0.00019694463997029727, "loss": 1.0594, "step": 3950 }, { "epoch": 0.03, "learning_rate": 0.0001969059645268833, "loss": 1.2522, "step": 4000 }, { "epoch": 0.03, "learning_rate": 0.00019686728908346933, "loss": 0.8916, "step": 4050 }, { "epoch": 0.03, "learning_rate": 0.0001968286136400554, "loss": 0.9284, "step": 4100 }, { "epoch": 0.03, "learning_rate": 0.00019678993819664144, "loss": 0.7177, "step": 4150 }, { "epoch": 0.03, "learning_rate": 0.0001967512627532275, "loss": 1.0662, "step": 4200 }, { "epoch": 0.03, "learning_rate": 0.00019671258730981353, "loss": 1.0509, "step": 4250 }, { "epoch": 0.03, "learning_rate": 0.00019667391186639957, "loss": 1.0486, "step": 4300 }, { "epoch": 0.03, "learning_rate": 0.0001966352364229856, "loss": 0.9541, "step": 4350 }, { "epoch": 0.03, "learning_rate": 0.00019659656097957163, "loss": 1.1056, "step": 4400 }, { "epoch": 0.03, "learning_rate": 0.00019655788553615768, "loss": 1.0613, "step": 4450 }, { "epoch": 0.03, "learning_rate": 0.00019651921009274372, "loss": 0.9647, "step": 4500 }, { "epoch": 0.04, "learning_rate": 0.00019648053464932976, "loss": 0.8281, "step": 4550 }, { "epoch": 0.04, "learning_rate": 0.0001964418592059158, "loss": 0.8205, "step": 4600 }, { "epoch": 0.04, "learning_rate": 0.00019640318376250185, "loss": 0.8107, "step": 4650 }, { "epoch": 0.04, "learning_rate": 0.0001963645083190879, "loss": 0.9087, "step": 4700 }, { "epoch": 0.04, "learning_rate": 0.00019632583287567393, "loss": 0.7476, "step": 4750 }, { "epoch": 0.04, "learning_rate": 0.00019628715743225995, "loss": 0.8191, "step": 4800 }, { "epoch": 0.04, "learning_rate": 0.00019624848198884602, "loss": 1.0138, "step": 4850 }, { "epoch": 0.04, "learning_rate": 0.00019620980654543206, "loss": 1.0121, "step": 4900 }, { "epoch": 0.04, "learning_rate": 0.0001961711311020181, "loss": 0.7376, "step": 4950 }, { "epoch": 0.04, "learning_rate": 0.00019613245565860415, "loss": 0.8335, "step": 5000 }, { "epoch": 0.04, "learning_rate": 0.0001960937802151902, "loss": 0.9411, "step": 5050 }, { "epoch": 0.04, "learning_rate": 0.0001960551047717762, "loss": 0.8631, "step": 5100 }, { "epoch": 0.04, "learning_rate": 0.00019601642932836225, "loss": 0.936, "step": 5150 }, { "epoch": 0.04, "learning_rate": 0.0001959777538849483, "loss": 0.8524, "step": 5200 }, { "epoch": 0.04, "learning_rate": 0.00019593907844153434, "loss": 0.7093, "step": 5250 }, { "epoch": 0.04, "learning_rate": 0.00019590040299812038, "loss": 0.8302, "step": 5300 }, { "epoch": 0.04, "learning_rate": 0.00019586172755470642, "loss": 0.8756, "step": 5350 }, { "epoch": 0.04, "learning_rate": 0.00019582305211129247, "loss": 0.7728, "step": 5400 }, { "epoch": 0.04, "learning_rate": 0.0001957843766678785, "loss": 0.809, "step": 5450 }, { "epoch": 0.04, "learning_rate": 0.00019574570122446455, "loss": 0.7282, "step": 5500 }, { "epoch": 0.04, "learning_rate": 0.00019570702578105057, "loss": 0.8608, "step": 5550 }, { "epoch": 0.04, "learning_rate": 0.00019566835033763664, "loss": 1.0078, "step": 5600 }, { "epoch": 0.04, "learning_rate": 0.00019562967489422268, "loss": 0.9773, "step": 5650 }, { "epoch": 0.04, "learning_rate": 0.00019559099945080872, "loss": 0.7969, "step": 5700 }, { "epoch": 0.04, "learning_rate": 0.00019555232400739477, "loss": 0.9988, "step": 5750 }, { "epoch": 0.04, "learning_rate": 0.0001955136485639808, "loss": 0.7552, "step": 5800 }, { "epoch": 0.05, "learning_rate": 0.00019547497312056685, "loss": 0.8775, "step": 5850 }, { "epoch": 0.05, "learning_rate": 0.00019543629767715287, "loss": 0.8895, "step": 5900 }, { "epoch": 0.05, "learning_rate": 0.0001953976222337389, "loss": 0.8106, "step": 5950 }, { "epoch": 0.05, "learning_rate": 0.00019535894679032496, "loss": 0.6546, "step": 6000 }, { "epoch": 0.05, "learning_rate": 0.000195320271346911, "loss": 0.9039, "step": 6050 }, { "epoch": 0.05, "learning_rate": 0.00019528159590349704, "loss": 0.652, "step": 6100 }, { "epoch": 0.05, "learning_rate": 0.00019524292046008308, "loss": 0.6561, "step": 6150 }, { "epoch": 0.05, "learning_rate": 0.00019520424501666913, "loss": 1.046, "step": 6200 }, { "epoch": 0.05, "learning_rate": 0.00019516556957325517, "loss": 0.8783, "step": 6250 }, { "epoch": 0.05, "learning_rate": 0.00019512689412984119, "loss": 0.7351, "step": 6300 }, { "epoch": 0.05, "learning_rate": 0.00019508821868642726, "loss": 0.733, "step": 6350 }, { "epoch": 0.05, "learning_rate": 0.0001950495432430133, "loss": 0.7675, "step": 6400 }, { "epoch": 0.05, "learning_rate": 0.00019501086779959934, "loss": 0.9451, "step": 6450 }, { "epoch": 0.05, "learning_rate": 0.00019497219235618539, "loss": 0.9686, "step": 6500 }, { "epoch": 0.05, "learning_rate": 0.00019493351691277143, "loss": 0.6083, "step": 6550 }, { "epoch": 0.05, "learning_rate": 0.00019489484146935747, "loss": 0.8619, "step": 6600 }, { "epoch": 0.05, "learning_rate": 0.0001948561660259435, "loss": 0.6557, "step": 6650 }, { "epoch": 0.05, "learning_rate": 0.00019481749058252953, "loss": 0.8819, "step": 6700 }, { "epoch": 0.05, "learning_rate": 0.00019477881513911557, "loss": 0.8356, "step": 6750 }, { "epoch": 0.05, "learning_rate": 0.00019474013969570162, "loss": 0.8211, "step": 6800 }, { "epoch": 0.05, "learning_rate": 0.00019470146425228766, "loss": 0.8393, "step": 6850 }, { "epoch": 0.05, "learning_rate": 0.0001946627888088737, "loss": 1.0301, "step": 6900 }, { "epoch": 0.05, "learning_rate": 0.00019462411336545975, "loss": 0.7435, "step": 6950 }, { "epoch": 0.05, "learning_rate": 0.0001945854379220458, "loss": 0.71, "step": 7000 }, { "epoch": 0.05, "learning_rate": 0.0001945467624786318, "loss": 0.7786, "step": 7050 }, { "epoch": 0.05, "learning_rate": 0.00019450808703521787, "loss": 1.1273, "step": 7100 }, { "epoch": 0.06, "learning_rate": 0.00019446941159180392, "loss": 0.923, "step": 7150 }, { "epoch": 0.06, "learning_rate": 0.00019443073614838996, "loss": 0.8656, "step": 7200 }, { "epoch": 0.06, "learning_rate": 0.000194392060704976, "loss": 0.8191, "step": 7250 }, { "epoch": 0.06, "learning_rate": 0.00019435338526156205, "loss": 0.8924, "step": 7300 }, { "epoch": 0.06, "learning_rate": 0.0001943147098181481, "loss": 0.9004, "step": 7350 }, { "epoch": 0.06, "learning_rate": 0.0001942760343747341, "loss": 0.6538, "step": 7400 }, { "epoch": 0.06, "learning_rate": 0.00019423735893132015, "loss": 0.8669, "step": 7450 }, { "epoch": 0.06, "learning_rate": 0.0001941986834879062, "loss": 0.9103, "step": 7500 }, { "epoch": 0.06, "learning_rate": 0.00019416000804449223, "loss": 0.8853, "step": 7550 }, { "epoch": 0.06, "learning_rate": 0.00019412133260107828, "loss": 0.7989, "step": 7600 }, { "epoch": 0.06, "learning_rate": 0.00019408265715766432, "loss": 0.6957, "step": 7650 }, { "epoch": 0.06, "learning_rate": 0.00019404398171425036, "loss": 0.8685, "step": 7700 }, { "epoch": 0.06, "learning_rate": 0.0001940053062708364, "loss": 0.6701, "step": 7750 }, { "epoch": 0.06, "learning_rate": 0.00019396663082742242, "loss": 0.7488, "step": 7800 }, { "epoch": 0.06, "learning_rate": 0.0001939279553840085, "loss": 0.9214, "step": 7850 }, { "epoch": 0.06, "learning_rate": 0.00019388927994059454, "loss": 0.7879, "step": 7900 }, { "epoch": 0.06, "learning_rate": 0.00019385060449718058, "loss": 0.8522, "step": 7950 }, { "epoch": 0.06, "learning_rate": 0.00019381192905376662, "loss": 0.9119, "step": 8000 }, { "epoch": 0.06, "learning_rate": 0.00019377325361035267, "loss": 0.6229, "step": 8050 }, { "epoch": 0.06, "learning_rate": 0.0001937345781669387, "loss": 0.8, "step": 8100 }, { "epoch": 0.06, "learning_rate": 0.00019369590272352472, "loss": 0.6705, "step": 8150 }, { "epoch": 0.06, "learning_rate": 0.00019365722728011077, "loss": 0.8694, "step": 8200 }, { "epoch": 0.06, "learning_rate": 0.0001936185518366968, "loss": 0.7932, "step": 8250 }, { "epoch": 0.06, "learning_rate": 0.00019357987639328285, "loss": 0.7311, "step": 8300 }, { "epoch": 0.06, "learning_rate": 0.0001935412009498689, "loss": 0.844, "step": 8350 }, { "epoch": 0.06, "learning_rate": 0.00019350252550645494, "loss": 0.8428, "step": 8400 }, { "epoch": 0.07, "learning_rate": 0.00019346385006304098, "loss": 0.8791, "step": 8450 }, { "epoch": 0.07, "learning_rate": 0.00019342517461962703, "loss": 0.9576, "step": 8500 }, { "epoch": 0.07, "learning_rate": 0.00019338649917621304, "loss": 0.821, "step": 8550 }, { "epoch": 0.07, "learning_rate": 0.0001933478237327991, "loss": 1.0343, "step": 8600 }, { "epoch": 0.07, "learning_rate": 0.00019330914828938515, "loss": 0.862, "step": 8650 }, { "epoch": 0.07, "learning_rate": 0.0001932704728459712, "loss": 0.6914, "step": 8700 }, { "epoch": 0.07, "learning_rate": 0.00019323179740255724, "loss": 1.0047, "step": 8750 }, { "epoch": 0.07, "learning_rate": 0.00019319312195914328, "loss": 0.7347, "step": 8800 }, { "epoch": 0.07, "learning_rate": 0.00019315444651572933, "loss": 0.7331, "step": 8850 }, { "epoch": 0.07, "learning_rate": 0.00019311577107231534, "loss": 0.9639, "step": 8900 }, { "epoch": 0.07, "learning_rate": 0.00019307709562890139, "loss": 0.7824, "step": 8950 }, { "epoch": 0.07, "learning_rate": 0.00019303842018548743, "loss": 0.8321, "step": 9000 }, { "epoch": 0.07, "learning_rate": 0.00019299974474207347, "loss": 1.053, "step": 9050 }, { "epoch": 0.07, "learning_rate": 0.00019296106929865951, "loss": 0.677, "step": 9100 }, { "epoch": 0.07, "learning_rate": 0.00019292239385524556, "loss": 0.8771, "step": 9150 }, { "epoch": 0.07, "learning_rate": 0.0001928837184118316, "loss": 0.7547, "step": 9200 }, { "epoch": 0.07, "learning_rate": 0.00019284504296841764, "loss": 0.7911, "step": 9250 }, { "epoch": 0.07, "learning_rate": 0.00019280636752500366, "loss": 0.8772, "step": 9300 }, { "epoch": 0.07, "learning_rate": 0.00019276769208158973, "loss": 1.0254, "step": 9350 }, { "epoch": 0.07, "learning_rate": 0.00019272901663817577, "loss": 0.9881, "step": 9400 }, { "epoch": 0.07, "learning_rate": 0.00019269034119476182, "loss": 0.9809, "step": 9450 }, { "epoch": 0.07, "learning_rate": 0.00019265166575134786, "loss": 0.6407, "step": 9500 }, { "epoch": 0.07, "learning_rate": 0.0001926129903079339, "loss": 0.8552, "step": 9550 }, { "epoch": 0.07, "learning_rate": 0.00019257431486451994, "loss": 0.5715, "step": 9600 }, { "epoch": 0.07, "learning_rate": 0.00019253563942110596, "loss": 0.7908, "step": 9650 }, { "epoch": 0.08, "learning_rate": 0.000192496963977692, "loss": 0.8544, "step": 9700 }, { "epoch": 0.08, "learning_rate": 0.00019245828853427805, "loss": 0.7795, "step": 9750 }, { "epoch": 0.08, "learning_rate": 0.0001924196130908641, "loss": 0.7534, "step": 9800 }, { "epoch": 0.08, "learning_rate": 0.00019238093764745013, "loss": 0.9141, "step": 9850 }, { "epoch": 0.08, "learning_rate": 0.00019234226220403618, "loss": 0.6377, "step": 9900 }, { "epoch": 0.08, "learning_rate": 0.00019230358676062222, "loss": 0.8392, "step": 9950 }, { "epoch": 0.08, "learning_rate": 0.00019226491131720826, "loss": 0.8541, "step": 10000 }, { "epoch": 0.08, "learning_rate": 0.0001922262358737943, "loss": 0.7969, "step": 10050 }, { "epoch": 0.08, "learning_rate": 0.00019218756043038035, "loss": 0.6434, "step": 10100 }, { "epoch": 0.08, "learning_rate": 0.0001921488849869664, "loss": 0.9645, "step": 10150 }, { "epoch": 0.08, "learning_rate": 0.00019211020954355243, "loss": 0.8545, "step": 10200 }, { "epoch": 0.08, "learning_rate": 0.00019207153410013848, "loss": 0.669, "step": 10250 }, { "epoch": 0.08, "learning_rate": 0.00019203285865672452, "loss": 0.7878, "step": 10300 }, { "epoch": 0.08, "learning_rate": 0.00019199418321331056, "loss": 0.6872, "step": 10350 }, { "epoch": 0.08, "learning_rate": 0.00019195550776989658, "loss": 0.7578, "step": 10400 }, { "epoch": 0.08, "learning_rate": 0.00019191683232648262, "loss": 0.6626, "step": 10450 }, { "epoch": 0.08, "learning_rate": 0.00019187815688306866, "loss": 0.7433, "step": 10500 }, { "epoch": 0.08, "learning_rate": 0.0001918394814396547, "loss": 0.8421, "step": 10550 }, { "epoch": 0.08, "learning_rate": 0.00019180080599624075, "loss": 0.8302, "step": 10600 }, { "epoch": 0.08, "learning_rate": 0.0001917621305528268, "loss": 0.7689, "step": 10650 }, { "epoch": 0.08, "learning_rate": 0.00019172345510941284, "loss": 0.7695, "step": 10700 }, { "epoch": 0.08, "learning_rate": 0.00019168477966599888, "loss": 0.8601, "step": 10750 }, { "epoch": 0.08, "learning_rate": 0.00019164610422258492, "loss": 1.0576, "step": 10800 }, { "epoch": 0.08, "learning_rate": 0.00019160742877917097, "loss": 0.6168, "step": 10850 }, { "epoch": 0.08, "learning_rate": 0.000191568753335757, "loss": 0.8053, "step": 10900 }, { "epoch": 0.08, "learning_rate": 0.00019153007789234305, "loss": 0.6034, "step": 10950 }, { "epoch": 0.09, "learning_rate": 0.0001914914024489291, "loss": 0.8146, "step": 11000 }, { "epoch": 0.09, "learning_rate": 0.00019145272700551514, "loss": 0.813, "step": 11050 }, { "epoch": 0.09, "learning_rate": 0.00019141405156210118, "loss": 0.7254, "step": 11100 }, { "epoch": 0.09, "learning_rate": 0.00019137537611868722, "loss": 0.8516, "step": 11150 }, { "epoch": 0.09, "learning_rate": 0.00019133670067527324, "loss": 0.8619, "step": 11200 }, { "epoch": 0.09, "learning_rate": 0.00019129802523185928, "loss": 0.8323, "step": 11250 }, { "epoch": 0.09, "learning_rate": 0.00019125934978844533, "loss": 0.802, "step": 11300 }, { "epoch": 0.09, "learning_rate": 0.00019122067434503137, "loss": 0.9431, "step": 11350 }, { "epoch": 0.09, "learning_rate": 0.0001911819989016174, "loss": 0.833, "step": 11400 }, { "epoch": 0.09, "learning_rate": 0.00019114332345820345, "loss": 0.8785, "step": 11450 }, { "epoch": 0.09, "learning_rate": 0.0001911046480147895, "loss": 0.894, "step": 11500 }, { "epoch": 0.09, "learning_rate": 0.00019106597257137554, "loss": 0.9139, "step": 11550 }, { "epoch": 0.09, "learning_rate": 0.00019102729712796158, "loss": 0.7368, "step": 11600 }, { "epoch": 0.09, "learning_rate": 0.00019098862168454763, "loss": 0.8103, "step": 11650 }, { "epoch": 0.09, "learning_rate": 0.00019094994624113367, "loss": 0.8547, "step": 11700 }, { "epoch": 0.09, "learning_rate": 0.0001909112707977197, "loss": 0.7595, "step": 11750 }, { "epoch": 0.09, "learning_rate": 0.00019087259535430576, "loss": 0.7432, "step": 11800 }, { "epoch": 0.09, "learning_rate": 0.0001908339199108918, "loss": 0.8002, "step": 11850 }, { "epoch": 0.09, "learning_rate": 0.00019079524446747784, "loss": 0.7238, "step": 11900 }, { "epoch": 0.09, "learning_rate": 0.00019075656902406386, "loss": 0.7368, "step": 11950 }, { "epoch": 0.09, "learning_rate": 0.0001907178935806499, "loss": 0.7619, "step": 12000 }, { "epoch": 0.09, "learning_rate": 0.00019067921813723594, "loss": 0.8555, "step": 12050 }, { "epoch": 0.09, "learning_rate": 0.000190640542693822, "loss": 0.7968, "step": 12100 }, { "epoch": 0.09, "learning_rate": 0.00019060186725040803, "loss": 0.6167, "step": 12150 }, { "epoch": 0.09, "learning_rate": 0.00019056319180699407, "loss": 0.7268, "step": 12200 }, { "epoch": 0.09, "learning_rate": 0.00019052451636358012, "loss": 0.7624, "step": 12250 }, { "epoch": 0.1, "learning_rate": 0.00019048584092016616, "loss": 0.7498, "step": 12300 }, { "epoch": 0.1, "learning_rate": 0.0001904471654767522, "loss": 0.7913, "step": 12350 }, { "epoch": 0.1, "learning_rate": 0.00019040849003333825, "loss": 0.9818, "step": 12400 }, { "epoch": 0.1, "learning_rate": 0.0001903698145899243, "loss": 0.9653, "step": 12450 }, { "epoch": 0.1, "learning_rate": 0.00019033113914651033, "loss": 0.8576, "step": 12500 }, { "epoch": 0.1, "learning_rate": 0.00019029246370309637, "loss": 0.7793, "step": 12550 }, { "epoch": 0.1, "learning_rate": 0.00019025378825968242, "loss": 0.9986, "step": 12600 }, { "epoch": 0.1, "learning_rate": 0.00019021511281626846, "loss": 0.6204, "step": 12650 }, { "epoch": 0.1, "learning_rate": 0.00019017643737285448, "loss": 0.7432, "step": 12700 }, { "epoch": 0.1, "learning_rate": 0.00019013776192944052, "loss": 0.7728, "step": 12750 }, { "epoch": 0.1, "learning_rate": 0.00019009908648602656, "loss": 0.924, "step": 12800 }, { "epoch": 0.1, "learning_rate": 0.0001900604110426126, "loss": 0.8306, "step": 12850 }, { "epoch": 0.1, "learning_rate": 0.00019002173559919865, "loss": 0.9331, "step": 12900 }, { "epoch": 0.1, "learning_rate": 0.0001899830601557847, "loss": 0.9156, "step": 12950 }, { "epoch": 0.1, "learning_rate": 0.00018994438471237073, "loss": 0.7475, "step": 13000 }, { "epoch": 0.1, "learning_rate": 0.00018990570926895678, "loss": 0.8014, "step": 13050 }, { "epoch": 0.1, "learning_rate": 0.00018986703382554282, "loss": 0.7636, "step": 13100 }, { "epoch": 0.1, "learning_rate": 0.00018982835838212886, "loss": 0.8878, "step": 13150 }, { "epoch": 0.1, "learning_rate": 0.0001897896829387149, "loss": 0.7146, "step": 13200 }, { "epoch": 0.1, "learning_rate": 0.00018975100749530095, "loss": 0.7577, "step": 13250 }, { "epoch": 0.1, "learning_rate": 0.000189712332051887, "loss": 0.9388, "step": 13300 }, { "epoch": 0.1, "learning_rate": 0.00018967365660847304, "loss": 0.7735, "step": 13350 }, { "epoch": 0.1, "learning_rate": 0.00018963498116505908, "loss": 0.6801, "step": 13400 }, { "epoch": 0.1, "learning_rate": 0.0001895963057216451, "loss": 0.7908, "step": 13450 }, { "epoch": 0.1, "learning_rate": 0.00018955763027823114, "loss": 0.7054, "step": 13500 }, { "epoch": 0.1, "learning_rate": 0.00018951895483481718, "loss": 0.8082, "step": 13550 }, { "epoch": 0.11, "learning_rate": 0.00018948027939140322, "loss": 0.7959, "step": 13600 }, { "epoch": 0.11, "learning_rate": 0.00018944160394798927, "loss": 0.8319, "step": 13650 }, { "epoch": 0.11, "learning_rate": 0.0001894029285045753, "loss": 0.7559, "step": 13700 }, { "epoch": 0.11, "learning_rate": 0.00018936425306116135, "loss": 0.6439, "step": 13750 }, { "epoch": 0.11, "learning_rate": 0.0001893255776177474, "loss": 0.7906, "step": 13800 }, { "epoch": 0.11, "learning_rate": 0.00018928690217433344, "loss": 0.9517, "step": 13850 }, { "epoch": 0.11, "learning_rate": 0.00018924822673091948, "loss": 0.8082, "step": 13900 }, { "epoch": 0.11, "learning_rate": 0.00018920955128750552, "loss": 0.8872, "step": 13950 }, { "epoch": 0.11, "learning_rate": 0.00018917087584409157, "loss": 0.6533, "step": 14000 }, { "epoch": 0.11, "learning_rate": 0.0001891322004006776, "loss": 0.8846, "step": 14050 }, { "epoch": 0.11, "learning_rate": 0.00018909352495726365, "loss": 0.7644, "step": 14100 }, { "epoch": 0.11, "learning_rate": 0.0001890548495138497, "loss": 0.9197, "step": 14150 }, { "epoch": 0.11, "learning_rate": 0.0001890161740704357, "loss": 0.8356, "step": 14200 }, { "epoch": 0.11, "learning_rate": 0.00018897749862702176, "loss": 0.7626, "step": 14250 }, { "epoch": 0.11, "learning_rate": 0.0001889388231836078, "loss": 0.7978, "step": 14300 }, { "epoch": 0.11, "learning_rate": 0.00018890014774019384, "loss": 0.9382, "step": 14350 }, { "epoch": 0.11, "learning_rate": 0.00018886147229677988, "loss": 0.8213, "step": 14400 }, { "epoch": 0.11, "learning_rate": 0.00018882279685336593, "loss": 0.8098, "step": 14450 }, { "epoch": 0.11, "learning_rate": 0.00018878412140995197, "loss": 0.6624, "step": 14500 }, { "epoch": 0.11, "learning_rate": 0.00018874544596653801, "loss": 0.6901, "step": 14550 }, { "epoch": 0.11, "learning_rate": 0.00018870677052312406, "loss": 0.8449, "step": 14600 }, { "epoch": 0.11, "learning_rate": 0.0001886680950797101, "loss": 0.992, "step": 14650 }, { "epoch": 0.11, "learning_rate": 0.00018862941963629614, "loss": 0.8163, "step": 14700 }, { "epoch": 0.11, "learning_rate": 0.00018859074419288219, "loss": 0.9079, "step": 14750 }, { "epoch": 0.11, "learning_rate": 0.00018855206874946823, "loss": 0.6967, "step": 14800 }, { "epoch": 0.11, "learning_rate": 0.00018851339330605427, "loss": 0.7634, "step": 14850 }, { "epoch": 0.12, "learning_rate": 0.00018847471786264032, "loss": 0.881, "step": 14900 }, { "epoch": 0.12, "learning_rate": 0.00018843604241922633, "loss": 0.9108, "step": 14950 }, { "epoch": 0.12, "learning_rate": 0.00018839736697581237, "loss": 0.7132, "step": 15000 }, { "epoch": 0.12, "learning_rate": 0.00018835869153239842, "loss": 0.5067, "step": 15050 }, { "epoch": 0.12, "learning_rate": 0.00018832001608898446, "loss": 1.1357, "step": 15100 }, { "epoch": 0.12, "learning_rate": 0.0001882813406455705, "loss": 0.7256, "step": 15150 }, { "epoch": 0.12, "learning_rate": 0.00018824266520215655, "loss": 0.6846, "step": 15200 }, { "epoch": 0.12, "learning_rate": 0.0001882039897587426, "loss": 0.8358, "step": 15250 }, { "epoch": 0.12, "learning_rate": 0.00018816531431532863, "loss": 0.7776, "step": 15300 }, { "epoch": 0.12, "learning_rate": 0.00018812663887191468, "loss": 0.5573, "step": 15350 }, { "epoch": 0.12, "learning_rate": 0.00018808796342850072, "loss": 0.6548, "step": 15400 }, { "epoch": 0.12, "learning_rate": 0.00018804928798508676, "loss": 0.7813, "step": 15450 }, { "epoch": 0.12, "learning_rate": 0.0001880106125416728, "loss": 0.853, "step": 15500 }, { "epoch": 0.12, "learning_rate": 0.00018797193709825885, "loss": 0.757, "step": 15550 }, { "epoch": 0.12, "learning_rate": 0.0001879332616548449, "loss": 0.7511, "step": 15600 }, { "epoch": 0.12, "learning_rate": 0.00018789458621143093, "loss": 0.8809, "step": 15650 }, { "epoch": 0.12, "learning_rate": 0.00018785591076801698, "loss": 0.6439, "step": 15700 }, { "epoch": 0.12, "learning_rate": 0.000187817235324603, "loss": 0.6401, "step": 15750 }, { "epoch": 0.12, "learning_rate": 0.00018777855988118904, "loss": 0.9463, "step": 15800 }, { "epoch": 0.12, "learning_rate": 0.00018773988443777508, "loss": 0.7206, "step": 15850 }, { "epoch": 0.12, "learning_rate": 0.00018770120899436112, "loss": 0.738, "step": 15900 }, { "epoch": 0.12, "learning_rate": 0.00018766253355094716, "loss": 0.8078, "step": 15950 }, { "epoch": 0.12, "learning_rate": 0.0001876238581075332, "loss": 0.8814, "step": 16000 }, { "epoch": 0.12, "learning_rate": 0.00018758518266411925, "loss": 0.7841, "step": 16050 }, { "epoch": 0.12, "learning_rate": 0.0001875465072207053, "loss": 0.9534, "step": 16100 }, { "epoch": 0.12, "learning_rate": 0.00018750783177729134, "loss": 0.7588, "step": 16150 }, { "epoch": 0.13, "learning_rate": 0.00018746915633387738, "loss": 0.7467, "step": 16200 }, { "epoch": 0.13, "learning_rate": 0.00018743048089046342, "loss": 0.7402, "step": 16250 }, { "epoch": 0.13, "learning_rate": 0.00018739180544704947, "loss": 0.7391, "step": 16300 }, { "epoch": 0.13, "learning_rate": 0.0001873531300036355, "loss": 0.93, "step": 16350 }, { "epoch": 0.13, "learning_rate": 0.00018731445456022155, "loss": 0.673, "step": 16400 }, { "epoch": 0.13, "learning_rate": 0.0001872757791168076, "loss": 0.8719, "step": 16450 }, { "epoch": 0.13, "learning_rate": 0.0001872371036733936, "loss": 0.7977, "step": 16500 }, { "epoch": 0.13, "learning_rate": 0.00018719842822997965, "loss": 0.8446, "step": 16550 }, { "epoch": 0.13, "learning_rate": 0.0001871597527865657, "loss": 0.5509, "step": 16600 }, { "epoch": 0.13, "learning_rate": 0.00018712107734315174, "loss": 0.7187, "step": 16650 }, { "epoch": 0.13, "learning_rate": 0.00018708240189973778, "loss": 0.7886, "step": 16700 }, { "epoch": 0.13, "learning_rate": 0.00018704372645632383, "loss": 0.787, "step": 16750 }, { "epoch": 0.13, "learning_rate": 0.00018700505101290987, "loss": 0.8182, "step": 16800 }, { "epoch": 0.13, "learning_rate": 0.0001869663755694959, "loss": 0.7996, "step": 16850 }, { "epoch": 0.13, "learning_rate": 0.00018692770012608195, "loss": 1.0537, "step": 16900 }, { "epoch": 0.13, "learning_rate": 0.000186889024682668, "loss": 0.7795, "step": 16950 }, { "epoch": 0.13, "learning_rate": 0.00018685034923925404, "loss": 0.6382, "step": 17000 }, { "epoch": 0.13, "learning_rate": 0.00018681167379584008, "loss": 0.8503, "step": 17050 }, { "epoch": 0.13, "learning_rate": 0.00018677299835242613, "loss": 0.7172, "step": 17100 }, { "epoch": 0.13, "learning_rate": 0.00018673432290901217, "loss": 0.8269, "step": 17150 }, { "epoch": 0.13, "learning_rate": 0.0001866956474655982, "loss": 0.8608, "step": 17200 }, { "epoch": 0.13, "learning_rate": 0.00018665697202218423, "loss": 0.5488, "step": 17250 }, { "epoch": 0.13, "learning_rate": 0.00018661829657877027, "loss": 0.6198, "step": 17300 }, { "epoch": 0.13, "learning_rate": 0.00018657962113535631, "loss": 0.6294, "step": 17350 }, { "epoch": 0.13, "learning_rate": 0.00018654094569194236, "loss": 0.6832, "step": 17400 }, { "epoch": 0.13, "learning_rate": 0.0001865022702485284, "loss": 0.7857, "step": 17450 }, { "epoch": 0.14, "learning_rate": 0.00018646359480511444, "loss": 0.873, "step": 17500 }, { "epoch": 0.14, "learning_rate": 0.00018642491936170051, "loss": 0.8379, "step": 17550 }, { "epoch": 0.14, "learning_rate": 0.00018638624391828653, "loss": 0.716, "step": 17600 }, { "epoch": 0.14, "learning_rate": 0.00018634756847487257, "loss": 0.7123, "step": 17650 }, { "epoch": 0.14, "learning_rate": 0.00018630889303145862, "loss": 0.7582, "step": 17700 }, { "epoch": 0.14, "learning_rate": 0.00018627021758804466, "loss": 0.8126, "step": 17750 }, { "epoch": 0.14, "learning_rate": 0.0001862315421446307, "loss": 0.8564, "step": 17800 }, { "epoch": 0.14, "learning_rate": 0.00018619286670121674, "loss": 0.5869, "step": 17850 }, { "epoch": 0.14, "learning_rate": 0.0001861541912578028, "loss": 0.7508, "step": 17900 }, { "epoch": 0.14, "learning_rate": 0.00018611551581438883, "loss": 0.7061, "step": 17950 }, { "epoch": 0.14, "learning_rate": 0.00018607684037097485, "loss": 0.7345, "step": 18000 }, { "epoch": 0.14, "learning_rate": 0.0001860381649275609, "loss": 0.5775, "step": 18050 }, { "epoch": 0.14, "learning_rate": 0.00018599948948414693, "loss": 0.7817, "step": 18100 }, { "epoch": 0.14, "learning_rate": 0.00018596081404073298, "loss": 0.7201, "step": 18150 }, { "epoch": 0.14, "learning_rate": 0.00018592213859731902, "loss": 0.8352, "step": 18200 }, { "epoch": 0.14, "learning_rate": 0.00018588346315390506, "loss": 0.7986, "step": 18250 }, { "epoch": 0.14, "learning_rate": 0.00018584478771049113, "loss": 0.5892, "step": 18300 }, { "epoch": 0.14, "learning_rate": 0.00018580611226707715, "loss": 0.6573, "step": 18350 }, { "epoch": 0.14, "learning_rate": 0.0001857674368236632, "loss": 0.7291, "step": 18400 }, { "epoch": 0.14, "learning_rate": 0.00018572876138024923, "loss": 0.8477, "step": 18450 }, { "epoch": 0.14, "learning_rate": 0.00018569008593683528, "loss": 0.7634, "step": 18500 }, { "epoch": 0.14, "learning_rate": 0.00018565141049342132, "loss": 0.5596, "step": 18550 }, { "epoch": 0.14, "learning_rate": 0.00018561273505000736, "loss": 0.7536, "step": 18600 }, { "epoch": 0.14, "learning_rate": 0.0001855740596065934, "loss": 0.8015, "step": 18650 }, { "epoch": 0.14, "learning_rate": 0.00018553538416317945, "loss": 0.9044, "step": 18700 }, { "epoch": 0.15, "learning_rate": 0.00018549670871976547, "loss": 0.7212, "step": 18750 }, { "epoch": 0.15, "learning_rate": 0.0001854580332763515, "loss": 0.6835, "step": 18800 }, { "epoch": 0.15, "learning_rate": 0.00018541935783293755, "loss": 0.6431, "step": 18850 }, { "epoch": 0.15, "learning_rate": 0.0001853806823895236, "loss": 0.6776, "step": 18900 }, { "epoch": 0.15, "learning_rate": 0.00018534200694610964, "loss": 0.8134, "step": 18950 }, { "epoch": 0.15, "learning_rate": 0.00018530333150269568, "loss": 0.7613, "step": 19000 }, { "epoch": 0.15, "learning_rate": 0.00018526465605928175, "loss": 0.6909, "step": 19050 }, { "epoch": 0.15, "learning_rate": 0.00018522598061586777, "loss": 0.5647, "step": 19100 }, { "epoch": 0.15, "learning_rate": 0.0001851873051724538, "loss": 0.845, "step": 19150 }, { "epoch": 0.15, "learning_rate": 0.00018514862972903985, "loss": 0.6676, "step": 19200 }, { "epoch": 0.15, "learning_rate": 0.0001851099542856259, "loss": 0.608, "step": 19250 }, { "epoch": 0.15, "learning_rate": 0.00018507127884221194, "loss": 0.6545, "step": 19300 }, { "epoch": 0.15, "learning_rate": 0.00018503260339879798, "loss": 0.8084, "step": 19350 }, { "epoch": 0.15, "learning_rate": 0.00018499392795538402, "loss": 0.9323, "step": 19400 }, { "epoch": 0.15, "learning_rate": 0.00018495525251197007, "loss": 0.7761, "step": 19450 }, { "epoch": 0.15, "learning_rate": 0.00018491657706855608, "loss": 0.7525, "step": 19500 }, { "epoch": 0.15, "learning_rate": 0.00018487790162514213, "loss": 0.7387, "step": 19550 }, { "epoch": 0.15, "learning_rate": 0.00018483922618172817, "loss": 0.7412, "step": 19600 }, { "epoch": 0.15, "learning_rate": 0.0001848005507383142, "loss": 0.6455, "step": 19650 }, { "epoch": 0.15, "learning_rate": 0.00018476187529490026, "loss": 0.6401, "step": 19700 }, { "epoch": 0.15, "learning_rate": 0.00018472319985148633, "loss": 0.7524, "step": 19750 }, { "epoch": 0.15, "learning_rate": 0.00018468452440807237, "loss": 0.8381, "step": 19800 }, { "epoch": 0.15, "learning_rate": 0.00018464584896465838, "loss": 0.7317, "step": 19850 }, { "epoch": 0.15, "learning_rate": 0.00018460717352124443, "loss": 0.7321, "step": 19900 }, { "epoch": 0.15, "learning_rate": 0.00018456849807783047, "loss": 0.8627, "step": 19950 }, { "epoch": 0.15, "learning_rate": 0.0001845298226344165, "loss": 0.8806, "step": 20000 }, { "epoch": 0.16, "learning_rate": 0.00018449114719100256, "loss": 0.6949, "step": 20050 }, { "epoch": 0.16, "learning_rate": 0.0001844524717475886, "loss": 0.8466, "step": 20100 }, { "epoch": 0.16, "learning_rate": 0.00018441379630417464, "loss": 0.468, "step": 20150 }, { "epoch": 0.16, "learning_rate": 0.00018437512086076069, "loss": 0.8107, "step": 20200 }, { "epoch": 0.16, "learning_rate": 0.0001843364454173467, "loss": 0.8214, "step": 20250 }, { "epoch": 0.16, "learning_rate": 0.00018429776997393274, "loss": 0.6371, "step": 20300 }, { "epoch": 0.16, "learning_rate": 0.0001842590945305188, "loss": 0.8139, "step": 20350 }, { "epoch": 0.16, "learning_rate": 0.00018422041908710483, "loss": 0.8821, "step": 20400 }, { "epoch": 0.16, "learning_rate": 0.00018418174364369087, "loss": 0.749, "step": 20450 }, { "epoch": 0.16, "learning_rate": 0.00018414306820027694, "loss": 0.7666, "step": 20500 }, { "epoch": 0.16, "learning_rate": 0.000184104392756863, "loss": 0.804, "step": 20550 }, { "epoch": 0.16, "learning_rate": 0.000184065717313449, "loss": 0.8258, "step": 20600 }, { "epoch": 0.16, "learning_rate": 0.00018402704187003505, "loss": 0.6565, "step": 20650 }, { "epoch": 0.16, "learning_rate": 0.0001839883664266211, "loss": 0.6481, "step": 20700 }, { "epoch": 0.16, "learning_rate": 0.00018394969098320713, "loss": 0.5938, "step": 20750 }, { "epoch": 0.16, "learning_rate": 0.00018391101553979317, "loss": 0.6611, "step": 20800 }, { "epoch": 0.16, "learning_rate": 0.00018387234009637922, "loss": 0.9062, "step": 20850 }, { "epoch": 0.16, "learning_rate": 0.00018383366465296526, "loss": 0.6141, "step": 20900 }, { "epoch": 0.16, "learning_rate": 0.0001837949892095513, "loss": 0.6457, "step": 20950 }, { "epoch": 0.16, "learning_rate": 0.00018375631376613735, "loss": 0.5349, "step": 21000 }, { "epoch": 0.16, "learning_rate": 0.00018371763832272336, "loss": 0.6687, "step": 21050 }, { "epoch": 0.16, "learning_rate": 0.0001836789628793094, "loss": 1.0448, "step": 21100 }, { "epoch": 0.16, "learning_rate": 0.00018364028743589545, "loss": 0.8059, "step": 21150 }, { "epoch": 0.16, "learning_rate": 0.0001836016119924815, "loss": 0.6748, "step": 21200 }, { "epoch": 0.16, "learning_rate": 0.00018356293654906756, "loss": 0.5979, "step": 21250 }, { "epoch": 0.16, "learning_rate": 0.0001835242611056536, "loss": 0.8469, "step": 21300 }, { "epoch": 0.17, "learning_rate": 0.00018348558566223962, "loss": 0.7463, "step": 21350 }, { "epoch": 0.17, "learning_rate": 0.00018344691021882566, "loss": 0.7493, "step": 21400 }, { "epoch": 0.17, "learning_rate": 0.0001834082347754117, "loss": 0.8654, "step": 21450 }, { "epoch": 0.17, "learning_rate": 0.00018336955933199775, "loss": 0.7216, "step": 21500 }, { "epoch": 0.17, "learning_rate": 0.0001833308838885838, "loss": 0.7847, "step": 21550 }, { "epoch": 0.17, "learning_rate": 0.00018329220844516984, "loss": 0.5339, "step": 21600 }, { "epoch": 0.17, "learning_rate": 0.00018325353300175588, "loss": 0.7045, "step": 21650 }, { "epoch": 0.17, "learning_rate": 0.00018321485755834192, "loss": 0.6995, "step": 21700 }, { "epoch": 0.17, "learning_rate": 0.00018317618211492797, "loss": 0.736, "step": 21750 }, { "epoch": 0.17, "learning_rate": 0.00018313750667151398, "loss": 0.7212, "step": 21800 }, { "epoch": 0.17, "learning_rate": 0.00018309883122810002, "loss": 0.6062, "step": 21850 }, { "epoch": 0.17, "learning_rate": 0.00018306015578468607, "loss": 0.889, "step": 21900 }, { "epoch": 0.17, "learning_rate": 0.0001830214803412721, "loss": 0.6812, "step": 21950 }, { "epoch": 0.17, "learning_rate": 0.00018298280489785818, "loss": 0.7713, "step": 22000 }, { "epoch": 0.17, "learning_rate": 0.00018294412945444422, "loss": 0.7462, "step": 22050 }, { "epoch": 0.17, "learning_rate": 0.00018290545401103027, "loss": 0.5084, "step": 22100 }, { "epoch": 0.17, "learning_rate": 0.00018286677856761628, "loss": 0.6875, "step": 22150 }, { "epoch": 0.17, "learning_rate": 0.00018282810312420233, "loss": 0.8552, "step": 22200 }, { "epoch": 0.17, "learning_rate": 0.00018278942768078837, "loss": 0.7549, "step": 22250 }, { "epoch": 0.17, "learning_rate": 0.0001827507522373744, "loss": 0.6307, "step": 22300 }, { "epoch": 0.17, "learning_rate": 0.00018271207679396045, "loss": 1.0293, "step": 22350 }, { "epoch": 0.17, "learning_rate": 0.0001826734013505465, "loss": 0.7603, "step": 22400 }, { "epoch": 0.17, "learning_rate": 0.00018263472590713254, "loss": 0.5218, "step": 22450 }, { "epoch": 0.17, "learning_rate": 0.00018259605046371858, "loss": 0.5962, "step": 22500 }, { "epoch": 0.17, "learning_rate": 0.0001825573750203046, "loss": 0.7793, "step": 22550 }, { "epoch": 0.17, "learning_rate": 0.00018251869957689064, "loss": 0.6511, "step": 22600 }, { "epoch": 0.18, "learning_rate": 0.00018248002413347669, "loss": 0.6589, "step": 22650 }, { "epoch": 0.18, "learning_rate": 0.00018244134869006273, "loss": 0.826, "step": 22700 }, { "epoch": 0.18, "learning_rate": 0.0001824026732466488, "loss": 0.7561, "step": 22750 }, { "epoch": 0.18, "learning_rate": 0.00018236399780323484, "loss": 0.7605, "step": 22800 }, { "epoch": 0.18, "learning_rate": 0.00018232532235982088, "loss": 0.7887, "step": 22850 }, { "epoch": 0.18, "learning_rate": 0.0001822866469164069, "loss": 0.6065, "step": 22900 }, { "epoch": 0.18, "learning_rate": 0.00018224797147299294, "loss": 0.7631, "step": 22950 }, { "epoch": 0.18, "learning_rate": 0.00018220929602957899, "loss": 0.6708, "step": 23000 }, { "epoch": 0.18, "learning_rate": 0.00018217062058616503, "loss": 0.8115, "step": 23050 }, { "epoch": 0.18, "learning_rate": 0.00018213194514275107, "loss": 0.5469, "step": 23100 }, { "epoch": 0.18, "learning_rate": 0.00018209326969933712, "loss": 0.751, "step": 23150 }, { "epoch": 0.18, "learning_rate": 0.00018205459425592316, "loss": 0.6424, "step": 23200 }, { "epoch": 0.18, "learning_rate": 0.0001820159188125092, "loss": 0.7017, "step": 23250 }, { "epoch": 0.18, "learning_rate": 0.00018197724336909522, "loss": 0.615, "step": 23300 }, { "epoch": 0.18, "learning_rate": 0.00018193856792568126, "loss": 0.9175, "step": 23350 }, { "epoch": 0.18, "learning_rate": 0.0001818998924822673, "loss": 0.9438, "step": 23400 }, { "epoch": 0.18, "learning_rate": 0.00018186121703885335, "loss": 0.8183, "step": 23450 }, { "epoch": 0.18, "learning_rate": 0.00018182254159543942, "loss": 0.9829, "step": 23500 }, { "epoch": 0.18, "learning_rate": 0.00018178386615202546, "loss": 0.7703, "step": 23550 }, { "epoch": 0.18, "learning_rate": 0.0001817451907086115, "loss": 0.6007, "step": 23600 }, { "epoch": 0.18, "learning_rate": 0.00018170651526519752, "loss": 0.6095, "step": 23650 }, { "epoch": 0.18, "learning_rate": 0.00018166783982178356, "loss": 0.6811, "step": 23700 }, { "epoch": 0.18, "learning_rate": 0.0001816291643783696, "loss": 0.6791, "step": 23750 }, { "epoch": 0.18, "learning_rate": 0.00018159048893495565, "loss": 0.8032, "step": 23800 }, { "epoch": 0.18, "learning_rate": 0.0001815518134915417, "loss": 0.6968, "step": 23850 }, { "epoch": 0.18, "learning_rate": 0.00018151313804812773, "loss": 0.7912, "step": 23900 }, { "epoch": 0.19, "learning_rate": 0.00018147446260471378, "loss": 0.6557, "step": 23950 }, { "epoch": 0.19, "learning_rate": 0.00018143578716129982, "loss": 0.7041, "step": 24000 }, { "epoch": 0.19, "learning_rate": 0.00018139711171788584, "loss": 0.7028, "step": 24050 }, { "epoch": 0.19, "learning_rate": 0.00018135843627447188, "loss": 0.5454, "step": 24100 }, { "epoch": 0.19, "learning_rate": 0.00018131976083105792, "loss": 0.8485, "step": 24150 }, { "epoch": 0.19, "learning_rate": 0.00018128108538764396, "loss": 0.6944, "step": 24200 }, { "epoch": 0.19, "learning_rate": 0.00018124240994423003, "loss": 0.6429, "step": 24250 }, { "epoch": 0.19, "learning_rate": 0.00018120373450081608, "loss": 0.9911, "step": 24300 }, { "epoch": 0.19, "learning_rate": 0.00018116505905740212, "loss": 0.7624, "step": 24350 }, { "epoch": 0.19, "learning_rate": 0.00018112638361398814, "loss": 0.8377, "step": 24400 }, { "epoch": 0.19, "learning_rate": 0.00018108770817057418, "loss": 0.7377, "step": 24450 }, { "epoch": 0.19, "learning_rate": 0.00018104903272716022, "loss": 0.8191, "step": 24500 }, { "epoch": 0.19, "learning_rate": 0.00018101035728374627, "loss": 0.6292, "step": 24550 }, { "epoch": 0.19, "learning_rate": 0.0001809716818403323, "loss": 0.7387, "step": 24600 }, { "epoch": 0.19, "learning_rate": 0.00018093300639691835, "loss": 0.7166, "step": 24650 }, { "epoch": 0.19, "learning_rate": 0.0001808943309535044, "loss": 0.787, "step": 24700 }, { "epoch": 0.19, "learning_rate": 0.00018085565551009044, "loss": 0.6451, "step": 24750 }, { "epoch": 0.19, "learning_rate": 0.00018081698006667645, "loss": 0.8142, "step": 24800 }, { "epoch": 0.19, "learning_rate": 0.0001807783046232625, "loss": 0.9096, "step": 24850 }, { "epoch": 0.19, "learning_rate": 0.00018073962917984854, "loss": 0.6828, "step": 24900 }, { "epoch": 0.19, "learning_rate": 0.00018070095373643458, "loss": 0.5445, "step": 24950 }, { "epoch": 0.19, "learning_rate": 0.00018066227829302065, "loss": 0.8181, "step": 25000 }, { "epoch": 0.2, "learning_rate": 0.00018047301300240095, "loss": 1.124, "step": 25050 }, { "epoch": 0.2, "learning_rate": 0.00018043403698044964, "loss": 1.1854, "step": 25100 }, { "epoch": 0.2, "learning_rate": 0.00018039506095849834, "loss": 1.0665, "step": 25150 }, { "epoch": 0.2, "learning_rate": 0.00018035608493654706, "loss": 0.8882, "step": 25200 }, { "epoch": 0.2, "learning_rate": 0.00018031710891459575, "loss": 1.0253, "step": 25250 }, { "epoch": 0.2, "learning_rate": 0.00018027813289264445, "loss": 1.0734, "step": 25300 }, { "epoch": 0.2, "learning_rate": 0.00018023915687069315, "loss": 1.0606, "step": 25350 }, { "epoch": 0.2, "learning_rate": 0.00018020018084874187, "loss": 0.9589, "step": 25400 }, { "epoch": 0.2, "learning_rate": 0.00018016120482679056, "loss": 1.0562, "step": 25450 }, { "epoch": 0.2, "learning_rate": 0.00018012222880483926, "loss": 0.8676, "step": 25500 }, { "epoch": 0.2, "learning_rate": 0.00018008325278288798, "loss": 1.0997, "step": 25550 }, { "epoch": 0.2, "learning_rate": 0.00018004427676093668, "loss": 0.9763, "step": 25600 }, { "epoch": 0.2, "learning_rate": 0.00018000530073898537, "loss": 0.8347, "step": 25650 }, { "epoch": 0.2, "learning_rate": 0.0001799663247170341, "loss": 0.9396, "step": 25700 }, { "epoch": 0.2, "learning_rate": 0.0001799273486950828, "loss": 0.9281, "step": 25750 }, { "epoch": 0.2, "learning_rate": 0.0001798883726731315, "loss": 0.9826, "step": 25800 }, { "epoch": 0.2, "learning_rate": 0.0001798493966511802, "loss": 0.8583, "step": 25850 }, { "epoch": 0.2, "learning_rate": 0.0001798104206292289, "loss": 0.8509, "step": 25900 }, { "epoch": 0.2, "learning_rate": 0.00017977144460727763, "loss": 0.8912, "step": 25950 }, { "epoch": 0.2, "learning_rate": 0.0001797324685853263, "loss": 0.8786, "step": 26000 }, { "epoch": 0.2, "learning_rate": 0.00017969349256337502, "loss": 0.8482, "step": 26050 }, { "epoch": 0.2, "learning_rate": 0.00017965451654142374, "loss": 0.9426, "step": 26100 }, { "epoch": 0.2, "learning_rate": 0.00017961554051947244, "loss": 0.9505, "step": 26150 }, { "epoch": 0.2, "learning_rate": 0.00017957656449752113, "loss": 0.8555, "step": 26200 }, { "epoch": 0.2, "learning_rate": 0.00017953758847556983, "loss": 1.1169, "step": 26250 }, { "epoch": 0.21, "learning_rate": 0.00017949861245361855, "loss": 0.8806, "step": 26300 }, { "epoch": 0.21, "learning_rate": 0.00017945963643166725, "loss": 0.9295, "step": 26350 }, { "epoch": 0.21, "learning_rate": 0.00017942066040971594, "loss": 0.8931, "step": 26400 }, { "epoch": 0.21, "learning_rate": 0.00017938168438776466, "loss": 0.9139, "step": 26450 }, { "epoch": 0.21, "learning_rate": 0.00017934270836581336, "loss": 0.9318, "step": 26500 }, { "epoch": 0.21, "learning_rate": 0.00017930373234386205, "loss": 1.0256, "step": 26550 }, { "epoch": 0.21, "learning_rate": 0.00017926475632191078, "loss": 0.9042, "step": 26600 }, { "epoch": 0.21, "learning_rate": 0.00017922578029995947, "loss": 0.8945, "step": 26650 }, { "epoch": 0.21, "learning_rate": 0.00017918680427800817, "loss": 0.8622, "step": 26700 }, { "epoch": 0.21, "learning_rate": 0.0001791478282560569, "loss": 0.8348, "step": 26750 }, { "epoch": 0.21, "learning_rate": 0.00017910885223410559, "loss": 1.0544, "step": 26800 }, { "epoch": 0.21, "learning_rate": 0.0001790698762121543, "loss": 0.7097, "step": 26850 }, { "epoch": 0.21, "learning_rate": 0.00017903090019020298, "loss": 0.8808, "step": 26900 }, { "epoch": 0.21, "learning_rate": 0.0001789919241682517, "loss": 0.896, "step": 26950 }, { "epoch": 0.21, "learning_rate": 0.00017895294814630042, "loss": 1.0487, "step": 27000 }, { "epoch": 0.21, "learning_rate": 0.00017891397212434912, "loss": 0.9996, "step": 27050 }, { "epoch": 0.21, "learning_rate": 0.0001788749961023978, "loss": 0.9624, "step": 27100 }, { "epoch": 0.21, "learning_rate": 0.0001788360200804465, "loss": 0.9344, "step": 27150 }, { "epoch": 0.21, "learning_rate": 0.00017879704405849523, "loss": 0.9103, "step": 27200 }, { "epoch": 0.21, "learning_rate": 0.00017875806803654393, "loss": 0.7311, "step": 27250 }, { "epoch": 0.21, "learning_rate": 0.00017871909201459262, "loss": 0.9748, "step": 27300 }, { "epoch": 0.21, "learning_rate": 0.00017868011599264134, "loss": 0.7231, "step": 27350 }, { "epoch": 0.21, "learning_rate": 0.00017864113997069004, "loss": 0.9844, "step": 27400 }, { "epoch": 0.21, "learning_rate": 0.00017860216394873874, "loss": 0.9322, "step": 27450 }, { "epoch": 0.21, "learning_rate": 0.00017856318792678746, "loss": 0.9103, "step": 27500 }, { "epoch": 0.21, "learning_rate": 0.00017852421190483615, "loss": 1.0132, "step": 27550 }, { "epoch": 0.22, "learning_rate": 0.00017848523588288485, "loss": 0.8617, "step": 27600 }, { "epoch": 0.22, "learning_rate": 0.00017844625986093354, "loss": 1.0296, "step": 27650 }, { "epoch": 0.22, "learning_rate": 0.00017840728383898227, "loss": 1.0048, "step": 27700 }, { "epoch": 0.22, "learning_rate": 0.000178368307817031, "loss": 1.1557, "step": 27750 }, { "epoch": 0.22, "learning_rate": 0.00017832933179507966, "loss": 0.7993, "step": 27800 }, { "epoch": 0.22, "learning_rate": 0.00017829035577312838, "loss": 1.002, "step": 27850 }, { "epoch": 0.22, "learning_rate": 0.0001782513797511771, "loss": 1.0392, "step": 27900 }, { "epoch": 0.22, "learning_rate": 0.0001782124037292258, "loss": 0.8991, "step": 27950 }, { "epoch": 0.22, "learning_rate": 0.0001781734277072745, "loss": 0.8488, "step": 28000 }, { "epoch": 0.22, "learning_rate": 0.0001781344516853232, "loss": 0.8418, "step": 28050 }, { "epoch": 0.22, "learning_rate": 0.0001780954756633719, "loss": 1.02, "step": 28100 }, { "epoch": 0.22, "learning_rate": 0.0001780564996414206, "loss": 1.0404, "step": 28150 }, { "epoch": 0.22, "learning_rate": 0.0001780175236194693, "loss": 0.9571, "step": 28200 }, { "epoch": 0.22, "learning_rate": 0.00017797854759751803, "loss": 0.7724, "step": 28250 }, { "epoch": 0.22, "learning_rate": 0.00017793957157556672, "loss": 1.0129, "step": 28300 }, { "epoch": 0.22, "learning_rate": 0.00017790059555361542, "loss": 0.8916, "step": 28350 }, { "epoch": 0.22, "learning_rate": 0.00017786161953166414, "loss": 0.9504, "step": 28400 }, { "epoch": 0.22, "learning_rate": 0.00017782264350971283, "loss": 0.8393, "step": 28450 }, { "epoch": 0.22, "learning_rate": 0.00017778366748776153, "loss": 0.7675, "step": 28500 }, { "epoch": 0.22, "learning_rate": 0.00017774469146581023, "loss": 0.8273, "step": 28550 }, { "epoch": 0.22, "learning_rate": 0.00017770571544385895, "loss": 0.8967, "step": 28600 }, { "epoch": 0.22, "learning_rate": 0.00017766673942190764, "loss": 1.084, "step": 28650 }, { "epoch": 0.22, "learning_rate": 0.00017762776339995634, "loss": 0.7741, "step": 28700 }, { "epoch": 0.22, "learning_rate": 0.00017758878737800506, "loss": 1.1056, "step": 28750 }, { "epoch": 0.22, "learning_rate": 0.00017754981135605378, "loss": 1.183, "step": 28800 }, { "epoch": 0.22, "learning_rate": 0.00017751083533410245, "loss": 1.0375, "step": 28850 }, { "epoch": 0.23, "learning_rate": 0.00017747185931215118, "loss": 1.2414, "step": 28900 }, { "epoch": 0.23, "learning_rate": 0.00017743288329019987, "loss": 1.2209, "step": 28950 }, { "epoch": 0.23, "learning_rate": 0.0001773939072682486, "loss": 0.8581, "step": 29000 }, { "epoch": 0.23, "learning_rate": 0.0001773549312462973, "loss": 1.0284, "step": 29050 }, { "epoch": 0.23, "learning_rate": 0.00017731595522434598, "loss": 0.8502, "step": 29100 }, { "epoch": 0.23, "learning_rate": 0.0001772769792023947, "loss": 0.6947, "step": 29150 }, { "epoch": 0.23, "learning_rate": 0.0001772380031804434, "loss": 0.7579, "step": 29200 }, { "epoch": 0.23, "learning_rate": 0.0001771990271584921, "loss": 0.9771, "step": 29250 }, { "epoch": 0.23, "learning_rate": 0.00017716005113654082, "loss": 0.8661, "step": 29300 }, { "epoch": 0.23, "learning_rate": 0.00017712107511458952, "loss": 0.8433, "step": 29350 }, { "epoch": 0.23, "learning_rate": 0.0001770820990926382, "loss": 0.9419, "step": 29400 }, { "epoch": 0.23, "learning_rate": 0.0001770431230706869, "loss": 1.076, "step": 29450 }, { "epoch": 0.23, "learning_rate": 0.00017700414704873563, "loss": 0.9966, "step": 29500 }, { "epoch": 0.23, "learning_rate": 0.00017696517102678433, "loss": 0.7618, "step": 29550 }, { "epoch": 0.23, "learning_rate": 0.00017692619500483302, "loss": 0.8269, "step": 29600 }, { "epoch": 0.23, "learning_rate": 0.00017688721898288174, "loss": 0.8109, "step": 29650 }, { "epoch": 0.23, "learning_rate": 0.00017684824296093047, "loss": 0.7426, "step": 29700 }, { "epoch": 0.23, "learning_rate": 0.00017680926693897913, "loss": 0.9972, "step": 29750 }, { "epoch": 0.23, "learning_rate": 0.00017677029091702786, "loss": 0.7991, "step": 29800 }, { "epoch": 0.23, "learning_rate": 0.00017673131489507655, "loss": 0.7988, "step": 29850 }, { "epoch": 0.23, "learning_rate": 0.00017669233887312527, "loss": 0.7364, "step": 29900 }, { "epoch": 0.23, "learning_rate": 0.00017665336285117397, "loss": 1.0258, "step": 29950 }, { "epoch": 0.23, "learning_rate": 0.00017661438682922267, "loss": 1.0606, "step": 30000 }, { "epoch": 0.23, "learning_rate": 0.0001765754108072714, "loss": 0.909, "step": 30050 }, { "epoch": 0.23, "learning_rate": 0.00017653643478532008, "loss": 0.8428, "step": 30100 }, { "epoch": 0.24, "learning_rate": 0.00017649745876336878, "loss": 0.8707, "step": 30150 }, { "epoch": 0.24, "learning_rate": 0.0001764584827414175, "loss": 0.7147, "step": 30200 }, { "epoch": 0.24, "learning_rate": 0.0001764195067194662, "loss": 0.89, "step": 30250 }, { "epoch": 0.24, "learning_rate": 0.0001763805306975149, "loss": 0.847, "step": 30300 }, { "epoch": 0.24, "learning_rate": 0.0001763415546755636, "loss": 0.7826, "step": 30350 }, { "epoch": 0.24, "learning_rate": 0.0001763025786536123, "loss": 0.8614, "step": 30400 }, { "epoch": 0.24, "learning_rate": 0.000176263602631661, "loss": 0.8078, "step": 30450 }, { "epoch": 0.24, "learning_rate": 0.0001762246266097097, "loss": 0.872, "step": 30500 }, { "epoch": 0.24, "learning_rate": 0.00017618565058775842, "loss": 0.8734, "step": 30550 }, { "epoch": 0.24, "learning_rate": 0.00017614667456580715, "loss": 0.8836, "step": 30600 }, { "epoch": 0.24, "learning_rate": 0.00017610769854385582, "loss": 1.042, "step": 30650 }, { "epoch": 0.24, "learning_rate": 0.00017606872252190454, "loss": 0.8561, "step": 30700 }, { "epoch": 0.24, "learning_rate": 0.00017602974649995323, "loss": 0.6824, "step": 30750 }, { "epoch": 0.24, "learning_rate": 0.00017599077047800196, "loss": 0.9277, "step": 30800 }, { "epoch": 0.24, "learning_rate": 0.00017595179445605065, "loss": 0.9887, "step": 30850 }, { "epoch": 0.24, "learning_rate": 0.00017591281843409935, "loss": 0.7925, "step": 30900 }, { "epoch": 0.24, "learning_rate": 0.00017587384241214807, "loss": 0.8944, "step": 30950 }, { "epoch": 0.24, "learning_rate": 0.00017583486639019674, "loss": 1.2576, "step": 31000 }, { "epoch": 0.24, "learning_rate": 0.00017579589036824546, "loss": 1.0837, "step": 31050 }, { "epoch": 0.24, "learning_rate": 0.00017575691434629418, "loss": 1.0205, "step": 31100 }, { "epoch": 0.24, "learning_rate": 0.00017571793832434288, "loss": 1.1691, "step": 31150 }, { "epoch": 0.24, "learning_rate": 0.00017567896230239157, "loss": 0.9541, "step": 31200 }, { "epoch": 0.24, "learning_rate": 0.00017563998628044027, "loss": 0.7735, "step": 31250 }, { "epoch": 0.24, "learning_rate": 0.000175601010258489, "loss": 0.9219, "step": 31300 }, { "epoch": 0.24, "learning_rate": 0.0001755620342365377, "loss": 0.6201, "step": 31350 }, { "epoch": 0.24, "learning_rate": 0.00017552305821458638, "loss": 0.8204, "step": 31400 }, { "epoch": 0.25, "learning_rate": 0.0001754840821926351, "loss": 0.7272, "step": 31450 }, { "epoch": 0.25, "learning_rate": 0.0001754451061706838, "loss": 0.864, "step": 31500 }, { "epoch": 0.25, "learning_rate": 0.0001754061301487325, "loss": 0.9935, "step": 31550 }, { "epoch": 0.25, "learning_rate": 0.00017536715412678122, "loss": 0.8651, "step": 31600 }, { "epoch": 0.25, "learning_rate": 0.00017532817810482992, "loss": 0.8508, "step": 31650 }, { "epoch": 0.25, "learning_rate": 0.0001752892020828786, "loss": 0.863, "step": 31700 }, { "epoch": 0.25, "learning_rate": 0.00017525022606092733, "loss": 0.9272, "step": 31750 }, { "epoch": 0.25, "learning_rate": 0.00017521125003897603, "loss": 0.9609, "step": 31800 }, { "epoch": 0.25, "learning_rate": 0.00017517227401702475, "loss": 1.1736, "step": 31850 }, { "epoch": 0.25, "learning_rate": 0.00017513329799507342, "loss": 0.8571, "step": 31900 }, { "epoch": 0.25, "learning_rate": 0.00017509432197312214, "loss": 0.758, "step": 31950 }, { "epoch": 0.25, "learning_rate": 0.00017505534595117086, "loss": 1.0157, "step": 32000 }, { "epoch": 0.25, "learning_rate": 0.00017501636992921956, "loss": 0.762, "step": 32050 }, { "epoch": 0.25, "learning_rate": 0.00017497739390726826, "loss": 0.7206, "step": 32100 }, { "epoch": 0.25, "learning_rate": 0.00017493841788531695, "loss": 0.9902, "step": 32150 }, { "epoch": 0.25, "learning_rate": 0.00017489944186336567, "loss": 0.8943, "step": 32200 }, { "epoch": 0.25, "learning_rate": 0.00017486046584141437, "loss": 0.9721, "step": 32250 }, { "epoch": 0.25, "learning_rate": 0.00017482148981946306, "loss": 0.9522, "step": 32300 }, { "epoch": 0.25, "learning_rate": 0.0001747825137975118, "loss": 0.9819, "step": 32350 }, { "epoch": 0.25, "learning_rate": 0.00017474353777556048, "loss": 1.0563, "step": 32400 }, { "epoch": 0.25, "learning_rate": 0.00017470456175360918, "loss": 0.782, "step": 32450 }, { "epoch": 0.25, "learning_rate": 0.0001746655857316579, "loss": 0.9609, "step": 32500 }, { "epoch": 0.25, "learning_rate": 0.0001746266097097066, "loss": 0.9329, "step": 32550 }, { "epoch": 0.25, "learning_rate": 0.0001745876336877553, "loss": 0.7019, "step": 32600 }, { "epoch": 0.25, "learning_rate": 0.00017454865766580401, "loss": 0.9395, "step": 32650 }, { "epoch": 0.25, "learning_rate": 0.0001745096816438527, "loss": 0.7248, "step": 32700 }, { "epoch": 0.26, "learning_rate": 0.00017447070562190143, "loss": 0.7116, "step": 32750 }, { "epoch": 0.26, "learning_rate": 0.0001744317295999501, "loss": 0.92, "step": 32800 }, { "epoch": 0.26, "learning_rate": 0.00017439275357799882, "loss": 0.8105, "step": 32850 }, { "epoch": 0.26, "learning_rate": 0.00017435377755604755, "loss": 0.8492, "step": 32900 }, { "epoch": 0.26, "learning_rate": 0.00017431480153409624, "loss": 0.8305, "step": 32950 }, { "epoch": 0.26, "learning_rate": 0.00017427582551214494, "loss": 0.882, "step": 33000 }, { "epoch": 0.36, "learning_rate": 0.00016390108569805799, "loss": 40.3284, "step": 33050 }, { "epoch": 0.36, "learning_rate": 0.00016384647311968892, "loss": 41.3929, "step": 33100 }, { "epoch": 0.36, "learning_rate": 0.0001637918605413199, "loss": 40.5771, "step": 33150 }, { "epoch": 0.36, "learning_rate": 0.00016373724796295083, "loss": 40.1587, "step": 33200 }, { "epoch": 0.36, "learning_rate": 0.0001636826353845818, "loss": 38.4849, "step": 33250 }, { "epoch": 0.36, "learning_rate": 0.00016362802280621276, "loss": 40.8953, "step": 33300 }, { "epoch": 0.36, "learning_rate": 0.0001635734102278437, "loss": 41.1837, "step": 33350 }, { "epoch": 0.36, "learning_rate": 0.00016351879764947463, "loss": 41.4111, "step": 33400 }, { "epoch": 0.37, "learning_rate": 0.0001634641850711056, "loss": 39.7779, "step": 33450 }, { "epoch": 0.37, "learning_rate": 0.00016340957249273653, "loss": 39.6051, "step": 33500 }, { "epoch": 0.37, "learning_rate": 0.0001633549599143675, "loss": 39.1987, "step": 33550 }, { "epoch": 0.37, "learning_rate": 0.00016330034733599844, "loss": 36.4834, "step": 33600 }, { "epoch": 0.37, "learning_rate": 0.0001632457347576294, "loss": 38.9442, "step": 33650 }, { "epoch": 0.37, "learning_rate": 0.00016319112217926034, "loss": 38.7699, "step": 33700 }, { "epoch": 0.37, "learning_rate": 0.00016313650960089128, "loss": 38.1662, "step": 33750 }, { "epoch": 0.37, "learning_rate": 0.00016308189702252224, "loss": 38.3107, "step": 33800 }, { "epoch": 0.37, "learning_rate": 0.00016302728444415318, "loss": 37.137, "step": 33850 }, { "epoch": 0.37, "learning_rate": 0.00016297267186578414, "loss": 39.8413, "step": 33900 }, { "epoch": 0.37, "learning_rate": 0.00016291805928741508, "loss": 37.834, "step": 33950 }, { "epoch": 0.37, "learning_rate": 0.00016286344670904602, "loss": 38.752, "step": 34000 }, { "epoch": 0.37, "learning_rate": 0.00016280883413067698, "loss": 38.9749, "step": 34050 }, { "epoch": 0.37, "learning_rate": 0.00016275422155230792, "loss": 37.0203, "step": 34100 }, { "epoch": 0.37, "learning_rate": 0.00016269960897393888, "loss": 37.8575, "step": 34150 }, { "epoch": 0.37, "learning_rate": 0.00016264499639556982, "loss": 36.1197, "step": 34200 }, { "epoch": 0.37, "learning_rate": 0.0001625903838172008, "loss": 38.9567, "step": 34250 }, { "epoch": 0.37, "learning_rate": 0.00016253577123883172, "loss": 36.921, "step": 34300 }, { "epoch": 0.38, "learning_rate": 0.0001624811586604627, "loss": 37.7047, "step": 34350 }, { "epoch": 0.38, "learning_rate": 0.00016242654608209365, "loss": 37.8749, "step": 34400 }, { "epoch": 0.38, "learning_rate": 0.0001623719335037246, "loss": 36.0547, "step": 34450 }, { "epoch": 0.38, "learning_rate": 0.00016231732092535553, "loss": 35.7079, "step": 34500 }, { "epoch": 0.38, "learning_rate": 0.0001622627083469865, "loss": 35.5162, "step": 34550 }, { "epoch": 0.38, "learning_rate": 0.00016220809576861743, "loss": 35.6316, "step": 34600 }, { "epoch": 0.38, "learning_rate": 0.0001621534831902484, "loss": 37.1081, "step": 34650 }, { "epoch": 0.38, "learning_rate": 0.00016209887061187933, "loss": 35.4266, "step": 34700 }, { "epoch": 0.38, "learning_rate": 0.0001620442580335103, "loss": 35.8718, "step": 34750 }, { "epoch": 0.38, "learning_rate": 0.00016198964545514124, "loss": 34.2143, "step": 34800 }, { "epoch": 0.38, "learning_rate": 0.00016193503287677217, "loss": 34.0882, "step": 34850 }, { "epoch": 0.38, "learning_rate": 0.00016188042029840314, "loss": 32.8758, "step": 34900 }, { "epoch": 0.38, "learning_rate": 0.00016182580772003408, "loss": 32.0339, "step": 34950 }, { "epoch": 0.38, "learning_rate": 0.00016177119514166504, "loss": 31.4164, "step": 35000 }, { "epoch": 0.38, "learning_rate": 0.00016171658256329598, "loss": 31.8205, "step": 35050 }, { "epoch": 0.38, "learning_rate": 0.00016166196998492694, "loss": 32.6587, "step": 35100 }, { "epoch": 0.38, "learning_rate": 0.00016160735740655788, "loss": 31.8695, "step": 35150 }, { "epoch": 0.38, "learning_rate": 0.00016155274482818882, "loss": 31.0461, "step": 35200 }, { "epoch": 0.39, "learning_rate": 0.00016149813224981978, "loss": 30.1198, "step": 35250 }, { "epoch": 0.39, "learning_rate": 0.00016144351967145072, "loss": 28.9032, "step": 35300 }, { "epoch": 0.39, "learning_rate": 0.00016138890709308169, "loss": 30.3631, "step": 35350 }, { "epoch": 0.39, "learning_rate": 0.00016133429451471265, "loss": 29.2617, "step": 35400 }, { "epoch": 0.39, "learning_rate": 0.0001612796819363436, "loss": 28.4782, "step": 35450 }, { "epoch": 0.39, "learning_rate": 0.00016122506935797455, "loss": 28.6378, "step": 35500 }, { "epoch": 0.39, "learning_rate": 0.0001611704567796055, "loss": 28.3341, "step": 35550 }, { "epoch": 0.39, "learning_rate": 0.00016111584420123645, "loss": 27.6153, "step": 35600 }, { "epoch": 0.39, "learning_rate": 0.0001610612316228674, "loss": 26.5044, "step": 35650 }, { "epoch": 0.39, "learning_rate": 0.00016100661904449833, "loss": 26.8876, "step": 35700 }, { "epoch": 0.39, "learning_rate": 0.0001609520064661293, "loss": 26.9291, "step": 35750 }, { "epoch": 0.39, "learning_rate": 0.00016089739388776023, "loss": 24.024, "step": 35800 }, { "epoch": 0.39, "learning_rate": 0.0001608427813093912, "loss": 24.5533, "step": 35850 }, { "epoch": 0.39, "learning_rate": 0.00016078816873102213, "loss": 24.6948, "step": 35900 }, { "epoch": 0.39, "learning_rate": 0.0001607335561526531, "loss": 22.2483, "step": 35950 }, { "epoch": 0.39, "learning_rate": 0.00016067894357428404, "loss": 21.7253, "step": 36000 }, { "epoch": 0.39, "learning_rate": 0.00016062433099591497, "loss": 20.7581, "step": 36050 }, { "epoch": 0.39, "learning_rate": 0.00016056971841754594, "loss": 18.6484, "step": 36100 }, { "epoch": 0.39, "learning_rate": 0.00016051510583917688, "loss": 19.3484, "step": 36150 }, { "epoch": 0.4, "learning_rate": 0.00016046049326080784, "loss": 15.8305, "step": 36200 }, { "epoch": 0.4, "learning_rate": 0.00016040588068243878, "loss": 16.644, "step": 36250 }, { "epoch": 0.4, "learning_rate": 0.00016035126810406972, "loss": 16.1415, "step": 36300 }, { "epoch": 0.4, "learning_rate": 0.00016029665552570068, "loss": 16.2331, "step": 36350 }, { "epoch": 0.4, "learning_rate": 0.00016024204294733162, "loss": 13.7222, "step": 36400 }, { "epoch": 0.4, "learning_rate": 0.00016018743036896258, "loss": 13.1968, "step": 36450 }, { "epoch": 0.4, "learning_rate": 0.00016013281779059355, "loss": 13.7183, "step": 36500 }, { "epoch": 0.4, "learning_rate": 0.00016007820521222449, "loss": 13.6719, "step": 36550 }, { "epoch": 0.4, "learning_rate": 0.00016002359263385545, "loss": 12.565, "step": 36600 }, { "epoch": 0.4, "learning_rate": 0.0001599689800554864, "loss": 11.7014, "step": 36650 }, { "epoch": 0.4, "learning_rate": 0.00015991436747711735, "loss": 11.9391, "step": 36700 }, { "epoch": 0.4, "learning_rate": 0.0001598597548987483, "loss": 10.8187, "step": 36750 }, { "epoch": 0.4, "learning_rate": 0.00015980514232037923, "loss": 9.9151, "step": 36800 }, { "epoch": 0.4, "learning_rate": 0.0001597505297420102, "loss": 8.8924, "step": 36850 }, { "epoch": 0.4, "learning_rate": 0.00015969591716364113, "loss": 7.7144, "step": 36900 }, { "epoch": 0.4, "learning_rate": 0.0001596413045852721, "loss": 6.7915, "step": 36950 }, { "epoch": 0.4, "learning_rate": 0.00015958669200690303, "loss": 6.1585, "step": 37000 }, { "epoch": 0.4, "learning_rate": 0.000159532079428534, "loss": 6.4101, "step": 37050 }, { "epoch": 0.41, "learning_rate": 0.00015947746685016494, "loss": 4.6158, "step": 37100 }, { "epoch": 0.41, "learning_rate": 0.00015942285427179587, "loss": 4.76, "step": 37150 }, { "epoch": 0.41, "learning_rate": 0.00015936824169342684, "loss": 4.0994, "step": 37200 }, { "epoch": 0.41, "learning_rate": 0.00015931362911505778, "loss": 4.7396, "step": 37250 }, { "epoch": 0.41, "learning_rate": 0.00015925901653668874, "loss": 3.6542, "step": 37300 }, { "epoch": 0.41, "learning_rate": 0.00015920440395831968, "loss": 3.4333, "step": 37350 }, { "epoch": 0.41, "learning_rate": 0.00015914979137995064, "loss": 4.575, "step": 37400 }, { "epoch": 0.41, "learning_rate": 0.00015909517880158158, "loss": 3.3926, "step": 37450 }, { "epoch": 0.41, "learning_rate": 0.00015904056622321252, "loss": 3.3063, "step": 37500 }, { "epoch": 0.41, "learning_rate": 0.00015898595364484348, "loss": 2.9068, "step": 37550 }, { "epoch": 0.41, "learning_rate": 0.00015893134106647445, "loss": 2.9475, "step": 37600 }, { "epoch": 0.41, "learning_rate": 0.00015887672848810538, "loss": 2.94, "step": 37650 }, { "epoch": 0.41, "learning_rate": 0.00015882211590973635, "loss": 3.2924, "step": 37700 }, { "epoch": 0.41, "learning_rate": 0.0001587675033313673, "loss": 3.4012, "step": 37750 }, { "epoch": 0.41, "learning_rate": 0.00015871289075299825, "loss": 2.8093, "step": 37800 }, { "epoch": 0.41, "learning_rate": 0.0001586582781746292, "loss": 3.115, "step": 37850 }, { "epoch": 0.41, "learning_rate": 0.00015860366559626015, "loss": 2.4926, "step": 37900 }, { "epoch": 0.41, "learning_rate": 0.0001585490530178911, "loss": 2.3319, "step": 37950 }, { "epoch": 0.42, "learning_rate": 0.00015849444043952203, "loss": 2.4095, "step": 38000 }, { "epoch": 0.42, "learning_rate": 0.000158439827861153, "loss": 2.563, "step": 38050 }, { "epoch": 0.42, "learning_rate": 0.00015838521528278393, "loss": 2.5545, "step": 38100 }, { "epoch": 0.42, "learning_rate": 0.0001583306027044149, "loss": 2.0663, "step": 38150 }, { "epoch": 0.42, "learning_rate": 0.00015827599012604583, "loss": 2.0732, "step": 38200 }, { "epoch": 0.42, "learning_rate": 0.0001582213775476768, "loss": 2.9127, "step": 38250 }, { "epoch": 0.42, "learning_rate": 0.00015816676496930774, "loss": 2.2365, "step": 38300 }, { "epoch": 0.42, "learning_rate": 0.00015811215239093867, "loss": 3.9376, "step": 38350 }, { "epoch": 0.42, "learning_rate": 0.00015805753981256964, "loss": 2.0433, "step": 38400 }, { "epoch": 0.42, "learning_rate": 0.00015800292723420058, "loss": 2.1487, "step": 38450 }, { "epoch": 0.42, "learning_rate": 0.00015794831465583154, "loss": 1.8283, "step": 38500 }, { "epoch": 0.42, "learning_rate": 0.00015789370207746248, "loss": 1.5619, "step": 38550 }, { "epoch": 0.42, "learning_rate": 0.00015783908949909342, "loss": 1.6508, "step": 38600 }, { "epoch": 0.42, "learning_rate": 0.00015778447692072438, "loss": 1.8076, "step": 38650 }, { "epoch": 0.42, "learning_rate": 0.00015772986434235535, "loss": 1.5081, "step": 38700 }, { "epoch": 0.42, "learning_rate": 0.0001576752517639863, "loss": 1.7372, "step": 38750 }, { "epoch": 0.42, "learning_rate": 0.00015762063918561725, "loss": 1.504, "step": 38800 }, { "epoch": 0.42, "learning_rate": 0.00015756602660724819, "loss": 1.4685, "step": 38850 }, { "epoch": 0.42, "learning_rate": 0.00015751141402887915, "loss": 1.366, "step": 38900 }, { "epoch": 0.43, "learning_rate": 0.0001574568014505101, "loss": 1.3556, "step": 38950 }, { "epoch": 0.43, "learning_rate": 0.00015740218887214105, "loss": 1.328, "step": 39000 }, { "epoch": 0.43, "learning_rate": 0.000157347576293772, "loss": 1.672, "step": 39050 }, { "epoch": 0.43, "learning_rate": 0.00015729296371540293, "loss": 1.2776, "step": 39100 }, { "epoch": 0.43, "learning_rate": 0.0001572383511370339, "loss": 1.619, "step": 39150 }, { "epoch": 0.43, "learning_rate": 0.00015718373855866483, "loss": 1.4484, "step": 39200 }, { "epoch": 0.43, "learning_rate": 0.0001571291259802958, "loss": 1.4561, "step": 39250 }, { "epoch": 0.43, "learning_rate": 0.00015707451340192673, "loss": 1.5445, "step": 39300 }, { "epoch": 0.43, "learning_rate": 0.0001570199008235577, "loss": 1.6477, "step": 39350 }, { "epoch": 0.43, "learning_rate": 0.00015696528824518864, "loss": 1.483, "step": 39400 }, { "epoch": 0.43, "learning_rate": 0.00015691067566681957, "loss": 1.4913, "step": 39450 }, { "epoch": 0.43, "learning_rate": 0.00015685606308845054, "loss": 1.2746, "step": 39500 }, { "epoch": 0.43, "learning_rate": 0.00015680145051008148, "loss": 1.4588, "step": 39550 }, { "epoch": 0.43, "learning_rate": 0.00015674683793171244, "loss": 1.3793, "step": 39600 }, { "epoch": 0.43, "learning_rate": 0.00015669222535334338, "loss": 1.4776, "step": 39650 }, { "epoch": 0.43, "learning_rate": 0.00015663761277497434, "loss": 1.7906, "step": 39700 }, { "epoch": 0.43, "learning_rate": 0.0001565830001966053, "loss": 1.4083, "step": 39750 }, { "epoch": 0.43, "learning_rate": 0.00015652838761823624, "loss": 1.5248, "step": 39800 }, { "epoch": 0.44, "learning_rate": 0.0001564737750398672, "loss": 1.2159, "step": 39850 }, { "epoch": 0.44, "learning_rate": 0.00015641916246149815, "loss": 1.4073, "step": 39900 }, { "epoch": 0.44, "learning_rate": 0.00015636454988312908, "loss": 1.2702, "step": 39950 }, { "epoch": 0.44, "learning_rate": 0.00015630993730476005, "loss": 1.33, "step": 40000 }, { "epoch": 0.44, "learning_rate": 0.000156255324726391, "loss": 1.4365, "step": 40050 }, { "epoch": 0.44, "learning_rate": 0.00015620071214802195, "loss": 1.2484, "step": 40100 }, { "epoch": 0.44, "learning_rate": 0.0001561460995696529, "loss": 1.2985, "step": 40150 }, { "epoch": 0.44, "learning_rate": 0.00015609148699128385, "loss": 1.3169, "step": 40200 }, { "epoch": 0.44, "learning_rate": 0.0001560368744129148, "loss": 1.2415, "step": 40250 }, { "epoch": 0.44, "learning_rate": 0.00015598226183454573, "loss": 1.0357, "step": 40300 }, { "epoch": 0.44, "learning_rate": 0.0001559276492561767, "loss": 1.3613, "step": 40350 }, { "epoch": 0.44, "learning_rate": 0.00015587303667780763, "loss": 1.1524, "step": 40400 }, { "epoch": 0.44, "learning_rate": 0.0001558184240994386, "loss": 1.4132, "step": 40450 }, { "epoch": 0.44, "learning_rate": 0.00015576381152106953, "loss": 1.4276, "step": 40500 } ], "logging_steps": 50, "max_steps": 183108, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 2.90367192517632e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }