|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.44236188478930466, |
|
"eval_steps": 500, |
|
"global_step": 40500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019996132455658605, |
|
"loss": 42.0911, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001999226491131721, |
|
"loss": 38.7852, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019988397366975814, |
|
"loss": 31.5332, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019984529822634418, |
|
"loss": 25.7456, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001998066227829302, |
|
"loss": 20.5779, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019976794733951624, |
|
"loss": 17.4712, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019972927189610228, |
|
"loss": 14.2287, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019969059645268835, |
|
"loss": 12.748, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001996519210092744, |
|
"loss": 11.9603, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019961324556586044, |
|
"loss": 12.6114, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.00019957457012244646, |
|
"loss": 11.2424, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 0.0001995358946790325, |
|
"loss": 10.4185, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019949721923561854, |
|
"loss": 10.6495, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019945854379220458, |
|
"loss": 8.6583, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019941986834879063, |
|
"loss": 7.9045, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019938119290537667, |
|
"loss": 7.5867, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019934251746196271, |
|
"loss": 6.344, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019930384201854876, |
|
"loss": 7.0004, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001992651665751348, |
|
"loss": 6.0177, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019922649113172082, |
|
"loss": 6.4546, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019918781568830686, |
|
"loss": 4.9639, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001991491402448929, |
|
"loss": 4.5082, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019911046480147897, |
|
"loss": 4.9607, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019907178935806501, |
|
"loss": 4.6557, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019903311391465106, |
|
"loss": 3.7942, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001989944384712371, |
|
"loss": 3.3466, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019895576302782312, |
|
"loss": 4.2555, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019891708758440916, |
|
"loss": 3.7983, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001988784121409952, |
|
"loss": 3.6397, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019883973669758125, |
|
"loss": 3.2206, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001988010612541673, |
|
"loss": 2.9513, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019876238581075333, |
|
"loss": 3.443, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019872371036733938, |
|
"loss": 2.6171, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019868503492392542, |
|
"loss": 2.6626, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019864635948051143, |
|
"loss": 3.2079, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019860768403709748, |
|
"loss": 2.679, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00019856900859368352, |
|
"loss": 3.2509, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001985303331502696, |
|
"loss": 2.3529, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019849165770685563, |
|
"loss": 2.3721, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019845298226344168, |
|
"loss": 2.7719, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019841430682002772, |
|
"loss": 2.3059, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019837563137661374, |
|
"loss": 2.9214, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019833695593319978, |
|
"loss": 2.4541, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019829828048978582, |
|
"loss": 2.3267, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019825960504637186, |
|
"loss": 2.1945, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001982209296029579, |
|
"loss": 2.3966, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019818225415954395, |
|
"loss": 2.5349, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019814357871613, |
|
"loss": 2.0588, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019810490327271604, |
|
"loss": 3.1209, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019806622782930205, |
|
"loss": 2.3281, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001980275523858881, |
|
"loss": 2.0749, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019798887694247414, |
|
"loss": 2.1665, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001979502014990602, |
|
"loss": 2.5256, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019791152605564625, |
|
"loss": 2.3435, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001978728506122323, |
|
"loss": 2.2333, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019783417516881834, |
|
"loss": 1.9695, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019779549972540435, |
|
"loss": 2.3046, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001977568242819904, |
|
"loss": 2.1951, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019771814883857644, |
|
"loss": 2.2141, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019767947339516248, |
|
"loss": 2.3285, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019764079795174853, |
|
"loss": 1.9263, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019760212250833457, |
|
"loss": 2.4391, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001975634470649206, |
|
"loss": 2.2386, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00019752477162150665, |
|
"loss": 1.9979, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019748609617809267, |
|
"loss": 2.2926, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001974474207346787, |
|
"loss": 2.0263, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019740874529126476, |
|
"loss": 2.3533, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019737006984785083, |
|
"loss": 2.0248, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019733139440443687, |
|
"loss": 1.5322, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001972927189610229, |
|
"loss": 1.2563, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019725404351760896, |
|
"loss": 1.2361, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019721536807419497, |
|
"loss": 1.3821, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019717669263078101, |
|
"loss": 1.0988, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019713801718736706, |
|
"loss": 1.2244, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001970993417439531, |
|
"loss": 0.9095, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019706066630053914, |
|
"loss": 1.2458, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001970219908571252, |
|
"loss": 1.1168, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019698331541371123, |
|
"loss": 0.7974, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019694463997029727, |
|
"loss": 1.0594, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001969059645268833, |
|
"loss": 1.2522, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019686728908346933, |
|
"loss": 0.8916, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001968286136400554, |
|
"loss": 0.9284, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019678993819664144, |
|
"loss": 0.7177, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001967512627532275, |
|
"loss": 1.0662, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019671258730981353, |
|
"loss": 1.0509, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019667391186639957, |
|
"loss": 1.0486, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0001966352364229856, |
|
"loss": 0.9541, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019659656097957163, |
|
"loss": 1.1056, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019655788553615768, |
|
"loss": 1.0613, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019651921009274372, |
|
"loss": 0.9647, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019648053464932976, |
|
"loss": 0.8281, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001964418592059158, |
|
"loss": 0.8205, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019640318376250185, |
|
"loss": 0.8107, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001963645083190879, |
|
"loss": 0.9087, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019632583287567393, |
|
"loss": 0.7476, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019628715743225995, |
|
"loss": 0.8191, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019624848198884602, |
|
"loss": 1.0138, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019620980654543206, |
|
"loss": 1.0121, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001961711311020181, |
|
"loss": 0.7376, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019613245565860415, |
|
"loss": 0.8335, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001960937802151902, |
|
"loss": 0.9411, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001960551047717762, |
|
"loss": 0.8631, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019601642932836225, |
|
"loss": 0.936, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001959777538849483, |
|
"loss": 0.8524, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019593907844153434, |
|
"loss": 0.7093, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019590040299812038, |
|
"loss": 0.8302, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019586172755470642, |
|
"loss": 0.8756, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019582305211129247, |
|
"loss": 0.7728, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001957843766678785, |
|
"loss": 0.809, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019574570122446455, |
|
"loss": 0.7282, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019570702578105057, |
|
"loss": 0.8608, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019566835033763664, |
|
"loss": 1.0078, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019562967489422268, |
|
"loss": 0.9773, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019559099945080872, |
|
"loss": 0.7969, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019555232400739477, |
|
"loss": 0.9988, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001955136485639808, |
|
"loss": 0.7552, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019547497312056685, |
|
"loss": 0.8775, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019543629767715287, |
|
"loss": 0.8895, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001953976222337389, |
|
"loss": 0.8106, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019535894679032496, |
|
"loss": 0.6546, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.000195320271346911, |
|
"loss": 0.9039, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019528159590349704, |
|
"loss": 0.652, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019524292046008308, |
|
"loss": 0.6561, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019520424501666913, |
|
"loss": 1.046, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019516556957325517, |
|
"loss": 0.8783, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019512689412984119, |
|
"loss": 0.7351, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019508821868642726, |
|
"loss": 0.733, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001950495432430133, |
|
"loss": 0.7675, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019501086779959934, |
|
"loss": 0.9451, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019497219235618539, |
|
"loss": 0.9686, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019493351691277143, |
|
"loss": 0.6083, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019489484146935747, |
|
"loss": 0.8619, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001948561660259435, |
|
"loss": 0.6557, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019481749058252953, |
|
"loss": 0.8819, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019477881513911557, |
|
"loss": 0.8356, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019474013969570162, |
|
"loss": 0.8211, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019470146425228766, |
|
"loss": 0.8393, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001946627888088737, |
|
"loss": 1.0301, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019462411336545975, |
|
"loss": 0.7435, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001945854379220458, |
|
"loss": 0.71, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001945467624786318, |
|
"loss": 0.7786, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019450808703521787, |
|
"loss": 1.1273, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019446941159180392, |
|
"loss": 0.923, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019443073614838996, |
|
"loss": 0.8656, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000194392060704976, |
|
"loss": 0.8191, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019435338526156205, |
|
"loss": 0.8924, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001943147098181481, |
|
"loss": 0.9004, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001942760343747341, |
|
"loss": 0.6538, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019423735893132015, |
|
"loss": 0.8669, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001941986834879062, |
|
"loss": 0.9103, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019416000804449223, |
|
"loss": 0.8853, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019412133260107828, |
|
"loss": 0.7989, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019408265715766432, |
|
"loss": 0.6957, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019404398171425036, |
|
"loss": 0.8685, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001940053062708364, |
|
"loss": 0.6701, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019396663082742242, |
|
"loss": 0.7488, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001939279553840085, |
|
"loss": 0.9214, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019388927994059454, |
|
"loss": 0.7879, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019385060449718058, |
|
"loss": 0.8522, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019381192905376662, |
|
"loss": 0.9119, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019377325361035267, |
|
"loss": 0.6229, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001937345781669387, |
|
"loss": 0.8, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019369590272352472, |
|
"loss": 0.6705, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019365722728011077, |
|
"loss": 0.8694, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001936185518366968, |
|
"loss": 0.7932, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019357987639328285, |
|
"loss": 0.7311, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001935412009498689, |
|
"loss": 0.844, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019350252550645494, |
|
"loss": 0.8428, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019346385006304098, |
|
"loss": 0.8791, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019342517461962703, |
|
"loss": 0.9576, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019338649917621304, |
|
"loss": 0.821, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001933478237327991, |
|
"loss": 1.0343, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019330914828938515, |
|
"loss": 0.862, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001932704728459712, |
|
"loss": 0.6914, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019323179740255724, |
|
"loss": 1.0047, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019319312195914328, |
|
"loss": 0.7347, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019315444651572933, |
|
"loss": 0.7331, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019311577107231534, |
|
"loss": 0.9639, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019307709562890139, |
|
"loss": 0.7824, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019303842018548743, |
|
"loss": 0.8321, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019299974474207347, |
|
"loss": 1.053, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019296106929865951, |
|
"loss": 0.677, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019292239385524556, |
|
"loss": 0.8771, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001928837184118316, |
|
"loss": 0.7547, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019284504296841764, |
|
"loss": 0.7911, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019280636752500366, |
|
"loss": 0.8772, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019276769208158973, |
|
"loss": 1.0254, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019272901663817577, |
|
"loss": 0.9881, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019269034119476182, |
|
"loss": 0.9809, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019265166575134786, |
|
"loss": 0.6407, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001926129903079339, |
|
"loss": 0.8552, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019257431486451994, |
|
"loss": 0.5715, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019253563942110596, |
|
"loss": 0.7908, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000192496963977692, |
|
"loss": 0.8544, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019245828853427805, |
|
"loss": 0.7795, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001924196130908641, |
|
"loss": 0.7534, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019238093764745013, |
|
"loss": 0.9141, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019234226220403618, |
|
"loss": 0.6377, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019230358676062222, |
|
"loss": 0.8392, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019226491131720826, |
|
"loss": 0.8541, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001922262358737943, |
|
"loss": 0.7969, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019218756043038035, |
|
"loss": 0.6434, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001921488849869664, |
|
"loss": 0.9645, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019211020954355243, |
|
"loss": 0.8545, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019207153410013848, |
|
"loss": 0.669, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019203285865672452, |
|
"loss": 0.7878, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019199418321331056, |
|
"loss": 0.6872, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019195550776989658, |
|
"loss": 0.7578, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019191683232648262, |
|
"loss": 0.6626, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019187815688306866, |
|
"loss": 0.7433, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001918394814396547, |
|
"loss": 0.8421, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019180080599624075, |
|
"loss": 0.8302, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001917621305528268, |
|
"loss": 0.7689, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019172345510941284, |
|
"loss": 0.7695, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019168477966599888, |
|
"loss": 0.8601, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019164610422258492, |
|
"loss": 1.0576, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019160742877917097, |
|
"loss": 0.6168, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000191568753335757, |
|
"loss": 0.8053, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019153007789234305, |
|
"loss": 0.6034, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001914914024489291, |
|
"loss": 0.8146, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019145272700551514, |
|
"loss": 0.813, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019141405156210118, |
|
"loss": 0.7254, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019137537611868722, |
|
"loss": 0.8516, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019133670067527324, |
|
"loss": 0.8619, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019129802523185928, |
|
"loss": 0.8323, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019125934978844533, |
|
"loss": 0.802, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019122067434503137, |
|
"loss": 0.9431, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001911819989016174, |
|
"loss": 0.833, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019114332345820345, |
|
"loss": 0.8785, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001911046480147895, |
|
"loss": 0.894, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019106597257137554, |
|
"loss": 0.9139, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019102729712796158, |
|
"loss": 0.7368, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019098862168454763, |
|
"loss": 0.8103, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019094994624113367, |
|
"loss": 0.8547, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001909112707977197, |
|
"loss": 0.7595, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019087259535430576, |
|
"loss": 0.7432, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001908339199108918, |
|
"loss": 0.8002, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019079524446747784, |
|
"loss": 0.7238, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019075656902406386, |
|
"loss": 0.7368, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001907178935806499, |
|
"loss": 0.7619, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019067921813723594, |
|
"loss": 0.8555, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000190640542693822, |
|
"loss": 0.7968, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019060186725040803, |
|
"loss": 0.6167, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019056319180699407, |
|
"loss": 0.7268, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019052451636358012, |
|
"loss": 0.7624, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019048584092016616, |
|
"loss": 0.7498, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001904471654767522, |
|
"loss": 0.7913, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019040849003333825, |
|
"loss": 0.9818, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001903698145899243, |
|
"loss": 0.9653, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019033113914651033, |
|
"loss": 0.8576, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019029246370309637, |
|
"loss": 0.7793, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019025378825968242, |
|
"loss": 0.9986, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019021511281626846, |
|
"loss": 0.6204, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019017643737285448, |
|
"loss": 0.7432, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019013776192944052, |
|
"loss": 0.7728, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019009908648602656, |
|
"loss": 0.924, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001900604110426126, |
|
"loss": 0.8306, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019002173559919865, |
|
"loss": 0.9331, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001899830601557847, |
|
"loss": 0.9156, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018994438471237073, |
|
"loss": 0.7475, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018990570926895678, |
|
"loss": 0.8014, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018986703382554282, |
|
"loss": 0.7636, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018982835838212886, |
|
"loss": 0.8878, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001897896829387149, |
|
"loss": 0.7146, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018975100749530095, |
|
"loss": 0.7577, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000189712332051887, |
|
"loss": 0.9388, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018967365660847304, |
|
"loss": 0.7735, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018963498116505908, |
|
"loss": 0.6801, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001895963057216451, |
|
"loss": 0.7908, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018955763027823114, |
|
"loss": 0.7054, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00018951895483481718, |
|
"loss": 0.8082, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018948027939140322, |
|
"loss": 0.7959, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018944160394798927, |
|
"loss": 0.8319, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001894029285045753, |
|
"loss": 0.7559, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018936425306116135, |
|
"loss": 0.6439, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001893255776177474, |
|
"loss": 0.7906, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018928690217433344, |
|
"loss": 0.9517, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018924822673091948, |
|
"loss": 0.8082, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018920955128750552, |
|
"loss": 0.8872, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018917087584409157, |
|
"loss": 0.6533, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001891322004006776, |
|
"loss": 0.8846, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018909352495726365, |
|
"loss": 0.7644, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001890548495138497, |
|
"loss": 0.9197, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001890161740704357, |
|
"loss": 0.8356, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018897749862702176, |
|
"loss": 0.7626, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001889388231836078, |
|
"loss": 0.7978, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018890014774019384, |
|
"loss": 0.9382, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018886147229677988, |
|
"loss": 0.8213, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018882279685336593, |
|
"loss": 0.8098, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018878412140995197, |
|
"loss": 0.6624, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018874544596653801, |
|
"loss": 0.6901, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018870677052312406, |
|
"loss": 0.8449, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0001886680950797101, |
|
"loss": 0.992, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018862941963629614, |
|
"loss": 0.8163, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018859074419288219, |
|
"loss": 0.9079, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018855206874946823, |
|
"loss": 0.6967, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00018851339330605427, |
|
"loss": 0.7634, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018847471786264032, |
|
"loss": 0.881, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018843604241922633, |
|
"loss": 0.9108, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018839736697581237, |
|
"loss": 0.7132, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018835869153239842, |
|
"loss": 0.5067, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018832001608898446, |
|
"loss": 1.1357, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001882813406455705, |
|
"loss": 0.7256, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018824266520215655, |
|
"loss": 0.6846, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001882039897587426, |
|
"loss": 0.8358, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018816531431532863, |
|
"loss": 0.7776, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018812663887191468, |
|
"loss": 0.5573, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018808796342850072, |
|
"loss": 0.6548, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018804928798508676, |
|
"loss": 0.7813, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001880106125416728, |
|
"loss": 0.853, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018797193709825885, |
|
"loss": 0.757, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001879332616548449, |
|
"loss": 0.7511, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018789458621143093, |
|
"loss": 0.8809, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018785591076801698, |
|
"loss": 0.6439, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000187817235324603, |
|
"loss": 0.6401, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018777855988118904, |
|
"loss": 0.9463, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018773988443777508, |
|
"loss": 0.7206, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018770120899436112, |
|
"loss": 0.738, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018766253355094716, |
|
"loss": 0.8078, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001876238581075332, |
|
"loss": 0.8814, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018758518266411925, |
|
"loss": 0.7841, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001875465072207053, |
|
"loss": 0.9534, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00018750783177729134, |
|
"loss": 0.7588, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018746915633387738, |
|
"loss": 0.7467, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018743048089046342, |
|
"loss": 0.7402, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018739180544704947, |
|
"loss": 0.7391, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001873531300036355, |
|
"loss": 0.93, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018731445456022155, |
|
"loss": 0.673, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001872757791168076, |
|
"loss": 0.8719, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001872371036733936, |
|
"loss": 0.7977, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018719842822997965, |
|
"loss": 0.8446, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001871597527865657, |
|
"loss": 0.5509, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018712107734315174, |
|
"loss": 0.7187, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018708240189973778, |
|
"loss": 0.7886, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018704372645632383, |
|
"loss": 0.787, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018700505101290987, |
|
"loss": 0.8182, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001869663755694959, |
|
"loss": 0.7996, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018692770012608195, |
|
"loss": 1.0537, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000186889024682668, |
|
"loss": 0.7795, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018685034923925404, |
|
"loss": 0.6382, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018681167379584008, |
|
"loss": 0.8503, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018677299835242613, |
|
"loss": 0.7172, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018673432290901217, |
|
"loss": 0.8269, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001866956474655982, |
|
"loss": 0.8608, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018665697202218423, |
|
"loss": 0.5488, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018661829657877027, |
|
"loss": 0.6198, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018657962113535631, |
|
"loss": 0.6294, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00018654094569194236, |
|
"loss": 0.6832, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001865022702485284, |
|
"loss": 0.7857, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018646359480511444, |
|
"loss": 0.873, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018642491936170051, |
|
"loss": 0.8379, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018638624391828653, |
|
"loss": 0.716, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018634756847487257, |
|
"loss": 0.7123, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018630889303145862, |
|
"loss": 0.7582, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018627021758804466, |
|
"loss": 0.8126, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001862315421446307, |
|
"loss": 0.8564, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018619286670121674, |
|
"loss": 0.5869, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001861541912578028, |
|
"loss": 0.7508, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018611551581438883, |
|
"loss": 0.7061, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018607684037097485, |
|
"loss": 0.7345, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001860381649275609, |
|
"loss": 0.5775, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018599948948414693, |
|
"loss": 0.7817, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018596081404073298, |
|
"loss": 0.7201, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018592213859731902, |
|
"loss": 0.8352, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018588346315390506, |
|
"loss": 0.7986, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018584478771049113, |
|
"loss": 0.5892, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018580611226707715, |
|
"loss": 0.6573, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001857674368236632, |
|
"loss": 0.7291, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018572876138024923, |
|
"loss": 0.8477, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018569008593683528, |
|
"loss": 0.7634, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018565141049342132, |
|
"loss": 0.5596, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018561273505000736, |
|
"loss": 0.7536, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001855740596065934, |
|
"loss": 0.8015, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00018553538416317945, |
|
"loss": 0.9044, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018549670871976547, |
|
"loss": 0.7212, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001854580332763515, |
|
"loss": 0.6835, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018541935783293755, |
|
"loss": 0.6431, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001853806823895236, |
|
"loss": 0.6776, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018534200694610964, |
|
"loss": 0.8134, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018530333150269568, |
|
"loss": 0.7613, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018526465605928175, |
|
"loss": 0.6909, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018522598061586777, |
|
"loss": 0.5647, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001851873051724538, |
|
"loss": 0.845, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018514862972903985, |
|
"loss": 0.6676, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001851099542856259, |
|
"loss": 0.608, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018507127884221194, |
|
"loss": 0.6545, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018503260339879798, |
|
"loss": 0.8084, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018499392795538402, |
|
"loss": 0.9323, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018495525251197007, |
|
"loss": 0.7761, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018491657706855608, |
|
"loss": 0.7525, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018487790162514213, |
|
"loss": 0.7387, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018483922618172817, |
|
"loss": 0.7412, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001848005507383142, |
|
"loss": 0.6455, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018476187529490026, |
|
"loss": 0.6401, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018472319985148633, |
|
"loss": 0.7524, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018468452440807237, |
|
"loss": 0.8381, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018464584896465838, |
|
"loss": 0.7317, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018460717352124443, |
|
"loss": 0.7321, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00018456849807783047, |
|
"loss": 0.8627, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001845298226344165, |
|
"loss": 0.8806, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018449114719100256, |
|
"loss": 0.6949, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001844524717475886, |
|
"loss": 0.8466, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018441379630417464, |
|
"loss": 0.468, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018437512086076069, |
|
"loss": 0.8107, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001843364454173467, |
|
"loss": 0.8214, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018429776997393274, |
|
"loss": 0.6371, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001842590945305188, |
|
"loss": 0.8139, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018422041908710483, |
|
"loss": 0.8821, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018418174364369087, |
|
"loss": 0.749, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018414306820027694, |
|
"loss": 0.7666, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000184104392756863, |
|
"loss": 0.804, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000184065717313449, |
|
"loss": 0.8258, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018402704187003505, |
|
"loss": 0.6565, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001839883664266211, |
|
"loss": 0.6481, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018394969098320713, |
|
"loss": 0.5938, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018391101553979317, |
|
"loss": 0.6611, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018387234009637922, |
|
"loss": 0.9062, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018383366465296526, |
|
"loss": 0.6141, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001837949892095513, |
|
"loss": 0.6457, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018375631376613735, |
|
"loss": 0.5349, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018371763832272336, |
|
"loss": 0.6687, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001836789628793094, |
|
"loss": 1.0448, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018364028743589545, |
|
"loss": 0.8059, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001836016119924815, |
|
"loss": 0.6748, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00018356293654906756, |
|
"loss": 0.5979, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001835242611056536, |
|
"loss": 0.8469, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018348558566223962, |
|
"loss": 0.7463, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018344691021882566, |
|
"loss": 0.7493, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001834082347754117, |
|
"loss": 0.8654, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018336955933199775, |
|
"loss": 0.7216, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001833308838885838, |
|
"loss": 0.7847, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018329220844516984, |
|
"loss": 0.5339, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018325353300175588, |
|
"loss": 0.7045, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018321485755834192, |
|
"loss": 0.6995, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018317618211492797, |
|
"loss": 0.736, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018313750667151398, |
|
"loss": 0.7212, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018309883122810002, |
|
"loss": 0.6062, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018306015578468607, |
|
"loss": 0.889, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001830214803412721, |
|
"loss": 0.6812, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018298280489785818, |
|
"loss": 0.7713, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018294412945444422, |
|
"loss": 0.7462, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018290545401103027, |
|
"loss": 0.5084, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018286677856761628, |
|
"loss": 0.6875, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018282810312420233, |
|
"loss": 0.8552, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018278942768078837, |
|
"loss": 0.7549, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001827507522373744, |
|
"loss": 0.6307, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018271207679396045, |
|
"loss": 1.0293, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001826734013505465, |
|
"loss": 0.7603, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018263472590713254, |
|
"loss": 0.5218, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018259605046371858, |
|
"loss": 0.5962, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001825573750203046, |
|
"loss": 0.7793, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00018251869957689064, |
|
"loss": 0.6511, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018248002413347669, |
|
"loss": 0.6589, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018244134869006273, |
|
"loss": 0.826, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001824026732466488, |
|
"loss": 0.7561, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018236399780323484, |
|
"loss": 0.7605, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018232532235982088, |
|
"loss": 0.7887, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001822866469164069, |
|
"loss": 0.6065, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018224797147299294, |
|
"loss": 0.7631, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018220929602957899, |
|
"loss": 0.6708, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018217062058616503, |
|
"loss": 0.8115, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018213194514275107, |
|
"loss": 0.5469, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018209326969933712, |
|
"loss": 0.751, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018205459425592316, |
|
"loss": 0.6424, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001820159188125092, |
|
"loss": 0.7017, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018197724336909522, |
|
"loss": 0.615, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018193856792568126, |
|
"loss": 0.9175, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001818998924822673, |
|
"loss": 0.9438, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018186121703885335, |
|
"loss": 0.8183, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018182254159543942, |
|
"loss": 0.9829, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018178386615202546, |
|
"loss": 0.7703, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001817451907086115, |
|
"loss": 0.6007, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018170651526519752, |
|
"loss": 0.6095, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018166783982178356, |
|
"loss": 0.6811, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001816291643783696, |
|
"loss": 0.6791, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018159048893495565, |
|
"loss": 0.8032, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001815518134915417, |
|
"loss": 0.6968, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00018151313804812773, |
|
"loss": 0.7912, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018147446260471378, |
|
"loss": 0.6557, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018143578716129982, |
|
"loss": 0.7041, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018139711171788584, |
|
"loss": 0.7028, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018135843627447188, |
|
"loss": 0.5454, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018131976083105792, |
|
"loss": 0.8485, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018128108538764396, |
|
"loss": 0.6944, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018124240994423003, |
|
"loss": 0.6429, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018120373450081608, |
|
"loss": 0.9911, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018116505905740212, |
|
"loss": 0.7624, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018112638361398814, |
|
"loss": 0.8377, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018108770817057418, |
|
"loss": 0.7377, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018104903272716022, |
|
"loss": 0.8191, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018101035728374627, |
|
"loss": 0.6292, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001809716818403323, |
|
"loss": 0.7387, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018093300639691835, |
|
"loss": 0.7166, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001808943309535044, |
|
"loss": 0.787, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018085565551009044, |
|
"loss": 0.6451, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018081698006667645, |
|
"loss": 0.8142, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001807783046232625, |
|
"loss": 0.9096, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018073962917984854, |
|
"loss": 0.6828, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018070095373643458, |
|
"loss": 0.5445, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00018066227829302065, |
|
"loss": 0.8181, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018047301300240095, |
|
"loss": 1.124, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018043403698044964, |
|
"loss": 1.1854, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018039506095849834, |
|
"loss": 1.0665, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018035608493654706, |
|
"loss": 0.8882, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018031710891459575, |
|
"loss": 1.0253, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018027813289264445, |
|
"loss": 1.0734, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018023915687069315, |
|
"loss": 1.0606, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018020018084874187, |
|
"loss": 0.9589, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018016120482679056, |
|
"loss": 1.0562, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018012222880483926, |
|
"loss": 0.8676, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018008325278288798, |
|
"loss": 1.0997, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018004427676093668, |
|
"loss": 0.9763, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00018000530073898537, |
|
"loss": 0.8347, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001799663247170341, |
|
"loss": 0.9396, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001799273486950828, |
|
"loss": 0.9281, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001798883726731315, |
|
"loss": 0.9826, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001798493966511802, |
|
"loss": 0.8583, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001798104206292289, |
|
"loss": 0.8509, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017977144460727763, |
|
"loss": 0.8912, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001797324685853263, |
|
"loss": 0.8786, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017969349256337502, |
|
"loss": 0.8482, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017965451654142374, |
|
"loss": 0.9426, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017961554051947244, |
|
"loss": 0.9505, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017957656449752113, |
|
"loss": 0.8555, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00017953758847556983, |
|
"loss": 1.1169, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017949861245361855, |
|
"loss": 0.8806, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017945963643166725, |
|
"loss": 0.9295, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017942066040971594, |
|
"loss": 0.8931, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017938168438776466, |
|
"loss": 0.9139, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017934270836581336, |
|
"loss": 0.9318, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017930373234386205, |
|
"loss": 1.0256, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017926475632191078, |
|
"loss": 0.9042, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017922578029995947, |
|
"loss": 0.8945, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017918680427800817, |
|
"loss": 0.8622, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001791478282560569, |
|
"loss": 0.8348, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017910885223410559, |
|
"loss": 1.0544, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001790698762121543, |
|
"loss": 0.7097, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017903090019020298, |
|
"loss": 0.8808, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001789919241682517, |
|
"loss": 0.896, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017895294814630042, |
|
"loss": 1.0487, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017891397212434912, |
|
"loss": 0.9996, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001788749961023978, |
|
"loss": 0.9624, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001788360200804465, |
|
"loss": 0.9344, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017879704405849523, |
|
"loss": 0.9103, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017875806803654393, |
|
"loss": 0.7311, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017871909201459262, |
|
"loss": 0.9748, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017868011599264134, |
|
"loss": 0.7231, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017864113997069004, |
|
"loss": 0.9844, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017860216394873874, |
|
"loss": 0.9322, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017856318792678746, |
|
"loss": 0.9103, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00017852421190483615, |
|
"loss": 1.0132, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017848523588288485, |
|
"loss": 0.8617, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017844625986093354, |
|
"loss": 1.0296, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017840728383898227, |
|
"loss": 1.0048, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000178368307817031, |
|
"loss": 1.1557, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017832933179507966, |
|
"loss": 0.7993, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017829035577312838, |
|
"loss": 1.002, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001782513797511771, |
|
"loss": 1.0392, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001782124037292258, |
|
"loss": 0.8991, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001781734277072745, |
|
"loss": 0.8488, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001781344516853232, |
|
"loss": 0.8418, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001780954756633719, |
|
"loss": 1.02, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001780564996414206, |
|
"loss": 1.0404, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001780175236194693, |
|
"loss": 0.9571, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017797854759751803, |
|
"loss": 0.7724, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017793957157556672, |
|
"loss": 1.0129, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017790059555361542, |
|
"loss": 0.8916, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017786161953166414, |
|
"loss": 0.9504, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017782264350971283, |
|
"loss": 0.8393, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017778366748776153, |
|
"loss": 0.7675, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017774469146581023, |
|
"loss": 0.8273, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017770571544385895, |
|
"loss": 0.8967, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017766673942190764, |
|
"loss": 1.084, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017762776339995634, |
|
"loss": 0.7741, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017758878737800506, |
|
"loss": 1.1056, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017754981135605378, |
|
"loss": 1.183, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00017751083533410245, |
|
"loss": 1.0375, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017747185931215118, |
|
"loss": 1.2414, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017743288329019987, |
|
"loss": 1.2209, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001773939072682486, |
|
"loss": 0.8581, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001773549312462973, |
|
"loss": 1.0284, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017731595522434598, |
|
"loss": 0.8502, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001772769792023947, |
|
"loss": 0.6947, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001772380031804434, |
|
"loss": 0.7579, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001771990271584921, |
|
"loss": 0.9771, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017716005113654082, |
|
"loss": 0.8661, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017712107511458952, |
|
"loss": 0.8433, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001770820990926382, |
|
"loss": 0.9419, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001770431230706869, |
|
"loss": 1.076, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017700414704873563, |
|
"loss": 0.9966, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017696517102678433, |
|
"loss": 0.7618, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017692619500483302, |
|
"loss": 0.8269, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017688721898288174, |
|
"loss": 0.8109, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017684824296093047, |
|
"loss": 0.7426, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017680926693897913, |
|
"loss": 0.9972, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017677029091702786, |
|
"loss": 0.7991, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017673131489507655, |
|
"loss": 0.7988, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017669233887312527, |
|
"loss": 0.7364, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017665336285117397, |
|
"loss": 1.0258, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017661438682922267, |
|
"loss": 1.0606, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001765754108072714, |
|
"loss": 0.909, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00017653643478532008, |
|
"loss": 0.8428, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017649745876336878, |
|
"loss": 0.8707, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001764584827414175, |
|
"loss": 0.7147, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001764195067194662, |
|
"loss": 0.89, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001763805306975149, |
|
"loss": 0.847, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001763415546755636, |
|
"loss": 0.7826, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001763025786536123, |
|
"loss": 0.8614, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000176263602631661, |
|
"loss": 0.8078, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001762246266097097, |
|
"loss": 0.872, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017618565058775842, |
|
"loss": 0.8734, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017614667456580715, |
|
"loss": 0.8836, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017610769854385582, |
|
"loss": 1.042, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017606872252190454, |
|
"loss": 0.8561, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017602974649995323, |
|
"loss": 0.6824, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017599077047800196, |
|
"loss": 0.9277, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017595179445605065, |
|
"loss": 0.9887, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017591281843409935, |
|
"loss": 0.7925, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017587384241214807, |
|
"loss": 0.8944, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017583486639019674, |
|
"loss": 1.2576, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017579589036824546, |
|
"loss": 1.0837, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017575691434629418, |
|
"loss": 1.0205, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017571793832434288, |
|
"loss": 1.1691, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017567896230239157, |
|
"loss": 0.9541, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017563998628044027, |
|
"loss": 0.7735, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000175601010258489, |
|
"loss": 0.9219, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001755620342365377, |
|
"loss": 0.6201, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00017552305821458638, |
|
"loss": 0.8204, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001754840821926351, |
|
"loss": 0.7272, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001754451061706838, |
|
"loss": 0.864, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001754061301487325, |
|
"loss": 0.9935, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017536715412678122, |
|
"loss": 0.8651, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017532817810482992, |
|
"loss": 0.8508, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001752892020828786, |
|
"loss": 0.863, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017525022606092733, |
|
"loss": 0.9272, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017521125003897603, |
|
"loss": 0.9609, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017517227401702475, |
|
"loss": 1.1736, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017513329799507342, |
|
"loss": 0.8571, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017509432197312214, |
|
"loss": 0.758, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017505534595117086, |
|
"loss": 1.0157, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017501636992921956, |
|
"loss": 0.762, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017497739390726826, |
|
"loss": 0.7206, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017493841788531695, |
|
"loss": 0.9902, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017489944186336567, |
|
"loss": 0.8943, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017486046584141437, |
|
"loss": 0.9721, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017482148981946306, |
|
"loss": 0.9522, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001747825137975118, |
|
"loss": 0.9819, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017474353777556048, |
|
"loss": 1.0563, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017470456175360918, |
|
"loss": 0.782, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001746655857316579, |
|
"loss": 0.9609, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001746266097097066, |
|
"loss": 0.9329, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001745876336877553, |
|
"loss": 0.7019, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00017454865766580401, |
|
"loss": 0.9395, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001745096816438527, |
|
"loss": 0.7248, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00017447070562190143, |
|
"loss": 0.7116, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001744317295999501, |
|
"loss": 0.92, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00017439275357799882, |
|
"loss": 0.8105, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00017435377755604755, |
|
"loss": 0.8492, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00017431480153409624, |
|
"loss": 0.8305, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00017427582551214494, |
|
"loss": 0.882, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016390108569805799, |
|
"loss": 40.3284, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016384647311968892, |
|
"loss": 41.3929, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001637918605413199, |
|
"loss": 40.5771, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016373724796295083, |
|
"loss": 40.1587, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001636826353845818, |
|
"loss": 38.4849, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016362802280621276, |
|
"loss": 40.8953, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001635734102278437, |
|
"loss": 41.1837, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00016351879764947463, |
|
"loss": 41.4111, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001634641850711056, |
|
"loss": 39.7779, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016340957249273653, |
|
"loss": 39.6051, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001633549599143675, |
|
"loss": 39.1987, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016330034733599844, |
|
"loss": 36.4834, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001632457347576294, |
|
"loss": 38.9442, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016319112217926034, |
|
"loss": 38.7699, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016313650960089128, |
|
"loss": 38.1662, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016308189702252224, |
|
"loss": 38.3107, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016302728444415318, |
|
"loss": 37.137, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016297267186578414, |
|
"loss": 39.8413, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016291805928741508, |
|
"loss": 37.834, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016286344670904602, |
|
"loss": 38.752, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016280883413067698, |
|
"loss": 38.9749, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016275422155230792, |
|
"loss": 37.0203, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016269960897393888, |
|
"loss": 37.8575, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016264499639556982, |
|
"loss": 36.1197, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001625903838172008, |
|
"loss": 38.9567, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00016253577123883172, |
|
"loss": 36.921, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001624811586604627, |
|
"loss": 37.7047, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016242654608209365, |
|
"loss": 37.8749, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001623719335037246, |
|
"loss": 36.0547, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016231732092535553, |
|
"loss": 35.7079, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001622627083469865, |
|
"loss": 35.5162, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016220809576861743, |
|
"loss": 35.6316, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001621534831902484, |
|
"loss": 37.1081, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016209887061187933, |
|
"loss": 35.4266, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001620442580335103, |
|
"loss": 35.8718, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016198964545514124, |
|
"loss": 34.2143, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016193503287677217, |
|
"loss": 34.0882, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016188042029840314, |
|
"loss": 32.8758, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016182580772003408, |
|
"loss": 32.0339, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016177119514166504, |
|
"loss": 31.4164, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016171658256329598, |
|
"loss": 31.8205, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016166196998492694, |
|
"loss": 32.6587, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016160735740655788, |
|
"loss": 31.8695, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00016155274482818882, |
|
"loss": 31.0461, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016149813224981978, |
|
"loss": 30.1198, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016144351967145072, |
|
"loss": 28.9032, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016138890709308169, |
|
"loss": 30.3631, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016133429451471265, |
|
"loss": 29.2617, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001612796819363436, |
|
"loss": 28.4782, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016122506935797455, |
|
"loss": 28.6378, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001611704567796055, |
|
"loss": 28.3341, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016111584420123645, |
|
"loss": 27.6153, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001610612316228674, |
|
"loss": 26.5044, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016100661904449833, |
|
"loss": 26.8876, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001609520064661293, |
|
"loss": 26.9291, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016089739388776023, |
|
"loss": 24.024, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001608427813093912, |
|
"loss": 24.5533, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016078816873102213, |
|
"loss": 24.6948, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001607335561526531, |
|
"loss": 22.2483, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016067894357428404, |
|
"loss": 21.7253, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016062433099591497, |
|
"loss": 20.7581, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016056971841754594, |
|
"loss": 18.6484, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00016051510583917688, |
|
"loss": 19.3484, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016046049326080784, |
|
"loss": 15.8305, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016040588068243878, |
|
"loss": 16.644, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016035126810406972, |
|
"loss": 16.1415, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016029665552570068, |
|
"loss": 16.2331, |
|
"step": 36350 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016024204294733162, |
|
"loss": 13.7222, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016018743036896258, |
|
"loss": 13.1968, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016013281779059355, |
|
"loss": 13.7183, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016007820521222449, |
|
"loss": 13.6719, |
|
"step": 36550 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00016002359263385545, |
|
"loss": 12.565, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001599689800554864, |
|
"loss": 11.7014, |
|
"step": 36650 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00015991436747711735, |
|
"loss": 11.9391, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001598597548987483, |
|
"loss": 10.8187, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00015980514232037923, |
|
"loss": 9.9151, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001597505297420102, |
|
"loss": 8.8924, |
|
"step": 36850 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00015969591716364113, |
|
"loss": 7.7144, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001596413045852721, |
|
"loss": 6.7915, |
|
"step": 36950 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00015958669200690303, |
|
"loss": 6.1585, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.000159532079428534, |
|
"loss": 6.4101, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015947746685016494, |
|
"loss": 4.6158, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015942285427179587, |
|
"loss": 4.76, |
|
"step": 37150 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015936824169342684, |
|
"loss": 4.0994, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015931362911505778, |
|
"loss": 4.7396, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015925901653668874, |
|
"loss": 3.6542, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015920440395831968, |
|
"loss": 3.4333, |
|
"step": 37350 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015914979137995064, |
|
"loss": 4.575, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015909517880158158, |
|
"loss": 3.3926, |
|
"step": 37450 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015904056622321252, |
|
"loss": 3.3063, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015898595364484348, |
|
"loss": 2.9068, |
|
"step": 37550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015893134106647445, |
|
"loss": 2.9475, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015887672848810538, |
|
"loss": 2.94, |
|
"step": 37650 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015882211590973635, |
|
"loss": 3.2924, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001587675033313673, |
|
"loss": 3.4012, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015871289075299825, |
|
"loss": 2.8093, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001586582781746292, |
|
"loss": 3.115, |
|
"step": 37850 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00015860366559626015, |
|
"loss": 2.4926, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001585490530178911, |
|
"loss": 2.3319, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015849444043952203, |
|
"loss": 2.4095, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.000158439827861153, |
|
"loss": 2.563, |
|
"step": 38050 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015838521528278393, |
|
"loss": 2.5545, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001583306027044149, |
|
"loss": 2.0663, |
|
"step": 38150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015827599012604583, |
|
"loss": 2.0732, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001582213775476768, |
|
"loss": 2.9127, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015816676496930774, |
|
"loss": 2.2365, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015811215239093867, |
|
"loss": 3.9376, |
|
"step": 38350 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015805753981256964, |
|
"loss": 2.0433, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015800292723420058, |
|
"loss": 2.1487, |
|
"step": 38450 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015794831465583154, |
|
"loss": 1.8283, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015789370207746248, |
|
"loss": 1.5619, |
|
"step": 38550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015783908949909342, |
|
"loss": 1.6508, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015778447692072438, |
|
"loss": 1.8076, |
|
"step": 38650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015772986434235535, |
|
"loss": 1.5081, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001576752517639863, |
|
"loss": 1.7372, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015762063918561725, |
|
"loss": 1.504, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015756602660724819, |
|
"loss": 1.4685, |
|
"step": 38850 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00015751141402887915, |
|
"loss": 1.366, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001574568014505101, |
|
"loss": 1.3556, |
|
"step": 38950 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015740218887214105, |
|
"loss": 1.328, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.000157347576293772, |
|
"loss": 1.672, |
|
"step": 39050 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015729296371540293, |
|
"loss": 1.2776, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001572383511370339, |
|
"loss": 1.619, |
|
"step": 39150 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015718373855866483, |
|
"loss": 1.4484, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001571291259802958, |
|
"loss": 1.4561, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015707451340192673, |
|
"loss": 1.5445, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001570199008235577, |
|
"loss": 1.6477, |
|
"step": 39350 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015696528824518864, |
|
"loss": 1.483, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015691067566681957, |
|
"loss": 1.4913, |
|
"step": 39450 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015685606308845054, |
|
"loss": 1.2746, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015680145051008148, |
|
"loss": 1.4588, |
|
"step": 39550 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015674683793171244, |
|
"loss": 1.3793, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015669222535334338, |
|
"loss": 1.4776, |
|
"step": 39650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015663761277497434, |
|
"loss": 1.7906, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001565830001966053, |
|
"loss": 1.4083, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00015652838761823624, |
|
"loss": 1.5248, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001564737750398672, |
|
"loss": 1.2159, |
|
"step": 39850 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00015641916246149815, |
|
"loss": 1.4073, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00015636454988312908, |
|
"loss": 1.2702, |
|
"step": 39950 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00015630993730476005, |
|
"loss": 1.33, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000156255324726391, |
|
"loss": 1.4365, |
|
"step": 40050 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00015620071214802195, |
|
"loss": 1.2484, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001561460995696529, |
|
"loss": 1.2985, |
|
"step": 40150 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00015609148699128385, |
|
"loss": 1.3169, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001560368744129148, |
|
"loss": 1.2415, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00015598226183454573, |
|
"loss": 1.0357, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001559276492561767, |
|
"loss": 1.3613, |
|
"step": 40350 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00015587303667780763, |
|
"loss": 1.1524, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001558184240994386, |
|
"loss": 1.4132, |
|
"step": 40450 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00015576381152106953, |
|
"loss": 1.4276, |
|
"step": 40500 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 183108, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 2.90367192517632e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|