mt5-base-finetuned-llava-test / trainer_state.json
jonathanjordan21's picture
Upload folder using huggingface_hub
3a7db38
raw
history blame contribute delete
No virus
99.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.44236188478930466,
"eval_steps": 500,
"global_step": 40500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 0.00019996132455658605,
"loss": 42.0911,
"step": 50
},
{
"epoch": 0.0,
"learning_rate": 0.0001999226491131721,
"loss": 38.7852,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 0.00019988397366975814,
"loss": 31.5332,
"step": 150
},
{
"epoch": 0.0,
"learning_rate": 0.00019984529822634418,
"loss": 25.7456,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 0.0001998066227829302,
"loss": 20.5779,
"step": 250
},
{
"epoch": 0.0,
"learning_rate": 0.00019976794733951624,
"loss": 17.4712,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 0.00019972927189610228,
"loss": 14.2287,
"step": 350
},
{
"epoch": 0.0,
"learning_rate": 0.00019969059645268835,
"loss": 12.748,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 0.0001996519210092744,
"loss": 11.9603,
"step": 450
},
{
"epoch": 0.0,
"learning_rate": 0.00019961324556586044,
"loss": 12.6114,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 0.00019957457012244646,
"loss": 11.2424,
"step": 550
},
{
"epoch": 0.0,
"learning_rate": 0.0001995358946790325,
"loss": 10.4185,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 0.00019949721923561854,
"loss": 10.6495,
"step": 650
},
{
"epoch": 0.01,
"learning_rate": 0.00019945854379220458,
"loss": 8.6583,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 0.00019941986834879063,
"loss": 7.9045,
"step": 750
},
{
"epoch": 0.01,
"learning_rate": 0.00019938119290537667,
"loss": 7.5867,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 0.00019934251746196271,
"loss": 6.344,
"step": 850
},
{
"epoch": 0.01,
"learning_rate": 0.00019930384201854876,
"loss": 7.0004,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 0.0001992651665751348,
"loss": 6.0177,
"step": 950
},
{
"epoch": 0.01,
"learning_rate": 0.00019922649113172082,
"loss": 6.4546,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 0.00019918781568830686,
"loss": 4.9639,
"step": 1050
},
{
"epoch": 0.01,
"learning_rate": 0.0001991491402448929,
"loss": 4.5082,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 0.00019911046480147897,
"loss": 4.9607,
"step": 1150
},
{
"epoch": 0.01,
"learning_rate": 0.00019907178935806501,
"loss": 4.6557,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 0.00019903311391465106,
"loss": 3.7942,
"step": 1250
},
{
"epoch": 0.01,
"learning_rate": 0.0001989944384712371,
"loss": 3.3466,
"step": 1300
},
{
"epoch": 0.01,
"learning_rate": 0.00019895576302782312,
"loss": 4.2555,
"step": 1350
},
{
"epoch": 0.01,
"learning_rate": 0.00019891708758440916,
"loss": 3.7983,
"step": 1400
},
{
"epoch": 0.01,
"learning_rate": 0.0001988784121409952,
"loss": 3.6397,
"step": 1450
},
{
"epoch": 0.01,
"learning_rate": 0.00019883973669758125,
"loss": 3.2206,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 0.0001988010612541673,
"loss": 2.9513,
"step": 1550
},
{
"epoch": 0.01,
"learning_rate": 0.00019876238581075333,
"loss": 3.443,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 0.00019872371036733938,
"loss": 2.6171,
"step": 1650
},
{
"epoch": 0.01,
"learning_rate": 0.00019868503492392542,
"loss": 2.6626,
"step": 1700
},
{
"epoch": 0.01,
"learning_rate": 0.00019864635948051143,
"loss": 3.2079,
"step": 1750
},
{
"epoch": 0.01,
"learning_rate": 0.00019860768403709748,
"loss": 2.679,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 0.00019856900859368352,
"loss": 3.2509,
"step": 1850
},
{
"epoch": 0.01,
"learning_rate": 0.0001985303331502696,
"loss": 2.3529,
"step": 1900
},
{
"epoch": 0.02,
"learning_rate": 0.00019849165770685563,
"loss": 2.3721,
"step": 1950
},
{
"epoch": 0.02,
"learning_rate": 0.00019845298226344168,
"loss": 2.7719,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 0.00019841430682002772,
"loss": 2.3059,
"step": 2050
},
{
"epoch": 0.02,
"learning_rate": 0.00019837563137661374,
"loss": 2.9214,
"step": 2100
},
{
"epoch": 0.02,
"learning_rate": 0.00019833695593319978,
"loss": 2.4541,
"step": 2150
},
{
"epoch": 0.02,
"learning_rate": 0.00019829828048978582,
"loss": 2.3267,
"step": 2200
},
{
"epoch": 0.02,
"learning_rate": 0.00019825960504637186,
"loss": 2.1945,
"step": 2250
},
{
"epoch": 0.02,
"learning_rate": 0.0001982209296029579,
"loss": 2.3966,
"step": 2300
},
{
"epoch": 0.02,
"learning_rate": 0.00019818225415954395,
"loss": 2.5349,
"step": 2350
},
{
"epoch": 0.02,
"learning_rate": 0.00019814357871613,
"loss": 2.0588,
"step": 2400
},
{
"epoch": 0.02,
"learning_rate": 0.00019810490327271604,
"loss": 3.1209,
"step": 2450
},
{
"epoch": 0.02,
"learning_rate": 0.00019806622782930205,
"loss": 2.3281,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 0.0001980275523858881,
"loss": 2.0749,
"step": 2550
},
{
"epoch": 0.02,
"learning_rate": 0.00019798887694247414,
"loss": 2.1665,
"step": 2600
},
{
"epoch": 0.02,
"learning_rate": 0.0001979502014990602,
"loss": 2.5256,
"step": 2650
},
{
"epoch": 0.02,
"learning_rate": 0.00019791152605564625,
"loss": 2.3435,
"step": 2700
},
{
"epoch": 0.02,
"learning_rate": 0.0001978728506122323,
"loss": 2.2333,
"step": 2750
},
{
"epoch": 0.02,
"learning_rate": 0.00019783417516881834,
"loss": 1.9695,
"step": 2800
},
{
"epoch": 0.02,
"learning_rate": 0.00019779549972540435,
"loss": 2.3046,
"step": 2850
},
{
"epoch": 0.02,
"learning_rate": 0.0001977568242819904,
"loss": 2.1951,
"step": 2900
},
{
"epoch": 0.02,
"learning_rate": 0.00019771814883857644,
"loss": 2.2141,
"step": 2950
},
{
"epoch": 0.02,
"learning_rate": 0.00019767947339516248,
"loss": 2.3285,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 0.00019764079795174853,
"loss": 1.9263,
"step": 3050
},
{
"epoch": 0.02,
"learning_rate": 0.00019760212250833457,
"loss": 2.4391,
"step": 3100
},
{
"epoch": 0.02,
"learning_rate": 0.0001975634470649206,
"loss": 2.2386,
"step": 3150
},
{
"epoch": 0.02,
"learning_rate": 0.00019752477162150665,
"loss": 1.9979,
"step": 3200
},
{
"epoch": 0.03,
"learning_rate": 0.00019748609617809267,
"loss": 2.2926,
"step": 3250
},
{
"epoch": 0.03,
"learning_rate": 0.0001974474207346787,
"loss": 2.0263,
"step": 3300
},
{
"epoch": 0.03,
"learning_rate": 0.00019740874529126476,
"loss": 2.3533,
"step": 3350
},
{
"epoch": 0.03,
"learning_rate": 0.00019737006984785083,
"loss": 2.0248,
"step": 3400
},
{
"epoch": 0.03,
"learning_rate": 0.00019733139440443687,
"loss": 1.5322,
"step": 3450
},
{
"epoch": 0.03,
"learning_rate": 0.0001972927189610229,
"loss": 1.2563,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 0.00019725404351760896,
"loss": 1.2361,
"step": 3550
},
{
"epoch": 0.03,
"learning_rate": 0.00019721536807419497,
"loss": 1.3821,
"step": 3600
},
{
"epoch": 0.03,
"learning_rate": 0.00019717669263078101,
"loss": 1.0988,
"step": 3650
},
{
"epoch": 0.03,
"learning_rate": 0.00019713801718736706,
"loss": 1.2244,
"step": 3700
},
{
"epoch": 0.03,
"learning_rate": 0.0001970993417439531,
"loss": 0.9095,
"step": 3750
},
{
"epoch": 0.03,
"learning_rate": 0.00019706066630053914,
"loss": 1.2458,
"step": 3800
},
{
"epoch": 0.03,
"learning_rate": 0.0001970219908571252,
"loss": 1.1168,
"step": 3850
},
{
"epoch": 0.03,
"learning_rate": 0.00019698331541371123,
"loss": 0.7974,
"step": 3900
},
{
"epoch": 0.03,
"learning_rate": 0.00019694463997029727,
"loss": 1.0594,
"step": 3950
},
{
"epoch": 0.03,
"learning_rate": 0.0001969059645268833,
"loss": 1.2522,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 0.00019686728908346933,
"loss": 0.8916,
"step": 4050
},
{
"epoch": 0.03,
"learning_rate": 0.0001968286136400554,
"loss": 0.9284,
"step": 4100
},
{
"epoch": 0.03,
"learning_rate": 0.00019678993819664144,
"loss": 0.7177,
"step": 4150
},
{
"epoch": 0.03,
"learning_rate": 0.0001967512627532275,
"loss": 1.0662,
"step": 4200
},
{
"epoch": 0.03,
"learning_rate": 0.00019671258730981353,
"loss": 1.0509,
"step": 4250
},
{
"epoch": 0.03,
"learning_rate": 0.00019667391186639957,
"loss": 1.0486,
"step": 4300
},
{
"epoch": 0.03,
"learning_rate": 0.0001966352364229856,
"loss": 0.9541,
"step": 4350
},
{
"epoch": 0.03,
"learning_rate": 0.00019659656097957163,
"loss": 1.1056,
"step": 4400
},
{
"epoch": 0.03,
"learning_rate": 0.00019655788553615768,
"loss": 1.0613,
"step": 4450
},
{
"epoch": 0.03,
"learning_rate": 0.00019651921009274372,
"loss": 0.9647,
"step": 4500
},
{
"epoch": 0.04,
"learning_rate": 0.00019648053464932976,
"loss": 0.8281,
"step": 4550
},
{
"epoch": 0.04,
"learning_rate": 0.0001964418592059158,
"loss": 0.8205,
"step": 4600
},
{
"epoch": 0.04,
"learning_rate": 0.00019640318376250185,
"loss": 0.8107,
"step": 4650
},
{
"epoch": 0.04,
"learning_rate": 0.0001963645083190879,
"loss": 0.9087,
"step": 4700
},
{
"epoch": 0.04,
"learning_rate": 0.00019632583287567393,
"loss": 0.7476,
"step": 4750
},
{
"epoch": 0.04,
"learning_rate": 0.00019628715743225995,
"loss": 0.8191,
"step": 4800
},
{
"epoch": 0.04,
"learning_rate": 0.00019624848198884602,
"loss": 1.0138,
"step": 4850
},
{
"epoch": 0.04,
"learning_rate": 0.00019620980654543206,
"loss": 1.0121,
"step": 4900
},
{
"epoch": 0.04,
"learning_rate": 0.0001961711311020181,
"loss": 0.7376,
"step": 4950
},
{
"epoch": 0.04,
"learning_rate": 0.00019613245565860415,
"loss": 0.8335,
"step": 5000
},
{
"epoch": 0.04,
"learning_rate": 0.0001960937802151902,
"loss": 0.9411,
"step": 5050
},
{
"epoch": 0.04,
"learning_rate": 0.0001960551047717762,
"loss": 0.8631,
"step": 5100
},
{
"epoch": 0.04,
"learning_rate": 0.00019601642932836225,
"loss": 0.936,
"step": 5150
},
{
"epoch": 0.04,
"learning_rate": 0.0001959777538849483,
"loss": 0.8524,
"step": 5200
},
{
"epoch": 0.04,
"learning_rate": 0.00019593907844153434,
"loss": 0.7093,
"step": 5250
},
{
"epoch": 0.04,
"learning_rate": 0.00019590040299812038,
"loss": 0.8302,
"step": 5300
},
{
"epoch": 0.04,
"learning_rate": 0.00019586172755470642,
"loss": 0.8756,
"step": 5350
},
{
"epoch": 0.04,
"learning_rate": 0.00019582305211129247,
"loss": 0.7728,
"step": 5400
},
{
"epoch": 0.04,
"learning_rate": 0.0001957843766678785,
"loss": 0.809,
"step": 5450
},
{
"epoch": 0.04,
"learning_rate": 0.00019574570122446455,
"loss": 0.7282,
"step": 5500
},
{
"epoch": 0.04,
"learning_rate": 0.00019570702578105057,
"loss": 0.8608,
"step": 5550
},
{
"epoch": 0.04,
"learning_rate": 0.00019566835033763664,
"loss": 1.0078,
"step": 5600
},
{
"epoch": 0.04,
"learning_rate": 0.00019562967489422268,
"loss": 0.9773,
"step": 5650
},
{
"epoch": 0.04,
"learning_rate": 0.00019559099945080872,
"loss": 0.7969,
"step": 5700
},
{
"epoch": 0.04,
"learning_rate": 0.00019555232400739477,
"loss": 0.9988,
"step": 5750
},
{
"epoch": 0.04,
"learning_rate": 0.0001955136485639808,
"loss": 0.7552,
"step": 5800
},
{
"epoch": 0.05,
"learning_rate": 0.00019547497312056685,
"loss": 0.8775,
"step": 5850
},
{
"epoch": 0.05,
"learning_rate": 0.00019543629767715287,
"loss": 0.8895,
"step": 5900
},
{
"epoch": 0.05,
"learning_rate": 0.0001953976222337389,
"loss": 0.8106,
"step": 5950
},
{
"epoch": 0.05,
"learning_rate": 0.00019535894679032496,
"loss": 0.6546,
"step": 6000
},
{
"epoch": 0.05,
"learning_rate": 0.000195320271346911,
"loss": 0.9039,
"step": 6050
},
{
"epoch": 0.05,
"learning_rate": 0.00019528159590349704,
"loss": 0.652,
"step": 6100
},
{
"epoch": 0.05,
"learning_rate": 0.00019524292046008308,
"loss": 0.6561,
"step": 6150
},
{
"epoch": 0.05,
"learning_rate": 0.00019520424501666913,
"loss": 1.046,
"step": 6200
},
{
"epoch": 0.05,
"learning_rate": 0.00019516556957325517,
"loss": 0.8783,
"step": 6250
},
{
"epoch": 0.05,
"learning_rate": 0.00019512689412984119,
"loss": 0.7351,
"step": 6300
},
{
"epoch": 0.05,
"learning_rate": 0.00019508821868642726,
"loss": 0.733,
"step": 6350
},
{
"epoch": 0.05,
"learning_rate": 0.0001950495432430133,
"loss": 0.7675,
"step": 6400
},
{
"epoch": 0.05,
"learning_rate": 0.00019501086779959934,
"loss": 0.9451,
"step": 6450
},
{
"epoch": 0.05,
"learning_rate": 0.00019497219235618539,
"loss": 0.9686,
"step": 6500
},
{
"epoch": 0.05,
"learning_rate": 0.00019493351691277143,
"loss": 0.6083,
"step": 6550
},
{
"epoch": 0.05,
"learning_rate": 0.00019489484146935747,
"loss": 0.8619,
"step": 6600
},
{
"epoch": 0.05,
"learning_rate": 0.0001948561660259435,
"loss": 0.6557,
"step": 6650
},
{
"epoch": 0.05,
"learning_rate": 0.00019481749058252953,
"loss": 0.8819,
"step": 6700
},
{
"epoch": 0.05,
"learning_rate": 0.00019477881513911557,
"loss": 0.8356,
"step": 6750
},
{
"epoch": 0.05,
"learning_rate": 0.00019474013969570162,
"loss": 0.8211,
"step": 6800
},
{
"epoch": 0.05,
"learning_rate": 0.00019470146425228766,
"loss": 0.8393,
"step": 6850
},
{
"epoch": 0.05,
"learning_rate": 0.0001946627888088737,
"loss": 1.0301,
"step": 6900
},
{
"epoch": 0.05,
"learning_rate": 0.00019462411336545975,
"loss": 0.7435,
"step": 6950
},
{
"epoch": 0.05,
"learning_rate": 0.0001945854379220458,
"loss": 0.71,
"step": 7000
},
{
"epoch": 0.05,
"learning_rate": 0.0001945467624786318,
"loss": 0.7786,
"step": 7050
},
{
"epoch": 0.05,
"learning_rate": 0.00019450808703521787,
"loss": 1.1273,
"step": 7100
},
{
"epoch": 0.06,
"learning_rate": 0.00019446941159180392,
"loss": 0.923,
"step": 7150
},
{
"epoch": 0.06,
"learning_rate": 0.00019443073614838996,
"loss": 0.8656,
"step": 7200
},
{
"epoch": 0.06,
"learning_rate": 0.000194392060704976,
"loss": 0.8191,
"step": 7250
},
{
"epoch": 0.06,
"learning_rate": 0.00019435338526156205,
"loss": 0.8924,
"step": 7300
},
{
"epoch": 0.06,
"learning_rate": 0.0001943147098181481,
"loss": 0.9004,
"step": 7350
},
{
"epoch": 0.06,
"learning_rate": 0.0001942760343747341,
"loss": 0.6538,
"step": 7400
},
{
"epoch": 0.06,
"learning_rate": 0.00019423735893132015,
"loss": 0.8669,
"step": 7450
},
{
"epoch": 0.06,
"learning_rate": 0.0001941986834879062,
"loss": 0.9103,
"step": 7500
},
{
"epoch": 0.06,
"learning_rate": 0.00019416000804449223,
"loss": 0.8853,
"step": 7550
},
{
"epoch": 0.06,
"learning_rate": 0.00019412133260107828,
"loss": 0.7989,
"step": 7600
},
{
"epoch": 0.06,
"learning_rate": 0.00019408265715766432,
"loss": 0.6957,
"step": 7650
},
{
"epoch": 0.06,
"learning_rate": 0.00019404398171425036,
"loss": 0.8685,
"step": 7700
},
{
"epoch": 0.06,
"learning_rate": 0.0001940053062708364,
"loss": 0.6701,
"step": 7750
},
{
"epoch": 0.06,
"learning_rate": 0.00019396663082742242,
"loss": 0.7488,
"step": 7800
},
{
"epoch": 0.06,
"learning_rate": 0.0001939279553840085,
"loss": 0.9214,
"step": 7850
},
{
"epoch": 0.06,
"learning_rate": 0.00019388927994059454,
"loss": 0.7879,
"step": 7900
},
{
"epoch": 0.06,
"learning_rate": 0.00019385060449718058,
"loss": 0.8522,
"step": 7950
},
{
"epoch": 0.06,
"learning_rate": 0.00019381192905376662,
"loss": 0.9119,
"step": 8000
},
{
"epoch": 0.06,
"learning_rate": 0.00019377325361035267,
"loss": 0.6229,
"step": 8050
},
{
"epoch": 0.06,
"learning_rate": 0.0001937345781669387,
"loss": 0.8,
"step": 8100
},
{
"epoch": 0.06,
"learning_rate": 0.00019369590272352472,
"loss": 0.6705,
"step": 8150
},
{
"epoch": 0.06,
"learning_rate": 0.00019365722728011077,
"loss": 0.8694,
"step": 8200
},
{
"epoch": 0.06,
"learning_rate": 0.0001936185518366968,
"loss": 0.7932,
"step": 8250
},
{
"epoch": 0.06,
"learning_rate": 0.00019357987639328285,
"loss": 0.7311,
"step": 8300
},
{
"epoch": 0.06,
"learning_rate": 0.0001935412009498689,
"loss": 0.844,
"step": 8350
},
{
"epoch": 0.06,
"learning_rate": 0.00019350252550645494,
"loss": 0.8428,
"step": 8400
},
{
"epoch": 0.07,
"learning_rate": 0.00019346385006304098,
"loss": 0.8791,
"step": 8450
},
{
"epoch": 0.07,
"learning_rate": 0.00019342517461962703,
"loss": 0.9576,
"step": 8500
},
{
"epoch": 0.07,
"learning_rate": 0.00019338649917621304,
"loss": 0.821,
"step": 8550
},
{
"epoch": 0.07,
"learning_rate": 0.0001933478237327991,
"loss": 1.0343,
"step": 8600
},
{
"epoch": 0.07,
"learning_rate": 0.00019330914828938515,
"loss": 0.862,
"step": 8650
},
{
"epoch": 0.07,
"learning_rate": 0.0001932704728459712,
"loss": 0.6914,
"step": 8700
},
{
"epoch": 0.07,
"learning_rate": 0.00019323179740255724,
"loss": 1.0047,
"step": 8750
},
{
"epoch": 0.07,
"learning_rate": 0.00019319312195914328,
"loss": 0.7347,
"step": 8800
},
{
"epoch": 0.07,
"learning_rate": 0.00019315444651572933,
"loss": 0.7331,
"step": 8850
},
{
"epoch": 0.07,
"learning_rate": 0.00019311577107231534,
"loss": 0.9639,
"step": 8900
},
{
"epoch": 0.07,
"learning_rate": 0.00019307709562890139,
"loss": 0.7824,
"step": 8950
},
{
"epoch": 0.07,
"learning_rate": 0.00019303842018548743,
"loss": 0.8321,
"step": 9000
},
{
"epoch": 0.07,
"learning_rate": 0.00019299974474207347,
"loss": 1.053,
"step": 9050
},
{
"epoch": 0.07,
"learning_rate": 0.00019296106929865951,
"loss": 0.677,
"step": 9100
},
{
"epoch": 0.07,
"learning_rate": 0.00019292239385524556,
"loss": 0.8771,
"step": 9150
},
{
"epoch": 0.07,
"learning_rate": 0.0001928837184118316,
"loss": 0.7547,
"step": 9200
},
{
"epoch": 0.07,
"learning_rate": 0.00019284504296841764,
"loss": 0.7911,
"step": 9250
},
{
"epoch": 0.07,
"learning_rate": 0.00019280636752500366,
"loss": 0.8772,
"step": 9300
},
{
"epoch": 0.07,
"learning_rate": 0.00019276769208158973,
"loss": 1.0254,
"step": 9350
},
{
"epoch": 0.07,
"learning_rate": 0.00019272901663817577,
"loss": 0.9881,
"step": 9400
},
{
"epoch": 0.07,
"learning_rate": 0.00019269034119476182,
"loss": 0.9809,
"step": 9450
},
{
"epoch": 0.07,
"learning_rate": 0.00019265166575134786,
"loss": 0.6407,
"step": 9500
},
{
"epoch": 0.07,
"learning_rate": 0.0001926129903079339,
"loss": 0.8552,
"step": 9550
},
{
"epoch": 0.07,
"learning_rate": 0.00019257431486451994,
"loss": 0.5715,
"step": 9600
},
{
"epoch": 0.07,
"learning_rate": 0.00019253563942110596,
"loss": 0.7908,
"step": 9650
},
{
"epoch": 0.08,
"learning_rate": 0.000192496963977692,
"loss": 0.8544,
"step": 9700
},
{
"epoch": 0.08,
"learning_rate": 0.00019245828853427805,
"loss": 0.7795,
"step": 9750
},
{
"epoch": 0.08,
"learning_rate": 0.0001924196130908641,
"loss": 0.7534,
"step": 9800
},
{
"epoch": 0.08,
"learning_rate": 0.00019238093764745013,
"loss": 0.9141,
"step": 9850
},
{
"epoch": 0.08,
"learning_rate": 0.00019234226220403618,
"loss": 0.6377,
"step": 9900
},
{
"epoch": 0.08,
"learning_rate": 0.00019230358676062222,
"loss": 0.8392,
"step": 9950
},
{
"epoch": 0.08,
"learning_rate": 0.00019226491131720826,
"loss": 0.8541,
"step": 10000
},
{
"epoch": 0.08,
"learning_rate": 0.0001922262358737943,
"loss": 0.7969,
"step": 10050
},
{
"epoch": 0.08,
"learning_rate": 0.00019218756043038035,
"loss": 0.6434,
"step": 10100
},
{
"epoch": 0.08,
"learning_rate": 0.0001921488849869664,
"loss": 0.9645,
"step": 10150
},
{
"epoch": 0.08,
"learning_rate": 0.00019211020954355243,
"loss": 0.8545,
"step": 10200
},
{
"epoch": 0.08,
"learning_rate": 0.00019207153410013848,
"loss": 0.669,
"step": 10250
},
{
"epoch": 0.08,
"learning_rate": 0.00019203285865672452,
"loss": 0.7878,
"step": 10300
},
{
"epoch": 0.08,
"learning_rate": 0.00019199418321331056,
"loss": 0.6872,
"step": 10350
},
{
"epoch": 0.08,
"learning_rate": 0.00019195550776989658,
"loss": 0.7578,
"step": 10400
},
{
"epoch": 0.08,
"learning_rate": 0.00019191683232648262,
"loss": 0.6626,
"step": 10450
},
{
"epoch": 0.08,
"learning_rate": 0.00019187815688306866,
"loss": 0.7433,
"step": 10500
},
{
"epoch": 0.08,
"learning_rate": 0.0001918394814396547,
"loss": 0.8421,
"step": 10550
},
{
"epoch": 0.08,
"learning_rate": 0.00019180080599624075,
"loss": 0.8302,
"step": 10600
},
{
"epoch": 0.08,
"learning_rate": 0.0001917621305528268,
"loss": 0.7689,
"step": 10650
},
{
"epoch": 0.08,
"learning_rate": 0.00019172345510941284,
"loss": 0.7695,
"step": 10700
},
{
"epoch": 0.08,
"learning_rate": 0.00019168477966599888,
"loss": 0.8601,
"step": 10750
},
{
"epoch": 0.08,
"learning_rate": 0.00019164610422258492,
"loss": 1.0576,
"step": 10800
},
{
"epoch": 0.08,
"learning_rate": 0.00019160742877917097,
"loss": 0.6168,
"step": 10850
},
{
"epoch": 0.08,
"learning_rate": 0.000191568753335757,
"loss": 0.8053,
"step": 10900
},
{
"epoch": 0.08,
"learning_rate": 0.00019153007789234305,
"loss": 0.6034,
"step": 10950
},
{
"epoch": 0.09,
"learning_rate": 0.0001914914024489291,
"loss": 0.8146,
"step": 11000
},
{
"epoch": 0.09,
"learning_rate": 0.00019145272700551514,
"loss": 0.813,
"step": 11050
},
{
"epoch": 0.09,
"learning_rate": 0.00019141405156210118,
"loss": 0.7254,
"step": 11100
},
{
"epoch": 0.09,
"learning_rate": 0.00019137537611868722,
"loss": 0.8516,
"step": 11150
},
{
"epoch": 0.09,
"learning_rate": 0.00019133670067527324,
"loss": 0.8619,
"step": 11200
},
{
"epoch": 0.09,
"learning_rate": 0.00019129802523185928,
"loss": 0.8323,
"step": 11250
},
{
"epoch": 0.09,
"learning_rate": 0.00019125934978844533,
"loss": 0.802,
"step": 11300
},
{
"epoch": 0.09,
"learning_rate": 0.00019122067434503137,
"loss": 0.9431,
"step": 11350
},
{
"epoch": 0.09,
"learning_rate": 0.0001911819989016174,
"loss": 0.833,
"step": 11400
},
{
"epoch": 0.09,
"learning_rate": 0.00019114332345820345,
"loss": 0.8785,
"step": 11450
},
{
"epoch": 0.09,
"learning_rate": 0.0001911046480147895,
"loss": 0.894,
"step": 11500
},
{
"epoch": 0.09,
"learning_rate": 0.00019106597257137554,
"loss": 0.9139,
"step": 11550
},
{
"epoch": 0.09,
"learning_rate": 0.00019102729712796158,
"loss": 0.7368,
"step": 11600
},
{
"epoch": 0.09,
"learning_rate": 0.00019098862168454763,
"loss": 0.8103,
"step": 11650
},
{
"epoch": 0.09,
"learning_rate": 0.00019094994624113367,
"loss": 0.8547,
"step": 11700
},
{
"epoch": 0.09,
"learning_rate": 0.0001909112707977197,
"loss": 0.7595,
"step": 11750
},
{
"epoch": 0.09,
"learning_rate": 0.00019087259535430576,
"loss": 0.7432,
"step": 11800
},
{
"epoch": 0.09,
"learning_rate": 0.0001908339199108918,
"loss": 0.8002,
"step": 11850
},
{
"epoch": 0.09,
"learning_rate": 0.00019079524446747784,
"loss": 0.7238,
"step": 11900
},
{
"epoch": 0.09,
"learning_rate": 0.00019075656902406386,
"loss": 0.7368,
"step": 11950
},
{
"epoch": 0.09,
"learning_rate": 0.0001907178935806499,
"loss": 0.7619,
"step": 12000
},
{
"epoch": 0.09,
"learning_rate": 0.00019067921813723594,
"loss": 0.8555,
"step": 12050
},
{
"epoch": 0.09,
"learning_rate": 0.000190640542693822,
"loss": 0.7968,
"step": 12100
},
{
"epoch": 0.09,
"learning_rate": 0.00019060186725040803,
"loss": 0.6167,
"step": 12150
},
{
"epoch": 0.09,
"learning_rate": 0.00019056319180699407,
"loss": 0.7268,
"step": 12200
},
{
"epoch": 0.09,
"learning_rate": 0.00019052451636358012,
"loss": 0.7624,
"step": 12250
},
{
"epoch": 0.1,
"learning_rate": 0.00019048584092016616,
"loss": 0.7498,
"step": 12300
},
{
"epoch": 0.1,
"learning_rate": 0.0001904471654767522,
"loss": 0.7913,
"step": 12350
},
{
"epoch": 0.1,
"learning_rate": 0.00019040849003333825,
"loss": 0.9818,
"step": 12400
},
{
"epoch": 0.1,
"learning_rate": 0.0001903698145899243,
"loss": 0.9653,
"step": 12450
},
{
"epoch": 0.1,
"learning_rate": 0.00019033113914651033,
"loss": 0.8576,
"step": 12500
},
{
"epoch": 0.1,
"learning_rate": 0.00019029246370309637,
"loss": 0.7793,
"step": 12550
},
{
"epoch": 0.1,
"learning_rate": 0.00019025378825968242,
"loss": 0.9986,
"step": 12600
},
{
"epoch": 0.1,
"learning_rate": 0.00019021511281626846,
"loss": 0.6204,
"step": 12650
},
{
"epoch": 0.1,
"learning_rate": 0.00019017643737285448,
"loss": 0.7432,
"step": 12700
},
{
"epoch": 0.1,
"learning_rate": 0.00019013776192944052,
"loss": 0.7728,
"step": 12750
},
{
"epoch": 0.1,
"learning_rate": 0.00019009908648602656,
"loss": 0.924,
"step": 12800
},
{
"epoch": 0.1,
"learning_rate": 0.0001900604110426126,
"loss": 0.8306,
"step": 12850
},
{
"epoch": 0.1,
"learning_rate": 0.00019002173559919865,
"loss": 0.9331,
"step": 12900
},
{
"epoch": 0.1,
"learning_rate": 0.0001899830601557847,
"loss": 0.9156,
"step": 12950
},
{
"epoch": 0.1,
"learning_rate": 0.00018994438471237073,
"loss": 0.7475,
"step": 13000
},
{
"epoch": 0.1,
"learning_rate": 0.00018990570926895678,
"loss": 0.8014,
"step": 13050
},
{
"epoch": 0.1,
"learning_rate": 0.00018986703382554282,
"loss": 0.7636,
"step": 13100
},
{
"epoch": 0.1,
"learning_rate": 0.00018982835838212886,
"loss": 0.8878,
"step": 13150
},
{
"epoch": 0.1,
"learning_rate": 0.0001897896829387149,
"loss": 0.7146,
"step": 13200
},
{
"epoch": 0.1,
"learning_rate": 0.00018975100749530095,
"loss": 0.7577,
"step": 13250
},
{
"epoch": 0.1,
"learning_rate": 0.000189712332051887,
"loss": 0.9388,
"step": 13300
},
{
"epoch": 0.1,
"learning_rate": 0.00018967365660847304,
"loss": 0.7735,
"step": 13350
},
{
"epoch": 0.1,
"learning_rate": 0.00018963498116505908,
"loss": 0.6801,
"step": 13400
},
{
"epoch": 0.1,
"learning_rate": 0.0001895963057216451,
"loss": 0.7908,
"step": 13450
},
{
"epoch": 0.1,
"learning_rate": 0.00018955763027823114,
"loss": 0.7054,
"step": 13500
},
{
"epoch": 0.1,
"learning_rate": 0.00018951895483481718,
"loss": 0.8082,
"step": 13550
},
{
"epoch": 0.11,
"learning_rate": 0.00018948027939140322,
"loss": 0.7959,
"step": 13600
},
{
"epoch": 0.11,
"learning_rate": 0.00018944160394798927,
"loss": 0.8319,
"step": 13650
},
{
"epoch": 0.11,
"learning_rate": 0.0001894029285045753,
"loss": 0.7559,
"step": 13700
},
{
"epoch": 0.11,
"learning_rate": 0.00018936425306116135,
"loss": 0.6439,
"step": 13750
},
{
"epoch": 0.11,
"learning_rate": 0.0001893255776177474,
"loss": 0.7906,
"step": 13800
},
{
"epoch": 0.11,
"learning_rate": 0.00018928690217433344,
"loss": 0.9517,
"step": 13850
},
{
"epoch": 0.11,
"learning_rate": 0.00018924822673091948,
"loss": 0.8082,
"step": 13900
},
{
"epoch": 0.11,
"learning_rate": 0.00018920955128750552,
"loss": 0.8872,
"step": 13950
},
{
"epoch": 0.11,
"learning_rate": 0.00018917087584409157,
"loss": 0.6533,
"step": 14000
},
{
"epoch": 0.11,
"learning_rate": 0.0001891322004006776,
"loss": 0.8846,
"step": 14050
},
{
"epoch": 0.11,
"learning_rate": 0.00018909352495726365,
"loss": 0.7644,
"step": 14100
},
{
"epoch": 0.11,
"learning_rate": 0.0001890548495138497,
"loss": 0.9197,
"step": 14150
},
{
"epoch": 0.11,
"learning_rate": 0.0001890161740704357,
"loss": 0.8356,
"step": 14200
},
{
"epoch": 0.11,
"learning_rate": 0.00018897749862702176,
"loss": 0.7626,
"step": 14250
},
{
"epoch": 0.11,
"learning_rate": 0.0001889388231836078,
"loss": 0.7978,
"step": 14300
},
{
"epoch": 0.11,
"learning_rate": 0.00018890014774019384,
"loss": 0.9382,
"step": 14350
},
{
"epoch": 0.11,
"learning_rate": 0.00018886147229677988,
"loss": 0.8213,
"step": 14400
},
{
"epoch": 0.11,
"learning_rate": 0.00018882279685336593,
"loss": 0.8098,
"step": 14450
},
{
"epoch": 0.11,
"learning_rate": 0.00018878412140995197,
"loss": 0.6624,
"step": 14500
},
{
"epoch": 0.11,
"learning_rate": 0.00018874544596653801,
"loss": 0.6901,
"step": 14550
},
{
"epoch": 0.11,
"learning_rate": 0.00018870677052312406,
"loss": 0.8449,
"step": 14600
},
{
"epoch": 0.11,
"learning_rate": 0.0001886680950797101,
"loss": 0.992,
"step": 14650
},
{
"epoch": 0.11,
"learning_rate": 0.00018862941963629614,
"loss": 0.8163,
"step": 14700
},
{
"epoch": 0.11,
"learning_rate": 0.00018859074419288219,
"loss": 0.9079,
"step": 14750
},
{
"epoch": 0.11,
"learning_rate": 0.00018855206874946823,
"loss": 0.6967,
"step": 14800
},
{
"epoch": 0.11,
"learning_rate": 0.00018851339330605427,
"loss": 0.7634,
"step": 14850
},
{
"epoch": 0.12,
"learning_rate": 0.00018847471786264032,
"loss": 0.881,
"step": 14900
},
{
"epoch": 0.12,
"learning_rate": 0.00018843604241922633,
"loss": 0.9108,
"step": 14950
},
{
"epoch": 0.12,
"learning_rate": 0.00018839736697581237,
"loss": 0.7132,
"step": 15000
},
{
"epoch": 0.12,
"learning_rate": 0.00018835869153239842,
"loss": 0.5067,
"step": 15050
},
{
"epoch": 0.12,
"learning_rate": 0.00018832001608898446,
"loss": 1.1357,
"step": 15100
},
{
"epoch": 0.12,
"learning_rate": 0.0001882813406455705,
"loss": 0.7256,
"step": 15150
},
{
"epoch": 0.12,
"learning_rate": 0.00018824266520215655,
"loss": 0.6846,
"step": 15200
},
{
"epoch": 0.12,
"learning_rate": 0.0001882039897587426,
"loss": 0.8358,
"step": 15250
},
{
"epoch": 0.12,
"learning_rate": 0.00018816531431532863,
"loss": 0.7776,
"step": 15300
},
{
"epoch": 0.12,
"learning_rate": 0.00018812663887191468,
"loss": 0.5573,
"step": 15350
},
{
"epoch": 0.12,
"learning_rate": 0.00018808796342850072,
"loss": 0.6548,
"step": 15400
},
{
"epoch": 0.12,
"learning_rate": 0.00018804928798508676,
"loss": 0.7813,
"step": 15450
},
{
"epoch": 0.12,
"learning_rate": 0.0001880106125416728,
"loss": 0.853,
"step": 15500
},
{
"epoch": 0.12,
"learning_rate": 0.00018797193709825885,
"loss": 0.757,
"step": 15550
},
{
"epoch": 0.12,
"learning_rate": 0.0001879332616548449,
"loss": 0.7511,
"step": 15600
},
{
"epoch": 0.12,
"learning_rate": 0.00018789458621143093,
"loss": 0.8809,
"step": 15650
},
{
"epoch": 0.12,
"learning_rate": 0.00018785591076801698,
"loss": 0.6439,
"step": 15700
},
{
"epoch": 0.12,
"learning_rate": 0.000187817235324603,
"loss": 0.6401,
"step": 15750
},
{
"epoch": 0.12,
"learning_rate": 0.00018777855988118904,
"loss": 0.9463,
"step": 15800
},
{
"epoch": 0.12,
"learning_rate": 0.00018773988443777508,
"loss": 0.7206,
"step": 15850
},
{
"epoch": 0.12,
"learning_rate": 0.00018770120899436112,
"loss": 0.738,
"step": 15900
},
{
"epoch": 0.12,
"learning_rate": 0.00018766253355094716,
"loss": 0.8078,
"step": 15950
},
{
"epoch": 0.12,
"learning_rate": 0.0001876238581075332,
"loss": 0.8814,
"step": 16000
},
{
"epoch": 0.12,
"learning_rate": 0.00018758518266411925,
"loss": 0.7841,
"step": 16050
},
{
"epoch": 0.12,
"learning_rate": 0.0001875465072207053,
"loss": 0.9534,
"step": 16100
},
{
"epoch": 0.12,
"learning_rate": 0.00018750783177729134,
"loss": 0.7588,
"step": 16150
},
{
"epoch": 0.13,
"learning_rate": 0.00018746915633387738,
"loss": 0.7467,
"step": 16200
},
{
"epoch": 0.13,
"learning_rate": 0.00018743048089046342,
"loss": 0.7402,
"step": 16250
},
{
"epoch": 0.13,
"learning_rate": 0.00018739180544704947,
"loss": 0.7391,
"step": 16300
},
{
"epoch": 0.13,
"learning_rate": 0.0001873531300036355,
"loss": 0.93,
"step": 16350
},
{
"epoch": 0.13,
"learning_rate": 0.00018731445456022155,
"loss": 0.673,
"step": 16400
},
{
"epoch": 0.13,
"learning_rate": 0.0001872757791168076,
"loss": 0.8719,
"step": 16450
},
{
"epoch": 0.13,
"learning_rate": 0.0001872371036733936,
"loss": 0.7977,
"step": 16500
},
{
"epoch": 0.13,
"learning_rate": 0.00018719842822997965,
"loss": 0.8446,
"step": 16550
},
{
"epoch": 0.13,
"learning_rate": 0.0001871597527865657,
"loss": 0.5509,
"step": 16600
},
{
"epoch": 0.13,
"learning_rate": 0.00018712107734315174,
"loss": 0.7187,
"step": 16650
},
{
"epoch": 0.13,
"learning_rate": 0.00018708240189973778,
"loss": 0.7886,
"step": 16700
},
{
"epoch": 0.13,
"learning_rate": 0.00018704372645632383,
"loss": 0.787,
"step": 16750
},
{
"epoch": 0.13,
"learning_rate": 0.00018700505101290987,
"loss": 0.8182,
"step": 16800
},
{
"epoch": 0.13,
"learning_rate": 0.0001869663755694959,
"loss": 0.7996,
"step": 16850
},
{
"epoch": 0.13,
"learning_rate": 0.00018692770012608195,
"loss": 1.0537,
"step": 16900
},
{
"epoch": 0.13,
"learning_rate": 0.000186889024682668,
"loss": 0.7795,
"step": 16950
},
{
"epoch": 0.13,
"learning_rate": 0.00018685034923925404,
"loss": 0.6382,
"step": 17000
},
{
"epoch": 0.13,
"learning_rate": 0.00018681167379584008,
"loss": 0.8503,
"step": 17050
},
{
"epoch": 0.13,
"learning_rate": 0.00018677299835242613,
"loss": 0.7172,
"step": 17100
},
{
"epoch": 0.13,
"learning_rate": 0.00018673432290901217,
"loss": 0.8269,
"step": 17150
},
{
"epoch": 0.13,
"learning_rate": 0.0001866956474655982,
"loss": 0.8608,
"step": 17200
},
{
"epoch": 0.13,
"learning_rate": 0.00018665697202218423,
"loss": 0.5488,
"step": 17250
},
{
"epoch": 0.13,
"learning_rate": 0.00018661829657877027,
"loss": 0.6198,
"step": 17300
},
{
"epoch": 0.13,
"learning_rate": 0.00018657962113535631,
"loss": 0.6294,
"step": 17350
},
{
"epoch": 0.13,
"learning_rate": 0.00018654094569194236,
"loss": 0.6832,
"step": 17400
},
{
"epoch": 0.13,
"learning_rate": 0.0001865022702485284,
"loss": 0.7857,
"step": 17450
},
{
"epoch": 0.14,
"learning_rate": 0.00018646359480511444,
"loss": 0.873,
"step": 17500
},
{
"epoch": 0.14,
"learning_rate": 0.00018642491936170051,
"loss": 0.8379,
"step": 17550
},
{
"epoch": 0.14,
"learning_rate": 0.00018638624391828653,
"loss": 0.716,
"step": 17600
},
{
"epoch": 0.14,
"learning_rate": 0.00018634756847487257,
"loss": 0.7123,
"step": 17650
},
{
"epoch": 0.14,
"learning_rate": 0.00018630889303145862,
"loss": 0.7582,
"step": 17700
},
{
"epoch": 0.14,
"learning_rate": 0.00018627021758804466,
"loss": 0.8126,
"step": 17750
},
{
"epoch": 0.14,
"learning_rate": 0.0001862315421446307,
"loss": 0.8564,
"step": 17800
},
{
"epoch": 0.14,
"learning_rate": 0.00018619286670121674,
"loss": 0.5869,
"step": 17850
},
{
"epoch": 0.14,
"learning_rate": 0.0001861541912578028,
"loss": 0.7508,
"step": 17900
},
{
"epoch": 0.14,
"learning_rate": 0.00018611551581438883,
"loss": 0.7061,
"step": 17950
},
{
"epoch": 0.14,
"learning_rate": 0.00018607684037097485,
"loss": 0.7345,
"step": 18000
},
{
"epoch": 0.14,
"learning_rate": 0.0001860381649275609,
"loss": 0.5775,
"step": 18050
},
{
"epoch": 0.14,
"learning_rate": 0.00018599948948414693,
"loss": 0.7817,
"step": 18100
},
{
"epoch": 0.14,
"learning_rate": 0.00018596081404073298,
"loss": 0.7201,
"step": 18150
},
{
"epoch": 0.14,
"learning_rate": 0.00018592213859731902,
"loss": 0.8352,
"step": 18200
},
{
"epoch": 0.14,
"learning_rate": 0.00018588346315390506,
"loss": 0.7986,
"step": 18250
},
{
"epoch": 0.14,
"learning_rate": 0.00018584478771049113,
"loss": 0.5892,
"step": 18300
},
{
"epoch": 0.14,
"learning_rate": 0.00018580611226707715,
"loss": 0.6573,
"step": 18350
},
{
"epoch": 0.14,
"learning_rate": 0.0001857674368236632,
"loss": 0.7291,
"step": 18400
},
{
"epoch": 0.14,
"learning_rate": 0.00018572876138024923,
"loss": 0.8477,
"step": 18450
},
{
"epoch": 0.14,
"learning_rate": 0.00018569008593683528,
"loss": 0.7634,
"step": 18500
},
{
"epoch": 0.14,
"learning_rate": 0.00018565141049342132,
"loss": 0.5596,
"step": 18550
},
{
"epoch": 0.14,
"learning_rate": 0.00018561273505000736,
"loss": 0.7536,
"step": 18600
},
{
"epoch": 0.14,
"learning_rate": 0.0001855740596065934,
"loss": 0.8015,
"step": 18650
},
{
"epoch": 0.14,
"learning_rate": 0.00018553538416317945,
"loss": 0.9044,
"step": 18700
},
{
"epoch": 0.15,
"learning_rate": 0.00018549670871976547,
"loss": 0.7212,
"step": 18750
},
{
"epoch": 0.15,
"learning_rate": 0.0001854580332763515,
"loss": 0.6835,
"step": 18800
},
{
"epoch": 0.15,
"learning_rate": 0.00018541935783293755,
"loss": 0.6431,
"step": 18850
},
{
"epoch": 0.15,
"learning_rate": 0.0001853806823895236,
"loss": 0.6776,
"step": 18900
},
{
"epoch": 0.15,
"learning_rate": 0.00018534200694610964,
"loss": 0.8134,
"step": 18950
},
{
"epoch": 0.15,
"learning_rate": 0.00018530333150269568,
"loss": 0.7613,
"step": 19000
},
{
"epoch": 0.15,
"learning_rate": 0.00018526465605928175,
"loss": 0.6909,
"step": 19050
},
{
"epoch": 0.15,
"learning_rate": 0.00018522598061586777,
"loss": 0.5647,
"step": 19100
},
{
"epoch": 0.15,
"learning_rate": 0.0001851873051724538,
"loss": 0.845,
"step": 19150
},
{
"epoch": 0.15,
"learning_rate": 0.00018514862972903985,
"loss": 0.6676,
"step": 19200
},
{
"epoch": 0.15,
"learning_rate": 0.0001851099542856259,
"loss": 0.608,
"step": 19250
},
{
"epoch": 0.15,
"learning_rate": 0.00018507127884221194,
"loss": 0.6545,
"step": 19300
},
{
"epoch": 0.15,
"learning_rate": 0.00018503260339879798,
"loss": 0.8084,
"step": 19350
},
{
"epoch": 0.15,
"learning_rate": 0.00018499392795538402,
"loss": 0.9323,
"step": 19400
},
{
"epoch": 0.15,
"learning_rate": 0.00018495525251197007,
"loss": 0.7761,
"step": 19450
},
{
"epoch": 0.15,
"learning_rate": 0.00018491657706855608,
"loss": 0.7525,
"step": 19500
},
{
"epoch": 0.15,
"learning_rate": 0.00018487790162514213,
"loss": 0.7387,
"step": 19550
},
{
"epoch": 0.15,
"learning_rate": 0.00018483922618172817,
"loss": 0.7412,
"step": 19600
},
{
"epoch": 0.15,
"learning_rate": 0.0001848005507383142,
"loss": 0.6455,
"step": 19650
},
{
"epoch": 0.15,
"learning_rate": 0.00018476187529490026,
"loss": 0.6401,
"step": 19700
},
{
"epoch": 0.15,
"learning_rate": 0.00018472319985148633,
"loss": 0.7524,
"step": 19750
},
{
"epoch": 0.15,
"learning_rate": 0.00018468452440807237,
"loss": 0.8381,
"step": 19800
},
{
"epoch": 0.15,
"learning_rate": 0.00018464584896465838,
"loss": 0.7317,
"step": 19850
},
{
"epoch": 0.15,
"learning_rate": 0.00018460717352124443,
"loss": 0.7321,
"step": 19900
},
{
"epoch": 0.15,
"learning_rate": 0.00018456849807783047,
"loss": 0.8627,
"step": 19950
},
{
"epoch": 0.15,
"learning_rate": 0.0001845298226344165,
"loss": 0.8806,
"step": 20000
},
{
"epoch": 0.16,
"learning_rate": 0.00018449114719100256,
"loss": 0.6949,
"step": 20050
},
{
"epoch": 0.16,
"learning_rate": 0.0001844524717475886,
"loss": 0.8466,
"step": 20100
},
{
"epoch": 0.16,
"learning_rate": 0.00018441379630417464,
"loss": 0.468,
"step": 20150
},
{
"epoch": 0.16,
"learning_rate": 0.00018437512086076069,
"loss": 0.8107,
"step": 20200
},
{
"epoch": 0.16,
"learning_rate": 0.0001843364454173467,
"loss": 0.8214,
"step": 20250
},
{
"epoch": 0.16,
"learning_rate": 0.00018429776997393274,
"loss": 0.6371,
"step": 20300
},
{
"epoch": 0.16,
"learning_rate": 0.0001842590945305188,
"loss": 0.8139,
"step": 20350
},
{
"epoch": 0.16,
"learning_rate": 0.00018422041908710483,
"loss": 0.8821,
"step": 20400
},
{
"epoch": 0.16,
"learning_rate": 0.00018418174364369087,
"loss": 0.749,
"step": 20450
},
{
"epoch": 0.16,
"learning_rate": 0.00018414306820027694,
"loss": 0.7666,
"step": 20500
},
{
"epoch": 0.16,
"learning_rate": 0.000184104392756863,
"loss": 0.804,
"step": 20550
},
{
"epoch": 0.16,
"learning_rate": 0.000184065717313449,
"loss": 0.8258,
"step": 20600
},
{
"epoch": 0.16,
"learning_rate": 0.00018402704187003505,
"loss": 0.6565,
"step": 20650
},
{
"epoch": 0.16,
"learning_rate": 0.0001839883664266211,
"loss": 0.6481,
"step": 20700
},
{
"epoch": 0.16,
"learning_rate": 0.00018394969098320713,
"loss": 0.5938,
"step": 20750
},
{
"epoch": 0.16,
"learning_rate": 0.00018391101553979317,
"loss": 0.6611,
"step": 20800
},
{
"epoch": 0.16,
"learning_rate": 0.00018387234009637922,
"loss": 0.9062,
"step": 20850
},
{
"epoch": 0.16,
"learning_rate": 0.00018383366465296526,
"loss": 0.6141,
"step": 20900
},
{
"epoch": 0.16,
"learning_rate": 0.0001837949892095513,
"loss": 0.6457,
"step": 20950
},
{
"epoch": 0.16,
"learning_rate": 0.00018375631376613735,
"loss": 0.5349,
"step": 21000
},
{
"epoch": 0.16,
"learning_rate": 0.00018371763832272336,
"loss": 0.6687,
"step": 21050
},
{
"epoch": 0.16,
"learning_rate": 0.0001836789628793094,
"loss": 1.0448,
"step": 21100
},
{
"epoch": 0.16,
"learning_rate": 0.00018364028743589545,
"loss": 0.8059,
"step": 21150
},
{
"epoch": 0.16,
"learning_rate": 0.0001836016119924815,
"loss": 0.6748,
"step": 21200
},
{
"epoch": 0.16,
"learning_rate": 0.00018356293654906756,
"loss": 0.5979,
"step": 21250
},
{
"epoch": 0.16,
"learning_rate": 0.0001835242611056536,
"loss": 0.8469,
"step": 21300
},
{
"epoch": 0.17,
"learning_rate": 0.00018348558566223962,
"loss": 0.7463,
"step": 21350
},
{
"epoch": 0.17,
"learning_rate": 0.00018344691021882566,
"loss": 0.7493,
"step": 21400
},
{
"epoch": 0.17,
"learning_rate": 0.0001834082347754117,
"loss": 0.8654,
"step": 21450
},
{
"epoch": 0.17,
"learning_rate": 0.00018336955933199775,
"loss": 0.7216,
"step": 21500
},
{
"epoch": 0.17,
"learning_rate": 0.0001833308838885838,
"loss": 0.7847,
"step": 21550
},
{
"epoch": 0.17,
"learning_rate": 0.00018329220844516984,
"loss": 0.5339,
"step": 21600
},
{
"epoch": 0.17,
"learning_rate": 0.00018325353300175588,
"loss": 0.7045,
"step": 21650
},
{
"epoch": 0.17,
"learning_rate": 0.00018321485755834192,
"loss": 0.6995,
"step": 21700
},
{
"epoch": 0.17,
"learning_rate": 0.00018317618211492797,
"loss": 0.736,
"step": 21750
},
{
"epoch": 0.17,
"learning_rate": 0.00018313750667151398,
"loss": 0.7212,
"step": 21800
},
{
"epoch": 0.17,
"learning_rate": 0.00018309883122810002,
"loss": 0.6062,
"step": 21850
},
{
"epoch": 0.17,
"learning_rate": 0.00018306015578468607,
"loss": 0.889,
"step": 21900
},
{
"epoch": 0.17,
"learning_rate": 0.0001830214803412721,
"loss": 0.6812,
"step": 21950
},
{
"epoch": 0.17,
"learning_rate": 0.00018298280489785818,
"loss": 0.7713,
"step": 22000
},
{
"epoch": 0.17,
"learning_rate": 0.00018294412945444422,
"loss": 0.7462,
"step": 22050
},
{
"epoch": 0.17,
"learning_rate": 0.00018290545401103027,
"loss": 0.5084,
"step": 22100
},
{
"epoch": 0.17,
"learning_rate": 0.00018286677856761628,
"loss": 0.6875,
"step": 22150
},
{
"epoch": 0.17,
"learning_rate": 0.00018282810312420233,
"loss": 0.8552,
"step": 22200
},
{
"epoch": 0.17,
"learning_rate": 0.00018278942768078837,
"loss": 0.7549,
"step": 22250
},
{
"epoch": 0.17,
"learning_rate": 0.0001827507522373744,
"loss": 0.6307,
"step": 22300
},
{
"epoch": 0.17,
"learning_rate": 0.00018271207679396045,
"loss": 1.0293,
"step": 22350
},
{
"epoch": 0.17,
"learning_rate": 0.0001826734013505465,
"loss": 0.7603,
"step": 22400
},
{
"epoch": 0.17,
"learning_rate": 0.00018263472590713254,
"loss": 0.5218,
"step": 22450
},
{
"epoch": 0.17,
"learning_rate": 0.00018259605046371858,
"loss": 0.5962,
"step": 22500
},
{
"epoch": 0.17,
"learning_rate": 0.0001825573750203046,
"loss": 0.7793,
"step": 22550
},
{
"epoch": 0.17,
"learning_rate": 0.00018251869957689064,
"loss": 0.6511,
"step": 22600
},
{
"epoch": 0.18,
"learning_rate": 0.00018248002413347669,
"loss": 0.6589,
"step": 22650
},
{
"epoch": 0.18,
"learning_rate": 0.00018244134869006273,
"loss": 0.826,
"step": 22700
},
{
"epoch": 0.18,
"learning_rate": 0.0001824026732466488,
"loss": 0.7561,
"step": 22750
},
{
"epoch": 0.18,
"learning_rate": 0.00018236399780323484,
"loss": 0.7605,
"step": 22800
},
{
"epoch": 0.18,
"learning_rate": 0.00018232532235982088,
"loss": 0.7887,
"step": 22850
},
{
"epoch": 0.18,
"learning_rate": 0.0001822866469164069,
"loss": 0.6065,
"step": 22900
},
{
"epoch": 0.18,
"learning_rate": 0.00018224797147299294,
"loss": 0.7631,
"step": 22950
},
{
"epoch": 0.18,
"learning_rate": 0.00018220929602957899,
"loss": 0.6708,
"step": 23000
},
{
"epoch": 0.18,
"learning_rate": 0.00018217062058616503,
"loss": 0.8115,
"step": 23050
},
{
"epoch": 0.18,
"learning_rate": 0.00018213194514275107,
"loss": 0.5469,
"step": 23100
},
{
"epoch": 0.18,
"learning_rate": 0.00018209326969933712,
"loss": 0.751,
"step": 23150
},
{
"epoch": 0.18,
"learning_rate": 0.00018205459425592316,
"loss": 0.6424,
"step": 23200
},
{
"epoch": 0.18,
"learning_rate": 0.0001820159188125092,
"loss": 0.7017,
"step": 23250
},
{
"epoch": 0.18,
"learning_rate": 0.00018197724336909522,
"loss": 0.615,
"step": 23300
},
{
"epoch": 0.18,
"learning_rate": 0.00018193856792568126,
"loss": 0.9175,
"step": 23350
},
{
"epoch": 0.18,
"learning_rate": 0.0001818998924822673,
"loss": 0.9438,
"step": 23400
},
{
"epoch": 0.18,
"learning_rate": 0.00018186121703885335,
"loss": 0.8183,
"step": 23450
},
{
"epoch": 0.18,
"learning_rate": 0.00018182254159543942,
"loss": 0.9829,
"step": 23500
},
{
"epoch": 0.18,
"learning_rate": 0.00018178386615202546,
"loss": 0.7703,
"step": 23550
},
{
"epoch": 0.18,
"learning_rate": 0.0001817451907086115,
"loss": 0.6007,
"step": 23600
},
{
"epoch": 0.18,
"learning_rate": 0.00018170651526519752,
"loss": 0.6095,
"step": 23650
},
{
"epoch": 0.18,
"learning_rate": 0.00018166783982178356,
"loss": 0.6811,
"step": 23700
},
{
"epoch": 0.18,
"learning_rate": 0.0001816291643783696,
"loss": 0.6791,
"step": 23750
},
{
"epoch": 0.18,
"learning_rate": 0.00018159048893495565,
"loss": 0.8032,
"step": 23800
},
{
"epoch": 0.18,
"learning_rate": 0.0001815518134915417,
"loss": 0.6968,
"step": 23850
},
{
"epoch": 0.18,
"learning_rate": 0.00018151313804812773,
"loss": 0.7912,
"step": 23900
},
{
"epoch": 0.19,
"learning_rate": 0.00018147446260471378,
"loss": 0.6557,
"step": 23950
},
{
"epoch": 0.19,
"learning_rate": 0.00018143578716129982,
"loss": 0.7041,
"step": 24000
},
{
"epoch": 0.19,
"learning_rate": 0.00018139711171788584,
"loss": 0.7028,
"step": 24050
},
{
"epoch": 0.19,
"learning_rate": 0.00018135843627447188,
"loss": 0.5454,
"step": 24100
},
{
"epoch": 0.19,
"learning_rate": 0.00018131976083105792,
"loss": 0.8485,
"step": 24150
},
{
"epoch": 0.19,
"learning_rate": 0.00018128108538764396,
"loss": 0.6944,
"step": 24200
},
{
"epoch": 0.19,
"learning_rate": 0.00018124240994423003,
"loss": 0.6429,
"step": 24250
},
{
"epoch": 0.19,
"learning_rate": 0.00018120373450081608,
"loss": 0.9911,
"step": 24300
},
{
"epoch": 0.19,
"learning_rate": 0.00018116505905740212,
"loss": 0.7624,
"step": 24350
},
{
"epoch": 0.19,
"learning_rate": 0.00018112638361398814,
"loss": 0.8377,
"step": 24400
},
{
"epoch": 0.19,
"learning_rate": 0.00018108770817057418,
"loss": 0.7377,
"step": 24450
},
{
"epoch": 0.19,
"learning_rate": 0.00018104903272716022,
"loss": 0.8191,
"step": 24500
},
{
"epoch": 0.19,
"learning_rate": 0.00018101035728374627,
"loss": 0.6292,
"step": 24550
},
{
"epoch": 0.19,
"learning_rate": 0.0001809716818403323,
"loss": 0.7387,
"step": 24600
},
{
"epoch": 0.19,
"learning_rate": 0.00018093300639691835,
"loss": 0.7166,
"step": 24650
},
{
"epoch": 0.19,
"learning_rate": 0.0001808943309535044,
"loss": 0.787,
"step": 24700
},
{
"epoch": 0.19,
"learning_rate": 0.00018085565551009044,
"loss": 0.6451,
"step": 24750
},
{
"epoch": 0.19,
"learning_rate": 0.00018081698006667645,
"loss": 0.8142,
"step": 24800
},
{
"epoch": 0.19,
"learning_rate": 0.0001807783046232625,
"loss": 0.9096,
"step": 24850
},
{
"epoch": 0.19,
"learning_rate": 0.00018073962917984854,
"loss": 0.6828,
"step": 24900
},
{
"epoch": 0.19,
"learning_rate": 0.00018070095373643458,
"loss": 0.5445,
"step": 24950
},
{
"epoch": 0.19,
"learning_rate": 0.00018066227829302065,
"loss": 0.8181,
"step": 25000
},
{
"epoch": 0.2,
"learning_rate": 0.00018047301300240095,
"loss": 1.124,
"step": 25050
},
{
"epoch": 0.2,
"learning_rate": 0.00018043403698044964,
"loss": 1.1854,
"step": 25100
},
{
"epoch": 0.2,
"learning_rate": 0.00018039506095849834,
"loss": 1.0665,
"step": 25150
},
{
"epoch": 0.2,
"learning_rate": 0.00018035608493654706,
"loss": 0.8882,
"step": 25200
},
{
"epoch": 0.2,
"learning_rate": 0.00018031710891459575,
"loss": 1.0253,
"step": 25250
},
{
"epoch": 0.2,
"learning_rate": 0.00018027813289264445,
"loss": 1.0734,
"step": 25300
},
{
"epoch": 0.2,
"learning_rate": 0.00018023915687069315,
"loss": 1.0606,
"step": 25350
},
{
"epoch": 0.2,
"learning_rate": 0.00018020018084874187,
"loss": 0.9589,
"step": 25400
},
{
"epoch": 0.2,
"learning_rate": 0.00018016120482679056,
"loss": 1.0562,
"step": 25450
},
{
"epoch": 0.2,
"learning_rate": 0.00018012222880483926,
"loss": 0.8676,
"step": 25500
},
{
"epoch": 0.2,
"learning_rate": 0.00018008325278288798,
"loss": 1.0997,
"step": 25550
},
{
"epoch": 0.2,
"learning_rate": 0.00018004427676093668,
"loss": 0.9763,
"step": 25600
},
{
"epoch": 0.2,
"learning_rate": 0.00018000530073898537,
"loss": 0.8347,
"step": 25650
},
{
"epoch": 0.2,
"learning_rate": 0.0001799663247170341,
"loss": 0.9396,
"step": 25700
},
{
"epoch": 0.2,
"learning_rate": 0.0001799273486950828,
"loss": 0.9281,
"step": 25750
},
{
"epoch": 0.2,
"learning_rate": 0.0001798883726731315,
"loss": 0.9826,
"step": 25800
},
{
"epoch": 0.2,
"learning_rate": 0.0001798493966511802,
"loss": 0.8583,
"step": 25850
},
{
"epoch": 0.2,
"learning_rate": 0.0001798104206292289,
"loss": 0.8509,
"step": 25900
},
{
"epoch": 0.2,
"learning_rate": 0.00017977144460727763,
"loss": 0.8912,
"step": 25950
},
{
"epoch": 0.2,
"learning_rate": 0.0001797324685853263,
"loss": 0.8786,
"step": 26000
},
{
"epoch": 0.2,
"learning_rate": 0.00017969349256337502,
"loss": 0.8482,
"step": 26050
},
{
"epoch": 0.2,
"learning_rate": 0.00017965451654142374,
"loss": 0.9426,
"step": 26100
},
{
"epoch": 0.2,
"learning_rate": 0.00017961554051947244,
"loss": 0.9505,
"step": 26150
},
{
"epoch": 0.2,
"learning_rate": 0.00017957656449752113,
"loss": 0.8555,
"step": 26200
},
{
"epoch": 0.2,
"learning_rate": 0.00017953758847556983,
"loss": 1.1169,
"step": 26250
},
{
"epoch": 0.21,
"learning_rate": 0.00017949861245361855,
"loss": 0.8806,
"step": 26300
},
{
"epoch": 0.21,
"learning_rate": 0.00017945963643166725,
"loss": 0.9295,
"step": 26350
},
{
"epoch": 0.21,
"learning_rate": 0.00017942066040971594,
"loss": 0.8931,
"step": 26400
},
{
"epoch": 0.21,
"learning_rate": 0.00017938168438776466,
"loss": 0.9139,
"step": 26450
},
{
"epoch": 0.21,
"learning_rate": 0.00017934270836581336,
"loss": 0.9318,
"step": 26500
},
{
"epoch": 0.21,
"learning_rate": 0.00017930373234386205,
"loss": 1.0256,
"step": 26550
},
{
"epoch": 0.21,
"learning_rate": 0.00017926475632191078,
"loss": 0.9042,
"step": 26600
},
{
"epoch": 0.21,
"learning_rate": 0.00017922578029995947,
"loss": 0.8945,
"step": 26650
},
{
"epoch": 0.21,
"learning_rate": 0.00017918680427800817,
"loss": 0.8622,
"step": 26700
},
{
"epoch": 0.21,
"learning_rate": 0.0001791478282560569,
"loss": 0.8348,
"step": 26750
},
{
"epoch": 0.21,
"learning_rate": 0.00017910885223410559,
"loss": 1.0544,
"step": 26800
},
{
"epoch": 0.21,
"learning_rate": 0.0001790698762121543,
"loss": 0.7097,
"step": 26850
},
{
"epoch": 0.21,
"learning_rate": 0.00017903090019020298,
"loss": 0.8808,
"step": 26900
},
{
"epoch": 0.21,
"learning_rate": 0.0001789919241682517,
"loss": 0.896,
"step": 26950
},
{
"epoch": 0.21,
"learning_rate": 0.00017895294814630042,
"loss": 1.0487,
"step": 27000
},
{
"epoch": 0.21,
"learning_rate": 0.00017891397212434912,
"loss": 0.9996,
"step": 27050
},
{
"epoch": 0.21,
"learning_rate": 0.0001788749961023978,
"loss": 0.9624,
"step": 27100
},
{
"epoch": 0.21,
"learning_rate": 0.0001788360200804465,
"loss": 0.9344,
"step": 27150
},
{
"epoch": 0.21,
"learning_rate": 0.00017879704405849523,
"loss": 0.9103,
"step": 27200
},
{
"epoch": 0.21,
"learning_rate": 0.00017875806803654393,
"loss": 0.7311,
"step": 27250
},
{
"epoch": 0.21,
"learning_rate": 0.00017871909201459262,
"loss": 0.9748,
"step": 27300
},
{
"epoch": 0.21,
"learning_rate": 0.00017868011599264134,
"loss": 0.7231,
"step": 27350
},
{
"epoch": 0.21,
"learning_rate": 0.00017864113997069004,
"loss": 0.9844,
"step": 27400
},
{
"epoch": 0.21,
"learning_rate": 0.00017860216394873874,
"loss": 0.9322,
"step": 27450
},
{
"epoch": 0.21,
"learning_rate": 0.00017856318792678746,
"loss": 0.9103,
"step": 27500
},
{
"epoch": 0.21,
"learning_rate": 0.00017852421190483615,
"loss": 1.0132,
"step": 27550
},
{
"epoch": 0.22,
"learning_rate": 0.00017848523588288485,
"loss": 0.8617,
"step": 27600
},
{
"epoch": 0.22,
"learning_rate": 0.00017844625986093354,
"loss": 1.0296,
"step": 27650
},
{
"epoch": 0.22,
"learning_rate": 0.00017840728383898227,
"loss": 1.0048,
"step": 27700
},
{
"epoch": 0.22,
"learning_rate": 0.000178368307817031,
"loss": 1.1557,
"step": 27750
},
{
"epoch": 0.22,
"learning_rate": 0.00017832933179507966,
"loss": 0.7993,
"step": 27800
},
{
"epoch": 0.22,
"learning_rate": 0.00017829035577312838,
"loss": 1.002,
"step": 27850
},
{
"epoch": 0.22,
"learning_rate": 0.0001782513797511771,
"loss": 1.0392,
"step": 27900
},
{
"epoch": 0.22,
"learning_rate": 0.0001782124037292258,
"loss": 0.8991,
"step": 27950
},
{
"epoch": 0.22,
"learning_rate": 0.0001781734277072745,
"loss": 0.8488,
"step": 28000
},
{
"epoch": 0.22,
"learning_rate": 0.0001781344516853232,
"loss": 0.8418,
"step": 28050
},
{
"epoch": 0.22,
"learning_rate": 0.0001780954756633719,
"loss": 1.02,
"step": 28100
},
{
"epoch": 0.22,
"learning_rate": 0.0001780564996414206,
"loss": 1.0404,
"step": 28150
},
{
"epoch": 0.22,
"learning_rate": 0.0001780175236194693,
"loss": 0.9571,
"step": 28200
},
{
"epoch": 0.22,
"learning_rate": 0.00017797854759751803,
"loss": 0.7724,
"step": 28250
},
{
"epoch": 0.22,
"learning_rate": 0.00017793957157556672,
"loss": 1.0129,
"step": 28300
},
{
"epoch": 0.22,
"learning_rate": 0.00017790059555361542,
"loss": 0.8916,
"step": 28350
},
{
"epoch": 0.22,
"learning_rate": 0.00017786161953166414,
"loss": 0.9504,
"step": 28400
},
{
"epoch": 0.22,
"learning_rate": 0.00017782264350971283,
"loss": 0.8393,
"step": 28450
},
{
"epoch": 0.22,
"learning_rate": 0.00017778366748776153,
"loss": 0.7675,
"step": 28500
},
{
"epoch": 0.22,
"learning_rate": 0.00017774469146581023,
"loss": 0.8273,
"step": 28550
},
{
"epoch": 0.22,
"learning_rate": 0.00017770571544385895,
"loss": 0.8967,
"step": 28600
},
{
"epoch": 0.22,
"learning_rate": 0.00017766673942190764,
"loss": 1.084,
"step": 28650
},
{
"epoch": 0.22,
"learning_rate": 0.00017762776339995634,
"loss": 0.7741,
"step": 28700
},
{
"epoch": 0.22,
"learning_rate": 0.00017758878737800506,
"loss": 1.1056,
"step": 28750
},
{
"epoch": 0.22,
"learning_rate": 0.00017754981135605378,
"loss": 1.183,
"step": 28800
},
{
"epoch": 0.22,
"learning_rate": 0.00017751083533410245,
"loss": 1.0375,
"step": 28850
},
{
"epoch": 0.23,
"learning_rate": 0.00017747185931215118,
"loss": 1.2414,
"step": 28900
},
{
"epoch": 0.23,
"learning_rate": 0.00017743288329019987,
"loss": 1.2209,
"step": 28950
},
{
"epoch": 0.23,
"learning_rate": 0.0001773939072682486,
"loss": 0.8581,
"step": 29000
},
{
"epoch": 0.23,
"learning_rate": 0.0001773549312462973,
"loss": 1.0284,
"step": 29050
},
{
"epoch": 0.23,
"learning_rate": 0.00017731595522434598,
"loss": 0.8502,
"step": 29100
},
{
"epoch": 0.23,
"learning_rate": 0.0001772769792023947,
"loss": 0.6947,
"step": 29150
},
{
"epoch": 0.23,
"learning_rate": 0.0001772380031804434,
"loss": 0.7579,
"step": 29200
},
{
"epoch": 0.23,
"learning_rate": 0.0001771990271584921,
"loss": 0.9771,
"step": 29250
},
{
"epoch": 0.23,
"learning_rate": 0.00017716005113654082,
"loss": 0.8661,
"step": 29300
},
{
"epoch": 0.23,
"learning_rate": 0.00017712107511458952,
"loss": 0.8433,
"step": 29350
},
{
"epoch": 0.23,
"learning_rate": 0.0001770820990926382,
"loss": 0.9419,
"step": 29400
},
{
"epoch": 0.23,
"learning_rate": 0.0001770431230706869,
"loss": 1.076,
"step": 29450
},
{
"epoch": 0.23,
"learning_rate": 0.00017700414704873563,
"loss": 0.9966,
"step": 29500
},
{
"epoch": 0.23,
"learning_rate": 0.00017696517102678433,
"loss": 0.7618,
"step": 29550
},
{
"epoch": 0.23,
"learning_rate": 0.00017692619500483302,
"loss": 0.8269,
"step": 29600
},
{
"epoch": 0.23,
"learning_rate": 0.00017688721898288174,
"loss": 0.8109,
"step": 29650
},
{
"epoch": 0.23,
"learning_rate": 0.00017684824296093047,
"loss": 0.7426,
"step": 29700
},
{
"epoch": 0.23,
"learning_rate": 0.00017680926693897913,
"loss": 0.9972,
"step": 29750
},
{
"epoch": 0.23,
"learning_rate": 0.00017677029091702786,
"loss": 0.7991,
"step": 29800
},
{
"epoch": 0.23,
"learning_rate": 0.00017673131489507655,
"loss": 0.7988,
"step": 29850
},
{
"epoch": 0.23,
"learning_rate": 0.00017669233887312527,
"loss": 0.7364,
"step": 29900
},
{
"epoch": 0.23,
"learning_rate": 0.00017665336285117397,
"loss": 1.0258,
"step": 29950
},
{
"epoch": 0.23,
"learning_rate": 0.00017661438682922267,
"loss": 1.0606,
"step": 30000
},
{
"epoch": 0.23,
"learning_rate": 0.0001765754108072714,
"loss": 0.909,
"step": 30050
},
{
"epoch": 0.23,
"learning_rate": 0.00017653643478532008,
"loss": 0.8428,
"step": 30100
},
{
"epoch": 0.24,
"learning_rate": 0.00017649745876336878,
"loss": 0.8707,
"step": 30150
},
{
"epoch": 0.24,
"learning_rate": 0.0001764584827414175,
"loss": 0.7147,
"step": 30200
},
{
"epoch": 0.24,
"learning_rate": 0.0001764195067194662,
"loss": 0.89,
"step": 30250
},
{
"epoch": 0.24,
"learning_rate": 0.0001763805306975149,
"loss": 0.847,
"step": 30300
},
{
"epoch": 0.24,
"learning_rate": 0.0001763415546755636,
"loss": 0.7826,
"step": 30350
},
{
"epoch": 0.24,
"learning_rate": 0.0001763025786536123,
"loss": 0.8614,
"step": 30400
},
{
"epoch": 0.24,
"learning_rate": 0.000176263602631661,
"loss": 0.8078,
"step": 30450
},
{
"epoch": 0.24,
"learning_rate": 0.0001762246266097097,
"loss": 0.872,
"step": 30500
},
{
"epoch": 0.24,
"learning_rate": 0.00017618565058775842,
"loss": 0.8734,
"step": 30550
},
{
"epoch": 0.24,
"learning_rate": 0.00017614667456580715,
"loss": 0.8836,
"step": 30600
},
{
"epoch": 0.24,
"learning_rate": 0.00017610769854385582,
"loss": 1.042,
"step": 30650
},
{
"epoch": 0.24,
"learning_rate": 0.00017606872252190454,
"loss": 0.8561,
"step": 30700
},
{
"epoch": 0.24,
"learning_rate": 0.00017602974649995323,
"loss": 0.6824,
"step": 30750
},
{
"epoch": 0.24,
"learning_rate": 0.00017599077047800196,
"loss": 0.9277,
"step": 30800
},
{
"epoch": 0.24,
"learning_rate": 0.00017595179445605065,
"loss": 0.9887,
"step": 30850
},
{
"epoch": 0.24,
"learning_rate": 0.00017591281843409935,
"loss": 0.7925,
"step": 30900
},
{
"epoch": 0.24,
"learning_rate": 0.00017587384241214807,
"loss": 0.8944,
"step": 30950
},
{
"epoch": 0.24,
"learning_rate": 0.00017583486639019674,
"loss": 1.2576,
"step": 31000
},
{
"epoch": 0.24,
"learning_rate": 0.00017579589036824546,
"loss": 1.0837,
"step": 31050
},
{
"epoch": 0.24,
"learning_rate": 0.00017575691434629418,
"loss": 1.0205,
"step": 31100
},
{
"epoch": 0.24,
"learning_rate": 0.00017571793832434288,
"loss": 1.1691,
"step": 31150
},
{
"epoch": 0.24,
"learning_rate": 0.00017567896230239157,
"loss": 0.9541,
"step": 31200
},
{
"epoch": 0.24,
"learning_rate": 0.00017563998628044027,
"loss": 0.7735,
"step": 31250
},
{
"epoch": 0.24,
"learning_rate": 0.000175601010258489,
"loss": 0.9219,
"step": 31300
},
{
"epoch": 0.24,
"learning_rate": 0.0001755620342365377,
"loss": 0.6201,
"step": 31350
},
{
"epoch": 0.24,
"learning_rate": 0.00017552305821458638,
"loss": 0.8204,
"step": 31400
},
{
"epoch": 0.25,
"learning_rate": 0.0001754840821926351,
"loss": 0.7272,
"step": 31450
},
{
"epoch": 0.25,
"learning_rate": 0.0001754451061706838,
"loss": 0.864,
"step": 31500
},
{
"epoch": 0.25,
"learning_rate": 0.0001754061301487325,
"loss": 0.9935,
"step": 31550
},
{
"epoch": 0.25,
"learning_rate": 0.00017536715412678122,
"loss": 0.8651,
"step": 31600
},
{
"epoch": 0.25,
"learning_rate": 0.00017532817810482992,
"loss": 0.8508,
"step": 31650
},
{
"epoch": 0.25,
"learning_rate": 0.0001752892020828786,
"loss": 0.863,
"step": 31700
},
{
"epoch": 0.25,
"learning_rate": 0.00017525022606092733,
"loss": 0.9272,
"step": 31750
},
{
"epoch": 0.25,
"learning_rate": 0.00017521125003897603,
"loss": 0.9609,
"step": 31800
},
{
"epoch": 0.25,
"learning_rate": 0.00017517227401702475,
"loss": 1.1736,
"step": 31850
},
{
"epoch": 0.25,
"learning_rate": 0.00017513329799507342,
"loss": 0.8571,
"step": 31900
},
{
"epoch": 0.25,
"learning_rate": 0.00017509432197312214,
"loss": 0.758,
"step": 31950
},
{
"epoch": 0.25,
"learning_rate": 0.00017505534595117086,
"loss": 1.0157,
"step": 32000
},
{
"epoch": 0.25,
"learning_rate": 0.00017501636992921956,
"loss": 0.762,
"step": 32050
},
{
"epoch": 0.25,
"learning_rate": 0.00017497739390726826,
"loss": 0.7206,
"step": 32100
},
{
"epoch": 0.25,
"learning_rate": 0.00017493841788531695,
"loss": 0.9902,
"step": 32150
},
{
"epoch": 0.25,
"learning_rate": 0.00017489944186336567,
"loss": 0.8943,
"step": 32200
},
{
"epoch": 0.25,
"learning_rate": 0.00017486046584141437,
"loss": 0.9721,
"step": 32250
},
{
"epoch": 0.25,
"learning_rate": 0.00017482148981946306,
"loss": 0.9522,
"step": 32300
},
{
"epoch": 0.25,
"learning_rate": 0.0001747825137975118,
"loss": 0.9819,
"step": 32350
},
{
"epoch": 0.25,
"learning_rate": 0.00017474353777556048,
"loss": 1.0563,
"step": 32400
},
{
"epoch": 0.25,
"learning_rate": 0.00017470456175360918,
"loss": 0.782,
"step": 32450
},
{
"epoch": 0.25,
"learning_rate": 0.0001746655857316579,
"loss": 0.9609,
"step": 32500
},
{
"epoch": 0.25,
"learning_rate": 0.0001746266097097066,
"loss": 0.9329,
"step": 32550
},
{
"epoch": 0.25,
"learning_rate": 0.0001745876336877553,
"loss": 0.7019,
"step": 32600
},
{
"epoch": 0.25,
"learning_rate": 0.00017454865766580401,
"loss": 0.9395,
"step": 32650
},
{
"epoch": 0.25,
"learning_rate": 0.0001745096816438527,
"loss": 0.7248,
"step": 32700
},
{
"epoch": 0.26,
"learning_rate": 0.00017447070562190143,
"loss": 0.7116,
"step": 32750
},
{
"epoch": 0.26,
"learning_rate": 0.0001744317295999501,
"loss": 0.92,
"step": 32800
},
{
"epoch": 0.26,
"learning_rate": 0.00017439275357799882,
"loss": 0.8105,
"step": 32850
},
{
"epoch": 0.26,
"learning_rate": 0.00017435377755604755,
"loss": 0.8492,
"step": 32900
},
{
"epoch": 0.26,
"learning_rate": 0.00017431480153409624,
"loss": 0.8305,
"step": 32950
},
{
"epoch": 0.26,
"learning_rate": 0.00017427582551214494,
"loss": 0.882,
"step": 33000
},
{
"epoch": 0.36,
"learning_rate": 0.00016390108569805799,
"loss": 40.3284,
"step": 33050
},
{
"epoch": 0.36,
"learning_rate": 0.00016384647311968892,
"loss": 41.3929,
"step": 33100
},
{
"epoch": 0.36,
"learning_rate": 0.0001637918605413199,
"loss": 40.5771,
"step": 33150
},
{
"epoch": 0.36,
"learning_rate": 0.00016373724796295083,
"loss": 40.1587,
"step": 33200
},
{
"epoch": 0.36,
"learning_rate": 0.0001636826353845818,
"loss": 38.4849,
"step": 33250
},
{
"epoch": 0.36,
"learning_rate": 0.00016362802280621276,
"loss": 40.8953,
"step": 33300
},
{
"epoch": 0.36,
"learning_rate": 0.0001635734102278437,
"loss": 41.1837,
"step": 33350
},
{
"epoch": 0.36,
"learning_rate": 0.00016351879764947463,
"loss": 41.4111,
"step": 33400
},
{
"epoch": 0.37,
"learning_rate": 0.0001634641850711056,
"loss": 39.7779,
"step": 33450
},
{
"epoch": 0.37,
"learning_rate": 0.00016340957249273653,
"loss": 39.6051,
"step": 33500
},
{
"epoch": 0.37,
"learning_rate": 0.0001633549599143675,
"loss": 39.1987,
"step": 33550
},
{
"epoch": 0.37,
"learning_rate": 0.00016330034733599844,
"loss": 36.4834,
"step": 33600
},
{
"epoch": 0.37,
"learning_rate": 0.0001632457347576294,
"loss": 38.9442,
"step": 33650
},
{
"epoch": 0.37,
"learning_rate": 0.00016319112217926034,
"loss": 38.7699,
"step": 33700
},
{
"epoch": 0.37,
"learning_rate": 0.00016313650960089128,
"loss": 38.1662,
"step": 33750
},
{
"epoch": 0.37,
"learning_rate": 0.00016308189702252224,
"loss": 38.3107,
"step": 33800
},
{
"epoch": 0.37,
"learning_rate": 0.00016302728444415318,
"loss": 37.137,
"step": 33850
},
{
"epoch": 0.37,
"learning_rate": 0.00016297267186578414,
"loss": 39.8413,
"step": 33900
},
{
"epoch": 0.37,
"learning_rate": 0.00016291805928741508,
"loss": 37.834,
"step": 33950
},
{
"epoch": 0.37,
"learning_rate": 0.00016286344670904602,
"loss": 38.752,
"step": 34000
},
{
"epoch": 0.37,
"learning_rate": 0.00016280883413067698,
"loss": 38.9749,
"step": 34050
},
{
"epoch": 0.37,
"learning_rate": 0.00016275422155230792,
"loss": 37.0203,
"step": 34100
},
{
"epoch": 0.37,
"learning_rate": 0.00016269960897393888,
"loss": 37.8575,
"step": 34150
},
{
"epoch": 0.37,
"learning_rate": 0.00016264499639556982,
"loss": 36.1197,
"step": 34200
},
{
"epoch": 0.37,
"learning_rate": 0.0001625903838172008,
"loss": 38.9567,
"step": 34250
},
{
"epoch": 0.37,
"learning_rate": 0.00016253577123883172,
"loss": 36.921,
"step": 34300
},
{
"epoch": 0.38,
"learning_rate": 0.0001624811586604627,
"loss": 37.7047,
"step": 34350
},
{
"epoch": 0.38,
"learning_rate": 0.00016242654608209365,
"loss": 37.8749,
"step": 34400
},
{
"epoch": 0.38,
"learning_rate": 0.0001623719335037246,
"loss": 36.0547,
"step": 34450
},
{
"epoch": 0.38,
"learning_rate": 0.00016231732092535553,
"loss": 35.7079,
"step": 34500
},
{
"epoch": 0.38,
"learning_rate": 0.0001622627083469865,
"loss": 35.5162,
"step": 34550
},
{
"epoch": 0.38,
"learning_rate": 0.00016220809576861743,
"loss": 35.6316,
"step": 34600
},
{
"epoch": 0.38,
"learning_rate": 0.0001621534831902484,
"loss": 37.1081,
"step": 34650
},
{
"epoch": 0.38,
"learning_rate": 0.00016209887061187933,
"loss": 35.4266,
"step": 34700
},
{
"epoch": 0.38,
"learning_rate": 0.0001620442580335103,
"loss": 35.8718,
"step": 34750
},
{
"epoch": 0.38,
"learning_rate": 0.00016198964545514124,
"loss": 34.2143,
"step": 34800
},
{
"epoch": 0.38,
"learning_rate": 0.00016193503287677217,
"loss": 34.0882,
"step": 34850
},
{
"epoch": 0.38,
"learning_rate": 0.00016188042029840314,
"loss": 32.8758,
"step": 34900
},
{
"epoch": 0.38,
"learning_rate": 0.00016182580772003408,
"loss": 32.0339,
"step": 34950
},
{
"epoch": 0.38,
"learning_rate": 0.00016177119514166504,
"loss": 31.4164,
"step": 35000
},
{
"epoch": 0.38,
"learning_rate": 0.00016171658256329598,
"loss": 31.8205,
"step": 35050
},
{
"epoch": 0.38,
"learning_rate": 0.00016166196998492694,
"loss": 32.6587,
"step": 35100
},
{
"epoch": 0.38,
"learning_rate": 0.00016160735740655788,
"loss": 31.8695,
"step": 35150
},
{
"epoch": 0.38,
"learning_rate": 0.00016155274482818882,
"loss": 31.0461,
"step": 35200
},
{
"epoch": 0.39,
"learning_rate": 0.00016149813224981978,
"loss": 30.1198,
"step": 35250
},
{
"epoch": 0.39,
"learning_rate": 0.00016144351967145072,
"loss": 28.9032,
"step": 35300
},
{
"epoch": 0.39,
"learning_rate": 0.00016138890709308169,
"loss": 30.3631,
"step": 35350
},
{
"epoch": 0.39,
"learning_rate": 0.00016133429451471265,
"loss": 29.2617,
"step": 35400
},
{
"epoch": 0.39,
"learning_rate": 0.0001612796819363436,
"loss": 28.4782,
"step": 35450
},
{
"epoch": 0.39,
"learning_rate": 0.00016122506935797455,
"loss": 28.6378,
"step": 35500
},
{
"epoch": 0.39,
"learning_rate": 0.0001611704567796055,
"loss": 28.3341,
"step": 35550
},
{
"epoch": 0.39,
"learning_rate": 0.00016111584420123645,
"loss": 27.6153,
"step": 35600
},
{
"epoch": 0.39,
"learning_rate": 0.0001610612316228674,
"loss": 26.5044,
"step": 35650
},
{
"epoch": 0.39,
"learning_rate": 0.00016100661904449833,
"loss": 26.8876,
"step": 35700
},
{
"epoch": 0.39,
"learning_rate": 0.0001609520064661293,
"loss": 26.9291,
"step": 35750
},
{
"epoch": 0.39,
"learning_rate": 0.00016089739388776023,
"loss": 24.024,
"step": 35800
},
{
"epoch": 0.39,
"learning_rate": 0.0001608427813093912,
"loss": 24.5533,
"step": 35850
},
{
"epoch": 0.39,
"learning_rate": 0.00016078816873102213,
"loss": 24.6948,
"step": 35900
},
{
"epoch": 0.39,
"learning_rate": 0.0001607335561526531,
"loss": 22.2483,
"step": 35950
},
{
"epoch": 0.39,
"learning_rate": 0.00016067894357428404,
"loss": 21.7253,
"step": 36000
},
{
"epoch": 0.39,
"learning_rate": 0.00016062433099591497,
"loss": 20.7581,
"step": 36050
},
{
"epoch": 0.39,
"learning_rate": 0.00016056971841754594,
"loss": 18.6484,
"step": 36100
},
{
"epoch": 0.39,
"learning_rate": 0.00016051510583917688,
"loss": 19.3484,
"step": 36150
},
{
"epoch": 0.4,
"learning_rate": 0.00016046049326080784,
"loss": 15.8305,
"step": 36200
},
{
"epoch": 0.4,
"learning_rate": 0.00016040588068243878,
"loss": 16.644,
"step": 36250
},
{
"epoch": 0.4,
"learning_rate": 0.00016035126810406972,
"loss": 16.1415,
"step": 36300
},
{
"epoch": 0.4,
"learning_rate": 0.00016029665552570068,
"loss": 16.2331,
"step": 36350
},
{
"epoch": 0.4,
"learning_rate": 0.00016024204294733162,
"loss": 13.7222,
"step": 36400
},
{
"epoch": 0.4,
"learning_rate": 0.00016018743036896258,
"loss": 13.1968,
"step": 36450
},
{
"epoch": 0.4,
"learning_rate": 0.00016013281779059355,
"loss": 13.7183,
"step": 36500
},
{
"epoch": 0.4,
"learning_rate": 0.00016007820521222449,
"loss": 13.6719,
"step": 36550
},
{
"epoch": 0.4,
"learning_rate": 0.00016002359263385545,
"loss": 12.565,
"step": 36600
},
{
"epoch": 0.4,
"learning_rate": 0.0001599689800554864,
"loss": 11.7014,
"step": 36650
},
{
"epoch": 0.4,
"learning_rate": 0.00015991436747711735,
"loss": 11.9391,
"step": 36700
},
{
"epoch": 0.4,
"learning_rate": 0.0001598597548987483,
"loss": 10.8187,
"step": 36750
},
{
"epoch": 0.4,
"learning_rate": 0.00015980514232037923,
"loss": 9.9151,
"step": 36800
},
{
"epoch": 0.4,
"learning_rate": 0.0001597505297420102,
"loss": 8.8924,
"step": 36850
},
{
"epoch": 0.4,
"learning_rate": 0.00015969591716364113,
"loss": 7.7144,
"step": 36900
},
{
"epoch": 0.4,
"learning_rate": 0.0001596413045852721,
"loss": 6.7915,
"step": 36950
},
{
"epoch": 0.4,
"learning_rate": 0.00015958669200690303,
"loss": 6.1585,
"step": 37000
},
{
"epoch": 0.4,
"learning_rate": 0.000159532079428534,
"loss": 6.4101,
"step": 37050
},
{
"epoch": 0.41,
"learning_rate": 0.00015947746685016494,
"loss": 4.6158,
"step": 37100
},
{
"epoch": 0.41,
"learning_rate": 0.00015942285427179587,
"loss": 4.76,
"step": 37150
},
{
"epoch": 0.41,
"learning_rate": 0.00015936824169342684,
"loss": 4.0994,
"step": 37200
},
{
"epoch": 0.41,
"learning_rate": 0.00015931362911505778,
"loss": 4.7396,
"step": 37250
},
{
"epoch": 0.41,
"learning_rate": 0.00015925901653668874,
"loss": 3.6542,
"step": 37300
},
{
"epoch": 0.41,
"learning_rate": 0.00015920440395831968,
"loss": 3.4333,
"step": 37350
},
{
"epoch": 0.41,
"learning_rate": 0.00015914979137995064,
"loss": 4.575,
"step": 37400
},
{
"epoch": 0.41,
"learning_rate": 0.00015909517880158158,
"loss": 3.3926,
"step": 37450
},
{
"epoch": 0.41,
"learning_rate": 0.00015904056622321252,
"loss": 3.3063,
"step": 37500
},
{
"epoch": 0.41,
"learning_rate": 0.00015898595364484348,
"loss": 2.9068,
"step": 37550
},
{
"epoch": 0.41,
"learning_rate": 0.00015893134106647445,
"loss": 2.9475,
"step": 37600
},
{
"epoch": 0.41,
"learning_rate": 0.00015887672848810538,
"loss": 2.94,
"step": 37650
},
{
"epoch": 0.41,
"learning_rate": 0.00015882211590973635,
"loss": 3.2924,
"step": 37700
},
{
"epoch": 0.41,
"learning_rate": 0.0001587675033313673,
"loss": 3.4012,
"step": 37750
},
{
"epoch": 0.41,
"learning_rate": 0.00015871289075299825,
"loss": 2.8093,
"step": 37800
},
{
"epoch": 0.41,
"learning_rate": 0.0001586582781746292,
"loss": 3.115,
"step": 37850
},
{
"epoch": 0.41,
"learning_rate": 0.00015860366559626015,
"loss": 2.4926,
"step": 37900
},
{
"epoch": 0.41,
"learning_rate": 0.0001585490530178911,
"loss": 2.3319,
"step": 37950
},
{
"epoch": 0.42,
"learning_rate": 0.00015849444043952203,
"loss": 2.4095,
"step": 38000
},
{
"epoch": 0.42,
"learning_rate": 0.000158439827861153,
"loss": 2.563,
"step": 38050
},
{
"epoch": 0.42,
"learning_rate": 0.00015838521528278393,
"loss": 2.5545,
"step": 38100
},
{
"epoch": 0.42,
"learning_rate": 0.0001583306027044149,
"loss": 2.0663,
"step": 38150
},
{
"epoch": 0.42,
"learning_rate": 0.00015827599012604583,
"loss": 2.0732,
"step": 38200
},
{
"epoch": 0.42,
"learning_rate": 0.0001582213775476768,
"loss": 2.9127,
"step": 38250
},
{
"epoch": 0.42,
"learning_rate": 0.00015816676496930774,
"loss": 2.2365,
"step": 38300
},
{
"epoch": 0.42,
"learning_rate": 0.00015811215239093867,
"loss": 3.9376,
"step": 38350
},
{
"epoch": 0.42,
"learning_rate": 0.00015805753981256964,
"loss": 2.0433,
"step": 38400
},
{
"epoch": 0.42,
"learning_rate": 0.00015800292723420058,
"loss": 2.1487,
"step": 38450
},
{
"epoch": 0.42,
"learning_rate": 0.00015794831465583154,
"loss": 1.8283,
"step": 38500
},
{
"epoch": 0.42,
"learning_rate": 0.00015789370207746248,
"loss": 1.5619,
"step": 38550
},
{
"epoch": 0.42,
"learning_rate": 0.00015783908949909342,
"loss": 1.6508,
"step": 38600
},
{
"epoch": 0.42,
"learning_rate": 0.00015778447692072438,
"loss": 1.8076,
"step": 38650
},
{
"epoch": 0.42,
"learning_rate": 0.00015772986434235535,
"loss": 1.5081,
"step": 38700
},
{
"epoch": 0.42,
"learning_rate": 0.0001576752517639863,
"loss": 1.7372,
"step": 38750
},
{
"epoch": 0.42,
"learning_rate": 0.00015762063918561725,
"loss": 1.504,
"step": 38800
},
{
"epoch": 0.42,
"learning_rate": 0.00015756602660724819,
"loss": 1.4685,
"step": 38850
},
{
"epoch": 0.42,
"learning_rate": 0.00015751141402887915,
"loss": 1.366,
"step": 38900
},
{
"epoch": 0.43,
"learning_rate": 0.0001574568014505101,
"loss": 1.3556,
"step": 38950
},
{
"epoch": 0.43,
"learning_rate": 0.00015740218887214105,
"loss": 1.328,
"step": 39000
},
{
"epoch": 0.43,
"learning_rate": 0.000157347576293772,
"loss": 1.672,
"step": 39050
},
{
"epoch": 0.43,
"learning_rate": 0.00015729296371540293,
"loss": 1.2776,
"step": 39100
},
{
"epoch": 0.43,
"learning_rate": 0.0001572383511370339,
"loss": 1.619,
"step": 39150
},
{
"epoch": 0.43,
"learning_rate": 0.00015718373855866483,
"loss": 1.4484,
"step": 39200
},
{
"epoch": 0.43,
"learning_rate": 0.0001571291259802958,
"loss": 1.4561,
"step": 39250
},
{
"epoch": 0.43,
"learning_rate": 0.00015707451340192673,
"loss": 1.5445,
"step": 39300
},
{
"epoch": 0.43,
"learning_rate": 0.0001570199008235577,
"loss": 1.6477,
"step": 39350
},
{
"epoch": 0.43,
"learning_rate": 0.00015696528824518864,
"loss": 1.483,
"step": 39400
},
{
"epoch": 0.43,
"learning_rate": 0.00015691067566681957,
"loss": 1.4913,
"step": 39450
},
{
"epoch": 0.43,
"learning_rate": 0.00015685606308845054,
"loss": 1.2746,
"step": 39500
},
{
"epoch": 0.43,
"learning_rate": 0.00015680145051008148,
"loss": 1.4588,
"step": 39550
},
{
"epoch": 0.43,
"learning_rate": 0.00015674683793171244,
"loss": 1.3793,
"step": 39600
},
{
"epoch": 0.43,
"learning_rate": 0.00015669222535334338,
"loss": 1.4776,
"step": 39650
},
{
"epoch": 0.43,
"learning_rate": 0.00015663761277497434,
"loss": 1.7906,
"step": 39700
},
{
"epoch": 0.43,
"learning_rate": 0.0001565830001966053,
"loss": 1.4083,
"step": 39750
},
{
"epoch": 0.43,
"learning_rate": 0.00015652838761823624,
"loss": 1.5248,
"step": 39800
},
{
"epoch": 0.44,
"learning_rate": 0.0001564737750398672,
"loss": 1.2159,
"step": 39850
},
{
"epoch": 0.44,
"learning_rate": 0.00015641916246149815,
"loss": 1.4073,
"step": 39900
},
{
"epoch": 0.44,
"learning_rate": 0.00015636454988312908,
"loss": 1.2702,
"step": 39950
},
{
"epoch": 0.44,
"learning_rate": 0.00015630993730476005,
"loss": 1.33,
"step": 40000
},
{
"epoch": 0.44,
"learning_rate": 0.000156255324726391,
"loss": 1.4365,
"step": 40050
},
{
"epoch": 0.44,
"learning_rate": 0.00015620071214802195,
"loss": 1.2484,
"step": 40100
},
{
"epoch": 0.44,
"learning_rate": 0.0001561460995696529,
"loss": 1.2985,
"step": 40150
},
{
"epoch": 0.44,
"learning_rate": 0.00015609148699128385,
"loss": 1.3169,
"step": 40200
},
{
"epoch": 0.44,
"learning_rate": 0.0001560368744129148,
"loss": 1.2415,
"step": 40250
},
{
"epoch": 0.44,
"learning_rate": 0.00015598226183454573,
"loss": 1.0357,
"step": 40300
},
{
"epoch": 0.44,
"learning_rate": 0.0001559276492561767,
"loss": 1.3613,
"step": 40350
},
{
"epoch": 0.44,
"learning_rate": 0.00015587303667780763,
"loss": 1.1524,
"step": 40400
},
{
"epoch": 0.44,
"learning_rate": 0.0001558184240994386,
"loss": 1.4132,
"step": 40450
},
{
"epoch": 0.44,
"learning_rate": 0.00015576381152106953,
"loss": 1.4276,
"step": 40500
}
],
"logging_steps": 50,
"max_steps": 183108,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 2.90367192517632e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}