llama-68m / trainer_state.json
JackFram's picture
Upload folder using huggingface_hub
3f24d44
raw
history blame
114 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9992908258655148,
"global_step": 93000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.9946274686781424e-05,
"loss": 7.7636,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 4.989254937356285e-05,
"loss": 6.4663,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 4.9838824060344274e-05,
"loss": 6.0292,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 4.9785098747125696e-05,
"loss": 5.7747,
"step": 400
},
{
"epoch": 0.01,
"learning_rate": 4.9731373433907124e-05,
"loss": 5.592,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 4.9677648120688546e-05,
"loss": 5.4925,
"step": 600
},
{
"epoch": 0.01,
"learning_rate": 4.962392280746997e-05,
"loss": 5.3733,
"step": 700
},
{
"epoch": 0.01,
"learning_rate": 4.957019749425139e-05,
"loss": 5.2595,
"step": 800
},
{
"epoch": 0.01,
"learning_rate": 4.951647218103282e-05,
"loss": 5.179,
"step": 900
},
{
"epoch": 0.01,
"learning_rate": 4.946274686781424e-05,
"loss": 5.1054,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.940902155459566e-05,
"loss": 5.053,
"step": 1100
},
{
"epoch": 0.01,
"learning_rate": 4.935529624137709e-05,
"loss": 4.9869,
"step": 1200
},
{
"epoch": 0.01,
"learning_rate": 4.930157092815851e-05,
"loss": 4.9311,
"step": 1300
},
{
"epoch": 0.02,
"learning_rate": 4.924784561493994e-05,
"loss": 4.859,
"step": 1400
},
{
"epoch": 0.02,
"learning_rate": 4.919412030172136e-05,
"loss": 4.8506,
"step": 1500
},
{
"epoch": 0.02,
"learning_rate": 4.914039498850279e-05,
"loss": 4.8073,
"step": 1600
},
{
"epoch": 0.02,
"learning_rate": 4.908666967528421e-05,
"loss": 4.762,
"step": 1700
},
{
"epoch": 0.02,
"learning_rate": 4.903294436206564e-05,
"loss": 4.7197,
"step": 1800
},
{
"epoch": 0.02,
"learning_rate": 4.897921904884706e-05,
"loss": 4.6881,
"step": 1900
},
{
"epoch": 0.02,
"learning_rate": 4.892549373562848e-05,
"loss": 4.6526,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 4.8871768422409904e-05,
"loss": 4.6091,
"step": 2100
},
{
"epoch": 0.02,
"learning_rate": 4.881804310919133e-05,
"loss": 4.5926,
"step": 2200
},
{
"epoch": 0.02,
"learning_rate": 4.8764317795972754e-05,
"loss": 4.557,
"step": 2300
},
{
"epoch": 0.03,
"learning_rate": 4.8710592482754176e-05,
"loss": 4.5554,
"step": 2400
},
{
"epoch": 0.03,
"learning_rate": 4.8656867169535604e-05,
"loss": 4.5275,
"step": 2500
},
{
"epoch": 0.03,
"learning_rate": 4.8603141856317026e-05,
"loss": 4.5023,
"step": 2600
},
{
"epoch": 0.03,
"learning_rate": 4.854941654309845e-05,
"loss": 4.471,
"step": 2700
},
{
"epoch": 0.03,
"learning_rate": 4.8495691229879876e-05,
"loss": 4.4634,
"step": 2800
},
{
"epoch": 0.03,
"learning_rate": 4.84419659166613e-05,
"loss": 4.4476,
"step": 2900
},
{
"epoch": 0.03,
"learning_rate": 4.838824060344272e-05,
"loss": 4.425,
"step": 3000
},
{
"epoch": 0.03,
"learning_rate": 4.833451529022414e-05,
"loss": 4.4013,
"step": 3100
},
{
"epoch": 0.03,
"learning_rate": 4.828078997700557e-05,
"loss": 4.3986,
"step": 3200
},
{
"epoch": 0.04,
"learning_rate": 4.822706466378699e-05,
"loss": 4.3495,
"step": 3300
},
{
"epoch": 0.04,
"learning_rate": 4.817333935056841e-05,
"loss": 4.3396,
"step": 3400
},
{
"epoch": 0.04,
"learning_rate": 4.811961403734984e-05,
"loss": 4.3402,
"step": 3500
},
{
"epoch": 0.04,
"learning_rate": 4.806588872413126e-05,
"loss": 4.2908,
"step": 3600
},
{
"epoch": 0.04,
"learning_rate": 4.8012163410912684e-05,
"loss": 4.2989,
"step": 3700
},
{
"epoch": 0.04,
"learning_rate": 4.795843809769411e-05,
"loss": 4.2971,
"step": 3800
},
{
"epoch": 0.04,
"learning_rate": 4.7904712784475534e-05,
"loss": 4.2611,
"step": 3900
},
{
"epoch": 0.04,
"learning_rate": 4.7850987471256956e-05,
"loss": 4.2654,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 4.7797262158038384e-05,
"loss": 4.2545,
"step": 4100
},
{
"epoch": 0.05,
"learning_rate": 4.7743536844819806e-05,
"loss": 4.2243,
"step": 4200
},
{
"epoch": 0.05,
"learning_rate": 4.768981153160123e-05,
"loss": 4.2148,
"step": 4300
},
{
"epoch": 0.05,
"learning_rate": 4.7636086218382656e-05,
"loss": 4.2041,
"step": 4400
},
{
"epoch": 0.05,
"learning_rate": 4.758236090516408e-05,
"loss": 4.1845,
"step": 4500
},
{
"epoch": 0.05,
"learning_rate": 4.7528635591945506e-05,
"loss": 4.1751,
"step": 4600
},
{
"epoch": 0.05,
"learning_rate": 4.747491027872693e-05,
"loss": 4.1826,
"step": 4700
},
{
"epoch": 0.05,
"learning_rate": 4.7421184965508356e-05,
"loss": 4.1715,
"step": 4800
},
{
"epoch": 0.05,
"learning_rate": 4.736745965228978e-05,
"loss": 4.1371,
"step": 4900
},
{
"epoch": 0.05,
"learning_rate": 4.73137343390712e-05,
"loss": 4.1361,
"step": 5000
},
{
"epoch": 0.05,
"learning_rate": 4.726000902585263e-05,
"loss": 4.1224,
"step": 5100
},
{
"epoch": 0.06,
"learning_rate": 4.720628371263405e-05,
"loss": 4.1226,
"step": 5200
},
{
"epoch": 0.06,
"learning_rate": 4.715255839941547e-05,
"loss": 4.0926,
"step": 5300
},
{
"epoch": 0.06,
"learning_rate": 4.70988330861969e-05,
"loss": 4.0933,
"step": 5400
},
{
"epoch": 0.06,
"learning_rate": 4.704510777297832e-05,
"loss": 4.0768,
"step": 5500
},
{
"epoch": 0.06,
"learning_rate": 4.699138245975974e-05,
"loss": 4.0659,
"step": 5600
},
{
"epoch": 0.06,
"learning_rate": 4.6937657146541164e-05,
"loss": 4.0727,
"step": 5700
},
{
"epoch": 0.06,
"learning_rate": 4.688393183332259e-05,
"loss": 4.0501,
"step": 5800
},
{
"epoch": 0.06,
"learning_rate": 4.6830206520104014e-05,
"loss": 4.0401,
"step": 5900
},
{
"epoch": 0.06,
"learning_rate": 4.6776481206885436e-05,
"loss": 4.0494,
"step": 6000
},
{
"epoch": 0.07,
"learning_rate": 4.6722755893666864e-05,
"loss": 4.012,
"step": 6100
},
{
"epoch": 0.07,
"learning_rate": 4.6669030580448286e-05,
"loss": 4.0165,
"step": 6200
},
{
"epoch": 0.07,
"learning_rate": 4.661530526722971e-05,
"loss": 4.0203,
"step": 6300
},
{
"epoch": 0.07,
"learning_rate": 4.6561579954011136e-05,
"loss": 4.0026,
"step": 6400
},
{
"epoch": 0.07,
"learning_rate": 4.650785464079256e-05,
"loss": 4.0005,
"step": 6500
},
{
"epoch": 0.07,
"learning_rate": 4.645412932757398e-05,
"loss": 3.9956,
"step": 6600
},
{
"epoch": 0.07,
"learning_rate": 4.640040401435541e-05,
"loss": 3.9539,
"step": 6700
},
{
"epoch": 0.07,
"learning_rate": 4.634667870113683e-05,
"loss": 3.9839,
"step": 6800
},
{
"epoch": 0.07,
"learning_rate": 4.629295338791825e-05,
"loss": 3.9575,
"step": 6900
},
{
"epoch": 0.08,
"learning_rate": 4.623922807469967e-05,
"loss": 3.9549,
"step": 7000
},
{
"epoch": 0.08,
"learning_rate": 4.61855027614811e-05,
"loss": 3.9721,
"step": 7100
},
{
"epoch": 0.08,
"learning_rate": 4.613177744826252e-05,
"loss": 3.9359,
"step": 7200
},
{
"epoch": 0.08,
"learning_rate": 4.607805213504395e-05,
"loss": 3.9508,
"step": 7300
},
{
"epoch": 0.08,
"learning_rate": 4.602432682182537e-05,
"loss": 3.936,
"step": 7400
},
{
"epoch": 0.08,
"learning_rate": 4.59706015086068e-05,
"loss": 3.9156,
"step": 7500
},
{
"epoch": 0.08,
"learning_rate": 4.591687619538822e-05,
"loss": 3.8848,
"step": 7600
},
{
"epoch": 0.08,
"learning_rate": 4.586315088216965e-05,
"loss": 3.9082,
"step": 7700
},
{
"epoch": 0.08,
"learning_rate": 4.580942556895107e-05,
"loss": 3.8896,
"step": 7800
},
{
"epoch": 0.08,
"learning_rate": 4.5755700255732494e-05,
"loss": 3.9116,
"step": 7900
},
{
"epoch": 0.09,
"learning_rate": 4.5701974942513916e-05,
"loss": 3.8913,
"step": 8000
},
{
"epoch": 0.09,
"learning_rate": 4.5648249629295344e-05,
"loss": 3.8855,
"step": 8100
},
{
"epoch": 0.09,
"learning_rate": 4.5594524316076766e-05,
"loss": 3.8861,
"step": 8200
},
{
"epoch": 0.09,
"learning_rate": 4.554079900285819e-05,
"loss": 3.8898,
"step": 8300
},
{
"epoch": 0.09,
"learning_rate": 4.5487073689639616e-05,
"loss": 3.8646,
"step": 8400
},
{
"epoch": 0.09,
"learning_rate": 4.543334837642104e-05,
"loss": 3.8709,
"step": 8500
},
{
"epoch": 0.09,
"learning_rate": 4.537962306320246e-05,
"loss": 3.8733,
"step": 8600
},
{
"epoch": 0.09,
"learning_rate": 4.532589774998389e-05,
"loss": 3.8561,
"step": 8700
},
{
"epoch": 0.09,
"learning_rate": 4.527217243676531e-05,
"loss": 3.8441,
"step": 8800
},
{
"epoch": 0.1,
"learning_rate": 4.521844712354673e-05,
"loss": 3.8287,
"step": 8900
},
{
"epoch": 0.1,
"learning_rate": 4.516472181032816e-05,
"loss": 3.8163,
"step": 9000
},
{
"epoch": 0.1,
"learning_rate": 4.511099649710958e-05,
"loss": 3.8502,
"step": 9100
},
{
"epoch": 0.1,
"learning_rate": 4.5057271183891e-05,
"loss": 3.8384,
"step": 9200
},
{
"epoch": 0.1,
"learning_rate": 4.5003545870672424e-05,
"loss": 3.8434,
"step": 9300
},
{
"epoch": 0.1,
"learning_rate": 4.494982055745385e-05,
"loss": 3.8016,
"step": 9400
},
{
"epoch": 0.1,
"learning_rate": 4.4896095244235274e-05,
"loss": 3.8089,
"step": 9500
},
{
"epoch": 0.1,
"learning_rate": 4.4842369931016696e-05,
"loss": 3.7775,
"step": 9600
},
{
"epoch": 0.1,
"learning_rate": 4.4788644617798124e-05,
"loss": 3.8009,
"step": 9700
},
{
"epoch": 0.11,
"learning_rate": 4.4734919304579546e-05,
"loss": 3.7925,
"step": 9800
},
{
"epoch": 0.11,
"learning_rate": 4.468119399136097e-05,
"loss": 3.781,
"step": 9900
},
{
"epoch": 0.11,
"learning_rate": 4.4627468678142396e-05,
"loss": 3.7895,
"step": 10000
},
{
"epoch": 0.11,
"learning_rate": 4.457374336492382e-05,
"loss": 3.7623,
"step": 10100
},
{
"epoch": 0.11,
"learning_rate": 4.452001805170524e-05,
"loss": 3.771,
"step": 10200
},
{
"epoch": 0.11,
"learning_rate": 4.446629273848667e-05,
"loss": 3.777,
"step": 10300
},
{
"epoch": 0.11,
"learning_rate": 4.441256742526809e-05,
"loss": 3.7779,
"step": 10400
},
{
"epoch": 0.11,
"learning_rate": 4.435884211204952e-05,
"loss": 3.7824,
"step": 10500
},
{
"epoch": 0.11,
"learning_rate": 4.430511679883094e-05,
"loss": 3.7539,
"step": 10600
},
{
"epoch": 0.11,
"learning_rate": 4.425139148561237e-05,
"loss": 3.7346,
"step": 10700
},
{
"epoch": 0.12,
"learning_rate": 4.419766617239379e-05,
"loss": 3.7459,
"step": 10800
},
{
"epoch": 0.12,
"learning_rate": 4.414394085917521e-05,
"loss": 3.7569,
"step": 10900
},
{
"epoch": 0.12,
"learning_rate": 4.409021554595664e-05,
"loss": 3.7202,
"step": 11000
},
{
"epoch": 0.12,
"learning_rate": 4.403649023273806e-05,
"loss": 3.7187,
"step": 11100
},
{
"epoch": 0.12,
"learning_rate": 4.398276491951948e-05,
"loss": 3.7233,
"step": 11200
},
{
"epoch": 0.12,
"learning_rate": 4.392903960630091e-05,
"loss": 3.7294,
"step": 11300
},
{
"epoch": 0.12,
"learning_rate": 4.387531429308233e-05,
"loss": 3.7285,
"step": 11400
},
{
"epoch": 0.12,
"learning_rate": 4.3821588979863754e-05,
"loss": 3.7293,
"step": 11500
},
{
"epoch": 0.12,
"learning_rate": 4.376786366664518e-05,
"loss": 3.7306,
"step": 11600
},
{
"epoch": 0.13,
"learning_rate": 4.3714138353426604e-05,
"loss": 3.6982,
"step": 11700
},
{
"epoch": 0.13,
"learning_rate": 4.3660413040208026e-05,
"loss": 3.7008,
"step": 11800
},
{
"epoch": 0.13,
"learning_rate": 4.360668772698945e-05,
"loss": 3.7103,
"step": 11900
},
{
"epoch": 0.13,
"learning_rate": 4.3552962413770876e-05,
"loss": 3.7016,
"step": 12000
},
{
"epoch": 0.13,
"learning_rate": 4.34992371005523e-05,
"loss": 3.7023,
"step": 12100
},
{
"epoch": 0.13,
"learning_rate": 4.344551178733372e-05,
"loss": 3.6621,
"step": 12200
},
{
"epoch": 0.13,
"learning_rate": 4.339178647411515e-05,
"loss": 3.6578,
"step": 12300
},
{
"epoch": 0.13,
"learning_rate": 4.333806116089657e-05,
"loss": 3.6704,
"step": 12400
},
{
"epoch": 0.13,
"learning_rate": 4.328433584767799e-05,
"loss": 3.6973,
"step": 12500
},
{
"epoch": 0.14,
"learning_rate": 4.323061053445942e-05,
"loss": 3.6702,
"step": 12600
},
{
"epoch": 0.14,
"learning_rate": 4.317688522124084e-05,
"loss": 3.6582,
"step": 12700
},
{
"epoch": 0.14,
"learning_rate": 4.312315990802226e-05,
"loss": 3.6654,
"step": 12800
},
{
"epoch": 0.14,
"learning_rate": 4.3069434594803684e-05,
"loss": 3.6911,
"step": 12900
},
{
"epoch": 0.14,
"learning_rate": 4.301570928158511e-05,
"loss": 3.6679,
"step": 13000
},
{
"epoch": 0.14,
"learning_rate": 4.2961983968366534e-05,
"loss": 3.6774,
"step": 13100
},
{
"epoch": 0.14,
"learning_rate": 4.290825865514796e-05,
"loss": 3.6684,
"step": 13200
},
{
"epoch": 0.14,
"learning_rate": 4.2854533341929384e-05,
"loss": 3.6527,
"step": 13300
},
{
"epoch": 0.14,
"learning_rate": 4.280080802871081e-05,
"loss": 3.6351,
"step": 13400
},
{
"epoch": 0.15,
"learning_rate": 4.2747082715492234e-05,
"loss": 3.6591,
"step": 13500
},
{
"epoch": 0.15,
"learning_rate": 4.269335740227366e-05,
"loss": 3.6161,
"step": 13600
},
{
"epoch": 0.15,
"learning_rate": 4.2639632089055084e-05,
"loss": 3.649,
"step": 13700
},
{
"epoch": 0.15,
"learning_rate": 4.2585906775836506e-05,
"loss": 3.6286,
"step": 13800
},
{
"epoch": 0.15,
"learning_rate": 4.2532181462617934e-05,
"loss": 3.6198,
"step": 13900
},
{
"epoch": 0.15,
"learning_rate": 4.2478456149399356e-05,
"loss": 3.6225,
"step": 14000
},
{
"epoch": 0.15,
"learning_rate": 4.242473083618078e-05,
"loss": 3.6132,
"step": 14100
},
{
"epoch": 0.15,
"learning_rate": 4.23710055229622e-05,
"loss": 3.6314,
"step": 14200
},
{
"epoch": 0.15,
"learning_rate": 4.231728020974363e-05,
"loss": 3.6117,
"step": 14300
},
{
"epoch": 0.15,
"learning_rate": 4.226355489652505e-05,
"loss": 3.6054,
"step": 14400
},
{
"epoch": 0.16,
"learning_rate": 4.220982958330647e-05,
"loss": 3.6041,
"step": 14500
},
{
"epoch": 0.16,
"learning_rate": 4.21561042700879e-05,
"loss": 3.617,
"step": 14600
},
{
"epoch": 0.16,
"learning_rate": 4.210237895686932e-05,
"loss": 3.6008,
"step": 14700
},
{
"epoch": 0.16,
"learning_rate": 4.204865364365074e-05,
"loss": 3.6203,
"step": 14800
},
{
"epoch": 0.16,
"learning_rate": 4.199492833043217e-05,
"loss": 3.6015,
"step": 14900
},
{
"epoch": 0.16,
"learning_rate": 4.194120301721359e-05,
"loss": 3.6095,
"step": 15000
},
{
"epoch": 0.16,
"learning_rate": 4.1887477703995014e-05,
"loss": 3.5943,
"step": 15100
},
{
"epoch": 0.16,
"learning_rate": 4.183375239077644e-05,
"loss": 3.5944,
"step": 15200
},
{
"epoch": 0.16,
"learning_rate": 4.1780027077557864e-05,
"loss": 3.6065,
"step": 15300
},
{
"epoch": 0.17,
"learning_rate": 4.1726301764339286e-05,
"loss": 3.5875,
"step": 15400
},
{
"epoch": 0.17,
"learning_rate": 4.167257645112071e-05,
"loss": 3.5759,
"step": 15500
},
{
"epoch": 0.17,
"learning_rate": 4.1618851137902136e-05,
"loss": 3.5856,
"step": 15600
},
{
"epoch": 0.17,
"learning_rate": 4.156512582468356e-05,
"loss": 3.5943,
"step": 15700
},
{
"epoch": 0.17,
"learning_rate": 4.151140051146498e-05,
"loss": 3.5796,
"step": 15800
},
{
"epoch": 0.17,
"learning_rate": 4.145767519824641e-05,
"loss": 3.5752,
"step": 15900
},
{
"epoch": 0.17,
"learning_rate": 4.140394988502783e-05,
"loss": 3.5666,
"step": 16000
},
{
"epoch": 0.17,
"learning_rate": 4.135022457180926e-05,
"loss": 3.5624,
"step": 16100
},
{
"epoch": 0.17,
"learning_rate": 4.129649925859068e-05,
"loss": 3.5564,
"step": 16200
},
{
"epoch": 0.18,
"learning_rate": 4.124277394537211e-05,
"loss": 3.5533,
"step": 16300
},
{
"epoch": 0.18,
"learning_rate": 4.118904863215353e-05,
"loss": 3.5688,
"step": 16400
},
{
"epoch": 0.18,
"learning_rate": 4.113532331893496e-05,
"loss": 3.5587,
"step": 16500
},
{
"epoch": 0.18,
"learning_rate": 4.108159800571638e-05,
"loss": 3.5537,
"step": 16600
},
{
"epoch": 0.18,
"learning_rate": 4.10278726924978e-05,
"loss": 3.5594,
"step": 16700
},
{
"epoch": 0.18,
"learning_rate": 4.097414737927922e-05,
"loss": 3.551,
"step": 16800
},
{
"epoch": 0.18,
"learning_rate": 4.092042206606065e-05,
"loss": 3.5696,
"step": 16900
},
{
"epoch": 0.18,
"learning_rate": 4.086669675284207e-05,
"loss": 3.5331,
"step": 17000
},
{
"epoch": 0.18,
"learning_rate": 4.0812971439623494e-05,
"loss": 3.5312,
"step": 17100
},
{
"epoch": 0.18,
"learning_rate": 4.075924612640492e-05,
"loss": 3.5508,
"step": 17200
},
{
"epoch": 0.19,
"learning_rate": 4.0705520813186344e-05,
"loss": 3.5345,
"step": 17300
},
{
"epoch": 0.19,
"learning_rate": 4.0651795499967766e-05,
"loss": 3.5264,
"step": 17400
},
{
"epoch": 0.19,
"learning_rate": 4.0598070186749194e-05,
"loss": 3.5412,
"step": 17500
},
{
"epoch": 0.19,
"learning_rate": 4.0544344873530616e-05,
"loss": 3.5237,
"step": 17600
},
{
"epoch": 0.19,
"learning_rate": 4.049061956031204e-05,
"loss": 3.5538,
"step": 17700
},
{
"epoch": 0.19,
"learning_rate": 4.043689424709346e-05,
"loss": 3.5171,
"step": 17800
},
{
"epoch": 0.19,
"learning_rate": 4.038316893387489e-05,
"loss": 3.525,
"step": 17900
},
{
"epoch": 0.19,
"learning_rate": 4.032944362065631e-05,
"loss": 3.5248,
"step": 18000
},
{
"epoch": 0.19,
"learning_rate": 4.027571830743773e-05,
"loss": 3.5384,
"step": 18100
},
{
"epoch": 0.2,
"learning_rate": 4.022199299421916e-05,
"loss": 3.5208,
"step": 18200
},
{
"epoch": 0.2,
"learning_rate": 4.016826768100058e-05,
"loss": 3.509,
"step": 18300
},
{
"epoch": 0.2,
"learning_rate": 4.0114542367782e-05,
"loss": 3.4961,
"step": 18400
},
{
"epoch": 0.2,
"learning_rate": 4.006081705456343e-05,
"loss": 3.5075,
"step": 18500
},
{
"epoch": 0.2,
"learning_rate": 4.000709174134485e-05,
"loss": 3.5087,
"step": 18600
},
{
"epoch": 0.2,
"learning_rate": 3.9953366428126274e-05,
"loss": 3.5195,
"step": 18700
},
{
"epoch": 0.2,
"learning_rate": 3.98996411149077e-05,
"loss": 3.5037,
"step": 18800
},
{
"epoch": 0.2,
"learning_rate": 3.9845915801689124e-05,
"loss": 3.4878,
"step": 18900
},
{
"epoch": 0.2,
"learning_rate": 3.9792190488470546e-05,
"loss": 3.4923,
"step": 19000
},
{
"epoch": 0.21,
"learning_rate": 3.9738465175251974e-05,
"loss": 3.4896,
"step": 19100
},
{
"epoch": 0.21,
"learning_rate": 3.9684739862033396e-05,
"loss": 3.4887,
"step": 19200
},
{
"epoch": 0.21,
"learning_rate": 3.9631014548814824e-05,
"loss": 3.4944,
"step": 19300
},
{
"epoch": 0.21,
"learning_rate": 3.9577289235596246e-05,
"loss": 3.4762,
"step": 19400
},
{
"epoch": 0.21,
"learning_rate": 3.9523563922377674e-05,
"loss": 3.4909,
"step": 19500
},
{
"epoch": 0.21,
"learning_rate": 3.9469838609159096e-05,
"loss": 3.4957,
"step": 19600
},
{
"epoch": 0.21,
"learning_rate": 3.941611329594052e-05,
"loss": 3.486,
"step": 19700
},
{
"epoch": 0.21,
"learning_rate": 3.9362387982721946e-05,
"loss": 3.47,
"step": 19800
},
{
"epoch": 0.21,
"learning_rate": 3.930866266950337e-05,
"loss": 3.4813,
"step": 19900
},
{
"epoch": 0.21,
"learning_rate": 3.925493735628479e-05,
"loss": 3.4835,
"step": 20000
},
{
"epoch": 0.22,
"learning_rate": 3.920121204306622e-05,
"loss": 3.4677,
"step": 20100
},
{
"epoch": 0.22,
"learning_rate": 3.914748672984764e-05,
"loss": 3.4771,
"step": 20200
},
{
"epoch": 0.22,
"learning_rate": 3.909376141662906e-05,
"loss": 3.4582,
"step": 20300
},
{
"epoch": 0.22,
"learning_rate": 3.904003610341048e-05,
"loss": 3.4537,
"step": 20400
},
{
"epoch": 0.22,
"learning_rate": 3.898631079019191e-05,
"loss": 3.4821,
"step": 20500
},
{
"epoch": 0.22,
"learning_rate": 3.893258547697333e-05,
"loss": 3.4783,
"step": 20600
},
{
"epoch": 0.22,
"learning_rate": 3.8878860163754754e-05,
"loss": 3.476,
"step": 20700
},
{
"epoch": 0.22,
"learning_rate": 3.882513485053618e-05,
"loss": 3.4741,
"step": 20800
},
{
"epoch": 0.22,
"learning_rate": 3.8771409537317604e-05,
"loss": 3.437,
"step": 20900
},
{
"epoch": 0.23,
"learning_rate": 3.8717684224099026e-05,
"loss": 3.4547,
"step": 21000
},
{
"epoch": 0.23,
"learning_rate": 3.8663958910880454e-05,
"loss": 3.451,
"step": 21100
},
{
"epoch": 0.23,
"learning_rate": 3.8610233597661876e-05,
"loss": 3.4419,
"step": 21200
},
{
"epoch": 0.23,
"learning_rate": 3.85565082844433e-05,
"loss": 3.4506,
"step": 21300
},
{
"epoch": 0.23,
"learning_rate": 3.8502782971224726e-05,
"loss": 3.431,
"step": 21400
},
{
"epoch": 0.23,
"learning_rate": 3.844905765800615e-05,
"loss": 3.4346,
"step": 21500
},
{
"epoch": 0.23,
"learning_rate": 3.839533234478757e-05,
"loss": 3.4519,
"step": 21600
},
{
"epoch": 0.23,
"learning_rate": 3.834160703156899e-05,
"loss": 3.416,
"step": 21700
},
{
"epoch": 0.23,
"learning_rate": 3.828788171835042e-05,
"loss": 3.4357,
"step": 21800
},
{
"epoch": 0.24,
"learning_rate": 3.823415640513184e-05,
"loss": 3.4319,
"step": 21900
},
{
"epoch": 0.24,
"learning_rate": 3.818043109191327e-05,
"loss": 3.4303,
"step": 22000
},
{
"epoch": 0.24,
"learning_rate": 3.812670577869469e-05,
"loss": 3.4253,
"step": 22100
},
{
"epoch": 0.24,
"learning_rate": 3.807298046547612e-05,
"loss": 3.4216,
"step": 22200
},
{
"epoch": 0.24,
"learning_rate": 3.801925515225754e-05,
"loss": 3.4161,
"step": 22300
},
{
"epoch": 0.24,
"learning_rate": 3.796552983903897e-05,
"loss": 3.4169,
"step": 22400
},
{
"epoch": 0.24,
"learning_rate": 3.791180452582039e-05,
"loss": 3.4159,
"step": 22500
},
{
"epoch": 0.24,
"learning_rate": 3.785807921260181e-05,
"loss": 3.4083,
"step": 22600
},
{
"epoch": 0.24,
"learning_rate": 3.7804353899383234e-05,
"loss": 3.4377,
"step": 22700
},
{
"epoch": 0.24,
"learning_rate": 3.775062858616466e-05,
"loss": 3.4212,
"step": 22800
},
{
"epoch": 0.25,
"learning_rate": 3.7696903272946084e-05,
"loss": 3.4037,
"step": 22900
},
{
"epoch": 0.25,
"learning_rate": 3.7643177959727506e-05,
"loss": 3.4036,
"step": 23000
},
{
"epoch": 0.25,
"learning_rate": 3.7589452646508934e-05,
"loss": 3.4091,
"step": 23100
},
{
"epoch": 0.25,
"learning_rate": 3.7535727333290356e-05,
"loss": 3.4189,
"step": 23200
},
{
"epoch": 0.25,
"learning_rate": 3.748200202007178e-05,
"loss": 3.3798,
"step": 23300
},
{
"epoch": 0.25,
"learning_rate": 3.7428276706853206e-05,
"loss": 3.3914,
"step": 23400
},
{
"epoch": 0.25,
"learning_rate": 3.737455139363463e-05,
"loss": 3.3939,
"step": 23500
},
{
"epoch": 0.25,
"learning_rate": 3.732082608041605e-05,
"loss": 3.4001,
"step": 23600
},
{
"epoch": 0.25,
"learning_rate": 3.726710076719748e-05,
"loss": 3.4028,
"step": 23700
},
{
"epoch": 0.26,
"learning_rate": 3.72133754539789e-05,
"loss": 3.379,
"step": 23800
},
{
"epoch": 0.26,
"learning_rate": 3.715965014076032e-05,
"loss": 3.3824,
"step": 23900
},
{
"epoch": 0.26,
"learning_rate": 3.710592482754174e-05,
"loss": 3.392,
"step": 24000
},
{
"epoch": 0.26,
"learning_rate": 3.705219951432317e-05,
"loss": 3.3953,
"step": 24100
},
{
"epoch": 0.26,
"learning_rate": 3.699847420110459e-05,
"loss": 3.3776,
"step": 24200
},
{
"epoch": 0.26,
"learning_rate": 3.6944748887886014e-05,
"loss": 3.3937,
"step": 24300
},
{
"epoch": 0.26,
"learning_rate": 3.689102357466744e-05,
"loss": 3.3954,
"step": 24400
},
{
"epoch": 0.26,
"learning_rate": 3.6837298261448864e-05,
"loss": 3.3905,
"step": 24500
},
{
"epoch": 0.26,
"learning_rate": 3.6783572948230286e-05,
"loss": 3.3676,
"step": 24600
},
{
"epoch": 0.27,
"learning_rate": 3.6729847635011714e-05,
"loss": 3.3678,
"step": 24700
},
{
"epoch": 0.27,
"learning_rate": 3.6676122321793136e-05,
"loss": 3.3548,
"step": 24800
},
{
"epoch": 0.27,
"learning_rate": 3.662239700857456e-05,
"loss": 3.3502,
"step": 24900
},
{
"epoch": 0.27,
"learning_rate": 3.6568671695355986e-05,
"loss": 3.3743,
"step": 25000
},
{
"epoch": 0.27,
"learning_rate": 3.651494638213741e-05,
"loss": 3.3593,
"step": 25100
},
{
"epoch": 0.27,
"learning_rate": 3.6461221068918836e-05,
"loss": 3.3581,
"step": 25200
},
{
"epoch": 0.27,
"learning_rate": 3.640749575570026e-05,
"loss": 3.356,
"step": 25300
},
{
"epoch": 0.27,
"learning_rate": 3.6353770442481686e-05,
"loss": 3.3333,
"step": 25400
},
{
"epoch": 0.27,
"learning_rate": 3.630004512926311e-05,
"loss": 3.3463,
"step": 25500
},
{
"epoch": 0.28,
"learning_rate": 3.624631981604453e-05,
"loss": 3.3273,
"step": 25600
},
{
"epoch": 0.28,
"learning_rate": 3.619259450282596e-05,
"loss": 3.354,
"step": 25700
},
{
"epoch": 0.28,
"learning_rate": 3.613886918960738e-05,
"loss": 3.3302,
"step": 25800
},
{
"epoch": 0.28,
"learning_rate": 3.60851438763888e-05,
"loss": 3.3569,
"step": 25900
},
{
"epoch": 0.28,
"learning_rate": 3.603141856317023e-05,
"loss": 3.339,
"step": 26000
},
{
"epoch": 0.28,
"learning_rate": 3.597769324995165e-05,
"loss": 3.3359,
"step": 26100
},
{
"epoch": 0.28,
"learning_rate": 3.592396793673307e-05,
"loss": 3.3382,
"step": 26200
},
{
"epoch": 0.28,
"learning_rate": 3.58702426235145e-05,
"loss": 3.3257,
"step": 26300
},
{
"epoch": 0.28,
"learning_rate": 3.581651731029592e-05,
"loss": 3.3159,
"step": 26400
},
{
"epoch": 0.28,
"learning_rate": 3.5762791997077344e-05,
"loss": 3.3236,
"step": 26500
},
{
"epoch": 0.29,
"learning_rate": 3.5709066683858766e-05,
"loss": 3.3226,
"step": 26600
},
{
"epoch": 0.29,
"learning_rate": 3.5655341370640194e-05,
"loss": 3.3157,
"step": 26700
},
{
"epoch": 0.29,
"learning_rate": 3.5601616057421616e-05,
"loss": 3.3511,
"step": 26800
},
{
"epoch": 0.29,
"learning_rate": 3.554789074420304e-05,
"loss": 3.3082,
"step": 26900
},
{
"epoch": 0.29,
"learning_rate": 3.5494165430984466e-05,
"loss": 3.3117,
"step": 27000
},
{
"epoch": 0.29,
"learning_rate": 3.544044011776589e-05,
"loss": 3.3165,
"step": 27100
},
{
"epoch": 0.29,
"learning_rate": 3.538671480454731e-05,
"loss": 3.3211,
"step": 27200
},
{
"epoch": 0.29,
"learning_rate": 3.533298949132874e-05,
"loss": 3.3134,
"step": 27300
},
{
"epoch": 0.29,
"learning_rate": 3.527926417811016e-05,
"loss": 3.3036,
"step": 27400
},
{
"epoch": 0.3,
"learning_rate": 3.522553886489158e-05,
"loss": 3.3088,
"step": 27500
},
{
"epoch": 0.3,
"learning_rate": 3.5171813551673e-05,
"loss": 3.2946,
"step": 27600
},
{
"epoch": 0.3,
"learning_rate": 3.511808823845443e-05,
"loss": 3.2971,
"step": 27700
},
{
"epoch": 0.3,
"learning_rate": 3.506436292523585e-05,
"loss": 3.2943,
"step": 27800
},
{
"epoch": 0.3,
"learning_rate": 3.501063761201728e-05,
"loss": 3.2995,
"step": 27900
},
{
"epoch": 0.3,
"learning_rate": 3.49569122987987e-05,
"loss": 3.2877,
"step": 28000
},
{
"epoch": 0.3,
"learning_rate": 3.490318698558013e-05,
"loss": 3.2879,
"step": 28100
},
{
"epoch": 0.3,
"learning_rate": 3.484946167236155e-05,
"loss": 3.2784,
"step": 28200
},
{
"epoch": 0.3,
"learning_rate": 3.479573635914298e-05,
"loss": 3.3089,
"step": 28300
},
{
"epoch": 0.31,
"learning_rate": 3.47420110459244e-05,
"loss": 3.2818,
"step": 28400
},
{
"epoch": 0.31,
"learning_rate": 3.4688285732705824e-05,
"loss": 3.2698,
"step": 28500
},
{
"epoch": 0.31,
"learning_rate": 3.463456041948725e-05,
"loss": 3.2706,
"step": 28600
},
{
"epoch": 0.31,
"learning_rate": 3.4580835106268674e-05,
"loss": 3.2884,
"step": 28700
},
{
"epoch": 0.31,
"learning_rate": 3.4527109793050096e-05,
"loss": 3.2786,
"step": 28800
},
{
"epoch": 0.31,
"learning_rate": 3.447338447983152e-05,
"loss": 3.2662,
"step": 28900
},
{
"epoch": 0.31,
"learning_rate": 3.4419659166612946e-05,
"loss": 3.2616,
"step": 29000
},
{
"epoch": 0.31,
"learning_rate": 3.436593385339437e-05,
"loss": 3.2569,
"step": 29100
},
{
"epoch": 0.31,
"learning_rate": 3.431220854017579e-05,
"loss": 3.2689,
"step": 29200
},
{
"epoch": 0.31,
"learning_rate": 3.425848322695722e-05,
"loss": 3.2591,
"step": 29300
},
{
"epoch": 0.32,
"learning_rate": 3.420475791373864e-05,
"loss": 3.2453,
"step": 29400
},
{
"epoch": 0.32,
"learning_rate": 3.415103260052006e-05,
"loss": 3.2755,
"step": 29500
},
{
"epoch": 0.32,
"learning_rate": 3.409730728730149e-05,
"loss": 3.2599,
"step": 29600
},
{
"epoch": 0.32,
"learning_rate": 3.404358197408291e-05,
"loss": 3.2462,
"step": 29700
},
{
"epoch": 0.32,
"learning_rate": 3.398985666086433e-05,
"loss": 3.2592,
"step": 29800
},
{
"epoch": 0.32,
"learning_rate": 3.393613134764576e-05,
"loss": 3.2619,
"step": 29900
},
{
"epoch": 0.32,
"learning_rate": 3.388240603442718e-05,
"loss": 3.252,
"step": 30000
},
{
"epoch": 0.32,
"learning_rate": 3.3828680721208604e-05,
"loss": 3.2454,
"step": 30100
},
{
"epoch": 0.32,
"learning_rate": 3.3774955407990026e-05,
"loss": 3.2344,
"step": 30200
},
{
"epoch": 0.33,
"learning_rate": 3.3721230094771454e-05,
"loss": 3.2465,
"step": 30300
},
{
"epoch": 0.33,
"learning_rate": 3.3667504781552876e-05,
"loss": 3.2462,
"step": 30400
},
{
"epoch": 0.33,
"learning_rate": 3.36137794683343e-05,
"loss": 3.2342,
"step": 30500
},
{
"epoch": 0.33,
"learning_rate": 3.3560054155115726e-05,
"loss": 3.2186,
"step": 30600
},
{
"epoch": 0.33,
"learning_rate": 3.350632884189715e-05,
"loss": 3.2556,
"step": 30700
},
{
"epoch": 0.33,
"learning_rate": 3.345260352867857e-05,
"loss": 3.2477,
"step": 30800
},
{
"epoch": 0.33,
"learning_rate": 3.339887821546e-05,
"loss": 3.2139,
"step": 30900
},
{
"epoch": 0.33,
"learning_rate": 3.334515290224142e-05,
"loss": 3.2478,
"step": 31000
},
{
"epoch": 0.33,
"learning_rate": 3.329142758902285e-05,
"loss": 3.2423,
"step": 31100
},
{
"epoch": 0.34,
"learning_rate": 3.323770227580427e-05,
"loss": 3.242,
"step": 31200
},
{
"epoch": 0.34,
"learning_rate": 3.31839769625857e-05,
"loss": 3.2452,
"step": 31300
},
{
"epoch": 0.34,
"learning_rate": 3.313025164936712e-05,
"loss": 3.217,
"step": 31400
},
{
"epoch": 0.34,
"learning_rate": 3.307652633614854e-05,
"loss": 3.2212,
"step": 31500
},
{
"epoch": 0.34,
"learning_rate": 3.302280102292997e-05,
"loss": 3.2293,
"step": 31600
},
{
"epoch": 0.34,
"learning_rate": 3.296907570971139e-05,
"loss": 3.2257,
"step": 31700
},
{
"epoch": 0.34,
"learning_rate": 3.291535039649281e-05,
"loss": 3.1933,
"step": 31800
},
{
"epoch": 0.34,
"learning_rate": 3.286162508327424e-05,
"loss": 3.2092,
"step": 31900
},
{
"epoch": 0.34,
"learning_rate": 3.280789977005566e-05,
"loss": 3.1993,
"step": 32000
},
{
"epoch": 0.34,
"learning_rate": 3.2754174456837084e-05,
"loss": 3.2368,
"step": 32100
},
{
"epoch": 0.35,
"learning_rate": 3.270044914361851e-05,
"loss": 3.2216,
"step": 32200
},
{
"epoch": 0.35,
"learning_rate": 3.2646723830399934e-05,
"loss": 3.1913,
"step": 32300
},
{
"epoch": 0.35,
"learning_rate": 3.2592998517181356e-05,
"loss": 3.2121,
"step": 32400
},
{
"epoch": 0.35,
"learning_rate": 3.253927320396278e-05,
"loss": 3.224,
"step": 32500
},
{
"epoch": 0.35,
"learning_rate": 3.2485547890744206e-05,
"loss": 3.2091,
"step": 32600
},
{
"epoch": 0.35,
"learning_rate": 3.243182257752563e-05,
"loss": 3.1823,
"step": 32700
},
{
"epoch": 0.35,
"learning_rate": 3.237809726430705e-05,
"loss": 3.188,
"step": 32800
},
{
"epoch": 0.35,
"learning_rate": 3.232437195108848e-05,
"loss": 3.2111,
"step": 32900
},
{
"epoch": 0.35,
"learning_rate": 3.22706466378699e-05,
"loss": 3.2252,
"step": 33000
},
{
"epoch": 0.36,
"learning_rate": 3.221692132465132e-05,
"loss": 3.1869,
"step": 33100
},
{
"epoch": 0.36,
"learning_rate": 3.216319601143275e-05,
"loss": 3.2025,
"step": 33200
},
{
"epoch": 0.36,
"learning_rate": 3.210947069821417e-05,
"loss": 3.2068,
"step": 33300
},
{
"epoch": 0.36,
"learning_rate": 3.205574538499559e-05,
"loss": 3.1969,
"step": 33400
},
{
"epoch": 0.36,
"learning_rate": 3.200202007177702e-05,
"loss": 3.2075,
"step": 33500
},
{
"epoch": 0.36,
"learning_rate": 3.194829475855844e-05,
"loss": 3.2,
"step": 33600
},
{
"epoch": 0.36,
"learning_rate": 3.1894569445339864e-05,
"loss": 3.1982,
"step": 33700
},
{
"epoch": 0.36,
"learning_rate": 3.184084413212129e-05,
"loss": 3.185,
"step": 33800
},
{
"epoch": 0.36,
"learning_rate": 3.1787118818902714e-05,
"loss": 3.1821,
"step": 33900
},
{
"epoch": 0.37,
"learning_rate": 3.173339350568414e-05,
"loss": 3.1602,
"step": 34000
},
{
"epoch": 0.37,
"learning_rate": 3.1679668192465564e-05,
"loss": 3.1737,
"step": 34100
},
{
"epoch": 0.37,
"learning_rate": 3.162594287924699e-05,
"loss": 3.1797,
"step": 34200
},
{
"epoch": 0.37,
"learning_rate": 3.1572217566028414e-05,
"loss": 3.1765,
"step": 34300
},
{
"epoch": 0.37,
"learning_rate": 3.1518492252809836e-05,
"loss": 3.1776,
"step": 34400
},
{
"epoch": 0.37,
"learning_rate": 3.1464766939591264e-05,
"loss": 3.1902,
"step": 34500
},
{
"epoch": 0.37,
"learning_rate": 3.1411041626372686e-05,
"loss": 3.1847,
"step": 34600
},
{
"epoch": 0.37,
"learning_rate": 3.135731631315411e-05,
"loss": 3.1871,
"step": 34700
},
{
"epoch": 0.37,
"learning_rate": 3.1303590999935536e-05,
"loss": 3.1669,
"step": 34800
},
{
"epoch": 0.38,
"learning_rate": 3.124986568671696e-05,
"loss": 3.1794,
"step": 34900
},
{
"epoch": 0.38,
"learning_rate": 3.119614037349838e-05,
"loss": 3.1571,
"step": 35000
},
{
"epoch": 0.38,
"learning_rate": 3.11424150602798e-05,
"loss": 3.1599,
"step": 35100
},
{
"epoch": 0.38,
"learning_rate": 3.108868974706123e-05,
"loss": 3.167,
"step": 35200
},
{
"epoch": 0.38,
"learning_rate": 3.103496443384265e-05,
"loss": 3.1612,
"step": 35300
},
{
"epoch": 0.38,
"learning_rate": 3.098123912062407e-05,
"loss": 3.1751,
"step": 35400
},
{
"epoch": 0.38,
"learning_rate": 3.09275138074055e-05,
"loss": 3.1877,
"step": 35500
},
{
"epoch": 0.38,
"learning_rate": 3.087378849418692e-05,
"loss": 3.168,
"step": 35600
},
{
"epoch": 0.38,
"learning_rate": 3.0820063180968344e-05,
"loss": 3.1767,
"step": 35700
},
{
"epoch": 0.38,
"learning_rate": 3.076633786774977e-05,
"loss": 3.1769,
"step": 35800
},
{
"epoch": 0.39,
"learning_rate": 3.0712612554531194e-05,
"loss": 3.1486,
"step": 35900
},
{
"epoch": 0.39,
"learning_rate": 3.0658887241312616e-05,
"loss": 3.164,
"step": 36000
},
{
"epoch": 0.39,
"learning_rate": 3.0605161928094044e-05,
"loss": 3.1753,
"step": 36100
},
{
"epoch": 0.39,
"learning_rate": 3.0551436614875466e-05,
"loss": 3.1644,
"step": 36200
},
{
"epoch": 0.39,
"learning_rate": 3.049771130165689e-05,
"loss": 3.1607,
"step": 36300
},
{
"epoch": 0.39,
"learning_rate": 3.0443985988438312e-05,
"loss": 3.1605,
"step": 36400
},
{
"epoch": 0.39,
"learning_rate": 3.039026067521974e-05,
"loss": 3.148,
"step": 36500
},
{
"epoch": 0.39,
"learning_rate": 3.0336535362001162e-05,
"loss": 3.1428,
"step": 36600
},
{
"epoch": 0.39,
"learning_rate": 3.0282810048782584e-05,
"loss": 3.1677,
"step": 36700
},
{
"epoch": 0.4,
"learning_rate": 3.0229084735564012e-05,
"loss": 3.1525,
"step": 36800
},
{
"epoch": 0.4,
"learning_rate": 3.0175359422345434e-05,
"loss": 3.1598,
"step": 36900
},
{
"epoch": 0.4,
"learning_rate": 3.0121634109126856e-05,
"loss": 3.1578,
"step": 37000
},
{
"epoch": 0.4,
"learning_rate": 3.0067908795908284e-05,
"loss": 3.1406,
"step": 37100
},
{
"epoch": 0.4,
"learning_rate": 3.0014183482689706e-05,
"loss": 3.1457,
"step": 37200
},
{
"epoch": 0.4,
"learning_rate": 2.9960458169471127e-05,
"loss": 3.1567,
"step": 37300
},
{
"epoch": 0.4,
"learning_rate": 2.9906732856252552e-05,
"loss": 3.1567,
"step": 37400
},
{
"epoch": 0.4,
"learning_rate": 2.9853007543033977e-05,
"loss": 3.1289,
"step": 37500
},
{
"epoch": 0.4,
"learning_rate": 2.9799282229815402e-05,
"loss": 3.1511,
"step": 37600
},
{
"epoch": 0.41,
"learning_rate": 2.9745556916596824e-05,
"loss": 3.1567,
"step": 37700
},
{
"epoch": 0.41,
"learning_rate": 2.9691831603378252e-05,
"loss": 3.1316,
"step": 37800
},
{
"epoch": 0.41,
"learning_rate": 2.9638106290159674e-05,
"loss": 3.1228,
"step": 37900
},
{
"epoch": 0.41,
"learning_rate": 2.9584380976941096e-05,
"loss": 3.1531,
"step": 38000
},
{
"epoch": 0.41,
"learning_rate": 2.9530655663722524e-05,
"loss": 3.1342,
"step": 38100
},
{
"epoch": 0.41,
"learning_rate": 2.9476930350503946e-05,
"loss": 3.15,
"step": 38200
},
{
"epoch": 0.41,
"learning_rate": 2.9423205037285367e-05,
"loss": 3.1372,
"step": 38300
},
{
"epoch": 0.41,
"learning_rate": 2.9369479724066796e-05,
"loss": 3.1432,
"step": 38400
},
{
"epoch": 0.41,
"learning_rate": 2.9315754410848217e-05,
"loss": 3.1214,
"step": 38500
},
{
"epoch": 0.41,
"learning_rate": 2.926202909762964e-05,
"loss": 3.1377,
"step": 38600
},
{
"epoch": 0.42,
"learning_rate": 2.920830378441106e-05,
"loss": 3.1357,
"step": 38700
},
{
"epoch": 0.42,
"learning_rate": 2.915457847119249e-05,
"loss": 3.1488,
"step": 38800
},
{
"epoch": 0.42,
"learning_rate": 2.910085315797391e-05,
"loss": 3.1583,
"step": 38900
},
{
"epoch": 0.42,
"learning_rate": 2.9047127844755336e-05,
"loss": 3.1319,
"step": 39000
},
{
"epoch": 0.42,
"learning_rate": 2.899340253153676e-05,
"loss": 3.1085,
"step": 39100
},
{
"epoch": 0.42,
"learning_rate": 2.8939677218318186e-05,
"loss": 3.1086,
"step": 39200
},
{
"epoch": 0.42,
"learning_rate": 2.8885951905099607e-05,
"loss": 3.1263,
"step": 39300
},
{
"epoch": 0.42,
"learning_rate": 2.8832226591881036e-05,
"loss": 3.1347,
"step": 39400
},
{
"epoch": 0.42,
"learning_rate": 2.8778501278662457e-05,
"loss": 3.1416,
"step": 39500
},
{
"epoch": 0.43,
"learning_rate": 2.872477596544388e-05,
"loss": 3.1167,
"step": 39600
},
{
"epoch": 0.43,
"learning_rate": 2.8671050652225307e-05,
"loss": 3.1124,
"step": 39700
},
{
"epoch": 0.43,
"learning_rate": 2.861732533900673e-05,
"loss": 3.1183,
"step": 39800
},
{
"epoch": 0.43,
"learning_rate": 2.856360002578815e-05,
"loss": 3.1373,
"step": 39900
},
{
"epoch": 0.43,
"learning_rate": 2.8509874712569572e-05,
"loss": 3.1123,
"step": 40000
},
{
"epoch": 0.43,
"learning_rate": 2.8456149399351e-05,
"loss": 3.1323,
"step": 40100
},
{
"epoch": 0.43,
"learning_rate": 2.8402424086132422e-05,
"loss": 3.1216,
"step": 40200
},
{
"epoch": 0.43,
"learning_rate": 2.8348698772913844e-05,
"loss": 3.1145,
"step": 40300
},
{
"epoch": 0.43,
"learning_rate": 2.8294973459695272e-05,
"loss": 3.1367,
"step": 40400
},
{
"epoch": 0.44,
"learning_rate": 2.8241248146476694e-05,
"loss": 3.1081,
"step": 40500
},
{
"epoch": 0.44,
"learning_rate": 2.818752283325812e-05,
"loss": 3.1199,
"step": 40600
},
{
"epoch": 0.44,
"learning_rate": 2.8133797520039544e-05,
"loss": 3.1251,
"step": 40700
},
{
"epoch": 0.44,
"learning_rate": 2.808007220682097e-05,
"loss": 3.1267,
"step": 40800
},
{
"epoch": 0.44,
"learning_rate": 2.802634689360239e-05,
"loss": 3.1151,
"step": 40900
},
{
"epoch": 0.44,
"learning_rate": 2.797262158038382e-05,
"loss": 3.1247,
"step": 41000
},
{
"epoch": 0.44,
"learning_rate": 2.791889626716524e-05,
"loss": 3.1093,
"step": 41100
},
{
"epoch": 0.44,
"learning_rate": 2.7865170953946662e-05,
"loss": 3.1166,
"step": 41200
},
{
"epoch": 0.44,
"learning_rate": 2.7811445640728084e-05,
"loss": 3.1109,
"step": 41300
},
{
"epoch": 0.44,
"learning_rate": 2.7757720327509512e-05,
"loss": 3.1196,
"step": 41400
},
{
"epoch": 0.45,
"learning_rate": 2.7703995014290934e-05,
"loss": 3.1193,
"step": 41500
},
{
"epoch": 0.45,
"learning_rate": 2.7650269701072356e-05,
"loss": 3.1105,
"step": 41600
},
{
"epoch": 0.45,
"learning_rate": 2.7596544387853784e-05,
"loss": 3.136,
"step": 41700
},
{
"epoch": 0.45,
"learning_rate": 2.7542819074635206e-05,
"loss": 3.1115,
"step": 41800
},
{
"epoch": 0.45,
"learning_rate": 2.7489093761416627e-05,
"loss": 3.0942,
"step": 41900
},
{
"epoch": 0.45,
"learning_rate": 2.7435368448198056e-05,
"loss": 3.1059,
"step": 42000
},
{
"epoch": 0.45,
"learning_rate": 2.7381643134979477e-05,
"loss": 3.1198,
"step": 42100
},
{
"epoch": 0.45,
"learning_rate": 2.7327917821760902e-05,
"loss": 3.0933,
"step": 42200
},
{
"epoch": 0.45,
"learning_rate": 2.7274192508542327e-05,
"loss": 3.0963,
"step": 42300
},
{
"epoch": 0.46,
"learning_rate": 2.7220467195323752e-05,
"loss": 3.1158,
"step": 42400
},
{
"epoch": 0.46,
"learning_rate": 2.7166741882105174e-05,
"loss": 3.098,
"step": 42500
},
{
"epoch": 0.46,
"learning_rate": 2.7113016568886596e-05,
"loss": 3.0857,
"step": 42600
},
{
"epoch": 0.46,
"learning_rate": 2.7059291255668024e-05,
"loss": 3.0835,
"step": 42700
},
{
"epoch": 0.46,
"learning_rate": 2.7005565942449446e-05,
"loss": 3.1004,
"step": 42800
},
{
"epoch": 0.46,
"learning_rate": 2.6951840629230867e-05,
"loss": 3.0934,
"step": 42900
},
{
"epoch": 0.46,
"learning_rate": 2.6898115316012296e-05,
"loss": 3.1017,
"step": 43000
},
{
"epoch": 0.46,
"learning_rate": 2.6844390002793717e-05,
"loss": 3.1098,
"step": 43100
},
{
"epoch": 0.46,
"learning_rate": 2.679066468957514e-05,
"loss": 3.1095,
"step": 43200
},
{
"epoch": 0.47,
"learning_rate": 2.6736939376356567e-05,
"loss": 3.0855,
"step": 43300
},
{
"epoch": 0.47,
"learning_rate": 2.668321406313799e-05,
"loss": 3.0745,
"step": 43400
},
{
"epoch": 0.47,
"learning_rate": 2.6629488749919414e-05,
"loss": 3.0847,
"step": 43500
},
{
"epoch": 0.47,
"learning_rate": 2.6575763436700836e-05,
"loss": 3.1013,
"step": 43600
},
{
"epoch": 0.47,
"learning_rate": 2.6522038123482264e-05,
"loss": 3.0905,
"step": 43700
},
{
"epoch": 0.47,
"learning_rate": 2.6468312810263686e-05,
"loss": 3.0946,
"step": 43800
},
{
"epoch": 0.47,
"learning_rate": 2.6414587497045107e-05,
"loss": 3.0838,
"step": 43900
},
{
"epoch": 0.47,
"learning_rate": 2.6360862183826536e-05,
"loss": 3.0921,
"step": 44000
},
{
"epoch": 0.47,
"learning_rate": 2.6307136870607957e-05,
"loss": 3.0972,
"step": 44100
},
{
"epoch": 0.47,
"learning_rate": 2.625341155738938e-05,
"loss": 3.0854,
"step": 44200
},
{
"epoch": 0.48,
"learning_rate": 2.6199686244170807e-05,
"loss": 3.0998,
"step": 44300
},
{
"epoch": 0.48,
"learning_rate": 2.614596093095223e-05,
"loss": 3.0921,
"step": 44400
},
{
"epoch": 0.48,
"learning_rate": 2.609223561773365e-05,
"loss": 3.0999,
"step": 44500
},
{
"epoch": 0.48,
"learning_rate": 2.603851030451508e-05,
"loss": 3.0988,
"step": 44600
},
{
"epoch": 0.48,
"learning_rate": 2.59847849912965e-05,
"loss": 3.0667,
"step": 44700
},
{
"epoch": 0.48,
"learning_rate": 2.5931059678077922e-05,
"loss": 3.0695,
"step": 44800
},
{
"epoch": 0.48,
"learning_rate": 2.5877334364859347e-05,
"loss": 3.0685,
"step": 44900
},
{
"epoch": 0.48,
"learning_rate": 2.5823609051640772e-05,
"loss": 3.0959,
"step": 45000
},
{
"epoch": 0.48,
"learning_rate": 2.5769883738422197e-05,
"loss": 3.0912,
"step": 45100
},
{
"epoch": 0.49,
"learning_rate": 2.571615842520362e-05,
"loss": 3.0751,
"step": 45200
},
{
"epoch": 0.49,
"learning_rate": 2.5662433111985047e-05,
"loss": 3.0864,
"step": 45300
},
{
"epoch": 0.49,
"learning_rate": 2.560870779876647e-05,
"loss": 3.0713,
"step": 45400
},
{
"epoch": 0.49,
"learning_rate": 2.555498248554789e-05,
"loss": 3.069,
"step": 45500
},
{
"epoch": 0.49,
"learning_rate": 2.550125717232932e-05,
"loss": 3.0644,
"step": 45600
},
{
"epoch": 0.49,
"learning_rate": 2.544753185911074e-05,
"loss": 3.061,
"step": 45700
},
{
"epoch": 0.49,
"learning_rate": 2.5393806545892162e-05,
"loss": 3.0784,
"step": 45800
},
{
"epoch": 0.49,
"learning_rate": 2.534008123267359e-05,
"loss": 3.0646,
"step": 45900
},
{
"epoch": 0.49,
"learning_rate": 2.5286355919455012e-05,
"loss": 3.0699,
"step": 46000
},
{
"epoch": 0.5,
"learning_rate": 2.5232630606236434e-05,
"loss": 3.083,
"step": 46100
},
{
"epoch": 0.5,
"learning_rate": 2.5178905293017856e-05,
"loss": 3.0713,
"step": 46200
},
{
"epoch": 0.5,
"learning_rate": 2.5125179979799284e-05,
"loss": 3.0824,
"step": 46300
},
{
"epoch": 0.5,
"learning_rate": 2.5071454666580706e-05,
"loss": 3.0586,
"step": 46400
},
{
"epoch": 0.5,
"learning_rate": 2.501772935336213e-05,
"loss": 3.062,
"step": 46500
},
{
"epoch": 0.5,
"learning_rate": 2.4964004040143556e-05,
"loss": 3.0625,
"step": 46600
},
{
"epoch": 0.5,
"learning_rate": 2.491027872692498e-05,
"loss": 3.0978,
"step": 46700
},
{
"epoch": 0.5,
"learning_rate": 2.4856553413706406e-05,
"loss": 3.0756,
"step": 46800
},
{
"epoch": 0.5,
"learning_rate": 2.4802828100487827e-05,
"loss": 3.0696,
"step": 46900
},
{
"epoch": 0.51,
"learning_rate": 2.4749102787269252e-05,
"loss": 3.0827,
"step": 47000
},
{
"epoch": 0.51,
"learning_rate": 2.4695377474050674e-05,
"loss": 3.0644,
"step": 47100
},
{
"epoch": 0.51,
"learning_rate": 2.46416521608321e-05,
"loss": 3.0676,
"step": 47200
},
{
"epoch": 0.51,
"learning_rate": 2.4587926847613524e-05,
"loss": 3.0826,
"step": 47300
},
{
"epoch": 0.51,
"learning_rate": 2.4534201534394946e-05,
"loss": 3.0575,
"step": 47400
},
{
"epoch": 0.51,
"learning_rate": 2.448047622117637e-05,
"loss": 3.0869,
"step": 47500
},
{
"epoch": 0.51,
"learning_rate": 2.4426750907957792e-05,
"loss": 3.0646,
"step": 47600
},
{
"epoch": 0.51,
"learning_rate": 2.4373025594739217e-05,
"loss": 3.0734,
"step": 47700
},
{
"epoch": 0.51,
"learning_rate": 2.4319300281520642e-05,
"loss": 3.0642,
"step": 47800
},
{
"epoch": 0.51,
"learning_rate": 2.4265574968302064e-05,
"loss": 3.0662,
"step": 47900
},
{
"epoch": 0.52,
"learning_rate": 2.421184965508349e-05,
"loss": 3.054,
"step": 48000
},
{
"epoch": 0.52,
"learning_rate": 2.4158124341864914e-05,
"loss": 3.0639,
"step": 48100
},
{
"epoch": 0.52,
"learning_rate": 2.410439902864634e-05,
"loss": 3.0488,
"step": 48200
},
{
"epoch": 0.52,
"learning_rate": 2.4050673715427764e-05,
"loss": 3.064,
"step": 48300
},
{
"epoch": 0.52,
"learning_rate": 2.3996948402209186e-05,
"loss": 3.0576,
"step": 48400
},
{
"epoch": 0.52,
"learning_rate": 2.394322308899061e-05,
"loss": 3.0664,
"step": 48500
},
{
"epoch": 0.52,
"learning_rate": 2.3889497775772036e-05,
"loss": 3.0598,
"step": 48600
},
{
"epoch": 0.52,
"learning_rate": 2.3835772462553457e-05,
"loss": 3.0482,
"step": 48700
},
{
"epoch": 0.52,
"learning_rate": 2.3782047149334882e-05,
"loss": 3.0439,
"step": 48800
},
{
"epoch": 0.53,
"learning_rate": 2.3728321836116304e-05,
"loss": 3.0662,
"step": 48900
},
{
"epoch": 0.53,
"learning_rate": 2.367459652289773e-05,
"loss": 3.0659,
"step": 49000
},
{
"epoch": 0.53,
"learning_rate": 2.3620871209679154e-05,
"loss": 3.043,
"step": 49100
},
{
"epoch": 0.53,
"learning_rate": 2.3567145896460576e-05,
"loss": 3.0675,
"step": 49200
},
{
"epoch": 0.53,
"learning_rate": 2.3513420583242e-05,
"loss": 3.0336,
"step": 49300
},
{
"epoch": 0.53,
"learning_rate": 2.3459695270023426e-05,
"loss": 3.0522,
"step": 49400
},
{
"epoch": 0.53,
"learning_rate": 2.340596995680485e-05,
"loss": 3.0555,
"step": 49500
},
{
"epoch": 0.53,
"learning_rate": 2.3352244643586276e-05,
"loss": 3.0536,
"step": 49600
},
{
"epoch": 0.53,
"learning_rate": 2.3298519330367697e-05,
"loss": 3.0615,
"step": 49700
},
{
"epoch": 0.54,
"learning_rate": 2.3244794017149122e-05,
"loss": 3.0615,
"step": 49800
},
{
"epoch": 0.54,
"learning_rate": 2.3191068703930547e-05,
"loss": 3.0581,
"step": 49900
},
{
"epoch": 0.54,
"learning_rate": 2.313734339071197e-05,
"loss": 3.0552,
"step": 50000
},
{
"epoch": 0.54,
"learning_rate": 2.3083618077493394e-05,
"loss": 3.0419,
"step": 50100
},
{
"epoch": 0.54,
"learning_rate": 2.3029892764274816e-05,
"loss": 3.0583,
"step": 50200
},
{
"epoch": 0.54,
"learning_rate": 2.297616745105624e-05,
"loss": 3.0504,
"step": 50300
},
{
"epoch": 0.54,
"learning_rate": 2.2922442137837666e-05,
"loss": 3.0505,
"step": 50400
},
{
"epoch": 0.54,
"learning_rate": 2.2868716824619087e-05,
"loss": 3.0702,
"step": 50500
},
{
"epoch": 0.54,
"learning_rate": 2.2814991511400512e-05,
"loss": 3.0522,
"step": 50600
},
{
"epoch": 0.54,
"learning_rate": 2.2761266198181934e-05,
"loss": 3.0587,
"step": 50700
},
{
"epoch": 0.55,
"learning_rate": 2.270754088496336e-05,
"loss": 3.0547,
"step": 50800
},
{
"epoch": 0.55,
"learning_rate": 2.2653815571744784e-05,
"loss": 3.0581,
"step": 50900
},
{
"epoch": 0.55,
"learning_rate": 2.260009025852621e-05,
"loss": 3.0313,
"step": 51000
},
{
"epoch": 0.55,
"learning_rate": 2.2546364945307634e-05,
"loss": 3.047,
"step": 51100
},
{
"epoch": 0.55,
"learning_rate": 2.2492639632089056e-05,
"loss": 3.0245,
"step": 51200
},
{
"epoch": 0.55,
"learning_rate": 2.243891431887048e-05,
"loss": 3.0535,
"step": 51300
},
{
"epoch": 0.55,
"learning_rate": 2.2385189005651906e-05,
"loss": 3.049,
"step": 51400
},
{
"epoch": 0.55,
"learning_rate": 2.2331463692433327e-05,
"loss": 3.0531,
"step": 51500
},
{
"epoch": 0.55,
"learning_rate": 2.2277738379214752e-05,
"loss": 3.0522,
"step": 51600
},
{
"epoch": 0.56,
"learning_rate": 2.2224013065996177e-05,
"loss": 3.0573,
"step": 51700
},
{
"epoch": 0.56,
"learning_rate": 2.21702877527776e-05,
"loss": 3.0484,
"step": 51800
},
{
"epoch": 0.56,
"learning_rate": 2.2116562439559024e-05,
"loss": 3.0458,
"step": 51900
},
{
"epoch": 0.56,
"learning_rate": 2.2062837126340446e-05,
"loss": 3.0582,
"step": 52000
},
{
"epoch": 0.56,
"learning_rate": 2.200911181312187e-05,
"loss": 3.0332,
"step": 52100
},
{
"epoch": 0.56,
"learning_rate": 2.1955386499903296e-05,
"loss": 3.0337,
"step": 52200
},
{
"epoch": 0.56,
"learning_rate": 2.1901661186684717e-05,
"loss": 3.0453,
"step": 52300
},
{
"epoch": 0.56,
"learning_rate": 2.1847935873466142e-05,
"loss": 3.06,
"step": 52400
},
{
"epoch": 0.56,
"learning_rate": 2.1794210560247567e-05,
"loss": 3.0498,
"step": 52500
},
{
"epoch": 0.57,
"learning_rate": 2.1740485247028992e-05,
"loss": 3.0439,
"step": 52600
},
{
"epoch": 0.57,
"learning_rate": 2.1686759933810417e-05,
"loss": 3.0293,
"step": 52700
},
{
"epoch": 0.57,
"learning_rate": 2.163303462059184e-05,
"loss": 3.0305,
"step": 52800
},
{
"epoch": 0.57,
"learning_rate": 2.1579309307373264e-05,
"loss": 3.0425,
"step": 52900
},
{
"epoch": 0.57,
"learning_rate": 2.152558399415469e-05,
"loss": 3.0513,
"step": 53000
},
{
"epoch": 0.57,
"learning_rate": 2.147185868093611e-05,
"loss": 3.029,
"step": 53100
},
{
"epoch": 0.57,
"learning_rate": 2.1418133367717536e-05,
"loss": 3.0513,
"step": 53200
},
{
"epoch": 0.57,
"learning_rate": 2.1364408054498957e-05,
"loss": 3.0481,
"step": 53300
},
{
"epoch": 0.57,
"learning_rate": 2.1310682741280382e-05,
"loss": 3.0453,
"step": 53400
},
{
"epoch": 0.57,
"learning_rate": 2.1256957428061807e-05,
"loss": 3.0295,
"step": 53500
},
{
"epoch": 0.58,
"learning_rate": 2.120323211484323e-05,
"loss": 3.0375,
"step": 53600
},
{
"epoch": 0.58,
"learning_rate": 2.1149506801624654e-05,
"loss": 3.0304,
"step": 53700
},
{
"epoch": 0.58,
"learning_rate": 2.109578148840608e-05,
"loss": 3.0349,
"step": 53800
},
{
"epoch": 0.58,
"learning_rate": 2.1042056175187504e-05,
"loss": 3.0427,
"step": 53900
},
{
"epoch": 0.58,
"learning_rate": 2.098833086196893e-05,
"loss": 3.0211,
"step": 54000
},
{
"epoch": 0.58,
"learning_rate": 2.093460554875035e-05,
"loss": 3.0192,
"step": 54100
},
{
"epoch": 0.58,
"learning_rate": 2.0880880235531776e-05,
"loss": 3.0284,
"step": 54200
},
{
"epoch": 0.58,
"learning_rate": 2.0827154922313197e-05,
"loss": 3.0343,
"step": 54300
},
{
"epoch": 0.58,
"learning_rate": 2.0773429609094622e-05,
"loss": 3.0187,
"step": 54400
},
{
"epoch": 0.59,
"learning_rate": 2.0719704295876047e-05,
"loss": 3.046,
"step": 54500
},
{
"epoch": 0.59,
"learning_rate": 2.066597898265747e-05,
"loss": 3.0448,
"step": 54600
},
{
"epoch": 0.59,
"learning_rate": 2.0612253669438894e-05,
"loss": 3.0487,
"step": 54700
},
{
"epoch": 0.59,
"learning_rate": 2.055852835622032e-05,
"loss": 3.0403,
"step": 54800
},
{
"epoch": 0.59,
"learning_rate": 2.050480304300174e-05,
"loss": 3.0143,
"step": 54900
},
{
"epoch": 0.59,
"learning_rate": 2.0451077729783166e-05,
"loss": 3.0194,
"step": 55000
},
{
"epoch": 0.59,
"learning_rate": 2.0397352416564587e-05,
"loss": 3.0362,
"step": 55100
},
{
"epoch": 0.59,
"learning_rate": 2.0343627103346012e-05,
"loss": 3.0367,
"step": 55200
},
{
"epoch": 0.59,
"learning_rate": 2.0289901790127437e-05,
"loss": 3.0162,
"step": 55300
},
{
"epoch": 0.6,
"learning_rate": 2.0236176476908862e-05,
"loss": 3.0031,
"step": 55400
},
{
"epoch": 0.6,
"learning_rate": 2.0182451163690287e-05,
"loss": 3.0176,
"step": 55500
},
{
"epoch": 0.6,
"learning_rate": 2.012872585047171e-05,
"loss": 3.0395,
"step": 55600
},
{
"epoch": 0.6,
"learning_rate": 2.0075000537253134e-05,
"loss": 3.0336,
"step": 55700
},
{
"epoch": 0.6,
"learning_rate": 2.002127522403456e-05,
"loss": 3.0195,
"step": 55800
},
{
"epoch": 0.6,
"learning_rate": 1.996754991081598e-05,
"loss": 3.0234,
"step": 55900
},
{
"epoch": 0.6,
"learning_rate": 1.9913824597597406e-05,
"loss": 3.0178,
"step": 56000
},
{
"epoch": 0.6,
"learning_rate": 1.9860099284378827e-05,
"loss": 3.0128,
"step": 56100
},
{
"epoch": 0.6,
"learning_rate": 1.9806373971160252e-05,
"loss": 3.0201,
"step": 56200
},
{
"epoch": 0.6,
"learning_rate": 1.9752648657941677e-05,
"loss": 3.0197,
"step": 56300
},
{
"epoch": 0.61,
"learning_rate": 1.96989233447231e-05,
"loss": 3.0305,
"step": 56400
},
{
"epoch": 0.61,
"learning_rate": 1.9645198031504524e-05,
"loss": 3.0272,
"step": 56500
},
{
"epoch": 0.61,
"learning_rate": 1.959147271828595e-05,
"loss": 3.02,
"step": 56600
},
{
"epoch": 0.61,
"learning_rate": 1.953774740506737e-05,
"loss": 3.0387,
"step": 56700
},
{
"epoch": 0.61,
"learning_rate": 1.9484022091848796e-05,
"loss": 3.0086,
"step": 56800
},
{
"epoch": 0.61,
"learning_rate": 1.943029677863022e-05,
"loss": 3.0139,
"step": 56900
},
{
"epoch": 0.61,
"learning_rate": 1.9376571465411646e-05,
"loss": 3.0279,
"step": 57000
},
{
"epoch": 0.61,
"learning_rate": 1.932284615219307e-05,
"loss": 3.0129,
"step": 57100
},
{
"epoch": 0.61,
"learning_rate": 1.9269120838974492e-05,
"loss": 3.0109,
"step": 57200
},
{
"epoch": 0.62,
"learning_rate": 1.9215395525755917e-05,
"loss": 3.0356,
"step": 57300
},
{
"epoch": 0.62,
"learning_rate": 1.916167021253734e-05,
"loss": 3.0204,
"step": 57400
},
{
"epoch": 0.62,
"learning_rate": 1.9107944899318764e-05,
"loss": 3.0166,
"step": 57500
},
{
"epoch": 0.62,
"learning_rate": 1.905421958610019e-05,
"loss": 3.0198,
"step": 57600
},
{
"epoch": 0.62,
"learning_rate": 1.900049427288161e-05,
"loss": 3.0122,
"step": 57700
},
{
"epoch": 0.62,
"learning_rate": 1.8946768959663036e-05,
"loss": 3.0142,
"step": 57800
},
{
"epoch": 0.62,
"learning_rate": 1.889304364644446e-05,
"loss": 3.0273,
"step": 57900
},
{
"epoch": 0.62,
"learning_rate": 1.8839318333225882e-05,
"loss": 3.0013,
"step": 58000
},
{
"epoch": 0.62,
"learning_rate": 1.8785593020007307e-05,
"loss": 3.0138,
"step": 58100
},
{
"epoch": 0.63,
"learning_rate": 1.873186770678873e-05,
"loss": 3.0252,
"step": 58200
},
{
"epoch": 0.63,
"learning_rate": 1.8678142393570154e-05,
"loss": 3.0066,
"step": 58300
},
{
"epoch": 0.63,
"learning_rate": 1.862441708035158e-05,
"loss": 3.009,
"step": 58400
},
{
"epoch": 0.63,
"learning_rate": 1.8570691767133004e-05,
"loss": 3.0229,
"step": 58500
},
{
"epoch": 0.63,
"learning_rate": 1.851696645391443e-05,
"loss": 3.0152,
"step": 58600
},
{
"epoch": 0.63,
"learning_rate": 1.846324114069585e-05,
"loss": 3.0065,
"step": 58700
},
{
"epoch": 0.63,
"learning_rate": 1.8409515827477276e-05,
"loss": 3.0281,
"step": 58800
},
{
"epoch": 0.63,
"learning_rate": 1.83557905142587e-05,
"loss": 3.0189,
"step": 58900
},
{
"epoch": 0.63,
"learning_rate": 1.8302065201040122e-05,
"loss": 3.0405,
"step": 59000
},
{
"epoch": 0.64,
"learning_rate": 1.8248339887821547e-05,
"loss": 2.9986,
"step": 59100
},
{
"epoch": 0.64,
"learning_rate": 1.819461457460297e-05,
"loss": 3.0205,
"step": 59200
},
{
"epoch": 0.64,
"learning_rate": 1.8140889261384394e-05,
"loss": 3.0166,
"step": 59300
},
{
"epoch": 0.64,
"learning_rate": 1.808716394816582e-05,
"loss": 3.0273,
"step": 59400
},
{
"epoch": 0.64,
"learning_rate": 1.803343863494724e-05,
"loss": 3.0055,
"step": 59500
},
{
"epoch": 0.64,
"learning_rate": 1.7979713321728666e-05,
"loss": 3.0037,
"step": 59600
},
{
"epoch": 0.64,
"learning_rate": 1.792598800851009e-05,
"loss": 3.0052,
"step": 59700
},
{
"epoch": 0.64,
"learning_rate": 1.7872262695291516e-05,
"loss": 2.9829,
"step": 59800
},
{
"epoch": 0.64,
"learning_rate": 1.781853738207294e-05,
"loss": 3.023,
"step": 59900
},
{
"epoch": 0.64,
"learning_rate": 1.7764812068854362e-05,
"loss": 3.0099,
"step": 60000
},
{
"epoch": 0.65,
"learning_rate": 1.7711086755635787e-05,
"loss": 3.0027,
"step": 60100
},
{
"epoch": 0.65,
"learning_rate": 1.7657361442417212e-05,
"loss": 3.0003,
"step": 60200
},
{
"epoch": 0.65,
"learning_rate": 1.7603636129198634e-05,
"loss": 2.9984,
"step": 60300
},
{
"epoch": 0.65,
"learning_rate": 1.754991081598006e-05,
"loss": 3.0119,
"step": 60400
},
{
"epoch": 0.65,
"learning_rate": 1.749618550276148e-05,
"loss": 3.0055,
"step": 60500
},
{
"epoch": 0.65,
"learning_rate": 1.7442460189542906e-05,
"loss": 2.9947,
"step": 60600
},
{
"epoch": 0.65,
"learning_rate": 1.738873487632433e-05,
"loss": 3.0096,
"step": 60700
},
{
"epoch": 0.65,
"learning_rate": 1.7335009563105752e-05,
"loss": 3.0118,
"step": 60800
},
{
"epoch": 0.65,
"learning_rate": 1.7281284249887177e-05,
"loss": 3.0163,
"step": 60900
},
{
"epoch": 0.66,
"learning_rate": 1.7227558936668602e-05,
"loss": 2.9818,
"step": 61000
},
{
"epoch": 0.66,
"learning_rate": 1.7173833623450024e-05,
"loss": 3.0006,
"step": 61100
},
{
"epoch": 0.66,
"learning_rate": 1.712010831023145e-05,
"loss": 3.0059,
"step": 61200
},
{
"epoch": 0.66,
"learning_rate": 1.7066382997012874e-05,
"loss": 3.0002,
"step": 61300
},
{
"epoch": 0.66,
"learning_rate": 1.70126576837943e-05,
"loss": 3.0119,
"step": 61400
},
{
"epoch": 0.66,
"learning_rate": 1.6958932370575724e-05,
"loss": 3.0018,
"step": 61500
},
{
"epoch": 0.66,
"learning_rate": 1.6905207057357146e-05,
"loss": 2.9874,
"step": 61600
},
{
"epoch": 0.66,
"learning_rate": 1.685148174413857e-05,
"loss": 3.0156,
"step": 61700
},
{
"epoch": 0.66,
"learning_rate": 1.6797756430919992e-05,
"loss": 2.9997,
"step": 61800
},
{
"epoch": 0.67,
"learning_rate": 1.6744031117701417e-05,
"loss": 2.9948,
"step": 61900
},
{
"epoch": 0.67,
"learning_rate": 1.6690305804482842e-05,
"loss": 2.9873,
"step": 62000
},
{
"epoch": 0.67,
"learning_rate": 1.6636580491264264e-05,
"loss": 2.9876,
"step": 62100
},
{
"epoch": 0.67,
"learning_rate": 1.658285517804569e-05,
"loss": 2.9849,
"step": 62200
},
{
"epoch": 0.67,
"learning_rate": 1.652912986482711e-05,
"loss": 2.9775,
"step": 62300
},
{
"epoch": 0.67,
"learning_rate": 1.6475404551608536e-05,
"loss": 3.0072,
"step": 62400
},
{
"epoch": 0.67,
"learning_rate": 1.642167923838996e-05,
"loss": 2.998,
"step": 62500
},
{
"epoch": 0.67,
"learning_rate": 1.6367953925171382e-05,
"loss": 3.013,
"step": 62600
},
{
"epoch": 0.67,
"learning_rate": 1.6314228611952807e-05,
"loss": 2.9793,
"step": 62700
},
{
"epoch": 0.67,
"learning_rate": 1.6260503298734232e-05,
"loss": 3.0064,
"step": 62800
},
{
"epoch": 0.68,
"learning_rate": 1.6206777985515657e-05,
"loss": 2.9916,
"step": 62900
},
{
"epoch": 0.68,
"learning_rate": 1.6153052672297082e-05,
"loss": 2.9871,
"step": 63000
},
{
"epoch": 0.68,
"learning_rate": 1.6099327359078504e-05,
"loss": 3.0033,
"step": 63100
},
{
"epoch": 0.68,
"learning_rate": 1.604560204585993e-05,
"loss": 3.006,
"step": 63200
},
{
"epoch": 0.68,
"learning_rate": 1.5991876732641354e-05,
"loss": 2.9815,
"step": 63300
},
{
"epoch": 0.68,
"learning_rate": 1.5938151419422776e-05,
"loss": 3.0001,
"step": 63400
},
{
"epoch": 0.68,
"learning_rate": 1.58844261062042e-05,
"loss": 2.9908,
"step": 63500
},
{
"epoch": 0.68,
"learning_rate": 1.5830700792985622e-05,
"loss": 2.9877,
"step": 63600
},
{
"epoch": 0.68,
"learning_rate": 1.5776975479767047e-05,
"loss": 2.9779,
"step": 63700
},
{
"epoch": 0.69,
"learning_rate": 1.5723250166548472e-05,
"loss": 3.009,
"step": 63800
},
{
"epoch": 0.69,
"learning_rate": 1.5669524853329894e-05,
"loss": 3.0003,
"step": 63900
},
{
"epoch": 0.69,
"learning_rate": 1.561579954011132e-05,
"loss": 2.9948,
"step": 64000
},
{
"epoch": 0.69,
"learning_rate": 1.5562074226892744e-05,
"loss": 2.9978,
"step": 64100
},
{
"epoch": 0.69,
"learning_rate": 1.550834891367417e-05,
"loss": 3.0091,
"step": 64200
},
{
"epoch": 0.69,
"learning_rate": 1.5454623600455594e-05,
"loss": 2.9987,
"step": 64300
},
{
"epoch": 0.69,
"learning_rate": 1.5400898287237016e-05,
"loss": 3.0084,
"step": 64400
},
{
"epoch": 0.69,
"learning_rate": 1.534717297401844e-05,
"loss": 2.9817,
"step": 64500
},
{
"epoch": 0.69,
"learning_rate": 1.5293447660799866e-05,
"loss": 2.986,
"step": 64600
},
{
"epoch": 0.7,
"learning_rate": 1.5239722347581287e-05,
"loss": 2.9616,
"step": 64700
},
{
"epoch": 0.7,
"learning_rate": 1.5185997034362712e-05,
"loss": 3.0026,
"step": 64800
},
{
"epoch": 0.7,
"learning_rate": 1.5132271721144134e-05,
"loss": 3.0036,
"step": 64900
},
{
"epoch": 0.7,
"learning_rate": 1.5078546407925559e-05,
"loss": 2.9779,
"step": 65000
},
{
"epoch": 0.7,
"learning_rate": 1.5024821094706984e-05,
"loss": 2.9908,
"step": 65100
},
{
"epoch": 0.7,
"learning_rate": 1.4971095781488406e-05,
"loss": 2.9599,
"step": 65200
},
{
"epoch": 0.7,
"learning_rate": 1.491737046826983e-05,
"loss": 2.983,
"step": 65300
},
{
"epoch": 0.7,
"learning_rate": 1.4863645155051254e-05,
"loss": 2.9879,
"step": 65400
},
{
"epoch": 0.7,
"learning_rate": 1.4809919841832679e-05,
"loss": 2.9967,
"step": 65500
},
{
"epoch": 0.7,
"learning_rate": 1.4756194528614104e-05,
"loss": 2.9856,
"step": 65600
},
{
"epoch": 0.71,
"learning_rate": 1.4702469215395526e-05,
"loss": 2.9935,
"step": 65700
},
{
"epoch": 0.71,
"learning_rate": 1.464874390217695e-05,
"loss": 3.0014,
"step": 65800
},
{
"epoch": 0.71,
"learning_rate": 1.4595018588958376e-05,
"loss": 2.9788,
"step": 65900
},
{
"epoch": 0.71,
"learning_rate": 1.4541293275739797e-05,
"loss": 2.9903,
"step": 66000
},
{
"epoch": 0.71,
"learning_rate": 1.4487567962521222e-05,
"loss": 2.9852,
"step": 66100
},
{
"epoch": 0.71,
"learning_rate": 1.4433842649302646e-05,
"loss": 2.9745,
"step": 66200
},
{
"epoch": 0.71,
"learning_rate": 1.438011733608407e-05,
"loss": 2.9798,
"step": 66300
},
{
"epoch": 0.71,
"learning_rate": 1.4326392022865496e-05,
"loss": 2.9742,
"step": 66400
},
{
"epoch": 0.71,
"learning_rate": 1.4272666709646917e-05,
"loss": 2.9823,
"step": 66500
},
{
"epoch": 0.72,
"learning_rate": 1.4218941396428342e-05,
"loss": 3.0184,
"step": 66600
},
{
"epoch": 0.72,
"learning_rate": 1.4165216083209766e-05,
"loss": 2.9815,
"step": 66700
},
{
"epoch": 0.72,
"learning_rate": 1.411149076999119e-05,
"loss": 2.9928,
"step": 66800
},
{
"epoch": 0.72,
"learning_rate": 1.4057765456772614e-05,
"loss": 2.9789,
"step": 66900
},
{
"epoch": 0.72,
"learning_rate": 1.4004040143554037e-05,
"loss": 2.9855,
"step": 67000
},
{
"epoch": 0.72,
"learning_rate": 1.3950314830335462e-05,
"loss": 2.9864,
"step": 67100
},
{
"epoch": 0.72,
"learning_rate": 1.3896589517116884e-05,
"loss": 2.9785,
"step": 67200
},
{
"epoch": 0.72,
"learning_rate": 1.3842864203898309e-05,
"loss": 2.9839,
"step": 67300
},
{
"epoch": 0.72,
"learning_rate": 1.3789138890679734e-05,
"loss": 2.9921,
"step": 67400
},
{
"epoch": 0.73,
"learning_rate": 1.3735413577461157e-05,
"loss": 2.9869,
"step": 67500
},
{
"epoch": 0.73,
"learning_rate": 1.3681688264242582e-05,
"loss": 2.9844,
"step": 67600
},
{
"epoch": 0.73,
"learning_rate": 1.3627962951024007e-05,
"loss": 2.9994,
"step": 67700
},
{
"epoch": 0.73,
"learning_rate": 1.3574237637805429e-05,
"loss": 3.0018,
"step": 67800
},
{
"epoch": 0.73,
"learning_rate": 1.3520512324586854e-05,
"loss": 2.9944,
"step": 67900
},
{
"epoch": 0.73,
"learning_rate": 1.3466787011368275e-05,
"loss": 2.9901,
"step": 68000
},
{
"epoch": 0.73,
"learning_rate": 1.34130616981497e-05,
"loss": 2.9732,
"step": 68100
},
{
"epoch": 0.73,
"learning_rate": 1.3359336384931126e-05,
"loss": 2.9796,
"step": 68200
},
{
"epoch": 0.73,
"learning_rate": 1.3305611071712549e-05,
"loss": 2.9846,
"step": 68300
},
{
"epoch": 0.73,
"learning_rate": 1.3251885758493974e-05,
"loss": 2.9687,
"step": 68400
},
{
"epoch": 0.74,
"learning_rate": 1.3198160445275395e-05,
"loss": 2.9571,
"step": 68500
},
{
"epoch": 0.74,
"learning_rate": 1.314443513205682e-05,
"loss": 2.9861,
"step": 68600
},
{
"epoch": 0.74,
"learning_rate": 1.3090709818838246e-05,
"loss": 2.9738,
"step": 68700
},
{
"epoch": 0.74,
"learning_rate": 1.3036984505619667e-05,
"loss": 2.9821,
"step": 68800
},
{
"epoch": 0.74,
"learning_rate": 1.2983259192401092e-05,
"loss": 2.9789,
"step": 68900
},
{
"epoch": 0.74,
"learning_rate": 1.2929533879182515e-05,
"loss": 2.974,
"step": 69000
},
{
"epoch": 0.74,
"learning_rate": 1.287580856596394e-05,
"loss": 2.9888,
"step": 69100
},
{
"epoch": 0.74,
"learning_rate": 1.2822083252745366e-05,
"loss": 2.9694,
"step": 69200
},
{
"epoch": 0.74,
"learning_rate": 1.2768357939526787e-05,
"loss": 2.9983,
"step": 69300
},
{
"epoch": 0.75,
"learning_rate": 1.2714632626308212e-05,
"loss": 2.9751,
"step": 69400
},
{
"epoch": 0.75,
"learning_rate": 1.2660907313089637e-05,
"loss": 2.9692,
"step": 69500
},
{
"epoch": 0.75,
"learning_rate": 1.2607181999871059e-05,
"loss": 2.984,
"step": 69600
},
{
"epoch": 0.75,
"learning_rate": 1.2553456686652484e-05,
"loss": 2.9872,
"step": 69700
},
{
"epoch": 0.75,
"learning_rate": 1.2499731373433909e-05,
"loss": 2.9707,
"step": 69800
},
{
"epoch": 0.75,
"learning_rate": 1.2446006060215332e-05,
"loss": 2.9773,
"step": 69900
},
{
"epoch": 0.75,
"learning_rate": 1.2392280746996756e-05,
"loss": 2.9541,
"step": 70000
},
{
"epoch": 0.75,
"learning_rate": 1.2338555433778179e-05,
"loss": 2.9744,
"step": 70100
},
{
"epoch": 0.75,
"learning_rate": 1.2284830120559604e-05,
"loss": 2.9663,
"step": 70200
},
{
"epoch": 0.76,
"learning_rate": 1.2231104807341027e-05,
"loss": 2.9829,
"step": 70300
},
{
"epoch": 0.76,
"learning_rate": 1.217737949412245e-05,
"loss": 2.9608,
"step": 70400
},
{
"epoch": 0.76,
"learning_rate": 1.2123654180903876e-05,
"loss": 2.9643,
"step": 70500
},
{
"epoch": 0.76,
"learning_rate": 1.20699288676853e-05,
"loss": 2.98,
"step": 70600
},
{
"epoch": 0.76,
"learning_rate": 1.2016203554466724e-05,
"loss": 2.9789,
"step": 70700
},
{
"epoch": 0.76,
"learning_rate": 1.1962478241248147e-05,
"loss": 2.9822,
"step": 70800
},
{
"epoch": 0.76,
"learning_rate": 1.190875292802957e-05,
"loss": 2.9851,
"step": 70900
},
{
"epoch": 0.76,
"learning_rate": 1.1855027614810994e-05,
"loss": 2.984,
"step": 71000
},
{
"epoch": 0.76,
"learning_rate": 1.1801302301592419e-05,
"loss": 2.965,
"step": 71100
},
{
"epoch": 0.77,
"learning_rate": 1.1747576988373842e-05,
"loss": 2.9655,
"step": 71200
},
{
"epoch": 0.77,
"learning_rate": 1.1693851675155267e-05,
"loss": 2.9702,
"step": 71300
},
{
"epoch": 0.77,
"learning_rate": 1.164012636193669e-05,
"loss": 2.955,
"step": 71400
},
{
"epoch": 0.77,
"learning_rate": 1.1586401048718116e-05,
"loss": 2.9786,
"step": 71500
},
{
"epoch": 0.77,
"learning_rate": 1.1532675735499539e-05,
"loss": 2.9561,
"step": 71600
},
{
"epoch": 0.77,
"learning_rate": 1.1478950422280962e-05,
"loss": 2.9682,
"step": 71700
},
{
"epoch": 0.77,
"learning_rate": 1.1425225109062385e-05,
"loss": 2.9724,
"step": 71800
},
{
"epoch": 0.77,
"learning_rate": 1.1371499795843809e-05,
"loss": 2.9945,
"step": 71900
},
{
"epoch": 0.77,
"learning_rate": 1.1317774482625234e-05,
"loss": 2.9724,
"step": 72000
},
{
"epoch": 0.77,
"learning_rate": 1.1264049169406659e-05,
"loss": 2.9741,
"step": 72100
},
{
"epoch": 0.78,
"learning_rate": 1.1210323856188082e-05,
"loss": 2.9821,
"step": 72200
},
{
"epoch": 0.78,
"learning_rate": 1.1156598542969505e-05,
"loss": 2.9709,
"step": 72300
},
{
"epoch": 0.78,
"learning_rate": 1.110287322975093e-05,
"loss": 2.9856,
"step": 72400
},
{
"epoch": 0.78,
"learning_rate": 1.1049147916532354e-05,
"loss": 2.9851,
"step": 72500
},
{
"epoch": 0.78,
"learning_rate": 1.0995422603313777e-05,
"loss": 2.9593,
"step": 72600
},
{
"epoch": 0.78,
"learning_rate": 1.0941697290095202e-05,
"loss": 2.9824,
"step": 72700
},
{
"epoch": 0.78,
"learning_rate": 1.0887971976876627e-05,
"loss": 2.9747,
"step": 72800
},
{
"epoch": 0.78,
"learning_rate": 1.083424666365805e-05,
"loss": 2.9932,
"step": 72900
},
{
"epoch": 0.78,
"learning_rate": 1.0780521350439474e-05,
"loss": 2.9805,
"step": 73000
},
{
"epoch": 0.79,
"learning_rate": 1.0726796037220897e-05,
"loss": 2.9625,
"step": 73100
},
{
"epoch": 0.79,
"learning_rate": 1.067307072400232e-05,
"loss": 2.9838,
"step": 73200
},
{
"epoch": 0.79,
"learning_rate": 1.0619345410783745e-05,
"loss": 2.9478,
"step": 73300
},
{
"epoch": 0.79,
"learning_rate": 1.0565620097565169e-05,
"loss": 2.9683,
"step": 73400
},
{
"epoch": 0.79,
"learning_rate": 1.0511894784346594e-05,
"loss": 2.9608,
"step": 73500
},
{
"epoch": 0.79,
"learning_rate": 1.0458169471128017e-05,
"loss": 2.978,
"step": 73600
},
{
"epoch": 0.79,
"learning_rate": 1.0404444157909442e-05,
"loss": 2.968,
"step": 73700
},
{
"epoch": 0.79,
"learning_rate": 1.0350718844690866e-05,
"loss": 2.9686,
"step": 73800
},
{
"epoch": 0.79,
"learning_rate": 1.0296993531472289e-05,
"loss": 2.9813,
"step": 73900
},
{
"epoch": 0.8,
"learning_rate": 1.0243268218253712e-05,
"loss": 2.9761,
"step": 74000
},
{
"epoch": 0.8,
"learning_rate": 1.0189542905035135e-05,
"loss": 2.9858,
"step": 74100
},
{
"epoch": 0.8,
"learning_rate": 1.013581759181656e-05,
"loss": 2.9616,
"step": 74200
},
{
"epoch": 0.8,
"learning_rate": 1.0082092278597986e-05,
"loss": 2.9711,
"step": 74300
},
{
"epoch": 0.8,
"learning_rate": 1.0028366965379409e-05,
"loss": 2.9809,
"step": 74400
},
{
"epoch": 0.8,
"learning_rate": 9.974641652160832e-06,
"loss": 2.9619,
"step": 74500
},
{
"epoch": 0.8,
"learning_rate": 9.920916338942257e-06,
"loss": 2.9628,
"step": 74600
},
{
"epoch": 0.8,
"learning_rate": 9.86719102572368e-06,
"loss": 2.9793,
"step": 74700
},
{
"epoch": 0.8,
"learning_rate": 9.813465712505104e-06,
"loss": 2.9596,
"step": 74800
},
{
"epoch": 0.8,
"learning_rate": 9.759740399286529e-06,
"loss": 2.9769,
"step": 74900
},
{
"epoch": 0.81,
"learning_rate": 9.706015086067952e-06,
"loss": 2.9778,
"step": 75000
},
{
"epoch": 0.81,
"learning_rate": 9.652289772849377e-06,
"loss": 2.9649,
"step": 75100
},
{
"epoch": 0.81,
"learning_rate": 9.5985644596308e-06,
"loss": 2.9731,
"step": 75200
},
{
"epoch": 0.81,
"learning_rate": 9.544839146412224e-06,
"loss": 2.9628,
"step": 75300
},
{
"epoch": 0.81,
"learning_rate": 9.491113833193647e-06,
"loss": 2.9702,
"step": 75400
},
{
"epoch": 0.81,
"learning_rate": 9.437388519975072e-06,
"loss": 2.9645,
"step": 75500
},
{
"epoch": 0.81,
"learning_rate": 9.383663206756495e-06,
"loss": 2.9748,
"step": 75600
},
{
"epoch": 0.81,
"learning_rate": 9.32993789353792e-06,
"loss": 2.9696,
"step": 75700
},
{
"epoch": 0.81,
"learning_rate": 9.276212580319344e-06,
"loss": 2.9631,
"step": 75800
},
{
"epoch": 0.82,
"learning_rate": 9.222487267100767e-06,
"loss": 2.959,
"step": 75900
},
{
"epoch": 0.82,
"learning_rate": 9.168761953882192e-06,
"loss": 2.9405,
"step": 76000
},
{
"epoch": 0.82,
"learning_rate": 9.115036640663615e-06,
"loss": 2.9729,
"step": 76100
},
{
"epoch": 0.82,
"learning_rate": 9.061311327445039e-06,
"loss": 2.9618,
"step": 76200
},
{
"epoch": 0.82,
"learning_rate": 9.007586014226462e-06,
"loss": 2.9566,
"step": 76300
},
{
"epoch": 0.82,
"learning_rate": 8.953860701007887e-06,
"loss": 2.9647,
"step": 76400
},
{
"epoch": 0.82,
"learning_rate": 8.900135387789312e-06,
"loss": 2.9536,
"step": 76500
},
{
"epoch": 0.82,
"learning_rate": 8.846410074570735e-06,
"loss": 2.9571,
"step": 76600
},
{
"epoch": 0.82,
"learning_rate": 8.792684761352159e-06,
"loss": 2.9656,
"step": 76700
},
{
"epoch": 0.83,
"learning_rate": 8.738959448133582e-06,
"loss": 2.9677,
"step": 76800
},
{
"epoch": 0.83,
"learning_rate": 8.685234134915007e-06,
"loss": 2.9682,
"step": 76900
},
{
"epoch": 0.83,
"learning_rate": 8.63150882169643e-06,
"loss": 2.9556,
"step": 77000
},
{
"epoch": 0.83,
"learning_rate": 8.577783508477854e-06,
"loss": 2.9492,
"step": 77100
},
{
"epoch": 0.83,
"learning_rate": 8.524058195259279e-06,
"loss": 2.9708,
"step": 77200
},
{
"epoch": 0.83,
"learning_rate": 8.470332882040704e-06,
"loss": 2.9656,
"step": 77300
},
{
"epoch": 0.83,
"learning_rate": 8.416607568822127e-06,
"loss": 2.9421,
"step": 77400
},
{
"epoch": 0.83,
"learning_rate": 8.36288225560355e-06,
"loss": 2.9586,
"step": 77500
},
{
"epoch": 0.83,
"learning_rate": 8.309156942384974e-06,
"loss": 2.9567,
"step": 77600
},
{
"epoch": 0.83,
"learning_rate": 8.255431629166399e-06,
"loss": 2.9479,
"step": 77700
},
{
"epoch": 0.84,
"learning_rate": 8.201706315947822e-06,
"loss": 2.9617,
"step": 77800
},
{
"epoch": 0.84,
"learning_rate": 8.147981002729247e-06,
"loss": 2.9502,
"step": 77900
},
{
"epoch": 0.84,
"learning_rate": 8.09425568951067e-06,
"loss": 2.9544,
"step": 78000
},
{
"epoch": 0.84,
"learning_rate": 8.040530376292094e-06,
"loss": 2.9817,
"step": 78100
},
{
"epoch": 0.84,
"learning_rate": 7.986805063073519e-06,
"loss": 2.9541,
"step": 78200
},
{
"epoch": 0.84,
"learning_rate": 7.933079749854942e-06,
"loss": 2.9334,
"step": 78300
},
{
"epoch": 0.84,
"learning_rate": 7.879354436636365e-06,
"loss": 2.961,
"step": 78400
},
{
"epoch": 0.84,
"learning_rate": 7.825629123417789e-06,
"loss": 2.9474,
"step": 78500
},
{
"epoch": 0.84,
"learning_rate": 7.771903810199214e-06,
"loss": 2.9542,
"step": 78600
},
{
"epoch": 0.85,
"learning_rate": 7.718178496980639e-06,
"loss": 2.9471,
"step": 78700
},
{
"epoch": 0.85,
"learning_rate": 7.664453183762062e-06,
"loss": 2.9687,
"step": 78800
},
{
"epoch": 0.85,
"learning_rate": 7.6107278705434855e-06,
"loss": 2.9729,
"step": 78900
},
{
"epoch": 0.85,
"learning_rate": 7.557002557324909e-06,
"loss": 2.9371,
"step": 79000
},
{
"epoch": 0.85,
"learning_rate": 7.503277244106334e-06,
"loss": 2.9538,
"step": 79100
},
{
"epoch": 0.85,
"learning_rate": 7.449551930887758e-06,
"loss": 2.9722,
"step": 79200
},
{
"epoch": 0.85,
"learning_rate": 7.395826617669181e-06,
"loss": 2.9506,
"step": 79300
},
{
"epoch": 0.85,
"learning_rate": 7.342101304450605e-06,
"loss": 2.9599,
"step": 79400
},
{
"epoch": 0.85,
"learning_rate": 7.28837599123203e-06,
"loss": 2.9455,
"step": 79500
},
{
"epoch": 0.86,
"learning_rate": 7.234650678013454e-06,
"loss": 2.9531,
"step": 79600
},
{
"epoch": 0.86,
"learning_rate": 7.180925364794877e-06,
"loss": 2.9683,
"step": 79700
},
{
"epoch": 0.86,
"learning_rate": 7.1272000515763005e-06,
"loss": 2.9588,
"step": 79800
},
{
"epoch": 0.86,
"learning_rate": 7.073474738357725e-06,
"loss": 2.9451,
"step": 79900
},
{
"epoch": 0.86,
"learning_rate": 7.01974942513915e-06,
"loss": 2.9675,
"step": 80000
},
{
"epoch": 0.86,
"learning_rate": 6.966024111920573e-06,
"loss": 2.977,
"step": 80100
},
{
"epoch": 0.86,
"learning_rate": 6.912298798701996e-06,
"loss": 2.9605,
"step": 80200
},
{
"epoch": 0.86,
"learning_rate": 6.8585734854834205e-06,
"loss": 2.9543,
"step": 80300
},
{
"epoch": 0.86,
"learning_rate": 6.8048481722648455e-06,
"loss": 2.9674,
"step": 80400
},
{
"epoch": 0.86,
"learning_rate": 6.751122859046269e-06,
"loss": 2.9502,
"step": 80500
},
{
"epoch": 0.87,
"learning_rate": 6.697397545827692e-06,
"loss": 2.9688,
"step": 80600
},
{
"epoch": 0.87,
"learning_rate": 6.643672232609116e-06,
"loss": 2.953,
"step": 80700
},
{
"epoch": 0.87,
"learning_rate": 6.58994691939054e-06,
"loss": 2.9611,
"step": 80800
},
{
"epoch": 0.87,
"learning_rate": 6.536221606171965e-06,
"loss": 2.9709,
"step": 80900
},
{
"epoch": 0.87,
"learning_rate": 6.482496292953388e-06,
"loss": 2.9602,
"step": 81000
},
{
"epoch": 0.87,
"learning_rate": 6.428770979734812e-06,
"loss": 2.9573,
"step": 81100
},
{
"epoch": 0.87,
"learning_rate": 6.3750456665162355e-06,
"loss": 2.9386,
"step": 81200
},
{
"epoch": 0.87,
"learning_rate": 6.3213203532976605e-06,
"loss": 2.9374,
"step": 81300
},
{
"epoch": 0.87,
"learning_rate": 6.267595040079084e-06,
"loss": 2.9537,
"step": 81400
},
{
"epoch": 0.88,
"learning_rate": 6.213869726860508e-06,
"loss": 2.9391,
"step": 81500
},
{
"epoch": 0.88,
"learning_rate": 6.160144413641932e-06,
"loss": 2.9754,
"step": 81600
},
{
"epoch": 0.88,
"learning_rate": 6.1064191004233555e-06,
"loss": 2.9434,
"step": 81700
},
{
"epoch": 0.88,
"learning_rate": 6.05269378720478e-06,
"loss": 2.9432,
"step": 81800
},
{
"epoch": 0.88,
"learning_rate": 5.998968473986204e-06,
"loss": 2.9243,
"step": 81900
},
{
"epoch": 0.88,
"learning_rate": 5.945243160767627e-06,
"loss": 2.9679,
"step": 82000
},
{
"epoch": 0.88,
"learning_rate": 5.891517847549051e-06,
"loss": 2.9509,
"step": 82100
},
{
"epoch": 0.88,
"learning_rate": 5.8377925343304755e-06,
"loss": 2.9465,
"step": 82200
},
{
"epoch": 0.88,
"learning_rate": 5.7840672211119e-06,
"loss": 2.962,
"step": 82300
},
{
"epoch": 0.89,
"learning_rate": 5.730341907893323e-06,
"loss": 2.9654,
"step": 82400
},
{
"epoch": 0.89,
"learning_rate": 5.676616594674747e-06,
"loss": 2.9357,
"step": 82500
},
{
"epoch": 0.89,
"learning_rate": 5.622891281456171e-06,
"loss": 2.9672,
"step": 82600
},
{
"epoch": 0.89,
"learning_rate": 5.569165968237595e-06,
"loss": 2.9374,
"step": 82700
},
{
"epoch": 0.89,
"learning_rate": 5.515440655019019e-06,
"loss": 2.954,
"step": 82800
},
{
"epoch": 0.89,
"learning_rate": 5.461715341800443e-06,
"loss": 2.9793,
"step": 82900
},
{
"epoch": 0.89,
"learning_rate": 5.407990028581867e-06,
"loss": 2.9376,
"step": 83000
},
{
"epoch": 0.89,
"learning_rate": 5.3542647153632905e-06,
"loss": 2.9496,
"step": 83100
},
{
"epoch": 0.89,
"learning_rate": 5.300539402144715e-06,
"loss": 2.958,
"step": 83200
},
{
"epoch": 0.9,
"learning_rate": 5.246814088926139e-06,
"loss": 2.9588,
"step": 83300
},
{
"epoch": 0.9,
"learning_rate": 5.193088775707563e-06,
"loss": 2.9388,
"step": 83400
},
{
"epoch": 0.9,
"learning_rate": 5.139363462488986e-06,
"loss": 2.9555,
"step": 83500
},
{
"epoch": 0.9,
"learning_rate": 5.0856381492704105e-06,
"loss": 2.9699,
"step": 83600
},
{
"epoch": 0.9,
"learning_rate": 5.031912836051835e-06,
"loss": 2.9568,
"step": 83700
},
{
"epoch": 0.9,
"learning_rate": 4.978187522833258e-06,
"loss": 2.9578,
"step": 83800
},
{
"epoch": 0.9,
"learning_rate": 4.924462209614682e-06,
"loss": 2.955,
"step": 83900
},
{
"epoch": 0.9,
"learning_rate": 4.870736896396106e-06,
"loss": 2.9452,
"step": 84000
},
{
"epoch": 0.9,
"learning_rate": 4.8170115831775305e-06,
"loss": 2.9506,
"step": 84100
},
{
"epoch": 0.9,
"learning_rate": 4.763286269958954e-06,
"loss": 2.954,
"step": 84200
},
{
"epoch": 0.91,
"learning_rate": 4.709560956740378e-06,
"loss": 2.9648,
"step": 84300
},
{
"epoch": 0.91,
"learning_rate": 4.655835643521802e-06,
"loss": 2.9492,
"step": 84400
},
{
"epoch": 0.91,
"learning_rate": 4.602110330303226e-06,
"loss": 2.9439,
"step": 84500
},
{
"epoch": 0.91,
"learning_rate": 4.54838501708465e-06,
"loss": 2.9686,
"step": 84600
},
{
"epoch": 0.91,
"learning_rate": 4.494659703866074e-06,
"loss": 2.9298,
"step": 84700
},
{
"epoch": 0.91,
"learning_rate": 4.440934390647498e-06,
"loss": 2.9509,
"step": 84800
},
{
"epoch": 0.91,
"learning_rate": 4.387209077428921e-06,
"loss": 2.9489,
"step": 84900
},
{
"epoch": 0.91,
"learning_rate": 4.3334837642103455e-06,
"loss": 2.9448,
"step": 85000
},
{
"epoch": 0.91,
"learning_rate": 4.27975845099177e-06,
"loss": 2.9309,
"step": 85100
},
{
"epoch": 0.92,
"learning_rate": 4.226033137773194e-06,
"loss": 2.9514,
"step": 85200
},
{
"epoch": 0.92,
"learning_rate": 4.172307824554617e-06,
"loss": 2.9408,
"step": 85300
},
{
"epoch": 0.92,
"learning_rate": 4.118582511336041e-06,
"loss": 2.9532,
"step": 85400
},
{
"epoch": 0.92,
"learning_rate": 4.0648571981174655e-06,
"loss": 2.9538,
"step": 85500
},
{
"epoch": 0.92,
"learning_rate": 4.01113188489889e-06,
"loss": 2.9652,
"step": 85600
},
{
"epoch": 0.92,
"learning_rate": 3.957406571680313e-06,
"loss": 2.96,
"step": 85700
},
{
"epoch": 0.92,
"learning_rate": 3.903681258461737e-06,
"loss": 2.9516,
"step": 85800
},
{
"epoch": 0.92,
"learning_rate": 3.849955945243161e-06,
"loss": 2.9592,
"step": 85900
},
{
"epoch": 0.92,
"learning_rate": 3.7962306320245846e-06,
"loss": 2.9412,
"step": 86000
},
{
"epoch": 0.93,
"learning_rate": 3.742505318806009e-06,
"loss": 2.9633,
"step": 86100
},
{
"epoch": 0.93,
"learning_rate": 3.6887800055874325e-06,
"loss": 2.9539,
"step": 86200
},
{
"epoch": 0.93,
"learning_rate": 3.635054692368857e-06,
"loss": 2.9439,
"step": 86300
},
{
"epoch": 0.93,
"learning_rate": 3.5813293791502805e-06,
"loss": 2.9333,
"step": 86400
},
{
"epoch": 0.93,
"learning_rate": 3.527604065931705e-06,
"loss": 2.9486,
"step": 86500
},
{
"epoch": 0.93,
"learning_rate": 3.4738787527131284e-06,
"loss": 2.9526,
"step": 86600
},
{
"epoch": 0.93,
"learning_rate": 3.420153439494552e-06,
"loss": 2.9414,
"step": 86700
},
{
"epoch": 0.93,
"learning_rate": 3.3664281262759763e-06,
"loss": 2.9411,
"step": 86800
},
{
"epoch": 0.93,
"learning_rate": 3.3127028130574e-06,
"loss": 2.9524,
"step": 86900
},
{
"epoch": 0.93,
"learning_rate": 3.258977499838824e-06,
"loss": 2.935,
"step": 87000
},
{
"epoch": 0.94,
"learning_rate": 3.205252186620248e-06,
"loss": 2.9573,
"step": 87100
},
{
"epoch": 0.94,
"learning_rate": 3.151526873401672e-06,
"loss": 2.9461,
"step": 87200
},
{
"epoch": 0.94,
"learning_rate": 3.097801560183096e-06,
"loss": 2.9542,
"step": 87300
},
{
"epoch": 0.94,
"learning_rate": 3.04407624696452e-06,
"loss": 2.9429,
"step": 87400
},
{
"epoch": 0.94,
"learning_rate": 2.9903509337459438e-06,
"loss": 2.9306,
"step": 87500
},
{
"epoch": 0.94,
"learning_rate": 2.936625620527368e-06,
"loss": 2.9299,
"step": 87600
},
{
"epoch": 0.94,
"learning_rate": 2.8829003073087917e-06,
"loss": 2.9419,
"step": 87700
},
{
"epoch": 0.94,
"learning_rate": 2.829174994090216e-06,
"loss": 2.9431,
"step": 87800
},
{
"epoch": 0.94,
"learning_rate": 2.7754496808716396e-06,
"loss": 2.9366,
"step": 87900
},
{
"epoch": 0.95,
"learning_rate": 2.7217243676530638e-06,
"loss": 2.9337,
"step": 88000
},
{
"epoch": 0.95,
"learning_rate": 2.667999054434487e-06,
"loss": 2.9475,
"step": 88100
},
{
"epoch": 0.95,
"learning_rate": 2.6142737412159113e-06,
"loss": 2.9555,
"step": 88200
},
{
"epoch": 0.95,
"learning_rate": 2.5605484279973355e-06,
"loss": 2.9391,
"step": 88300
},
{
"epoch": 0.95,
"learning_rate": 2.506823114778759e-06,
"loss": 2.9554,
"step": 88400
},
{
"epoch": 0.95,
"learning_rate": 2.4530978015601834e-06,
"loss": 2.9429,
"step": 88500
},
{
"epoch": 0.95,
"learning_rate": 2.399372488341607e-06,
"loss": 2.9365,
"step": 88600
},
{
"epoch": 0.95,
"learning_rate": 2.3456471751230313e-06,
"loss": 2.9501,
"step": 88700
},
{
"epoch": 0.95,
"learning_rate": 2.291921861904455e-06,
"loss": 2.9633,
"step": 88800
},
{
"epoch": 0.96,
"learning_rate": 2.238196548685879e-06,
"loss": 2.9606,
"step": 88900
},
{
"epoch": 0.96,
"learning_rate": 2.184471235467303e-06,
"loss": 2.9359,
"step": 89000
},
{
"epoch": 0.96,
"learning_rate": 2.1307459222487267e-06,
"loss": 2.9599,
"step": 89100
},
{
"epoch": 0.96,
"learning_rate": 2.0770206090301504e-06,
"loss": 2.9549,
"step": 89200
},
{
"epoch": 0.96,
"learning_rate": 2.0232952958115746e-06,
"loss": 2.9354,
"step": 89300
},
{
"epoch": 0.96,
"learning_rate": 1.9695699825929984e-06,
"loss": 2.9683,
"step": 89400
},
{
"epoch": 0.96,
"learning_rate": 1.9158446693744225e-06,
"loss": 2.9424,
"step": 89500
},
{
"epoch": 0.96,
"learning_rate": 1.8621193561558465e-06,
"loss": 2.9396,
"step": 89600
},
{
"epoch": 0.96,
"learning_rate": 1.8083940429372704e-06,
"loss": 2.9411,
"step": 89700
},
{
"epoch": 0.96,
"learning_rate": 1.7546687297186944e-06,
"loss": 2.9527,
"step": 89800
},
{
"epoch": 0.97,
"learning_rate": 1.7009434165001184e-06,
"loss": 2.9359,
"step": 89900
},
{
"epoch": 0.97,
"learning_rate": 1.6472181032815423e-06,
"loss": 2.9402,
"step": 90000
},
{
"epoch": 0.97,
"learning_rate": 1.593492790062966e-06,
"loss": 2.9416,
"step": 90100
},
{
"epoch": 0.97,
"learning_rate": 1.5397674768443902e-06,
"loss": 2.9483,
"step": 90200
},
{
"epoch": 0.97,
"learning_rate": 1.486042163625814e-06,
"loss": 2.948,
"step": 90300
},
{
"epoch": 0.97,
"learning_rate": 1.432316850407238e-06,
"loss": 2.9253,
"step": 90400
},
{
"epoch": 0.97,
"learning_rate": 1.378591537188662e-06,
"loss": 2.9431,
"step": 90500
},
{
"epoch": 0.97,
"learning_rate": 1.3248662239700859e-06,
"loss": 2.9398,
"step": 90600
},
{
"epoch": 0.97,
"learning_rate": 1.2711409107515098e-06,
"loss": 2.9433,
"step": 90700
},
{
"epoch": 0.98,
"learning_rate": 1.2174155975329336e-06,
"loss": 2.9571,
"step": 90800
},
{
"epoch": 0.98,
"learning_rate": 1.1636902843143575e-06,
"loss": 2.9343,
"step": 90900
},
{
"epoch": 0.98,
"learning_rate": 1.1099649710957815e-06,
"loss": 2.9358,
"step": 91000
},
{
"epoch": 0.98,
"learning_rate": 1.0562396578772054e-06,
"loss": 2.9459,
"step": 91100
},
{
"epoch": 0.98,
"learning_rate": 1.0025143446586294e-06,
"loss": 2.9464,
"step": 91200
},
{
"epoch": 0.98,
"learning_rate": 9.487890314400532e-07,
"loss": 2.9523,
"step": 91300
},
{
"epoch": 0.98,
"learning_rate": 8.950637182214772e-07,
"loss": 2.9304,
"step": 91400
},
{
"epoch": 0.98,
"learning_rate": 8.413384050029012e-07,
"loss": 2.9382,
"step": 91500
},
{
"epoch": 0.98,
"learning_rate": 7.876130917843251e-07,
"loss": 2.9499,
"step": 91600
},
{
"epoch": 0.99,
"learning_rate": 7.338877785657491e-07,
"loss": 2.9389,
"step": 91700
},
{
"epoch": 0.99,
"learning_rate": 6.80162465347173e-07,
"loss": 2.9355,
"step": 91800
},
{
"epoch": 0.99,
"learning_rate": 6.264371521285969e-07,
"loss": 2.9386,
"step": 91900
},
{
"epoch": 0.99,
"learning_rate": 5.727118389100209e-07,
"loss": 2.9285,
"step": 92000
},
{
"epoch": 0.99,
"learning_rate": 5.189865256914448e-07,
"loss": 2.9309,
"step": 92100
},
{
"epoch": 0.99,
"learning_rate": 4.652612124728687e-07,
"loss": 2.9544,
"step": 92200
},
{
"epoch": 0.99,
"learning_rate": 4.115358992542927e-07,
"loss": 2.9408,
"step": 92300
},
{
"epoch": 0.99,
"learning_rate": 3.5781058603571663e-07,
"loss": 2.9369,
"step": 92400
},
{
"epoch": 0.99,
"learning_rate": 3.0408527281714054e-07,
"loss": 2.9509,
"step": 92500
},
{
"epoch": 0.99,
"learning_rate": 2.5035995959856444e-07,
"loss": 2.9322,
"step": 92600
},
{
"epoch": 1.0,
"learning_rate": 1.966346463799884e-07,
"loss": 2.9314,
"step": 92700
},
{
"epoch": 1.0,
"learning_rate": 1.4290933316141234e-07,
"loss": 2.9489,
"step": 92800
},
{
"epoch": 1.0,
"learning_rate": 8.918401994283627e-08,
"loss": 2.948,
"step": 92900
},
{
"epoch": 1.0,
"learning_rate": 3.5458706724260206e-08,
"loss": 2.9466,
"step": 93000
}
],
"max_steps": 93066,
"num_train_epochs": 1,
"total_flos": 7.94540378161152e+17,
"trial_name": null,
"trial_params": null
}