{ "best_metric": null, "best_model_checkpoint": null, "epoch": 85.33443739074431, "global_step": 1855000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.9988499401968906e-05, "loss": 4.1394, "step": 500 }, { "epoch": 0.05, "learning_rate": 4.997699880393781e-05, "loss": 3.4231, "step": 1000 }, { "epoch": 0.07, "learning_rate": 4.9965498205906705e-05, "loss": 3.1982, "step": 1500 }, { "epoch": 0.09, "learning_rate": 4.9953997607875615e-05, "loss": 3.0202, "step": 2000 }, { "epoch": 0.12, "learning_rate": 4.994249700984451e-05, "loss": 2.92, "step": 2500 }, { "epoch": 0.14, "learning_rate": 4.993099641181342e-05, "loss": 2.8251, "step": 3000 }, { "epoch": 0.16, "learning_rate": 4.991949581378232e-05, "loss": 2.7333, "step": 3500 }, { "epoch": 0.18, "learning_rate": 4.990799521575122e-05, "loss": 2.7138, "step": 4000 }, { "epoch": 0.21, "learning_rate": 4.9896494617720125e-05, "loss": 2.6168, "step": 4500 }, { "epoch": 0.23, "learning_rate": 4.988499401968902e-05, "loss": 2.6025, "step": 5000 }, { "epoch": 0.25, "learning_rate": 4.987349342165793e-05, "loss": 2.5392, "step": 5500 }, { "epoch": 0.28, "learning_rate": 4.986199282362683e-05, "loss": 2.4974, "step": 6000 }, { "epoch": 0.3, "learning_rate": 4.985049222559574e-05, "loss": 2.4775, "step": 6500 }, { "epoch": 0.32, "learning_rate": 4.9838991627564634e-05, "loss": 2.4232, "step": 7000 }, { "epoch": 0.35, "learning_rate": 4.982749102953354e-05, "loss": 2.3939, "step": 7500 }, { "epoch": 0.37, "learning_rate": 4.981599043150244e-05, "loss": 2.3645, "step": 8000 }, { "epoch": 0.39, "learning_rate": 4.9804489833471344e-05, "loss": 2.3559, "step": 8500 }, { "epoch": 0.41, "learning_rate": 4.979298923544025e-05, "loss": 2.335, "step": 9000 }, { "epoch": 0.44, "learning_rate": 4.9781488637409144e-05, "loss": 2.3198, "step": 9500 }, { "epoch": 0.46, "learning_rate": 4.9769988039378054e-05, "loss": 2.2551, "step": 10000 }, { "epoch": 0.48, "learning_rate": 4.975848744134695e-05, "loss": 2.2582, "step": 10500 }, { "epoch": 0.51, "learning_rate": 4.974698684331586e-05, "loss": 2.2212, "step": 11000 }, { "epoch": 0.53, "learning_rate": 4.9735486245284756e-05, "loss": 2.2338, "step": 11500 }, { "epoch": 0.55, "learning_rate": 4.972398564725366e-05, "loss": 2.2007, "step": 12000 }, { "epoch": 0.58, "learning_rate": 4.971248504922256e-05, "loss": 2.2119, "step": 12500 }, { "epoch": 0.6, "learning_rate": 4.970098445119146e-05, "loss": 2.1805, "step": 13000 }, { "epoch": 0.62, "learning_rate": 4.968948385316037e-05, "loss": 2.1515, "step": 13500 }, { "epoch": 0.64, "learning_rate": 4.9677983255129266e-05, "loss": 2.1532, "step": 14000 }, { "epoch": 0.67, "learning_rate": 4.9666482657098176e-05, "loss": 2.1269, "step": 14500 }, { "epoch": 0.69, "learning_rate": 4.965498205906707e-05, "loss": 2.1356, "step": 15000 }, { "epoch": 0.71, "learning_rate": 4.9643481461035976e-05, "loss": 2.1202, "step": 15500 }, { "epoch": 0.74, "learning_rate": 4.963198086300488e-05, "loss": 2.0694, "step": 16000 }, { "epoch": 0.76, "learning_rate": 4.962048026497378e-05, "loss": 2.0691, "step": 16500 }, { "epoch": 0.78, "learning_rate": 4.9608979666942685e-05, "loss": 2.0844, "step": 17000 }, { "epoch": 0.81, "learning_rate": 4.959747906891158e-05, "loss": 2.0485, "step": 17500 }, { "epoch": 0.83, "learning_rate": 4.958597847088049e-05, "loss": 2.0348, "step": 18000 }, { "epoch": 0.85, "learning_rate": 4.957447787284939e-05, "loss": 2.0198, "step": 18500 }, { "epoch": 0.87, "learning_rate": 4.95629772748183e-05, "loss": 2.0317, "step": 19000 }, { "epoch": 0.9, "learning_rate": 4.9551476676787195e-05, "loss": 2.0125, "step": 19500 }, { "epoch": 0.92, "learning_rate": 4.95399760787561e-05, "loss": 2.059, "step": 20000 }, { "epoch": 0.94, "learning_rate": 4.9528475480725e-05, "loss": 1.9968, "step": 20500 }, { "epoch": 0.97, "learning_rate": 4.95169748826939e-05, "loss": 2.0136, "step": 21000 }, { "epoch": 0.99, "learning_rate": 4.950547428466281e-05, "loss": 1.9867, "step": 21500 }, { "epoch": 1.01, "learning_rate": 4.9493973686631704e-05, "loss": 1.9583, "step": 22000 }, { "epoch": 1.04, "learning_rate": 4.9482473088600614e-05, "loss": 1.8683, "step": 22500 }, { "epoch": 1.06, "learning_rate": 4.947097249056951e-05, "loss": 1.8724, "step": 23000 }, { "epoch": 1.08, "learning_rate": 4.9459471892538414e-05, "loss": 1.8524, "step": 23500 }, { "epoch": 1.1, "learning_rate": 4.944797129450732e-05, "loss": 1.8339, "step": 24000 }, { "epoch": 1.13, "learning_rate": 4.943647069647622e-05, "loss": 1.828, "step": 24500 }, { "epoch": 1.15, "learning_rate": 4.9424970098445123e-05, "loss": 1.8331, "step": 25000 }, { "epoch": 1.17, "learning_rate": 4.941346950041402e-05, "loss": 1.8038, "step": 25500 }, { "epoch": 1.2, "learning_rate": 4.940196890238293e-05, "loss": 1.8314, "step": 26000 }, { "epoch": 1.22, "learning_rate": 4.9390468304351826e-05, "loss": 1.8386, "step": 26500 }, { "epoch": 1.24, "learning_rate": 4.9378967706320736e-05, "loss": 1.842, "step": 27000 }, { "epoch": 1.27, "learning_rate": 4.936746710828963e-05, "loss": 1.8419, "step": 27500 }, { "epoch": 1.29, "learning_rate": 4.9355966510258536e-05, "loss": 1.8057, "step": 28000 }, { "epoch": 1.31, "learning_rate": 4.934446591222744e-05, "loss": 1.8428, "step": 28500 }, { "epoch": 1.33, "learning_rate": 4.9332965314196336e-05, "loss": 1.8108, "step": 29000 }, { "epoch": 1.36, "learning_rate": 4.9321464716165246e-05, "loss": 1.8062, "step": 29500 }, { "epoch": 1.38, "learning_rate": 4.930996411813414e-05, "loss": 1.7831, "step": 30000 }, { "epoch": 1.4, "learning_rate": 4.929846352010305e-05, "loss": 1.7931, "step": 30500 }, { "epoch": 1.43, "learning_rate": 4.928696292207195e-05, "loss": 1.8185, "step": 31000 }, { "epoch": 1.45, "learning_rate": 4.927546232404085e-05, "loss": 1.8415, "step": 31500 }, { "epoch": 1.47, "learning_rate": 4.9263961726009755e-05, "loss": 1.8057, "step": 32000 }, { "epoch": 1.5, "learning_rate": 4.925246112797866e-05, "loss": 1.8088, "step": 32500 }, { "epoch": 1.52, "learning_rate": 4.924096052994756e-05, "loss": 1.7975, "step": 33000 }, { "epoch": 1.54, "learning_rate": 4.922945993191646e-05, "loss": 1.7942, "step": 33500 }, { "epoch": 1.56, "learning_rate": 4.921795933388537e-05, "loss": 1.7869, "step": 34000 }, { "epoch": 1.59, "learning_rate": 4.9206458735854265e-05, "loss": 1.8013, "step": 34500 }, { "epoch": 1.61, "learning_rate": 4.9194958137823175e-05, "loss": 1.8058, "step": 35000 }, { "epoch": 1.63, "learning_rate": 4.918345753979207e-05, "loss": 1.7968, "step": 35500 }, { "epoch": 1.66, "learning_rate": 4.9171956941760974e-05, "loss": 1.7783, "step": 36000 }, { "epoch": 1.68, "learning_rate": 4.916045634372988e-05, "loss": 1.7421, "step": 36500 }, { "epoch": 1.7, "learning_rate": 4.9148955745698774e-05, "loss": 1.7736, "step": 37000 }, { "epoch": 1.73, "learning_rate": 4.9137455147667684e-05, "loss": 1.7649, "step": 37500 }, { "epoch": 1.75, "learning_rate": 4.912595454963658e-05, "loss": 1.7759, "step": 38000 }, { "epoch": 1.77, "learning_rate": 4.911445395160549e-05, "loss": 1.7874, "step": 38500 }, { "epoch": 1.79, "learning_rate": 4.910295335357439e-05, "loss": 1.7612, "step": 39000 }, { "epoch": 1.82, "learning_rate": 4.909145275554329e-05, "loss": 1.7559, "step": 39500 }, { "epoch": 1.84, "learning_rate": 4.907995215751219e-05, "loss": 1.7542, "step": 40000 }, { "epoch": 1.86, "learning_rate": 4.90684515594811e-05, "loss": 1.7343, "step": 40500 }, { "epoch": 1.89, "learning_rate": 4.905695096145e-05, "loss": 1.7492, "step": 41000 }, { "epoch": 1.91, "learning_rate": 4.9045450363418896e-05, "loss": 1.7577, "step": 41500 }, { "epoch": 1.93, "learning_rate": 4.9033949765387806e-05, "loss": 1.7695, "step": 42000 }, { "epoch": 1.96, "learning_rate": 4.90224491673567e-05, "loss": 1.7387, "step": 42500 }, { "epoch": 1.98, "learning_rate": 4.901094856932561e-05, "loss": 1.7388, "step": 43000 }, { "epoch": 2.0, "learning_rate": 4.899944797129451e-05, "loss": 1.7229, "step": 43500 }, { "epoch": 2.02, "learning_rate": 4.898794737326341e-05, "loss": 1.554, "step": 44000 }, { "epoch": 2.05, "learning_rate": 4.8976446775232316e-05, "loss": 1.5624, "step": 44500 }, { "epoch": 2.07, "learning_rate": 4.896494617720121e-05, "loss": 1.5646, "step": 45000 }, { "epoch": 2.09, "learning_rate": 4.895344557917012e-05, "loss": 1.574, "step": 45500 }, { "epoch": 2.12, "learning_rate": 4.894194498113902e-05, "loss": 1.5517, "step": 46000 }, { "epoch": 2.14, "learning_rate": 4.893044438310793e-05, "loss": 1.6049, "step": 46500 }, { "epoch": 2.16, "learning_rate": 4.8918943785076825e-05, "loss": 1.5991, "step": 47000 }, { "epoch": 2.19, "learning_rate": 4.890744318704573e-05, "loss": 1.5896, "step": 47500 }, { "epoch": 2.21, "learning_rate": 4.889594258901463e-05, "loss": 1.5615, "step": 48000 }, { "epoch": 2.23, "learning_rate": 4.8884441990983535e-05, "loss": 1.5911, "step": 48500 }, { "epoch": 2.25, "learning_rate": 4.887294139295244e-05, "loss": 1.5743, "step": 49000 }, { "epoch": 2.28, "learning_rate": 4.8861440794921335e-05, "loss": 1.5505, "step": 49500 }, { "epoch": 2.3, "learning_rate": 4.8849940196890245e-05, "loss": 1.6073, "step": 50000 }, { "epoch": 2.32, "learning_rate": 4.883843959885914e-05, "loss": 1.5914, "step": 50500 }, { "epoch": 2.35, "learning_rate": 4.882693900082805e-05, "loss": 1.6086, "step": 51000 }, { "epoch": 2.37, "learning_rate": 4.881543840279695e-05, "loss": 1.5998, "step": 51500 }, { "epoch": 2.39, "learning_rate": 4.880393780476585e-05, "loss": 1.5733, "step": 52000 }, { "epoch": 2.42, "learning_rate": 4.8792437206734754e-05, "loss": 1.5914, "step": 52500 }, { "epoch": 2.44, "learning_rate": 4.878093660870365e-05, "loss": 1.5707, "step": 53000 }, { "epoch": 2.46, "learning_rate": 4.876943601067256e-05, "loss": 1.5572, "step": 53500 }, { "epoch": 2.48, "learning_rate": 4.875793541264146e-05, "loss": 1.5712, "step": 54000 }, { "epoch": 2.51, "learning_rate": 4.874643481461037e-05, "loss": 1.5937, "step": 54500 }, { "epoch": 2.53, "learning_rate": 4.873493421657926e-05, "loss": 1.5874, "step": 55000 }, { "epoch": 2.55, "learning_rate": 4.8723433618548167e-05, "loss": 1.5703, "step": 55500 }, { "epoch": 2.58, "learning_rate": 4.871193302051707e-05, "loss": 1.5958, "step": 56000 }, { "epoch": 2.6, "learning_rate": 4.870043242248597e-05, "loss": 1.5913, "step": 56500 }, { "epoch": 2.62, "learning_rate": 4.8688931824454876e-05, "loss": 1.6187, "step": 57000 }, { "epoch": 2.65, "learning_rate": 4.867743122642377e-05, "loss": 1.6237, "step": 57500 }, { "epoch": 2.67, "learning_rate": 4.866593062839268e-05, "loss": 1.5911, "step": 58000 }, { "epoch": 2.69, "learning_rate": 4.865443003036158e-05, "loss": 1.5571, "step": 58500 }, { "epoch": 2.71, "learning_rate": 4.864292943233049e-05, "loss": 1.5788, "step": 59000 }, { "epoch": 2.74, "learning_rate": 4.8631428834299386e-05, "loss": 1.5564, "step": 59500 }, { "epoch": 2.76, "learning_rate": 4.861992823626829e-05, "loss": 1.592, "step": 60000 }, { "epoch": 2.78, "learning_rate": 4.860842763823719e-05, "loss": 1.551, "step": 60500 }, { "epoch": 2.81, "learning_rate": 4.859692704020609e-05, "loss": 1.5662, "step": 61000 }, { "epoch": 2.83, "learning_rate": 4.8585426442175e-05, "loss": 1.5732, "step": 61500 }, { "epoch": 2.85, "learning_rate": 4.8573925844143895e-05, "loss": 1.5769, "step": 62000 }, { "epoch": 2.88, "learning_rate": 4.8562425246112805e-05, "loss": 1.5772, "step": 62500 }, { "epoch": 2.9, "learning_rate": 4.85509246480817e-05, "loss": 1.5543, "step": 63000 }, { "epoch": 2.92, "learning_rate": 4.8539424050050605e-05, "loss": 1.554, "step": 63500 }, { "epoch": 2.94, "learning_rate": 4.852792345201951e-05, "loss": 1.5842, "step": 64000 }, { "epoch": 2.97, "learning_rate": 4.851642285398841e-05, "loss": 1.6101, "step": 64500 }, { "epoch": 2.99, "learning_rate": 4.8504922255957314e-05, "loss": 1.5696, "step": 65000 }, { "epoch": 3.01, "learning_rate": 4.849342165792621e-05, "loss": 1.4724, "step": 65500 }, { "epoch": 3.04, "learning_rate": 4.848192105989512e-05, "loss": 1.3486, "step": 66000 }, { "epoch": 3.06, "learning_rate": 4.847042046186402e-05, "loss": 1.3605, "step": 66500 }, { "epoch": 3.08, "learning_rate": 4.845891986383292e-05, "loss": 1.3871, "step": 67000 }, { "epoch": 3.11, "learning_rate": 4.8447419265801824e-05, "loss": 1.4126, "step": 67500 }, { "epoch": 3.13, "learning_rate": 4.843591866777073e-05, "loss": 1.4092, "step": 68000 }, { "epoch": 3.15, "learning_rate": 4.842441806973963e-05, "loss": 1.4001, "step": 68500 }, { "epoch": 3.17, "learning_rate": 4.841291747170853e-05, "loss": 1.4074, "step": 69000 }, { "epoch": 3.2, "learning_rate": 4.840141687367744e-05, "loss": 1.3978, "step": 69500 }, { "epoch": 3.22, "learning_rate": 4.838991627564633e-05, "loss": 1.3893, "step": 70000 }, { "epoch": 3.24, "learning_rate": 4.837841567761524e-05, "loss": 1.3779, "step": 70500 }, { "epoch": 3.27, "learning_rate": 4.836691507958414e-05, "loss": 1.3949, "step": 71000 }, { "epoch": 3.29, "learning_rate": 4.835541448155304e-05, "loss": 1.4107, "step": 71500 }, { "epoch": 3.31, "learning_rate": 4.8343913883521946e-05, "loss": 1.4477, "step": 72000 }, { "epoch": 3.34, "learning_rate": 4.833241328549085e-05, "loss": 1.3949, "step": 72500 }, { "epoch": 3.36, "learning_rate": 4.832091268745975e-05, "loss": 1.3989, "step": 73000 }, { "epoch": 3.38, "learning_rate": 4.830941208942865e-05, "loss": 1.4085, "step": 73500 }, { "epoch": 3.4, "learning_rate": 4.829791149139756e-05, "loss": 1.4141, "step": 74000 }, { "epoch": 3.43, "learning_rate": 4.8286410893366456e-05, "loss": 1.4409, "step": 74500 }, { "epoch": 3.45, "learning_rate": 4.827491029533536e-05, "loss": 1.3948, "step": 75000 }, { "epoch": 3.47, "learning_rate": 4.826340969730426e-05, "loss": 1.4178, "step": 75500 }, { "epoch": 3.5, "learning_rate": 4.8251909099273165e-05, "loss": 1.411, "step": 76000 }, { "epoch": 3.52, "learning_rate": 4.824040850124207e-05, "loss": 1.4172, "step": 76500 }, { "epoch": 3.54, "learning_rate": 4.8228907903210965e-05, "loss": 1.4597, "step": 77000 }, { "epoch": 3.57, "learning_rate": 4.8217407305179875e-05, "loss": 1.413, "step": 77500 }, { "epoch": 3.59, "learning_rate": 4.820590670714877e-05, "loss": 1.4253, "step": 78000 }, { "epoch": 3.61, "learning_rate": 4.819440610911768e-05, "loss": 1.4401, "step": 78500 }, { "epoch": 3.63, "learning_rate": 4.818290551108658e-05, "loss": 1.4249, "step": 79000 }, { "epoch": 3.66, "learning_rate": 4.817140491305548e-05, "loss": 1.4135, "step": 79500 }, { "epoch": 3.68, "learning_rate": 4.8159904315024384e-05, "loss": 1.4085, "step": 80000 }, { "epoch": 3.7, "learning_rate": 4.814840371699329e-05, "loss": 1.4119, "step": 80500 }, { "epoch": 3.73, "learning_rate": 4.813690311896219e-05, "loss": 1.4267, "step": 81000 }, { "epoch": 3.75, "learning_rate": 4.812540252093109e-05, "loss": 1.4302, "step": 81500 }, { "epoch": 3.77, "learning_rate": 4.81139019229e-05, "loss": 1.4357, "step": 82000 }, { "epoch": 3.8, "learning_rate": 4.8102401324868894e-05, "loss": 1.4353, "step": 82500 }, { "epoch": 3.82, "learning_rate": 4.80909007268378e-05, "loss": 1.4447, "step": 83000 }, { "epoch": 3.84, "learning_rate": 4.80794001288067e-05, "loss": 1.4115, "step": 83500 }, { "epoch": 3.86, "learning_rate": 4.8067899530775603e-05, "loss": 1.4137, "step": 84000 }, { "epoch": 3.89, "learning_rate": 4.805639893274451e-05, "loss": 1.4482, "step": 84500 }, { "epoch": 3.91, "learning_rate": 4.80448983347134e-05, "loss": 1.4328, "step": 85000 }, { "epoch": 3.93, "learning_rate": 4.803339773668231e-05, "loss": 1.4029, "step": 85500 }, { "epoch": 3.96, "learning_rate": 4.802189713865121e-05, "loss": 1.4184, "step": 86000 }, { "epoch": 3.98, "learning_rate": 4.801039654062012e-05, "loss": 1.4408, "step": 86500 }, { "epoch": 4.0, "learning_rate": 4.7998895942589016e-05, "loss": 1.4384, "step": 87000 }, { "epoch": 4.03, "learning_rate": 4.798739534455792e-05, "loss": 1.2064, "step": 87500 }, { "epoch": 4.05, "learning_rate": 4.797589474652682e-05, "loss": 1.2228, "step": 88000 }, { "epoch": 4.07, "learning_rate": 4.7964394148495726e-05, "loss": 1.1962, "step": 88500 }, { "epoch": 4.09, "learning_rate": 4.795289355046463e-05, "loss": 1.2734, "step": 89000 }, { "epoch": 4.12, "learning_rate": 4.7941392952433525e-05, "loss": 1.2279, "step": 89500 }, { "epoch": 4.14, "learning_rate": 4.7929892354402435e-05, "loss": 1.2713, "step": 90000 }, { "epoch": 4.16, "learning_rate": 4.791839175637133e-05, "loss": 1.2328, "step": 90500 }, { "epoch": 4.19, "learning_rate": 4.7906891158340235e-05, "loss": 1.2011, "step": 91000 }, { "epoch": 4.21, "learning_rate": 4.789539056030914e-05, "loss": 1.2529, "step": 91500 }, { "epoch": 4.23, "learning_rate": 4.788388996227804e-05, "loss": 1.247, "step": 92000 }, { "epoch": 4.26, "learning_rate": 4.7872389364246945e-05, "loss": 1.2521, "step": 92500 }, { "epoch": 4.28, "learning_rate": 4.786088876621584e-05, "loss": 1.2368, "step": 93000 }, { "epoch": 4.3, "learning_rate": 4.784938816818475e-05, "loss": 1.255, "step": 93500 }, { "epoch": 4.32, "learning_rate": 4.783788757015365e-05, "loss": 1.2664, "step": 94000 }, { "epoch": 4.35, "learning_rate": 4.782638697212255e-05, "loss": 1.2671, "step": 94500 }, { "epoch": 4.37, "learning_rate": 4.7814886374091454e-05, "loss": 1.2945, "step": 95000 }, { "epoch": 4.39, "learning_rate": 4.780338577606036e-05, "loss": 1.2564, "step": 95500 }, { "epoch": 4.42, "learning_rate": 4.779188517802926e-05, "loss": 1.2843, "step": 96000 }, { "epoch": 4.44, "learning_rate": 4.7780384579998164e-05, "loss": 1.2875, "step": 96500 }, { "epoch": 4.46, "learning_rate": 4.776888398196707e-05, "loss": 1.2742, "step": 97000 }, { "epoch": 4.49, "learning_rate": 4.7757383383935964e-05, "loss": 1.2835, "step": 97500 }, { "epoch": 4.51, "learning_rate": 4.7745882785904874e-05, "loss": 1.2828, "step": 98000 }, { "epoch": 4.53, "learning_rate": 4.773438218787377e-05, "loss": 1.2869, "step": 98500 }, { "epoch": 4.55, "learning_rate": 4.772288158984267e-05, "loss": 1.2763, "step": 99000 }, { "epoch": 4.58, "learning_rate": 4.771138099181158e-05, "loss": 1.2688, "step": 99500 }, { "epoch": 4.6, "learning_rate": 4.769988039378048e-05, "loss": 1.2746, "step": 100000 }, { "epoch": 4.62, "learning_rate": 4.768837979574938e-05, "loss": 1.2778, "step": 100500 }, { "epoch": 4.65, "learning_rate": 4.767687919771828e-05, "loss": 1.2923, "step": 101000 }, { "epoch": 4.67, "learning_rate": 4.766537859968719e-05, "loss": 1.2947, "step": 101500 }, { "epoch": 4.69, "learning_rate": 4.7653878001656086e-05, "loss": 1.3191, "step": 102000 }, { "epoch": 4.72, "learning_rate": 4.764237740362499e-05, "loss": 1.2942, "step": 102500 }, { "epoch": 4.74, "learning_rate": 4.763087680559389e-05, "loss": 1.2977, "step": 103000 }, { "epoch": 4.76, "learning_rate": 4.7619376207562796e-05, "loss": 1.3168, "step": 103500 }, { "epoch": 4.78, "learning_rate": 4.76078756095317e-05, "loss": 1.2905, "step": 104000 }, { "epoch": 4.81, "learning_rate": 4.75963750115006e-05, "loss": 1.2959, "step": 104500 }, { "epoch": 4.83, "learning_rate": 4.7584874413469505e-05, "loss": 1.2915, "step": 105000 }, { "epoch": 4.85, "learning_rate": 4.75733738154384e-05, "loss": 1.2918, "step": 105500 }, { "epoch": 4.88, "learning_rate": 4.756187321740731e-05, "loss": 1.3042, "step": 106000 }, { "epoch": 4.9, "learning_rate": 4.755037261937621e-05, "loss": 1.3059, "step": 106500 }, { "epoch": 4.92, "learning_rate": 4.753887202134511e-05, "loss": 1.3001, "step": 107000 }, { "epoch": 4.95, "learning_rate": 4.7527371423314015e-05, "loss": 1.2828, "step": 107500 }, { "epoch": 4.97, "learning_rate": 4.751587082528292e-05, "loss": 1.3075, "step": 108000 }, { "epoch": 4.99, "learning_rate": 4.750437022725182e-05, "loss": 1.314, "step": 108500 }, { "epoch": 5.01, "learning_rate": 4.749286962922072e-05, "loss": 1.1597, "step": 109000 }, { "epoch": 5.04, "learning_rate": 4.748136903118963e-05, "loss": 1.0852, "step": 109500 }, { "epoch": 5.06, "learning_rate": 4.7469868433158524e-05, "loss": 1.0851, "step": 110000 }, { "epoch": 5.08, "learning_rate": 4.745836783512743e-05, "loss": 1.0871, "step": 110500 }, { "epoch": 5.11, "learning_rate": 4.744686723709633e-05, "loss": 1.0978, "step": 111000 }, { "epoch": 5.13, "learning_rate": 4.7435366639065234e-05, "loss": 1.0862, "step": 111500 }, { "epoch": 5.15, "learning_rate": 4.742386604103414e-05, "loss": 1.0978, "step": 112000 }, { "epoch": 5.18, "learning_rate": 4.741236544300304e-05, "loss": 1.1092, "step": 112500 }, { "epoch": 5.2, "learning_rate": 4.7400864844971944e-05, "loss": 1.1202, "step": 113000 }, { "epoch": 5.22, "learning_rate": 4.738936424694084e-05, "loss": 1.1298, "step": 113500 }, { "epoch": 5.24, "learning_rate": 4.737786364890975e-05, "loss": 1.1056, "step": 114000 }, { "epoch": 5.27, "learning_rate": 4.7366363050878647e-05, "loss": 1.1331, "step": 114500 }, { "epoch": 5.29, "learning_rate": 4.735486245284755e-05, "loss": 1.1334, "step": 115000 }, { "epoch": 5.31, "learning_rate": 4.734336185481645e-05, "loss": 1.1227, "step": 115500 }, { "epoch": 5.34, "learning_rate": 4.7331861256785356e-05, "loss": 1.1623, "step": 116000 }, { "epoch": 5.36, "learning_rate": 4.732036065875426e-05, "loss": 1.1565, "step": 116500 }, { "epoch": 5.38, "learning_rate": 4.7308860060723156e-05, "loss": 1.151, "step": 117000 }, { "epoch": 5.41, "learning_rate": 4.7297359462692066e-05, "loss": 1.1621, "step": 117500 }, { "epoch": 5.43, "learning_rate": 4.728585886466096e-05, "loss": 1.1436, "step": 118000 }, { "epoch": 5.45, "learning_rate": 4.7274358266629866e-05, "loss": 1.159, "step": 118500 }, { "epoch": 5.47, "learning_rate": 4.726285766859877e-05, "loss": 1.1527, "step": 119000 }, { "epoch": 5.5, "learning_rate": 4.725135707056767e-05, "loss": 1.1668, "step": 119500 }, { "epoch": 5.52, "learning_rate": 4.7239856472536575e-05, "loss": 1.1551, "step": 120000 }, { "epoch": 5.54, "learning_rate": 4.722835587450548e-05, "loss": 1.1425, "step": 120500 }, { "epoch": 5.57, "learning_rate": 4.721685527647438e-05, "loss": 1.1798, "step": 121000 }, { "epoch": 5.59, "learning_rate": 4.720535467844328e-05, "loss": 1.1454, "step": 121500 }, { "epoch": 5.61, "learning_rate": 4.719385408041218e-05, "loss": 1.1652, "step": 122000 }, { "epoch": 5.64, "learning_rate": 4.7182353482381085e-05, "loss": 1.1817, "step": 122500 }, { "epoch": 5.66, "learning_rate": 4.717085288434999e-05, "loss": 1.1584, "step": 123000 }, { "epoch": 5.68, "learning_rate": 4.715935228631889e-05, "loss": 1.1468, "step": 123500 }, { "epoch": 5.7, "learning_rate": 4.7147851688287794e-05, "loss": 1.1788, "step": 124000 }, { "epoch": 5.73, "learning_rate": 4.71363510902567e-05, "loss": 1.199, "step": 124500 }, { "epoch": 5.75, "learning_rate": 4.7124850492225594e-05, "loss": 1.1829, "step": 125000 }, { "epoch": 5.77, "learning_rate": 4.7113349894194504e-05, "loss": 1.1782, "step": 125500 }, { "epoch": 5.8, "learning_rate": 4.71018492961634e-05, "loss": 1.1675, "step": 126000 }, { "epoch": 5.82, "learning_rate": 4.7090348698132304e-05, "loss": 1.1787, "step": 126500 }, { "epoch": 5.84, "learning_rate": 4.707884810010121e-05, "loss": 1.1519, "step": 127000 }, { "epoch": 5.87, "learning_rate": 4.706734750207011e-05, "loss": 1.1754, "step": 127500 }, { "epoch": 5.89, "learning_rate": 4.7055846904039014e-05, "loss": 1.1756, "step": 128000 }, { "epoch": 5.91, "learning_rate": 4.704434630600792e-05, "loss": 1.1421, "step": 128500 }, { "epoch": 5.93, "learning_rate": 4.703284570797682e-05, "loss": 1.171, "step": 129000 }, { "epoch": 5.96, "learning_rate": 4.7021345109945716e-05, "loss": 1.1677, "step": 129500 }, { "epoch": 5.98, "learning_rate": 4.700984451191462e-05, "loss": 1.1775, "step": 130000 }, { "epoch": 6.0, "learning_rate": 4.699834391388352e-05, "loss": 1.1495, "step": 130500 }, { "epoch": 6.03, "learning_rate": 4.6986843315852426e-05, "loss": 0.9604, "step": 131000 }, { "epoch": 6.05, "learning_rate": 4.697534271782133e-05, "loss": 0.9559, "step": 131500 }, { "epoch": 6.07, "learning_rate": 4.696384211979023e-05, "loss": 0.9822, "step": 132000 }, { "epoch": 6.1, "learning_rate": 4.6952341521759136e-05, "loss": 0.969, "step": 132500 }, { "epoch": 6.12, "learning_rate": 4.694084092372803e-05, "loss": 0.9935, "step": 133000 }, { "epoch": 6.14, "learning_rate": 4.692934032569694e-05, "loss": 0.9884, "step": 133500 }, { "epoch": 6.16, "learning_rate": 4.691783972766584e-05, "loss": 0.9653, "step": 134000 }, { "epoch": 6.19, "learning_rate": 4.690633912963474e-05, "loss": 0.9704, "step": 134500 }, { "epoch": 6.21, "learning_rate": 4.6894838531603645e-05, "loss": 0.9935, "step": 135000 }, { "epoch": 6.23, "learning_rate": 4.688333793357255e-05, "loss": 0.9905, "step": 135500 }, { "epoch": 6.26, "learning_rate": 4.687183733554145e-05, "loss": 1.0012, "step": 136000 }, { "epoch": 6.28, "learning_rate": 4.6860336737510355e-05, "loss": 1.0241, "step": 136500 }, { "epoch": 6.3, "learning_rate": 4.684883613947926e-05, "loss": 1.0001, "step": 137000 }, { "epoch": 6.33, "learning_rate": 4.6837335541448155e-05, "loss": 0.9964, "step": 137500 }, { "epoch": 6.35, "learning_rate": 4.682583494341706e-05, "loss": 1.0082, "step": 138000 }, { "epoch": 6.37, "learning_rate": 4.681433434538596e-05, "loss": 1.0138, "step": 138500 }, { "epoch": 6.39, "learning_rate": 4.6802833747354864e-05, "loss": 1.0223, "step": 139000 }, { "epoch": 6.42, "learning_rate": 4.679133314932377e-05, "loss": 1.0586, "step": 139500 }, { "epoch": 6.44, "learning_rate": 4.677983255129267e-05, "loss": 1.0271, "step": 140000 }, { "epoch": 6.46, "learning_rate": 4.6768331953261574e-05, "loss": 1.0291, "step": 140500 }, { "epoch": 6.49, "learning_rate": 4.675683135523047e-05, "loss": 1.0284, "step": 141000 }, { "epoch": 6.51, "learning_rate": 4.674533075719938e-05, "loss": 1.0447, "step": 141500 }, { "epoch": 6.53, "learning_rate": 4.673383015916828e-05, "loss": 1.0405, "step": 142000 }, { "epoch": 6.56, "learning_rate": 4.672232956113718e-05, "loss": 1.0678, "step": 142500 }, { "epoch": 6.58, "learning_rate": 4.6710828963106083e-05, "loss": 1.0448, "step": 143000 }, { "epoch": 6.6, "learning_rate": 4.669932836507499e-05, "loss": 1.051, "step": 143500 }, { "epoch": 6.62, "learning_rate": 4.668782776704389e-05, "loss": 1.0422, "step": 144000 }, { "epoch": 6.65, "learning_rate": 4.667632716901279e-05, "loss": 1.0759, "step": 144500 }, { "epoch": 6.67, "learning_rate": 4.6664826570981696e-05, "loss": 1.0512, "step": 145000 }, { "epoch": 6.69, "learning_rate": 4.665332597295059e-05, "loss": 1.0742, "step": 145500 }, { "epoch": 6.72, "learning_rate": 4.6641825374919496e-05, "loss": 1.0525, "step": 146000 }, { "epoch": 6.74, "learning_rate": 4.66303247768884e-05, "loss": 1.0517, "step": 146500 }, { "epoch": 6.76, "learning_rate": 4.66188241788573e-05, "loss": 1.0596, "step": 147000 }, { "epoch": 6.79, "learning_rate": 4.6607323580826206e-05, "loss": 1.0848, "step": 147500 }, { "epoch": 6.81, "learning_rate": 4.659582298279511e-05, "loss": 1.0672, "step": 148000 }, { "epoch": 6.83, "learning_rate": 4.658432238476401e-05, "loss": 1.0763, "step": 148500 }, { "epoch": 6.85, "learning_rate": 4.657282178673291e-05, "loss": 1.0707, "step": 149000 }, { "epoch": 6.88, "learning_rate": 4.656132118870182e-05, "loss": 1.0728, "step": 149500 }, { "epoch": 6.9, "learning_rate": 4.6549820590670715e-05, "loss": 1.0816, "step": 150000 }, { "epoch": 6.92, "learning_rate": 4.653831999263962e-05, "loss": 1.0853, "step": 150500 }, { "epoch": 6.95, "learning_rate": 4.652681939460852e-05, "loss": 1.0611, "step": 151000 }, { "epoch": 6.97, "learning_rate": 4.6515318796577425e-05, "loss": 1.0763, "step": 151500 }, { "epoch": 6.99, "learning_rate": 4.650381819854633e-05, "loss": 1.0958, "step": 152000 }, { "epoch": 7.02, "learning_rate": 4.649231760051523e-05, "loss": 0.9211, "step": 152500 }, { "epoch": 7.04, "learning_rate": 4.6480817002484135e-05, "loss": 0.8599, "step": 153000 }, { "epoch": 7.06, "learning_rate": 4.646931640445303e-05, "loss": 0.8387, "step": 153500 }, { "epoch": 7.08, "learning_rate": 4.6457815806421934e-05, "loss": 0.8712, "step": 154000 }, { "epoch": 7.11, "learning_rate": 4.644631520839084e-05, "loss": 0.8741, "step": 154500 }, { "epoch": 7.13, "learning_rate": 4.643481461035974e-05, "loss": 0.8452, "step": 155000 }, { "epoch": 7.15, "learning_rate": 4.6423314012328644e-05, "loss": 0.873, "step": 155500 }, { "epoch": 7.18, "learning_rate": 4.641181341429755e-05, "loss": 0.9031, "step": 156000 }, { "epoch": 7.2, "learning_rate": 4.640031281626645e-05, "loss": 0.8775, "step": 156500 }, { "epoch": 7.22, "learning_rate": 4.638881221823535e-05, "loss": 0.8856, "step": 157000 }, { "epoch": 7.25, "learning_rate": 4.637731162020425e-05, "loss": 0.8859, "step": 157500 }, { "epoch": 7.27, "learning_rate": 4.6365811022173153e-05, "loss": 0.9054, "step": 158000 }, { "epoch": 7.29, "learning_rate": 4.635431042414206e-05, "loss": 0.8887, "step": 158500 }, { "epoch": 7.31, "learning_rate": 4.634280982611096e-05, "loss": 0.8922, "step": 159000 }, { "epoch": 7.34, "learning_rate": 4.633130922807986e-05, "loss": 0.9209, "step": 159500 }, { "epoch": 7.36, "learning_rate": 4.6319808630048766e-05, "loss": 0.9255, "step": 160000 }, { "epoch": 7.38, "learning_rate": 4.630830803201767e-05, "loss": 0.9108, "step": 160500 }, { "epoch": 7.41, "learning_rate": 4.629680743398657e-05, "loss": 0.9096, "step": 161000 }, { "epoch": 7.43, "learning_rate": 4.628530683595547e-05, "loss": 0.9076, "step": 161500 }, { "epoch": 7.45, "learning_rate": 4.627380623792437e-05, "loss": 0.9396, "step": 162000 }, { "epoch": 7.48, "learning_rate": 4.6262305639893276e-05, "loss": 0.9545, "step": 162500 }, { "epoch": 7.5, "learning_rate": 4.625080504186218e-05, "loss": 0.9309, "step": 163000 }, { "epoch": 7.52, "learning_rate": 4.623930444383108e-05, "loss": 0.9289, "step": 163500 }, { "epoch": 7.54, "learning_rate": 4.6227803845799985e-05, "loss": 0.9563, "step": 164000 }, { "epoch": 7.57, "learning_rate": 4.621630324776889e-05, "loss": 0.9384, "step": 164500 }, { "epoch": 7.59, "learning_rate": 4.6204802649737785e-05, "loss": 0.939, "step": 165000 }, { "epoch": 7.61, "learning_rate": 4.619330205170669e-05, "loss": 0.962, "step": 165500 }, { "epoch": 7.64, "learning_rate": 4.618180145367559e-05, "loss": 0.9547, "step": 166000 }, { "epoch": 7.66, "learning_rate": 4.6170300855644495e-05, "loss": 0.9558, "step": 166500 }, { "epoch": 7.68, "learning_rate": 4.61588002576134e-05, "loss": 0.9582, "step": 167000 }, { "epoch": 7.71, "learning_rate": 4.61472996595823e-05, "loss": 0.9582, "step": 167500 }, { "epoch": 7.73, "learning_rate": 4.6135799061551205e-05, "loss": 0.9591, "step": 168000 }, { "epoch": 7.75, "learning_rate": 4.612429846352011e-05, "loss": 0.9957, "step": 168500 }, { "epoch": 7.77, "learning_rate": 4.611279786548901e-05, "loss": 0.9549, "step": 169000 }, { "epoch": 7.8, "learning_rate": 4.610129726745791e-05, "loss": 0.9675, "step": 169500 }, { "epoch": 7.82, "learning_rate": 4.608979666942681e-05, "loss": 0.9681, "step": 170000 }, { "epoch": 7.84, "learning_rate": 4.6078296071395714e-05, "loss": 0.9813, "step": 170500 }, { "epoch": 7.87, "learning_rate": 4.606679547336462e-05, "loss": 0.9806, "step": 171000 }, { "epoch": 7.89, "learning_rate": 4.605529487533352e-05, "loss": 0.9915, "step": 171500 }, { "epoch": 7.91, "learning_rate": 4.6043794277302424e-05, "loss": 0.9626, "step": 172000 }, { "epoch": 7.94, "learning_rate": 4.603229367927133e-05, "loss": 1.0019, "step": 172500 }, { "epoch": 7.96, "learning_rate": 4.602079308124022e-05, "loss": 0.9931, "step": 173000 }, { "epoch": 7.98, "learning_rate": 4.6009292483209127e-05, "loss": 0.981, "step": 173500 }, { "epoch": 8.0, "learning_rate": 4.599779188517803e-05, "loss": 0.9236, "step": 174000 }, { "epoch": 8.03, "learning_rate": 4.598629128714693e-05, "loss": 0.7649, "step": 174500 }, { "epoch": 8.05, "learning_rate": 4.5974790689115836e-05, "loss": 0.7554, "step": 175000 }, { "epoch": 8.07, "learning_rate": 4.596329009108474e-05, "loss": 0.7711, "step": 175500 }, { "epoch": 8.1, "learning_rate": 4.595178949305364e-05, "loss": 0.7783, "step": 176000 }, { "epoch": 8.12, "learning_rate": 4.5940288895022546e-05, "loss": 0.7734, "step": 176500 }, { "epoch": 8.14, "learning_rate": 4.592878829699145e-05, "loss": 0.7672, "step": 177000 }, { "epoch": 8.17, "learning_rate": 4.5917287698960346e-05, "loss": 0.7774, "step": 177500 }, { "epoch": 8.19, "learning_rate": 4.590578710092925e-05, "loss": 0.7923, "step": 178000 }, { "epoch": 8.21, "learning_rate": 4.589428650289815e-05, "loss": 0.8061, "step": 178500 }, { "epoch": 8.23, "learning_rate": 4.5882785904867055e-05, "loss": 0.7932, "step": 179000 }, { "epoch": 8.26, "learning_rate": 4.587128530683596e-05, "loss": 0.803, "step": 179500 }, { "epoch": 8.28, "learning_rate": 4.585978470880486e-05, "loss": 0.8355, "step": 180000 }, { "epoch": 8.3, "learning_rate": 4.5848284110773765e-05, "loss": 0.805, "step": 180500 }, { "epoch": 8.33, "learning_rate": 4.583678351274266e-05, "loss": 0.811, "step": 181000 }, { "epoch": 8.35, "learning_rate": 4.5825282914711565e-05, "loss": 0.837, "step": 181500 }, { "epoch": 8.37, "learning_rate": 4.581378231668047e-05, "loss": 0.8298, "step": 182000 }, { "epoch": 8.4, "learning_rate": 4.580228171864937e-05, "loss": 0.8324, "step": 182500 }, { "epoch": 8.42, "learning_rate": 4.5790781120618274e-05, "loss": 0.8329, "step": 183000 }, { "epoch": 8.44, "learning_rate": 4.577928052258718e-05, "loss": 0.8196, "step": 183500 }, { "epoch": 8.46, "learning_rate": 4.576777992455608e-05, "loss": 0.8336, "step": 184000 }, { "epoch": 8.49, "learning_rate": 4.5756279326524984e-05, "loss": 0.8337, "step": 184500 }, { "epoch": 8.51, "learning_rate": 4.574477872849388e-05, "loss": 0.8294, "step": 185000 }, { "epoch": 8.53, "learning_rate": 4.5733278130462784e-05, "loss": 0.8437, "step": 185500 }, { "epoch": 8.56, "learning_rate": 4.572177753243169e-05, "loss": 0.8462, "step": 186000 }, { "epoch": 8.58, "learning_rate": 4.571027693440059e-05, "loss": 0.8514, "step": 186500 }, { "epoch": 8.6, "learning_rate": 4.5698776336369494e-05, "loss": 0.8645, "step": 187000 }, { "epoch": 8.63, "learning_rate": 4.56872757383384e-05, "loss": 0.8834, "step": 187500 }, { "epoch": 8.65, "learning_rate": 4.56757751403073e-05, "loss": 0.8757, "step": 188000 }, { "epoch": 8.67, "learning_rate": 4.56642745422762e-05, "loss": 0.8726, "step": 188500 }, { "epoch": 8.69, "learning_rate": 4.56527739442451e-05, "loss": 0.8495, "step": 189000 }, { "epoch": 8.72, "learning_rate": 4.5641273346214e-05, "loss": 0.874, "step": 189500 }, { "epoch": 8.74, "learning_rate": 4.5629772748182906e-05, "loss": 0.8808, "step": 190000 }, { "epoch": 8.76, "learning_rate": 4.561827215015181e-05, "loss": 0.8852, "step": 190500 }, { "epoch": 8.79, "learning_rate": 4.560677155212071e-05, "loss": 0.8542, "step": 191000 }, { "epoch": 8.81, "learning_rate": 4.5595270954089616e-05, "loss": 0.8798, "step": 191500 }, { "epoch": 8.83, "learning_rate": 4.558377035605852e-05, "loss": 0.8848, "step": 192000 }, { "epoch": 8.86, "learning_rate": 4.557226975802742e-05, "loss": 0.8785, "step": 192500 }, { "epoch": 8.88, "learning_rate": 4.556076915999632e-05, "loss": 0.8478, "step": 193000 }, { "epoch": 8.9, "learning_rate": 4.554926856196522e-05, "loss": 0.9228, "step": 193500 }, { "epoch": 8.92, "learning_rate": 4.5537767963934125e-05, "loss": 0.9017, "step": 194000 }, { "epoch": 8.95, "learning_rate": 4.552626736590303e-05, "loss": 0.9014, "step": 194500 }, { "epoch": 8.97, "learning_rate": 4.551476676787193e-05, "loss": 0.9045, "step": 195000 }, { "epoch": 8.99, "learning_rate": 4.5503266169840835e-05, "loss": 0.8831, "step": 195500 }, { "epoch": 9.02, "learning_rate": 4.549176557180974e-05, "loss": 0.7596, "step": 196000 }, { "epoch": 9.04, "learning_rate": 4.548026497377864e-05, "loss": 0.6873, "step": 196500 }, { "epoch": 9.06, "learning_rate": 4.546876437574754e-05, "loss": 0.6767, "step": 197000 }, { "epoch": 9.09, "learning_rate": 4.545726377771644e-05, "loss": 0.6868, "step": 197500 }, { "epoch": 9.11, "learning_rate": 4.5445763179685344e-05, "loss": 0.6838, "step": 198000 }, { "epoch": 9.13, "learning_rate": 4.543426258165425e-05, "loss": 0.6986, "step": 198500 }, { "epoch": 9.15, "learning_rate": 4.542276198362315e-05, "loss": 0.7152, "step": 199000 }, { "epoch": 9.18, "learning_rate": 4.5411261385592054e-05, "loss": 0.71, "step": 199500 }, { "epoch": 9.2, "learning_rate": 4.539976078756096e-05, "loss": 0.7153, "step": 200000 }, { "epoch": 9.22, "learning_rate": 4.538826018952986e-05, "loss": 0.7096, "step": 200500 }, { "epoch": 9.25, "learning_rate": 4.537675959149876e-05, "loss": 0.7343, "step": 201000 }, { "epoch": 9.27, "learning_rate": 4.536525899346766e-05, "loss": 0.73, "step": 201500 }, { "epoch": 9.29, "learning_rate": 4.5353758395436563e-05, "loss": 0.7484, "step": 202000 }, { "epoch": 9.32, "learning_rate": 4.534225779740547e-05, "loss": 0.7385, "step": 202500 }, { "epoch": 9.34, "learning_rate": 4.533075719937437e-05, "loss": 0.7366, "step": 203000 }, { "epoch": 9.36, "learning_rate": 4.531925660134327e-05, "loss": 0.7558, "step": 203500 }, { "epoch": 9.38, "learning_rate": 4.5307756003312176e-05, "loss": 0.7212, "step": 204000 }, { "epoch": 9.41, "learning_rate": 4.529625540528108e-05, "loss": 0.746, "step": 204500 }, { "epoch": 9.43, "learning_rate": 4.5284754807249976e-05, "loss": 0.747, "step": 205000 }, { "epoch": 9.45, "learning_rate": 4.527325420921888e-05, "loss": 0.7353, "step": 205500 }, { "epoch": 9.48, "learning_rate": 4.526175361118778e-05, "loss": 0.7629, "step": 206000 }, { "epoch": 9.5, "learning_rate": 4.5250253013156686e-05, "loss": 0.7866, "step": 206500 }, { "epoch": 9.52, "learning_rate": 4.523875241512559e-05, "loss": 0.7613, "step": 207000 }, { "epoch": 9.55, "learning_rate": 4.522725181709449e-05, "loss": 0.7546, "step": 207500 }, { "epoch": 9.57, "learning_rate": 4.5215751219063396e-05, "loss": 0.7734, "step": 208000 }, { "epoch": 9.59, "learning_rate": 4.52042506210323e-05, "loss": 0.7609, "step": 208500 }, { "epoch": 9.61, "learning_rate": 4.5192750023001195e-05, "loss": 0.7721, "step": 209000 }, { "epoch": 9.64, "learning_rate": 4.51812494249701e-05, "loss": 0.7798, "step": 209500 }, { "epoch": 9.66, "learning_rate": 4.5169748826939e-05, "loss": 0.7905, "step": 210000 }, { "epoch": 9.68, "learning_rate": 4.5158248228907905e-05, "loss": 0.7915, "step": 210500 }, { "epoch": 9.71, "learning_rate": 4.514674763087681e-05, "loss": 0.7834, "step": 211000 }, { "epoch": 9.73, "learning_rate": 4.513524703284571e-05, "loss": 0.8098, "step": 211500 }, { "epoch": 9.75, "learning_rate": 4.5123746434814615e-05, "loss": 0.8088, "step": 212000 }, { "epoch": 9.78, "learning_rate": 4.511224583678352e-05, "loss": 0.8201, "step": 212500 }, { "epoch": 9.8, "learning_rate": 4.5100745238752414e-05, "loss": 0.7906, "step": 213000 }, { "epoch": 9.82, "learning_rate": 4.508924464072132e-05, "loss": 0.7959, "step": 213500 }, { "epoch": 9.84, "learning_rate": 4.507774404269022e-05, "loss": 0.8059, "step": 214000 }, { "epoch": 9.87, "learning_rate": 4.5066243444659124e-05, "loss": 0.8022, "step": 214500 }, { "epoch": 9.89, "learning_rate": 4.505474284662803e-05, "loss": 0.8082, "step": 215000 }, { "epoch": 9.91, "learning_rate": 4.504324224859693e-05, "loss": 0.8131, "step": 215500 }, { "epoch": 9.94, "learning_rate": 4.5031741650565834e-05, "loss": 0.8232, "step": 216000 }, { "epoch": 9.96, "learning_rate": 4.502024105253474e-05, "loss": 0.8018, "step": 216500 }, { "epoch": 9.98, "learning_rate": 4.5008740454503633e-05, "loss": 0.81, "step": 217000 }, { "epoch": 10.01, "learning_rate": 4.499723985647254e-05, "loss": 0.7734, "step": 217500 }, { "epoch": 10.03, "learning_rate": 4.498573925844144e-05, "loss": 0.626, "step": 218000 }, { "epoch": 10.05, "learning_rate": 4.497423866041034e-05, "loss": 0.5876, "step": 218500 }, { "epoch": 10.07, "learning_rate": 4.4962738062379246e-05, "loss": 0.6178, "step": 219000 }, { "epoch": 10.1, "learning_rate": 4.495123746434815e-05, "loss": 0.6161, "step": 219500 }, { "epoch": 10.12, "learning_rate": 4.493973686631705e-05, "loss": 0.6274, "step": 220000 }, { "epoch": 10.14, "learning_rate": 4.492823626828595e-05, "loss": 0.6562, "step": 220500 }, { "epoch": 10.17, "learning_rate": 4.491673567025485e-05, "loss": 0.6388, "step": 221000 }, { "epoch": 10.19, "learning_rate": 4.4905235072223756e-05, "loss": 0.6397, "step": 221500 }, { "epoch": 10.21, "learning_rate": 4.489373447419266e-05, "loss": 0.6644, "step": 222000 }, { "epoch": 10.24, "learning_rate": 4.488223387616156e-05, "loss": 0.66, "step": 222500 }, { "epoch": 10.26, "learning_rate": 4.4870733278130465e-05, "loss": 0.6508, "step": 223000 }, { "epoch": 10.28, "learning_rate": 4.485923268009937e-05, "loss": 0.6597, "step": 223500 }, { "epoch": 10.3, "learning_rate": 4.484773208206827e-05, "loss": 0.666, "step": 224000 }, { "epoch": 10.33, "learning_rate": 4.4836231484037175e-05, "loss": 0.6741, "step": 224500 }, { "epoch": 10.35, "learning_rate": 4.482473088600607e-05, "loss": 0.686, "step": 225000 }, { "epoch": 10.37, "learning_rate": 4.4813230287974975e-05, "loss": 0.6748, "step": 225500 }, { "epoch": 10.4, "learning_rate": 4.480172968994388e-05, "loss": 0.6879, "step": 226000 }, { "epoch": 10.42, "learning_rate": 4.479022909191278e-05, "loss": 0.6928, "step": 226500 }, { "epoch": 10.44, "learning_rate": 4.4778728493881685e-05, "loss": 0.6739, "step": 227000 }, { "epoch": 10.47, "learning_rate": 4.476722789585059e-05, "loss": 0.707, "step": 227500 }, { "epoch": 10.49, "learning_rate": 4.475572729781949e-05, "loss": 0.6898, "step": 228000 }, { "epoch": 10.51, "learning_rate": 4.474422669978839e-05, "loss": 0.7025, "step": 228500 }, { "epoch": 10.53, "learning_rate": 4.473272610175729e-05, "loss": 0.69, "step": 229000 }, { "epoch": 10.56, "learning_rate": 4.4721225503726194e-05, "loss": 0.7003, "step": 229500 }, { "epoch": 10.58, "learning_rate": 4.47097249056951e-05, "loss": 0.6825, "step": 230000 }, { "epoch": 10.6, "learning_rate": 4.4698224307664e-05, "loss": 0.7094, "step": 230500 }, { "epoch": 10.63, "learning_rate": 4.4686723709632904e-05, "loss": 0.7119, "step": 231000 }, { "epoch": 10.65, "learning_rate": 4.467522311160181e-05, "loss": 0.7057, "step": 231500 }, { "epoch": 10.67, "learning_rate": 4.466372251357071e-05, "loss": 0.7141, "step": 232000 }, { "epoch": 10.7, "learning_rate": 4.465222191553961e-05, "loss": 0.7458, "step": 232500 }, { "epoch": 10.72, "learning_rate": 4.464072131750851e-05, "loss": 0.7195, "step": 233000 }, { "epoch": 10.74, "learning_rate": 4.462922071947741e-05, "loss": 0.7228, "step": 233500 }, { "epoch": 10.76, "learning_rate": 4.4617720121446316e-05, "loss": 0.737, "step": 234000 }, { "epoch": 10.79, "learning_rate": 4.460621952341522e-05, "loss": 0.7316, "step": 234500 }, { "epoch": 10.81, "learning_rate": 4.459471892538412e-05, "loss": 0.7257, "step": 235000 }, { "epoch": 10.83, "learning_rate": 4.4583218327353026e-05, "loss": 0.7197, "step": 235500 }, { "epoch": 10.86, "learning_rate": 4.457171772932193e-05, "loss": 0.7223, "step": 236000 }, { "epoch": 10.88, "learning_rate": 4.4560217131290826e-05, "loss": 0.7491, "step": 236500 }, { "epoch": 10.9, "learning_rate": 4.454871653325973e-05, "loss": 0.7744, "step": 237000 }, { "epoch": 10.93, "learning_rate": 4.453721593522863e-05, "loss": 0.734, "step": 237500 }, { "epoch": 10.95, "learning_rate": 4.4525715337197535e-05, "loss": 0.7521, "step": 238000 }, { "epoch": 10.97, "learning_rate": 4.451421473916644e-05, "loss": 0.7342, "step": 238500 }, { "epoch": 10.99, "learning_rate": 4.450271414113534e-05, "loss": 0.7428, "step": 239000 }, { "epoch": 11.02, "learning_rate": 4.4491213543104245e-05, "loss": 0.6068, "step": 239500 }, { "epoch": 11.04, "learning_rate": 4.447971294507315e-05, "loss": 0.5503, "step": 240000 }, { "epoch": 11.06, "learning_rate": 4.446821234704205e-05, "loss": 0.552, "step": 240500 }, { "epoch": 11.09, "learning_rate": 4.445671174901095e-05, "loss": 0.5785, "step": 241000 }, { "epoch": 11.11, "learning_rate": 4.444521115097985e-05, "loss": 0.571, "step": 241500 }, { "epoch": 11.13, "learning_rate": 4.4433710552948754e-05, "loss": 0.566, "step": 242000 }, { "epoch": 11.16, "learning_rate": 4.442220995491766e-05, "loss": 0.5834, "step": 242500 }, { "epoch": 11.18, "learning_rate": 4.441070935688656e-05, "loss": 0.572, "step": 243000 }, { "epoch": 11.2, "learning_rate": 4.4399208758855464e-05, "loss": 0.6032, "step": 243500 }, { "epoch": 11.22, "learning_rate": 4.438770816082437e-05, "loss": 0.6007, "step": 244000 }, { "epoch": 11.25, "learning_rate": 4.4376207562793264e-05, "loss": 0.5947, "step": 244500 }, { "epoch": 11.27, "learning_rate": 4.4364706964762174e-05, "loss": 0.6127, "step": 245000 }, { "epoch": 11.29, "learning_rate": 4.435320636673107e-05, "loss": 0.6087, "step": 245500 }, { "epoch": 11.32, "learning_rate": 4.4341705768699974e-05, "loss": 0.6003, "step": 246000 }, { "epoch": 11.34, "learning_rate": 4.433020517066888e-05, "loss": 0.6039, "step": 246500 }, { "epoch": 11.36, "learning_rate": 4.431870457263778e-05, "loss": 0.6113, "step": 247000 }, { "epoch": 11.39, "learning_rate": 4.430720397460668e-05, "loss": 0.6257, "step": 247500 }, { "epoch": 11.41, "learning_rate": 4.429570337657558e-05, "loss": 0.6172, "step": 248000 }, { "epoch": 11.43, "learning_rate": 4.428420277854449e-05, "loss": 0.6161, "step": 248500 }, { "epoch": 11.45, "learning_rate": 4.4272702180513386e-05, "loss": 0.6098, "step": 249000 }, { "epoch": 11.48, "learning_rate": 4.426120158248229e-05, "loss": 0.6326, "step": 249500 }, { "epoch": 11.5, "learning_rate": 4.424970098445119e-05, "loss": 0.629, "step": 250000 }, { "epoch": 11.52, "learning_rate": 4.4238200386420096e-05, "loss": 0.6379, "step": 250500 }, { "epoch": 11.55, "learning_rate": 4.4226699788389e-05, "loss": 0.6503, "step": 251000 }, { "epoch": 11.57, "learning_rate": 4.42151991903579e-05, "loss": 0.6537, "step": 251500 }, { "epoch": 11.59, "learning_rate": 4.4203698592326806e-05, "loss": 0.6357, "step": 252000 }, { "epoch": 11.62, "learning_rate": 4.41921979942957e-05, "loss": 0.6551, "step": 252500 }, { "epoch": 11.64, "learning_rate": 4.418069739626461e-05, "loss": 0.6609, "step": 253000 }, { "epoch": 11.66, "learning_rate": 4.416919679823351e-05, "loss": 0.6669, "step": 253500 }, { "epoch": 11.68, "learning_rate": 4.415769620020241e-05, "loss": 0.6495, "step": 254000 }, { "epoch": 11.71, "learning_rate": 4.4146195602171315e-05, "loss": 0.6614, "step": 254500 }, { "epoch": 11.73, "learning_rate": 4.413469500414022e-05, "loss": 0.678, "step": 255000 }, { "epoch": 11.75, "learning_rate": 4.412319440610912e-05, "loss": 0.6655, "step": 255500 }, { "epoch": 11.78, "learning_rate": 4.411169380807802e-05, "loss": 0.6546, "step": 256000 }, { "epoch": 11.8, "learning_rate": 4.410019321004693e-05, "loss": 0.6668, "step": 256500 }, { "epoch": 11.82, "learning_rate": 4.4088692612015824e-05, "loss": 0.6573, "step": 257000 }, { "epoch": 11.85, "learning_rate": 4.407719201398473e-05, "loss": 0.6743, "step": 257500 }, { "epoch": 11.87, "learning_rate": 4.406569141595363e-05, "loss": 0.6796, "step": 258000 }, { "epoch": 11.89, "learning_rate": 4.4054190817922534e-05, "loss": 0.6642, "step": 258500 }, { "epoch": 11.91, "learning_rate": 4.404269021989144e-05, "loss": 0.6751, "step": 259000 }, { "epoch": 11.94, "learning_rate": 4.403118962186034e-05, "loss": 0.6765, "step": 259500 }, { "epoch": 11.96, "learning_rate": 4.4019689023829244e-05, "loss": 0.6893, "step": 260000 }, { "epoch": 11.98, "learning_rate": 4.400818842579814e-05, "loss": 0.6921, "step": 260500 }, { "epoch": 12.01, "learning_rate": 4.399668782776705e-05, "loss": 0.6385, "step": 261000 }, { "epoch": 12.03, "learning_rate": 4.398518722973595e-05, "loss": 0.5097, "step": 261500 }, { "epoch": 12.05, "learning_rate": 4.397368663170485e-05, "loss": 0.5095, "step": 262000 }, { "epoch": 12.08, "learning_rate": 4.396218603367375e-05, "loss": 0.5225, "step": 262500 }, { "epoch": 12.1, "learning_rate": 4.3950685435642656e-05, "loss": 0.5147, "step": 263000 }, { "epoch": 12.12, "learning_rate": 4.393918483761156e-05, "loss": 0.5228, "step": 263500 }, { "epoch": 12.14, "learning_rate": 4.3927684239580456e-05, "loss": 0.5333, "step": 264000 }, { "epoch": 12.17, "learning_rate": 4.3916183641549366e-05, "loss": 0.5395, "step": 264500 }, { "epoch": 12.19, "learning_rate": 4.390468304351826e-05, "loss": 0.532, "step": 265000 }, { "epoch": 12.21, "learning_rate": 4.3893182445487166e-05, "loss": 0.5339, "step": 265500 }, { "epoch": 12.24, "learning_rate": 4.388168184745607e-05, "loss": 0.5432, "step": 266000 }, { "epoch": 12.26, "learning_rate": 4.387018124942497e-05, "loss": 0.5429, "step": 266500 }, { "epoch": 12.28, "learning_rate": 4.3858680651393876e-05, "loss": 0.5649, "step": 267000 }, { "epoch": 12.31, "learning_rate": 4.384718005336278e-05, "loss": 0.5575, "step": 267500 }, { "epoch": 12.33, "learning_rate": 4.383567945533168e-05, "loss": 0.564, "step": 268000 }, { "epoch": 12.35, "learning_rate": 4.382417885730058e-05, "loss": 0.5797, "step": 268500 }, { "epoch": 12.37, "learning_rate": 4.381267825926949e-05, "loss": 0.5729, "step": 269000 }, { "epoch": 12.4, "learning_rate": 4.3801177661238385e-05, "loss": 0.5786, "step": 269500 }, { "epoch": 12.42, "learning_rate": 4.378967706320729e-05, "loss": 0.5727, "step": 270000 }, { "epoch": 12.44, "learning_rate": 4.377817646517619e-05, "loss": 0.5635, "step": 270500 }, { "epoch": 12.47, "learning_rate": 4.3766675867145095e-05, "loss": 0.5754, "step": 271000 }, { "epoch": 12.49, "learning_rate": 4.3755175269114e-05, "loss": 0.5766, "step": 271500 }, { "epoch": 12.51, "learning_rate": 4.3743674671082894e-05, "loss": 0.5828, "step": 272000 }, { "epoch": 12.54, "learning_rate": 4.3732174073051804e-05, "loss": 0.5872, "step": 272500 }, { "epoch": 12.56, "learning_rate": 4.37206734750207e-05, "loss": 0.5914, "step": 273000 }, { "epoch": 12.58, "learning_rate": 4.3709172876989604e-05, "loss": 0.5872, "step": 273500 }, { "epoch": 12.6, "learning_rate": 4.369767227895851e-05, "loss": 0.5894, "step": 274000 }, { "epoch": 12.63, "learning_rate": 4.368617168092741e-05, "loss": 0.605, "step": 274500 }, { "epoch": 12.65, "learning_rate": 4.3674671082896314e-05, "loss": 0.5987, "step": 275000 }, { "epoch": 12.67, "learning_rate": 4.366317048486521e-05, "loss": 0.6165, "step": 275500 }, { "epoch": 12.7, "learning_rate": 4.365166988683412e-05, "loss": 0.6005, "step": 276000 }, { "epoch": 12.72, "learning_rate": 4.364016928880302e-05, "loss": 0.6164, "step": 276500 }, { "epoch": 12.74, "learning_rate": 4.362866869077193e-05, "loss": 0.6115, "step": 277000 }, { "epoch": 12.77, "learning_rate": 4.361716809274082e-05, "loss": 0.618, "step": 277500 }, { "epoch": 12.79, "learning_rate": 4.3605667494709726e-05, "loss": 0.6112, "step": 278000 }, { "epoch": 12.81, "learning_rate": 4.359416689667863e-05, "loss": 0.6314, "step": 278500 }, { "epoch": 12.83, "learning_rate": 4.358266629864753e-05, "loss": 0.6044, "step": 279000 }, { "epoch": 12.86, "learning_rate": 4.3571165700616436e-05, "loss": 0.6204, "step": 279500 }, { "epoch": 12.88, "learning_rate": 4.355966510258533e-05, "loss": 0.6224, "step": 280000 }, { "epoch": 12.9, "learning_rate": 4.354816450455424e-05, "loss": 0.6067, "step": 280500 }, { "epoch": 12.93, "learning_rate": 4.353666390652314e-05, "loss": 0.6441, "step": 281000 }, { "epoch": 12.95, "learning_rate": 4.352516330849204e-05, "loss": 0.6264, "step": 281500 }, { "epoch": 12.97, "learning_rate": 4.3513662710460945e-05, "loss": 0.6244, "step": 282000 }, { "epoch": 13.0, "learning_rate": 4.350216211242985e-05, "loss": 0.6287, "step": 282500 }, { "epoch": 13.02, "learning_rate": 4.349066151439875e-05, "loss": 0.4926, "step": 283000 }, { "epoch": 13.04, "learning_rate": 4.347916091636765e-05, "loss": 0.4611, "step": 283500 }, { "epoch": 13.06, "learning_rate": 4.346766031833656e-05, "loss": 0.4717, "step": 284000 }, { "epoch": 13.09, "learning_rate": 4.3456159720305455e-05, "loss": 0.4781, "step": 284500 }, { "epoch": 13.11, "learning_rate": 4.3444659122274365e-05, "loss": 0.4897, "step": 285000 }, { "epoch": 13.13, "learning_rate": 4.343315852424326e-05, "loss": 0.4941, "step": 285500 }, { "epoch": 13.16, "learning_rate": 4.3421657926212165e-05, "loss": 0.4976, "step": 286000 }, { "epoch": 13.18, "learning_rate": 4.341015732818107e-05, "loss": 0.507, "step": 286500 }, { "epoch": 13.2, "learning_rate": 4.339865673014997e-05, "loss": 0.4944, "step": 287000 }, { "epoch": 13.23, "learning_rate": 4.3387156132118874e-05, "loss": 0.4972, "step": 287500 }, { "epoch": 13.25, "learning_rate": 4.337565553408777e-05, "loss": 0.5015, "step": 288000 }, { "epoch": 13.27, "learning_rate": 4.336415493605668e-05, "loss": 0.516, "step": 288500 }, { "epoch": 13.29, "learning_rate": 4.335265433802558e-05, "loss": 0.5248, "step": 289000 }, { "epoch": 13.32, "learning_rate": 4.334115373999448e-05, "loss": 0.525, "step": 289500 }, { "epoch": 13.34, "learning_rate": 4.3329653141963384e-05, "loss": 0.5164, "step": 290000 }, { "epoch": 13.36, "learning_rate": 4.331815254393229e-05, "loss": 0.5259, "step": 290500 }, { "epoch": 13.39, "learning_rate": 4.330665194590119e-05, "loss": 0.5273, "step": 291000 }, { "epoch": 13.41, "learning_rate": 4.3295151347870087e-05, "loss": 0.5263, "step": 291500 }, { "epoch": 13.43, "learning_rate": 4.3283650749838997e-05, "loss": 0.5376, "step": 292000 }, { "epoch": 13.46, "learning_rate": 4.327215015180789e-05, "loss": 0.5324, "step": 292500 }, { "epoch": 13.48, "learning_rate": 4.32606495537768e-05, "loss": 0.5325, "step": 293000 }, { "epoch": 13.5, "learning_rate": 4.32491489557457e-05, "loss": 0.5389, "step": 293500 }, { "epoch": 13.52, "learning_rate": 4.32376483577146e-05, "loss": 0.5601, "step": 294000 }, { "epoch": 13.55, "learning_rate": 4.3226147759683506e-05, "loss": 0.5432, "step": 294500 }, { "epoch": 13.57, "learning_rate": 4.321464716165241e-05, "loss": 0.5772, "step": 295000 }, { "epoch": 13.59, "learning_rate": 4.320314656362131e-05, "loss": 0.5396, "step": 295500 }, { "epoch": 13.62, "learning_rate": 4.319164596559021e-05, "loss": 0.5436, "step": 296000 }, { "epoch": 13.64, "learning_rate": 4.318014536755912e-05, "loss": 0.5663, "step": 296500 }, { "epoch": 13.66, "learning_rate": 4.3168644769528015e-05, "loss": 0.5509, "step": 297000 }, { "epoch": 13.69, "learning_rate": 4.315714417149692e-05, "loss": 0.5716, "step": 297500 }, { "epoch": 13.71, "learning_rate": 4.314564357346582e-05, "loss": 0.5665, "step": 298000 }, { "epoch": 13.73, "learning_rate": 4.3134142975434725e-05, "loss": 0.5588, "step": 298500 }, { "epoch": 13.75, "learning_rate": 4.312264237740363e-05, "loss": 0.5744, "step": 299000 }, { "epoch": 13.78, "learning_rate": 4.3111141779372525e-05, "loss": 0.5684, "step": 299500 }, { "epoch": 13.8, "learning_rate": 4.3099641181341435e-05, "loss": 0.5725, "step": 300000 }, { "epoch": 13.82, "learning_rate": 4.308814058331033e-05, "loss": 0.5713, "step": 300500 }, { "epoch": 13.85, "learning_rate": 4.307663998527924e-05, "loss": 0.5689, "step": 301000 }, { "epoch": 13.87, "learning_rate": 4.306513938724814e-05, "loss": 0.5714, "step": 301500 }, { "epoch": 13.89, "learning_rate": 4.305363878921704e-05, "loss": 0.5767, "step": 302000 }, { "epoch": 13.92, "learning_rate": 4.3042138191185944e-05, "loss": 0.5813, "step": 302500 }, { "epoch": 13.94, "learning_rate": 4.303063759315485e-05, "loss": 0.5833, "step": 303000 }, { "epoch": 13.96, "learning_rate": 4.301913699512375e-05, "loss": 0.5865, "step": 303500 }, { "epoch": 13.98, "learning_rate": 4.300763639709265e-05, "loss": 0.5869, "step": 304000 }, { "epoch": 14.01, "learning_rate": 4.299613579906156e-05, "loss": 0.5397, "step": 304500 }, { "epoch": 14.03, "learning_rate": 4.2984635201030454e-05, "loss": 0.4388, "step": 305000 }, { "epoch": 14.05, "learning_rate": 4.297313460299936e-05, "loss": 0.4457, "step": 305500 }, { "epoch": 14.08, "learning_rate": 4.296163400496826e-05, "loss": 0.4438, "step": 306000 }, { "epoch": 14.1, "learning_rate": 4.295013340693716e-05, "loss": 0.4466, "step": 306500 }, { "epoch": 14.12, "learning_rate": 4.2938632808906066e-05, "loss": 0.4655, "step": 307000 }, { "epoch": 14.15, "learning_rate": 4.292713221087496e-05, "loss": 0.4524, "step": 307500 }, { "epoch": 14.17, "learning_rate": 4.291563161284387e-05, "loss": 0.4516, "step": 308000 }, { "epoch": 14.19, "learning_rate": 4.290413101481277e-05, "loss": 0.4676, "step": 308500 }, { "epoch": 14.21, "learning_rate": 4.289263041678168e-05, "loss": 0.4657, "step": 309000 }, { "epoch": 14.24, "learning_rate": 4.2881129818750576e-05, "loss": 0.4707, "step": 309500 }, { "epoch": 14.26, "learning_rate": 4.286962922071948e-05, "loss": 0.4829, "step": 310000 }, { "epoch": 14.28, "learning_rate": 4.285812862268838e-05, "loss": 0.476, "step": 310500 }, { "epoch": 14.31, "learning_rate": 4.284662802465728e-05, "loss": 0.4893, "step": 311000 }, { "epoch": 14.33, "learning_rate": 4.283512742662619e-05, "loss": 0.4823, "step": 311500 }, { "epoch": 14.35, "learning_rate": 4.2823626828595085e-05, "loss": 0.4947, "step": 312000 }, { "epoch": 14.38, "learning_rate": 4.2812126230563995e-05, "loss": 0.4843, "step": 312500 }, { "epoch": 14.4, "learning_rate": 4.280062563253289e-05, "loss": 0.4926, "step": 313000 }, { "epoch": 14.42, "learning_rate": 4.2789125034501795e-05, "loss": 0.5091, "step": 313500 }, { "epoch": 14.44, "learning_rate": 4.27776244364707e-05, "loss": 0.4938, "step": 314000 }, { "epoch": 14.47, "learning_rate": 4.27661238384396e-05, "loss": 0.4959, "step": 314500 }, { "epoch": 14.49, "learning_rate": 4.2754623240408505e-05, "loss": 0.5088, "step": 315000 }, { "epoch": 14.51, "learning_rate": 4.27431226423774e-05, "loss": 0.5012, "step": 315500 }, { "epoch": 14.54, "learning_rate": 4.273162204434631e-05, "loss": 0.5063, "step": 316000 }, { "epoch": 14.56, "learning_rate": 4.272012144631521e-05, "loss": 0.5135, "step": 316500 }, { "epoch": 14.58, "learning_rate": 4.270862084828412e-05, "loss": 0.5212, "step": 317000 }, { "epoch": 14.61, "learning_rate": 4.2697120250253014e-05, "loss": 0.5166, "step": 317500 }, { "epoch": 14.63, "learning_rate": 4.268561965222192e-05, "loss": 0.521, "step": 318000 }, { "epoch": 14.65, "learning_rate": 4.267411905419082e-05, "loss": 0.5225, "step": 318500 }, { "epoch": 14.67, "learning_rate": 4.266261845615972e-05, "loss": 0.5248, "step": 319000 }, { "epoch": 14.7, "learning_rate": 4.265111785812863e-05, "loss": 0.5223, "step": 319500 }, { "epoch": 14.72, "learning_rate": 4.2639617260097523e-05, "loss": 0.5177, "step": 320000 }, { "epoch": 14.74, "learning_rate": 4.2628116662066433e-05, "loss": 0.5383, "step": 320500 }, { "epoch": 14.77, "learning_rate": 4.261661606403533e-05, "loss": 0.5249, "step": 321000 }, { "epoch": 14.79, "learning_rate": 4.260511546600423e-05, "loss": 0.5382, "step": 321500 }, { "epoch": 14.81, "learning_rate": 4.2593614867973136e-05, "loss": 0.5288, "step": 322000 }, { "epoch": 14.84, "learning_rate": 4.258211426994204e-05, "loss": 0.5317, "step": 322500 }, { "epoch": 14.86, "learning_rate": 4.257061367191094e-05, "loss": 0.5369, "step": 323000 }, { "epoch": 14.88, "learning_rate": 4.255911307387984e-05, "loss": 0.5293, "step": 323500 }, { "epoch": 14.9, "learning_rate": 4.254761247584875e-05, "loss": 0.5487, "step": 324000 }, { "epoch": 14.93, "learning_rate": 4.2536111877817646e-05, "loss": 0.5443, "step": 324500 }, { "epoch": 14.95, "learning_rate": 4.2524611279786556e-05, "loss": 0.5432, "step": 325000 }, { "epoch": 14.97, "learning_rate": 4.251311068175545e-05, "loss": 0.5363, "step": 325500 }, { "epoch": 15.0, "learning_rate": 4.2501610083724356e-05, "loss": 0.5523, "step": 326000 }, { "epoch": 15.02, "learning_rate": 4.249010948569326e-05, "loss": 0.4303, "step": 326500 }, { "epoch": 15.04, "learning_rate": 4.2478608887662155e-05, "loss": 0.4193, "step": 327000 }, { "epoch": 15.07, "learning_rate": 4.2467108289631065e-05, "loss": 0.4129, "step": 327500 }, { "epoch": 15.09, "learning_rate": 4.245560769159996e-05, "loss": 0.4342, "step": 328000 }, { "epoch": 15.11, "learning_rate": 4.244410709356887e-05, "loss": 0.4214, "step": 328500 }, { "epoch": 15.13, "learning_rate": 4.243260649553777e-05, "loss": 0.4347, "step": 329000 }, { "epoch": 15.16, "learning_rate": 4.242110589750667e-05, "loss": 0.4331, "step": 329500 }, { "epoch": 15.18, "learning_rate": 4.2409605299475575e-05, "loss": 0.4381, "step": 330000 }, { "epoch": 15.2, "learning_rate": 4.239810470144448e-05, "loss": 0.4414, "step": 330500 }, { "epoch": 15.23, "learning_rate": 4.238660410341338e-05, "loss": 0.4417, "step": 331000 }, { "epoch": 15.25, "learning_rate": 4.237510350538228e-05, "loss": 0.4467, "step": 331500 }, { "epoch": 15.27, "learning_rate": 4.236360290735119e-05, "loss": 0.4511, "step": 332000 }, { "epoch": 15.3, "learning_rate": 4.2352102309320084e-05, "loss": 0.4535, "step": 332500 }, { "epoch": 15.32, "learning_rate": 4.2340601711288994e-05, "loss": 0.4567, "step": 333000 }, { "epoch": 15.34, "learning_rate": 4.232910111325789e-05, "loss": 0.4619, "step": 333500 }, { "epoch": 15.36, "learning_rate": 4.2317600515226794e-05, "loss": 0.4538, "step": 334000 }, { "epoch": 15.39, "learning_rate": 4.23060999171957e-05, "loss": 0.4543, "step": 334500 }, { "epoch": 15.41, "learning_rate": 4.2294599319164593e-05, "loss": 0.4646, "step": 335000 }, { "epoch": 15.43, "learning_rate": 4.2283098721133503e-05, "loss": 0.4691, "step": 335500 }, { "epoch": 15.46, "learning_rate": 4.22715981231024e-05, "loss": 0.4788, "step": 336000 }, { "epoch": 15.48, "learning_rate": 4.226009752507131e-05, "loss": 0.4814, "step": 336500 }, { "epoch": 15.5, "learning_rate": 4.2248596927040206e-05, "loss": 0.4736, "step": 337000 }, { "epoch": 15.53, "learning_rate": 4.223709632900911e-05, "loss": 0.4844, "step": 337500 }, { "epoch": 15.55, "learning_rate": 4.222559573097801e-05, "loss": 0.4841, "step": 338000 }, { "epoch": 15.57, "learning_rate": 4.221409513294691e-05, "loss": 0.4825, "step": 338500 }, { "epoch": 15.59, "learning_rate": 4.220259453491582e-05, "loss": 0.4818, "step": 339000 }, { "epoch": 15.62, "learning_rate": 4.2191093936884716e-05, "loss": 0.4914, "step": 339500 }, { "epoch": 15.64, "learning_rate": 4.2179593338853626e-05, "loss": 0.489, "step": 340000 }, { "epoch": 15.66, "learning_rate": 4.216809274082252e-05, "loss": 0.4885, "step": 340500 }, { "epoch": 15.69, "learning_rate": 4.215659214279143e-05, "loss": 0.4842, "step": 341000 }, { "epoch": 15.71, "learning_rate": 4.214509154476033e-05, "loss": 0.4921, "step": 341500 }, { "epoch": 15.73, "learning_rate": 4.213359094672923e-05, "loss": 0.4845, "step": 342000 }, { "epoch": 15.76, "learning_rate": 4.2122090348698135e-05, "loss": 0.5083, "step": 342500 }, { "epoch": 15.78, "learning_rate": 4.211058975066703e-05, "loss": 0.4977, "step": 343000 }, { "epoch": 15.8, "learning_rate": 4.209908915263594e-05, "loss": 0.5086, "step": 343500 }, { "epoch": 15.82, "learning_rate": 4.208758855460484e-05, "loss": 0.4974, "step": 344000 }, { "epoch": 15.85, "learning_rate": 4.207608795657375e-05, "loss": 0.4957, "step": 344500 }, { "epoch": 15.87, "learning_rate": 4.2064587358542645e-05, "loss": 0.5089, "step": 345000 }, { "epoch": 15.89, "learning_rate": 4.205308676051155e-05, "loss": 0.5152, "step": 345500 }, { "epoch": 15.92, "learning_rate": 4.204158616248045e-05, "loss": 0.5119, "step": 346000 }, { "epoch": 15.94, "learning_rate": 4.203008556444935e-05, "loss": 0.5188, "step": 346500 }, { "epoch": 15.96, "learning_rate": 4.201858496641826e-05, "loss": 0.5116, "step": 347000 }, { "epoch": 15.99, "learning_rate": 4.2007084368387154e-05, "loss": 0.5179, "step": 347500 }, { "epoch": 16.01, "learning_rate": 4.1995583770356064e-05, "loss": 0.4619, "step": 348000 }, { "epoch": 16.03, "learning_rate": 4.198408317232496e-05, "loss": 0.3856, "step": 348500 }, { "epoch": 16.05, "learning_rate": 4.197258257429387e-05, "loss": 0.3911, "step": 349000 }, { "epoch": 16.08, "learning_rate": 4.196108197626277e-05, "loss": 0.4011, "step": 349500 }, { "epoch": 16.1, "learning_rate": 4.194958137823167e-05, "loss": 0.3983, "step": 350000 }, { "epoch": 16.12, "learning_rate": 4.193808078020057e-05, "loss": 0.4152, "step": 350500 }, { "epoch": 16.15, "learning_rate": 4.192658018216947e-05, "loss": 0.4156, "step": 351000 }, { "epoch": 16.17, "learning_rate": 4.191507958413838e-05, "loss": 0.412, "step": 351500 }, { "epoch": 16.19, "learning_rate": 4.1903578986107276e-05, "loss": 0.4186, "step": 352000 }, { "epoch": 16.22, "learning_rate": 4.1892078388076186e-05, "loss": 0.4197, "step": 352500 }, { "epoch": 16.24, "learning_rate": 4.188057779004508e-05, "loss": 0.4181, "step": 353000 }, { "epoch": 16.26, "learning_rate": 4.1869077192013986e-05, "loss": 0.4326, "step": 353500 }, { "epoch": 16.28, "learning_rate": 4.185757659398289e-05, "loss": 0.428, "step": 354000 }, { "epoch": 16.31, "learning_rate": 4.1846075995951786e-05, "loss": 0.4316, "step": 354500 }, { "epoch": 16.33, "learning_rate": 4.1834575397920696e-05, "loss": 0.431, "step": 355000 }, { "epoch": 16.35, "learning_rate": 4.182307479988959e-05, "loss": 0.4296, "step": 355500 }, { "epoch": 16.38, "learning_rate": 4.18115742018585e-05, "loss": 0.4434, "step": 356000 }, { "epoch": 16.4, "learning_rate": 4.18000736038274e-05, "loss": 0.4345, "step": 356500 }, { "epoch": 16.42, "learning_rate": 4.178857300579631e-05, "loss": 0.4435, "step": 357000 }, { "epoch": 16.45, "learning_rate": 4.1777072407765205e-05, "loss": 0.4439, "step": 357500 }, { "epoch": 16.47, "learning_rate": 4.176557180973411e-05, "loss": 0.4447, "step": 358000 }, { "epoch": 16.49, "learning_rate": 4.175407121170301e-05, "loss": 0.4478, "step": 358500 }, { "epoch": 16.51, "learning_rate": 4.174257061367191e-05, "loss": 0.449, "step": 359000 }, { "epoch": 16.54, "learning_rate": 4.173107001564082e-05, "loss": 0.4528, "step": 359500 }, { "epoch": 16.56, "learning_rate": 4.1719569417609714e-05, "loss": 0.4612, "step": 360000 }, { "epoch": 16.58, "learning_rate": 4.1708068819578624e-05, "loss": 0.4615, "step": 360500 }, { "epoch": 16.61, "learning_rate": 4.169656822154752e-05, "loss": 0.456, "step": 361000 }, { "epoch": 16.63, "learning_rate": 4.1685067623516424e-05, "loss": 0.4703, "step": 361500 }, { "epoch": 16.65, "learning_rate": 4.167356702548533e-05, "loss": 0.4612, "step": 362000 }, { "epoch": 16.68, "learning_rate": 4.1662066427454224e-05, "loss": 0.459, "step": 362500 }, { "epoch": 16.7, "learning_rate": 4.1650565829423134e-05, "loss": 0.4552, "step": 363000 }, { "epoch": 16.72, "learning_rate": 4.163906523139203e-05, "loss": 0.4637, "step": 363500 }, { "epoch": 16.74, "learning_rate": 4.162756463336094e-05, "loss": 0.47, "step": 364000 }, { "epoch": 16.77, "learning_rate": 4.161606403532984e-05, "loss": 0.4673, "step": 364500 }, { "epoch": 16.79, "learning_rate": 4.160456343729875e-05, "loss": 0.4678, "step": 365000 }, { "epoch": 16.81, "learning_rate": 4.159306283926764e-05, "loss": 0.4777, "step": 365500 }, { "epoch": 16.84, "learning_rate": 4.1581562241236546e-05, "loss": 0.4731, "step": 366000 }, { "epoch": 16.86, "learning_rate": 4.157006164320545e-05, "loss": 0.4756, "step": 366500 }, { "epoch": 16.88, "learning_rate": 4.1558561045174346e-05, "loss": 0.483, "step": 367000 }, { "epoch": 16.91, "learning_rate": 4.1547060447143256e-05, "loss": 0.4758, "step": 367500 }, { "epoch": 16.93, "learning_rate": 4.153555984911215e-05, "loss": 0.4805, "step": 368000 }, { "epoch": 16.95, "learning_rate": 4.152405925108106e-05, "loss": 0.472, "step": 368500 }, { "epoch": 16.97, "learning_rate": 4.151255865304996e-05, "loss": 0.4973, "step": 369000 }, { "epoch": 17.0, "learning_rate": 4.150105805501886e-05, "loss": 0.4941, "step": 369500 }, { "epoch": 17.02, "learning_rate": 4.1489557456987766e-05, "loss": 0.3828, "step": 370000 }, { "epoch": 17.04, "learning_rate": 4.147805685895666e-05, "loss": 0.3745, "step": 370500 }, { "epoch": 17.07, "learning_rate": 4.146655626092557e-05, "loss": 0.3841, "step": 371000 }, { "epoch": 17.09, "learning_rate": 4.145505566289447e-05, "loss": 0.3885, "step": 371500 }, { "epoch": 17.11, "learning_rate": 4.144355506486338e-05, "loss": 0.3846, "step": 372000 }, { "epoch": 17.14, "learning_rate": 4.1432054466832275e-05, "loss": 0.3964, "step": 372500 }, { "epoch": 17.16, "learning_rate": 4.1420553868801185e-05, "loss": 0.3998, "step": 373000 }, { "epoch": 17.18, "learning_rate": 4.140905327077008e-05, "loss": 0.3947, "step": 373500 }, { "epoch": 17.2, "learning_rate": 4.1397552672738985e-05, "loss": 0.3935, "step": 374000 }, { "epoch": 17.23, "learning_rate": 4.138605207470789e-05, "loss": 0.4021, "step": 374500 }, { "epoch": 17.25, "learning_rate": 4.1374551476676784e-05, "loss": 0.4008, "step": 375000 }, { "epoch": 17.27, "learning_rate": 4.1363050878645694e-05, "loss": 0.4074, "step": 375500 }, { "epoch": 17.3, "learning_rate": 4.135155028061459e-05, "loss": 0.4104, "step": 376000 }, { "epoch": 17.32, "learning_rate": 4.13400496825835e-05, "loss": 0.4229, "step": 376500 }, { "epoch": 17.34, "learning_rate": 4.13285490845524e-05, "loss": 0.4156, "step": 377000 }, { "epoch": 17.37, "learning_rate": 4.13170484865213e-05, "loss": 0.418, "step": 377500 }, { "epoch": 17.39, "learning_rate": 4.1305547888490204e-05, "loss": 0.425, "step": 378000 }, { "epoch": 17.41, "learning_rate": 4.12940472904591e-05, "loss": 0.4137, "step": 378500 }, { "epoch": 17.43, "learning_rate": 4.128254669242801e-05, "loss": 0.4249, "step": 379000 }, { "epoch": 17.46, "learning_rate": 4.127104609439691e-05, "loss": 0.4294, "step": 379500 }, { "epoch": 17.48, "learning_rate": 4.125954549636582e-05, "loss": 0.4181, "step": 380000 }, { "epoch": 17.5, "learning_rate": 4.124804489833471e-05, "loss": 0.427, "step": 380500 }, { "epoch": 17.53, "learning_rate": 4.123654430030362e-05, "loss": 0.4268, "step": 381000 }, { "epoch": 17.55, "learning_rate": 4.122504370227252e-05, "loss": 0.4318, "step": 381500 }, { "epoch": 17.57, "learning_rate": 4.121354310424142e-05, "loss": 0.435, "step": 382000 }, { "epoch": 17.6, "learning_rate": 4.1202042506210326e-05, "loss": 0.4388, "step": 382500 }, { "epoch": 17.62, "learning_rate": 4.119054190817922e-05, "loss": 0.435, "step": 383000 }, { "epoch": 17.64, "learning_rate": 4.117904131014813e-05, "loss": 0.4391, "step": 383500 }, { "epoch": 17.66, "learning_rate": 4.116754071211703e-05, "loss": 0.4414, "step": 384000 }, { "epoch": 17.69, "learning_rate": 4.115604011408594e-05, "loss": 0.4335, "step": 384500 }, { "epoch": 17.71, "learning_rate": 4.1144539516054836e-05, "loss": 0.4391, "step": 385000 }, { "epoch": 17.73, "learning_rate": 4.113303891802374e-05, "loss": 0.4447, "step": 385500 }, { "epoch": 17.76, "learning_rate": 4.112153831999264e-05, "loss": 0.4451, "step": 386000 }, { "epoch": 17.78, "learning_rate": 4.111003772196154e-05, "loss": 0.4502, "step": 386500 }, { "epoch": 17.8, "learning_rate": 4.109853712393045e-05, "loss": 0.4456, "step": 387000 }, { "epoch": 17.83, "learning_rate": 4.1087036525899345e-05, "loss": 0.4428, "step": 387500 }, { "epoch": 17.85, "learning_rate": 4.1075535927868255e-05, "loss": 0.4537, "step": 388000 }, { "epoch": 17.87, "learning_rate": 4.106403532983715e-05, "loss": 0.4496, "step": 388500 }, { "epoch": 17.89, "learning_rate": 4.105253473180606e-05, "loss": 0.4566, "step": 389000 }, { "epoch": 17.92, "learning_rate": 4.104103413377496e-05, "loss": 0.4556, "step": 389500 }, { "epoch": 17.94, "learning_rate": 4.102953353574386e-05, "loss": 0.4638, "step": 390000 }, { "epoch": 17.96, "learning_rate": 4.1018032937712764e-05, "loss": 0.4587, "step": 390500 }, { "epoch": 17.99, "learning_rate": 4.100653233968166e-05, "loss": 0.4567, "step": 391000 }, { "epoch": 18.01, "learning_rate": 4.099503174165057e-05, "loss": 0.4147, "step": 391500 }, { "epoch": 18.03, "learning_rate": 4.098353114361947e-05, "loss": 0.3594, "step": 392000 }, { "epoch": 18.06, "learning_rate": 4.097203054558838e-05, "loss": 0.361, "step": 392500 }, { "epoch": 18.08, "learning_rate": 4.0960529947557274e-05, "loss": 0.3704, "step": 393000 }, { "epoch": 18.1, "learning_rate": 4.094902934952618e-05, "loss": 0.3697, "step": 393500 }, { "epoch": 18.12, "learning_rate": 4.093752875149508e-05, "loss": 0.3811, "step": 394000 }, { "epoch": 18.15, "learning_rate": 4.092602815346398e-05, "loss": 0.3787, "step": 394500 }, { "epoch": 18.17, "learning_rate": 4.091452755543289e-05, "loss": 0.3786, "step": 395000 }, { "epoch": 18.19, "learning_rate": 4.090302695740178e-05, "loss": 0.3759, "step": 395500 }, { "epoch": 18.22, "learning_rate": 4.089152635937069e-05, "loss": 0.389, "step": 396000 }, { "epoch": 18.24, "learning_rate": 4.088002576133959e-05, "loss": 0.3867, "step": 396500 }, { "epoch": 18.26, "learning_rate": 4.08685251633085e-05, "loss": 0.3847, "step": 397000 }, { "epoch": 18.29, "learning_rate": 4.0857024565277396e-05, "loss": 0.394, "step": 397500 }, { "epoch": 18.31, "learning_rate": 4.08455239672463e-05, "loss": 0.3931, "step": 398000 }, { "epoch": 18.33, "learning_rate": 4.08340233692152e-05, "loss": 0.4003, "step": 398500 }, { "epoch": 18.35, "learning_rate": 4.08225227711841e-05, "loss": 0.406, "step": 399000 }, { "epoch": 18.38, "learning_rate": 4.081102217315301e-05, "loss": 0.3985, "step": 399500 }, { "epoch": 18.4, "learning_rate": 4.0799521575121905e-05, "loss": 0.4005, "step": 400000 }, { "epoch": 18.42, "learning_rate": 4.0788020977090815e-05, "loss": 0.4038, "step": 400500 }, { "epoch": 18.45, "learning_rate": 4.077652037905971e-05, "loss": 0.403, "step": 401000 }, { "epoch": 18.47, "learning_rate": 4.0765019781028615e-05, "loss": 0.412, "step": 401500 }, { "epoch": 18.49, "learning_rate": 4.075351918299752e-05, "loss": 0.4117, "step": 402000 }, { "epoch": 18.52, "learning_rate": 4.0742018584966415e-05, "loss": 0.4155, "step": 402500 }, { "epoch": 18.54, "learning_rate": 4.0730517986935325e-05, "loss": 0.4263, "step": 403000 }, { "epoch": 18.56, "learning_rate": 4.071901738890422e-05, "loss": 0.416, "step": 403500 }, { "epoch": 18.58, "learning_rate": 4.070751679087313e-05, "loss": 0.4075, "step": 404000 }, { "epoch": 18.61, "learning_rate": 4.069601619284203e-05, "loss": 0.4252, "step": 404500 }, { "epoch": 18.63, "learning_rate": 4.068451559481094e-05, "loss": 0.4294, "step": 405000 }, { "epoch": 18.65, "learning_rate": 4.0673014996779834e-05, "loss": 0.416, "step": 405500 }, { "epoch": 18.68, "learning_rate": 4.066151439874874e-05, "loss": 0.4185, "step": 406000 }, { "epoch": 18.7, "learning_rate": 4.065001380071764e-05, "loss": 0.4272, "step": 406500 }, { "epoch": 18.72, "learning_rate": 4.063851320268654e-05, "loss": 0.4292, "step": 407000 }, { "epoch": 18.75, "learning_rate": 4.062701260465545e-05, "loss": 0.4295, "step": 407500 }, { "epoch": 18.77, "learning_rate": 4.0615512006624344e-05, "loss": 0.4268, "step": 408000 }, { "epoch": 18.79, "learning_rate": 4.0604011408593254e-05, "loss": 0.4271, "step": 408500 }, { "epoch": 18.81, "learning_rate": 4.059251081056215e-05, "loss": 0.4354, "step": 409000 }, { "epoch": 18.84, "learning_rate": 4.058101021253105e-05, "loss": 0.4272, "step": 409500 }, { "epoch": 18.86, "learning_rate": 4.0569509614499957e-05, "loss": 0.4339, "step": 410000 }, { "epoch": 18.88, "learning_rate": 4.055800901646885e-05, "loss": 0.4297, "step": 410500 }, { "epoch": 18.91, "learning_rate": 4.054650841843776e-05, "loss": 0.4425, "step": 411000 }, { "epoch": 18.93, "learning_rate": 4.053500782040666e-05, "loss": 0.4396, "step": 411500 }, { "epoch": 18.95, "learning_rate": 4.052350722237557e-05, "loss": 0.4387, "step": 412000 }, { "epoch": 18.98, "learning_rate": 4.0512006624344466e-05, "loss": 0.4359, "step": 412500 }, { "epoch": 19.0, "learning_rate": 4.0500506026313376e-05, "loss": 0.4436, "step": 413000 }, { "epoch": 19.02, "learning_rate": 4.048900542828227e-05, "loss": 0.3538, "step": 413500 }, { "epoch": 19.04, "learning_rate": 4.0477504830251176e-05, "loss": 0.3486, "step": 414000 }, { "epoch": 19.07, "learning_rate": 4.046600423222008e-05, "loss": 0.3563, "step": 414500 }, { "epoch": 19.09, "learning_rate": 4.0454503634188975e-05, "loss": 0.3627, "step": 415000 }, { "epoch": 19.11, "learning_rate": 4.0443003036157885e-05, "loss": 0.362, "step": 415500 }, { "epoch": 19.14, "learning_rate": 4.043150243812678e-05, "loss": 0.3652, "step": 416000 }, { "epoch": 19.16, "learning_rate": 4.042000184009569e-05, "loss": 0.3663, "step": 416500 }, { "epoch": 19.18, "learning_rate": 4.040850124206459e-05, "loss": 0.3693, "step": 417000 }, { "epoch": 19.21, "learning_rate": 4.039700064403349e-05, "loss": 0.3689, "step": 417500 }, { "epoch": 19.23, "learning_rate": 4.0385500046002395e-05, "loss": 0.3742, "step": 418000 }, { "epoch": 19.25, "learning_rate": 4.03739994479713e-05, "loss": 0.372, "step": 418500 }, { "epoch": 19.28, "learning_rate": 4.03624988499402e-05, "loss": 0.3773, "step": 419000 }, { "epoch": 19.3, "learning_rate": 4.03509982519091e-05, "loss": 0.3801, "step": 419500 }, { "epoch": 19.32, "learning_rate": 4.033949765387801e-05, "loss": 0.3803, "step": 420000 }, { "epoch": 19.34, "learning_rate": 4.0327997055846904e-05, "loss": 0.386, "step": 420500 }, { "epoch": 19.37, "learning_rate": 4.0316496457815814e-05, "loss": 0.3871, "step": 421000 }, { "epoch": 19.39, "learning_rate": 4.030499585978471e-05, "loss": 0.39, "step": 421500 }, { "epoch": 19.41, "learning_rate": 4.0293495261753614e-05, "loss": 0.3814, "step": 422000 }, { "epoch": 19.44, "learning_rate": 4.028199466372252e-05, "loss": 0.3903, "step": 422500 }, { "epoch": 19.46, "learning_rate": 4.0270494065691414e-05, "loss": 0.3882, "step": 423000 }, { "epoch": 19.48, "learning_rate": 4.0258993467660324e-05, "loss": 0.3973, "step": 423500 }, { "epoch": 19.51, "learning_rate": 4.024749286962922e-05, "loss": 0.3941, "step": 424000 }, { "epoch": 19.53, "learning_rate": 4.023599227159813e-05, "loss": 0.3952, "step": 424500 }, { "epoch": 19.55, "learning_rate": 4.0224491673567027e-05, "loss": 0.4007, "step": 425000 }, { "epoch": 19.57, "learning_rate": 4.021299107553593e-05, "loss": 0.3974, "step": 425500 }, { "epoch": 19.6, "learning_rate": 4.020149047750483e-05, "loss": 0.3981, "step": 426000 }, { "epoch": 19.62, "learning_rate": 4.0189989879473736e-05, "loss": 0.4039, "step": 426500 }, { "epoch": 19.64, "learning_rate": 4.017848928144264e-05, "loss": 0.4038, "step": 427000 }, { "epoch": 19.67, "learning_rate": 4.0166988683411536e-05, "loss": 0.3981, "step": 427500 }, { "epoch": 19.69, "learning_rate": 4.0155488085380446e-05, "loss": 0.4144, "step": 428000 }, { "epoch": 19.71, "learning_rate": 4.014398748734934e-05, "loss": 0.415, "step": 428500 }, { "epoch": 19.74, "learning_rate": 4.0132486889318246e-05, "loss": 0.4079, "step": 429000 }, { "epoch": 19.76, "learning_rate": 4.012098629128715e-05, "loss": 0.4066, "step": 429500 }, { "epoch": 19.78, "learning_rate": 4.010948569325605e-05, "loss": 0.4064, "step": 430000 }, { "epoch": 19.8, "learning_rate": 4.0097985095224955e-05, "loss": 0.4176, "step": 430500 }, { "epoch": 19.83, "learning_rate": 4.008648449719385e-05, "loss": 0.4103, "step": 431000 }, { "epoch": 19.85, "learning_rate": 4.007498389916276e-05, "loss": 0.4167, "step": 431500 }, { "epoch": 19.87, "learning_rate": 4.006348330113166e-05, "loss": 0.4194, "step": 432000 }, { "epoch": 19.9, "learning_rate": 4.005198270310057e-05, "loss": 0.4168, "step": 432500 }, { "epoch": 19.92, "learning_rate": 4.0040482105069465e-05, "loss": 0.4083, "step": 433000 }, { "epoch": 19.94, "learning_rate": 4.002898150703837e-05, "loss": 0.4144, "step": 433500 }, { "epoch": 19.97, "learning_rate": 4.001748090900727e-05, "loss": 0.4239, "step": 434000 }, { "epoch": 19.99, "learning_rate": 4.0005980310976174e-05, "loss": 0.4139, "step": 434500 }, { "epoch": 20.01, "learning_rate": 3.999447971294508e-05, "loss": 0.3805, "step": 435000 }, { "epoch": 20.03, "learning_rate": 3.9982979114913974e-05, "loss": 0.3343, "step": 435500 }, { "epoch": 20.06, "learning_rate": 3.9971478516882884e-05, "loss": 0.347, "step": 436000 }, { "epoch": 20.08, "learning_rate": 3.995997791885178e-05, "loss": 0.3427, "step": 436500 }, { "epoch": 20.1, "learning_rate": 3.9948477320820684e-05, "loss": 0.3482, "step": 437000 }, { "epoch": 20.13, "learning_rate": 3.993697672278959e-05, "loss": 0.3549, "step": 437500 }, { "epoch": 20.15, "learning_rate": 3.992547612475849e-05, "loss": 0.3529, "step": 438000 }, { "epoch": 20.17, "learning_rate": 3.9913975526727394e-05, "loss": 0.3599, "step": 438500 }, { "epoch": 20.2, "learning_rate": 3.990247492869629e-05, "loss": 0.3576, "step": 439000 }, { "epoch": 20.22, "learning_rate": 3.98909743306652e-05, "loss": 0.3615, "step": 439500 }, { "epoch": 20.24, "learning_rate": 3.9879473732634096e-05, "loss": 0.3632, "step": 440000 }, { "epoch": 20.26, "learning_rate": 3.9867973134603006e-05, "loss": 0.3673, "step": 440500 }, { "epoch": 20.29, "learning_rate": 3.98564725365719e-05, "loss": 0.3636, "step": 441000 }, { "epoch": 20.31, "learning_rate": 3.9844971938540806e-05, "loss": 0.3683, "step": 441500 }, { "epoch": 20.33, "learning_rate": 3.983347134050971e-05, "loss": 0.3711, "step": 442000 }, { "epoch": 20.36, "learning_rate": 3.982197074247861e-05, "loss": 0.373, "step": 442500 }, { "epoch": 20.38, "learning_rate": 3.9810470144447516e-05, "loss": 0.3734, "step": 443000 }, { "epoch": 20.4, "learning_rate": 3.979896954641641e-05, "loss": 0.3745, "step": 443500 }, { "epoch": 20.43, "learning_rate": 3.978746894838532e-05, "loss": 0.3764, "step": 444000 }, { "epoch": 20.45, "learning_rate": 3.977596835035422e-05, "loss": 0.3784, "step": 444500 }, { "epoch": 20.47, "learning_rate": 3.976446775232312e-05, "loss": 0.3835, "step": 445000 }, { "epoch": 20.49, "learning_rate": 3.9752967154292025e-05, "loss": 0.3817, "step": 445500 }, { "epoch": 20.52, "learning_rate": 3.974146655626093e-05, "loss": 0.3784, "step": 446000 }, { "epoch": 20.54, "learning_rate": 3.972996595822983e-05, "loss": 0.3857, "step": 446500 }, { "epoch": 20.56, "learning_rate": 3.971846536019873e-05, "loss": 0.3893, "step": 447000 }, { "epoch": 20.59, "learning_rate": 3.970696476216764e-05, "loss": 0.3919, "step": 447500 }, { "epoch": 20.61, "learning_rate": 3.9695464164136535e-05, "loss": 0.3813, "step": 448000 }, { "epoch": 20.63, "learning_rate": 3.9683963566105445e-05, "loss": 0.3912, "step": 448500 }, { "epoch": 20.66, "learning_rate": 3.967246296807434e-05, "loss": 0.3844, "step": 449000 }, { "epoch": 20.68, "learning_rate": 3.9660962370043244e-05, "loss": 0.3866, "step": 449500 }, { "epoch": 20.7, "learning_rate": 3.964946177201215e-05, "loss": 0.3992, "step": 450000 }, { "epoch": 20.72, "learning_rate": 3.963796117398105e-05, "loss": 0.398, "step": 450500 }, { "epoch": 20.75, "learning_rate": 3.9626460575949954e-05, "loss": 0.3927, "step": 451000 }, { "epoch": 20.77, "learning_rate": 3.961495997791885e-05, "loss": 0.3935, "step": 451500 }, { "epoch": 20.79, "learning_rate": 3.960345937988776e-05, "loss": 0.3978, "step": 452000 }, { "epoch": 20.82, "learning_rate": 3.959195878185666e-05, "loss": 0.4075, "step": 452500 }, { "epoch": 20.84, "learning_rate": 3.958045818382556e-05, "loss": 0.3862, "step": 453000 }, { "epoch": 20.86, "learning_rate": 3.9568957585794463e-05, "loss": 0.4076, "step": 453500 }, { "epoch": 20.89, "learning_rate": 3.955745698776337e-05, "loss": 0.3977, "step": 454000 }, { "epoch": 20.91, "learning_rate": 3.954595638973227e-05, "loss": 0.4084, "step": 454500 }, { "epoch": 20.93, "learning_rate": 3.9534455791701166e-05, "loss": 0.3951, "step": 455000 }, { "epoch": 20.95, "learning_rate": 3.9522955193670076e-05, "loss": 0.398, "step": 455500 }, { "epoch": 20.98, "learning_rate": 3.951145459563897e-05, "loss": 0.4075, "step": 456000 }, { "epoch": 21.0, "learning_rate": 3.9499953997607876e-05, "loss": 0.4149, "step": 456500 }, { "epoch": 21.02, "learning_rate": 3.948845339957678e-05, "loss": 0.3234, "step": 457000 }, { "epoch": 21.05, "learning_rate": 3.947695280154568e-05, "loss": 0.3287, "step": 457500 }, { "epoch": 21.07, "learning_rate": 3.9465452203514586e-05, "loss": 0.3325, "step": 458000 }, { "epoch": 21.09, "learning_rate": 3.945395160548349e-05, "loss": 0.3358, "step": 458500 }, { "epoch": 21.12, "learning_rate": 3.944245100745239e-05, "loss": 0.3358, "step": 459000 }, { "epoch": 21.14, "learning_rate": 3.943095040942129e-05, "loss": 0.3459, "step": 459500 }, { "epoch": 21.16, "learning_rate": 3.94194498113902e-05, "loss": 0.3452, "step": 460000 }, { "epoch": 21.18, "learning_rate": 3.9407949213359095e-05, "loss": 0.3443, "step": 460500 }, { "epoch": 21.21, "learning_rate": 3.9396448615328e-05, "loss": 0.3424, "step": 461000 }, { "epoch": 21.23, "learning_rate": 3.93849480172969e-05, "loss": 0.3574, "step": 461500 }, { "epoch": 21.25, "learning_rate": 3.9373447419265805e-05, "loss": 0.3525, "step": 462000 }, { "epoch": 21.28, "learning_rate": 3.936194682123471e-05, "loss": 0.3636, "step": 462500 }, { "epoch": 21.3, "learning_rate": 3.9350446223203605e-05, "loss": 0.3592, "step": 463000 }, { "epoch": 21.32, "learning_rate": 3.9338945625172515e-05, "loss": 0.3598, "step": 463500 }, { "epoch": 21.35, "learning_rate": 3.932744502714141e-05, "loss": 0.3603, "step": 464000 }, { "epoch": 21.37, "learning_rate": 3.9315944429110314e-05, "loss": 0.3705, "step": 464500 }, { "epoch": 21.39, "learning_rate": 3.930444383107922e-05, "loss": 0.3652, "step": 465000 }, { "epoch": 21.41, "learning_rate": 3.929294323304812e-05, "loss": 0.3607, "step": 465500 }, { "epoch": 21.44, "learning_rate": 3.9281442635017024e-05, "loss": 0.3686, "step": 466000 }, { "epoch": 21.46, "learning_rate": 3.926994203698593e-05, "loss": 0.36, "step": 466500 }, { "epoch": 21.48, "learning_rate": 3.925844143895483e-05, "loss": 0.3714, "step": 467000 }, { "epoch": 21.51, "learning_rate": 3.924694084092373e-05, "loss": 0.3673, "step": 467500 }, { "epoch": 21.53, "learning_rate": 3.923544024289264e-05, "loss": 0.3714, "step": 468000 }, { "epoch": 21.55, "learning_rate": 3.922393964486153e-05, "loss": 0.3689, "step": 468500 }, { "epoch": 21.58, "learning_rate": 3.9212439046830437e-05, "loss": 0.3792, "step": 469000 }, { "epoch": 21.6, "learning_rate": 3.920093844879934e-05, "loss": 0.3708, "step": 469500 }, { "epoch": 21.62, "learning_rate": 3.918943785076824e-05, "loss": 0.3744, "step": 470000 }, { "epoch": 21.64, "learning_rate": 3.9177937252737146e-05, "loss": 0.3811, "step": 470500 }, { "epoch": 21.67, "learning_rate": 3.916643665470604e-05, "loss": 0.3766, "step": 471000 }, { "epoch": 21.69, "learning_rate": 3.915493605667495e-05, "loss": 0.38, "step": 471500 }, { "epoch": 21.71, "learning_rate": 3.914343545864385e-05, "loss": 0.3858, "step": 472000 }, { "epoch": 21.74, "learning_rate": 3.913193486061275e-05, "loss": 0.3756, "step": 472500 }, { "epoch": 21.76, "learning_rate": 3.9120434262581656e-05, "loss": 0.3776, "step": 473000 }, { "epoch": 21.78, "learning_rate": 3.910893366455056e-05, "loss": 0.3882, "step": 473500 }, { "epoch": 21.81, "learning_rate": 3.909743306651946e-05, "loss": 0.3795, "step": 474000 }, { "epoch": 21.83, "learning_rate": 3.9085932468488365e-05, "loss": 0.3853, "step": 474500 }, { "epoch": 21.85, "learning_rate": 3.907443187045727e-05, "loss": 0.3888, "step": 475000 }, { "epoch": 21.87, "learning_rate": 3.9062931272426165e-05, "loss": 0.3909, "step": 475500 }, { "epoch": 21.9, "learning_rate": 3.9051430674395075e-05, "loss": 0.3849, "step": 476000 }, { "epoch": 21.92, "learning_rate": 3.903993007636397e-05, "loss": 0.39, "step": 476500 }, { "epoch": 21.94, "learning_rate": 3.9028429478332875e-05, "loss": 0.3914, "step": 477000 }, { "epoch": 21.97, "learning_rate": 3.901692888030178e-05, "loss": 0.3921, "step": 477500 }, { "epoch": 21.99, "learning_rate": 3.900542828227068e-05, "loss": 0.3935, "step": 478000 }, { "epoch": 22.01, "learning_rate": 3.8993927684239584e-05, "loss": 0.3506, "step": 478500 }, { "epoch": 22.04, "learning_rate": 3.898242708620848e-05, "loss": 0.315, "step": 479000 }, { "epoch": 22.06, "learning_rate": 3.897092648817739e-05, "loss": 0.3278, "step": 479500 }, { "epoch": 22.08, "learning_rate": 3.895942589014629e-05, "loss": 0.3301, "step": 480000 }, { "epoch": 22.1, "learning_rate": 3.894792529211519e-05, "loss": 0.3268, "step": 480500 }, { "epoch": 22.13, "learning_rate": 3.8936424694084094e-05, "loss": 0.3301, "step": 481000 }, { "epoch": 22.15, "learning_rate": 3.8924924096053e-05, "loss": 0.3372, "step": 481500 }, { "epoch": 22.17, "learning_rate": 3.89134234980219e-05, "loss": 0.3294, "step": 482000 }, { "epoch": 22.2, "learning_rate": 3.8901922899990804e-05, "loss": 0.3403, "step": 482500 }, { "epoch": 22.22, "learning_rate": 3.889042230195971e-05, "loss": 0.3405, "step": 483000 }, { "epoch": 22.24, "learning_rate": 3.88789217039286e-05, "loss": 0.3466, "step": 483500 }, { "epoch": 22.27, "learning_rate": 3.886742110589751e-05, "loss": 0.3466, "step": 484000 }, { "epoch": 22.29, "learning_rate": 3.885592050786641e-05, "loss": 0.3478, "step": 484500 }, { "epoch": 22.31, "learning_rate": 3.884441990983531e-05, "loss": 0.3418, "step": 485000 }, { "epoch": 22.33, "learning_rate": 3.8832919311804216e-05, "loss": 0.3524, "step": 485500 }, { "epoch": 22.36, "learning_rate": 3.882141871377312e-05, "loss": 0.3512, "step": 486000 }, { "epoch": 22.38, "learning_rate": 3.880991811574202e-05, "loss": 0.3512, "step": 486500 }, { "epoch": 22.4, "learning_rate": 3.879841751771092e-05, "loss": 0.3529, "step": 487000 }, { "epoch": 22.43, "learning_rate": 3.878691691967983e-05, "loss": 0.3498, "step": 487500 }, { "epoch": 22.45, "learning_rate": 3.8775416321648726e-05, "loss": 0.3584, "step": 488000 }, { "epoch": 22.47, "learning_rate": 3.876391572361763e-05, "loss": 0.358, "step": 488500 }, { "epoch": 22.5, "learning_rate": 3.875241512558653e-05, "loss": 0.3615, "step": 489000 }, { "epoch": 22.52, "learning_rate": 3.8740914527555435e-05, "loss": 0.3521, "step": 489500 }, { "epoch": 22.54, "learning_rate": 3.872941392952434e-05, "loss": 0.3616, "step": 490000 }, { "epoch": 22.56, "learning_rate": 3.871791333149324e-05, "loss": 0.3584, "step": 490500 }, { "epoch": 22.59, "learning_rate": 3.8706412733462145e-05, "loss": 0.3618, "step": 491000 }, { "epoch": 22.61, "learning_rate": 3.869491213543104e-05, "loss": 0.3693, "step": 491500 }, { "epoch": 22.63, "learning_rate": 3.8683411537399945e-05, "loss": 0.3579, "step": 492000 }, { "epoch": 22.66, "learning_rate": 3.867191093936885e-05, "loss": 0.3742, "step": 492500 }, { "epoch": 22.68, "learning_rate": 3.866041034133775e-05, "loss": 0.3682, "step": 493000 }, { "epoch": 22.7, "learning_rate": 3.8648909743306654e-05, "loss": 0.3669, "step": 493500 }, { "epoch": 22.73, "learning_rate": 3.863740914527556e-05, "loss": 0.3667, "step": 494000 }, { "epoch": 22.75, "learning_rate": 3.862590854724446e-05, "loss": 0.3643, "step": 494500 }, { "epoch": 22.77, "learning_rate": 3.861440794921336e-05, "loss": 0.3758, "step": 495000 }, { "epoch": 22.79, "learning_rate": 3.860290735118227e-05, "loss": 0.3679, "step": 495500 }, { "epoch": 22.82, "learning_rate": 3.8591406753151164e-05, "loss": 0.3704, "step": 496000 }, { "epoch": 22.84, "learning_rate": 3.857990615512007e-05, "loss": 0.3782, "step": 496500 }, { "epoch": 22.86, "learning_rate": 3.856840555708897e-05, "loss": 0.364, "step": 497000 }, { "epoch": 22.89, "learning_rate": 3.8556904959057874e-05, "loss": 0.3791, "step": 497500 }, { "epoch": 22.91, "learning_rate": 3.854540436102678e-05, "loss": 0.379, "step": 498000 }, { "epoch": 22.93, "learning_rate": 3.853390376299568e-05, "loss": 0.3829, "step": 498500 }, { "epoch": 22.96, "learning_rate": 3.852240316496458e-05, "loss": 0.3713, "step": 499000 }, { "epoch": 22.98, "learning_rate": 3.851090256693348e-05, "loss": 0.3805, "step": 499500 }, { "epoch": 23.0, "learning_rate": 3.849940196890238e-05, "loss": 0.377, "step": 500000 }, { "epoch": 23.02, "learning_rate": 3.8487901370871286e-05, "loss": 0.3079, "step": 500500 }, { "epoch": 23.05, "learning_rate": 3.847640077284019e-05, "loss": 0.3162, "step": 501000 }, { "epoch": 23.07, "learning_rate": 3.846490017480909e-05, "loss": 0.3172, "step": 501500 }, { "epoch": 23.09, "learning_rate": 3.8453399576777996e-05, "loss": 0.3179, "step": 502000 }, { "epoch": 23.12, "learning_rate": 3.84418989787469e-05, "loss": 0.3238, "step": 502500 }, { "epoch": 23.14, "learning_rate": 3.8430398380715796e-05, "loss": 0.3264, "step": 503000 }, { "epoch": 23.16, "learning_rate": 3.8418897782684706e-05, "loss": 0.3313, "step": 503500 }, { "epoch": 23.19, "learning_rate": 3.84073971846536e-05, "loss": 0.3388, "step": 504000 }, { "epoch": 23.21, "learning_rate": 3.8395896586622505e-05, "loss": 0.3275, "step": 504500 }, { "epoch": 23.23, "learning_rate": 3.838439598859141e-05, "loss": 0.3271, "step": 505000 }, { "epoch": 23.25, "learning_rate": 3.837289539056031e-05, "loss": 0.3395, "step": 505500 }, { "epoch": 23.28, "learning_rate": 3.8361394792529215e-05, "loss": 0.334, "step": 506000 }, { "epoch": 23.3, "learning_rate": 3.834989419449812e-05, "loss": 0.3323, "step": 506500 }, { "epoch": 23.32, "learning_rate": 3.833839359646702e-05, "loss": 0.3376, "step": 507000 }, { "epoch": 23.35, "learning_rate": 3.832689299843592e-05, "loss": 0.3393, "step": 507500 }, { "epoch": 23.37, "learning_rate": 3.831539240040482e-05, "loss": 0.3436, "step": 508000 }, { "epoch": 23.39, "learning_rate": 3.8303891802373724e-05, "loss": 0.3438, "step": 508500 }, { "epoch": 23.42, "learning_rate": 3.829239120434263e-05, "loss": 0.3466, "step": 509000 }, { "epoch": 23.44, "learning_rate": 3.828089060631153e-05, "loss": 0.3463, "step": 509500 }, { "epoch": 23.46, "learning_rate": 3.8269390008280434e-05, "loss": 0.3464, "step": 510000 }, { "epoch": 23.48, "learning_rate": 3.825788941024934e-05, "loss": 0.3501, "step": 510500 }, { "epoch": 23.51, "learning_rate": 3.8246388812218234e-05, "loss": 0.3485, "step": 511000 }, { "epoch": 23.53, "learning_rate": 3.8234888214187144e-05, "loss": 0.3474, "step": 511500 }, { "epoch": 23.55, "learning_rate": 3.822338761615604e-05, "loss": 0.3528, "step": 512000 }, { "epoch": 23.58, "learning_rate": 3.8211887018124943e-05, "loss": 0.3544, "step": 512500 }, { "epoch": 23.6, "learning_rate": 3.820038642009385e-05, "loss": 0.348, "step": 513000 }, { "epoch": 23.62, "learning_rate": 3.818888582206275e-05, "loss": 0.3555, "step": 513500 }, { "epoch": 23.65, "learning_rate": 3.817738522403165e-05, "loss": 0.3578, "step": 514000 }, { "epoch": 23.67, "learning_rate": 3.8165884626000556e-05, "loss": 0.3577, "step": 514500 }, { "epoch": 23.69, "learning_rate": 3.815438402796946e-05, "loss": 0.361, "step": 515000 }, { "epoch": 23.71, "learning_rate": 3.8142883429938356e-05, "loss": 0.3586, "step": 515500 }, { "epoch": 23.74, "learning_rate": 3.813138283190726e-05, "loss": 0.3608, "step": 516000 }, { "epoch": 23.76, "learning_rate": 3.811988223387616e-05, "loss": 0.3549, "step": 516500 }, { "epoch": 23.78, "learning_rate": 3.8108381635845066e-05, "loss": 0.3636, "step": 517000 }, { "epoch": 23.81, "learning_rate": 3.809688103781397e-05, "loss": 0.3633, "step": 517500 }, { "epoch": 23.83, "learning_rate": 3.808538043978287e-05, "loss": 0.362, "step": 518000 }, { "epoch": 23.85, "learning_rate": 3.8073879841751775e-05, "loss": 0.361, "step": 518500 }, { "epoch": 23.88, "learning_rate": 3.806237924372067e-05, "loss": 0.3661, "step": 519000 }, { "epoch": 23.9, "learning_rate": 3.8050878645689575e-05, "loss": 0.3683, "step": 519500 }, { "epoch": 23.92, "learning_rate": 3.803937804765848e-05, "loss": 0.3647, "step": 520000 }, { "epoch": 23.94, "learning_rate": 3.802787744962738e-05, "loss": 0.3705, "step": 520500 }, { "epoch": 23.97, "learning_rate": 3.8016376851596285e-05, "loss": 0.3618, "step": 521000 }, { "epoch": 23.99, "learning_rate": 3.800487625356519e-05, "loss": 0.3669, "step": 521500 }, { "epoch": 24.01, "learning_rate": 3.799337565553409e-05, "loss": 0.3309, "step": 522000 }, { "epoch": 24.04, "learning_rate": 3.7981875057502995e-05, "loss": 0.3107, "step": 522500 }, { "epoch": 24.06, "learning_rate": 3.79703744594719e-05, "loss": 0.3097, "step": 523000 }, { "epoch": 24.08, "learning_rate": 3.7958873861440794e-05, "loss": 0.3127, "step": 523500 }, { "epoch": 24.11, "learning_rate": 3.79473732634097e-05, "loss": 0.3194, "step": 524000 }, { "epoch": 24.13, "learning_rate": 3.79358726653786e-05, "loss": 0.3159, "step": 524500 }, { "epoch": 24.15, "learning_rate": 3.7924372067347504e-05, "loss": 0.3189, "step": 525000 }, { "epoch": 24.17, "learning_rate": 3.791287146931641e-05, "loss": 0.3205, "step": 525500 }, { "epoch": 24.2, "learning_rate": 3.790137087128531e-05, "loss": 0.3215, "step": 526000 }, { "epoch": 24.22, "learning_rate": 3.7889870273254214e-05, "loss": 0.3274, "step": 526500 }, { "epoch": 24.24, "learning_rate": 3.787836967522311e-05, "loss": 0.3258, "step": 527000 }, { "epoch": 24.27, "learning_rate": 3.786686907719201e-05, "loss": 0.3348, "step": 527500 }, { "epoch": 24.29, "learning_rate": 3.7855368479160917e-05, "loss": 0.3339, "step": 528000 }, { "epoch": 24.31, "learning_rate": 3.784386788112982e-05, "loss": 0.3295, "step": 528500 }, { "epoch": 24.34, "learning_rate": 3.783236728309872e-05, "loss": 0.3329, "step": 529000 }, { "epoch": 24.36, "learning_rate": 3.7820866685067626e-05, "loss": 0.3337, "step": 529500 }, { "epoch": 24.38, "learning_rate": 3.780936608703653e-05, "loss": 0.3361, "step": 530000 }, { "epoch": 24.4, "learning_rate": 3.779786548900543e-05, "loss": 0.3371, "step": 530500 }, { "epoch": 24.43, "learning_rate": 3.7786364890974336e-05, "loss": 0.3367, "step": 531000 }, { "epoch": 24.45, "learning_rate": 3.777486429294323e-05, "loss": 0.3368, "step": 531500 }, { "epoch": 24.47, "learning_rate": 3.7763363694912136e-05, "loss": 0.3372, "step": 532000 }, { "epoch": 24.5, "learning_rate": 3.775186309688104e-05, "loss": 0.3464, "step": 532500 }, { "epoch": 24.52, "learning_rate": 3.774036249884994e-05, "loss": 0.3426, "step": 533000 }, { "epoch": 24.54, "learning_rate": 3.7728861900818845e-05, "loss": 0.3426, "step": 533500 }, { "epoch": 24.57, "learning_rate": 3.771736130278775e-05, "loss": 0.3416, "step": 534000 }, { "epoch": 24.59, "learning_rate": 3.770586070475665e-05, "loss": 0.3434, "step": 534500 }, { "epoch": 24.61, "learning_rate": 3.769436010672555e-05, "loss": 0.3451, "step": 535000 }, { "epoch": 24.63, "learning_rate": 3.768285950869445e-05, "loss": 0.3467, "step": 535500 }, { "epoch": 24.66, "learning_rate": 3.7671358910663355e-05, "loss": 0.3408, "step": 536000 }, { "epoch": 24.68, "learning_rate": 3.765985831263226e-05, "loss": 0.3477, "step": 536500 }, { "epoch": 24.7, "learning_rate": 3.764835771460116e-05, "loss": 0.345, "step": 537000 }, { "epoch": 24.73, "learning_rate": 3.7636857116570064e-05, "loss": 0.3534, "step": 537500 }, { "epoch": 24.75, "learning_rate": 3.762535651853897e-05, "loss": 0.3542, "step": 538000 }, { "epoch": 24.77, "learning_rate": 3.761385592050787e-05, "loss": 0.3464, "step": 538500 }, { "epoch": 24.8, "learning_rate": 3.7602355322476774e-05, "loss": 0.3495, "step": 539000 }, { "epoch": 24.82, "learning_rate": 3.759085472444567e-05, "loss": 0.3553, "step": 539500 }, { "epoch": 24.84, "learning_rate": 3.7579354126414574e-05, "loss": 0.354, "step": 540000 }, { "epoch": 24.86, "learning_rate": 3.756785352838348e-05, "loss": 0.35, "step": 540500 }, { "epoch": 24.89, "learning_rate": 3.755635293035238e-05, "loss": 0.3574, "step": 541000 }, { "epoch": 24.91, "learning_rate": 3.7544852332321284e-05, "loss": 0.3586, "step": 541500 }, { "epoch": 24.93, "learning_rate": 3.753335173429019e-05, "loss": 0.3613, "step": 542000 }, { "epoch": 24.96, "learning_rate": 3.752185113625909e-05, "loss": 0.3607, "step": 542500 }, { "epoch": 24.98, "learning_rate": 3.7510350538227987e-05, "loss": 0.3595, "step": 543000 }, { "epoch": 25.0, "learning_rate": 3.749884994019689e-05, "loss": 0.3539, "step": 543500 }, { "epoch": 25.03, "learning_rate": 3.748734934216579e-05, "loss": 0.2951, "step": 544000 }, { "epoch": 25.05, "learning_rate": 3.7475848744134696e-05, "loss": 0.2983, "step": 544500 }, { "epoch": 25.07, "learning_rate": 3.74643481461036e-05, "loss": 0.3007, "step": 545000 }, { "epoch": 25.09, "learning_rate": 3.74528475480725e-05, "loss": 0.302, "step": 545500 }, { "epoch": 25.12, "learning_rate": 3.7441346950041406e-05, "loss": 0.3106, "step": 546000 }, { "epoch": 25.14, "learning_rate": 3.742984635201031e-05, "loss": 0.314, "step": 546500 }, { "epoch": 25.16, "learning_rate": 3.7418345753979206e-05, "loss": 0.3133, "step": 547000 }, { "epoch": 25.19, "learning_rate": 3.740684515594811e-05, "loss": 0.3177, "step": 547500 }, { "epoch": 25.21, "learning_rate": 3.739534455791701e-05, "loss": 0.3179, "step": 548000 }, { "epoch": 25.23, "learning_rate": 3.7383843959885915e-05, "loss": 0.316, "step": 548500 }, { "epoch": 25.26, "learning_rate": 3.737234336185482e-05, "loss": 0.3226, "step": 549000 }, { "epoch": 25.28, "learning_rate": 3.736084276382372e-05, "loss": 0.3234, "step": 549500 }, { "epoch": 25.3, "learning_rate": 3.7349342165792625e-05, "loss": 0.3159, "step": 550000 }, { "epoch": 25.32, "learning_rate": 3.733784156776153e-05, "loss": 0.323, "step": 550500 }, { "epoch": 25.35, "learning_rate": 3.7326340969730425e-05, "loss": 0.3229, "step": 551000 }, { "epoch": 25.37, "learning_rate": 3.731484037169933e-05, "loss": 0.3179, "step": 551500 }, { "epoch": 25.39, "learning_rate": 3.730333977366823e-05, "loss": 0.3269, "step": 552000 }, { "epoch": 25.42, "learning_rate": 3.7291839175637134e-05, "loss": 0.3351, "step": 552500 }, { "epoch": 25.44, "learning_rate": 3.728033857760604e-05, "loss": 0.329, "step": 553000 }, { "epoch": 25.46, "learning_rate": 3.726883797957494e-05, "loss": 0.3378, "step": 553500 }, { "epoch": 25.49, "learning_rate": 3.7257337381543844e-05, "loss": 0.336, "step": 554000 }, { "epoch": 25.51, "learning_rate": 3.724583678351275e-05, "loss": 0.3293, "step": 554500 }, { "epoch": 25.53, "learning_rate": 3.7234336185481644e-05, "loss": 0.334, "step": 555000 }, { "epoch": 25.55, "learning_rate": 3.722283558745055e-05, "loss": 0.3371, "step": 555500 }, { "epoch": 25.58, "learning_rate": 3.721133498941945e-05, "loss": 0.3348, "step": 556000 }, { "epoch": 25.6, "learning_rate": 3.7199834391388354e-05, "loss": 0.3353, "step": 556500 }, { "epoch": 25.62, "learning_rate": 3.718833379335726e-05, "loss": 0.3359, "step": 557000 }, { "epoch": 25.65, "learning_rate": 3.717683319532616e-05, "loss": 0.3415, "step": 557500 }, { "epoch": 25.67, "learning_rate": 3.716533259729506e-05, "loss": 0.3403, "step": 558000 }, { "epoch": 25.69, "learning_rate": 3.7153831999263966e-05, "loss": 0.3362, "step": 558500 }, { "epoch": 25.72, "learning_rate": 3.714233140123286e-05, "loss": 0.3409, "step": 559000 }, { "epoch": 25.74, "learning_rate": 3.7130830803201766e-05, "loss": 0.3385, "step": 559500 }, { "epoch": 25.76, "learning_rate": 3.711933020517067e-05, "loss": 0.3518, "step": 560000 }, { "epoch": 25.78, "learning_rate": 3.710782960713957e-05, "loss": 0.3451, "step": 560500 }, { "epoch": 25.81, "learning_rate": 3.7096329009108476e-05, "loss": 0.3441, "step": 561000 }, { "epoch": 25.83, "learning_rate": 3.708482841107738e-05, "loss": 0.3422, "step": 561500 }, { "epoch": 25.85, "learning_rate": 3.707332781304628e-05, "loss": 0.3432, "step": 562000 }, { "epoch": 25.88, "learning_rate": 3.7061827215015186e-05, "loss": 0.3451, "step": 562500 }, { "epoch": 25.9, "learning_rate": 3.705032661698408e-05, "loss": 0.3529, "step": 563000 }, { "epoch": 25.92, "learning_rate": 3.7038826018952985e-05, "loss": 0.3535, "step": 563500 }, { "epoch": 25.95, "learning_rate": 3.702732542092189e-05, "loss": 0.34, "step": 564000 }, { "epoch": 25.97, "learning_rate": 3.701582482289079e-05, "loss": 0.3475, "step": 564500 }, { "epoch": 25.99, "learning_rate": 3.7004324224859695e-05, "loss": 0.3555, "step": 565000 }, { "epoch": 26.01, "learning_rate": 3.69928236268286e-05, "loss": 0.3113, "step": 565500 }, { "epoch": 26.04, "learning_rate": 3.69813230287975e-05, "loss": 0.2942, "step": 566000 }, { "epoch": 26.06, "learning_rate": 3.6969822430766405e-05, "loss": 0.3002, "step": 566500 }, { "epoch": 26.08, "learning_rate": 3.69583218327353e-05, "loss": 0.3018, "step": 567000 }, { "epoch": 26.11, "learning_rate": 3.6946821234704204e-05, "loss": 0.3027, "step": 567500 }, { "epoch": 26.13, "learning_rate": 3.693532063667311e-05, "loss": 0.3085, "step": 568000 }, { "epoch": 26.15, "learning_rate": 3.692382003864201e-05, "loss": 0.3047, "step": 568500 }, { "epoch": 26.18, "learning_rate": 3.6912319440610914e-05, "loss": 0.3039, "step": 569000 }, { "epoch": 26.2, "learning_rate": 3.690081884257982e-05, "loss": 0.3056, "step": 569500 }, { "epoch": 26.22, "learning_rate": 3.688931824454872e-05, "loss": 0.3072, "step": 570000 }, { "epoch": 26.24, "learning_rate": 3.6877817646517624e-05, "loss": 0.3143, "step": 570500 }, { "epoch": 26.27, "learning_rate": 3.686631704848652e-05, "loss": 0.3172, "step": 571000 }, { "epoch": 26.29, "learning_rate": 3.6854816450455423e-05, "loss": 0.3147, "step": 571500 }, { "epoch": 26.31, "learning_rate": 3.684331585242433e-05, "loss": 0.3167, "step": 572000 }, { "epoch": 26.34, "learning_rate": 3.683181525439323e-05, "loss": 0.3187, "step": 572500 }, { "epoch": 26.36, "learning_rate": 3.682031465636213e-05, "loss": 0.318, "step": 573000 }, { "epoch": 26.38, "learning_rate": 3.6808814058331036e-05, "loss": 0.3199, "step": 573500 }, { "epoch": 26.41, "learning_rate": 3.679731346029994e-05, "loss": 0.3162, "step": 574000 }, { "epoch": 26.43, "learning_rate": 3.678581286226884e-05, "loss": 0.3265, "step": 574500 }, { "epoch": 26.45, "learning_rate": 3.677431226423774e-05, "loss": 0.327, "step": 575000 }, { "epoch": 26.47, "learning_rate": 3.676281166620664e-05, "loss": 0.3247, "step": 575500 }, { "epoch": 26.5, "learning_rate": 3.6751311068175546e-05, "loss": 0.3285, "step": 576000 }, { "epoch": 26.52, "learning_rate": 3.673981047014445e-05, "loss": 0.3284, "step": 576500 }, { "epoch": 26.54, "learning_rate": 3.672830987211335e-05, "loss": 0.3313, "step": 577000 }, { "epoch": 26.57, "learning_rate": 3.6716809274082255e-05, "loss": 0.3288, "step": 577500 }, { "epoch": 26.59, "learning_rate": 3.670530867605116e-05, "loss": 0.3286, "step": 578000 }, { "epoch": 26.61, "learning_rate": 3.669380807802006e-05, "loss": 0.3298, "step": 578500 }, { "epoch": 26.64, "learning_rate": 3.668230747998896e-05, "loss": 0.3266, "step": 579000 }, { "epoch": 26.66, "learning_rate": 3.667080688195786e-05, "loss": 0.3239, "step": 579500 }, { "epoch": 26.68, "learning_rate": 3.6659306283926765e-05, "loss": 0.3289, "step": 580000 }, { "epoch": 26.7, "learning_rate": 3.664780568589567e-05, "loss": 0.333, "step": 580500 }, { "epoch": 26.73, "learning_rate": 3.663630508786457e-05, "loss": 0.3402, "step": 581000 }, { "epoch": 26.75, "learning_rate": 3.6624804489833475e-05, "loss": 0.3383, "step": 581500 }, { "epoch": 26.77, "learning_rate": 3.661330389180238e-05, "loss": 0.3309, "step": 582000 }, { "epoch": 26.8, "learning_rate": 3.6601803293771274e-05, "loss": 0.3416, "step": 582500 }, { "epoch": 26.82, "learning_rate": 3.659030269574018e-05, "loss": 0.3319, "step": 583000 }, { "epoch": 26.84, "learning_rate": 3.657880209770908e-05, "loss": 0.3371, "step": 583500 }, { "epoch": 26.87, "learning_rate": 3.6567301499677984e-05, "loss": 0.3364, "step": 584000 }, { "epoch": 26.89, "learning_rate": 3.655580090164689e-05, "loss": 0.337, "step": 584500 }, { "epoch": 26.91, "learning_rate": 3.654430030361579e-05, "loss": 0.3421, "step": 585000 }, { "epoch": 26.93, "learning_rate": 3.6532799705584694e-05, "loss": 0.3393, "step": 585500 }, { "epoch": 26.96, "learning_rate": 3.65212991075536e-05, "loss": 0.3427, "step": 586000 }, { "epoch": 26.98, "learning_rate": 3.65097985095225e-05, "loss": 0.344, "step": 586500 }, { "epoch": 27.0, "learning_rate": 3.6498297911491397e-05, "loss": 0.3347, "step": 587000 }, { "epoch": 27.03, "learning_rate": 3.64867973134603e-05, "loss": 0.2875, "step": 587500 }, { "epoch": 27.05, "learning_rate": 3.64752967154292e-05, "loss": 0.2967, "step": 588000 }, { "epoch": 27.07, "learning_rate": 3.6463796117398106e-05, "loss": 0.2988, "step": 588500 }, { "epoch": 27.1, "learning_rate": 3.645229551936701e-05, "loss": 0.2982, "step": 589000 }, { "epoch": 27.12, "learning_rate": 3.644079492133591e-05, "loss": 0.2919, "step": 589500 }, { "epoch": 27.14, "learning_rate": 3.6429294323304816e-05, "loss": 0.3038, "step": 590000 }, { "epoch": 27.16, "learning_rate": 3.641779372527371e-05, "loss": 0.3073, "step": 590500 }, { "epoch": 27.19, "learning_rate": 3.6406293127242616e-05, "loss": 0.3042, "step": 591000 }, { "epoch": 27.21, "learning_rate": 3.639479252921152e-05, "loss": 0.3045, "step": 591500 }, { "epoch": 27.23, "learning_rate": 3.638329193118042e-05, "loss": 0.3089, "step": 592000 }, { "epoch": 27.26, "learning_rate": 3.6371791333149325e-05, "loss": 0.3066, "step": 592500 }, { "epoch": 27.28, "learning_rate": 3.636029073511823e-05, "loss": 0.3065, "step": 593000 }, { "epoch": 27.3, "learning_rate": 3.634879013708713e-05, "loss": 0.3046, "step": 593500 }, { "epoch": 27.33, "learning_rate": 3.6337289539056035e-05, "loss": 0.3115, "step": 594000 }, { "epoch": 27.35, "learning_rate": 3.632578894102494e-05, "loss": 0.3102, "step": 594500 }, { "epoch": 27.37, "learning_rate": 3.6314288342993835e-05, "loss": 0.3085, "step": 595000 }, { "epoch": 27.39, "learning_rate": 3.630278774496274e-05, "loss": 0.3183, "step": 595500 }, { "epoch": 27.42, "learning_rate": 3.629128714693164e-05, "loss": 0.3137, "step": 596000 }, { "epoch": 27.44, "learning_rate": 3.6279786548900544e-05, "loss": 0.3156, "step": 596500 }, { "epoch": 27.46, "learning_rate": 3.626828595086945e-05, "loss": 0.3155, "step": 597000 }, { "epoch": 27.49, "learning_rate": 3.625678535283835e-05, "loss": 0.3235, "step": 597500 }, { "epoch": 27.51, "learning_rate": 3.6245284754807254e-05, "loss": 0.3178, "step": 598000 }, { "epoch": 27.53, "learning_rate": 3.623378415677615e-05, "loss": 0.3185, "step": 598500 }, { "epoch": 27.56, "learning_rate": 3.6222283558745054e-05, "loss": 0.3211, "step": 599000 }, { "epoch": 27.58, "learning_rate": 3.621078296071396e-05, "loss": 0.3227, "step": 599500 }, { "epoch": 27.6, "learning_rate": 3.619928236268286e-05, "loss": 0.324, "step": 600000 }, { "epoch": 27.62, "learning_rate": 3.6187781764651764e-05, "loss": 0.3233, "step": 600500 }, { "epoch": 27.65, "learning_rate": 3.617628116662067e-05, "loss": 0.3242, "step": 601000 }, { "epoch": 27.67, "learning_rate": 3.616478056858957e-05, "loss": 0.3205, "step": 601500 }, { "epoch": 27.69, "learning_rate": 3.615327997055847e-05, "loss": 0.3262, "step": 602000 }, { "epoch": 27.72, "learning_rate": 3.6141779372527377e-05, "loss": 0.323, "step": 602500 }, { "epoch": 27.74, "learning_rate": 3.613027877449627e-05, "loss": 0.329, "step": 603000 }, { "epoch": 27.76, "learning_rate": 3.6118778176465176e-05, "loss": 0.33, "step": 603500 }, { "epoch": 27.79, "learning_rate": 3.610727757843408e-05, "loss": 0.3292, "step": 604000 }, { "epoch": 27.81, "learning_rate": 3.609577698040298e-05, "loss": 0.328, "step": 604500 }, { "epoch": 27.83, "learning_rate": 3.6084276382371886e-05, "loss": 0.3307, "step": 605000 }, { "epoch": 27.85, "learning_rate": 3.607277578434079e-05, "loss": 0.3285, "step": 605500 }, { "epoch": 27.88, "learning_rate": 3.606127518630969e-05, "loss": 0.3336, "step": 606000 }, { "epoch": 27.9, "learning_rate": 3.604977458827859e-05, "loss": 0.3282, "step": 606500 }, { "epoch": 27.92, "learning_rate": 3.603827399024749e-05, "loss": 0.3324, "step": 607000 }, { "epoch": 27.95, "learning_rate": 3.6026773392216395e-05, "loss": 0.3402, "step": 607500 }, { "epoch": 27.97, "learning_rate": 3.60152727941853e-05, "loss": 0.3332, "step": 608000 }, { "epoch": 27.99, "learning_rate": 3.60037721961542e-05, "loss": 0.3364, "step": 608500 }, { "epoch": 28.02, "learning_rate": 3.5992271598123105e-05, "loss": 0.3058, "step": 609000 }, { "epoch": 28.04, "learning_rate": 3.598077100009201e-05, "loss": 0.2838, "step": 609500 }, { "epoch": 28.06, "learning_rate": 3.5969270402060905e-05, "loss": 0.2906, "step": 610000 }, { "epoch": 28.08, "learning_rate": 3.5957769804029815e-05, "loss": 0.2946, "step": 610500 }, { "epoch": 28.11, "learning_rate": 3.594626920599871e-05, "loss": 0.2901, "step": 611000 }, { "epoch": 28.13, "learning_rate": 3.5934768607967614e-05, "loss": 0.2966, "step": 611500 }, { "epoch": 28.15, "learning_rate": 3.592326800993652e-05, "loss": 0.2936, "step": 612000 }, { "epoch": 28.18, "learning_rate": 3.591176741190542e-05, "loss": 0.2934, "step": 612500 }, { "epoch": 28.2, "learning_rate": 3.5900266813874324e-05, "loss": 0.2962, "step": 613000 }, { "epoch": 28.22, "learning_rate": 3.588876621584323e-05, "loss": 0.2936, "step": 613500 }, { "epoch": 28.25, "learning_rate": 3.587726561781213e-05, "loss": 0.2988, "step": 614000 }, { "epoch": 28.27, "learning_rate": 3.586576501978103e-05, "loss": 0.3013, "step": 614500 }, { "epoch": 28.29, "learning_rate": 3.585426442174993e-05, "loss": 0.3016, "step": 615000 }, { "epoch": 28.31, "learning_rate": 3.5842763823718834e-05, "loss": 0.3035, "step": 615500 }, { "epoch": 28.34, "learning_rate": 3.583126322568774e-05, "loss": 0.3061, "step": 616000 }, { "epoch": 28.36, "learning_rate": 3.581976262765664e-05, "loss": 0.3097, "step": 616500 }, { "epoch": 28.38, "learning_rate": 3.580826202962554e-05, "loss": 0.309, "step": 617000 }, { "epoch": 28.41, "learning_rate": 3.5796761431594446e-05, "loss": 0.3077, "step": 617500 }, { "epoch": 28.43, "learning_rate": 3.578526083356334e-05, "loss": 0.3158, "step": 618000 }, { "epoch": 28.45, "learning_rate": 3.577376023553225e-05, "loss": 0.3043, "step": 618500 }, { "epoch": 28.48, "learning_rate": 3.576225963750115e-05, "loss": 0.3096, "step": 619000 }, { "epoch": 28.5, "learning_rate": 3.575075903947005e-05, "loss": 0.3075, "step": 619500 }, { "epoch": 28.52, "learning_rate": 3.5739258441438956e-05, "loss": 0.3156, "step": 620000 }, { "epoch": 28.54, "learning_rate": 3.572775784340786e-05, "loss": 0.315, "step": 620500 }, { "epoch": 28.57, "learning_rate": 3.571625724537676e-05, "loss": 0.3222, "step": 621000 }, { "epoch": 28.59, "learning_rate": 3.5704756647345666e-05, "loss": 0.3217, "step": 621500 }, { "epoch": 28.61, "learning_rate": 3.569325604931457e-05, "loss": 0.3137, "step": 622000 }, { "epoch": 28.64, "learning_rate": 3.5681755451283465e-05, "loss": 0.3207, "step": 622500 }, { "epoch": 28.66, "learning_rate": 3.567025485325237e-05, "loss": 0.3149, "step": 623000 }, { "epoch": 28.68, "learning_rate": 3.565875425522127e-05, "loss": 0.3247, "step": 623500 }, { "epoch": 28.71, "learning_rate": 3.5647253657190175e-05, "loss": 0.32, "step": 624000 }, { "epoch": 28.73, "learning_rate": 3.563575305915908e-05, "loss": 0.3142, "step": 624500 }, { "epoch": 28.75, "learning_rate": 3.562425246112798e-05, "loss": 0.3209, "step": 625000 }, { "epoch": 28.77, "learning_rate": 3.5612751863096885e-05, "loss": 0.3217, "step": 625500 }, { "epoch": 28.8, "learning_rate": 3.560125126506578e-05, "loss": 0.3294, "step": 626000 }, { "epoch": 28.82, "learning_rate": 3.558975066703469e-05, "loss": 0.3246, "step": 626500 }, { "epoch": 28.84, "learning_rate": 3.557825006900359e-05, "loss": 0.3213, "step": 627000 }, { "epoch": 28.87, "learning_rate": 3.556674947097249e-05, "loss": 0.3211, "step": 627500 }, { "epoch": 28.89, "learning_rate": 3.5555248872941394e-05, "loss": 0.3252, "step": 628000 }, { "epoch": 28.91, "learning_rate": 3.55437482749103e-05, "loss": 0.3259, "step": 628500 }, { "epoch": 28.94, "learning_rate": 3.55322476768792e-05, "loss": 0.3229, "step": 629000 }, { "epoch": 28.96, "learning_rate": 3.5520747078848104e-05, "loss": 0.3276, "step": 629500 }, { "epoch": 28.98, "learning_rate": 3.550924648081701e-05, "loss": 0.3272, "step": 630000 }, { "epoch": 29.0, "learning_rate": 3.5497745882785903e-05, "loss": 0.3164, "step": 630500 }, { "epoch": 29.03, "learning_rate": 3.548624528475481e-05, "loss": 0.2807, "step": 631000 }, { "epoch": 29.05, "learning_rate": 3.547474468672371e-05, "loss": 0.2816, "step": 631500 }, { "epoch": 29.07, "learning_rate": 3.546324408869261e-05, "loss": 0.2811, "step": 632000 }, { "epoch": 29.1, "learning_rate": 3.5451743490661516e-05, "loss": 0.2853, "step": 632500 }, { "epoch": 29.12, "learning_rate": 3.544024289263042e-05, "loss": 0.2878, "step": 633000 }, { "epoch": 29.14, "learning_rate": 3.542874229459932e-05, "loss": 0.296, "step": 633500 }, { "epoch": 29.17, "learning_rate": 3.541724169656822e-05, "loss": 0.2915, "step": 634000 }, { "epoch": 29.19, "learning_rate": 3.540574109853713e-05, "loss": 0.2941, "step": 634500 }, { "epoch": 29.21, "learning_rate": 3.5394240500506026e-05, "loss": 0.2916, "step": 635000 }, { "epoch": 29.23, "learning_rate": 3.538273990247493e-05, "loss": 0.295, "step": 635500 }, { "epoch": 29.26, "learning_rate": 3.537123930444383e-05, "loss": 0.2987, "step": 636000 }, { "epoch": 29.28, "learning_rate": 3.5359738706412735e-05, "loss": 0.2939, "step": 636500 }, { "epoch": 29.3, "learning_rate": 3.534823810838164e-05, "loss": 0.2991, "step": 637000 }, { "epoch": 29.33, "learning_rate": 3.533673751035054e-05, "loss": 0.2993, "step": 637500 }, { "epoch": 29.35, "learning_rate": 3.5325236912319445e-05, "loss": 0.2972, "step": 638000 }, { "epoch": 29.37, "learning_rate": 3.531373631428834e-05, "loss": 0.3002, "step": 638500 }, { "epoch": 29.4, "learning_rate": 3.5302235716257245e-05, "loss": 0.3058, "step": 639000 }, { "epoch": 29.42, "learning_rate": 3.529073511822615e-05, "loss": 0.3088, "step": 639500 }, { "epoch": 29.44, "learning_rate": 3.527923452019505e-05, "loss": 0.302, "step": 640000 }, { "epoch": 29.46, "learning_rate": 3.5267733922163955e-05, "loss": 0.3033, "step": 640500 }, { "epoch": 29.49, "learning_rate": 3.525623332413286e-05, "loss": 0.3104, "step": 641000 }, { "epoch": 29.51, "learning_rate": 3.524473272610176e-05, "loss": 0.3074, "step": 641500 }, { "epoch": 29.53, "learning_rate": 3.523323212807066e-05, "loss": 0.3031, "step": 642000 }, { "epoch": 29.56, "learning_rate": 3.522173153003957e-05, "loss": 0.3053, "step": 642500 }, { "epoch": 29.58, "learning_rate": 3.5210230932008464e-05, "loss": 0.3058, "step": 643000 }, { "epoch": 29.6, "learning_rate": 3.519873033397737e-05, "loss": 0.3148, "step": 643500 }, { "epoch": 29.63, "learning_rate": 3.518722973594627e-05, "loss": 0.3115, "step": 644000 }, { "epoch": 29.65, "learning_rate": 3.5175729137915174e-05, "loss": 0.318, "step": 644500 }, { "epoch": 29.67, "learning_rate": 3.516422853988408e-05, "loss": 0.3136, "step": 645000 }, { "epoch": 29.69, "learning_rate": 3.515272794185297e-05, "loss": 0.3168, "step": 645500 }, { "epoch": 29.72, "learning_rate": 3.514122734382188e-05, "loss": 0.3113, "step": 646000 }, { "epoch": 29.74, "learning_rate": 3.512972674579078e-05, "loss": 0.3172, "step": 646500 }, { "epoch": 29.76, "learning_rate": 3.511822614775968e-05, "loss": 0.3111, "step": 647000 }, { "epoch": 29.79, "learning_rate": 3.5106725549728586e-05, "loss": 0.315, "step": 647500 }, { "epoch": 29.81, "learning_rate": 3.509522495169749e-05, "loss": 0.3197, "step": 648000 }, { "epoch": 29.83, "learning_rate": 3.508372435366639e-05, "loss": 0.315, "step": 648500 }, { "epoch": 29.86, "learning_rate": 3.5072223755635296e-05, "loss": 0.3143, "step": 649000 }, { "epoch": 29.88, "learning_rate": 3.50607231576042e-05, "loss": 0.3228, "step": 649500 }, { "epoch": 29.9, "learning_rate": 3.5049222559573096e-05, "loss": 0.3224, "step": 650000 }, { "epoch": 29.92, "learning_rate": 3.5037721961542006e-05, "loss": 0.3225, "step": 650500 }, { "epoch": 29.95, "learning_rate": 3.50262213635109e-05, "loss": 0.3197, "step": 651000 }, { "epoch": 29.97, "learning_rate": 3.5014720765479805e-05, "loss": 0.3181, "step": 651500 }, { "epoch": 29.99, "learning_rate": 3.500322016744871e-05, "loss": 0.3192, "step": 652000 }, { "epoch": 30.02, "learning_rate": 3.499171956941761e-05, "loss": 0.2897, "step": 652500 }, { "epoch": 30.04, "learning_rate": 3.4980218971386515e-05, "loss": 0.2765, "step": 653000 }, { "epoch": 30.06, "learning_rate": 3.496871837335541e-05, "loss": 0.2736, "step": 653500 }, { "epoch": 30.09, "learning_rate": 3.495721777532432e-05, "loss": 0.2783, "step": 654000 }, { "epoch": 30.11, "learning_rate": 3.494571717729322e-05, "loss": 0.2821, "step": 654500 }, { "epoch": 30.13, "learning_rate": 3.493421657926212e-05, "loss": 0.2845, "step": 655000 }, { "epoch": 30.15, "learning_rate": 3.4922715981231025e-05, "loss": 0.2864, "step": 655500 }, { "epoch": 30.18, "learning_rate": 3.491121538319993e-05, "loss": 0.2852, "step": 656000 }, { "epoch": 30.2, "learning_rate": 3.489971478516883e-05, "loss": 0.2863, "step": 656500 }, { "epoch": 30.22, "learning_rate": 3.4888214187137734e-05, "loss": 0.2931, "step": 657000 }, { "epoch": 30.25, "learning_rate": 3.487671358910664e-05, "loss": 0.2921, "step": 657500 }, { "epoch": 30.27, "learning_rate": 3.4865212991075534e-05, "loss": 0.2944, "step": 658000 }, { "epoch": 30.29, "learning_rate": 3.4853712393044444e-05, "loss": 0.2971, "step": 658500 }, { "epoch": 30.32, "learning_rate": 3.484221179501334e-05, "loss": 0.2976, "step": 659000 }, { "epoch": 30.34, "learning_rate": 3.4830711196982244e-05, "loss": 0.2994, "step": 659500 }, { "epoch": 30.36, "learning_rate": 3.481921059895115e-05, "loss": 0.2985, "step": 660000 }, { "epoch": 30.38, "learning_rate": 3.480771000092005e-05, "loss": 0.3012, "step": 660500 }, { "epoch": 30.41, "learning_rate": 3.479620940288895e-05, "loss": 0.3034, "step": 661000 }, { "epoch": 30.43, "learning_rate": 3.478470880485785e-05, "loss": 0.3022, "step": 661500 }, { "epoch": 30.45, "learning_rate": 3.477320820682676e-05, "loss": 0.3031, "step": 662000 }, { "epoch": 30.48, "learning_rate": 3.4761707608795656e-05, "loss": 0.2977, "step": 662500 }, { "epoch": 30.5, "learning_rate": 3.475020701076456e-05, "loss": 0.3006, "step": 663000 }, { "epoch": 30.52, "learning_rate": 3.473870641273346e-05, "loss": 0.3104, "step": 663500 }, { "epoch": 30.55, "learning_rate": 3.4727205814702366e-05, "loss": 0.3051, "step": 664000 }, { "epoch": 30.57, "learning_rate": 3.471570521667127e-05, "loss": 0.3029, "step": 664500 }, { "epoch": 30.59, "learning_rate": 3.470420461864017e-05, "loss": 0.303, "step": 665000 }, { "epoch": 30.61, "learning_rate": 3.4692704020609076e-05, "loss": 0.3038, "step": 665500 }, { "epoch": 30.64, "learning_rate": 3.468120342257797e-05, "loss": 0.3026, "step": 666000 }, { "epoch": 30.66, "learning_rate": 3.466970282454688e-05, "loss": 0.3005, "step": 666500 }, { "epoch": 30.68, "learning_rate": 3.465820222651578e-05, "loss": 0.3077, "step": 667000 }, { "epoch": 30.71, "learning_rate": 3.464670162848468e-05, "loss": 0.3084, "step": 667500 }, { "epoch": 30.73, "learning_rate": 3.4635201030453585e-05, "loss": 0.3076, "step": 668000 }, { "epoch": 30.75, "learning_rate": 3.462370043242249e-05, "loss": 0.311, "step": 668500 }, { "epoch": 30.78, "learning_rate": 3.461219983439139e-05, "loss": 0.3028, "step": 669000 }, { "epoch": 30.8, "learning_rate": 3.460069923636029e-05, "loss": 0.311, "step": 669500 }, { "epoch": 30.82, "learning_rate": 3.45891986383292e-05, "loss": 0.3065, "step": 670000 }, { "epoch": 30.84, "learning_rate": 3.4577698040298094e-05, "loss": 0.305, "step": 670500 }, { "epoch": 30.87, "learning_rate": 3.4566197442267e-05, "loss": 0.3068, "step": 671000 }, { "epoch": 30.89, "learning_rate": 3.45546968442359e-05, "loss": 0.3138, "step": 671500 }, { "epoch": 30.91, "learning_rate": 3.4543196246204804e-05, "loss": 0.3216, "step": 672000 }, { "epoch": 30.94, "learning_rate": 3.453169564817371e-05, "loss": 0.3107, "step": 672500 }, { "epoch": 30.96, "learning_rate": 3.4520195050142604e-05, "loss": 0.3123, "step": 673000 }, { "epoch": 30.98, "learning_rate": 3.4508694452111514e-05, "loss": 0.315, "step": 673500 }, { "epoch": 31.01, "learning_rate": 3.449719385408041e-05, "loss": 0.3022, "step": 674000 }, { "epoch": 31.03, "learning_rate": 3.448569325604932e-05, "loss": 0.271, "step": 674500 }, { "epoch": 31.05, "learning_rate": 3.447419265801822e-05, "loss": 0.2733, "step": 675000 }, { "epoch": 31.07, "learning_rate": 3.446269205998712e-05, "loss": 0.2753, "step": 675500 }, { "epoch": 31.1, "learning_rate": 3.445119146195602e-05, "loss": 0.2798, "step": 676000 }, { "epoch": 31.12, "learning_rate": 3.4439690863924926e-05, "loss": 0.278, "step": 676500 }, { "epoch": 31.14, "learning_rate": 3.442819026589383e-05, "loss": 0.278, "step": 677000 }, { "epoch": 31.17, "learning_rate": 3.4416689667862726e-05, "loss": 0.284, "step": 677500 }, { "epoch": 31.19, "learning_rate": 3.4405189069831636e-05, "loss": 0.2859, "step": 678000 }, { "epoch": 31.21, "learning_rate": 3.439368847180053e-05, "loss": 0.2871, "step": 678500 }, { "epoch": 31.24, "learning_rate": 3.4382187873769436e-05, "loss": 0.2893, "step": 679000 }, { "epoch": 31.26, "learning_rate": 3.437068727573834e-05, "loss": 0.2864, "step": 679500 }, { "epoch": 31.28, "learning_rate": 3.435918667770724e-05, "loss": 0.2894, "step": 680000 }, { "epoch": 31.3, "learning_rate": 3.4347686079676146e-05, "loss": 0.2891, "step": 680500 }, { "epoch": 31.33, "learning_rate": 3.433618548164504e-05, "loss": 0.2884, "step": 681000 }, { "epoch": 31.35, "learning_rate": 3.432468488361395e-05, "loss": 0.2895, "step": 681500 }, { "epoch": 31.37, "learning_rate": 3.431318428558285e-05, "loss": 0.2922, "step": 682000 }, { "epoch": 31.4, "learning_rate": 3.430168368755176e-05, "loss": 0.2988, "step": 682500 }, { "epoch": 31.42, "learning_rate": 3.4290183089520655e-05, "loss": 0.2926, "step": 683000 }, { "epoch": 31.44, "learning_rate": 3.427868249148956e-05, "loss": 0.292, "step": 683500 }, { "epoch": 31.47, "learning_rate": 3.426718189345846e-05, "loss": 0.297, "step": 684000 }, { "epoch": 31.49, "learning_rate": 3.4255681295427365e-05, "loss": 0.2934, "step": 684500 }, { "epoch": 31.51, "learning_rate": 3.424418069739627e-05, "loss": 0.2978, "step": 685000 }, { "epoch": 31.53, "learning_rate": 3.4232680099365164e-05, "loss": 0.3008, "step": 685500 }, { "epoch": 31.56, "learning_rate": 3.4221179501334074e-05, "loss": 0.2977, "step": 686000 }, { "epoch": 31.58, "learning_rate": 3.420967890330297e-05, "loss": 0.3012, "step": 686500 }, { "epoch": 31.6, "learning_rate": 3.4198178305271874e-05, "loss": 0.2913, "step": 687000 }, { "epoch": 31.63, "learning_rate": 3.418667770724078e-05, "loss": 0.3016, "step": 687500 }, { "epoch": 31.65, "learning_rate": 3.417517710920968e-05, "loss": 0.3005, "step": 688000 }, { "epoch": 31.67, "learning_rate": 3.4163676511178584e-05, "loss": 0.304, "step": 688500 }, { "epoch": 31.7, "learning_rate": 3.415217591314748e-05, "loss": 0.2996, "step": 689000 }, { "epoch": 31.72, "learning_rate": 3.414067531511639e-05, "loss": 0.2986, "step": 689500 }, { "epoch": 31.74, "learning_rate": 3.412917471708529e-05, "loss": 0.3001, "step": 690000 }, { "epoch": 31.76, "learning_rate": 3.41176741190542e-05, "loss": 0.3065, "step": 690500 }, { "epoch": 31.79, "learning_rate": 3.410617352102309e-05, "loss": 0.3104, "step": 691000 }, { "epoch": 31.81, "learning_rate": 3.4094672922991996e-05, "loss": 0.312, "step": 691500 }, { "epoch": 31.83, "learning_rate": 3.40831723249609e-05, "loss": 0.3061, "step": 692000 }, { "epoch": 31.86, "learning_rate": 3.40716717269298e-05, "loss": 0.3079, "step": 692500 }, { "epoch": 31.88, "learning_rate": 3.4060171128898706e-05, "loss": 0.3048, "step": 693000 }, { "epoch": 31.9, "learning_rate": 3.40486705308676e-05, "loss": 0.3096, "step": 693500 }, { "epoch": 31.93, "learning_rate": 3.403716993283651e-05, "loss": 0.3124, "step": 694000 }, { "epoch": 31.95, "learning_rate": 3.402566933480541e-05, "loss": 0.3096, "step": 694500 }, { "epoch": 31.97, "learning_rate": 3.401416873677431e-05, "loss": 0.303, "step": 695000 }, { "epoch": 31.99, "learning_rate": 3.4002668138743215e-05, "loss": 0.3128, "step": 695500 }, { "epoch": 32.02, "learning_rate": 3.399116754071212e-05, "loss": 0.2773, "step": 696000 }, { "epoch": 32.04, "learning_rate": 3.397966694268102e-05, "loss": 0.2705, "step": 696500 }, { "epoch": 32.06, "learning_rate": 3.396816634464992e-05, "loss": 0.2732, "step": 697000 }, { "epoch": 32.09, "learning_rate": 3.395666574661883e-05, "loss": 0.2722, "step": 697500 }, { "epoch": 32.11, "learning_rate": 3.3945165148587725e-05, "loss": 0.2792, "step": 698000 }, { "epoch": 32.13, "learning_rate": 3.3933664550556635e-05, "loss": 0.281, "step": 698500 }, { "epoch": 32.16, "learning_rate": 3.392216395252553e-05, "loss": 0.2785, "step": 699000 }, { "epoch": 32.18, "learning_rate": 3.3910663354494435e-05, "loss": 0.2786, "step": 699500 }, { "epoch": 32.2, "learning_rate": 3.389916275646334e-05, "loss": 0.2843, "step": 700000 }, { "epoch": 32.22, "learning_rate": 3.3887662158432234e-05, "loss": 0.2792, "step": 700500 }, { "epoch": 32.25, "learning_rate": 3.3876161560401144e-05, "loss": 0.279, "step": 701000 }, { "epoch": 32.27, "learning_rate": 3.386466096237004e-05, "loss": 0.2844, "step": 701500 }, { "epoch": 32.29, "learning_rate": 3.385316036433895e-05, "loss": 0.2893, "step": 702000 }, { "epoch": 32.32, "learning_rate": 3.384165976630785e-05, "loss": 0.2851, "step": 702500 }, { "epoch": 32.34, "learning_rate": 3.383015916827675e-05, "loss": 0.2796, "step": 703000 }, { "epoch": 32.36, "learning_rate": 3.3818658570245654e-05, "loss": 0.285, "step": 703500 }, { "epoch": 32.39, "learning_rate": 3.380715797221456e-05, "loss": 0.2872, "step": 704000 }, { "epoch": 32.41, "learning_rate": 3.379565737418346e-05, "loss": 0.2888, "step": 704500 }, { "epoch": 32.43, "learning_rate": 3.3784156776152357e-05, "loss": 0.2928, "step": 705000 }, { "epoch": 32.45, "learning_rate": 3.377265617812127e-05, "loss": 0.2889, "step": 705500 }, { "epoch": 32.48, "learning_rate": 3.376115558009016e-05, "loss": 0.2908, "step": 706000 }, { "epoch": 32.5, "learning_rate": 3.374965498205907e-05, "loss": 0.2897, "step": 706500 }, { "epoch": 32.52, "learning_rate": 3.373815438402797e-05, "loss": 0.295, "step": 707000 }, { "epoch": 32.55, "learning_rate": 3.372665378599687e-05, "loss": 0.2943, "step": 707500 }, { "epoch": 32.57, "learning_rate": 3.3715153187965776e-05, "loss": 0.2896, "step": 708000 }, { "epoch": 32.59, "learning_rate": 3.370365258993467e-05, "loss": 0.2932, "step": 708500 }, { "epoch": 32.62, "learning_rate": 3.369215199190358e-05, "loss": 0.297, "step": 709000 }, { "epoch": 32.64, "learning_rate": 3.368065139387248e-05, "loss": 0.2979, "step": 709500 }, { "epoch": 32.66, "learning_rate": 3.366915079584139e-05, "loss": 0.2957, "step": 710000 }, { "epoch": 32.68, "learning_rate": 3.3657650197810285e-05, "loss": 0.2947, "step": 710500 }, { "epoch": 32.71, "learning_rate": 3.364614959977919e-05, "loss": 0.2974, "step": 711000 }, { "epoch": 32.73, "learning_rate": 3.363464900174809e-05, "loss": 0.2989, "step": 711500 }, { "epoch": 32.75, "learning_rate": 3.3623148403716995e-05, "loss": 0.2996, "step": 712000 }, { "epoch": 32.78, "learning_rate": 3.36116478056859e-05, "loss": 0.2972, "step": 712500 }, { "epoch": 32.8, "learning_rate": 3.3600147207654795e-05, "loss": 0.298, "step": 713000 }, { "epoch": 32.82, "learning_rate": 3.3588646609623705e-05, "loss": 0.3025, "step": 713500 }, { "epoch": 32.85, "learning_rate": 3.35771460115926e-05, "loss": 0.3022, "step": 714000 }, { "epoch": 32.87, "learning_rate": 3.356564541356151e-05, "loss": 0.3005, "step": 714500 }, { "epoch": 32.89, "learning_rate": 3.355414481553041e-05, "loss": 0.303, "step": 715000 }, { "epoch": 32.91, "learning_rate": 3.354264421749931e-05, "loss": 0.3042, "step": 715500 }, { "epoch": 32.94, "learning_rate": 3.3531143619468214e-05, "loss": 0.2975, "step": 716000 }, { "epoch": 32.96, "learning_rate": 3.351964302143711e-05, "loss": 0.3016, "step": 716500 }, { "epoch": 32.98, "learning_rate": 3.350814242340602e-05, "loss": 0.3001, "step": 717000 }, { "epoch": 33.01, "learning_rate": 3.349664182537492e-05, "loss": 0.2902, "step": 717500 }, { "epoch": 33.03, "learning_rate": 3.348514122734383e-05, "loss": 0.262, "step": 718000 }, { "epoch": 33.05, "learning_rate": 3.3473640629312724e-05, "loss": 0.269, "step": 718500 }, { "epoch": 33.08, "learning_rate": 3.346214003128163e-05, "loss": 0.2688, "step": 719000 }, { "epoch": 33.1, "learning_rate": 3.345063943325053e-05, "loss": 0.2663, "step": 719500 }, { "epoch": 33.12, "learning_rate": 3.343913883521943e-05, "loss": 0.2779, "step": 720000 }, { "epoch": 33.14, "learning_rate": 3.3427638237188337e-05, "loss": 0.2769, "step": 720500 }, { "epoch": 33.17, "learning_rate": 3.341613763915723e-05, "loss": 0.2773, "step": 721000 }, { "epoch": 33.19, "learning_rate": 3.340463704112614e-05, "loss": 0.28, "step": 721500 }, { "epoch": 33.21, "learning_rate": 3.339313644309504e-05, "loss": 0.2751, "step": 722000 }, { "epoch": 33.24, "learning_rate": 3.338163584506395e-05, "loss": 0.2808, "step": 722500 }, { "epoch": 33.26, "learning_rate": 3.3370135247032846e-05, "loss": 0.2831, "step": 723000 }, { "epoch": 33.28, "learning_rate": 3.335863464900175e-05, "loss": 0.2814, "step": 723500 }, { "epoch": 33.31, "learning_rate": 3.334713405097065e-05, "loss": 0.2805, "step": 724000 }, { "epoch": 33.33, "learning_rate": 3.333563345293955e-05, "loss": 0.2769, "step": 724500 }, { "epoch": 33.35, "learning_rate": 3.332413285490846e-05, "loss": 0.2795, "step": 725000 }, { "epoch": 33.37, "learning_rate": 3.3312632256877355e-05, "loss": 0.2879, "step": 725500 }, { "epoch": 33.4, "learning_rate": 3.3301131658846265e-05, "loss": 0.2827, "step": 726000 }, { "epoch": 33.42, "learning_rate": 3.328963106081516e-05, "loss": 0.2866, "step": 726500 }, { "epoch": 33.44, "learning_rate": 3.327813046278407e-05, "loss": 0.29, "step": 727000 }, { "epoch": 33.47, "learning_rate": 3.326662986475297e-05, "loss": 0.2878, "step": 727500 }, { "epoch": 33.49, "learning_rate": 3.325512926672187e-05, "loss": 0.2859, "step": 728000 }, { "epoch": 33.51, "learning_rate": 3.3243628668690775e-05, "loss": 0.2888, "step": 728500 }, { "epoch": 33.54, "learning_rate": 3.323212807065967e-05, "loss": 0.2877, "step": 729000 }, { "epoch": 33.56, "learning_rate": 3.322062747262858e-05, "loss": 0.2855, "step": 729500 }, { "epoch": 33.58, "learning_rate": 3.320912687459748e-05, "loss": 0.2868, "step": 730000 }, { "epoch": 33.6, "learning_rate": 3.319762627656639e-05, "loss": 0.2902, "step": 730500 }, { "epoch": 33.63, "learning_rate": 3.3186125678535284e-05, "loss": 0.2951, "step": 731000 }, { "epoch": 33.65, "learning_rate": 3.317462508050419e-05, "loss": 0.2894, "step": 731500 }, { "epoch": 33.67, "learning_rate": 3.316312448247309e-05, "loss": 0.2883, "step": 732000 }, { "epoch": 33.7, "learning_rate": 3.315162388444199e-05, "loss": 0.2925, "step": 732500 }, { "epoch": 33.72, "learning_rate": 3.31401232864109e-05, "loss": 0.2892, "step": 733000 }, { "epoch": 33.74, "learning_rate": 3.3128622688379794e-05, "loss": 0.2961, "step": 733500 }, { "epoch": 33.77, "learning_rate": 3.3117122090348704e-05, "loss": 0.2895, "step": 734000 }, { "epoch": 33.79, "learning_rate": 3.31056214923176e-05, "loss": 0.2959, "step": 734500 }, { "epoch": 33.81, "learning_rate": 3.309412089428651e-05, "loss": 0.2961, "step": 735000 }, { "epoch": 33.83, "learning_rate": 3.3082620296255406e-05, "loss": 0.2916, "step": 735500 }, { "epoch": 33.86, "learning_rate": 3.307111969822431e-05, "loss": 0.2945, "step": 736000 }, { "epoch": 33.88, "learning_rate": 3.305961910019321e-05, "loss": 0.3013, "step": 736500 }, { "epoch": 33.9, "learning_rate": 3.304811850216211e-05, "loss": 0.3013, "step": 737000 }, { "epoch": 33.93, "learning_rate": 3.303661790413102e-05, "loss": 0.3022, "step": 737500 }, { "epoch": 33.95, "learning_rate": 3.3025117306099916e-05, "loss": 0.303, "step": 738000 }, { "epoch": 33.97, "learning_rate": 3.3013616708068826e-05, "loss": 0.3001, "step": 738500 }, { "epoch": 34.0, "learning_rate": 3.300211611003772e-05, "loss": 0.2984, "step": 739000 }, { "epoch": 34.02, "learning_rate": 3.2990615512006626e-05, "loss": 0.2696, "step": 739500 }, { "epoch": 34.04, "learning_rate": 3.297911491397553e-05, "loss": 0.2644, "step": 740000 }, { "epoch": 34.06, "learning_rate": 3.2967614315944425e-05, "loss": 0.259, "step": 740500 }, { "epoch": 34.09, "learning_rate": 3.2956113717913335e-05, "loss": 0.2634, "step": 741000 }, { "epoch": 34.11, "learning_rate": 3.294461311988223e-05, "loss": 0.2721, "step": 741500 }, { "epoch": 34.13, "learning_rate": 3.293311252185114e-05, "loss": 0.2655, "step": 742000 }, { "epoch": 34.16, "learning_rate": 3.292161192382004e-05, "loss": 0.2704, "step": 742500 }, { "epoch": 34.18, "learning_rate": 3.291011132578895e-05, "loss": 0.2717, "step": 743000 }, { "epoch": 34.2, "learning_rate": 3.2898610727757845e-05, "loss": 0.2725, "step": 743500 }, { "epoch": 34.23, "learning_rate": 3.288711012972675e-05, "loss": 0.2689, "step": 744000 }, { "epoch": 34.25, "learning_rate": 3.287560953169565e-05, "loss": 0.2767, "step": 744500 }, { "epoch": 34.27, "learning_rate": 3.286410893366455e-05, "loss": 0.2745, "step": 745000 }, { "epoch": 34.29, "learning_rate": 3.285260833563346e-05, "loss": 0.2755, "step": 745500 }, { "epoch": 34.32, "learning_rate": 3.2841107737602354e-05, "loss": 0.2788, "step": 746000 }, { "epoch": 34.34, "learning_rate": 3.2829607139571264e-05, "loss": 0.2816, "step": 746500 }, { "epoch": 34.36, "learning_rate": 3.281810654154016e-05, "loss": 0.2821, "step": 747000 }, { "epoch": 34.39, "learning_rate": 3.2806605943509064e-05, "loss": 0.28, "step": 747500 }, { "epoch": 34.41, "learning_rate": 3.279510534547797e-05, "loss": 0.279, "step": 748000 }, { "epoch": 34.43, "learning_rate": 3.2783604747446863e-05, "loss": 0.2811, "step": 748500 }, { "epoch": 34.46, "learning_rate": 3.2772104149415773e-05, "loss": 0.2862, "step": 749000 }, { "epoch": 34.48, "learning_rate": 3.276060355138467e-05, "loss": 0.2859, "step": 749500 }, { "epoch": 34.5, "learning_rate": 3.274910295335358e-05, "loss": 0.2847, "step": 750000 }, { "epoch": 34.52, "learning_rate": 3.2737602355322476e-05, "loss": 0.2843, "step": 750500 }, { "epoch": 34.55, "learning_rate": 3.2726101757291386e-05, "loss": 0.2849, "step": 751000 }, { "epoch": 34.57, "learning_rate": 3.271460115926028e-05, "loss": 0.2855, "step": 751500 }, { "epoch": 34.59, "learning_rate": 3.2703100561229186e-05, "loss": 0.2872, "step": 752000 }, { "epoch": 34.62, "learning_rate": 3.269159996319809e-05, "loss": 0.2862, "step": 752500 }, { "epoch": 34.64, "learning_rate": 3.2680099365166986e-05, "loss": 0.2888, "step": 753000 }, { "epoch": 34.66, "learning_rate": 3.2668598767135896e-05, "loss": 0.2893, "step": 753500 }, { "epoch": 34.69, "learning_rate": 3.265709816910479e-05, "loss": 0.292, "step": 754000 }, { "epoch": 34.71, "learning_rate": 3.26455975710737e-05, "loss": 0.2916, "step": 754500 }, { "epoch": 34.73, "learning_rate": 3.26340969730426e-05, "loss": 0.2874, "step": 755000 }, { "epoch": 34.75, "learning_rate": 3.26225963750115e-05, "loss": 0.288, "step": 755500 }, { "epoch": 34.78, "learning_rate": 3.2611095776980405e-05, "loss": 0.2892, "step": 756000 }, { "epoch": 34.8, "learning_rate": 3.25995951789493e-05, "loss": 0.2868, "step": 756500 }, { "epoch": 34.82, "learning_rate": 3.258809458091821e-05, "loss": 0.2871, "step": 757000 }, { "epoch": 34.85, "learning_rate": 3.257659398288711e-05, "loss": 0.2912, "step": 757500 }, { "epoch": 34.87, "learning_rate": 3.256509338485602e-05, "loss": 0.2963, "step": 758000 }, { "epoch": 34.89, "learning_rate": 3.2553592786824915e-05, "loss": 0.2928, "step": 758500 }, { "epoch": 34.92, "learning_rate": 3.2542092188793825e-05, "loss": 0.2911, "step": 759000 }, { "epoch": 34.94, "learning_rate": 3.253059159076272e-05, "loss": 0.2956, "step": 759500 }, { "epoch": 34.96, "learning_rate": 3.2519090992731624e-05, "loss": 0.2892, "step": 760000 }, { "epoch": 34.98, "learning_rate": 3.250759039470053e-05, "loss": 0.2963, "step": 760500 }, { "epoch": 35.01, "learning_rate": 3.2496089796669424e-05, "loss": 0.2796, "step": 761000 }, { "epoch": 35.03, "learning_rate": 3.2484589198638334e-05, "loss": 0.2538, "step": 761500 }, { "epoch": 35.05, "learning_rate": 3.247308860060723e-05, "loss": 0.2603, "step": 762000 }, { "epoch": 35.08, "learning_rate": 3.246158800257614e-05, "loss": 0.2623, "step": 762500 }, { "epoch": 35.1, "learning_rate": 3.245008740454504e-05, "loss": 0.2669, "step": 763000 }, { "epoch": 35.12, "learning_rate": 3.243858680651394e-05, "loss": 0.2621, "step": 763500 }, { "epoch": 35.15, "learning_rate": 3.242708620848284e-05, "loss": 0.2656, "step": 764000 }, { "epoch": 35.17, "learning_rate": 3.241558561045174e-05, "loss": 0.267, "step": 764500 }, { "epoch": 35.19, "learning_rate": 3.240408501242065e-05, "loss": 0.2731, "step": 765000 }, { "epoch": 35.21, "learning_rate": 3.2392584414389546e-05, "loss": 0.2755, "step": 765500 }, { "epoch": 35.24, "learning_rate": 3.2381083816358456e-05, "loss": 0.2723, "step": 766000 }, { "epoch": 35.26, "learning_rate": 3.236958321832735e-05, "loss": 0.2724, "step": 766500 }, { "epoch": 35.28, "learning_rate": 3.235808262029626e-05, "loss": 0.2725, "step": 767000 }, { "epoch": 35.31, "learning_rate": 3.234658202226516e-05, "loss": 0.2769, "step": 767500 }, { "epoch": 35.33, "learning_rate": 3.233508142423406e-05, "loss": 0.2781, "step": 768000 }, { "epoch": 35.35, "learning_rate": 3.2323580826202966e-05, "loss": 0.277, "step": 768500 }, { "epoch": 35.38, "learning_rate": 3.231208022817186e-05, "loss": 0.2752, "step": 769000 }, { "epoch": 35.4, "learning_rate": 3.230057963014077e-05, "loss": 0.2728, "step": 769500 }, { "epoch": 35.42, "learning_rate": 3.228907903210967e-05, "loss": 0.2761, "step": 770000 }, { "epoch": 35.44, "learning_rate": 3.227757843407858e-05, "loss": 0.2803, "step": 770500 }, { "epoch": 35.47, "learning_rate": 3.2266077836047475e-05, "loss": 0.2788, "step": 771000 }, { "epoch": 35.49, "learning_rate": 3.225457723801638e-05, "loss": 0.2809, "step": 771500 }, { "epoch": 35.51, "learning_rate": 3.224307663998528e-05, "loss": 0.2809, "step": 772000 }, { "epoch": 35.54, "learning_rate": 3.223157604195418e-05, "loss": 0.2826, "step": 772500 }, { "epoch": 35.56, "learning_rate": 3.222007544392309e-05, "loss": 0.2823, "step": 773000 }, { "epoch": 35.58, "learning_rate": 3.2208574845891985e-05, "loss": 0.2834, "step": 773500 }, { "epoch": 35.61, "learning_rate": 3.2197074247860895e-05, "loss": 0.2817, "step": 774000 }, { "epoch": 35.63, "learning_rate": 3.218557364982979e-05, "loss": 0.2793, "step": 774500 }, { "epoch": 35.65, "learning_rate": 3.21740730517987e-05, "loss": 0.288, "step": 775000 }, { "epoch": 35.67, "learning_rate": 3.21625724537676e-05, "loss": 0.2817, "step": 775500 }, { "epoch": 35.7, "learning_rate": 3.21510718557365e-05, "loss": 0.2884, "step": 776000 }, { "epoch": 35.72, "learning_rate": 3.2139571257705404e-05, "loss": 0.2844, "step": 776500 }, { "epoch": 35.74, "learning_rate": 3.21280706596743e-05, "loss": 0.284, "step": 777000 }, { "epoch": 35.77, "learning_rate": 3.211657006164321e-05, "loss": 0.284, "step": 777500 }, { "epoch": 35.79, "learning_rate": 3.210506946361211e-05, "loss": 0.2888, "step": 778000 }, { "epoch": 35.81, "learning_rate": 3.209356886558102e-05, "loss": 0.2898, "step": 778500 }, { "epoch": 35.84, "learning_rate": 3.208206826754991e-05, "loss": 0.2918, "step": 779000 }, { "epoch": 35.86, "learning_rate": 3.2070567669518817e-05, "loss": 0.2867, "step": 779500 }, { "epoch": 35.88, "learning_rate": 3.205906707148772e-05, "loss": 0.2936, "step": 780000 }, { "epoch": 35.9, "learning_rate": 3.2047566473456616e-05, "loss": 0.2834, "step": 780500 }, { "epoch": 35.93, "learning_rate": 3.2036065875425526e-05, "loss": 0.2908, "step": 781000 }, { "epoch": 35.95, "learning_rate": 3.202456527739442e-05, "loss": 0.2885, "step": 781500 }, { "epoch": 35.97, "learning_rate": 3.201306467936333e-05, "loss": 0.2931, "step": 782000 }, { "epoch": 36.0, "learning_rate": 3.200156408133223e-05, "loss": 0.2921, "step": 782500 }, { "epoch": 36.02, "learning_rate": 3.199006348330114e-05, "loss": 0.2599, "step": 783000 }, { "epoch": 36.04, "learning_rate": 3.1978562885270036e-05, "loss": 0.2579, "step": 783500 }, { "epoch": 36.07, "learning_rate": 3.196706228723894e-05, "loss": 0.2605, "step": 784000 }, { "epoch": 36.09, "learning_rate": 3.195556168920784e-05, "loss": 0.2573, "step": 784500 }, { "epoch": 36.11, "learning_rate": 3.194406109117674e-05, "loss": 0.2649, "step": 785000 }, { "epoch": 36.13, "learning_rate": 3.193256049314565e-05, "loss": 0.2676, "step": 785500 }, { "epoch": 36.16, "learning_rate": 3.1921059895114545e-05, "loss": 0.2666, "step": 786000 }, { "epoch": 36.18, "learning_rate": 3.1909559297083455e-05, "loss": 0.2619, "step": 786500 }, { "epoch": 36.2, "learning_rate": 3.189805869905235e-05, "loss": 0.2632, "step": 787000 }, { "epoch": 36.23, "learning_rate": 3.1886558101021255e-05, "loss": 0.2702, "step": 787500 }, { "epoch": 36.25, "learning_rate": 3.187505750299016e-05, "loss": 0.2726, "step": 788000 }, { "epoch": 36.27, "learning_rate": 3.1863556904959054e-05, "loss": 0.2708, "step": 788500 }, { "epoch": 36.3, "learning_rate": 3.1852056306927964e-05, "loss": 0.2735, "step": 789000 }, { "epoch": 36.32, "learning_rate": 3.184055570889686e-05, "loss": 0.2758, "step": 789500 }, { "epoch": 36.34, "learning_rate": 3.182905511086577e-05, "loss": 0.276, "step": 790000 }, { "epoch": 36.36, "learning_rate": 3.181755451283467e-05, "loss": 0.2767, "step": 790500 }, { "epoch": 36.39, "learning_rate": 3.180605391480357e-05, "loss": 0.2736, "step": 791000 }, { "epoch": 36.41, "learning_rate": 3.1794553316772474e-05, "loss": 0.2747, "step": 791500 }, { "epoch": 36.43, "learning_rate": 3.178305271874138e-05, "loss": 0.272, "step": 792000 }, { "epoch": 36.46, "learning_rate": 3.177155212071028e-05, "loss": 0.2728, "step": 792500 }, { "epoch": 36.48, "learning_rate": 3.176005152267918e-05, "loss": 0.2802, "step": 793000 }, { "epoch": 36.5, "learning_rate": 3.174855092464809e-05, "loss": 0.2729, "step": 793500 }, { "epoch": 36.53, "learning_rate": 3.173705032661698e-05, "loss": 0.2743, "step": 794000 }, { "epoch": 36.55, "learning_rate": 3.172554972858589e-05, "loss": 0.2715, "step": 794500 }, { "epoch": 36.57, "learning_rate": 3.171404913055479e-05, "loss": 0.2772, "step": 795000 }, { "epoch": 36.59, "learning_rate": 3.170254853252369e-05, "loss": 0.2741, "step": 795500 }, { "epoch": 36.62, "learning_rate": 3.1691047934492596e-05, "loss": 0.2829, "step": 796000 }, { "epoch": 36.64, "learning_rate": 3.167954733646149e-05, "loss": 0.282, "step": 796500 }, { "epoch": 36.66, "learning_rate": 3.16680467384304e-05, "loss": 0.2818, "step": 797000 }, { "epoch": 36.69, "learning_rate": 3.16565461403993e-05, "loss": 0.2842, "step": 797500 }, { "epoch": 36.71, "learning_rate": 3.164504554236821e-05, "loss": 0.2832, "step": 798000 }, { "epoch": 36.73, "learning_rate": 3.1633544944337106e-05, "loss": 0.2795, "step": 798500 }, { "epoch": 36.76, "learning_rate": 3.162204434630601e-05, "loss": 0.2843, "step": 799000 }, { "epoch": 36.78, "learning_rate": 3.161054374827491e-05, "loss": 0.2806, "step": 799500 }, { "epoch": 36.8, "learning_rate": 3.1599043150243815e-05, "loss": 0.2822, "step": 800000 }, { "epoch": 36.82, "learning_rate": 3.158754255221272e-05, "loss": 0.2847, "step": 800500 }, { "epoch": 36.85, "learning_rate": 3.1576041954181615e-05, "loss": 0.2857, "step": 801000 }, { "epoch": 36.87, "learning_rate": 3.1564541356150525e-05, "loss": 0.286, "step": 801500 }, { "epoch": 36.89, "learning_rate": 3.155304075811942e-05, "loss": 0.2829, "step": 802000 }, { "epoch": 36.92, "learning_rate": 3.154154016008833e-05, "loss": 0.2835, "step": 802500 }, { "epoch": 36.94, "learning_rate": 3.153003956205723e-05, "loss": 0.282, "step": 803000 }, { "epoch": 36.96, "learning_rate": 3.151853896402613e-05, "loss": 0.2844, "step": 803500 }, { "epoch": 36.99, "learning_rate": 3.1507038365995034e-05, "loss": 0.2842, "step": 804000 }, { "epoch": 37.01, "learning_rate": 3.149553776796393e-05, "loss": 0.2684, "step": 804500 }, { "epoch": 37.03, "learning_rate": 3.148403716993284e-05, "loss": 0.2529, "step": 805000 }, { "epoch": 37.05, "learning_rate": 3.147253657190174e-05, "loss": 0.2524, "step": 805500 }, { "epoch": 37.08, "learning_rate": 3.146103597387065e-05, "loss": 0.2547, "step": 806000 }, { "epoch": 37.1, "learning_rate": 3.1449535375839544e-05, "loss": 0.2555, "step": 806500 }, { "epoch": 37.12, "learning_rate": 3.143803477780845e-05, "loss": 0.2606, "step": 807000 }, { "epoch": 37.15, "learning_rate": 3.142653417977735e-05, "loss": 0.2611, "step": 807500 }, { "epoch": 37.17, "learning_rate": 3.1415033581746253e-05, "loss": 0.263, "step": 808000 }, { "epoch": 37.19, "learning_rate": 3.140353298371516e-05, "loss": 0.26, "step": 808500 }, { "epoch": 37.22, "learning_rate": 3.139203238568405e-05, "loss": 0.2633, "step": 809000 }, { "epoch": 37.24, "learning_rate": 3.138053178765296e-05, "loss": 0.2659, "step": 809500 }, { "epoch": 37.26, "learning_rate": 3.136903118962186e-05, "loss": 0.2697, "step": 810000 }, { "epoch": 37.28, "learning_rate": 3.135753059159077e-05, "loss": 0.2661, "step": 810500 }, { "epoch": 37.31, "learning_rate": 3.1346029993559666e-05, "loss": 0.2641, "step": 811000 }, { "epoch": 37.33, "learning_rate": 3.133452939552857e-05, "loss": 0.2644, "step": 811500 }, { "epoch": 37.35, "learning_rate": 3.132302879749747e-05, "loss": 0.267, "step": 812000 }, { "epoch": 37.38, "learning_rate": 3.131152819946637e-05, "loss": 0.2685, "step": 812500 }, { "epoch": 37.4, "learning_rate": 3.130002760143528e-05, "loss": 0.2698, "step": 813000 }, { "epoch": 37.42, "learning_rate": 3.1288527003404175e-05, "loss": 0.2661, "step": 813500 }, { "epoch": 37.45, "learning_rate": 3.1277026405373086e-05, "loss": 0.2765, "step": 814000 }, { "epoch": 37.47, "learning_rate": 3.126552580734198e-05, "loss": 0.2715, "step": 814500 }, { "epoch": 37.49, "learning_rate": 3.1254025209310885e-05, "loss": 0.2699, "step": 815000 }, { "epoch": 37.51, "learning_rate": 3.124252461127979e-05, "loss": 0.2709, "step": 815500 }, { "epoch": 37.54, "learning_rate": 3.123102401324869e-05, "loss": 0.2784, "step": 816000 }, { "epoch": 37.56, "learning_rate": 3.1219523415217595e-05, "loss": 0.2775, "step": 816500 }, { "epoch": 37.58, "learning_rate": 3.120802281718649e-05, "loss": 0.2735, "step": 817000 }, { "epoch": 37.61, "learning_rate": 3.11965222191554e-05, "loss": 0.2806, "step": 817500 }, { "epoch": 37.63, "learning_rate": 3.11850216211243e-05, "loss": 0.2727, "step": 818000 }, { "epoch": 37.65, "learning_rate": 3.117352102309321e-05, "loss": 0.273, "step": 818500 }, { "epoch": 37.68, "learning_rate": 3.1162020425062104e-05, "loss": 0.2743, "step": 819000 }, { "epoch": 37.7, "learning_rate": 3.115051982703101e-05, "loss": 0.2809, "step": 819500 }, { "epoch": 37.72, "learning_rate": 3.113901922899991e-05, "loss": 0.2779, "step": 820000 }, { "epoch": 37.74, "learning_rate": 3.112751863096881e-05, "loss": 0.2737, "step": 820500 }, { "epoch": 37.77, "learning_rate": 3.111601803293772e-05, "loss": 0.2775, "step": 821000 }, { "epoch": 37.79, "learning_rate": 3.1104517434906614e-05, "loss": 0.2768, "step": 821500 }, { "epoch": 37.81, "learning_rate": 3.1093016836875524e-05, "loss": 0.2809, "step": 822000 }, { "epoch": 37.84, "learning_rate": 3.108151623884442e-05, "loss": 0.2843, "step": 822500 }, { "epoch": 37.86, "learning_rate": 3.1070015640813323e-05, "loss": 0.2839, "step": 823000 }, { "epoch": 37.88, "learning_rate": 3.105851504278223e-05, "loss": 0.2817, "step": 823500 }, { "epoch": 37.91, "learning_rate": 3.104701444475113e-05, "loss": 0.2834, "step": 824000 }, { "epoch": 37.93, "learning_rate": 3.103551384672003e-05, "loss": 0.2816, "step": 824500 }, { "epoch": 37.95, "learning_rate": 3.102401324868893e-05, "loss": 0.2825, "step": 825000 }, { "epoch": 37.97, "learning_rate": 3.101251265065784e-05, "loss": 0.2833, "step": 825500 }, { "epoch": 38.0, "learning_rate": 3.1001012052626736e-05, "loss": 0.2865, "step": 826000 }, { "epoch": 38.02, "learning_rate": 3.098951145459564e-05, "loss": 0.2486, "step": 826500 }, { "epoch": 38.04, "learning_rate": 3.097801085656454e-05, "loss": 0.2525, "step": 827000 }, { "epoch": 38.07, "learning_rate": 3.0966510258533446e-05, "loss": 0.2499, "step": 827500 }, { "epoch": 38.09, "learning_rate": 3.095500966050235e-05, "loss": 0.2549, "step": 828000 }, { "epoch": 38.11, "learning_rate": 3.0943509062471245e-05, "loss": 0.2533, "step": 828500 }, { "epoch": 38.14, "learning_rate": 3.0932008464440155e-05, "loss": 0.2525, "step": 829000 }, { "epoch": 38.16, "learning_rate": 3.092050786640905e-05, "loss": 0.258, "step": 829500 }, { "epoch": 38.18, "learning_rate": 3.090900726837796e-05, "loss": 0.2631, "step": 830000 }, { "epoch": 38.2, "learning_rate": 3.089750667034686e-05, "loss": 0.2557, "step": 830500 }, { "epoch": 38.23, "learning_rate": 3.088600607231576e-05, "loss": 0.2599, "step": 831000 }, { "epoch": 38.25, "learning_rate": 3.0874505474284665e-05, "loss": 0.263, "step": 831500 }, { "epoch": 38.27, "learning_rate": 3.086300487625357e-05, "loss": 0.2679, "step": 832000 }, { "epoch": 38.3, "learning_rate": 3.085150427822247e-05, "loss": 0.2614, "step": 832500 }, { "epoch": 38.32, "learning_rate": 3.084000368019137e-05, "loss": 0.266, "step": 833000 }, { "epoch": 38.34, "learning_rate": 3.082850308216028e-05, "loss": 0.2666, "step": 833500 }, { "epoch": 38.37, "learning_rate": 3.0817002484129174e-05, "loss": 0.2735, "step": 834000 }, { "epoch": 38.39, "learning_rate": 3.080550188609808e-05, "loss": 0.2653, "step": 834500 }, { "epoch": 38.41, "learning_rate": 3.079400128806698e-05, "loss": 0.2673, "step": 835000 }, { "epoch": 38.43, "learning_rate": 3.0782500690035884e-05, "loss": 0.2673, "step": 835500 }, { "epoch": 38.46, "learning_rate": 3.077100009200479e-05, "loss": 0.2663, "step": 836000 }, { "epoch": 38.48, "learning_rate": 3.0759499493973684e-05, "loss": 0.2667, "step": 836500 }, { "epoch": 38.5, "learning_rate": 3.0747998895942594e-05, "loss": 0.2665, "step": 837000 }, { "epoch": 38.53, "learning_rate": 3.073649829791149e-05, "loss": 0.2667, "step": 837500 }, { "epoch": 38.55, "learning_rate": 3.07249976998804e-05, "loss": 0.2678, "step": 838000 }, { "epoch": 38.57, "learning_rate": 3.0713497101849297e-05, "loss": 0.2732, "step": 838500 }, { "epoch": 38.6, "learning_rate": 3.07019965038182e-05, "loss": 0.2711, "step": 839000 }, { "epoch": 38.62, "learning_rate": 3.06904959057871e-05, "loss": 0.2772, "step": 839500 }, { "epoch": 38.64, "learning_rate": 3.0678995307756006e-05, "loss": 0.2695, "step": 840000 }, { "epoch": 38.67, "learning_rate": 3.066749470972491e-05, "loss": 0.2744, "step": 840500 }, { "epoch": 38.69, "learning_rate": 3.0655994111693806e-05, "loss": 0.27, "step": 841000 }, { "epoch": 38.71, "learning_rate": 3.0644493513662716e-05, "loss": 0.2746, "step": 841500 }, { "epoch": 38.73, "learning_rate": 3.063299291563161e-05, "loss": 0.2772, "step": 842000 }, { "epoch": 38.76, "learning_rate": 3.0621492317600516e-05, "loss": 0.2751, "step": 842500 }, { "epoch": 38.78, "learning_rate": 3.060999171956942e-05, "loss": 0.2737, "step": 843000 }, { "epoch": 38.8, "learning_rate": 3.059849112153832e-05, "loss": 0.2755, "step": 843500 }, { "epoch": 38.83, "learning_rate": 3.0586990523507225e-05, "loss": 0.2781, "step": 844000 }, { "epoch": 38.85, "learning_rate": 3.057548992547612e-05, "loss": 0.2807, "step": 844500 }, { "epoch": 38.87, "learning_rate": 3.056398932744503e-05, "loss": 0.2774, "step": 845000 }, { "epoch": 38.9, "learning_rate": 3.055248872941393e-05, "loss": 0.2791, "step": 845500 }, { "epoch": 38.92, "learning_rate": 3.054098813138284e-05, "loss": 0.2761, "step": 846000 }, { "epoch": 38.94, "learning_rate": 3.0529487533351735e-05, "loss": 0.2785, "step": 846500 }, { "epoch": 38.96, "learning_rate": 3.051798693532064e-05, "loss": 0.2789, "step": 847000 }, { "epoch": 38.99, "learning_rate": 3.050648633728954e-05, "loss": 0.2797, "step": 847500 }, { "epoch": 39.01, "learning_rate": 3.049498573925844e-05, "loss": 0.2608, "step": 848000 }, { "epoch": 39.03, "learning_rate": 3.0483485141227348e-05, "loss": 0.2422, "step": 848500 }, { "epoch": 39.06, "learning_rate": 3.0471984543196248e-05, "loss": 0.2523, "step": 849000 }, { "epoch": 39.08, "learning_rate": 3.0460483945165154e-05, "loss": 0.2501, "step": 849500 }, { "epoch": 39.1, "learning_rate": 3.0448983347134054e-05, "loss": 0.2547, "step": 850000 }, { "epoch": 39.13, "learning_rate": 3.0437482749102954e-05, "loss": 0.2496, "step": 850500 }, { "epoch": 39.15, "learning_rate": 3.0425982151071857e-05, "loss": 0.2572, "step": 851000 }, { "epoch": 39.17, "learning_rate": 3.0414481553040757e-05, "loss": 0.2555, "step": 851500 }, { "epoch": 39.19, "learning_rate": 3.0402980955009664e-05, "loss": 0.258, "step": 852000 }, { "epoch": 39.22, "learning_rate": 3.0391480356978563e-05, "loss": 0.2596, "step": 852500 }, { "epoch": 39.24, "learning_rate": 3.037997975894747e-05, "loss": 0.2566, "step": 853000 }, { "epoch": 39.26, "learning_rate": 3.036847916091637e-05, "loss": 0.2579, "step": 853500 }, { "epoch": 39.29, "learning_rate": 3.035697856288527e-05, "loss": 0.2615, "step": 854000 }, { "epoch": 39.31, "learning_rate": 3.0345477964854173e-05, "loss": 0.2656, "step": 854500 }, { "epoch": 39.33, "learning_rate": 3.0333977366823073e-05, "loss": 0.264, "step": 855000 }, { "epoch": 39.36, "learning_rate": 3.032247676879198e-05, "loss": 0.2591, "step": 855500 }, { "epoch": 39.38, "learning_rate": 3.031097617076088e-05, "loss": 0.2614, "step": 856000 }, { "epoch": 39.4, "learning_rate": 3.0299475572729786e-05, "loss": 0.2644, "step": 856500 }, { "epoch": 39.42, "learning_rate": 3.0287974974698686e-05, "loss": 0.2703, "step": 857000 }, { "epoch": 39.45, "learning_rate": 3.0276474376667592e-05, "loss": 0.2671, "step": 857500 }, { "epoch": 39.47, "learning_rate": 3.0264973778636492e-05, "loss": 0.2687, "step": 858000 }, { "epoch": 39.49, "learning_rate": 3.0253473180605392e-05, "loss": 0.268, "step": 858500 }, { "epoch": 39.52, "learning_rate": 3.0241972582574295e-05, "loss": 0.2675, "step": 859000 }, { "epoch": 39.54, "learning_rate": 3.0230471984543195e-05, "loss": 0.2634, "step": 859500 }, { "epoch": 39.56, "learning_rate": 3.0218971386512102e-05, "loss": 0.2646, "step": 860000 }, { "epoch": 39.59, "learning_rate": 3.0207470788481e-05, "loss": 0.265, "step": 860500 }, { "epoch": 39.61, "learning_rate": 3.0195970190449908e-05, "loss": 0.2709, "step": 861000 }, { "epoch": 39.63, "learning_rate": 3.0184469592418808e-05, "loss": 0.2709, "step": 861500 }, { "epoch": 39.65, "learning_rate": 3.0172968994387708e-05, "loss": 0.2706, "step": 862000 }, { "epoch": 39.68, "learning_rate": 3.016146839635661e-05, "loss": 0.2733, "step": 862500 }, { "epoch": 39.7, "learning_rate": 3.014996779832551e-05, "loss": 0.2724, "step": 863000 }, { "epoch": 39.72, "learning_rate": 3.0138467200294418e-05, "loss": 0.2742, "step": 863500 }, { "epoch": 39.75, "learning_rate": 3.0126966602263317e-05, "loss": 0.2713, "step": 864000 }, { "epoch": 39.77, "learning_rate": 3.0115466004232224e-05, "loss": 0.2728, "step": 864500 }, { "epoch": 39.79, "learning_rate": 3.0103965406201124e-05, "loss": 0.2737, "step": 865000 }, { "epoch": 39.82, "learning_rate": 3.009246480817003e-05, "loss": 0.2751, "step": 865500 }, { "epoch": 39.84, "learning_rate": 3.008096421013893e-05, "loss": 0.2705, "step": 866000 }, { "epoch": 39.86, "learning_rate": 3.006946361210783e-05, "loss": 0.2724, "step": 866500 }, { "epoch": 39.88, "learning_rate": 3.0057963014076733e-05, "loss": 0.2713, "step": 867000 }, { "epoch": 39.91, "learning_rate": 3.0046462416045633e-05, "loss": 0.2736, "step": 867500 }, { "epoch": 39.93, "learning_rate": 3.003496181801454e-05, "loss": 0.2792, "step": 868000 }, { "epoch": 39.95, "learning_rate": 3.002346121998344e-05, "loss": 0.2729, "step": 868500 }, { "epoch": 39.98, "learning_rate": 3.0011960621952346e-05, "loss": 0.2746, "step": 869000 }, { "epoch": 40.0, "learning_rate": 3.0000460023921246e-05, "loss": 0.2751, "step": 869500 }, { "epoch": 40.02, "learning_rate": 2.9988959425890146e-05, "loss": 0.2423, "step": 870000 }, { "epoch": 40.05, "learning_rate": 2.997745882785905e-05, "loss": 0.2423, "step": 870500 }, { "epoch": 40.07, "learning_rate": 2.996595822982795e-05, "loss": 0.2432, "step": 871000 }, { "epoch": 40.09, "learning_rate": 2.9954457631796856e-05, "loss": 0.248, "step": 871500 }, { "epoch": 40.11, "learning_rate": 2.9942957033765756e-05, "loss": 0.2491, "step": 872000 }, { "epoch": 40.14, "learning_rate": 2.9931456435734662e-05, "loss": 0.2529, "step": 872500 }, { "epoch": 40.16, "learning_rate": 2.9919955837703562e-05, "loss": 0.2567, "step": 873000 }, { "epoch": 40.18, "learning_rate": 2.990845523967247e-05, "loss": 0.2519, "step": 873500 }, { "epoch": 40.21, "learning_rate": 2.989695464164137e-05, "loss": 0.2556, "step": 874000 }, { "epoch": 40.23, "learning_rate": 2.988545404361027e-05, "loss": 0.2554, "step": 874500 }, { "epoch": 40.25, "learning_rate": 2.987395344557917e-05, "loss": 0.2571, "step": 875000 }, { "epoch": 40.28, "learning_rate": 2.986245284754807e-05, "loss": 0.2539, "step": 875500 }, { "epoch": 40.3, "learning_rate": 2.9850952249516978e-05, "loss": 0.2595, "step": 876000 }, { "epoch": 40.32, "learning_rate": 2.9839451651485878e-05, "loss": 0.2613, "step": 876500 }, { "epoch": 40.34, "learning_rate": 2.9827951053454785e-05, "loss": 0.2591, "step": 877000 }, { "epoch": 40.37, "learning_rate": 2.9816450455423684e-05, "loss": 0.2625, "step": 877500 }, { "epoch": 40.39, "learning_rate": 2.9804949857392584e-05, "loss": 0.2587, "step": 878000 }, { "epoch": 40.41, "learning_rate": 2.9793449259361488e-05, "loss": 0.264, "step": 878500 }, { "epoch": 40.44, "learning_rate": 2.9781948661330387e-05, "loss": 0.2613, "step": 879000 }, { "epoch": 40.46, "learning_rate": 2.9770448063299294e-05, "loss": 0.2631, "step": 879500 }, { "epoch": 40.48, "learning_rate": 2.9758947465268194e-05, "loss": 0.2668, "step": 880000 }, { "epoch": 40.51, "learning_rate": 2.97474468672371e-05, "loss": 0.27, "step": 880500 }, { "epoch": 40.53, "learning_rate": 2.9735946269206e-05, "loss": 0.2671, "step": 881000 }, { "epoch": 40.55, "learning_rate": 2.97244456711749e-05, "loss": 0.2664, "step": 881500 }, { "epoch": 40.57, "learning_rate": 2.9712945073143807e-05, "loss": 0.2639, "step": 882000 }, { "epoch": 40.6, "learning_rate": 2.9701444475112707e-05, "loss": 0.2653, "step": 882500 }, { "epoch": 40.62, "learning_rate": 2.968994387708161e-05, "loss": 0.2604, "step": 883000 }, { "epoch": 40.64, "learning_rate": 2.967844327905051e-05, "loss": 0.2648, "step": 883500 }, { "epoch": 40.67, "learning_rate": 2.9666942681019416e-05, "loss": 0.2719, "step": 884000 }, { "epoch": 40.69, "learning_rate": 2.9655442082988316e-05, "loss": 0.266, "step": 884500 }, { "epoch": 40.71, "learning_rate": 2.9643941484957223e-05, "loss": 0.2705, "step": 885000 }, { "epoch": 40.74, "learning_rate": 2.9632440886926123e-05, "loss": 0.2666, "step": 885500 }, { "epoch": 40.76, "learning_rate": 2.9620940288895022e-05, "loss": 0.2721, "step": 886000 }, { "epoch": 40.78, "learning_rate": 2.9609439690863926e-05, "loss": 0.2686, "step": 886500 }, { "epoch": 40.8, "learning_rate": 2.9597939092832826e-05, "loss": 0.2704, "step": 887000 }, { "epoch": 40.83, "learning_rate": 2.9586438494801732e-05, "loss": 0.2696, "step": 887500 }, { "epoch": 40.85, "learning_rate": 2.9574937896770632e-05, "loss": 0.2721, "step": 888000 }, { "epoch": 40.87, "learning_rate": 2.956343729873954e-05, "loss": 0.2712, "step": 888500 }, { "epoch": 40.9, "learning_rate": 2.955193670070844e-05, "loss": 0.2711, "step": 889000 }, { "epoch": 40.92, "learning_rate": 2.954043610267734e-05, "loss": 0.2681, "step": 889500 }, { "epoch": 40.94, "learning_rate": 2.9528935504646245e-05, "loss": 0.27, "step": 890000 }, { "epoch": 40.97, "learning_rate": 2.9517434906615145e-05, "loss": 0.2718, "step": 890500 }, { "epoch": 40.99, "learning_rate": 2.9505934308584048e-05, "loss": 0.2748, "step": 891000 }, { "epoch": 41.01, "learning_rate": 2.9494433710552948e-05, "loss": 0.2548, "step": 891500 }, { "epoch": 41.03, "learning_rate": 2.9482933112521855e-05, "loss": 0.2411, "step": 892000 }, { "epoch": 41.06, "learning_rate": 2.9471432514490754e-05, "loss": 0.2465, "step": 892500 }, { "epoch": 41.08, "learning_rate": 2.945993191645966e-05, "loss": 0.2511, "step": 893000 }, { "epoch": 41.1, "learning_rate": 2.944843131842856e-05, "loss": 0.2478, "step": 893500 }, { "epoch": 41.13, "learning_rate": 2.943693072039746e-05, "loss": 0.2523, "step": 894000 }, { "epoch": 41.15, "learning_rate": 2.9425430122366364e-05, "loss": 0.2524, "step": 894500 }, { "epoch": 41.17, "learning_rate": 2.9413929524335264e-05, "loss": 0.2542, "step": 895000 }, { "epoch": 41.2, "learning_rate": 2.940242892630417e-05, "loss": 0.2515, "step": 895500 }, { "epoch": 41.22, "learning_rate": 2.939092832827307e-05, "loss": 0.2532, "step": 896000 }, { "epoch": 41.24, "learning_rate": 2.9379427730241977e-05, "loss": 0.2503, "step": 896500 }, { "epoch": 41.26, "learning_rate": 2.9367927132210877e-05, "loss": 0.2515, "step": 897000 }, { "epoch": 41.29, "learning_rate": 2.9356426534179777e-05, "loss": 0.2577, "step": 897500 }, { "epoch": 41.31, "learning_rate": 2.9344925936148683e-05, "loss": 0.2512, "step": 898000 }, { "epoch": 41.33, "learning_rate": 2.9333425338117583e-05, "loss": 0.2565, "step": 898500 }, { "epoch": 41.36, "learning_rate": 2.9321924740086486e-05, "loss": 0.2562, "step": 899000 }, { "epoch": 41.38, "learning_rate": 2.9310424142055386e-05, "loss": 0.2577, "step": 899500 }, { "epoch": 41.4, "learning_rate": 2.9298923544024293e-05, "loss": 0.2574, "step": 900000 }, { "epoch": 41.43, "learning_rate": 2.9287422945993193e-05, "loss": 0.2565, "step": 900500 }, { "epoch": 41.45, "learning_rate": 2.92759223479621e-05, "loss": 0.2576, "step": 901000 }, { "epoch": 41.47, "learning_rate": 2.9264421749931e-05, "loss": 0.2607, "step": 901500 }, { "epoch": 41.49, "learning_rate": 2.92529211518999e-05, "loss": 0.2646, "step": 902000 }, { "epoch": 41.52, "learning_rate": 2.9241420553868802e-05, "loss": 0.2665, "step": 902500 }, { "epoch": 41.54, "learning_rate": 2.9229919955837702e-05, "loss": 0.2577, "step": 903000 }, { "epoch": 41.56, "learning_rate": 2.921841935780661e-05, "loss": 0.2626, "step": 903500 }, { "epoch": 41.59, "learning_rate": 2.920691875977551e-05, "loss": 0.2621, "step": 904000 }, { "epoch": 41.61, "learning_rate": 2.9195418161744415e-05, "loss": 0.2641, "step": 904500 }, { "epoch": 41.63, "learning_rate": 2.9183917563713315e-05, "loss": 0.2632, "step": 905000 }, { "epoch": 41.66, "learning_rate": 2.9172416965682215e-05, "loss": 0.261, "step": 905500 }, { "epoch": 41.68, "learning_rate": 2.916091636765112e-05, "loss": 0.2621, "step": 906000 }, { "epoch": 41.7, "learning_rate": 2.914941576962002e-05, "loss": 0.2687, "step": 906500 }, { "epoch": 41.72, "learning_rate": 2.9137915171588924e-05, "loss": 0.2648, "step": 907000 }, { "epoch": 41.75, "learning_rate": 2.9126414573557824e-05, "loss": 0.268, "step": 907500 }, { "epoch": 41.77, "learning_rate": 2.911491397552673e-05, "loss": 0.2618, "step": 908000 }, { "epoch": 41.79, "learning_rate": 2.910341337749563e-05, "loss": 0.2616, "step": 908500 }, { "epoch": 41.82, "learning_rate": 2.9091912779464537e-05, "loss": 0.2659, "step": 909000 }, { "epoch": 41.84, "learning_rate": 2.9080412181433437e-05, "loss": 0.2696, "step": 909500 }, { "epoch": 41.86, "learning_rate": 2.9068911583402337e-05, "loss": 0.2668, "step": 910000 }, { "epoch": 41.89, "learning_rate": 2.905741098537124e-05, "loss": 0.265, "step": 910500 }, { "epoch": 41.91, "learning_rate": 2.904591038734014e-05, "loss": 0.2666, "step": 911000 }, { "epoch": 41.93, "learning_rate": 2.9034409789309047e-05, "loss": 0.2679, "step": 911500 }, { "epoch": 41.95, "learning_rate": 2.9022909191277947e-05, "loss": 0.2683, "step": 912000 }, { "epoch": 41.98, "learning_rate": 2.9011408593246853e-05, "loss": 0.2675, "step": 912500 }, { "epoch": 42.0, "learning_rate": 2.8999907995215753e-05, "loss": 0.2689, "step": 913000 }, { "epoch": 42.02, "learning_rate": 2.8988407397184653e-05, "loss": 0.2397, "step": 913500 }, { "epoch": 42.05, "learning_rate": 2.897690679915356e-05, "loss": 0.2383, "step": 914000 }, { "epoch": 42.07, "learning_rate": 2.896540620112246e-05, "loss": 0.2455, "step": 914500 }, { "epoch": 42.09, "learning_rate": 2.8953905603091363e-05, "loss": 0.249, "step": 915000 }, { "epoch": 42.12, "learning_rate": 2.8942405005060262e-05, "loss": 0.2462, "step": 915500 }, { "epoch": 42.14, "learning_rate": 2.893090440702917e-05, "loss": 0.2469, "step": 916000 }, { "epoch": 42.16, "learning_rate": 2.891940380899807e-05, "loss": 0.249, "step": 916500 }, { "epoch": 42.18, "learning_rate": 2.890790321096697e-05, "loss": 0.2528, "step": 917000 }, { "epoch": 42.21, "learning_rate": 2.8896402612935875e-05, "loss": 0.2469, "step": 917500 }, { "epoch": 42.23, "learning_rate": 2.8884902014904775e-05, "loss": 0.2519, "step": 918000 }, { "epoch": 42.25, "learning_rate": 2.887340141687368e-05, "loss": 0.2539, "step": 918500 }, { "epoch": 42.28, "learning_rate": 2.886190081884258e-05, "loss": 0.2506, "step": 919000 }, { "epoch": 42.3, "learning_rate": 2.8850400220811485e-05, "loss": 0.2506, "step": 919500 }, { "epoch": 42.32, "learning_rate": 2.8838899622780385e-05, "loss": 0.2563, "step": 920000 }, { "epoch": 42.35, "learning_rate": 2.882739902474929e-05, "loss": 0.2519, "step": 920500 }, { "epoch": 42.37, "learning_rate": 2.881589842671819e-05, "loss": 0.2551, "step": 921000 }, { "epoch": 42.39, "learning_rate": 2.880439782868709e-05, "loss": 0.2577, "step": 921500 }, { "epoch": 42.41, "learning_rate": 2.8792897230655998e-05, "loss": 0.2582, "step": 922000 }, { "epoch": 42.44, "learning_rate": 2.8781396632624898e-05, "loss": 0.2537, "step": 922500 }, { "epoch": 42.46, "learning_rate": 2.87698960345938e-05, "loss": 0.2551, "step": 923000 }, { "epoch": 42.48, "learning_rate": 2.87583954365627e-05, "loss": 0.2519, "step": 923500 }, { "epoch": 42.51, "learning_rate": 2.8746894838531607e-05, "loss": 0.2605, "step": 924000 }, { "epoch": 42.53, "learning_rate": 2.8735394240500507e-05, "loss": 0.2593, "step": 924500 }, { "epoch": 42.55, "learning_rate": 2.8723893642469407e-05, "loss": 0.2617, "step": 925000 }, { "epoch": 42.58, "learning_rate": 2.8712393044438314e-05, "loss": 0.2591, "step": 925500 }, { "epoch": 42.6, "learning_rate": 2.8700892446407213e-05, "loss": 0.2658, "step": 926000 }, { "epoch": 42.62, "learning_rate": 2.8689391848376117e-05, "loss": 0.2609, "step": 926500 }, { "epoch": 42.64, "learning_rate": 2.8677891250345017e-05, "loss": 0.2639, "step": 927000 }, { "epoch": 42.67, "learning_rate": 2.8666390652313923e-05, "loss": 0.2618, "step": 927500 }, { "epoch": 42.69, "learning_rate": 2.8654890054282823e-05, "loss": 0.2643, "step": 928000 }, { "epoch": 42.71, "learning_rate": 2.864338945625173e-05, "loss": 0.2638, "step": 928500 }, { "epoch": 42.74, "learning_rate": 2.863188885822063e-05, "loss": 0.2594, "step": 929000 }, { "epoch": 42.76, "learning_rate": 2.862038826018953e-05, "loss": 0.262, "step": 929500 }, { "epoch": 42.78, "learning_rate": 2.8608887662158436e-05, "loss": 0.2628, "step": 930000 }, { "epoch": 42.81, "learning_rate": 2.8597387064127336e-05, "loss": 0.2611, "step": 930500 }, { "epoch": 42.83, "learning_rate": 2.858588646609624e-05, "loss": 0.2634, "step": 931000 }, { "epoch": 42.85, "learning_rate": 2.857438586806514e-05, "loss": 0.2618, "step": 931500 }, { "epoch": 42.87, "learning_rate": 2.8562885270034046e-05, "loss": 0.26, "step": 932000 }, { "epoch": 42.9, "learning_rate": 2.8551384672002945e-05, "loss": 0.2634, "step": 932500 }, { "epoch": 42.92, "learning_rate": 2.8539884073971845e-05, "loss": 0.2635, "step": 933000 }, { "epoch": 42.94, "learning_rate": 2.8528383475940752e-05, "loss": 0.2678, "step": 933500 }, { "epoch": 42.97, "learning_rate": 2.851688287790965e-05, "loss": 0.2683, "step": 934000 }, { "epoch": 42.99, "learning_rate": 2.8505382279878555e-05, "loss": 0.2667, "step": 934500 }, { "epoch": 43.01, "learning_rate": 2.8493881681847455e-05, "loss": 0.2492, "step": 935000 }, { "epoch": 43.04, "learning_rate": 2.848238108381636e-05, "loss": 0.2417, "step": 935500 }, { "epoch": 43.06, "learning_rate": 2.847088048578526e-05, "loss": 0.2431, "step": 936000 }, { "epoch": 43.08, "learning_rate": 2.8459379887754168e-05, "loss": 0.2423, "step": 936500 }, { "epoch": 43.1, "learning_rate": 2.8447879289723068e-05, "loss": 0.2425, "step": 937000 }, { "epoch": 43.13, "learning_rate": 2.8436378691691968e-05, "loss": 0.2434, "step": 937500 }, { "epoch": 43.15, "learning_rate": 2.8424878093660874e-05, "loss": 0.2431, "step": 938000 }, { "epoch": 43.17, "learning_rate": 2.8413377495629774e-05, "loss": 0.2463, "step": 938500 }, { "epoch": 43.2, "learning_rate": 2.8401876897598677e-05, "loss": 0.25, "step": 939000 }, { "epoch": 43.22, "learning_rate": 2.8390376299567577e-05, "loss": 0.2442, "step": 939500 }, { "epoch": 43.24, "learning_rate": 2.8378875701536484e-05, "loss": 0.2485, "step": 940000 }, { "epoch": 43.27, "learning_rate": 2.8367375103505384e-05, "loss": 0.2503, "step": 940500 }, { "epoch": 43.29, "learning_rate": 2.8355874505474283e-05, "loss": 0.2529, "step": 941000 }, { "epoch": 43.31, "learning_rate": 2.834437390744319e-05, "loss": 0.2442, "step": 941500 }, { "epoch": 43.33, "learning_rate": 2.833287330941209e-05, "loss": 0.2465, "step": 942000 }, { "epoch": 43.36, "learning_rate": 2.8321372711380993e-05, "loss": 0.252, "step": 942500 }, { "epoch": 43.38, "learning_rate": 2.8309872113349893e-05, "loss": 0.2519, "step": 943000 }, { "epoch": 43.4, "learning_rate": 2.82983715153188e-05, "loss": 0.2566, "step": 943500 }, { "epoch": 43.43, "learning_rate": 2.82868709172877e-05, "loss": 0.255, "step": 944000 }, { "epoch": 43.45, "learning_rate": 2.82753703192566e-05, "loss": 0.2564, "step": 944500 }, { "epoch": 43.47, "learning_rate": 2.8263869721225506e-05, "loss": 0.252, "step": 945000 }, { "epoch": 43.5, "learning_rate": 2.8252369123194406e-05, "loss": 0.2595, "step": 945500 }, { "epoch": 43.52, "learning_rate": 2.8240868525163312e-05, "loss": 0.2528, "step": 946000 }, { "epoch": 43.54, "learning_rate": 2.8229367927132212e-05, "loss": 0.2539, "step": 946500 }, { "epoch": 43.56, "learning_rate": 2.8217867329101115e-05, "loss": 0.2539, "step": 947000 }, { "epoch": 43.59, "learning_rate": 2.8206366731070015e-05, "loss": 0.2621, "step": 947500 }, { "epoch": 43.61, "learning_rate": 2.8194866133038922e-05, "loss": 0.2513, "step": 948000 }, { "epoch": 43.63, "learning_rate": 2.8183365535007822e-05, "loss": 0.2599, "step": 948500 }, { "epoch": 43.66, "learning_rate": 2.817186493697672e-05, "loss": 0.2611, "step": 949000 }, { "epoch": 43.68, "learning_rate": 2.8160364338945628e-05, "loss": 0.2636, "step": 949500 }, { "epoch": 43.7, "learning_rate": 2.8148863740914528e-05, "loss": 0.2642, "step": 950000 }, { "epoch": 43.73, "learning_rate": 2.813736314288343e-05, "loss": 0.2636, "step": 950500 }, { "epoch": 43.75, "learning_rate": 2.812586254485233e-05, "loss": 0.2591, "step": 951000 }, { "epoch": 43.77, "learning_rate": 2.8114361946821238e-05, "loss": 0.2612, "step": 951500 }, { "epoch": 43.79, "learning_rate": 2.8102861348790138e-05, "loss": 0.2574, "step": 952000 }, { "epoch": 43.82, "learning_rate": 2.8091360750759037e-05, "loss": 0.2573, "step": 952500 }, { "epoch": 43.84, "learning_rate": 2.8079860152727944e-05, "loss": 0.2619, "step": 953000 }, { "epoch": 43.86, "learning_rate": 2.8068359554696844e-05, "loss": 0.2607, "step": 953500 }, { "epoch": 43.89, "learning_rate": 2.805685895666575e-05, "loss": 0.2623, "step": 954000 }, { "epoch": 43.91, "learning_rate": 2.804535835863465e-05, "loss": 0.267, "step": 954500 }, { "epoch": 43.93, "learning_rate": 2.8033857760603554e-05, "loss": 0.2613, "step": 955000 }, { "epoch": 43.96, "learning_rate": 2.8022357162572453e-05, "loss": 0.2669, "step": 955500 }, { "epoch": 43.98, "learning_rate": 2.801085656454136e-05, "loss": 0.2641, "step": 956000 }, { "epoch": 44.0, "learning_rate": 2.799935596651026e-05, "loss": 0.2603, "step": 956500 }, { "epoch": 44.02, "learning_rate": 2.798785536847916e-05, "loss": 0.2381, "step": 957000 }, { "epoch": 44.05, "learning_rate": 2.7976354770448066e-05, "loss": 0.2359, "step": 957500 }, { "epoch": 44.07, "learning_rate": 2.7964854172416966e-05, "loss": 0.2368, "step": 958000 }, { "epoch": 44.09, "learning_rate": 2.795335357438587e-05, "loss": 0.2449, "step": 958500 }, { "epoch": 44.12, "learning_rate": 2.794185297635477e-05, "loss": 0.2466, "step": 959000 }, { "epoch": 44.14, "learning_rate": 2.7930352378323676e-05, "loss": 0.2463, "step": 959500 }, { "epoch": 44.16, "learning_rate": 2.7918851780292576e-05, "loss": 0.245, "step": 960000 }, { "epoch": 44.19, "learning_rate": 2.7907351182261476e-05, "loss": 0.2467, "step": 960500 }, { "epoch": 44.21, "learning_rate": 2.7895850584230382e-05, "loss": 0.247, "step": 961000 }, { "epoch": 44.23, "learning_rate": 2.7884349986199282e-05, "loss": 0.2463, "step": 961500 }, { "epoch": 44.25, "learning_rate": 2.787284938816819e-05, "loss": 0.2478, "step": 962000 }, { "epoch": 44.28, "learning_rate": 2.786134879013709e-05, "loss": 0.2498, "step": 962500 }, { "epoch": 44.3, "learning_rate": 2.7849848192105992e-05, "loss": 0.2472, "step": 963000 }, { "epoch": 44.32, "learning_rate": 2.783834759407489e-05, "loss": 0.2529, "step": 963500 }, { "epoch": 44.35, "learning_rate": 2.7826846996043798e-05, "loss": 0.2542, "step": 964000 }, { "epoch": 44.37, "learning_rate": 2.7815346398012698e-05, "loss": 0.2471, "step": 964500 }, { "epoch": 44.39, "learning_rate": 2.7803845799981598e-05, "loss": 0.248, "step": 965000 }, { "epoch": 44.42, "learning_rate": 2.7792345201950505e-05, "loss": 0.2554, "step": 965500 }, { "epoch": 44.44, "learning_rate": 2.7780844603919404e-05, "loss": 0.2541, "step": 966000 }, { "epoch": 44.46, "learning_rate": 2.7769344005888308e-05, "loss": 0.2461, "step": 966500 }, { "epoch": 44.48, "learning_rate": 2.7757843407857208e-05, "loss": 0.2518, "step": 967000 }, { "epoch": 44.51, "learning_rate": 2.7746342809826114e-05, "loss": 0.2537, "step": 967500 }, { "epoch": 44.53, "learning_rate": 2.7734842211795014e-05, "loss": 0.2552, "step": 968000 }, { "epoch": 44.55, "learning_rate": 2.7723341613763914e-05, "loss": 0.2535, "step": 968500 }, { "epoch": 44.58, "learning_rate": 2.771184101573282e-05, "loss": 0.2602, "step": 969000 }, { "epoch": 44.6, "learning_rate": 2.770034041770172e-05, "loss": 0.2568, "step": 969500 }, { "epoch": 44.62, "learning_rate": 2.7688839819670627e-05, "loss": 0.2599, "step": 970000 }, { "epoch": 44.65, "learning_rate": 2.7677339221639527e-05, "loss": 0.2554, "step": 970500 }, { "epoch": 44.67, "learning_rate": 2.766583862360843e-05, "loss": 0.252, "step": 971000 }, { "epoch": 44.69, "learning_rate": 2.765433802557733e-05, "loss": 0.2566, "step": 971500 }, { "epoch": 44.71, "learning_rate": 2.764283742754623e-05, "loss": 0.2588, "step": 972000 }, { "epoch": 44.74, "learning_rate": 2.7631336829515136e-05, "loss": 0.2555, "step": 972500 }, { "epoch": 44.76, "learning_rate": 2.7619836231484036e-05, "loss": 0.2588, "step": 973000 }, { "epoch": 44.78, "learning_rate": 2.7608335633452943e-05, "loss": 0.2573, "step": 973500 }, { "epoch": 44.81, "learning_rate": 2.7596835035421843e-05, "loss": 0.2589, "step": 974000 }, { "epoch": 44.83, "learning_rate": 2.7585334437390746e-05, "loss": 0.2594, "step": 974500 }, { "epoch": 44.85, "learning_rate": 2.7573833839359646e-05, "loss": 0.257, "step": 975000 }, { "epoch": 44.88, "learning_rate": 2.7562333241328552e-05, "loss": 0.256, "step": 975500 }, { "epoch": 44.9, "learning_rate": 2.7550832643297452e-05, "loss": 0.2547, "step": 976000 }, { "epoch": 44.92, "learning_rate": 2.7539332045266352e-05, "loss": 0.2585, "step": 976500 }, { "epoch": 44.94, "learning_rate": 2.752783144723526e-05, "loss": 0.2559, "step": 977000 }, { "epoch": 44.97, "learning_rate": 2.751633084920416e-05, "loss": 0.2624, "step": 977500 }, { "epoch": 44.99, "learning_rate": 2.7504830251173065e-05, "loss": 0.2591, "step": 978000 }, { "epoch": 45.01, "learning_rate": 2.7493329653141965e-05, "loss": 0.2466, "step": 978500 }, { "epoch": 45.04, "learning_rate": 2.7481829055110868e-05, "loss": 0.2319, "step": 979000 }, { "epoch": 45.06, "learning_rate": 2.7470328457079768e-05, "loss": 0.2369, "step": 979500 }, { "epoch": 45.08, "learning_rate": 2.7458827859048668e-05, "loss": 0.2345, "step": 980000 }, { "epoch": 45.11, "learning_rate": 2.7447327261017575e-05, "loss": 0.2425, "step": 980500 }, { "epoch": 45.13, "learning_rate": 2.7435826662986474e-05, "loss": 0.246, "step": 981000 }, { "epoch": 45.15, "learning_rate": 2.742432606495538e-05, "loss": 0.2341, "step": 981500 }, { "epoch": 45.17, "learning_rate": 2.741282546692428e-05, "loss": 0.2466, "step": 982000 }, { "epoch": 45.2, "learning_rate": 2.7401324868893184e-05, "loss": 0.2461, "step": 982500 }, { "epoch": 45.22, "learning_rate": 2.7389824270862084e-05, "loss": 0.2408, "step": 983000 }, { "epoch": 45.24, "learning_rate": 2.737832367283099e-05, "loss": 0.2457, "step": 983500 }, { "epoch": 45.27, "learning_rate": 2.736682307479989e-05, "loss": 0.2426, "step": 984000 }, { "epoch": 45.29, "learning_rate": 2.735532247676879e-05, "loss": 0.2456, "step": 984500 }, { "epoch": 45.31, "learning_rate": 2.7343821878737697e-05, "loss": 0.2472, "step": 985000 }, { "epoch": 45.34, "learning_rate": 2.7332321280706597e-05, "loss": 0.2511, "step": 985500 }, { "epoch": 45.36, "learning_rate": 2.7320820682675503e-05, "loss": 0.2498, "step": 986000 }, { "epoch": 45.38, "learning_rate": 2.7309320084644403e-05, "loss": 0.2498, "step": 986500 }, { "epoch": 45.4, "learning_rate": 2.7297819486613306e-05, "loss": 0.2532, "step": 987000 }, { "epoch": 45.43, "learning_rate": 2.7286318888582206e-05, "loss": 0.2483, "step": 987500 }, { "epoch": 45.45, "learning_rate": 2.7274818290551106e-05, "loss": 0.2471, "step": 988000 }, { "epoch": 45.47, "learning_rate": 2.7263317692520013e-05, "loss": 0.2463, "step": 988500 }, { "epoch": 45.5, "learning_rate": 2.7251817094488913e-05, "loss": 0.2489, "step": 989000 }, { "epoch": 45.52, "learning_rate": 2.724031649645782e-05, "loss": 0.2517, "step": 989500 }, { "epoch": 45.54, "learning_rate": 2.722881589842672e-05, "loss": 0.2544, "step": 990000 }, { "epoch": 45.57, "learning_rate": 2.7217315300395622e-05, "loss": 0.2531, "step": 990500 }, { "epoch": 45.59, "learning_rate": 2.7205814702364522e-05, "loss": 0.2489, "step": 991000 }, { "epoch": 45.61, "learning_rate": 2.719431410433343e-05, "loss": 0.255, "step": 991500 }, { "epoch": 45.63, "learning_rate": 2.718281350630233e-05, "loss": 0.2485, "step": 992000 }, { "epoch": 45.66, "learning_rate": 2.717131290827123e-05, "loss": 0.2521, "step": 992500 }, { "epoch": 45.68, "learning_rate": 2.7159812310240135e-05, "loss": 0.2508, "step": 993000 }, { "epoch": 45.7, "learning_rate": 2.7148311712209035e-05, "loss": 0.2517, "step": 993500 }, { "epoch": 45.73, "learning_rate": 2.713681111417794e-05, "loss": 0.255, "step": 994000 }, { "epoch": 45.75, "learning_rate": 2.712531051614684e-05, "loss": 0.2574, "step": 994500 }, { "epoch": 45.77, "learning_rate": 2.7113809918115745e-05, "loss": 0.2555, "step": 995000 }, { "epoch": 45.8, "learning_rate": 2.7102309320084644e-05, "loss": 0.2533, "step": 995500 }, { "epoch": 45.82, "learning_rate": 2.7090808722053544e-05, "loss": 0.2596, "step": 996000 }, { "epoch": 45.84, "learning_rate": 2.707930812402245e-05, "loss": 0.2563, "step": 996500 }, { "epoch": 45.86, "learning_rate": 2.706780752599135e-05, "loss": 0.2605, "step": 997000 }, { "epoch": 45.89, "learning_rate": 2.7056306927960257e-05, "loss": 0.2554, "step": 997500 }, { "epoch": 45.91, "learning_rate": 2.7044806329929157e-05, "loss": 0.2582, "step": 998000 }, { "epoch": 45.93, "learning_rate": 2.703330573189806e-05, "loss": 0.2534, "step": 998500 }, { "epoch": 45.96, "learning_rate": 2.702180513386696e-05, "loss": 0.258, "step": 999000 }, { "epoch": 45.98, "learning_rate": 2.7010304535835867e-05, "loss": 0.2534, "step": 999500 }, { "epoch": 46.0, "learning_rate": 2.6998803937804767e-05, "loss": 0.2606, "step": 1000000 }, { "epoch": 46.03, "learning_rate": 2.6987303339773667e-05, "loss": 0.2311, "step": 1000500 }, { "epoch": 46.05, "learning_rate": 2.6975802741742573e-05, "loss": 0.2298, "step": 1001000 }, { "epoch": 46.07, "learning_rate": 2.6964302143711473e-05, "loss": 0.2385, "step": 1001500 }, { "epoch": 46.09, "learning_rate": 2.695280154568038e-05, "loss": 0.2385, "step": 1002000 }, { "epoch": 46.12, "learning_rate": 2.694130094764928e-05, "loss": 0.236, "step": 1002500 }, { "epoch": 46.14, "learning_rate": 2.6929800349618183e-05, "loss": 0.2352, "step": 1003000 }, { "epoch": 46.16, "learning_rate": 2.6918299751587083e-05, "loss": 0.2432, "step": 1003500 }, { "epoch": 46.19, "learning_rate": 2.6906799153555983e-05, "loss": 0.2412, "step": 1004000 }, { "epoch": 46.21, "learning_rate": 2.689529855552489e-05, "loss": 0.2422, "step": 1004500 }, { "epoch": 46.23, "learning_rate": 2.688379795749379e-05, "loss": 0.2419, "step": 1005000 }, { "epoch": 46.26, "learning_rate": 2.6872297359462696e-05, "loss": 0.2432, "step": 1005500 }, { "epoch": 46.28, "learning_rate": 2.6860796761431595e-05, "loss": 0.2461, "step": 1006000 }, { "epoch": 46.3, "learning_rate": 2.68492961634005e-05, "loss": 0.2417, "step": 1006500 }, { "epoch": 46.32, "learning_rate": 2.68377955653694e-05, "loss": 0.2423, "step": 1007000 }, { "epoch": 46.35, "learning_rate": 2.6826294967338302e-05, "loss": 0.2432, "step": 1007500 }, { "epoch": 46.37, "learning_rate": 2.6814794369307205e-05, "loss": 0.2415, "step": 1008000 }, { "epoch": 46.39, "learning_rate": 2.6803293771276105e-05, "loss": 0.2486, "step": 1008500 }, { "epoch": 46.42, "learning_rate": 2.679179317324501e-05, "loss": 0.2448, "step": 1009000 }, { "epoch": 46.44, "learning_rate": 2.678029257521391e-05, "loss": 0.2529, "step": 1009500 }, { "epoch": 46.46, "learning_rate": 2.6768791977182818e-05, "loss": 0.2437, "step": 1010000 }, { "epoch": 46.49, "learning_rate": 2.6757291379151718e-05, "loss": 0.2509, "step": 1010500 }, { "epoch": 46.51, "learning_rate": 2.674579078112062e-05, "loss": 0.25, "step": 1011000 }, { "epoch": 46.53, "learning_rate": 2.673429018308952e-05, "loss": 0.2506, "step": 1011500 }, { "epoch": 46.55, "learning_rate": 2.672278958505842e-05, "loss": 0.2467, "step": 1012000 }, { "epoch": 46.58, "learning_rate": 2.6711288987027327e-05, "loss": 0.2506, "step": 1012500 }, { "epoch": 46.6, "learning_rate": 2.6699788388996227e-05, "loss": 0.2481, "step": 1013000 }, { "epoch": 46.62, "learning_rate": 2.6688287790965134e-05, "loss": 0.2547, "step": 1013500 }, { "epoch": 46.65, "learning_rate": 2.6676787192934034e-05, "loss": 0.2474, "step": 1014000 }, { "epoch": 46.67, "learning_rate": 2.6665286594902937e-05, "loss": 0.2566, "step": 1014500 }, { "epoch": 46.69, "learning_rate": 2.665378599687184e-05, "loss": 0.2503, "step": 1015000 }, { "epoch": 46.72, "learning_rate": 2.664228539884074e-05, "loss": 0.2493, "step": 1015500 }, { "epoch": 46.74, "learning_rate": 2.6630784800809643e-05, "loss": 0.2526, "step": 1016000 }, { "epoch": 46.76, "learning_rate": 2.6619284202778543e-05, "loss": 0.2566, "step": 1016500 }, { "epoch": 46.78, "learning_rate": 2.660778360474745e-05, "loss": 0.2553, "step": 1017000 }, { "epoch": 46.81, "learning_rate": 2.659628300671635e-05, "loss": 0.2567, "step": 1017500 }, { "epoch": 46.83, "learning_rate": 2.6584782408685256e-05, "loss": 0.2571, "step": 1018000 }, { "epoch": 46.85, "learning_rate": 2.6573281810654156e-05, "loss": 0.2569, "step": 1018500 }, { "epoch": 46.88, "learning_rate": 2.656178121262306e-05, "loss": 0.2494, "step": 1019000 }, { "epoch": 46.9, "learning_rate": 2.655028061459196e-05, "loss": 0.2537, "step": 1019500 }, { "epoch": 46.92, "learning_rate": 2.653878001656086e-05, "loss": 0.2531, "step": 1020000 }, { "epoch": 46.95, "learning_rate": 2.6527279418529766e-05, "loss": 0.2609, "step": 1020500 }, { "epoch": 46.97, "learning_rate": 2.6515778820498665e-05, "loss": 0.254, "step": 1021000 }, { "epoch": 46.99, "learning_rate": 2.6504278222467572e-05, "loss": 0.2571, "step": 1021500 }, { "epoch": 47.01, "learning_rate": 2.6492777624436472e-05, "loss": 0.2364, "step": 1022000 }, { "epoch": 47.04, "learning_rate": 2.6481277026405375e-05, "loss": 0.2294, "step": 1022500 }, { "epoch": 47.06, "learning_rate": 2.6469776428374278e-05, "loss": 0.2282, "step": 1023000 }, { "epoch": 47.08, "learning_rate": 2.6458275830343178e-05, "loss": 0.2325, "step": 1023500 }, { "epoch": 47.11, "learning_rate": 2.644677523231208e-05, "loss": 0.2347, "step": 1024000 }, { "epoch": 47.13, "learning_rate": 2.643527463428098e-05, "loss": 0.236, "step": 1024500 }, { "epoch": 47.15, "learning_rate": 2.6423774036249888e-05, "loss": 0.2371, "step": 1025000 }, { "epoch": 47.18, "learning_rate": 2.6412273438218788e-05, "loss": 0.2408, "step": 1025500 }, { "epoch": 47.2, "learning_rate": 2.6400772840187694e-05, "loss": 0.2398, "step": 1026000 }, { "epoch": 47.22, "learning_rate": 2.6389272242156594e-05, "loss": 0.2357, "step": 1026500 }, { "epoch": 47.24, "learning_rate": 2.6377771644125497e-05, "loss": 0.2423, "step": 1027000 }, { "epoch": 47.27, "learning_rate": 2.6366271046094397e-05, "loss": 0.2433, "step": 1027500 }, { "epoch": 47.29, "learning_rate": 2.6354770448063297e-05, "loss": 0.2443, "step": 1028000 }, { "epoch": 47.31, "learning_rate": 2.6343269850032204e-05, "loss": 0.2414, "step": 1028500 }, { "epoch": 47.34, "learning_rate": 2.6331769252001104e-05, "loss": 0.2434, "step": 1029000 }, { "epoch": 47.36, "learning_rate": 2.632026865397001e-05, "loss": 0.2412, "step": 1029500 }, { "epoch": 47.38, "learning_rate": 2.630876805593891e-05, "loss": 0.2448, "step": 1030000 }, { "epoch": 47.41, "learning_rate": 2.6297267457907813e-05, "loss": 0.248, "step": 1030500 }, { "epoch": 47.43, "learning_rate": 2.6285766859876717e-05, "loss": 0.2517, "step": 1031000 }, { "epoch": 47.45, "learning_rate": 2.6274266261845616e-05, "loss": 0.2497, "step": 1031500 }, { "epoch": 47.47, "learning_rate": 2.626276566381452e-05, "loss": 0.2454, "step": 1032000 }, { "epoch": 47.5, "learning_rate": 2.625126506578342e-05, "loss": 0.2403, "step": 1032500 }, { "epoch": 47.52, "learning_rate": 2.6239764467752326e-05, "loss": 0.2472, "step": 1033000 }, { "epoch": 47.54, "learning_rate": 2.6228263869721226e-05, "loss": 0.2498, "step": 1033500 }, { "epoch": 47.57, "learning_rate": 2.6216763271690133e-05, "loss": 0.2493, "step": 1034000 }, { "epoch": 47.59, "learning_rate": 2.6205262673659032e-05, "loss": 0.2507, "step": 1034500 }, { "epoch": 47.61, "learning_rate": 2.6193762075627932e-05, "loss": 0.2487, "step": 1035000 }, { "epoch": 47.64, "learning_rate": 2.6182261477596835e-05, "loss": 0.2474, "step": 1035500 }, { "epoch": 47.66, "learning_rate": 2.6170760879565735e-05, "loss": 0.2473, "step": 1036000 }, { "epoch": 47.68, "learning_rate": 2.6159260281534642e-05, "loss": 0.2463, "step": 1036500 }, { "epoch": 47.7, "learning_rate": 2.6147759683503542e-05, "loss": 0.2534, "step": 1037000 }, { "epoch": 47.73, "learning_rate": 2.613625908547245e-05, "loss": 0.2506, "step": 1037500 }, { "epoch": 47.75, "learning_rate": 2.6124758487441348e-05, "loss": 0.2514, "step": 1038000 }, { "epoch": 47.77, "learning_rate": 2.611325788941025e-05, "loss": 0.2504, "step": 1038500 }, { "epoch": 47.8, "learning_rate": 2.6101757291379155e-05, "loss": 0.2505, "step": 1039000 }, { "epoch": 47.82, "learning_rate": 2.6090256693348055e-05, "loss": 0.2507, "step": 1039500 }, { "epoch": 47.84, "learning_rate": 2.6078756095316958e-05, "loss": 0.2484, "step": 1040000 }, { "epoch": 47.87, "learning_rate": 2.6067255497285858e-05, "loss": 0.2496, "step": 1040500 }, { "epoch": 47.89, "learning_rate": 2.6055754899254764e-05, "loss": 0.2542, "step": 1041000 }, { "epoch": 47.91, "learning_rate": 2.6044254301223664e-05, "loss": 0.2505, "step": 1041500 }, { "epoch": 47.93, "learning_rate": 2.603275370319257e-05, "loss": 0.2596, "step": 1042000 }, { "epoch": 47.96, "learning_rate": 2.602125310516147e-05, "loss": 0.2531, "step": 1042500 }, { "epoch": 47.98, "learning_rate": 2.600975250713037e-05, "loss": 0.2515, "step": 1043000 }, { "epoch": 48.0, "learning_rate": 2.5998251909099274e-05, "loss": 0.2478, "step": 1043500 }, { "epoch": 48.03, "learning_rate": 2.5986751311068173e-05, "loss": 0.2295, "step": 1044000 }, { "epoch": 48.05, "learning_rate": 2.597525071303708e-05, "loss": 0.2278, "step": 1044500 }, { "epoch": 48.07, "learning_rate": 2.596375011500598e-05, "loss": 0.2271, "step": 1045000 }, { "epoch": 48.1, "learning_rate": 2.5952249516974887e-05, "loss": 0.2339, "step": 1045500 }, { "epoch": 48.12, "learning_rate": 2.5940748918943786e-05, "loss": 0.2352, "step": 1046000 }, { "epoch": 48.14, "learning_rate": 2.592924832091269e-05, "loss": 0.2301, "step": 1046500 }, { "epoch": 48.16, "learning_rate": 2.5917747722881593e-05, "loss": 0.2422, "step": 1047000 }, { "epoch": 48.19, "learning_rate": 2.5906247124850493e-05, "loss": 0.2375, "step": 1047500 }, { "epoch": 48.21, "learning_rate": 2.5894746526819396e-05, "loss": 0.2398, "step": 1048000 }, { "epoch": 48.23, "learning_rate": 2.5883245928788296e-05, "loss": 0.2367, "step": 1048500 }, { "epoch": 48.26, "learning_rate": 2.5871745330757202e-05, "loss": 0.2432, "step": 1049000 }, { "epoch": 48.28, "learning_rate": 2.5860244732726102e-05, "loss": 0.234, "step": 1049500 }, { "epoch": 48.3, "learning_rate": 2.584874413469501e-05, "loss": 0.2376, "step": 1050000 }, { "epoch": 48.33, "learning_rate": 2.583724353666391e-05, "loss": 0.2392, "step": 1050500 }, { "epoch": 48.35, "learning_rate": 2.582574293863281e-05, "loss": 0.2433, "step": 1051000 }, { "epoch": 48.37, "learning_rate": 2.5814242340601712e-05, "loss": 0.2447, "step": 1051500 }, { "epoch": 48.39, "learning_rate": 2.580274174257061e-05, "loss": 0.2399, "step": 1052000 }, { "epoch": 48.42, "learning_rate": 2.579124114453952e-05, "loss": 0.2404, "step": 1052500 }, { "epoch": 48.44, "learning_rate": 2.5779740546508418e-05, "loss": 0.2456, "step": 1053000 }, { "epoch": 48.46, "learning_rate": 2.5768239948477325e-05, "loss": 0.2437, "step": 1053500 }, { "epoch": 48.49, "learning_rate": 2.5756739350446225e-05, "loss": 0.2448, "step": 1054000 }, { "epoch": 48.51, "learning_rate": 2.5745238752415128e-05, "loss": 0.2437, "step": 1054500 }, { "epoch": 48.53, "learning_rate": 2.573373815438403e-05, "loss": 0.2415, "step": 1055000 }, { "epoch": 48.56, "learning_rate": 2.572223755635293e-05, "loss": 0.2485, "step": 1055500 }, { "epoch": 48.58, "learning_rate": 2.5710736958321834e-05, "loss": 0.2481, "step": 1056000 }, { "epoch": 48.6, "learning_rate": 2.5699236360290734e-05, "loss": 0.2429, "step": 1056500 }, { "epoch": 48.62, "learning_rate": 2.568773576225964e-05, "loss": 0.254, "step": 1057000 }, { "epoch": 48.65, "learning_rate": 2.567623516422854e-05, "loss": 0.2449, "step": 1057500 }, { "epoch": 48.67, "learning_rate": 2.5664734566197447e-05, "loss": 0.246, "step": 1058000 }, { "epoch": 48.69, "learning_rate": 2.5653233968166347e-05, "loss": 0.2474, "step": 1058500 }, { "epoch": 48.72, "learning_rate": 2.5641733370135247e-05, "loss": 0.2495, "step": 1059000 }, { "epoch": 48.74, "learning_rate": 2.563023277210415e-05, "loss": 0.2476, "step": 1059500 }, { "epoch": 48.76, "learning_rate": 2.561873217407305e-05, "loss": 0.2481, "step": 1060000 }, { "epoch": 48.79, "learning_rate": 2.5607231576041957e-05, "loss": 0.2468, "step": 1060500 }, { "epoch": 48.81, "learning_rate": 2.5595730978010856e-05, "loss": 0.2503, "step": 1061000 }, { "epoch": 48.83, "learning_rate": 2.5584230379979763e-05, "loss": 0.2442, "step": 1061500 }, { "epoch": 48.85, "learning_rate": 2.5572729781948663e-05, "loss": 0.2498, "step": 1062000 }, { "epoch": 48.88, "learning_rate": 2.5561229183917566e-05, "loss": 0.2451, "step": 1062500 }, { "epoch": 48.9, "learning_rate": 2.554972858588647e-05, "loss": 0.254, "step": 1063000 }, { "epoch": 48.92, "learning_rate": 2.553822798785537e-05, "loss": 0.251, "step": 1063500 }, { "epoch": 48.95, "learning_rate": 2.5526727389824272e-05, "loss": 0.2492, "step": 1064000 }, { "epoch": 48.97, "learning_rate": 2.5515226791793172e-05, "loss": 0.249, "step": 1064500 }, { "epoch": 48.99, "learning_rate": 2.550372619376208e-05, "loss": 0.254, "step": 1065000 }, { "epoch": 49.02, "learning_rate": 2.549222559573098e-05, "loss": 0.2284, "step": 1065500 }, { "epoch": 49.04, "learning_rate": 2.5480724997699885e-05, "loss": 0.2292, "step": 1066000 }, { "epoch": 49.06, "learning_rate": 2.5469224399668785e-05, "loss": 0.2281, "step": 1066500 }, { "epoch": 49.08, "learning_rate": 2.5457723801637685e-05, "loss": 0.2298, "step": 1067000 }, { "epoch": 49.11, "learning_rate": 2.5446223203606588e-05, "loss": 0.2347, "step": 1067500 }, { "epoch": 49.13, "learning_rate": 2.5434722605575488e-05, "loss": 0.2352, "step": 1068000 }, { "epoch": 49.15, "learning_rate": 2.5423222007544395e-05, "loss": 0.2336, "step": 1068500 }, { "epoch": 49.18, "learning_rate": 2.5411721409513295e-05, "loss": 0.2355, "step": 1069000 }, { "epoch": 49.2, "learning_rate": 2.54002208114822e-05, "loss": 0.2341, "step": 1069500 }, { "epoch": 49.22, "learning_rate": 2.53887202134511e-05, "loss": 0.23, "step": 1070000 }, { "epoch": 49.25, "learning_rate": 2.537721961542e-05, "loss": 0.2405, "step": 1070500 }, { "epoch": 49.27, "learning_rate": 2.5365719017388907e-05, "loss": 0.2354, "step": 1071000 }, { "epoch": 49.29, "learning_rate": 2.5354218419357807e-05, "loss": 0.231, "step": 1071500 }, { "epoch": 49.31, "learning_rate": 2.534271782132671e-05, "loss": 0.2409, "step": 1072000 }, { "epoch": 49.34, "learning_rate": 2.533121722329561e-05, "loss": 0.238, "step": 1072500 }, { "epoch": 49.36, "learning_rate": 2.5319716625264517e-05, "loss": 0.2418, "step": 1073000 }, { "epoch": 49.38, "learning_rate": 2.5308216027233417e-05, "loss": 0.239, "step": 1073500 }, { "epoch": 49.41, "learning_rate": 2.5296715429202324e-05, "loss": 0.238, "step": 1074000 }, { "epoch": 49.43, "learning_rate": 2.5285214831171223e-05, "loss": 0.2387, "step": 1074500 }, { "epoch": 49.45, "learning_rate": 2.5273714233140123e-05, "loss": 0.246, "step": 1075000 }, { "epoch": 49.48, "learning_rate": 2.5262213635109026e-05, "loss": 0.2463, "step": 1075500 }, { "epoch": 49.5, "learning_rate": 2.5250713037077926e-05, "loss": 0.2428, "step": 1076000 }, { "epoch": 49.52, "learning_rate": 2.5239212439046833e-05, "loss": 0.2474, "step": 1076500 }, { "epoch": 49.54, "learning_rate": 2.5227711841015733e-05, "loss": 0.2426, "step": 1077000 }, { "epoch": 49.57, "learning_rate": 2.521621124298464e-05, "loss": 0.249, "step": 1077500 }, { "epoch": 49.59, "learning_rate": 2.520471064495354e-05, "loss": 0.2468, "step": 1078000 }, { "epoch": 49.61, "learning_rate": 2.519321004692244e-05, "loss": 0.2458, "step": 1078500 }, { "epoch": 49.64, "learning_rate": 2.5181709448891346e-05, "loss": 0.2454, "step": 1079000 }, { "epoch": 49.66, "learning_rate": 2.5170208850860246e-05, "loss": 0.2435, "step": 1079500 }, { "epoch": 49.68, "learning_rate": 2.515870825282915e-05, "loss": 0.2479, "step": 1080000 }, { "epoch": 49.71, "learning_rate": 2.514720765479805e-05, "loss": 0.2421, "step": 1080500 }, { "epoch": 49.73, "learning_rate": 2.5135707056766955e-05, "loss": 0.2494, "step": 1081000 }, { "epoch": 49.75, "learning_rate": 2.5124206458735855e-05, "loss": 0.2444, "step": 1081500 }, { "epoch": 49.77, "learning_rate": 2.5112705860704762e-05, "loss": 0.2459, "step": 1082000 }, { "epoch": 49.8, "learning_rate": 2.510120526267366e-05, "loss": 0.2479, "step": 1082500 }, { "epoch": 49.82, "learning_rate": 2.508970466464256e-05, "loss": 0.2462, "step": 1083000 }, { "epoch": 49.84, "learning_rate": 2.5078204066611465e-05, "loss": 0.2466, "step": 1083500 }, { "epoch": 49.87, "learning_rate": 2.5066703468580364e-05, "loss": 0.2482, "step": 1084000 }, { "epoch": 49.89, "learning_rate": 2.505520287054927e-05, "loss": 0.2521, "step": 1084500 }, { "epoch": 49.91, "learning_rate": 2.504370227251817e-05, "loss": 0.2484, "step": 1085000 }, { "epoch": 49.94, "learning_rate": 2.5032201674487078e-05, "loss": 0.25, "step": 1085500 }, { "epoch": 49.96, "learning_rate": 2.5020701076455977e-05, "loss": 0.2467, "step": 1086000 }, { "epoch": 49.98, "learning_rate": 2.5009200478424877e-05, "loss": 0.2468, "step": 1086500 }, { "epoch": 50.0, "learning_rate": 2.4997699880393784e-05, "loss": 0.2443, "step": 1087000 }, { "epoch": 50.03, "learning_rate": 2.4986199282362684e-05, "loss": 0.2288, "step": 1087500 }, { "epoch": 50.05, "learning_rate": 2.4974698684331587e-05, "loss": 0.2308, "step": 1088000 }, { "epoch": 50.07, "learning_rate": 2.4963198086300487e-05, "loss": 0.235, "step": 1088500 }, { "epoch": 50.1, "learning_rate": 2.495169748826939e-05, "loss": 0.2258, "step": 1089000 }, { "epoch": 50.12, "learning_rate": 2.4940196890238293e-05, "loss": 0.2268, "step": 1089500 }, { "epoch": 50.14, "learning_rate": 2.4928696292207197e-05, "loss": 0.2274, "step": 1090000 }, { "epoch": 50.17, "learning_rate": 2.49171956941761e-05, "loss": 0.2334, "step": 1090500 }, { "epoch": 50.19, "learning_rate": 2.4905695096145003e-05, "loss": 0.2357, "step": 1091000 }, { "epoch": 50.21, "learning_rate": 2.4894194498113903e-05, "loss": 0.2306, "step": 1091500 }, { "epoch": 50.23, "learning_rate": 2.4882693900082806e-05, "loss": 0.2383, "step": 1092000 }, { "epoch": 50.26, "learning_rate": 2.4871193302051706e-05, "loss": 0.2392, "step": 1092500 }, { "epoch": 50.28, "learning_rate": 2.485969270402061e-05, "loss": 0.2355, "step": 1093000 }, { "epoch": 50.3, "learning_rate": 2.4848192105989512e-05, "loss": 0.2347, "step": 1093500 }, { "epoch": 50.33, "learning_rate": 2.4836691507958416e-05, "loss": 0.2381, "step": 1094000 }, { "epoch": 50.35, "learning_rate": 2.482519090992732e-05, "loss": 0.237, "step": 1094500 }, { "epoch": 50.37, "learning_rate": 2.4813690311896222e-05, "loss": 0.2426, "step": 1095000 }, { "epoch": 50.4, "learning_rate": 2.4802189713865122e-05, "loss": 0.2422, "step": 1095500 }, { "epoch": 50.42, "learning_rate": 2.4790689115834022e-05, "loss": 0.2395, "step": 1096000 }, { "epoch": 50.44, "learning_rate": 2.4779188517802925e-05, "loss": 0.2368, "step": 1096500 }, { "epoch": 50.46, "learning_rate": 2.4767687919771828e-05, "loss": 0.2386, "step": 1097000 }, { "epoch": 50.49, "learning_rate": 2.475618732174073e-05, "loss": 0.2408, "step": 1097500 }, { "epoch": 50.51, "learning_rate": 2.4744686723709635e-05, "loss": 0.2401, "step": 1098000 }, { "epoch": 50.53, "learning_rate": 2.4733186125678538e-05, "loss": 0.2397, "step": 1098500 }, { "epoch": 50.56, "learning_rate": 2.472168552764744e-05, "loss": 0.2401, "step": 1099000 }, { "epoch": 50.58, "learning_rate": 2.471018492961634e-05, "loss": 0.2405, "step": 1099500 }, { "epoch": 50.6, "learning_rate": 2.469868433158524e-05, "loss": 0.2434, "step": 1100000 }, { "epoch": 50.63, "learning_rate": 2.4687183733554144e-05, "loss": 0.2435, "step": 1100500 }, { "epoch": 50.65, "learning_rate": 2.4675683135523047e-05, "loss": 0.2463, "step": 1101000 }, { "epoch": 50.67, "learning_rate": 2.466418253749195e-05, "loss": 0.2439, "step": 1101500 }, { "epoch": 50.69, "learning_rate": 2.4652681939460854e-05, "loss": 0.2437, "step": 1102000 }, { "epoch": 50.72, "learning_rate": 2.4641181341429757e-05, "loss": 0.2446, "step": 1102500 }, { "epoch": 50.74, "learning_rate": 2.462968074339866e-05, "loss": 0.2449, "step": 1103000 }, { "epoch": 50.76, "learning_rate": 2.461818014536756e-05, "loss": 0.2442, "step": 1103500 }, { "epoch": 50.79, "learning_rate": 2.460667954733646e-05, "loss": 0.2461, "step": 1104000 }, { "epoch": 50.81, "learning_rate": 2.4595178949305363e-05, "loss": 0.2443, "step": 1104500 }, { "epoch": 50.83, "learning_rate": 2.4583678351274266e-05, "loss": 0.2484, "step": 1105000 }, { "epoch": 50.86, "learning_rate": 2.457217775324317e-05, "loss": 0.2483, "step": 1105500 }, { "epoch": 50.88, "learning_rate": 2.4560677155212073e-05, "loss": 0.2426, "step": 1106000 }, { "epoch": 50.9, "learning_rate": 2.4549176557180976e-05, "loss": 0.2443, "step": 1106500 }, { "epoch": 50.92, "learning_rate": 2.453767595914988e-05, "loss": 0.2449, "step": 1107000 }, { "epoch": 50.95, "learning_rate": 2.452617536111878e-05, "loss": 0.247, "step": 1107500 }, { "epoch": 50.97, "learning_rate": 2.451467476308768e-05, "loss": 0.2427, "step": 1108000 }, { "epoch": 50.99, "learning_rate": 2.4503174165056582e-05, "loss": 0.2456, "step": 1108500 }, { "epoch": 51.02, "learning_rate": 2.4491673567025486e-05, "loss": 0.2323, "step": 1109000 }, { "epoch": 51.04, "learning_rate": 2.448017296899439e-05, "loss": 0.2276, "step": 1109500 }, { "epoch": 51.06, "learning_rate": 2.4468672370963292e-05, "loss": 0.2249, "step": 1110000 }, { "epoch": 51.09, "learning_rate": 2.4457171772932195e-05, "loss": 0.2309, "step": 1110500 }, { "epoch": 51.11, "learning_rate": 2.44456711749011e-05, "loss": 0.2311, "step": 1111000 }, { "epoch": 51.13, "learning_rate": 2.443417057687e-05, "loss": 0.227, "step": 1111500 }, { "epoch": 51.15, "learning_rate": 2.4422669978838898e-05, "loss": 0.2313, "step": 1112000 }, { "epoch": 51.18, "learning_rate": 2.44111693808078e-05, "loss": 0.2345, "step": 1112500 }, { "epoch": 51.2, "learning_rate": 2.4399668782776705e-05, "loss": 0.2368, "step": 1113000 }, { "epoch": 51.22, "learning_rate": 2.4388168184745608e-05, "loss": 0.2307, "step": 1113500 }, { "epoch": 51.25, "learning_rate": 2.437666758671451e-05, "loss": 0.2366, "step": 1114000 }, { "epoch": 51.27, "learning_rate": 2.4365166988683414e-05, "loss": 0.2338, "step": 1114500 }, { "epoch": 51.29, "learning_rate": 2.4353666390652318e-05, "loss": 0.2407, "step": 1115000 }, { "epoch": 51.32, "learning_rate": 2.4342165792621217e-05, "loss": 0.233, "step": 1115500 }, { "epoch": 51.34, "learning_rate": 2.4330665194590117e-05, "loss": 0.2428, "step": 1116000 }, { "epoch": 51.36, "learning_rate": 2.431916459655902e-05, "loss": 0.2359, "step": 1116500 }, { "epoch": 51.38, "learning_rate": 2.4307663998527924e-05, "loss": 0.2337, "step": 1117000 }, { "epoch": 51.41, "learning_rate": 2.4296163400496827e-05, "loss": 0.2353, "step": 1117500 }, { "epoch": 51.43, "learning_rate": 2.428466280246573e-05, "loss": 0.2332, "step": 1118000 }, { "epoch": 51.45, "learning_rate": 2.4273162204434633e-05, "loss": 0.237, "step": 1118500 }, { "epoch": 51.48, "learning_rate": 2.4261661606403537e-05, "loss": 0.2397, "step": 1119000 }, { "epoch": 51.5, "learning_rate": 2.4250161008372437e-05, "loss": 0.2368, "step": 1119500 }, { "epoch": 51.52, "learning_rate": 2.4238660410341336e-05, "loss": 0.2433, "step": 1120000 }, { "epoch": 51.55, "learning_rate": 2.422715981231024e-05, "loss": 0.2386, "step": 1120500 }, { "epoch": 51.57, "learning_rate": 2.4215659214279143e-05, "loss": 0.2403, "step": 1121000 }, { "epoch": 51.59, "learning_rate": 2.4204158616248046e-05, "loss": 0.234, "step": 1121500 }, { "epoch": 51.61, "learning_rate": 2.419265801821695e-05, "loss": 0.2423, "step": 1122000 }, { "epoch": 51.64, "learning_rate": 2.4181157420185853e-05, "loss": 0.2439, "step": 1122500 }, { "epoch": 51.66, "learning_rate": 2.4169656822154756e-05, "loss": 0.2394, "step": 1123000 }, { "epoch": 51.68, "learning_rate": 2.4158156224123656e-05, "loss": 0.2392, "step": 1123500 }, { "epoch": 51.71, "learning_rate": 2.4146655626092555e-05, "loss": 0.2383, "step": 1124000 }, { "epoch": 51.73, "learning_rate": 2.413515502806146e-05, "loss": 0.2389, "step": 1124500 }, { "epoch": 51.75, "learning_rate": 2.4123654430030362e-05, "loss": 0.2455, "step": 1125000 }, { "epoch": 51.78, "learning_rate": 2.4112153831999265e-05, "loss": 0.2376, "step": 1125500 }, { "epoch": 51.8, "learning_rate": 2.410065323396817e-05, "loss": 0.2443, "step": 1126000 }, { "epoch": 51.82, "learning_rate": 2.408915263593707e-05, "loss": 0.2416, "step": 1126500 }, { "epoch": 51.84, "learning_rate": 2.4077652037905975e-05, "loss": 0.2438, "step": 1127000 }, { "epoch": 51.87, "learning_rate": 2.4066151439874875e-05, "loss": 0.2429, "step": 1127500 }, { "epoch": 51.89, "learning_rate": 2.4054650841843775e-05, "loss": 0.2426, "step": 1128000 }, { "epoch": 51.91, "learning_rate": 2.4043150243812678e-05, "loss": 0.2423, "step": 1128500 }, { "epoch": 51.94, "learning_rate": 2.403164964578158e-05, "loss": 0.2456, "step": 1129000 }, { "epoch": 51.96, "learning_rate": 2.4020149047750484e-05, "loss": 0.2462, "step": 1129500 }, { "epoch": 51.98, "learning_rate": 2.4008648449719387e-05, "loss": 0.2434, "step": 1130000 }, { "epoch": 52.01, "learning_rate": 2.399714785168829e-05, "loss": 0.2309, "step": 1130500 }, { "epoch": 52.03, "learning_rate": 2.3985647253657194e-05, "loss": 0.2258, "step": 1131000 }, { "epoch": 52.05, "learning_rate": 2.3974146655626094e-05, "loss": 0.2221, "step": 1131500 }, { "epoch": 52.07, "learning_rate": 2.3962646057594994e-05, "loss": 0.2297, "step": 1132000 }, { "epoch": 52.1, "learning_rate": 2.3951145459563897e-05, "loss": 0.2269, "step": 1132500 }, { "epoch": 52.12, "learning_rate": 2.39396448615328e-05, "loss": 0.2279, "step": 1133000 }, { "epoch": 52.14, "learning_rate": 2.3928144263501703e-05, "loss": 0.2265, "step": 1133500 }, { "epoch": 52.17, "learning_rate": 2.3916643665470607e-05, "loss": 0.2314, "step": 1134000 }, { "epoch": 52.19, "learning_rate": 2.390514306743951e-05, "loss": 0.2318, "step": 1134500 }, { "epoch": 52.21, "learning_rate": 2.3893642469408413e-05, "loss": 0.227, "step": 1135000 }, { "epoch": 52.24, "learning_rate": 2.3882141871377313e-05, "loss": 0.2306, "step": 1135500 }, { "epoch": 52.26, "learning_rate": 2.3870641273346213e-05, "loss": 0.2341, "step": 1136000 }, { "epoch": 52.28, "learning_rate": 2.3859140675315116e-05, "loss": 0.2347, "step": 1136500 }, { "epoch": 52.3, "learning_rate": 2.384764007728402e-05, "loss": 0.2319, "step": 1137000 }, { "epoch": 52.33, "learning_rate": 2.3836139479252922e-05, "loss": 0.2321, "step": 1137500 }, { "epoch": 52.35, "learning_rate": 2.3824638881221826e-05, "loss": 0.2336, "step": 1138000 }, { "epoch": 52.37, "learning_rate": 2.381313828319073e-05, "loss": 0.2324, "step": 1138500 }, { "epoch": 52.4, "learning_rate": 2.3801637685159632e-05, "loss": 0.235, "step": 1139000 }, { "epoch": 52.42, "learning_rate": 2.3790137087128532e-05, "loss": 0.2396, "step": 1139500 }, { "epoch": 52.44, "learning_rate": 2.3778636489097432e-05, "loss": 0.2359, "step": 1140000 }, { "epoch": 52.47, "learning_rate": 2.3767135891066335e-05, "loss": 0.2407, "step": 1140500 }, { "epoch": 52.49, "learning_rate": 2.375563529303524e-05, "loss": 0.2335, "step": 1141000 }, { "epoch": 52.51, "learning_rate": 2.374413469500414e-05, "loss": 0.2382, "step": 1141500 }, { "epoch": 52.53, "learning_rate": 2.3732634096973045e-05, "loss": 0.238, "step": 1142000 }, { "epoch": 52.56, "learning_rate": 2.3721133498941948e-05, "loss": 0.2412, "step": 1142500 }, { "epoch": 52.58, "learning_rate": 2.370963290091085e-05, "loss": 0.2391, "step": 1143000 }, { "epoch": 52.6, "learning_rate": 2.369813230287975e-05, "loss": 0.2368, "step": 1143500 }, { "epoch": 52.63, "learning_rate": 2.368663170484865e-05, "loss": 0.2384, "step": 1144000 }, { "epoch": 52.65, "learning_rate": 2.3675131106817554e-05, "loss": 0.2371, "step": 1144500 }, { "epoch": 52.67, "learning_rate": 2.3663630508786457e-05, "loss": 0.2336, "step": 1145000 }, { "epoch": 52.7, "learning_rate": 2.365212991075536e-05, "loss": 0.238, "step": 1145500 }, { "epoch": 52.72, "learning_rate": 2.3640629312724264e-05, "loss": 0.2398, "step": 1146000 }, { "epoch": 52.74, "learning_rate": 2.3629128714693167e-05, "loss": 0.2427, "step": 1146500 }, { "epoch": 52.76, "learning_rate": 2.361762811666207e-05, "loss": 0.2408, "step": 1147000 }, { "epoch": 52.79, "learning_rate": 2.360612751863097e-05, "loss": 0.2441, "step": 1147500 }, { "epoch": 52.81, "learning_rate": 2.359462692059987e-05, "loss": 0.2398, "step": 1148000 }, { "epoch": 52.83, "learning_rate": 2.3583126322568773e-05, "loss": 0.2436, "step": 1148500 }, { "epoch": 52.86, "learning_rate": 2.3571625724537677e-05, "loss": 0.2405, "step": 1149000 }, { "epoch": 52.88, "learning_rate": 2.356012512650658e-05, "loss": 0.2434, "step": 1149500 }, { "epoch": 52.9, "learning_rate": 2.3548624528475483e-05, "loss": 0.2423, "step": 1150000 }, { "epoch": 52.93, "learning_rate": 2.3537123930444386e-05, "loss": 0.2419, "step": 1150500 }, { "epoch": 52.95, "learning_rate": 2.352562333241329e-05, "loss": 0.2447, "step": 1151000 }, { "epoch": 52.97, "learning_rate": 2.351412273438219e-05, "loss": 0.2418, "step": 1151500 }, { "epoch": 52.99, "learning_rate": 2.350262213635109e-05, "loss": 0.2412, "step": 1152000 }, { "epoch": 53.02, "learning_rate": 2.3491121538319992e-05, "loss": 0.2306, "step": 1152500 }, { "epoch": 53.04, "learning_rate": 2.3479620940288896e-05, "loss": 0.222, "step": 1153000 }, { "epoch": 53.06, "learning_rate": 2.34681203422578e-05, "loss": 0.2261, "step": 1153500 }, { "epoch": 53.09, "learning_rate": 2.3456619744226702e-05, "loss": 0.2236, "step": 1154000 }, { "epoch": 53.11, "learning_rate": 2.3445119146195605e-05, "loss": 0.2242, "step": 1154500 }, { "epoch": 53.13, "learning_rate": 2.3433618548164505e-05, "loss": 0.2277, "step": 1155000 }, { "epoch": 53.16, "learning_rate": 2.342211795013341e-05, "loss": 0.2256, "step": 1155500 }, { "epoch": 53.18, "learning_rate": 2.3410617352102308e-05, "loss": 0.233, "step": 1156000 }, { "epoch": 53.2, "learning_rate": 2.339911675407121e-05, "loss": 0.2347, "step": 1156500 }, { "epoch": 53.22, "learning_rate": 2.3387616156040115e-05, "loss": 0.2282, "step": 1157000 }, { "epoch": 53.25, "learning_rate": 2.3376115558009018e-05, "loss": 0.2322, "step": 1157500 }, { "epoch": 53.27, "learning_rate": 2.336461495997792e-05, "loss": 0.2296, "step": 1158000 }, { "epoch": 53.29, "learning_rate": 2.3353114361946824e-05, "loss": 0.2283, "step": 1158500 }, { "epoch": 53.32, "learning_rate": 2.3341613763915724e-05, "loss": 0.2306, "step": 1159000 }, { "epoch": 53.34, "learning_rate": 2.3330113165884627e-05, "loss": 0.2295, "step": 1159500 }, { "epoch": 53.36, "learning_rate": 2.3318612567853527e-05, "loss": 0.2351, "step": 1160000 }, { "epoch": 53.39, "learning_rate": 2.330711196982243e-05, "loss": 0.2296, "step": 1160500 }, { "epoch": 53.41, "learning_rate": 2.3295611371791334e-05, "loss": 0.2375, "step": 1161000 }, { "epoch": 53.43, "learning_rate": 2.3284110773760237e-05, "loss": 0.2365, "step": 1161500 }, { "epoch": 53.45, "learning_rate": 2.327261017572914e-05, "loss": 0.2334, "step": 1162000 }, { "epoch": 53.48, "learning_rate": 2.3261109577698044e-05, "loss": 0.2304, "step": 1162500 }, { "epoch": 53.5, "learning_rate": 2.3249608979666943e-05, "loss": 0.2378, "step": 1163000 }, { "epoch": 53.52, "learning_rate": 2.3238108381635847e-05, "loss": 0.2329, "step": 1163500 }, { "epoch": 53.55, "learning_rate": 2.3226607783604746e-05, "loss": 0.2352, "step": 1164000 }, { "epoch": 53.57, "learning_rate": 2.321510718557365e-05, "loss": 0.2367, "step": 1164500 }, { "epoch": 53.59, "learning_rate": 2.3203606587542553e-05, "loss": 0.2335, "step": 1165000 }, { "epoch": 53.62, "learning_rate": 2.3192105989511456e-05, "loss": 0.235, "step": 1165500 }, { "epoch": 53.64, "learning_rate": 2.318060539148036e-05, "loss": 0.241, "step": 1166000 }, { "epoch": 53.66, "learning_rate": 2.3169104793449263e-05, "loss": 0.2407, "step": 1166500 }, { "epoch": 53.68, "learning_rate": 2.3157604195418162e-05, "loss": 0.2419, "step": 1167000 }, { "epoch": 53.71, "learning_rate": 2.3146103597387066e-05, "loss": 0.2375, "step": 1167500 }, { "epoch": 53.73, "learning_rate": 2.3134602999355966e-05, "loss": 0.242, "step": 1168000 }, { "epoch": 53.75, "learning_rate": 2.312310240132487e-05, "loss": 0.2377, "step": 1168500 }, { "epoch": 53.78, "learning_rate": 2.3111601803293772e-05, "loss": 0.2401, "step": 1169000 }, { "epoch": 53.8, "learning_rate": 2.3100101205262675e-05, "loss": 0.2376, "step": 1169500 }, { "epoch": 53.82, "learning_rate": 2.308860060723158e-05, "loss": 0.2381, "step": 1170000 }, { "epoch": 53.85, "learning_rate": 2.3077100009200482e-05, "loss": 0.2409, "step": 1170500 }, { "epoch": 53.87, "learning_rate": 2.306559941116938e-05, "loss": 0.2426, "step": 1171000 }, { "epoch": 53.89, "learning_rate": 2.3054098813138285e-05, "loss": 0.2398, "step": 1171500 }, { "epoch": 53.91, "learning_rate": 2.3042598215107185e-05, "loss": 0.2427, "step": 1172000 }, { "epoch": 53.94, "learning_rate": 2.3031097617076088e-05, "loss": 0.2389, "step": 1172500 }, { "epoch": 53.96, "learning_rate": 2.301959701904499e-05, "loss": 0.2427, "step": 1173000 }, { "epoch": 53.98, "learning_rate": 2.3008096421013894e-05, "loss": 0.237, "step": 1173500 }, { "epoch": 54.01, "learning_rate": 2.2996595822982798e-05, "loss": 0.2369, "step": 1174000 }, { "epoch": 54.03, "learning_rate": 2.29850952249517e-05, "loss": 0.2238, "step": 1174500 }, { "epoch": 54.05, "learning_rate": 2.29735946269206e-05, "loss": 0.2246, "step": 1175000 }, { "epoch": 54.08, "learning_rate": 2.2962094028889504e-05, "loss": 0.2199, "step": 1175500 }, { "epoch": 54.1, "learning_rate": 2.2950593430858404e-05, "loss": 0.2252, "step": 1176000 }, { "epoch": 54.12, "learning_rate": 2.2939092832827307e-05, "loss": 0.2258, "step": 1176500 }, { "epoch": 54.14, "learning_rate": 2.292759223479621e-05, "loss": 0.2203, "step": 1177000 }, { "epoch": 54.17, "learning_rate": 2.2916091636765113e-05, "loss": 0.2282, "step": 1177500 }, { "epoch": 54.19, "learning_rate": 2.2904591038734017e-05, "loss": 0.2291, "step": 1178000 }, { "epoch": 54.21, "learning_rate": 2.289309044070292e-05, "loss": 0.2292, "step": 1178500 }, { "epoch": 54.24, "learning_rate": 2.288158984267182e-05, "loss": 0.229, "step": 1179000 }, { "epoch": 54.26, "learning_rate": 2.2870089244640723e-05, "loss": 0.2313, "step": 1179500 }, { "epoch": 54.28, "learning_rate": 2.2858588646609623e-05, "loss": 0.2268, "step": 1180000 }, { "epoch": 54.31, "learning_rate": 2.2847088048578526e-05, "loss": 0.2257, "step": 1180500 }, { "epoch": 54.33, "learning_rate": 2.283558745054743e-05, "loss": 0.2284, "step": 1181000 }, { "epoch": 54.35, "learning_rate": 2.2824086852516333e-05, "loss": 0.2357, "step": 1181500 }, { "epoch": 54.37, "learning_rate": 2.2812586254485236e-05, "loss": 0.2357, "step": 1182000 }, { "epoch": 54.4, "learning_rate": 2.280108565645414e-05, "loss": 0.2306, "step": 1182500 }, { "epoch": 54.42, "learning_rate": 2.278958505842304e-05, "loss": 0.237, "step": 1183000 }, { "epoch": 54.44, "learning_rate": 2.2778084460391942e-05, "loss": 0.2341, "step": 1183500 }, { "epoch": 54.47, "learning_rate": 2.2766583862360842e-05, "loss": 0.2385, "step": 1184000 }, { "epoch": 54.49, "learning_rate": 2.2755083264329745e-05, "loss": 0.231, "step": 1184500 }, { "epoch": 54.51, "learning_rate": 2.274358266629865e-05, "loss": 0.2325, "step": 1185000 }, { "epoch": 54.54, "learning_rate": 2.273208206826755e-05, "loss": 0.2358, "step": 1185500 }, { "epoch": 54.56, "learning_rate": 2.2720581470236455e-05, "loss": 0.2354, "step": 1186000 }, { "epoch": 54.58, "learning_rate": 2.2709080872205355e-05, "loss": 0.2347, "step": 1186500 }, { "epoch": 54.6, "learning_rate": 2.2697580274174258e-05, "loss": 0.233, "step": 1187000 }, { "epoch": 54.63, "learning_rate": 2.268607967614316e-05, "loss": 0.2379, "step": 1187500 }, { "epoch": 54.65, "learning_rate": 2.267457907811206e-05, "loss": 0.2407, "step": 1188000 }, { "epoch": 54.67, "learning_rate": 2.2663078480080964e-05, "loss": 0.2376, "step": 1188500 }, { "epoch": 54.7, "learning_rate": 2.2651577882049867e-05, "loss": 0.2369, "step": 1189000 }, { "epoch": 54.72, "learning_rate": 2.264007728401877e-05, "loss": 0.2335, "step": 1189500 }, { "epoch": 54.74, "learning_rate": 2.2628576685987674e-05, "loss": 0.2357, "step": 1190000 }, { "epoch": 54.77, "learning_rate": 2.2617076087956574e-05, "loss": 0.2288, "step": 1190500 }, { "epoch": 54.79, "learning_rate": 2.2605575489925477e-05, "loss": 0.2397, "step": 1191000 }, { "epoch": 54.81, "learning_rate": 2.259407489189438e-05, "loss": 0.2374, "step": 1191500 }, { "epoch": 54.83, "learning_rate": 2.258257429386328e-05, "loss": 0.235, "step": 1192000 }, { "epoch": 54.86, "learning_rate": 2.2571073695832183e-05, "loss": 0.2366, "step": 1192500 }, { "epoch": 54.88, "learning_rate": 2.2559573097801087e-05, "loss": 0.238, "step": 1193000 }, { "epoch": 54.9, "learning_rate": 2.254807249976999e-05, "loss": 0.2341, "step": 1193500 }, { "epoch": 54.93, "learning_rate": 2.2536571901738893e-05, "loss": 0.2409, "step": 1194000 }, { "epoch": 54.95, "learning_rate": 2.2525071303707793e-05, "loss": 0.2395, "step": 1194500 }, { "epoch": 54.97, "learning_rate": 2.2513570705676696e-05, "loss": 0.2353, "step": 1195000 }, { "epoch": 55.0, "learning_rate": 2.25020701076456e-05, "loss": 0.2366, "step": 1195500 }, { "epoch": 55.02, "learning_rate": 2.24905695096145e-05, "loss": 0.2197, "step": 1196000 }, { "epoch": 55.04, "learning_rate": 2.2479068911583402e-05, "loss": 0.2193, "step": 1196500 }, { "epoch": 55.06, "learning_rate": 2.2467568313552306e-05, "loss": 0.2206, "step": 1197000 }, { "epoch": 55.09, "learning_rate": 2.245606771552121e-05, "loss": 0.2201, "step": 1197500 }, { "epoch": 55.11, "learning_rate": 2.2444567117490112e-05, "loss": 0.2236, "step": 1198000 }, { "epoch": 55.13, "learning_rate": 2.2433066519459012e-05, "loss": 0.2236, "step": 1198500 }, { "epoch": 55.16, "learning_rate": 2.2421565921427915e-05, "loss": 0.2264, "step": 1199000 }, { "epoch": 55.18, "learning_rate": 2.241006532339682e-05, "loss": 0.2295, "step": 1199500 }, { "epoch": 55.2, "learning_rate": 2.239856472536572e-05, "loss": 0.227, "step": 1200000 }, { "epoch": 55.23, "learning_rate": 2.238706412733462e-05, "loss": 0.2262, "step": 1200500 }, { "epoch": 55.25, "learning_rate": 2.2375563529303525e-05, "loss": 0.2253, "step": 1201000 }, { "epoch": 55.27, "learning_rate": 2.2364062931272428e-05, "loss": 0.2277, "step": 1201500 }, { "epoch": 55.29, "learning_rate": 2.235256233324133e-05, "loss": 0.2318, "step": 1202000 }, { "epoch": 55.32, "learning_rate": 2.234106173521023e-05, "loss": 0.2263, "step": 1202500 }, { "epoch": 55.34, "learning_rate": 2.2329561137179134e-05, "loss": 0.2328, "step": 1203000 }, { "epoch": 55.36, "learning_rate": 2.2318060539148038e-05, "loss": 0.228, "step": 1203500 }, { "epoch": 55.39, "learning_rate": 2.2306559941116937e-05, "loss": 0.2347, "step": 1204000 }, { "epoch": 55.41, "learning_rate": 2.229505934308584e-05, "loss": 0.2285, "step": 1204500 }, { "epoch": 55.43, "learning_rate": 2.2283558745054744e-05, "loss": 0.2303, "step": 1205000 }, { "epoch": 55.46, "learning_rate": 2.2272058147023647e-05, "loss": 0.2313, "step": 1205500 }, { "epoch": 55.48, "learning_rate": 2.226055754899255e-05, "loss": 0.2274, "step": 1206000 }, { "epoch": 55.5, "learning_rate": 2.224905695096145e-05, "loss": 0.2332, "step": 1206500 }, { "epoch": 55.52, "learning_rate": 2.2237556352930353e-05, "loss": 0.231, "step": 1207000 }, { "epoch": 55.55, "learning_rate": 2.2226055754899257e-05, "loss": 0.2309, "step": 1207500 }, { "epoch": 55.57, "learning_rate": 2.2214555156868157e-05, "loss": 0.2283, "step": 1208000 }, { "epoch": 55.59, "learning_rate": 2.220305455883706e-05, "loss": 0.2342, "step": 1208500 }, { "epoch": 55.62, "learning_rate": 2.2191553960805963e-05, "loss": 0.2362, "step": 1209000 }, { "epoch": 55.64, "learning_rate": 2.2180053362774866e-05, "loss": 0.2287, "step": 1209500 }, { "epoch": 55.66, "learning_rate": 2.216855276474377e-05, "loss": 0.2332, "step": 1210000 }, { "epoch": 55.69, "learning_rate": 2.215705216671267e-05, "loss": 0.2315, "step": 1210500 }, { "epoch": 55.71, "learning_rate": 2.2145551568681573e-05, "loss": 0.2344, "step": 1211000 }, { "epoch": 55.73, "learning_rate": 2.2134050970650476e-05, "loss": 0.2365, "step": 1211500 }, { "epoch": 55.75, "learning_rate": 2.2122550372619376e-05, "loss": 0.2353, "step": 1212000 }, { "epoch": 55.78, "learning_rate": 2.211104977458828e-05, "loss": 0.2375, "step": 1212500 }, { "epoch": 55.8, "learning_rate": 2.2099549176557182e-05, "loss": 0.2334, "step": 1213000 }, { "epoch": 55.82, "learning_rate": 2.2088048578526085e-05, "loss": 0.2399, "step": 1213500 }, { "epoch": 55.85, "learning_rate": 2.207654798049499e-05, "loss": 0.2363, "step": 1214000 }, { "epoch": 55.87, "learning_rate": 2.206504738246389e-05, "loss": 0.239, "step": 1214500 }, { "epoch": 55.89, "learning_rate": 2.205354678443279e-05, "loss": 0.2358, "step": 1215000 }, { "epoch": 55.92, "learning_rate": 2.2042046186401695e-05, "loss": 0.2329, "step": 1215500 }, { "epoch": 55.94, "learning_rate": 2.2030545588370595e-05, "loss": 0.2416, "step": 1216000 }, { "epoch": 55.96, "learning_rate": 2.2019044990339498e-05, "loss": 0.2337, "step": 1216500 }, { "epoch": 55.98, "learning_rate": 2.20075443923084e-05, "loss": 0.238, "step": 1217000 }, { "epoch": 56.01, "learning_rate": 2.1996043794277304e-05, "loss": 0.23, "step": 1217500 }, { "epoch": 56.03, "learning_rate": 2.1984543196246204e-05, "loss": 0.2183, "step": 1218000 }, { "epoch": 56.05, "learning_rate": 2.1973042598215107e-05, "loss": 0.2214, "step": 1218500 }, { "epoch": 56.08, "learning_rate": 2.196154200018401e-05, "loss": 0.2161, "step": 1219000 }, { "epoch": 56.1, "learning_rate": 2.1950041402152914e-05, "loss": 0.2219, "step": 1219500 }, { "epoch": 56.12, "learning_rate": 2.1938540804121814e-05, "loss": 0.2213, "step": 1220000 }, { "epoch": 56.15, "learning_rate": 2.1927040206090717e-05, "loss": 0.2279, "step": 1220500 }, { "epoch": 56.17, "learning_rate": 2.191553960805962e-05, "loss": 0.2202, "step": 1221000 }, { "epoch": 56.19, "learning_rate": 2.1904039010028524e-05, "loss": 0.2256, "step": 1221500 }, { "epoch": 56.21, "learning_rate": 2.1892538411997423e-05, "loss": 0.2245, "step": 1222000 }, { "epoch": 56.24, "learning_rate": 2.1881037813966327e-05, "loss": 0.2292, "step": 1222500 }, { "epoch": 56.26, "learning_rate": 2.186953721593523e-05, "loss": 0.2261, "step": 1223000 }, { "epoch": 56.28, "learning_rate": 2.1858036617904133e-05, "loss": 0.2261, "step": 1223500 }, { "epoch": 56.31, "learning_rate": 2.1846536019873033e-05, "loss": 0.2263, "step": 1224000 }, { "epoch": 56.33, "learning_rate": 2.1835035421841936e-05, "loss": 0.2282, "step": 1224500 }, { "epoch": 56.35, "learning_rate": 2.182353482381084e-05, "loss": 0.2295, "step": 1225000 }, { "epoch": 56.38, "learning_rate": 2.1812034225779743e-05, "loss": 0.227, "step": 1225500 }, { "epoch": 56.4, "learning_rate": 2.1800533627748642e-05, "loss": 0.2264, "step": 1226000 }, { "epoch": 56.42, "learning_rate": 2.1789033029717546e-05, "loss": 0.2276, "step": 1226500 }, { "epoch": 56.44, "learning_rate": 2.177753243168645e-05, "loss": 0.2287, "step": 1227000 }, { "epoch": 56.47, "learning_rate": 2.1766031833655352e-05, "loss": 0.2336, "step": 1227500 }, { "epoch": 56.49, "learning_rate": 2.1754531235624252e-05, "loss": 0.2312, "step": 1228000 }, { "epoch": 56.51, "learning_rate": 2.1743030637593155e-05, "loss": 0.2344, "step": 1228500 }, { "epoch": 56.54, "learning_rate": 2.173153003956206e-05, "loss": 0.2342, "step": 1229000 }, { "epoch": 56.56, "learning_rate": 2.1720029441530962e-05, "loss": 0.2261, "step": 1229500 }, { "epoch": 56.58, "learning_rate": 2.170852884349986e-05, "loss": 0.2336, "step": 1230000 }, { "epoch": 56.61, "learning_rate": 2.1697028245468765e-05, "loss": 0.2359, "step": 1230500 }, { "epoch": 56.63, "learning_rate": 2.1685527647437668e-05, "loss": 0.2339, "step": 1231000 }, { "epoch": 56.65, "learning_rate": 2.167402704940657e-05, "loss": 0.2312, "step": 1231500 }, { "epoch": 56.67, "learning_rate": 2.166252645137547e-05, "loss": 0.2369, "step": 1232000 }, { "epoch": 56.7, "learning_rate": 2.1651025853344374e-05, "loss": 0.2314, "step": 1232500 }, { "epoch": 56.72, "learning_rate": 2.1639525255313278e-05, "loss": 0.2268, "step": 1233000 }, { "epoch": 56.74, "learning_rate": 2.162802465728218e-05, "loss": 0.2308, "step": 1233500 }, { "epoch": 56.77, "learning_rate": 2.161652405925108e-05, "loss": 0.2276, "step": 1234000 }, { "epoch": 56.79, "learning_rate": 2.1605023461219984e-05, "loss": 0.2363, "step": 1234500 }, { "epoch": 56.81, "learning_rate": 2.1593522863188887e-05, "loss": 0.2348, "step": 1235000 }, { "epoch": 56.84, "learning_rate": 2.158202226515779e-05, "loss": 0.2374, "step": 1235500 }, { "epoch": 56.86, "learning_rate": 2.157052166712669e-05, "loss": 0.2325, "step": 1236000 }, { "epoch": 56.88, "learning_rate": 2.1559021069095593e-05, "loss": 0.2363, "step": 1236500 }, { "epoch": 56.9, "learning_rate": 2.1547520471064497e-05, "loss": 0.2342, "step": 1237000 }, { "epoch": 56.93, "learning_rate": 2.15360198730334e-05, "loss": 0.2368, "step": 1237500 }, { "epoch": 56.95, "learning_rate": 2.15245192750023e-05, "loss": 0.2422, "step": 1238000 }, { "epoch": 56.97, "learning_rate": 2.1513018676971203e-05, "loss": 0.2359, "step": 1238500 }, { "epoch": 57.0, "learning_rate": 2.1501518078940106e-05, "loss": 0.231, "step": 1239000 }, { "epoch": 57.02, "learning_rate": 2.149001748090901e-05, "loss": 0.215, "step": 1239500 }, { "epoch": 57.04, "learning_rate": 2.147851688287791e-05, "loss": 0.2141, "step": 1240000 }, { "epoch": 57.07, "learning_rate": 2.1467016284846813e-05, "loss": 0.2143, "step": 1240500 }, { "epoch": 57.09, "learning_rate": 2.1455515686815716e-05, "loss": 0.2233, "step": 1241000 }, { "epoch": 57.11, "learning_rate": 2.144401508878462e-05, "loss": 0.219, "step": 1241500 }, { "epoch": 57.13, "learning_rate": 2.143251449075352e-05, "loss": 0.2204, "step": 1242000 }, { "epoch": 57.16, "learning_rate": 2.1421013892722422e-05, "loss": 0.2193, "step": 1242500 }, { "epoch": 57.18, "learning_rate": 2.1409513294691325e-05, "loss": 0.2239, "step": 1243000 }, { "epoch": 57.2, "learning_rate": 2.139801269666023e-05, "loss": 0.2238, "step": 1243500 }, { "epoch": 57.23, "learning_rate": 2.138651209862913e-05, "loss": 0.2198, "step": 1244000 }, { "epoch": 57.25, "learning_rate": 2.137501150059803e-05, "loss": 0.2267, "step": 1244500 }, { "epoch": 57.27, "learning_rate": 2.1363510902566935e-05, "loss": 0.2263, "step": 1245000 }, { "epoch": 57.3, "learning_rate": 2.1352010304535835e-05, "loss": 0.2268, "step": 1245500 }, { "epoch": 57.32, "learning_rate": 2.1340509706504738e-05, "loss": 0.2242, "step": 1246000 }, { "epoch": 57.34, "learning_rate": 2.132900910847364e-05, "loss": 0.2259, "step": 1246500 }, { "epoch": 57.36, "learning_rate": 2.1317508510442544e-05, "loss": 0.2313, "step": 1247000 }, { "epoch": 57.39, "learning_rate": 2.1306007912411448e-05, "loss": 0.2237, "step": 1247500 }, { "epoch": 57.41, "learning_rate": 2.1294507314380348e-05, "loss": 0.2315, "step": 1248000 }, { "epoch": 57.43, "learning_rate": 2.128300671634925e-05, "loss": 0.2325, "step": 1248500 }, { "epoch": 57.46, "learning_rate": 2.1271506118318154e-05, "loss": 0.222, "step": 1249000 }, { "epoch": 57.48, "learning_rate": 2.1260005520287054e-05, "loss": 0.2316, "step": 1249500 }, { "epoch": 57.5, "learning_rate": 2.1248504922255957e-05, "loss": 0.2294, "step": 1250000 }, { "epoch": 57.53, "learning_rate": 2.123700432422486e-05, "loss": 0.2342, "step": 1250500 }, { "epoch": 57.55, "learning_rate": 2.1225503726193764e-05, "loss": 0.2348, "step": 1251000 }, { "epoch": 57.57, "learning_rate": 2.1214003128162667e-05, "loss": 0.2324, "step": 1251500 }, { "epoch": 57.59, "learning_rate": 2.1202502530131567e-05, "loss": 0.23, "step": 1252000 }, { "epoch": 57.62, "learning_rate": 2.119100193210047e-05, "loss": 0.2259, "step": 1252500 }, { "epoch": 57.64, "learning_rate": 2.1179501334069373e-05, "loss": 0.2353, "step": 1253000 }, { "epoch": 57.66, "learning_rate": 2.1168000736038273e-05, "loss": 0.2349, "step": 1253500 }, { "epoch": 57.69, "learning_rate": 2.1156500138007176e-05, "loss": 0.2319, "step": 1254000 }, { "epoch": 57.71, "learning_rate": 2.114499953997608e-05, "loss": 0.2332, "step": 1254500 }, { "epoch": 57.73, "learning_rate": 2.1133498941944983e-05, "loss": 0.2321, "step": 1255000 }, { "epoch": 57.76, "learning_rate": 2.1121998343913886e-05, "loss": 0.2343, "step": 1255500 }, { "epoch": 57.78, "learning_rate": 2.111049774588279e-05, "loss": 0.2326, "step": 1256000 }, { "epoch": 57.8, "learning_rate": 2.109899714785169e-05, "loss": 0.2323, "step": 1256500 }, { "epoch": 57.83, "learning_rate": 2.1087496549820592e-05, "loss": 0.2356, "step": 1257000 }, { "epoch": 57.85, "learning_rate": 2.1075995951789492e-05, "loss": 0.2315, "step": 1257500 }, { "epoch": 57.87, "learning_rate": 2.1064495353758395e-05, "loss": 0.2312, "step": 1258000 }, { "epoch": 57.89, "learning_rate": 2.10529947557273e-05, "loss": 0.2336, "step": 1258500 }, { "epoch": 57.92, "learning_rate": 2.1041494157696202e-05, "loss": 0.2374, "step": 1259000 }, { "epoch": 57.94, "learning_rate": 2.1029993559665105e-05, "loss": 0.2296, "step": 1259500 }, { "epoch": 57.96, "learning_rate": 2.1018492961634008e-05, "loss": 0.2336, "step": 1260000 }, { "epoch": 57.99, "learning_rate": 2.1006992363602908e-05, "loss": 0.234, "step": 1260500 }, { "epoch": 58.01, "learning_rate": 2.099549176557181e-05, "loss": 0.2274, "step": 1261000 }, { "epoch": 58.03, "learning_rate": 2.098399116754071e-05, "loss": 0.2182, "step": 1261500 }, { "epoch": 58.06, "learning_rate": 2.0972490569509614e-05, "loss": 0.2167, "step": 1262000 }, { "epoch": 58.08, "learning_rate": 2.0960989971478518e-05, "loss": 0.2229, "step": 1262500 }, { "epoch": 58.1, "learning_rate": 2.094948937344742e-05, "loss": 0.2178, "step": 1263000 }, { "epoch": 58.12, "learning_rate": 2.0937988775416324e-05, "loss": 0.2229, "step": 1263500 }, { "epoch": 58.15, "learning_rate": 2.0926488177385227e-05, "loss": 0.2199, "step": 1264000 }, { "epoch": 58.17, "learning_rate": 2.0914987579354127e-05, "loss": 0.2216, "step": 1264500 }, { "epoch": 58.19, "learning_rate": 2.090348698132303e-05, "loss": 0.2216, "step": 1265000 }, { "epoch": 58.22, "learning_rate": 2.089198638329193e-05, "loss": 0.2188, "step": 1265500 }, { "epoch": 58.24, "learning_rate": 2.0880485785260833e-05, "loss": 0.2278, "step": 1266000 }, { "epoch": 58.26, "learning_rate": 2.0868985187229737e-05, "loss": 0.2256, "step": 1266500 }, { "epoch": 58.29, "learning_rate": 2.085748458919864e-05, "loss": 0.2245, "step": 1267000 }, { "epoch": 58.31, "learning_rate": 2.0845983991167543e-05, "loss": 0.2238, "step": 1267500 }, { "epoch": 58.33, "learning_rate": 2.0834483393136446e-05, "loss": 0.2261, "step": 1268000 }, { "epoch": 58.35, "learning_rate": 2.0822982795105346e-05, "loss": 0.2248, "step": 1268500 }, { "epoch": 58.38, "learning_rate": 2.081148219707425e-05, "loss": 0.2266, "step": 1269000 }, { "epoch": 58.4, "learning_rate": 2.079998159904315e-05, "loss": 0.2247, "step": 1269500 }, { "epoch": 58.42, "learning_rate": 2.0788481001012053e-05, "loss": 0.2299, "step": 1270000 }, { "epoch": 58.45, "learning_rate": 2.0776980402980956e-05, "loss": 0.2223, "step": 1270500 }, { "epoch": 58.47, "learning_rate": 2.076547980494986e-05, "loss": 0.2223, "step": 1271000 }, { "epoch": 58.49, "learning_rate": 2.0753979206918762e-05, "loss": 0.2253, "step": 1271500 }, { "epoch": 58.52, "learning_rate": 2.0742478608887665e-05, "loss": 0.2243, "step": 1272000 }, { "epoch": 58.54, "learning_rate": 2.0730978010856565e-05, "loss": 0.2299, "step": 1272500 }, { "epoch": 58.56, "learning_rate": 2.071947741282547e-05, "loss": 0.2304, "step": 1273000 }, { "epoch": 58.58, "learning_rate": 2.070797681479437e-05, "loss": 0.2241, "step": 1273500 }, { "epoch": 58.61, "learning_rate": 2.069647621676327e-05, "loss": 0.2278, "step": 1274000 }, { "epoch": 58.63, "learning_rate": 2.0684975618732175e-05, "loss": 0.2316, "step": 1274500 }, { "epoch": 58.65, "learning_rate": 2.0673475020701078e-05, "loss": 0.2313, "step": 1275000 }, { "epoch": 58.68, "learning_rate": 2.066197442266998e-05, "loss": 0.2322, "step": 1275500 }, { "epoch": 58.7, "learning_rate": 2.0650473824638885e-05, "loss": 0.224, "step": 1276000 }, { "epoch": 58.72, "learning_rate": 2.0638973226607784e-05, "loss": 0.232, "step": 1276500 }, { "epoch": 58.75, "learning_rate": 2.0627472628576684e-05, "loss": 0.2293, "step": 1277000 }, { "epoch": 58.77, "learning_rate": 2.0615972030545588e-05, "loss": 0.2324, "step": 1277500 }, { "epoch": 58.79, "learning_rate": 2.060447143251449e-05, "loss": 0.2288, "step": 1278000 }, { "epoch": 58.81, "learning_rate": 2.0592970834483394e-05, "loss": 0.2332, "step": 1278500 }, { "epoch": 58.84, "learning_rate": 2.0581470236452297e-05, "loss": 0.2276, "step": 1279000 }, { "epoch": 58.86, "learning_rate": 2.05699696384212e-05, "loss": 0.2332, "step": 1279500 }, { "epoch": 58.88, "learning_rate": 2.0558469040390104e-05, "loss": 0.2353, "step": 1280000 }, { "epoch": 58.91, "learning_rate": 2.0546968442359004e-05, "loss": 0.2334, "step": 1280500 }, { "epoch": 58.93, "learning_rate": 2.0535467844327903e-05, "loss": 0.2373, "step": 1281000 }, { "epoch": 58.95, "learning_rate": 2.0523967246296807e-05, "loss": 0.2329, "step": 1281500 }, { "epoch": 58.98, "learning_rate": 2.051246664826571e-05, "loss": 0.2329, "step": 1282000 }, { "epoch": 59.0, "learning_rate": 2.0500966050234613e-05, "loss": 0.2295, "step": 1282500 }, { "epoch": 59.02, "learning_rate": 2.0489465452203516e-05, "loss": 0.2149, "step": 1283000 }, { "epoch": 59.04, "learning_rate": 2.047796485417242e-05, "loss": 0.2184, "step": 1283500 }, { "epoch": 59.07, "learning_rate": 2.0466464256141323e-05, "loss": 0.2171, "step": 1284000 }, { "epoch": 59.09, "learning_rate": 2.0454963658110223e-05, "loss": 0.2153, "step": 1284500 }, { "epoch": 59.11, "learning_rate": 2.0443463060079122e-05, "loss": 0.2186, "step": 1285000 }, { "epoch": 59.14, "learning_rate": 2.0431962462048026e-05, "loss": 0.2195, "step": 1285500 }, { "epoch": 59.16, "learning_rate": 2.042046186401693e-05, "loss": 0.2202, "step": 1286000 }, { "epoch": 59.18, "learning_rate": 2.0408961265985832e-05, "loss": 0.2235, "step": 1286500 }, { "epoch": 59.21, "learning_rate": 2.0397460667954735e-05, "loss": 0.2227, "step": 1287000 }, { "epoch": 59.23, "learning_rate": 2.038596006992364e-05, "loss": 0.2184, "step": 1287500 }, { "epoch": 59.25, "learning_rate": 2.0374459471892542e-05, "loss": 0.2246, "step": 1288000 }, { "epoch": 59.27, "learning_rate": 2.0362958873861442e-05, "loss": 0.2234, "step": 1288500 }, { "epoch": 59.3, "learning_rate": 2.035145827583034e-05, "loss": 0.2239, "step": 1289000 }, { "epoch": 59.32, "learning_rate": 2.0339957677799245e-05, "loss": 0.2228, "step": 1289500 }, { "epoch": 59.34, "learning_rate": 2.0328457079768148e-05, "loss": 0.2238, "step": 1290000 }, { "epoch": 59.37, "learning_rate": 2.031695648173705e-05, "loss": 0.2224, "step": 1290500 }, { "epoch": 59.39, "learning_rate": 2.0305455883705955e-05, "loss": 0.2244, "step": 1291000 }, { "epoch": 59.41, "learning_rate": 2.0293955285674858e-05, "loss": 0.2296, "step": 1291500 }, { "epoch": 59.44, "learning_rate": 2.028245468764376e-05, "loss": 0.2247, "step": 1292000 }, { "epoch": 59.46, "learning_rate": 2.027095408961266e-05, "loss": 0.225, "step": 1292500 }, { "epoch": 59.48, "learning_rate": 2.025945349158156e-05, "loss": 0.2247, "step": 1293000 }, { "epoch": 59.5, "learning_rate": 2.0247952893550464e-05, "loss": 0.2259, "step": 1293500 }, { "epoch": 59.53, "learning_rate": 2.0236452295519367e-05, "loss": 0.2269, "step": 1294000 }, { "epoch": 59.55, "learning_rate": 2.022495169748827e-05, "loss": 0.2299, "step": 1294500 }, { "epoch": 59.57, "learning_rate": 2.0213451099457174e-05, "loss": 0.2307, "step": 1295000 }, { "epoch": 59.6, "learning_rate": 2.0201950501426077e-05, "loss": 0.2272, "step": 1295500 }, { "epoch": 59.62, "learning_rate": 2.019044990339498e-05, "loss": 0.2304, "step": 1296000 }, { "epoch": 59.64, "learning_rate": 2.017894930536388e-05, "loss": 0.2303, "step": 1296500 }, { "epoch": 59.67, "learning_rate": 2.016744870733278e-05, "loss": 0.2259, "step": 1297000 }, { "epoch": 59.69, "learning_rate": 2.0155948109301683e-05, "loss": 0.2257, "step": 1297500 }, { "epoch": 59.71, "learning_rate": 2.0144447511270586e-05, "loss": 0.2251, "step": 1298000 }, { "epoch": 59.73, "learning_rate": 2.013294691323949e-05, "loss": 0.2256, "step": 1298500 }, { "epoch": 59.76, "learning_rate": 2.0121446315208393e-05, "loss": 0.2264, "step": 1299000 }, { "epoch": 59.78, "learning_rate": 2.0109945717177296e-05, "loss": 0.2258, "step": 1299500 }, { "epoch": 59.8, "learning_rate": 2.00984451191462e-05, "loss": 0.2295, "step": 1300000 }, { "epoch": 59.83, "learning_rate": 2.00869445211151e-05, "loss": 0.2326, "step": 1300500 }, { "epoch": 59.85, "learning_rate": 2.0075443923084e-05, "loss": 0.231, "step": 1301000 }, { "epoch": 59.87, "learning_rate": 2.0063943325052902e-05, "loss": 0.2323, "step": 1301500 }, { "epoch": 59.9, "learning_rate": 2.0052442727021805e-05, "loss": 0.2298, "step": 1302000 }, { "epoch": 59.92, "learning_rate": 2.004094212899071e-05, "loss": 0.2286, "step": 1302500 }, { "epoch": 59.94, "learning_rate": 2.0029441530959612e-05, "loss": 0.2349, "step": 1303000 }, { "epoch": 59.96, "learning_rate": 2.0017940932928515e-05, "loss": 0.2362, "step": 1303500 }, { "epoch": 59.99, "learning_rate": 2.0006440334897418e-05, "loss": 0.2285, "step": 1304000 }, { "epoch": 60.01, "learning_rate": 1.9994939736866318e-05, "loss": 0.2187, "step": 1304500 }, { "epoch": 60.03, "learning_rate": 1.9983439138835218e-05, "loss": 0.2151, "step": 1305000 }, { "epoch": 60.06, "learning_rate": 1.997193854080412e-05, "loss": 0.2122, "step": 1305500 }, { "epoch": 60.08, "learning_rate": 1.9960437942773024e-05, "loss": 0.2178, "step": 1306000 }, { "epoch": 60.1, "learning_rate": 1.9948937344741928e-05, "loss": 0.2233, "step": 1306500 }, { "epoch": 60.13, "learning_rate": 1.993743674671083e-05, "loss": 0.22, "step": 1307000 }, { "epoch": 60.15, "learning_rate": 1.9925936148679734e-05, "loss": 0.2145, "step": 1307500 }, { "epoch": 60.17, "learning_rate": 1.9914435550648637e-05, "loss": 0.2176, "step": 1308000 }, { "epoch": 60.19, "learning_rate": 1.9902934952617537e-05, "loss": 0.2173, "step": 1308500 }, { "epoch": 60.22, "learning_rate": 1.9891434354586437e-05, "loss": 0.2223, "step": 1309000 }, { "epoch": 60.24, "learning_rate": 1.987993375655534e-05, "loss": 0.2236, "step": 1309500 }, { "epoch": 60.26, "learning_rate": 1.9868433158524244e-05, "loss": 0.2228, "step": 1310000 }, { "epoch": 60.29, "learning_rate": 1.9856932560493147e-05, "loss": 0.2204, "step": 1310500 }, { "epoch": 60.31, "learning_rate": 1.984543196246205e-05, "loss": 0.2245, "step": 1311000 }, { "epoch": 60.33, "learning_rate": 1.9833931364430953e-05, "loss": 0.2177, "step": 1311500 }, { "epoch": 60.36, "learning_rate": 1.9822430766399856e-05, "loss": 0.2239, "step": 1312000 }, { "epoch": 60.38, "learning_rate": 1.9810930168368756e-05, "loss": 0.2233, "step": 1312500 }, { "epoch": 60.4, "learning_rate": 1.9799429570337656e-05, "loss": 0.2295, "step": 1313000 }, { "epoch": 60.42, "learning_rate": 1.978792897230656e-05, "loss": 0.2235, "step": 1313500 }, { "epoch": 60.45, "learning_rate": 1.9776428374275463e-05, "loss": 0.226, "step": 1314000 }, { "epoch": 60.47, "learning_rate": 1.9764927776244366e-05, "loss": 0.2197, "step": 1314500 }, { "epoch": 60.49, "learning_rate": 1.975342717821327e-05, "loss": 0.2235, "step": 1315000 }, { "epoch": 60.52, "learning_rate": 1.9741926580182172e-05, "loss": 0.2246, "step": 1315500 }, { "epoch": 60.54, "learning_rate": 1.9730425982151076e-05, "loss": 0.2244, "step": 1316000 }, { "epoch": 60.56, "learning_rate": 1.9718925384119975e-05, "loss": 0.2218, "step": 1316500 }, { "epoch": 60.59, "learning_rate": 1.9707424786088875e-05, "loss": 0.2267, "step": 1317000 }, { "epoch": 60.61, "learning_rate": 1.969592418805778e-05, "loss": 0.2279, "step": 1317500 }, { "epoch": 60.63, "learning_rate": 1.9684423590026682e-05, "loss": 0.2277, "step": 1318000 }, { "epoch": 60.65, "learning_rate": 1.9672922991995585e-05, "loss": 0.226, "step": 1318500 }, { "epoch": 60.68, "learning_rate": 1.9661422393964488e-05, "loss": 0.2199, "step": 1319000 }, { "epoch": 60.7, "learning_rate": 1.964992179593339e-05, "loss": 0.222, "step": 1319500 }, { "epoch": 60.72, "learning_rate": 1.9638421197902295e-05, "loss": 0.2274, "step": 1320000 }, { "epoch": 60.75, "learning_rate": 1.9626920599871195e-05, "loss": 0.225, "step": 1320500 }, { "epoch": 60.77, "learning_rate": 1.9615420001840094e-05, "loss": 0.2271, "step": 1321000 }, { "epoch": 60.79, "learning_rate": 1.9603919403808998e-05, "loss": 0.2266, "step": 1321500 }, { "epoch": 60.82, "learning_rate": 1.95924188057779e-05, "loss": 0.2286, "step": 1322000 }, { "epoch": 60.84, "learning_rate": 1.9580918207746804e-05, "loss": 0.2342, "step": 1322500 }, { "epoch": 60.86, "learning_rate": 1.9569417609715707e-05, "loss": 0.2279, "step": 1323000 }, { "epoch": 60.88, "learning_rate": 1.955791701168461e-05, "loss": 0.2269, "step": 1323500 }, { "epoch": 60.91, "learning_rate": 1.9546416413653514e-05, "loss": 0.2333, "step": 1324000 }, { "epoch": 60.93, "learning_rate": 1.9534915815622414e-05, "loss": 0.2294, "step": 1324500 }, { "epoch": 60.95, "learning_rate": 1.9523415217591313e-05, "loss": 0.2334, "step": 1325000 }, { "epoch": 60.98, "learning_rate": 1.9511914619560217e-05, "loss": 0.2278, "step": 1325500 }, { "epoch": 61.0, "learning_rate": 1.950041402152912e-05, "loss": 0.2318, "step": 1326000 }, { "epoch": 61.02, "learning_rate": 1.9488913423498023e-05, "loss": 0.2135, "step": 1326500 }, { "epoch": 61.05, "learning_rate": 1.9477412825466926e-05, "loss": 0.2124, "step": 1327000 }, { "epoch": 61.07, "learning_rate": 1.946591222743583e-05, "loss": 0.2099, "step": 1327500 }, { "epoch": 61.09, "learning_rate": 1.9454411629404733e-05, "loss": 0.2201, "step": 1328000 }, { "epoch": 61.11, "learning_rate": 1.9442911031373633e-05, "loss": 0.2115, "step": 1328500 }, { "epoch": 61.14, "learning_rate": 1.9431410433342533e-05, "loss": 0.2111, "step": 1329000 }, { "epoch": 61.16, "learning_rate": 1.9419909835311436e-05, "loss": 0.2183, "step": 1329500 }, { "epoch": 61.18, "learning_rate": 1.940840923728034e-05, "loss": 0.218, "step": 1330000 }, { "epoch": 61.21, "learning_rate": 1.9396908639249242e-05, "loss": 0.2168, "step": 1330500 }, { "epoch": 61.23, "learning_rate": 1.9385408041218145e-05, "loss": 0.2154, "step": 1331000 }, { "epoch": 61.25, "learning_rate": 1.937390744318705e-05, "loss": 0.2185, "step": 1331500 }, { "epoch": 61.28, "learning_rate": 1.9362406845155952e-05, "loss": 0.2236, "step": 1332000 }, { "epoch": 61.3, "learning_rate": 1.9350906247124852e-05, "loss": 0.2182, "step": 1332500 }, { "epoch": 61.32, "learning_rate": 1.933940564909375e-05, "loss": 0.2236, "step": 1333000 }, { "epoch": 61.34, "learning_rate": 1.9327905051062655e-05, "loss": 0.2204, "step": 1333500 }, { "epoch": 61.37, "learning_rate": 1.9316404453031558e-05, "loss": 0.2262, "step": 1334000 }, { "epoch": 61.39, "learning_rate": 1.930490385500046e-05, "loss": 0.2214, "step": 1334500 }, { "epoch": 61.41, "learning_rate": 1.9293403256969365e-05, "loss": 0.2251, "step": 1335000 }, { "epoch": 61.44, "learning_rate": 1.9281902658938268e-05, "loss": 0.2256, "step": 1335500 }, { "epoch": 61.46, "learning_rate": 1.9270402060907168e-05, "loss": 0.2245, "step": 1336000 }, { "epoch": 61.48, "learning_rate": 1.925890146287607e-05, "loss": 0.223, "step": 1336500 }, { "epoch": 61.51, "learning_rate": 1.924740086484497e-05, "loss": 0.2219, "step": 1337000 }, { "epoch": 61.53, "learning_rate": 1.9235900266813874e-05, "loss": 0.2256, "step": 1337500 }, { "epoch": 61.55, "learning_rate": 1.9224399668782777e-05, "loss": 0.2311, "step": 1338000 }, { "epoch": 61.57, "learning_rate": 1.921289907075168e-05, "loss": 0.2263, "step": 1338500 }, { "epoch": 61.6, "learning_rate": 1.9201398472720584e-05, "loss": 0.225, "step": 1339000 }, { "epoch": 61.62, "learning_rate": 1.9189897874689487e-05, "loss": 0.2254, "step": 1339500 }, { "epoch": 61.64, "learning_rate": 1.9178397276658387e-05, "loss": 0.2262, "step": 1340000 }, { "epoch": 61.67, "learning_rate": 1.916689667862729e-05, "loss": 0.2225, "step": 1340500 }, { "epoch": 61.69, "learning_rate": 1.915539608059619e-05, "loss": 0.2219, "step": 1341000 }, { "epoch": 61.71, "learning_rate": 1.9143895482565093e-05, "loss": 0.2253, "step": 1341500 }, { "epoch": 61.74, "learning_rate": 1.9132394884533996e-05, "loss": 0.225, "step": 1342000 }, { "epoch": 61.76, "learning_rate": 1.91208942865029e-05, "loss": 0.2292, "step": 1342500 }, { "epoch": 61.78, "learning_rate": 1.9109393688471803e-05, "loss": 0.2304, "step": 1343000 }, { "epoch": 61.8, "learning_rate": 1.9097893090440706e-05, "loss": 0.2319, "step": 1343500 }, { "epoch": 61.83, "learning_rate": 1.9086392492409606e-05, "loss": 0.2262, "step": 1344000 }, { "epoch": 61.85, "learning_rate": 1.907489189437851e-05, "loss": 0.2281, "step": 1344500 }, { "epoch": 61.87, "learning_rate": 1.906339129634741e-05, "loss": 0.2275, "step": 1345000 }, { "epoch": 61.9, "learning_rate": 1.9051890698316312e-05, "loss": 0.2248, "step": 1345500 }, { "epoch": 61.92, "learning_rate": 1.9040390100285215e-05, "loss": 0.2273, "step": 1346000 }, { "epoch": 61.94, "learning_rate": 1.902888950225412e-05, "loss": 0.2223, "step": 1346500 }, { "epoch": 61.97, "learning_rate": 1.9017388904223022e-05, "loss": 0.2242, "step": 1347000 }, { "epoch": 61.99, "learning_rate": 1.9005888306191925e-05, "loss": 0.2325, "step": 1347500 }, { "epoch": 62.01, "learning_rate": 1.8994387708160825e-05, "loss": 0.22, "step": 1348000 }, { "epoch": 62.03, "learning_rate": 1.8982887110129728e-05, "loss": 0.2073, "step": 1348500 }, { "epoch": 62.06, "learning_rate": 1.8971386512098628e-05, "loss": 0.2119, "step": 1349000 }, { "epoch": 62.08, "learning_rate": 1.895988591406753e-05, "loss": 0.2124, "step": 1349500 }, { "epoch": 62.1, "learning_rate": 1.8948385316036435e-05, "loss": 0.2182, "step": 1350000 }, { "epoch": 62.13, "learning_rate": 1.8936884718005338e-05, "loss": 0.2157, "step": 1350500 }, { "epoch": 62.15, "learning_rate": 1.892538411997424e-05, "loss": 0.219, "step": 1351000 }, { "epoch": 62.17, "learning_rate": 1.8913883521943144e-05, "loss": 0.2198, "step": 1351500 }, { "epoch": 62.2, "learning_rate": 1.8902382923912044e-05, "loss": 0.217, "step": 1352000 }, { "epoch": 62.22, "learning_rate": 1.8890882325880947e-05, "loss": 0.2182, "step": 1352500 }, { "epoch": 62.24, "learning_rate": 1.8879381727849847e-05, "loss": 0.2154, "step": 1353000 }, { "epoch": 62.26, "learning_rate": 1.886788112981875e-05, "loss": 0.2208, "step": 1353500 }, { "epoch": 62.29, "learning_rate": 1.8856380531787654e-05, "loss": 0.2223, "step": 1354000 }, { "epoch": 62.31, "learning_rate": 1.8844879933756557e-05, "loss": 0.2155, "step": 1354500 }, { "epoch": 62.33, "learning_rate": 1.883337933572546e-05, "loss": 0.2179, "step": 1355000 }, { "epoch": 62.36, "learning_rate": 1.8821878737694363e-05, "loss": 0.214, "step": 1355500 }, { "epoch": 62.38, "learning_rate": 1.8810378139663263e-05, "loss": 0.2178, "step": 1356000 }, { "epoch": 62.4, "learning_rate": 1.8798877541632166e-05, "loss": 0.2223, "step": 1356500 }, { "epoch": 62.43, "learning_rate": 1.8787376943601066e-05, "loss": 0.2212, "step": 1357000 }, { "epoch": 62.45, "learning_rate": 1.877587634556997e-05, "loss": 0.2218, "step": 1357500 }, { "epoch": 62.47, "learning_rate": 1.8764375747538873e-05, "loss": 0.2211, "step": 1358000 }, { "epoch": 62.49, "learning_rate": 1.8752875149507776e-05, "loss": 0.2216, "step": 1358500 }, { "epoch": 62.52, "learning_rate": 1.874137455147668e-05, "loss": 0.2252, "step": 1359000 }, { "epoch": 62.54, "learning_rate": 1.8729873953445582e-05, "loss": 0.221, "step": 1359500 }, { "epoch": 62.56, "learning_rate": 1.8718373355414482e-05, "loss": 0.224, "step": 1360000 }, { "epoch": 62.59, "learning_rate": 1.8706872757383385e-05, "loss": 0.2258, "step": 1360500 }, { "epoch": 62.61, "learning_rate": 1.8695372159352285e-05, "loss": 0.2254, "step": 1361000 }, { "epoch": 62.63, "learning_rate": 1.868387156132119e-05, "loss": 0.2236, "step": 1361500 }, { "epoch": 62.66, "learning_rate": 1.8672370963290092e-05, "loss": 0.2202, "step": 1362000 }, { "epoch": 62.68, "learning_rate": 1.8660870365258995e-05, "loss": 0.2245, "step": 1362500 }, { "epoch": 62.7, "learning_rate": 1.8649369767227898e-05, "loss": 0.2284, "step": 1363000 }, { "epoch": 62.72, "learning_rate": 1.86378691691968e-05, "loss": 0.2232, "step": 1363500 }, { "epoch": 62.75, "learning_rate": 1.86263685711657e-05, "loss": 0.2297, "step": 1364000 }, { "epoch": 62.77, "learning_rate": 1.8614867973134605e-05, "loss": 0.2259, "step": 1364500 }, { "epoch": 62.79, "learning_rate": 1.8603367375103504e-05, "loss": 0.2309, "step": 1365000 }, { "epoch": 62.82, "learning_rate": 1.8591866777072408e-05, "loss": 0.2286, "step": 1365500 }, { "epoch": 62.84, "learning_rate": 1.858036617904131e-05, "loss": 0.2241, "step": 1366000 }, { "epoch": 62.86, "learning_rate": 1.8568865581010214e-05, "loss": 0.2241, "step": 1366500 }, { "epoch": 62.89, "learning_rate": 1.8557364982979117e-05, "loss": 0.2242, "step": 1367000 }, { "epoch": 62.91, "learning_rate": 1.8545864384948017e-05, "loss": 0.2272, "step": 1367500 }, { "epoch": 62.93, "learning_rate": 1.853436378691692e-05, "loss": 0.2261, "step": 1368000 }, { "epoch": 62.95, "learning_rate": 1.8522863188885824e-05, "loss": 0.2285, "step": 1368500 }, { "epoch": 62.98, "learning_rate": 1.8511362590854724e-05, "loss": 0.2255, "step": 1369000 }, { "epoch": 63.0, "learning_rate": 1.8499861992823627e-05, "loss": 0.2272, "step": 1369500 }, { "epoch": 63.02, "learning_rate": 1.848836139479253e-05, "loss": 0.2064, "step": 1370000 }, { "epoch": 63.05, "learning_rate": 1.8476860796761433e-05, "loss": 0.209, "step": 1370500 }, { "epoch": 63.07, "learning_rate": 1.8465360198730336e-05, "loss": 0.2125, "step": 1371000 }, { "epoch": 63.09, "learning_rate": 1.8453859600699236e-05, "loss": 0.2149, "step": 1371500 }, { "epoch": 63.12, "learning_rate": 1.844235900266814e-05, "loss": 0.2086, "step": 1372000 }, { "epoch": 63.14, "learning_rate": 1.8430858404637043e-05, "loss": 0.2169, "step": 1372500 }, { "epoch": 63.16, "learning_rate": 1.8419357806605943e-05, "loss": 0.2125, "step": 1373000 }, { "epoch": 63.18, "learning_rate": 1.8407857208574846e-05, "loss": 0.2194, "step": 1373500 }, { "epoch": 63.21, "learning_rate": 1.839635661054375e-05, "loss": 0.2185, "step": 1374000 }, { "epoch": 63.23, "learning_rate": 1.8384856012512652e-05, "loss": 0.219, "step": 1374500 }, { "epoch": 63.25, "learning_rate": 1.8373355414481556e-05, "loss": 0.2163, "step": 1375000 }, { "epoch": 63.28, "learning_rate": 1.8361854816450455e-05, "loss": 0.2188, "step": 1375500 }, { "epoch": 63.3, "learning_rate": 1.835035421841936e-05, "loss": 0.2189, "step": 1376000 }, { "epoch": 63.32, "learning_rate": 1.8338853620388262e-05, "loss": 0.2205, "step": 1376500 }, { "epoch": 63.35, "learning_rate": 1.8327353022357162e-05, "loss": 0.2229, "step": 1377000 }, { "epoch": 63.37, "learning_rate": 1.8315852424326065e-05, "loss": 0.2149, "step": 1377500 }, { "epoch": 63.39, "learning_rate": 1.8304351826294968e-05, "loss": 0.2183, "step": 1378000 }, { "epoch": 63.41, "learning_rate": 1.829285122826387e-05, "loss": 0.2223, "step": 1378500 }, { "epoch": 63.44, "learning_rate": 1.8281350630232775e-05, "loss": 0.2174, "step": 1379000 }, { "epoch": 63.46, "learning_rate": 1.8269850032201675e-05, "loss": 0.2224, "step": 1379500 }, { "epoch": 63.48, "learning_rate": 1.8258349434170578e-05, "loss": 0.2215, "step": 1380000 }, { "epoch": 63.51, "learning_rate": 1.824684883613948e-05, "loss": 0.2202, "step": 1380500 }, { "epoch": 63.53, "learning_rate": 1.823534823810838e-05, "loss": 0.2221, "step": 1381000 }, { "epoch": 63.55, "learning_rate": 1.8223847640077284e-05, "loss": 0.2191, "step": 1381500 }, { "epoch": 63.58, "learning_rate": 1.8212347042046187e-05, "loss": 0.2194, "step": 1382000 }, { "epoch": 63.6, "learning_rate": 1.820084644401509e-05, "loss": 0.2236, "step": 1382500 }, { "epoch": 63.62, "learning_rate": 1.8189345845983994e-05, "loss": 0.2283, "step": 1383000 }, { "epoch": 63.64, "learning_rate": 1.8177845247952894e-05, "loss": 0.2207, "step": 1383500 }, { "epoch": 63.67, "learning_rate": 1.8166344649921797e-05, "loss": 0.2159, "step": 1384000 }, { "epoch": 63.69, "learning_rate": 1.81548440518907e-05, "loss": 0.2213, "step": 1384500 }, { "epoch": 63.71, "learning_rate": 1.81433434538596e-05, "loss": 0.2228, "step": 1385000 }, { "epoch": 63.74, "learning_rate": 1.8131842855828503e-05, "loss": 0.2266, "step": 1385500 }, { "epoch": 63.76, "learning_rate": 1.8120342257797406e-05, "loss": 0.2257, "step": 1386000 }, { "epoch": 63.78, "learning_rate": 1.810884165976631e-05, "loss": 0.2242, "step": 1386500 }, { "epoch": 63.81, "learning_rate": 1.8097341061735213e-05, "loss": 0.226, "step": 1387000 }, { "epoch": 63.83, "learning_rate": 1.8085840463704113e-05, "loss": 0.2249, "step": 1387500 }, { "epoch": 63.85, "learning_rate": 1.8074339865673016e-05, "loss": 0.2271, "step": 1388000 }, { "epoch": 63.87, "learning_rate": 1.806283926764192e-05, "loss": 0.2185, "step": 1388500 }, { "epoch": 63.9, "learning_rate": 1.805133866961082e-05, "loss": 0.2249, "step": 1389000 }, { "epoch": 63.92, "learning_rate": 1.8039838071579722e-05, "loss": 0.2268, "step": 1389500 }, { "epoch": 63.94, "learning_rate": 1.8028337473548625e-05, "loss": 0.2222, "step": 1390000 }, { "epoch": 63.97, "learning_rate": 1.801683687551753e-05, "loss": 0.2327, "step": 1390500 }, { "epoch": 63.99, "learning_rate": 1.8005336277486432e-05, "loss": 0.231, "step": 1391000 }, { "epoch": 64.01, "learning_rate": 1.7993835679455332e-05, "loss": 0.2153, "step": 1391500 }, { "epoch": 64.04, "learning_rate": 1.7982335081424235e-05, "loss": 0.2131, "step": 1392000 }, { "epoch": 64.06, "learning_rate": 1.7970834483393138e-05, "loss": 0.214, "step": 1392500 }, { "epoch": 64.08, "learning_rate": 1.7959333885362038e-05, "loss": 0.2127, "step": 1393000 }, { "epoch": 64.1, "learning_rate": 1.794783328733094e-05, "loss": 0.2134, "step": 1393500 }, { "epoch": 64.13, "learning_rate": 1.7936332689299845e-05, "loss": 0.2136, "step": 1394000 }, { "epoch": 64.15, "learning_rate": 1.7924832091268748e-05, "loss": 0.2131, "step": 1394500 }, { "epoch": 64.17, "learning_rate": 1.791333149323765e-05, "loss": 0.2095, "step": 1395000 }, { "epoch": 64.2, "learning_rate": 1.790183089520655e-05, "loss": 0.2108, "step": 1395500 }, { "epoch": 64.22, "learning_rate": 1.7890330297175454e-05, "loss": 0.2134, "step": 1396000 }, { "epoch": 64.24, "learning_rate": 1.7878829699144357e-05, "loss": 0.2184, "step": 1396500 }, { "epoch": 64.27, "learning_rate": 1.7867329101113257e-05, "loss": 0.2142, "step": 1397000 }, { "epoch": 64.29, "learning_rate": 1.785582850308216e-05, "loss": 0.2176, "step": 1397500 }, { "epoch": 64.31, "learning_rate": 1.7844327905051064e-05, "loss": 0.2156, "step": 1398000 }, { "epoch": 64.33, "learning_rate": 1.7832827307019967e-05, "loss": 0.2182, "step": 1398500 }, { "epoch": 64.36, "learning_rate": 1.7821326708988867e-05, "loss": 0.2213, "step": 1399000 }, { "epoch": 64.38, "learning_rate": 1.780982611095777e-05, "loss": 0.2102, "step": 1399500 }, { "epoch": 64.4, "learning_rate": 1.7798325512926673e-05, "loss": 0.2162, "step": 1400000 }, { "epoch": 64.43, "learning_rate": 1.7786824914895576e-05, "loss": 0.2177, "step": 1400500 }, { "epoch": 64.45, "learning_rate": 1.7775324316864476e-05, "loss": 0.2189, "step": 1401000 }, { "epoch": 64.47, "learning_rate": 1.776382371883338e-05, "loss": 0.2202, "step": 1401500 }, { "epoch": 64.5, "learning_rate": 1.7752323120802283e-05, "loss": 0.2207, "step": 1402000 }, { "epoch": 64.52, "learning_rate": 1.7740822522771186e-05, "loss": 0.221, "step": 1402500 }, { "epoch": 64.54, "learning_rate": 1.7729321924740086e-05, "loss": 0.2212, "step": 1403000 }, { "epoch": 64.56, "learning_rate": 1.771782132670899e-05, "loss": 0.2234, "step": 1403500 }, { "epoch": 64.59, "learning_rate": 1.7706320728677892e-05, "loss": 0.2184, "step": 1404000 }, { "epoch": 64.61, "learning_rate": 1.7694820130646796e-05, "loss": 0.2244, "step": 1404500 }, { "epoch": 64.63, "learning_rate": 1.7683319532615695e-05, "loss": 0.2241, "step": 1405000 }, { "epoch": 64.66, "learning_rate": 1.76718189345846e-05, "loss": 0.2229, "step": 1405500 }, { "epoch": 64.68, "learning_rate": 1.7660318336553502e-05, "loss": 0.2251, "step": 1406000 }, { "epoch": 64.7, "learning_rate": 1.7648817738522405e-05, "loss": 0.222, "step": 1406500 }, { "epoch": 64.73, "learning_rate": 1.7637317140491305e-05, "loss": 0.2211, "step": 1407000 }, { "epoch": 64.75, "learning_rate": 1.7625816542460208e-05, "loss": 0.223, "step": 1407500 }, { "epoch": 64.77, "learning_rate": 1.761431594442911e-05, "loss": 0.227, "step": 1408000 }, { "epoch": 64.79, "learning_rate": 1.7602815346398015e-05, "loss": 0.2207, "step": 1408500 }, { "epoch": 64.82, "learning_rate": 1.7591314748366915e-05, "loss": 0.2228, "step": 1409000 }, { "epoch": 64.84, "learning_rate": 1.7579814150335818e-05, "loss": 0.224, "step": 1409500 }, { "epoch": 64.86, "learning_rate": 1.756831355230472e-05, "loss": 0.223, "step": 1410000 }, { "epoch": 64.89, "learning_rate": 1.7556812954273624e-05, "loss": 0.2295, "step": 1410500 }, { "epoch": 64.91, "learning_rate": 1.7545312356242524e-05, "loss": 0.224, "step": 1411000 }, { "epoch": 64.93, "learning_rate": 1.7533811758211427e-05, "loss": 0.2223, "step": 1411500 }, { "epoch": 64.96, "learning_rate": 1.752231116018033e-05, "loss": 0.2231, "step": 1412000 }, { "epoch": 64.98, "learning_rate": 1.7510810562149234e-05, "loss": 0.2276, "step": 1412500 }, { "epoch": 65.0, "learning_rate": 1.7499309964118134e-05, "loss": 0.2239, "step": 1413000 }, { "epoch": 65.02, "learning_rate": 1.7487809366087037e-05, "loss": 0.211, "step": 1413500 }, { "epoch": 65.05, "learning_rate": 1.747630876805594e-05, "loss": 0.2103, "step": 1414000 }, { "epoch": 65.07, "learning_rate": 1.7464808170024843e-05, "loss": 0.2105, "step": 1414500 }, { "epoch": 65.09, "learning_rate": 1.7453307571993743e-05, "loss": 0.2075, "step": 1415000 }, { "epoch": 65.12, "learning_rate": 1.7441806973962646e-05, "loss": 0.2104, "step": 1415500 }, { "epoch": 65.14, "learning_rate": 1.743030637593155e-05, "loss": 0.2083, "step": 1416000 }, { "epoch": 65.16, "learning_rate": 1.7418805777900453e-05, "loss": 0.2127, "step": 1416500 }, { "epoch": 65.19, "learning_rate": 1.7407305179869353e-05, "loss": 0.2186, "step": 1417000 }, { "epoch": 65.21, "learning_rate": 1.7395804581838256e-05, "loss": 0.215, "step": 1417500 }, { "epoch": 65.23, "learning_rate": 1.738430398380716e-05, "loss": 0.211, "step": 1418000 }, { "epoch": 65.25, "learning_rate": 1.7372803385776062e-05, "loss": 0.2156, "step": 1418500 }, { "epoch": 65.28, "learning_rate": 1.7361302787744962e-05, "loss": 0.2165, "step": 1419000 }, { "epoch": 65.3, "learning_rate": 1.7349802189713865e-05, "loss": 0.2145, "step": 1419500 }, { "epoch": 65.32, "learning_rate": 1.733830159168277e-05, "loss": 0.2147, "step": 1420000 }, { "epoch": 65.35, "learning_rate": 1.7326800993651672e-05, "loss": 0.2162, "step": 1420500 }, { "epoch": 65.37, "learning_rate": 1.7315300395620572e-05, "loss": 0.2199, "step": 1421000 }, { "epoch": 65.39, "learning_rate": 1.7303799797589475e-05, "loss": 0.2132, "step": 1421500 }, { "epoch": 65.42, "learning_rate": 1.7292299199558378e-05, "loss": 0.2223, "step": 1422000 }, { "epoch": 65.44, "learning_rate": 1.728079860152728e-05, "loss": 0.2173, "step": 1422500 }, { "epoch": 65.46, "learning_rate": 1.726929800349618e-05, "loss": 0.2137, "step": 1423000 }, { "epoch": 65.48, "learning_rate": 1.7257797405465085e-05, "loss": 0.2195, "step": 1423500 }, { "epoch": 65.51, "learning_rate": 1.7246296807433988e-05, "loss": 0.223, "step": 1424000 }, { "epoch": 65.53, "learning_rate": 1.723479620940289e-05, "loss": 0.2188, "step": 1424500 }, { "epoch": 65.55, "learning_rate": 1.722329561137179e-05, "loss": 0.2231, "step": 1425000 }, { "epoch": 65.58, "learning_rate": 1.7211795013340694e-05, "loss": 0.2225, "step": 1425500 }, { "epoch": 65.6, "learning_rate": 1.7200294415309597e-05, "loss": 0.2167, "step": 1426000 }, { "epoch": 65.62, "learning_rate": 1.71887938172785e-05, "loss": 0.2218, "step": 1426500 }, { "epoch": 65.65, "learning_rate": 1.71772932192474e-05, "loss": 0.2204, "step": 1427000 }, { "epoch": 65.67, "learning_rate": 1.7165792621216304e-05, "loss": 0.2236, "step": 1427500 }, { "epoch": 65.69, "learning_rate": 1.7154292023185207e-05, "loss": 0.2189, "step": 1428000 }, { "epoch": 65.71, "learning_rate": 1.714279142515411e-05, "loss": 0.2209, "step": 1428500 }, { "epoch": 65.74, "learning_rate": 1.713129082712301e-05, "loss": 0.2207, "step": 1429000 }, { "epoch": 65.76, "learning_rate": 1.7119790229091913e-05, "loss": 0.2222, "step": 1429500 }, { "epoch": 65.78, "learning_rate": 1.7108289631060816e-05, "loss": 0.2225, "step": 1430000 }, { "epoch": 65.81, "learning_rate": 1.7096789033029716e-05, "loss": 0.2258, "step": 1430500 }, { "epoch": 65.83, "learning_rate": 1.708528843499862e-05, "loss": 0.2276, "step": 1431000 }, { "epoch": 65.85, "learning_rate": 1.7073787836967523e-05, "loss": 0.2135, "step": 1431500 }, { "epoch": 65.88, "learning_rate": 1.7062287238936426e-05, "loss": 0.2218, "step": 1432000 }, { "epoch": 65.9, "learning_rate": 1.705078664090533e-05, "loss": 0.2256, "step": 1432500 }, { "epoch": 65.92, "learning_rate": 1.703928604287423e-05, "loss": 0.2247, "step": 1433000 }, { "epoch": 65.94, "learning_rate": 1.7027785444843132e-05, "loss": 0.226, "step": 1433500 }, { "epoch": 65.97, "learning_rate": 1.7016284846812036e-05, "loss": 0.2269, "step": 1434000 }, { "epoch": 65.99, "learning_rate": 1.7004784248780935e-05, "loss": 0.2263, "step": 1434500 }, { "epoch": 66.01, "learning_rate": 1.699328365074984e-05, "loss": 0.2124, "step": 1435000 }, { "epoch": 66.04, "learning_rate": 1.6981783052718742e-05, "loss": 0.2043, "step": 1435500 }, { "epoch": 66.06, "learning_rate": 1.6970282454687645e-05, "loss": 0.2058, "step": 1436000 }, { "epoch": 66.08, "learning_rate": 1.695878185665655e-05, "loss": 0.2106, "step": 1436500 }, { "epoch": 66.11, "learning_rate": 1.6947281258625448e-05, "loss": 0.2042, "step": 1437000 }, { "epoch": 66.13, "learning_rate": 1.693578066059435e-05, "loss": 0.2122, "step": 1437500 }, { "epoch": 66.15, "learning_rate": 1.6924280062563255e-05, "loss": 0.215, "step": 1438000 }, { "epoch": 66.17, "learning_rate": 1.6912779464532155e-05, "loss": 0.2129, "step": 1438500 }, { "epoch": 66.2, "learning_rate": 1.6901278866501058e-05, "loss": 0.2175, "step": 1439000 }, { "epoch": 66.22, "learning_rate": 1.688977826846996e-05, "loss": 0.2158, "step": 1439500 }, { "epoch": 66.24, "learning_rate": 1.6878277670438864e-05, "loss": 0.2171, "step": 1440000 }, { "epoch": 66.27, "learning_rate": 1.6866777072407767e-05, "loss": 0.2167, "step": 1440500 }, { "epoch": 66.29, "learning_rate": 1.6855276474376667e-05, "loss": 0.2163, "step": 1441000 }, { "epoch": 66.31, "learning_rate": 1.684377587634557e-05, "loss": 0.2178, "step": 1441500 }, { "epoch": 66.34, "learning_rate": 1.6832275278314474e-05, "loss": 0.2163, "step": 1442000 }, { "epoch": 66.36, "learning_rate": 1.6820774680283374e-05, "loss": 0.2182, "step": 1442500 }, { "epoch": 66.38, "learning_rate": 1.6809274082252277e-05, "loss": 0.2157, "step": 1443000 }, { "epoch": 66.4, "learning_rate": 1.679777348422118e-05, "loss": 0.2146, "step": 1443500 }, { "epoch": 66.43, "learning_rate": 1.6786272886190083e-05, "loss": 0.2201, "step": 1444000 }, { "epoch": 66.45, "learning_rate": 1.6774772288158987e-05, "loss": 0.215, "step": 1444500 }, { "epoch": 66.47, "learning_rate": 1.6763271690127886e-05, "loss": 0.2164, "step": 1445000 }, { "epoch": 66.5, "learning_rate": 1.675177109209679e-05, "loss": 0.2149, "step": 1445500 }, { "epoch": 66.52, "learning_rate": 1.6740270494065693e-05, "loss": 0.2184, "step": 1446000 }, { "epoch": 66.54, "learning_rate": 1.6728769896034593e-05, "loss": 0.2151, "step": 1446500 }, { "epoch": 66.57, "learning_rate": 1.6717269298003496e-05, "loss": 0.2195, "step": 1447000 }, { "epoch": 66.59, "learning_rate": 1.67057686999724e-05, "loss": 0.2148, "step": 1447500 }, { "epoch": 66.61, "learning_rate": 1.6694268101941302e-05, "loss": 0.2138, "step": 1448000 }, { "epoch": 66.63, "learning_rate": 1.6682767503910206e-05, "loss": 0.2222, "step": 1448500 }, { "epoch": 66.66, "learning_rate": 1.6671266905879105e-05, "loss": 0.2194, "step": 1449000 }, { "epoch": 66.68, "learning_rate": 1.665976630784801e-05, "loss": 0.2221, "step": 1449500 }, { "epoch": 66.7, "learning_rate": 1.6648265709816912e-05, "loss": 0.2208, "step": 1450000 }, { "epoch": 66.73, "learning_rate": 1.6636765111785812e-05, "loss": 0.2209, "step": 1450500 }, { "epoch": 66.75, "learning_rate": 1.6625264513754715e-05, "loss": 0.2214, "step": 1451000 }, { "epoch": 66.77, "learning_rate": 1.6613763915723618e-05, "loss": 0.2213, "step": 1451500 }, { "epoch": 66.8, "learning_rate": 1.660226331769252e-05, "loss": 0.2224, "step": 1452000 }, { "epoch": 66.82, "learning_rate": 1.6590762719661425e-05, "loss": 0.2211, "step": 1452500 }, { "epoch": 66.84, "learning_rate": 1.6579262121630325e-05, "loss": 0.2251, "step": 1453000 }, { "epoch": 66.86, "learning_rate": 1.6567761523599228e-05, "loss": 0.2156, "step": 1453500 }, { "epoch": 66.89, "learning_rate": 1.655626092556813e-05, "loss": 0.2219, "step": 1454000 }, { "epoch": 66.91, "learning_rate": 1.654476032753703e-05, "loss": 0.2176, "step": 1454500 }, { "epoch": 66.93, "learning_rate": 1.6533259729505934e-05, "loss": 0.2237, "step": 1455000 }, { "epoch": 66.96, "learning_rate": 1.6521759131474837e-05, "loss": 0.2204, "step": 1455500 }, { "epoch": 66.98, "learning_rate": 1.651025853344374e-05, "loss": 0.2243, "step": 1456000 }, { "epoch": 67.0, "learning_rate": 1.6498757935412644e-05, "loss": 0.2211, "step": 1456500 }, { "epoch": 67.03, "learning_rate": 1.6487257337381544e-05, "loss": 0.2052, "step": 1457000 }, { "epoch": 67.05, "learning_rate": 1.6475756739350447e-05, "loss": 0.2051, "step": 1457500 }, { "epoch": 67.07, "learning_rate": 1.6464256141319347e-05, "loss": 0.2074, "step": 1458000 }, { "epoch": 67.09, "learning_rate": 1.645275554328825e-05, "loss": 0.2051, "step": 1458500 }, { "epoch": 67.12, "learning_rate": 1.6441254945257153e-05, "loss": 0.2112, "step": 1459000 }, { "epoch": 67.14, "learning_rate": 1.6429754347226056e-05, "loss": 0.2158, "step": 1459500 }, { "epoch": 67.16, "learning_rate": 1.641825374919496e-05, "loss": 0.2114, "step": 1460000 }, { "epoch": 67.19, "learning_rate": 1.6406753151163863e-05, "loss": 0.2096, "step": 1460500 }, { "epoch": 67.21, "learning_rate": 1.6395252553132763e-05, "loss": 0.2126, "step": 1461000 }, { "epoch": 67.23, "learning_rate": 1.6383751955101666e-05, "loss": 0.2144, "step": 1461500 }, { "epoch": 67.26, "learning_rate": 1.6372251357070566e-05, "loss": 0.2185, "step": 1462000 }, { "epoch": 67.28, "learning_rate": 1.636075075903947e-05, "loss": 0.2114, "step": 1462500 }, { "epoch": 67.3, "learning_rate": 1.6349250161008372e-05, "loss": 0.2201, "step": 1463000 }, { "epoch": 67.32, "learning_rate": 1.6337749562977276e-05, "loss": 0.2086, "step": 1463500 }, { "epoch": 67.35, "learning_rate": 1.632624896494618e-05, "loss": 0.2197, "step": 1464000 }, { "epoch": 67.37, "learning_rate": 1.6314748366915082e-05, "loss": 0.2175, "step": 1464500 }, { "epoch": 67.39, "learning_rate": 1.6303247768883982e-05, "loss": 0.2188, "step": 1465000 }, { "epoch": 67.42, "learning_rate": 1.6291747170852885e-05, "loss": 0.2132, "step": 1465500 }, { "epoch": 67.44, "learning_rate": 1.6280246572821785e-05, "loss": 0.214, "step": 1466000 }, { "epoch": 67.46, "learning_rate": 1.6268745974790688e-05, "loss": 0.2183, "step": 1466500 }, { "epoch": 67.49, "learning_rate": 1.625724537675959e-05, "loss": 0.2153, "step": 1467000 }, { "epoch": 67.51, "learning_rate": 1.6245744778728495e-05, "loss": 0.2117, "step": 1467500 }, { "epoch": 67.53, "learning_rate": 1.6234244180697398e-05, "loss": 0.2131, "step": 1468000 }, { "epoch": 67.55, "learning_rate": 1.62227435826663e-05, "loss": 0.2156, "step": 1468500 }, { "epoch": 67.58, "learning_rate": 1.62112429846352e-05, "loss": 0.2181, "step": 1469000 }, { "epoch": 67.6, "learning_rate": 1.6199742386604104e-05, "loss": 0.2159, "step": 1469500 }, { "epoch": 67.62, "learning_rate": 1.6188241788573004e-05, "loss": 0.2171, "step": 1470000 }, { "epoch": 67.65, "learning_rate": 1.6176741190541907e-05, "loss": 0.2179, "step": 1470500 }, { "epoch": 67.67, "learning_rate": 1.616524059251081e-05, "loss": 0.2235, "step": 1471000 }, { "epoch": 67.69, "learning_rate": 1.6153739994479714e-05, "loss": 0.2195, "step": 1471500 }, { "epoch": 67.72, "learning_rate": 1.6142239396448617e-05, "loss": 0.2219, "step": 1472000 }, { "epoch": 67.74, "learning_rate": 1.613073879841752e-05, "loss": 0.2209, "step": 1472500 }, { "epoch": 67.76, "learning_rate": 1.611923820038642e-05, "loss": 0.2192, "step": 1473000 }, { "epoch": 67.78, "learning_rate": 1.6107737602355323e-05, "loss": 0.2233, "step": 1473500 }, { "epoch": 67.81, "learning_rate": 1.6096237004324223e-05, "loss": 0.2225, "step": 1474000 }, { "epoch": 67.83, "learning_rate": 1.6084736406293126e-05, "loss": 0.2186, "step": 1474500 }, { "epoch": 67.85, "learning_rate": 1.607323580826203e-05, "loss": 0.2202, "step": 1475000 }, { "epoch": 67.88, "learning_rate": 1.6061735210230933e-05, "loss": 0.2202, "step": 1475500 }, { "epoch": 67.9, "learning_rate": 1.6050234612199836e-05, "loss": 0.2207, "step": 1476000 }, { "epoch": 67.92, "learning_rate": 1.603873401416874e-05, "loss": 0.2196, "step": 1476500 }, { "epoch": 67.95, "learning_rate": 1.602723341613764e-05, "loss": 0.2215, "step": 1477000 }, { "epoch": 67.97, "learning_rate": 1.6015732818106542e-05, "loss": 0.2205, "step": 1477500 }, { "epoch": 67.99, "learning_rate": 1.6004232220075442e-05, "loss": 0.2213, "step": 1478000 }, { "epoch": 68.01, "learning_rate": 1.5992731622044345e-05, "loss": 0.2106, "step": 1478500 }, { "epoch": 68.04, "learning_rate": 1.598123102401325e-05, "loss": 0.2056, "step": 1479000 }, { "epoch": 68.06, "learning_rate": 1.5969730425982152e-05, "loss": 0.2085, "step": 1479500 }, { "epoch": 68.08, "learning_rate": 1.5958229827951055e-05, "loss": 0.2053, "step": 1480000 }, { "epoch": 68.11, "learning_rate": 1.594672922991996e-05, "loss": 0.206, "step": 1480500 }, { "epoch": 68.13, "learning_rate": 1.5935228631888858e-05, "loss": 0.2067, "step": 1481000 }, { "epoch": 68.15, "learning_rate": 1.592372803385776e-05, "loss": 0.2114, "step": 1481500 }, { "epoch": 68.18, "learning_rate": 1.591222743582666e-05, "loss": 0.2104, "step": 1482000 }, { "epoch": 68.2, "learning_rate": 1.5900726837795565e-05, "loss": 0.2099, "step": 1482500 }, { "epoch": 68.22, "learning_rate": 1.5889226239764468e-05, "loss": 0.2135, "step": 1483000 }, { "epoch": 68.24, "learning_rate": 1.587772564173337e-05, "loss": 0.2075, "step": 1483500 }, { "epoch": 68.27, "learning_rate": 1.5866225043702274e-05, "loss": 0.2159, "step": 1484000 }, { "epoch": 68.29, "learning_rate": 1.5854724445671178e-05, "loss": 0.2101, "step": 1484500 }, { "epoch": 68.31, "learning_rate": 1.5843223847640077e-05, "loss": 0.2168, "step": 1485000 }, { "epoch": 68.34, "learning_rate": 1.583172324960898e-05, "loss": 0.2135, "step": 1485500 }, { "epoch": 68.36, "learning_rate": 1.582022265157788e-05, "loss": 0.2121, "step": 1486000 }, { "epoch": 68.38, "learning_rate": 1.5808722053546784e-05, "loss": 0.2147, "step": 1486500 }, { "epoch": 68.41, "learning_rate": 1.5797221455515687e-05, "loss": 0.2153, "step": 1487000 }, { "epoch": 68.43, "learning_rate": 1.578572085748459e-05, "loss": 0.2134, "step": 1487500 }, { "epoch": 68.45, "learning_rate": 1.5774220259453493e-05, "loss": 0.2207, "step": 1488000 }, { "epoch": 68.47, "learning_rate": 1.5762719661422397e-05, "loss": 0.2124, "step": 1488500 }, { "epoch": 68.5, "learning_rate": 1.5751219063391296e-05, "loss": 0.2182, "step": 1489000 }, { "epoch": 68.52, "learning_rate": 1.57397184653602e-05, "loss": 0.216, "step": 1489500 }, { "epoch": 68.54, "learning_rate": 1.57282178673291e-05, "loss": 0.2197, "step": 1490000 }, { "epoch": 68.57, "learning_rate": 1.5716717269298003e-05, "loss": 0.2174, "step": 1490500 }, { "epoch": 68.59, "learning_rate": 1.5705216671266906e-05, "loss": 0.2158, "step": 1491000 }, { "epoch": 68.61, "learning_rate": 1.569371607323581e-05, "loss": 0.2174, "step": 1491500 }, { "epoch": 68.64, "learning_rate": 1.5682215475204712e-05, "loss": 0.2146, "step": 1492000 }, { "epoch": 68.66, "learning_rate": 1.5670714877173616e-05, "loss": 0.2152, "step": 1492500 }, { "epoch": 68.68, "learning_rate": 1.5659214279142516e-05, "loss": 0.2224, "step": 1493000 }, { "epoch": 68.7, "learning_rate": 1.564771368111142e-05, "loss": 0.2184, "step": 1493500 }, { "epoch": 68.73, "learning_rate": 1.563621308308032e-05, "loss": 0.2212, "step": 1494000 }, { "epoch": 68.75, "learning_rate": 1.5624712485049222e-05, "loss": 0.2229, "step": 1494500 }, { "epoch": 68.77, "learning_rate": 1.5613211887018125e-05, "loss": 0.217, "step": 1495000 }, { "epoch": 68.8, "learning_rate": 1.560171128898703e-05, "loss": 0.2175, "step": 1495500 }, { "epoch": 68.82, "learning_rate": 1.559021069095593e-05, "loss": 0.2191, "step": 1496000 }, { "epoch": 68.84, "learning_rate": 1.5578710092924835e-05, "loss": 0.2132, "step": 1496500 }, { "epoch": 68.87, "learning_rate": 1.5567209494893738e-05, "loss": 0.221, "step": 1497000 }, { "epoch": 68.89, "learning_rate": 1.5555708896862638e-05, "loss": 0.2215, "step": 1497500 }, { "epoch": 68.91, "learning_rate": 1.5544208298831538e-05, "loss": 0.2281, "step": 1498000 }, { "epoch": 68.93, "learning_rate": 1.553270770080044e-05, "loss": 0.2167, "step": 1498500 }, { "epoch": 68.96, "learning_rate": 1.5521207102769344e-05, "loss": 0.2204, "step": 1499000 }, { "epoch": 68.98, "learning_rate": 1.5509706504738247e-05, "loss": 0.2177, "step": 1499500 }, { "epoch": 69.0, "learning_rate": 1.549820590670715e-05, "loss": 0.2175, "step": 1500000 }, { "epoch": 69.03, "learning_rate": 1.5486705308676054e-05, "loss": 0.2024, "step": 1500500 }, { "epoch": 69.05, "learning_rate": 1.5475204710644957e-05, "loss": 0.2023, "step": 1501000 }, { "epoch": 69.07, "learning_rate": 1.5463704112613857e-05, "loss": 0.2081, "step": 1501500 }, { "epoch": 69.1, "learning_rate": 1.5452203514582757e-05, "loss": 0.2037, "step": 1502000 }, { "epoch": 69.12, "learning_rate": 1.544070291655166e-05, "loss": 0.2107, "step": 1502500 }, { "epoch": 69.14, "learning_rate": 1.5429202318520563e-05, "loss": 0.2058, "step": 1503000 }, { "epoch": 69.16, "learning_rate": 1.5417701720489467e-05, "loss": 0.2097, "step": 1503500 }, { "epoch": 69.19, "learning_rate": 1.540620112245837e-05, "loss": 0.2078, "step": 1504000 }, { "epoch": 69.21, "learning_rate": 1.5394700524427273e-05, "loss": 0.2062, "step": 1504500 }, { "epoch": 69.23, "learning_rate": 1.5383199926396176e-05, "loss": 0.2151, "step": 1505000 }, { "epoch": 69.26, "learning_rate": 1.5371699328365076e-05, "loss": 0.2105, "step": 1505500 }, { "epoch": 69.28, "learning_rate": 1.5360198730333976e-05, "loss": 0.2125, "step": 1506000 }, { "epoch": 69.3, "learning_rate": 1.534869813230288e-05, "loss": 0.2136, "step": 1506500 }, { "epoch": 69.33, "learning_rate": 1.5337197534271782e-05, "loss": 0.2111, "step": 1507000 }, { "epoch": 69.35, "learning_rate": 1.5325696936240686e-05, "loss": 0.2105, "step": 1507500 }, { "epoch": 69.37, "learning_rate": 1.531419633820959e-05, "loss": 0.2149, "step": 1508000 }, { "epoch": 69.39, "learning_rate": 1.5302695740178492e-05, "loss": 0.2089, "step": 1508500 }, { "epoch": 69.42, "learning_rate": 1.5291195142147395e-05, "loss": 0.2137, "step": 1509000 }, { "epoch": 69.44, "learning_rate": 1.5279694544116295e-05, "loss": 0.2167, "step": 1509500 }, { "epoch": 69.46, "learning_rate": 1.5268193946085195e-05, "loss": 0.2098, "step": 1510000 }, { "epoch": 69.49, "learning_rate": 1.5256693348054098e-05, "loss": 0.2189, "step": 1510500 }, { "epoch": 69.51, "learning_rate": 1.5245192750023002e-05, "loss": 0.2191, "step": 1511000 }, { "epoch": 69.53, "learning_rate": 1.5233692151991905e-05, "loss": 0.2135, "step": 1511500 }, { "epoch": 69.56, "learning_rate": 1.5222191553960808e-05, "loss": 0.2156, "step": 1512000 }, { "epoch": 69.58, "learning_rate": 1.521069095592971e-05, "loss": 0.2184, "step": 1512500 }, { "epoch": 69.6, "learning_rate": 1.5199190357898613e-05, "loss": 0.2167, "step": 1513000 }, { "epoch": 69.62, "learning_rate": 1.5187689759867513e-05, "loss": 0.2132, "step": 1513500 }, { "epoch": 69.65, "learning_rate": 1.5176189161836416e-05, "loss": 0.2182, "step": 1514000 }, { "epoch": 69.67, "learning_rate": 1.5164688563805317e-05, "loss": 0.2207, "step": 1514500 }, { "epoch": 69.69, "learning_rate": 1.515318796577422e-05, "loss": 0.2163, "step": 1515000 }, { "epoch": 69.72, "learning_rate": 1.5141687367743124e-05, "loss": 0.2184, "step": 1515500 }, { "epoch": 69.74, "learning_rate": 1.5130186769712027e-05, "loss": 0.2136, "step": 1516000 }, { "epoch": 69.76, "learning_rate": 1.5118686171680929e-05, "loss": 0.2167, "step": 1516500 }, { "epoch": 69.79, "learning_rate": 1.5107185573649832e-05, "loss": 0.2217, "step": 1517000 }, { "epoch": 69.81, "learning_rate": 1.5095684975618732e-05, "loss": 0.2192, "step": 1517500 }, { "epoch": 69.83, "learning_rate": 1.5084184377587635e-05, "loss": 0.2142, "step": 1518000 }, { "epoch": 69.85, "learning_rate": 1.5072683779556536e-05, "loss": 0.2148, "step": 1518500 }, { "epoch": 69.88, "learning_rate": 1.506118318152544e-05, "loss": 0.222, "step": 1519000 }, { "epoch": 69.9, "learning_rate": 1.5049682583494343e-05, "loss": 0.2237, "step": 1519500 }, { "epoch": 69.92, "learning_rate": 1.5038181985463246e-05, "loss": 0.2193, "step": 1520000 }, { "epoch": 69.95, "learning_rate": 1.5026681387432148e-05, "loss": 0.2211, "step": 1520500 }, { "epoch": 69.97, "learning_rate": 1.5015180789401048e-05, "loss": 0.222, "step": 1521000 }, { "epoch": 69.99, "learning_rate": 1.500368019136995e-05, "loss": 0.2195, "step": 1521500 }, { "epoch": 70.02, "learning_rate": 1.4992179593338854e-05, "loss": 0.21, "step": 1522000 }, { "epoch": 70.04, "learning_rate": 1.4980678995307757e-05, "loss": 0.2043, "step": 1522500 }, { "epoch": 70.06, "learning_rate": 1.4969178397276659e-05, "loss": 0.2083, "step": 1523000 }, { "epoch": 70.08, "learning_rate": 1.4957677799245562e-05, "loss": 0.2078, "step": 1523500 }, { "epoch": 70.11, "learning_rate": 1.4946177201214465e-05, "loss": 0.2045, "step": 1524000 }, { "epoch": 70.13, "learning_rate": 1.4934676603183367e-05, "loss": 0.2039, "step": 1524500 }, { "epoch": 70.15, "learning_rate": 1.4923176005152267e-05, "loss": 0.2074, "step": 1525000 }, { "epoch": 70.18, "learning_rate": 1.491167540712117e-05, "loss": 0.2058, "step": 1525500 }, { "epoch": 70.2, "learning_rate": 1.4900174809090073e-05, "loss": 0.2104, "step": 1526000 }, { "epoch": 70.22, "learning_rate": 1.4888674211058976e-05, "loss": 0.2097, "step": 1526500 }, { "epoch": 70.25, "learning_rate": 1.4877173613027878e-05, "loss": 0.2097, "step": 1527000 }, { "epoch": 70.27, "learning_rate": 1.4865673014996781e-05, "loss": 0.2104, "step": 1527500 }, { "epoch": 70.29, "learning_rate": 1.4854172416965684e-05, "loss": 0.2127, "step": 1528000 }, { "epoch": 70.31, "learning_rate": 1.4842671818934586e-05, "loss": 0.2125, "step": 1528500 }, { "epoch": 70.34, "learning_rate": 1.4831171220903486e-05, "loss": 0.2068, "step": 1529000 }, { "epoch": 70.36, "learning_rate": 1.4819670622872389e-05, "loss": 0.2152, "step": 1529500 }, { "epoch": 70.38, "learning_rate": 1.4808170024841292e-05, "loss": 0.2167, "step": 1530000 }, { "epoch": 70.41, "learning_rate": 1.4796669426810195e-05, "loss": 0.2136, "step": 1530500 }, { "epoch": 70.43, "learning_rate": 1.4785168828779097e-05, "loss": 0.2109, "step": 1531000 }, { "epoch": 70.45, "learning_rate": 1.4773668230748e-05, "loss": 0.2129, "step": 1531500 }, { "epoch": 70.48, "learning_rate": 1.4762167632716903e-05, "loss": 0.215, "step": 1532000 }, { "epoch": 70.5, "learning_rate": 1.4750667034685805e-05, "loss": 0.2144, "step": 1532500 }, { "epoch": 70.52, "learning_rate": 1.4739166436654705e-05, "loss": 0.214, "step": 1533000 }, { "epoch": 70.54, "learning_rate": 1.4727665838623608e-05, "loss": 0.2159, "step": 1533500 }, { "epoch": 70.57, "learning_rate": 1.4716165240592511e-05, "loss": 0.2157, "step": 1534000 }, { "epoch": 70.59, "learning_rate": 1.4704664642561415e-05, "loss": 0.2195, "step": 1534500 }, { "epoch": 70.61, "learning_rate": 1.4693164044530316e-05, "loss": 0.2156, "step": 1535000 }, { "epoch": 70.64, "learning_rate": 1.468166344649922e-05, "loss": 0.2126, "step": 1535500 }, { "epoch": 70.66, "learning_rate": 1.4670162848468123e-05, "loss": 0.2156, "step": 1536000 }, { "epoch": 70.68, "learning_rate": 1.4658662250437024e-05, "loss": 0.2127, "step": 1536500 }, { "epoch": 70.71, "learning_rate": 1.4647161652405924e-05, "loss": 0.2138, "step": 1537000 }, { "epoch": 70.73, "learning_rate": 1.4635661054374827e-05, "loss": 0.2165, "step": 1537500 }, { "epoch": 70.75, "learning_rate": 1.462416045634373e-05, "loss": 0.2181, "step": 1538000 }, { "epoch": 70.77, "learning_rate": 1.4612659858312634e-05, "loss": 0.2133, "step": 1538500 }, { "epoch": 70.8, "learning_rate": 1.4601159260281535e-05, "loss": 0.2183, "step": 1539000 }, { "epoch": 70.82, "learning_rate": 1.4589658662250438e-05, "loss": 0.2185, "step": 1539500 }, { "epoch": 70.84, "learning_rate": 1.4578158064219342e-05, "loss": 0.2184, "step": 1540000 }, { "epoch": 70.87, "learning_rate": 1.4566657466188243e-05, "loss": 0.2161, "step": 1540500 }, { "epoch": 70.89, "learning_rate": 1.4555156868157143e-05, "loss": 0.217, "step": 1541000 }, { "epoch": 70.91, "learning_rate": 1.4543656270126046e-05, "loss": 0.2123, "step": 1541500 }, { "epoch": 70.94, "learning_rate": 1.453215567209495e-05, "loss": 0.2177, "step": 1542000 }, { "epoch": 70.96, "learning_rate": 1.4520655074063853e-05, "loss": 0.215, "step": 1542500 }, { "epoch": 70.98, "learning_rate": 1.4509154476032754e-05, "loss": 0.2202, "step": 1543000 }, { "epoch": 71.0, "learning_rate": 1.4497653878001658e-05, "loss": 0.2138, "step": 1543500 }, { "epoch": 71.03, "learning_rate": 1.448615327997056e-05, "loss": 0.2058, "step": 1544000 }, { "epoch": 71.05, "learning_rate": 1.4474652681939462e-05, "loss": 0.2031, "step": 1544500 }, { "epoch": 71.07, "learning_rate": 1.4463152083908362e-05, "loss": 0.2007, "step": 1545000 }, { "epoch": 71.1, "learning_rate": 1.4451651485877265e-05, "loss": 0.2037, "step": 1545500 }, { "epoch": 71.12, "learning_rate": 1.4440150887846169e-05, "loss": 0.2056, "step": 1546000 }, { "epoch": 71.14, "learning_rate": 1.4428650289815072e-05, "loss": 0.2095, "step": 1546500 }, { "epoch": 71.17, "learning_rate": 1.4417149691783973e-05, "loss": 0.2057, "step": 1547000 }, { "epoch": 71.19, "learning_rate": 1.4405649093752877e-05, "loss": 0.208, "step": 1547500 }, { "epoch": 71.21, "learning_rate": 1.439414849572178e-05, "loss": 0.2075, "step": 1548000 }, { "epoch": 71.23, "learning_rate": 1.438264789769068e-05, "loss": 0.2082, "step": 1548500 }, { "epoch": 71.26, "learning_rate": 1.4371147299659581e-05, "loss": 0.206, "step": 1549000 }, { "epoch": 71.28, "learning_rate": 1.4359646701628484e-05, "loss": 0.2078, "step": 1549500 }, { "epoch": 71.3, "learning_rate": 1.4348146103597388e-05, "loss": 0.2097, "step": 1550000 }, { "epoch": 71.33, "learning_rate": 1.4336645505566291e-05, "loss": 0.2113, "step": 1550500 }, { "epoch": 71.35, "learning_rate": 1.4325144907535192e-05, "loss": 0.2101, "step": 1551000 }, { "epoch": 71.37, "learning_rate": 1.4313644309504096e-05, "loss": 0.214, "step": 1551500 }, { "epoch": 71.4, "learning_rate": 1.4302143711472999e-05, "loss": 0.213, "step": 1552000 }, { "epoch": 71.42, "learning_rate": 1.4290643113441899e-05, "loss": 0.2143, "step": 1552500 }, { "epoch": 71.44, "learning_rate": 1.42791425154108e-05, "loss": 0.2103, "step": 1553000 }, { "epoch": 71.46, "learning_rate": 1.4267641917379704e-05, "loss": 0.2124, "step": 1553500 }, { "epoch": 71.49, "learning_rate": 1.4256141319348607e-05, "loss": 0.2108, "step": 1554000 }, { "epoch": 71.51, "learning_rate": 1.424464072131751e-05, "loss": 0.2152, "step": 1554500 }, { "epoch": 71.53, "learning_rate": 1.4233140123286412e-05, "loss": 0.2122, "step": 1555000 }, { "epoch": 71.56, "learning_rate": 1.4221639525255315e-05, "loss": 0.2065, "step": 1555500 }, { "epoch": 71.58, "learning_rate": 1.4210138927224218e-05, "loss": 0.2138, "step": 1556000 }, { "epoch": 71.6, "learning_rate": 1.4198638329193118e-05, "loss": 0.215, "step": 1556500 }, { "epoch": 71.63, "learning_rate": 1.418713773116202e-05, "loss": 0.2116, "step": 1557000 }, { "epoch": 71.65, "learning_rate": 1.4175637133130923e-05, "loss": 0.2092, "step": 1557500 }, { "epoch": 71.67, "learning_rate": 1.4164136535099826e-05, "loss": 0.2216, "step": 1558000 }, { "epoch": 71.69, "learning_rate": 1.415263593706873e-05, "loss": 0.2165, "step": 1558500 }, { "epoch": 71.72, "learning_rate": 1.414113533903763e-05, "loss": 0.2128, "step": 1559000 }, { "epoch": 71.74, "learning_rate": 1.4129634741006534e-05, "loss": 0.2184, "step": 1559500 }, { "epoch": 71.76, "learning_rate": 1.4118134142975437e-05, "loss": 0.2178, "step": 1560000 }, { "epoch": 71.79, "learning_rate": 1.4106633544944337e-05, "loss": 0.2138, "step": 1560500 }, { "epoch": 71.81, "learning_rate": 1.4095132946913239e-05, "loss": 0.2213, "step": 1561000 }, { "epoch": 71.83, "learning_rate": 1.4083632348882142e-05, "loss": 0.2191, "step": 1561500 }, { "epoch": 71.86, "learning_rate": 1.4072131750851045e-05, "loss": 0.2176, "step": 1562000 }, { "epoch": 71.88, "learning_rate": 1.4060631152819948e-05, "loss": 0.2229, "step": 1562500 }, { "epoch": 71.9, "learning_rate": 1.404913055478885e-05, "loss": 0.213, "step": 1563000 }, { "epoch": 71.92, "learning_rate": 1.4037629956757753e-05, "loss": 0.2136, "step": 1563500 }, { "epoch": 71.95, "learning_rate": 1.4026129358726656e-05, "loss": 0.2202, "step": 1564000 }, { "epoch": 71.97, "learning_rate": 1.4014628760695556e-05, "loss": 0.2194, "step": 1564500 }, { "epoch": 71.99, "learning_rate": 1.4003128162664458e-05, "loss": 0.2163, "step": 1565000 }, { "epoch": 72.02, "learning_rate": 1.3991627564633361e-05, "loss": 0.2071, "step": 1565500 }, { "epoch": 72.04, "learning_rate": 1.3980126966602264e-05, "loss": 0.2013, "step": 1566000 }, { "epoch": 72.06, "learning_rate": 1.3968626368571167e-05, "loss": 0.2043, "step": 1566500 }, { "epoch": 72.09, "learning_rate": 1.3957125770540069e-05, "loss": 0.2033, "step": 1567000 }, { "epoch": 72.11, "learning_rate": 1.3945625172508972e-05, "loss": 0.2019, "step": 1567500 }, { "epoch": 72.13, "learning_rate": 1.3934124574477875e-05, "loss": 0.2022, "step": 1568000 }, { "epoch": 72.15, "learning_rate": 1.3922623976446775e-05, "loss": 0.2048, "step": 1568500 }, { "epoch": 72.18, "learning_rate": 1.3911123378415677e-05, "loss": 0.2067, "step": 1569000 }, { "epoch": 72.2, "learning_rate": 1.389962278038458e-05, "loss": 0.2077, "step": 1569500 }, { "epoch": 72.22, "learning_rate": 1.3888122182353483e-05, "loss": 0.2084, "step": 1570000 }, { "epoch": 72.25, "learning_rate": 1.3876621584322386e-05, "loss": 0.2102, "step": 1570500 }, { "epoch": 72.27, "learning_rate": 1.3865120986291288e-05, "loss": 0.213, "step": 1571000 }, { "epoch": 72.29, "learning_rate": 1.3853620388260191e-05, "loss": 0.2096, "step": 1571500 }, { "epoch": 72.32, "learning_rate": 1.3842119790229094e-05, "loss": 0.2078, "step": 1572000 }, { "epoch": 72.34, "learning_rate": 1.3830619192197994e-05, "loss": 0.2107, "step": 1572500 }, { "epoch": 72.36, "learning_rate": 1.3819118594166896e-05, "loss": 0.2135, "step": 1573000 }, { "epoch": 72.38, "learning_rate": 1.3807617996135799e-05, "loss": 0.2186, "step": 1573500 }, { "epoch": 72.41, "learning_rate": 1.3796117398104702e-05, "loss": 0.2103, "step": 1574000 }, { "epoch": 72.43, "learning_rate": 1.3784616800073606e-05, "loss": 0.2073, "step": 1574500 }, { "epoch": 72.45, "learning_rate": 1.3773116202042507e-05, "loss": 0.2091, "step": 1575000 }, { "epoch": 72.48, "learning_rate": 1.376161560401141e-05, "loss": 0.208, "step": 1575500 }, { "epoch": 72.5, "learning_rate": 1.3750115005980314e-05, "loss": 0.2137, "step": 1576000 }, { "epoch": 72.52, "learning_rate": 1.3738614407949213e-05, "loss": 0.2138, "step": 1576500 }, { "epoch": 72.55, "learning_rate": 1.3727113809918115e-05, "loss": 0.2138, "step": 1577000 }, { "epoch": 72.57, "learning_rate": 1.3715613211887018e-05, "loss": 0.2084, "step": 1577500 }, { "epoch": 72.59, "learning_rate": 1.3704112613855921e-05, "loss": 0.2131, "step": 1578000 }, { "epoch": 72.61, "learning_rate": 1.3692612015824825e-05, "loss": 0.2204, "step": 1578500 }, { "epoch": 72.64, "learning_rate": 1.3681111417793726e-05, "loss": 0.2129, "step": 1579000 }, { "epoch": 72.66, "learning_rate": 1.366961081976263e-05, "loss": 0.21, "step": 1579500 }, { "epoch": 72.68, "learning_rate": 1.365811022173153e-05, "loss": 0.2162, "step": 1580000 }, { "epoch": 72.71, "learning_rate": 1.3646609623700433e-05, "loss": 0.2099, "step": 1580500 }, { "epoch": 72.73, "learning_rate": 1.3635109025669334e-05, "loss": 0.2162, "step": 1581000 }, { "epoch": 72.75, "learning_rate": 1.3623608427638237e-05, "loss": 0.2113, "step": 1581500 }, { "epoch": 72.78, "learning_rate": 1.361210782960714e-05, "loss": 0.2177, "step": 1582000 }, { "epoch": 72.8, "learning_rate": 1.3600607231576044e-05, "loss": 0.2129, "step": 1582500 }, { "epoch": 72.82, "learning_rate": 1.3589106633544945e-05, "loss": 0.2183, "step": 1583000 }, { "epoch": 72.84, "learning_rate": 1.3577606035513849e-05, "loss": 0.2142, "step": 1583500 }, { "epoch": 72.87, "learning_rate": 1.3566105437482748e-05, "loss": 0.2162, "step": 1584000 }, { "epoch": 72.89, "learning_rate": 1.3554604839451652e-05, "loss": 0.2144, "step": 1584500 }, { "epoch": 72.91, "learning_rate": 1.3543104241420553e-05, "loss": 0.2174, "step": 1585000 }, { "epoch": 72.94, "learning_rate": 1.3531603643389456e-05, "loss": 0.2182, "step": 1585500 }, { "epoch": 72.96, "learning_rate": 1.352010304535836e-05, "loss": 0.2197, "step": 1586000 }, { "epoch": 72.98, "learning_rate": 1.3508602447327263e-05, "loss": 0.2135, "step": 1586500 }, { "epoch": 73.01, "learning_rate": 1.3497101849296164e-05, "loss": 0.2107, "step": 1587000 }, { "epoch": 73.03, "learning_rate": 1.3485601251265068e-05, "loss": 0.2037, "step": 1587500 }, { "epoch": 73.05, "learning_rate": 1.3474100653233967e-05, "loss": 0.2049, "step": 1588000 }, { "epoch": 73.07, "learning_rate": 1.346260005520287e-05, "loss": 0.2068, "step": 1588500 }, { "epoch": 73.1, "learning_rate": 1.3451099457171772e-05, "loss": 0.2048, "step": 1589000 }, { "epoch": 73.12, "learning_rate": 1.3439598859140675e-05, "loss": 0.207, "step": 1589500 }, { "epoch": 73.14, "learning_rate": 1.3428098261109579e-05, "loss": 0.2063, "step": 1590000 }, { "epoch": 73.17, "learning_rate": 1.3416597663078482e-05, "loss": 0.2035, "step": 1590500 }, { "epoch": 73.19, "learning_rate": 1.3405097065047383e-05, "loss": 0.2113, "step": 1591000 }, { "epoch": 73.21, "learning_rate": 1.3393596467016287e-05, "loss": 0.2038, "step": 1591500 }, { "epoch": 73.24, "learning_rate": 1.3382095868985187e-05, "loss": 0.2068, "step": 1592000 }, { "epoch": 73.26, "learning_rate": 1.337059527095409e-05, "loss": 0.2079, "step": 1592500 }, { "epoch": 73.28, "learning_rate": 1.3359094672922991e-05, "loss": 0.2048, "step": 1593000 }, { "epoch": 73.3, "learning_rate": 1.3347594074891895e-05, "loss": 0.208, "step": 1593500 }, { "epoch": 73.33, "learning_rate": 1.3336093476860798e-05, "loss": 0.2103, "step": 1594000 }, { "epoch": 73.35, "learning_rate": 1.3324592878829701e-05, "loss": 0.207, "step": 1594500 }, { "epoch": 73.37, "learning_rate": 1.3313092280798603e-05, "loss": 0.2084, "step": 1595000 }, { "epoch": 73.4, "learning_rate": 1.3301591682767506e-05, "loss": 0.2133, "step": 1595500 }, { "epoch": 73.42, "learning_rate": 1.3290091084736406e-05, "loss": 0.2122, "step": 1596000 }, { "epoch": 73.44, "learning_rate": 1.3278590486705309e-05, "loss": 0.2143, "step": 1596500 }, { "epoch": 73.47, "learning_rate": 1.326708988867421e-05, "loss": 0.2066, "step": 1597000 }, { "epoch": 73.49, "learning_rate": 1.3255589290643114e-05, "loss": 0.2108, "step": 1597500 }, { "epoch": 73.51, "learning_rate": 1.3244088692612017e-05, "loss": 0.2135, "step": 1598000 }, { "epoch": 73.53, "learning_rate": 1.323258809458092e-05, "loss": 0.2106, "step": 1598500 }, { "epoch": 73.56, "learning_rate": 1.3221087496549822e-05, "loss": 0.2131, "step": 1599000 }, { "epoch": 73.58, "learning_rate": 1.3209586898518725e-05, "loss": 0.2041, "step": 1599500 }, { "epoch": 73.6, "learning_rate": 1.3198086300487625e-05, "loss": 0.2113, "step": 1600000 }, { "epoch": 73.63, "learning_rate": 1.3186585702456528e-05, "loss": 0.2076, "step": 1600500 }, { "epoch": 73.65, "learning_rate": 1.317508510442543e-05, "loss": 0.2115, "step": 1601000 }, { "epoch": 73.67, "learning_rate": 1.3163584506394333e-05, "loss": 0.2123, "step": 1601500 }, { "epoch": 73.7, "learning_rate": 1.3152083908363236e-05, "loss": 0.2145, "step": 1602000 }, { "epoch": 73.72, "learning_rate": 1.314058331033214e-05, "loss": 0.215, "step": 1602500 }, { "epoch": 73.74, "learning_rate": 1.312908271230104e-05, "loss": 0.2115, "step": 1603000 }, { "epoch": 73.76, "learning_rate": 1.3117582114269944e-05, "loss": 0.2155, "step": 1603500 }, { "epoch": 73.79, "learning_rate": 1.3106081516238844e-05, "loss": 0.2192, "step": 1604000 }, { "epoch": 73.81, "learning_rate": 1.3094580918207747e-05, "loss": 0.2152, "step": 1604500 }, { "epoch": 73.83, "learning_rate": 1.3083080320176649e-05, "loss": 0.2123, "step": 1605000 }, { "epoch": 73.86, "learning_rate": 1.3071579722145552e-05, "loss": 0.2124, "step": 1605500 }, { "epoch": 73.88, "learning_rate": 1.3060079124114455e-05, "loss": 0.2102, "step": 1606000 }, { "epoch": 73.9, "learning_rate": 1.3048578526083358e-05, "loss": 0.214, "step": 1606500 }, { "epoch": 73.93, "learning_rate": 1.303707792805226e-05, "loss": 0.2172, "step": 1607000 }, { "epoch": 73.95, "learning_rate": 1.3025577330021163e-05, "loss": 0.2146, "step": 1607500 }, { "epoch": 73.97, "learning_rate": 1.3014076731990063e-05, "loss": 0.216, "step": 1608000 }, { "epoch": 73.99, "learning_rate": 1.3002576133958966e-05, "loss": 0.218, "step": 1608500 }, { "epoch": 74.02, "learning_rate": 1.2991075535927868e-05, "loss": 0.2052, "step": 1609000 }, { "epoch": 74.04, "learning_rate": 1.2979574937896771e-05, "loss": 0.2028, "step": 1609500 }, { "epoch": 74.06, "learning_rate": 1.2968074339865674e-05, "loss": 0.201, "step": 1610000 }, { "epoch": 74.09, "learning_rate": 1.2956573741834577e-05, "loss": 0.2022, "step": 1610500 }, { "epoch": 74.11, "learning_rate": 1.2945073143803479e-05, "loss": 0.2019, "step": 1611000 }, { "epoch": 74.13, "learning_rate": 1.2933572545772379e-05, "loss": 0.2073, "step": 1611500 }, { "epoch": 74.16, "learning_rate": 1.2922071947741282e-05, "loss": 0.2015, "step": 1612000 }, { "epoch": 74.18, "learning_rate": 1.2910571349710185e-05, "loss": 0.2083, "step": 1612500 }, { "epoch": 74.2, "learning_rate": 1.2899070751679087e-05, "loss": 0.2036, "step": 1613000 }, { "epoch": 74.22, "learning_rate": 1.288757015364799e-05, "loss": 0.2021, "step": 1613500 }, { "epoch": 74.25, "learning_rate": 1.2876069555616893e-05, "loss": 0.206, "step": 1614000 }, { "epoch": 74.27, "learning_rate": 1.2864568957585797e-05, "loss": 0.2066, "step": 1614500 }, { "epoch": 74.29, "learning_rate": 1.2853068359554698e-05, "loss": 0.2055, "step": 1615000 }, { "epoch": 74.32, "learning_rate": 1.2841567761523598e-05, "loss": 0.2048, "step": 1615500 }, { "epoch": 74.34, "learning_rate": 1.2830067163492501e-05, "loss": 0.2081, "step": 1616000 }, { "epoch": 74.36, "learning_rate": 1.2818566565461404e-05, "loss": 0.2049, "step": 1616500 }, { "epoch": 74.39, "learning_rate": 1.2807065967430306e-05, "loss": 0.2069, "step": 1617000 }, { "epoch": 74.41, "learning_rate": 1.279556536939921e-05, "loss": 0.2095, "step": 1617500 }, { "epoch": 74.43, "learning_rate": 1.2784064771368112e-05, "loss": 0.2117, "step": 1618000 }, { "epoch": 74.45, "learning_rate": 1.2772564173337016e-05, "loss": 0.2088, "step": 1618500 }, { "epoch": 74.48, "learning_rate": 1.2761063575305917e-05, "loss": 0.2132, "step": 1619000 }, { "epoch": 74.5, "learning_rate": 1.2749562977274817e-05, "loss": 0.21, "step": 1619500 }, { "epoch": 74.52, "learning_rate": 1.273806237924372e-05, "loss": 0.2101, "step": 1620000 }, { "epoch": 74.55, "learning_rate": 1.2726561781212623e-05, "loss": 0.2116, "step": 1620500 }, { "epoch": 74.57, "learning_rate": 1.2715061183181525e-05, "loss": 0.2086, "step": 1621000 }, { "epoch": 74.59, "learning_rate": 1.2703560585150428e-05, "loss": 0.213, "step": 1621500 }, { "epoch": 74.62, "learning_rate": 1.2692059987119331e-05, "loss": 0.213, "step": 1622000 }, { "epoch": 74.64, "learning_rate": 1.2680559389088235e-05, "loss": 0.212, "step": 1622500 }, { "epoch": 74.66, "learning_rate": 1.2669058791057136e-05, "loss": 0.2147, "step": 1623000 }, { "epoch": 74.68, "learning_rate": 1.2657558193026036e-05, "loss": 0.2131, "step": 1623500 }, { "epoch": 74.71, "learning_rate": 1.264605759499494e-05, "loss": 0.2145, "step": 1624000 }, { "epoch": 74.73, "learning_rate": 1.2634556996963843e-05, "loss": 0.2132, "step": 1624500 }, { "epoch": 74.75, "learning_rate": 1.2623056398932744e-05, "loss": 0.2167, "step": 1625000 }, { "epoch": 74.78, "learning_rate": 1.2611555800901647e-05, "loss": 0.2079, "step": 1625500 }, { "epoch": 74.8, "learning_rate": 1.260005520287055e-05, "loss": 0.2141, "step": 1626000 }, { "epoch": 74.82, "learning_rate": 1.2588554604839454e-05, "loss": 0.2136, "step": 1626500 }, { "epoch": 74.85, "learning_rate": 1.2577054006808355e-05, "loss": 0.2147, "step": 1627000 }, { "epoch": 74.87, "learning_rate": 1.2565553408777255e-05, "loss": 0.2129, "step": 1627500 }, { "epoch": 74.89, "learning_rate": 1.2554052810746158e-05, "loss": 0.2099, "step": 1628000 }, { "epoch": 74.91, "learning_rate": 1.2542552212715062e-05, "loss": 0.2097, "step": 1628500 }, { "epoch": 74.94, "learning_rate": 1.2531051614683963e-05, "loss": 0.2192, "step": 1629000 }, { "epoch": 74.96, "learning_rate": 1.2519551016652866e-05, "loss": 0.2137, "step": 1629500 }, { "epoch": 74.98, "learning_rate": 1.250805041862177e-05, "loss": 0.2175, "step": 1630000 }, { "epoch": 75.01, "learning_rate": 1.2496549820590671e-05, "loss": 0.2083, "step": 1630500 }, { "epoch": 75.03, "learning_rate": 1.2485049222559573e-05, "loss": 0.2, "step": 1631000 }, { "epoch": 75.05, "learning_rate": 1.2473548624528476e-05, "loss": 0.2005, "step": 1631500 }, { "epoch": 75.08, "learning_rate": 1.246204802649738e-05, "loss": 0.2052, "step": 1632000 }, { "epoch": 75.1, "learning_rate": 1.245054742846628e-05, "loss": 0.2035, "step": 1632500 }, { "epoch": 75.12, "learning_rate": 1.2439046830435182e-05, "loss": 0.2018, "step": 1633000 }, { "epoch": 75.14, "learning_rate": 1.2427546232404086e-05, "loss": 0.2044, "step": 1633500 }, { "epoch": 75.17, "learning_rate": 1.2416045634372989e-05, "loss": 0.2038, "step": 1634000 }, { "epoch": 75.19, "learning_rate": 1.240454503634189e-05, "loss": 0.2039, "step": 1634500 }, { "epoch": 75.21, "learning_rate": 1.2393044438310792e-05, "loss": 0.1984, "step": 1635000 }, { "epoch": 75.24, "learning_rate": 1.2381543840279695e-05, "loss": 0.206, "step": 1635500 }, { "epoch": 75.26, "learning_rate": 1.2370043242248598e-05, "loss": 0.2052, "step": 1636000 }, { "epoch": 75.28, "learning_rate": 1.23585426442175e-05, "loss": 0.2063, "step": 1636500 }, { "epoch": 75.31, "learning_rate": 1.2347042046186401e-05, "loss": 0.2068, "step": 1637000 }, { "epoch": 75.33, "learning_rate": 1.2335541448155305e-05, "loss": 0.2059, "step": 1637500 }, { "epoch": 75.35, "learning_rate": 1.2324040850124208e-05, "loss": 0.2088, "step": 1638000 }, { "epoch": 75.37, "learning_rate": 1.231254025209311e-05, "loss": 0.2111, "step": 1638500 }, { "epoch": 75.4, "learning_rate": 1.2301039654062011e-05, "loss": 0.203, "step": 1639000 }, { "epoch": 75.42, "learning_rate": 1.2289539056030914e-05, "loss": 0.2093, "step": 1639500 }, { "epoch": 75.44, "learning_rate": 1.2278038457999816e-05, "loss": 0.2101, "step": 1640000 }, { "epoch": 75.47, "learning_rate": 1.2266537859968719e-05, "loss": 0.2071, "step": 1640500 }, { "epoch": 75.49, "learning_rate": 1.225503726193762e-05, "loss": 0.2114, "step": 1641000 }, { "epoch": 75.51, "learning_rate": 1.2243536663906524e-05, "loss": 0.2103, "step": 1641500 }, { "epoch": 75.54, "learning_rate": 1.2232036065875425e-05, "loss": 0.2089, "step": 1642000 }, { "epoch": 75.56, "learning_rate": 1.2220535467844329e-05, "loss": 0.2164, "step": 1642500 }, { "epoch": 75.58, "learning_rate": 1.2209034869813232e-05, "loss": 0.2111, "step": 1643000 }, { "epoch": 75.6, "learning_rate": 1.2197534271782133e-05, "loss": 0.207, "step": 1643500 }, { "epoch": 75.63, "learning_rate": 1.2186033673751035e-05, "loss": 0.2076, "step": 1644000 }, { "epoch": 75.65, "learning_rate": 1.2174533075719938e-05, "loss": 0.2115, "step": 1644500 }, { "epoch": 75.67, "learning_rate": 1.2163032477688841e-05, "loss": 0.2131, "step": 1645000 }, { "epoch": 75.7, "learning_rate": 1.2151531879657743e-05, "loss": 0.2061, "step": 1645500 }, { "epoch": 75.72, "learning_rate": 1.2140031281626644e-05, "loss": 0.2119, "step": 1646000 }, { "epoch": 75.74, "learning_rate": 1.2128530683595548e-05, "loss": 0.2118, "step": 1646500 }, { "epoch": 75.77, "learning_rate": 1.2117030085564451e-05, "loss": 0.2152, "step": 1647000 }, { "epoch": 75.79, "learning_rate": 1.2105529487533352e-05, "loss": 0.2169, "step": 1647500 }, { "epoch": 75.81, "learning_rate": 1.2094028889502254e-05, "loss": 0.2103, "step": 1648000 }, { "epoch": 75.83, "learning_rate": 1.2082528291471157e-05, "loss": 0.2128, "step": 1648500 }, { "epoch": 75.86, "learning_rate": 1.207102769344006e-05, "loss": 0.2158, "step": 1649000 }, { "epoch": 75.88, "learning_rate": 1.2059527095408962e-05, "loss": 0.2166, "step": 1649500 }, { "epoch": 75.9, "learning_rate": 1.2048026497377863e-05, "loss": 0.2164, "step": 1650000 }, { "epoch": 75.93, "learning_rate": 1.2036525899346767e-05, "loss": 0.2123, "step": 1650500 }, { "epoch": 75.95, "learning_rate": 1.202502530131567e-05, "loss": 0.2111, "step": 1651000 }, { "epoch": 75.97, "learning_rate": 1.2013524703284571e-05, "loss": 0.2104, "step": 1651500 }, { "epoch": 76.0, "learning_rate": 1.2002024105253473e-05, "loss": 0.2111, "step": 1652000 }, { "epoch": 76.02, "learning_rate": 1.1990523507222376e-05, "loss": 0.1965, "step": 1652500 }, { "epoch": 76.04, "learning_rate": 1.197902290919128e-05, "loss": 0.1977, "step": 1653000 }, { "epoch": 76.06, "learning_rate": 1.1967522311160181e-05, "loss": 0.1975, "step": 1653500 }, { "epoch": 76.09, "learning_rate": 1.1956021713129083e-05, "loss": 0.2012, "step": 1654000 }, { "epoch": 76.11, "learning_rate": 1.1944521115097986e-05, "loss": 0.2016, "step": 1654500 }, { "epoch": 76.13, "learning_rate": 1.1933020517066889e-05, "loss": 0.2039, "step": 1655000 }, { "epoch": 76.16, "learning_rate": 1.192151991903579e-05, "loss": 0.2074, "step": 1655500 }, { "epoch": 76.18, "learning_rate": 1.1910019321004692e-05, "loss": 0.2035, "step": 1656000 }, { "epoch": 76.2, "learning_rate": 1.1898518722973595e-05, "loss": 0.2021, "step": 1656500 }, { "epoch": 76.23, "learning_rate": 1.1887018124942499e-05, "loss": 0.2029, "step": 1657000 }, { "epoch": 76.25, "learning_rate": 1.18755175269114e-05, "loss": 0.2041, "step": 1657500 }, { "epoch": 76.27, "learning_rate": 1.1864016928880302e-05, "loss": 0.2047, "step": 1658000 }, { "epoch": 76.29, "learning_rate": 1.1852516330849205e-05, "loss": 0.2051, "step": 1658500 }, { "epoch": 76.32, "learning_rate": 1.1841015732818108e-05, "loss": 0.206, "step": 1659000 }, { "epoch": 76.34, "learning_rate": 1.182951513478701e-05, "loss": 0.2073, "step": 1659500 }, { "epoch": 76.36, "learning_rate": 1.1818014536755911e-05, "loss": 0.2058, "step": 1660000 }, { "epoch": 76.39, "learning_rate": 1.1806513938724814e-05, "loss": 0.2092, "step": 1660500 }, { "epoch": 76.41, "learning_rate": 1.1795013340693718e-05, "loss": 0.206, "step": 1661000 }, { "epoch": 76.43, "learning_rate": 1.178351274266262e-05, "loss": 0.2073, "step": 1661500 }, { "epoch": 76.46, "learning_rate": 1.177201214463152e-05, "loss": 0.2067, "step": 1662000 }, { "epoch": 76.48, "learning_rate": 1.1760511546600424e-05, "loss": 0.2123, "step": 1662500 }, { "epoch": 76.5, "learning_rate": 1.1749010948569327e-05, "loss": 0.2092, "step": 1663000 }, { "epoch": 76.52, "learning_rate": 1.1737510350538229e-05, "loss": 0.2076, "step": 1663500 }, { "epoch": 76.55, "learning_rate": 1.172600975250713e-05, "loss": 0.2106, "step": 1664000 }, { "epoch": 76.57, "learning_rate": 1.1714509154476034e-05, "loss": 0.2122, "step": 1664500 }, { "epoch": 76.59, "learning_rate": 1.1703008556444937e-05, "loss": 0.2062, "step": 1665000 }, { "epoch": 76.62, "learning_rate": 1.1691507958413838e-05, "loss": 0.2168, "step": 1665500 }, { "epoch": 76.64, "learning_rate": 1.168000736038274e-05, "loss": 0.2078, "step": 1666000 }, { "epoch": 76.66, "learning_rate": 1.1668506762351643e-05, "loss": 0.2124, "step": 1666500 }, { "epoch": 76.69, "learning_rate": 1.1657006164320546e-05, "loss": 0.2089, "step": 1667000 }, { "epoch": 76.71, "learning_rate": 1.1645505566289448e-05, "loss": 0.2128, "step": 1667500 }, { "epoch": 76.73, "learning_rate": 1.163400496825835e-05, "loss": 0.2134, "step": 1668000 }, { "epoch": 76.75, "learning_rate": 1.1622504370227253e-05, "loss": 0.2167, "step": 1668500 }, { "epoch": 76.78, "learning_rate": 1.1611003772196156e-05, "loss": 0.2111, "step": 1669000 }, { "epoch": 76.8, "learning_rate": 1.1599503174165057e-05, "loss": 0.2118, "step": 1669500 }, { "epoch": 76.82, "learning_rate": 1.1588002576133959e-05, "loss": 0.2123, "step": 1670000 }, { "epoch": 76.85, "learning_rate": 1.1576501978102862e-05, "loss": 0.2054, "step": 1670500 }, { "epoch": 76.87, "learning_rate": 1.1565001380071765e-05, "loss": 0.2126, "step": 1671000 }, { "epoch": 76.89, "learning_rate": 1.1553500782040665e-05, "loss": 0.2105, "step": 1671500 }, { "epoch": 76.92, "learning_rate": 1.1542000184009569e-05, "loss": 0.2148, "step": 1672000 }, { "epoch": 76.94, "learning_rate": 1.1530499585978472e-05, "loss": 0.2111, "step": 1672500 }, { "epoch": 76.96, "learning_rate": 1.1518998987947375e-05, "loss": 0.2108, "step": 1673000 }, { "epoch": 76.99, "learning_rate": 1.1507498389916275e-05, "loss": 0.2174, "step": 1673500 }, { "epoch": 77.01, "learning_rate": 1.1495997791885178e-05, "loss": 0.2057, "step": 1674000 }, { "epoch": 77.03, "learning_rate": 1.1484497193854081e-05, "loss": 0.1949, "step": 1674500 }, { "epoch": 77.05, "learning_rate": 1.1472996595822985e-05, "loss": 0.1977, "step": 1675000 }, { "epoch": 77.08, "learning_rate": 1.1461495997791884e-05, "loss": 0.2028, "step": 1675500 }, { "epoch": 77.1, "learning_rate": 1.1449995399760788e-05, "loss": 0.2024, "step": 1676000 }, { "epoch": 77.12, "learning_rate": 1.1438494801729691e-05, "loss": 0.2042, "step": 1676500 }, { "epoch": 77.15, "learning_rate": 1.1426994203698594e-05, "loss": 0.2007, "step": 1677000 }, { "epoch": 77.17, "learning_rate": 1.1415493605667494e-05, "loss": 0.2025, "step": 1677500 }, { "epoch": 77.19, "learning_rate": 1.1403993007636397e-05, "loss": 0.2044, "step": 1678000 }, { "epoch": 77.22, "learning_rate": 1.13924924096053e-05, "loss": 0.2027, "step": 1678500 }, { "epoch": 77.24, "learning_rate": 1.1380991811574204e-05, "loss": 0.201, "step": 1679000 }, { "epoch": 77.26, "learning_rate": 1.1369491213543103e-05, "loss": 0.2035, "step": 1679500 }, { "epoch": 77.28, "learning_rate": 1.1357990615512007e-05, "loss": 0.2091, "step": 1680000 }, { "epoch": 77.31, "learning_rate": 1.134649001748091e-05, "loss": 0.2039, "step": 1680500 }, { "epoch": 77.33, "learning_rate": 1.1334989419449813e-05, "loss": 0.2032, "step": 1681000 }, { "epoch": 77.35, "learning_rate": 1.1323488821418713e-05, "loss": 0.2057, "step": 1681500 }, { "epoch": 77.38, "learning_rate": 1.1311988223387616e-05, "loss": 0.2, "step": 1682000 }, { "epoch": 77.4, "learning_rate": 1.130048762535652e-05, "loss": 0.2052, "step": 1682500 }, { "epoch": 77.42, "learning_rate": 1.1288987027325423e-05, "loss": 0.2068, "step": 1683000 }, { "epoch": 77.45, "learning_rate": 1.1277486429294323e-05, "loss": 0.2048, "step": 1683500 }, { "epoch": 77.47, "learning_rate": 1.1265985831263226e-05, "loss": 0.2059, "step": 1684000 }, { "epoch": 77.49, "learning_rate": 1.1254485233232129e-05, "loss": 0.2119, "step": 1684500 }, { "epoch": 77.51, "learning_rate": 1.1242984635201032e-05, "loss": 0.2037, "step": 1685000 }, { "epoch": 77.54, "learning_rate": 1.1231484037169932e-05, "loss": 0.2109, "step": 1685500 }, { "epoch": 77.56, "learning_rate": 1.1219983439138835e-05, "loss": 0.2079, "step": 1686000 }, { "epoch": 77.58, "learning_rate": 1.1208482841107739e-05, "loss": 0.2101, "step": 1686500 }, { "epoch": 77.61, "learning_rate": 1.1196982243076642e-05, "loss": 0.2075, "step": 1687000 }, { "epoch": 77.63, "learning_rate": 1.1185481645045542e-05, "loss": 0.2126, "step": 1687500 }, { "epoch": 77.65, "learning_rate": 1.1173981047014445e-05, "loss": 0.2136, "step": 1688000 }, { "epoch": 77.68, "learning_rate": 1.1162480448983348e-05, "loss": 0.2113, "step": 1688500 }, { "epoch": 77.7, "learning_rate": 1.1150979850952251e-05, "loss": 0.2122, "step": 1689000 }, { "epoch": 77.72, "learning_rate": 1.1139479252921151e-05, "loss": 0.2097, "step": 1689500 }, { "epoch": 77.74, "learning_rate": 1.1127978654890054e-05, "loss": 0.2106, "step": 1690000 }, { "epoch": 77.77, "learning_rate": 1.1116478056858958e-05, "loss": 0.2096, "step": 1690500 }, { "epoch": 77.79, "learning_rate": 1.1104977458827861e-05, "loss": 0.209, "step": 1691000 }, { "epoch": 77.81, "learning_rate": 1.109347686079676e-05, "loss": 0.2107, "step": 1691500 }, { "epoch": 77.84, "learning_rate": 1.1081976262765664e-05, "loss": 0.2139, "step": 1692000 }, { "epoch": 77.86, "learning_rate": 1.1070475664734567e-05, "loss": 0.2131, "step": 1692500 }, { "epoch": 77.88, "learning_rate": 1.105897506670347e-05, "loss": 0.2057, "step": 1693000 }, { "epoch": 77.91, "learning_rate": 1.104747446867237e-05, "loss": 0.2129, "step": 1693500 }, { "epoch": 77.93, "learning_rate": 1.1035973870641274e-05, "loss": 0.2081, "step": 1694000 }, { "epoch": 77.95, "learning_rate": 1.1024473272610177e-05, "loss": 0.213, "step": 1694500 }, { "epoch": 77.97, "learning_rate": 1.101297267457908e-05, "loss": 0.2103, "step": 1695000 }, { "epoch": 78.0, "learning_rate": 1.100147207654798e-05, "loss": 0.21, "step": 1695500 }, { "epoch": 78.02, "learning_rate": 1.0989971478516883e-05, "loss": 0.2001, "step": 1696000 }, { "epoch": 78.04, "learning_rate": 1.0978470880485786e-05, "loss": 0.1991, "step": 1696500 }, { "epoch": 78.07, "learning_rate": 1.096697028245469e-05, "loss": 0.2028, "step": 1697000 }, { "epoch": 78.09, "learning_rate": 1.095546968442359e-05, "loss": 0.1953, "step": 1697500 }, { "epoch": 78.11, "learning_rate": 1.0943969086392493e-05, "loss": 0.1988, "step": 1698000 }, { "epoch": 78.14, "learning_rate": 1.0932468488361396e-05, "loss": 0.1996, "step": 1698500 }, { "epoch": 78.16, "learning_rate": 1.0920967890330299e-05, "loss": 0.206, "step": 1699000 }, { "epoch": 78.18, "learning_rate": 1.0909467292299199e-05, "loss": 0.198, "step": 1699500 }, { "epoch": 78.2, "learning_rate": 1.0897966694268102e-05, "loss": 0.199, "step": 1700000 }, { "epoch": 78.23, "learning_rate": 1.0886466096237005e-05, "loss": 0.2038, "step": 1700500 }, { "epoch": 78.25, "learning_rate": 1.0874965498205907e-05, "loss": 0.2072, "step": 1701000 }, { "epoch": 78.27, "learning_rate": 1.0863464900174809e-05, "loss": 0.2021, "step": 1701500 }, { "epoch": 78.3, "learning_rate": 1.0851964302143712e-05, "loss": 0.2037, "step": 1702000 }, { "epoch": 78.32, "learning_rate": 1.0840463704112615e-05, "loss": 0.2053, "step": 1702500 }, { "epoch": 78.34, "learning_rate": 1.0828963106081517e-05, "loss": 0.208, "step": 1703000 }, { "epoch": 78.37, "learning_rate": 1.0817462508050418e-05, "loss": 0.2046, "step": 1703500 }, { "epoch": 78.39, "learning_rate": 1.0805961910019321e-05, "loss": 0.206, "step": 1704000 }, { "epoch": 78.41, "learning_rate": 1.0794461311988225e-05, "loss": 0.2055, "step": 1704500 }, { "epoch": 78.43, "learning_rate": 1.0782960713957126e-05, "loss": 0.2074, "step": 1705000 }, { "epoch": 78.46, "learning_rate": 1.0771460115926028e-05, "loss": 0.207, "step": 1705500 }, { "epoch": 78.48, "learning_rate": 1.0759959517894931e-05, "loss": 0.2083, "step": 1706000 }, { "epoch": 78.5, "learning_rate": 1.0748458919863834e-05, "loss": 0.2031, "step": 1706500 }, { "epoch": 78.53, "learning_rate": 1.0736958321832736e-05, "loss": 0.2062, "step": 1707000 }, { "epoch": 78.55, "learning_rate": 1.0725457723801637e-05, "loss": 0.2092, "step": 1707500 }, { "epoch": 78.57, "learning_rate": 1.071395712577054e-05, "loss": 0.2079, "step": 1708000 }, { "epoch": 78.6, "learning_rate": 1.0702456527739444e-05, "loss": 0.2062, "step": 1708500 }, { "epoch": 78.62, "learning_rate": 1.0690955929708345e-05, "loss": 0.2079, "step": 1709000 }, { "epoch": 78.64, "learning_rate": 1.0679455331677247e-05, "loss": 0.2126, "step": 1709500 }, { "epoch": 78.66, "learning_rate": 1.066795473364615e-05, "loss": 0.2116, "step": 1710000 }, { "epoch": 78.69, "learning_rate": 1.0656454135615053e-05, "loss": 0.2097, "step": 1710500 }, { "epoch": 78.71, "learning_rate": 1.0644953537583955e-05, "loss": 0.2111, "step": 1711000 }, { "epoch": 78.73, "learning_rate": 1.0633452939552856e-05, "loss": 0.2092, "step": 1711500 }, { "epoch": 78.76, "learning_rate": 1.062195234152176e-05, "loss": 0.2092, "step": 1712000 }, { "epoch": 78.78, "learning_rate": 1.0610451743490663e-05, "loss": 0.2085, "step": 1712500 }, { "epoch": 78.8, "learning_rate": 1.0598951145459564e-05, "loss": 0.2112, "step": 1713000 }, { "epoch": 78.83, "learning_rate": 1.0587450547428466e-05, "loss": 0.2071, "step": 1713500 }, { "epoch": 78.85, "learning_rate": 1.0575949949397369e-05, "loss": 0.2059, "step": 1714000 }, { "epoch": 78.87, "learning_rate": 1.0564449351366272e-05, "loss": 0.2109, "step": 1714500 }, { "epoch": 78.89, "learning_rate": 1.0552948753335174e-05, "loss": 0.2103, "step": 1715000 }, { "epoch": 78.92, "learning_rate": 1.0541448155304075e-05, "loss": 0.2129, "step": 1715500 }, { "epoch": 78.94, "learning_rate": 1.0529947557272979e-05, "loss": 0.2072, "step": 1716000 }, { "epoch": 78.96, "learning_rate": 1.0518446959241882e-05, "loss": 0.2147, "step": 1716500 }, { "epoch": 78.99, "learning_rate": 1.0506946361210783e-05, "loss": 0.212, "step": 1717000 }, { "epoch": 79.01, "learning_rate": 1.0495445763179685e-05, "loss": 0.2044, "step": 1717500 }, { "epoch": 79.03, "learning_rate": 1.0483945165148588e-05, "loss": 0.1968, "step": 1718000 }, { "epoch": 79.06, "learning_rate": 1.0472444567117491e-05, "loss": 0.1999, "step": 1718500 }, { "epoch": 79.08, "learning_rate": 1.0460943969086393e-05, "loss": 0.2035, "step": 1719000 }, { "epoch": 79.1, "learning_rate": 1.0449443371055294e-05, "loss": 0.1994, "step": 1719500 }, { "epoch": 79.12, "learning_rate": 1.0437942773024198e-05, "loss": 0.1999, "step": 1720000 }, { "epoch": 79.15, "learning_rate": 1.0426442174993101e-05, "loss": 0.2021, "step": 1720500 }, { "epoch": 79.17, "learning_rate": 1.0414941576962002e-05, "loss": 0.1987, "step": 1721000 }, { "epoch": 79.19, "learning_rate": 1.0403440978930904e-05, "loss": 0.2058, "step": 1721500 }, { "epoch": 79.22, "learning_rate": 1.0391940380899807e-05, "loss": 0.1982, "step": 1722000 }, { "epoch": 79.24, "learning_rate": 1.038043978286871e-05, "loss": 0.2024, "step": 1722500 }, { "epoch": 79.26, "learning_rate": 1.0368939184837612e-05, "loss": 0.2003, "step": 1723000 }, { "epoch": 79.29, "learning_rate": 1.0357438586806514e-05, "loss": 0.2065, "step": 1723500 }, { "epoch": 79.31, "learning_rate": 1.0345937988775417e-05, "loss": 0.2058, "step": 1724000 }, { "epoch": 79.33, "learning_rate": 1.033443739074432e-05, "loss": 0.2061, "step": 1724500 }, { "epoch": 79.35, "learning_rate": 1.0322936792713222e-05, "loss": 0.2067, "step": 1725000 }, { "epoch": 79.38, "learning_rate": 1.0311436194682123e-05, "loss": 0.2037, "step": 1725500 }, { "epoch": 79.4, "learning_rate": 1.0299935596651026e-05, "loss": 0.203, "step": 1726000 }, { "epoch": 79.42, "learning_rate": 1.028843499861993e-05, "loss": 0.2027, "step": 1726500 }, { "epoch": 79.45, "learning_rate": 1.0276934400588831e-05, "loss": 0.2071, "step": 1727000 }, { "epoch": 79.47, "learning_rate": 1.0265433802557733e-05, "loss": 0.2048, "step": 1727500 }, { "epoch": 79.49, "learning_rate": 1.0253933204526636e-05, "loss": 0.2061, "step": 1728000 }, { "epoch": 79.52, "learning_rate": 1.0242432606495539e-05, "loss": 0.2095, "step": 1728500 }, { "epoch": 79.54, "learning_rate": 1.023093200846444e-05, "loss": 0.2094, "step": 1729000 }, { "epoch": 79.56, "learning_rate": 1.0219431410433342e-05, "loss": 0.2053, "step": 1729500 }, { "epoch": 79.58, "learning_rate": 1.0207930812402245e-05, "loss": 0.206, "step": 1730000 }, { "epoch": 79.61, "learning_rate": 1.0196430214371149e-05, "loss": 0.2088, "step": 1730500 }, { "epoch": 79.63, "learning_rate": 1.018492961634005e-05, "loss": 0.2056, "step": 1731000 }, { "epoch": 79.65, "learning_rate": 1.0173429018308952e-05, "loss": 0.2064, "step": 1731500 }, { "epoch": 79.68, "learning_rate": 1.0161928420277855e-05, "loss": 0.2092, "step": 1732000 }, { "epoch": 79.7, "learning_rate": 1.0150427822246757e-05, "loss": 0.2026, "step": 1732500 }, { "epoch": 79.72, "learning_rate": 1.013892722421566e-05, "loss": 0.2111, "step": 1733000 }, { "epoch": 79.75, "learning_rate": 1.0127426626184561e-05, "loss": 0.2071, "step": 1733500 }, { "epoch": 79.77, "learning_rate": 1.0115926028153465e-05, "loss": 0.2113, "step": 1734000 }, { "epoch": 79.79, "learning_rate": 1.0104425430122366e-05, "loss": 0.2075, "step": 1734500 }, { "epoch": 79.81, "learning_rate": 1.009292483209127e-05, "loss": 0.2044, "step": 1735000 }, { "epoch": 79.84, "learning_rate": 1.0081424234060171e-05, "loss": 0.2117, "step": 1735500 }, { "epoch": 79.86, "learning_rate": 1.0069923636029074e-05, "loss": 0.2079, "step": 1736000 }, { "epoch": 79.88, "learning_rate": 1.0058423037997976e-05, "loss": 0.2091, "step": 1736500 }, { "epoch": 79.91, "learning_rate": 1.0046922439966879e-05, "loss": 0.2099, "step": 1737000 }, { "epoch": 79.93, "learning_rate": 1.003542184193578e-05, "loss": 0.2061, "step": 1737500 }, { "epoch": 79.95, "learning_rate": 1.0023921243904684e-05, "loss": 0.2135, "step": 1738000 }, { "epoch": 79.98, "learning_rate": 1.0012420645873585e-05, "loss": 0.2086, "step": 1738500 }, { "epoch": 80.0, "learning_rate": 1.0000920047842488e-05, "loss": 0.2068, "step": 1739000 }, { "epoch": 80.02, "learning_rate": 9.98941944981139e-06, "loss": 0.1994, "step": 1739500 }, { "epoch": 80.04, "learning_rate": 9.977918851780293e-06, "loss": 0.1983, "step": 1740000 }, { "epoch": 80.07, "learning_rate": 9.966418253749195e-06, "loss": 0.1951, "step": 1740500 }, { "epoch": 80.09, "learning_rate": 9.954917655718098e-06, "loss": 0.1978, "step": 1741000 }, { "epoch": 80.11, "learning_rate": 9.943417057687e-06, "loss": 0.1979, "step": 1741500 }, { "epoch": 80.14, "learning_rate": 9.931916459655903e-06, "loss": 0.1981, "step": 1742000 }, { "epoch": 80.16, "learning_rate": 9.920415861624804e-06, "loss": 0.2007, "step": 1742500 }, { "epoch": 80.18, "learning_rate": 9.908915263593708e-06, "loss": 0.1969, "step": 1743000 }, { "epoch": 80.21, "learning_rate": 9.897414665562609e-06, "loss": 0.2035, "step": 1743500 }, { "epoch": 80.23, "learning_rate": 9.885914067531512e-06, "loss": 0.2048, "step": 1744000 }, { "epoch": 80.25, "learning_rate": 9.874413469500414e-06, "loss": 0.2012, "step": 1744500 }, { "epoch": 80.27, "learning_rate": 9.862912871469317e-06, "loss": 0.2022, "step": 1745000 }, { "epoch": 80.3, "learning_rate": 9.851412273438219e-06, "loss": 0.203, "step": 1745500 }, { "epoch": 80.32, "learning_rate": 9.839911675407122e-06, "loss": 0.2076, "step": 1746000 }, { "epoch": 80.34, "learning_rate": 9.828411077376023e-06, "loss": 0.201, "step": 1746500 }, { "epoch": 80.37, "learning_rate": 9.816910479344927e-06, "loss": 0.204, "step": 1747000 }, { "epoch": 80.39, "learning_rate": 9.805409881313828e-06, "loss": 0.2032, "step": 1747500 }, { "epoch": 80.41, "learning_rate": 9.793909283282731e-06, "loss": 0.2011, "step": 1748000 }, { "epoch": 80.44, "learning_rate": 9.782408685251633e-06, "loss": 0.207, "step": 1748500 }, { "epoch": 80.46, "learning_rate": 9.770908087220536e-06, "loss": 0.2024, "step": 1749000 }, { "epoch": 80.48, "learning_rate": 9.759407489189438e-06, "loss": 0.2114, "step": 1749500 }, { "epoch": 80.5, "learning_rate": 9.747906891158341e-06, "loss": 0.2092, "step": 1750000 }, { "epoch": 80.53, "learning_rate": 9.736406293127242e-06, "loss": 0.2069, "step": 1750500 }, { "epoch": 80.55, "learning_rate": 9.724905695096146e-06, "loss": 0.2101, "step": 1751000 }, { "epoch": 80.57, "learning_rate": 9.713405097065047e-06, "loss": 0.2056, "step": 1751500 }, { "epoch": 80.6, "learning_rate": 9.70190449903395e-06, "loss": 0.2091, "step": 1752000 }, { "epoch": 80.62, "learning_rate": 9.690403901002852e-06, "loss": 0.2086, "step": 1752500 }, { "epoch": 80.64, "learning_rate": 9.678903302971755e-06, "loss": 0.2085, "step": 1753000 }, { "epoch": 80.67, "learning_rate": 9.667402704940657e-06, "loss": 0.209, "step": 1753500 }, { "epoch": 80.69, "learning_rate": 9.65590210690956e-06, "loss": 0.204, "step": 1754000 }, { "epoch": 80.71, "learning_rate": 9.644401508878462e-06, "loss": 0.2028, "step": 1754500 }, { "epoch": 80.73, "learning_rate": 9.632900910847365e-06, "loss": 0.2091, "step": 1755000 }, { "epoch": 80.76, "learning_rate": 9.621400312816266e-06, "loss": 0.211, "step": 1755500 }, { "epoch": 80.78, "learning_rate": 9.60989971478517e-06, "loss": 0.2088, "step": 1756000 }, { "epoch": 80.8, "learning_rate": 9.598399116754071e-06, "loss": 0.2075, "step": 1756500 }, { "epoch": 80.83, "learning_rate": 9.586898518722974e-06, "loss": 0.2034, "step": 1757000 }, { "epoch": 80.85, "learning_rate": 9.575397920691876e-06, "loss": 0.2059, "step": 1757500 }, { "epoch": 80.87, "learning_rate": 9.563897322660779e-06, "loss": 0.2105, "step": 1758000 }, { "epoch": 80.9, "learning_rate": 9.55239672462968e-06, "loss": 0.2099, "step": 1758500 }, { "epoch": 80.92, "learning_rate": 9.540896126598584e-06, "loss": 0.2066, "step": 1759000 }, { "epoch": 80.94, "learning_rate": 9.529395528567485e-06, "loss": 0.211, "step": 1759500 }, { "epoch": 80.96, "learning_rate": 9.517894930536389e-06, "loss": 0.2093, "step": 1760000 }, { "epoch": 80.99, "learning_rate": 9.50639433250529e-06, "loss": 0.2079, "step": 1760500 }, { "epoch": 81.01, "learning_rate": 9.494893734474193e-06, "loss": 0.2053, "step": 1761000 }, { "epoch": 81.03, "learning_rate": 9.483393136443095e-06, "loss": 0.1995, "step": 1761500 }, { "epoch": 81.06, "learning_rate": 9.471892538411997e-06, "loss": 0.1991, "step": 1762000 }, { "epoch": 81.08, "learning_rate": 9.4603919403809e-06, "loss": 0.1958, "step": 1762500 }, { "epoch": 81.1, "learning_rate": 9.448891342349803e-06, "loss": 0.1935, "step": 1763000 }, { "epoch": 81.13, "learning_rate": 9.437390744318706e-06, "loss": 0.1981, "step": 1763500 }, { "epoch": 81.15, "learning_rate": 9.425890146287606e-06, "loss": 0.1955, "step": 1764000 }, { "epoch": 81.17, "learning_rate": 9.41438954825651e-06, "loss": 0.2018, "step": 1764500 }, { "epoch": 81.19, "learning_rate": 9.402888950225413e-06, "loss": 0.2017, "step": 1765000 }, { "epoch": 81.22, "learning_rate": 9.391388352194316e-06, "loss": 0.2003, "step": 1765500 }, { "epoch": 81.24, "learning_rate": 9.379887754163216e-06, "loss": 0.2044, "step": 1766000 }, { "epoch": 81.26, "learning_rate": 9.368387156132119e-06, "loss": 0.2033, "step": 1766500 }, { "epoch": 81.29, "learning_rate": 9.356886558101022e-06, "loss": 0.2031, "step": 1767000 }, { "epoch": 81.31, "learning_rate": 9.345385960069925e-06, "loss": 0.2002, "step": 1767500 }, { "epoch": 81.33, "learning_rate": 9.333885362038825e-06, "loss": 0.2066, "step": 1768000 }, { "epoch": 81.36, "learning_rate": 9.322384764007728e-06, "loss": 0.2023, "step": 1768500 }, { "epoch": 81.38, "learning_rate": 9.310884165976632e-06, "loss": 0.2032, "step": 1769000 }, { "epoch": 81.4, "learning_rate": 9.299383567945535e-06, "loss": 0.2052, "step": 1769500 }, { "epoch": 81.42, "learning_rate": 9.287882969914435e-06, "loss": 0.2065, "step": 1770000 }, { "epoch": 81.45, "learning_rate": 9.276382371883338e-06, "loss": 0.2063, "step": 1770500 }, { "epoch": 81.47, "learning_rate": 9.264881773852241e-06, "loss": 0.2058, "step": 1771000 }, { "epoch": 81.49, "learning_rate": 9.253381175821144e-06, "loss": 0.205, "step": 1771500 }, { "epoch": 81.52, "learning_rate": 9.241880577790044e-06, "loss": 0.207, "step": 1772000 }, { "epoch": 81.54, "learning_rate": 9.230379979758948e-06, "loss": 0.2034, "step": 1772500 }, { "epoch": 81.56, "learning_rate": 9.21887938172785e-06, "loss": 0.2085, "step": 1773000 }, { "epoch": 81.59, "learning_rate": 9.207378783696754e-06, "loss": 0.2057, "step": 1773500 }, { "epoch": 81.61, "learning_rate": 9.195878185665654e-06, "loss": 0.2099, "step": 1774000 }, { "epoch": 81.63, "learning_rate": 9.184377587634557e-06, "loss": 0.2055, "step": 1774500 }, { "epoch": 81.65, "learning_rate": 9.17287698960346e-06, "loss": 0.2062, "step": 1775000 }, { "epoch": 81.68, "learning_rate": 9.161376391572364e-06, "loss": 0.205, "step": 1775500 }, { "epoch": 81.7, "learning_rate": 9.149875793541263e-06, "loss": 0.2048, "step": 1776000 }, { "epoch": 81.72, "learning_rate": 9.138375195510167e-06, "loss": 0.2074, "step": 1776500 }, { "epoch": 81.75, "learning_rate": 9.12687459747907e-06, "loss": 0.2081, "step": 1777000 }, { "epoch": 81.77, "learning_rate": 9.115373999447973e-06, "loss": 0.2051, "step": 1777500 }, { "epoch": 81.79, "learning_rate": 9.103873401416873e-06, "loss": 0.2011, "step": 1778000 }, { "epoch": 81.82, "learning_rate": 9.092372803385776e-06, "loss": 0.2038, "step": 1778500 }, { "epoch": 81.84, "learning_rate": 9.08087220535468e-06, "loss": 0.2052, "step": 1779000 }, { "epoch": 81.86, "learning_rate": 9.069371607323583e-06, "loss": 0.206, "step": 1779500 }, { "epoch": 81.88, "learning_rate": 9.057871009292482e-06, "loss": 0.2088, "step": 1780000 }, { "epoch": 81.91, "learning_rate": 9.046370411261386e-06, "loss": 0.2041, "step": 1780500 }, { "epoch": 81.93, "learning_rate": 9.034869813230289e-06, "loss": 0.2066, "step": 1781000 }, { "epoch": 81.95, "learning_rate": 9.023369215199192e-06, "loss": 0.2126, "step": 1781500 }, { "epoch": 81.98, "learning_rate": 9.011868617168092e-06, "loss": 0.2074, "step": 1782000 }, { "epoch": 82.0, "learning_rate": 9.000368019136995e-06, "loss": 0.2102, "step": 1782500 }, { "epoch": 82.02, "learning_rate": 8.988867421105899e-06, "loss": 0.2023, "step": 1783000 }, { "epoch": 82.05, "learning_rate": 8.977366823074802e-06, "loss": 0.1976, "step": 1783500 }, { "epoch": 82.07, "learning_rate": 8.965866225043702e-06, "loss": 0.1968, "step": 1784000 }, { "epoch": 82.09, "learning_rate": 8.954365627012605e-06, "loss": 0.1955, "step": 1784500 }, { "epoch": 82.11, "learning_rate": 8.942865028981508e-06, "loss": 0.2024, "step": 1785000 }, { "epoch": 82.14, "learning_rate": 8.931364430950411e-06, "loss": 0.2003, "step": 1785500 }, { "epoch": 82.16, "learning_rate": 8.919863832919311e-06, "loss": 0.1974, "step": 1786000 }, { "epoch": 82.18, "learning_rate": 8.908363234888214e-06, "loss": 0.1968, "step": 1786500 }, { "epoch": 82.21, "learning_rate": 8.896862636857118e-06, "loss": 0.1983, "step": 1787000 }, { "epoch": 82.23, "learning_rate": 8.88536203882602e-06, "loss": 0.1975, "step": 1787500 }, { "epoch": 82.25, "learning_rate": 8.87386144079492e-06, "loss": 0.1977, "step": 1788000 }, { "epoch": 82.28, "learning_rate": 8.862360842763824e-06, "loss": 0.2072, "step": 1788500 }, { "epoch": 82.3, "learning_rate": 8.850860244732727e-06, "loss": 0.206, "step": 1789000 }, { "epoch": 82.32, "learning_rate": 8.83935964670163e-06, "loss": 0.2037, "step": 1789500 }, { "epoch": 82.34, "learning_rate": 8.82785904867053e-06, "loss": 0.2041, "step": 1790000 }, { "epoch": 82.37, "learning_rate": 8.816358450639433e-06, "loss": 0.2065, "step": 1790500 }, { "epoch": 82.39, "learning_rate": 8.804857852608337e-06, "loss": 0.2007, "step": 1791000 }, { "epoch": 82.41, "learning_rate": 8.793357254577238e-06, "loss": 0.2007, "step": 1791500 }, { "epoch": 82.44, "learning_rate": 8.78185665654614e-06, "loss": 0.204, "step": 1792000 }, { "epoch": 82.46, "learning_rate": 8.770356058515043e-06, "loss": 0.2029, "step": 1792500 }, { "epoch": 82.48, "learning_rate": 8.758855460483946e-06, "loss": 0.2054, "step": 1793000 }, { "epoch": 82.51, "learning_rate": 8.747354862452848e-06, "loss": 0.2067, "step": 1793500 }, { "epoch": 82.53, "learning_rate": 8.73585426442175e-06, "loss": 0.2033, "step": 1794000 }, { "epoch": 82.55, "learning_rate": 8.724353666390653e-06, "loss": 0.2032, "step": 1794500 }, { "epoch": 82.57, "learning_rate": 8.712853068359556e-06, "loss": 0.2046, "step": 1795000 }, { "epoch": 82.6, "learning_rate": 8.701352470328457e-06, "loss": 0.203, "step": 1795500 }, { "epoch": 82.62, "learning_rate": 8.689851872297359e-06, "loss": 0.2065, "step": 1796000 }, { "epoch": 82.64, "learning_rate": 8.678351274266262e-06, "loss": 0.2074, "step": 1796500 }, { "epoch": 82.67, "learning_rate": 8.666850676235165e-06, "loss": 0.2075, "step": 1797000 }, { "epoch": 82.69, "learning_rate": 8.655350078204067e-06, "loss": 0.2044, "step": 1797500 }, { "epoch": 82.71, "learning_rate": 8.643849480172968e-06, "loss": 0.2054, "step": 1798000 }, { "epoch": 82.74, "learning_rate": 8.632348882141872e-06, "loss": 0.2082, "step": 1798500 }, { "epoch": 82.76, "learning_rate": 8.620848284110775e-06, "loss": 0.2026, "step": 1799000 }, { "epoch": 82.78, "learning_rate": 8.609347686079676e-06, "loss": 0.2026, "step": 1799500 }, { "epoch": 82.8, "learning_rate": 8.597847088048578e-06, "loss": 0.2031, "step": 1800000 }, { "epoch": 82.83, "learning_rate": 8.586346490017481e-06, "loss": 0.2084, "step": 1800500 }, { "epoch": 82.85, "learning_rate": 8.574845891986384e-06, "loss": 0.205, "step": 1801000 }, { "epoch": 82.87, "learning_rate": 8.563345293955286e-06, "loss": 0.2072, "step": 1801500 }, { "epoch": 82.9, "learning_rate": 8.551844695924188e-06, "loss": 0.2101, "step": 1802000 }, { "epoch": 82.92, "learning_rate": 8.54034409789309e-06, "loss": 0.2069, "step": 1802500 }, { "epoch": 82.94, "learning_rate": 8.528843499861994e-06, "loss": 0.2079, "step": 1803000 }, { "epoch": 82.97, "learning_rate": 8.517342901830896e-06, "loss": 0.2025, "step": 1803500 }, { "epoch": 82.99, "learning_rate": 8.505842303799797e-06, "loss": 0.2104, "step": 1804000 }, { "epoch": 83.01, "learning_rate": 8.4943417057687e-06, "loss": 0.1991, "step": 1804500 }, { "epoch": 83.03, "learning_rate": 8.482841107737604e-06, "loss": 0.198, "step": 1805000 }, { "epoch": 83.06, "learning_rate": 8.471340509706505e-06, "loss": 0.1976, "step": 1805500 }, { "epoch": 83.08, "learning_rate": 8.459839911675407e-06, "loss": 0.1998, "step": 1806000 }, { "epoch": 83.1, "learning_rate": 8.44833931364431e-06, "loss": 0.1961, "step": 1806500 }, { "epoch": 83.13, "learning_rate": 8.436838715613213e-06, "loss": 0.2029, "step": 1807000 }, { "epoch": 83.15, "learning_rate": 8.425338117582115e-06, "loss": 0.1954, "step": 1807500 }, { "epoch": 83.17, "learning_rate": 8.413837519551016e-06, "loss": 0.1967, "step": 1808000 }, { "epoch": 83.2, "learning_rate": 8.40233692151992e-06, "loss": 0.1975, "step": 1808500 }, { "epoch": 83.22, "learning_rate": 8.390836323488823e-06, "loss": 0.1998, "step": 1809000 }, { "epoch": 83.24, "learning_rate": 8.379335725457724e-06, "loss": 0.2024, "step": 1809500 }, { "epoch": 83.26, "learning_rate": 8.367835127426626e-06, "loss": 0.203, "step": 1810000 }, { "epoch": 83.29, "learning_rate": 8.356334529395529e-06, "loss": 0.2001, "step": 1810500 }, { "epoch": 83.31, "learning_rate": 8.344833931364432e-06, "loss": 0.2014, "step": 1811000 }, { "epoch": 83.33, "learning_rate": 8.333333333333334e-06, "loss": 0.2011, "step": 1811500 }, { "epoch": 83.36, "learning_rate": 8.321832735302235e-06, "loss": 0.2015, "step": 1812000 }, { "epoch": 83.38, "learning_rate": 8.310332137271139e-06, "loss": 0.2077, "step": 1812500 }, { "epoch": 83.4, "learning_rate": 8.298831539240042e-06, "loss": 0.2002, "step": 1813000 }, { "epoch": 83.43, "learning_rate": 8.287330941208943e-06, "loss": 0.2015, "step": 1813500 }, { "epoch": 83.45, "learning_rate": 8.275830343177845e-06, "loss": 0.2038, "step": 1814000 }, { "epoch": 83.47, "learning_rate": 8.264329745146748e-06, "loss": 0.2038, "step": 1814500 }, { "epoch": 83.49, "learning_rate": 8.252829147115651e-06, "loss": 0.1975, "step": 1815000 }, { "epoch": 83.52, "learning_rate": 8.241328549084553e-06, "loss": 0.201, "step": 1815500 }, { "epoch": 83.54, "learning_rate": 8.229827951053454e-06, "loss": 0.2069, "step": 1816000 }, { "epoch": 83.56, "learning_rate": 8.218327353022358e-06, "loss": 0.2028, "step": 1816500 }, { "epoch": 83.59, "learning_rate": 8.20682675499126e-06, "loss": 0.202, "step": 1817000 }, { "epoch": 83.61, "learning_rate": 8.195326156960162e-06, "loss": 0.2028, "step": 1817500 }, { "epoch": 83.63, "learning_rate": 8.183825558929064e-06, "loss": 0.2013, "step": 1818000 }, { "epoch": 83.66, "learning_rate": 8.172324960897967e-06, "loss": 0.2046, "step": 1818500 }, { "epoch": 83.68, "learning_rate": 8.16082436286687e-06, "loss": 0.204, "step": 1819000 }, { "epoch": 83.7, "learning_rate": 8.149323764835772e-06, "loss": 0.2032, "step": 1819500 }, { "epoch": 83.72, "learning_rate": 8.137823166804673e-06, "loss": 0.2106, "step": 1820000 }, { "epoch": 83.75, "learning_rate": 8.126322568773577e-06, "loss": 0.2008, "step": 1820500 }, { "epoch": 83.77, "learning_rate": 8.11482197074248e-06, "loss": 0.2066, "step": 1821000 }, { "epoch": 83.79, "learning_rate": 8.103321372711381e-06, "loss": 0.2039, "step": 1821500 }, { "epoch": 83.82, "learning_rate": 8.091820774680283e-06, "loss": 0.2094, "step": 1822000 }, { "epoch": 83.84, "learning_rate": 8.080320176649186e-06, "loss": 0.2064, "step": 1822500 }, { "epoch": 83.86, "learning_rate": 8.068819578618088e-06, "loss": 0.2131, "step": 1823000 }, { "epoch": 83.89, "learning_rate": 8.057318980586991e-06, "loss": 0.2038, "step": 1823500 }, { "epoch": 83.91, "learning_rate": 8.045818382555893e-06, "loss": 0.2077, "step": 1824000 }, { "epoch": 83.93, "learning_rate": 8.034317784524796e-06, "loss": 0.2099, "step": 1824500 }, { "epoch": 83.95, "learning_rate": 8.022817186493697e-06, "loss": 0.2094, "step": 1825000 }, { "epoch": 83.98, "learning_rate": 8.0113165884626e-06, "loss": 0.206, "step": 1825500 }, { "epoch": 84.0, "learning_rate": 7.999815990431502e-06, "loss": 0.2076, "step": 1826000 }, { "epoch": 84.02, "learning_rate": 7.988315392400405e-06, "loss": 0.1924, "step": 1826500 }, { "epoch": 84.05, "learning_rate": 7.976814794369307e-06, "loss": 0.1928, "step": 1827000 }, { "epoch": 84.07, "learning_rate": 7.96531419633821e-06, "loss": 0.1972, "step": 1827500 }, { "epoch": 84.09, "learning_rate": 7.953813598307112e-06, "loss": 0.199, "step": 1828000 }, { "epoch": 84.12, "learning_rate": 7.942313000276015e-06, "loss": 0.1946, "step": 1828500 }, { "epoch": 84.14, "learning_rate": 7.930812402244916e-06, "loss": 0.2007, "step": 1829000 }, { "epoch": 84.16, "learning_rate": 7.91931180421382e-06, "loss": 0.1982, "step": 1829500 }, { "epoch": 84.18, "learning_rate": 7.907811206182721e-06, "loss": 0.1999, "step": 1830000 }, { "epoch": 84.21, "learning_rate": 7.896310608151624e-06, "loss": 0.2047, "step": 1830500 }, { "epoch": 84.23, "learning_rate": 7.884810010120526e-06, "loss": 0.2017, "step": 1831000 }, { "epoch": 84.25, "learning_rate": 7.87330941208943e-06, "loss": 0.1975, "step": 1831500 }, { "epoch": 84.28, "learning_rate": 7.86180881405833e-06, "loss": 0.2009, "step": 1832000 }, { "epoch": 84.3, "learning_rate": 7.850308216027234e-06, "loss": 0.198, "step": 1832500 }, { "epoch": 84.32, "learning_rate": 7.838807617996136e-06, "loss": 0.2026, "step": 1833000 }, { "epoch": 84.35, "learning_rate": 7.827307019965039e-06, "loss": 0.1994, "step": 1833500 }, { "epoch": 84.37, "learning_rate": 7.81580642193394e-06, "loss": 0.2009, "step": 1834000 }, { "epoch": 84.39, "learning_rate": 7.804305823902844e-06, "loss": 0.1987, "step": 1834500 }, { "epoch": 84.41, "learning_rate": 7.792805225871745e-06, "loss": 0.1979, "step": 1835000 }, { "epoch": 84.44, "learning_rate": 7.781304627840648e-06, "loss": 0.2044, "step": 1835500 }, { "epoch": 84.46, "learning_rate": 7.76980402980955e-06, "loss": 0.1999, "step": 1836000 }, { "epoch": 84.48, "learning_rate": 7.758303431778453e-06, "loss": 0.1994, "step": 1836500 }, { "epoch": 84.51, "learning_rate": 7.746802833747355e-06, "loss": 0.2039, "step": 1837000 }, { "epoch": 84.53, "learning_rate": 7.735302235716258e-06, "loss": 0.2045, "step": 1837500 }, { "epoch": 84.55, "learning_rate": 7.72380163768516e-06, "loss": 0.206, "step": 1838000 }, { "epoch": 84.58, "learning_rate": 7.712301039654063e-06, "loss": 0.2029, "step": 1838500 }, { "epoch": 84.6, "learning_rate": 7.700800441622964e-06, "loss": 0.2023, "step": 1839000 }, { "epoch": 84.62, "learning_rate": 7.689299843591867e-06, "loss": 0.204, "step": 1839500 }, { "epoch": 84.64, "learning_rate": 7.677799245560769e-06, "loss": 0.2026, "step": 1840000 }, { "epoch": 84.67, "learning_rate": 7.666298647529672e-06, "loss": 0.2033, "step": 1840500 }, { "epoch": 84.69, "learning_rate": 7.654798049498574e-06, "loss": 0.2019, "step": 1841000 }, { "epoch": 84.71, "learning_rate": 7.643297451467477e-06, "loss": 0.2059, "step": 1841500 }, { "epoch": 84.74, "learning_rate": 7.631796853436379e-06, "loss": 0.2012, "step": 1842000 }, { "epoch": 84.76, "learning_rate": 7.620296255405282e-06, "loss": 0.2009, "step": 1842500 }, { "epoch": 84.78, "learning_rate": 7.608795657374183e-06, "loss": 0.2092, "step": 1843000 }, { "epoch": 84.81, "learning_rate": 7.5972950593430865e-06, "loss": 0.2051, "step": 1843500 }, { "epoch": 84.83, "learning_rate": 7.585794461311989e-06, "loss": 0.2051, "step": 1844000 }, { "epoch": 84.85, "learning_rate": 7.574293863280891e-06, "loss": 0.209, "step": 1844500 }, { "epoch": 84.87, "learning_rate": 7.562793265249793e-06, "loss": 0.2062, "step": 1845000 }, { "epoch": 84.9, "learning_rate": 7.551292667218696e-06, "loss": 0.2076, "step": 1845500 }, { "epoch": 84.92, "learning_rate": 7.5397920691875985e-06, "loss": 0.2016, "step": 1846000 }, { "epoch": 84.94, "learning_rate": 7.528291471156501e-06, "loss": 0.2063, "step": 1846500 }, { "epoch": 84.97, "learning_rate": 7.516790873125402e-06, "loss": 0.2044, "step": 1847000 }, { "epoch": 84.99, "learning_rate": 7.505290275094306e-06, "loss": 0.2046, "step": 1847500 }, { "epoch": 85.01, "learning_rate": 7.493789677063208e-06, "loss": 0.199, "step": 1848000 }, { "epoch": 85.04, "learning_rate": 7.48228907903211e-06, "loss": 0.1955, "step": 1848500 }, { "epoch": 85.06, "learning_rate": 7.470788481001012e-06, "loss": 0.1934, "step": 1849000 }, { "epoch": 85.08, "learning_rate": 7.459287882969915e-06, "loss": 0.1944, "step": 1849500 }, { "epoch": 85.1, "learning_rate": 7.4477872849388176e-06, "loss": 0.1953, "step": 1850000 }, { "epoch": 85.13, "learning_rate": 7.43628668690772e-06, "loss": 0.1953, "step": 1850500 }, { "epoch": 85.15, "learning_rate": 7.4247860888766215e-06, "loss": 0.1993, "step": 1851000 }, { "epoch": 85.17, "learning_rate": 7.413285490845525e-06, "loss": 0.2019, "step": 1851500 }, { "epoch": 85.2, "learning_rate": 7.401784892814427e-06, "loss": 0.1976, "step": 1852000 }, { "epoch": 85.22, "learning_rate": 7.390284294783329e-06, "loss": 0.1992, "step": 1852500 }, { "epoch": 85.24, "learning_rate": 7.378783696752231e-06, "loss": 0.1996, "step": 1853000 }, { "epoch": 85.27, "learning_rate": 7.367283098721134e-06, "loss": 0.197, "step": 1853500 }, { "epoch": 85.29, "learning_rate": 7.355782500690037e-06, "loss": 0.1989, "step": 1854000 }, { "epoch": 85.31, "learning_rate": 7.344281902658938e-06, "loss": 0.1983, "step": 1854500 }, { "epoch": 85.33, "learning_rate": 7.332781304627841e-06, "loss": 0.1972, "step": 1855000 } ], "max_steps": 2173800, "num_train_epochs": 100, "total_flos": 1.52506321841664e+17, "trial_name": null, "trial_params": null }