{ "best_metric": null, "best_model_checkpoint": null, "epoch": 62.24066390041494, "global_step": 870000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 4.9982114751752754e-05, "loss": 4.1215, "step": 500 }, { "epoch": 0.07, "learning_rate": 4.996422950350551e-05, "loss": 3.3722, "step": 1000 }, { "epoch": 0.11, "learning_rate": 4.9946344255258265e-05, "loss": 3.0718, "step": 1500 }, { "epoch": 0.14, "learning_rate": 4.9928459007011016e-05, "loss": 2.944, "step": 2000 }, { "epoch": 0.18, "learning_rate": 4.9910573758763775e-05, "loss": 2.7645, "step": 2500 }, { "epoch": 0.21, "learning_rate": 4.989268851051653e-05, "loss": 2.7008, "step": 3000 }, { "epoch": 0.25, "learning_rate": 4.987480326226928e-05, "loss": 2.6103, "step": 3500 }, { "epoch": 0.29, "learning_rate": 4.985691801402204e-05, "loss": 2.5739, "step": 4000 }, { "epoch": 0.32, "learning_rate": 4.983903276577479e-05, "loss": 2.4965, "step": 4500 }, { "epoch": 0.36, "learning_rate": 4.982114751752755e-05, "loss": 2.4231, "step": 5000 }, { "epoch": 0.39, "learning_rate": 4.98032622692803e-05, "loss": 2.3954, "step": 5500 }, { "epoch": 0.43, "learning_rate": 4.978537702103305e-05, "loss": 2.365, "step": 6000 }, { "epoch": 0.47, "learning_rate": 4.976749177278581e-05, "loss": 2.3183, "step": 6500 }, { "epoch": 0.5, "learning_rate": 4.974960652453856e-05, "loss": 2.3116, "step": 7000 }, { "epoch": 0.54, "learning_rate": 4.973172127629131e-05, "loss": 2.2515, "step": 7500 }, { "epoch": 0.57, "learning_rate": 4.971383602804407e-05, "loss": 2.2368, "step": 8000 }, { "epoch": 0.61, "learning_rate": 4.9695950779796824e-05, "loss": 2.2348, "step": 8500 }, { "epoch": 0.64, "learning_rate": 4.9678065531549575e-05, "loss": 2.2251, "step": 9000 }, { "epoch": 0.68, "learning_rate": 4.9660180283302334e-05, "loss": 2.1608, "step": 9500 }, { "epoch": 0.72, "learning_rate": 4.9642295035055086e-05, "loss": 2.1046, "step": 10000 }, { "epoch": 0.75, "learning_rate": 4.962440978680784e-05, "loss": 2.1329, "step": 10500 }, { "epoch": 0.79, "learning_rate": 4.96065245385606e-05, "loss": 2.103, "step": 11000 }, { "epoch": 0.82, "learning_rate": 4.9588639290313355e-05, "loss": 2.107, "step": 11500 }, { "epoch": 0.86, "learning_rate": 4.9570754042066106e-05, "loss": 2.0909, "step": 12000 }, { "epoch": 0.89, "learning_rate": 4.9552868793818865e-05, "loss": 2.0483, "step": 12500 }, { "epoch": 0.93, "learning_rate": 4.953498354557162e-05, "loss": 2.0902, "step": 13000 }, { "epoch": 0.97, "learning_rate": 4.951709829732437e-05, "loss": 2.0132, "step": 13500 }, { "epoch": 1.0, "learning_rate": 4.949921304907713e-05, "loss": 1.9799, "step": 14000 }, { "epoch": 1.04, "learning_rate": 4.948132780082988e-05, "loss": 1.8414, "step": 14500 }, { "epoch": 1.07, "learning_rate": 4.946344255258263e-05, "loss": 1.8561, "step": 15000 }, { "epoch": 1.11, "learning_rate": 4.944555730433539e-05, "loss": 1.8412, "step": 15500 }, { "epoch": 1.14, "learning_rate": 4.942767205608814e-05, "loss": 1.8654, "step": 16000 }, { "epoch": 1.18, "learning_rate": 4.94097868078409e-05, "loss": 1.8863, "step": 16500 }, { "epoch": 1.22, "learning_rate": 4.939190155959365e-05, "loss": 1.8676, "step": 17000 }, { "epoch": 1.25, "learning_rate": 4.93740163113464e-05, "loss": 1.8614, "step": 17500 }, { "epoch": 1.29, "learning_rate": 4.935613106309916e-05, "loss": 1.8483, "step": 18000 }, { "epoch": 1.32, "learning_rate": 4.9338245814851914e-05, "loss": 1.7865, "step": 18500 }, { "epoch": 1.36, "learning_rate": 4.9320360566604665e-05, "loss": 1.7937, "step": 19000 }, { "epoch": 1.4, "learning_rate": 4.9302475318357424e-05, "loss": 1.8259, "step": 19500 }, { "epoch": 1.43, "learning_rate": 4.9284590070110176e-05, "loss": 1.818, "step": 20000 }, { "epoch": 1.47, "learning_rate": 4.926670482186293e-05, "loss": 1.8051, "step": 20500 }, { "epoch": 1.5, "learning_rate": 4.9248819573615686e-05, "loss": 1.8204, "step": 21000 }, { "epoch": 1.54, "learning_rate": 4.923093432536844e-05, "loss": 1.7976, "step": 21500 }, { "epoch": 1.57, "learning_rate": 4.921304907712119e-05, "loss": 1.805, "step": 22000 }, { "epoch": 1.61, "learning_rate": 4.919516382887395e-05, "loss": 1.7588, "step": 22500 }, { "epoch": 1.65, "learning_rate": 4.91772785806267e-05, "loss": 1.7988, "step": 23000 }, { "epoch": 1.68, "learning_rate": 4.915939333237946e-05, "loss": 1.7542, "step": 23500 }, { "epoch": 1.72, "learning_rate": 4.914150808413221e-05, "loss": 1.7743, "step": 24000 }, { "epoch": 1.75, "learning_rate": 4.912362283588496e-05, "loss": 1.7507, "step": 24500 }, { "epoch": 1.79, "learning_rate": 4.910573758763772e-05, "loss": 1.8013, "step": 25000 }, { "epoch": 1.82, "learning_rate": 4.908785233939047e-05, "loss": 1.7607, "step": 25500 }, { "epoch": 1.86, "learning_rate": 4.9069967091143224e-05, "loss": 1.7696, "step": 26000 }, { "epoch": 1.9, "learning_rate": 4.905208184289598e-05, "loss": 1.7501, "step": 26500 }, { "epoch": 1.93, "learning_rate": 4.9034196594648735e-05, "loss": 1.7406, "step": 27000 }, { "epoch": 1.97, "learning_rate": 4.9016311346401487e-05, "loss": 1.7568, "step": 27500 }, { "epoch": 2.0, "learning_rate": 4.8998426098154245e-05, "loss": 1.6709, "step": 28000 }, { "epoch": 2.04, "learning_rate": 4.8980540849907e-05, "loss": 1.5369, "step": 28500 }, { "epoch": 2.07, "learning_rate": 4.896265560165975e-05, "loss": 1.54, "step": 29000 }, { "epoch": 2.11, "learning_rate": 4.894477035341251e-05, "loss": 1.5577, "step": 29500 }, { "epoch": 2.15, "learning_rate": 4.892688510516526e-05, "loss": 1.5302, "step": 30000 }, { "epoch": 2.18, "learning_rate": 4.890899985691801e-05, "loss": 1.5484, "step": 30500 }, { "epoch": 2.22, "learning_rate": 4.889111460867077e-05, "loss": 1.5573, "step": 31000 }, { "epoch": 2.25, "learning_rate": 4.887322936042352e-05, "loss": 1.5223, "step": 31500 }, { "epoch": 2.29, "learning_rate": 4.885534411217628e-05, "loss": 1.5303, "step": 32000 }, { "epoch": 2.33, "learning_rate": 4.883745886392903e-05, "loss": 1.5337, "step": 32500 }, { "epoch": 2.36, "learning_rate": 4.881957361568179e-05, "loss": 1.5726, "step": 33000 }, { "epoch": 2.4, "learning_rate": 4.880168836743454e-05, "loss": 1.5842, "step": 33500 }, { "epoch": 2.43, "learning_rate": 4.87838031191873e-05, "loss": 1.5437, "step": 34000 }, { "epoch": 2.47, "learning_rate": 4.876591787094005e-05, "loss": 1.5371, "step": 34500 }, { "epoch": 2.5, "learning_rate": 4.8748032622692804e-05, "loss": 1.5382, "step": 35000 }, { "epoch": 2.54, "learning_rate": 4.873014737444556e-05, "loss": 1.5205, "step": 35500 }, { "epoch": 2.58, "learning_rate": 4.8712262126198314e-05, "loss": 1.5504, "step": 36000 }, { "epoch": 2.61, "learning_rate": 4.869437687795107e-05, "loss": 1.5382, "step": 36500 }, { "epoch": 2.65, "learning_rate": 4.8676491629703825e-05, "loss": 1.5698, "step": 37000 }, { "epoch": 2.68, "learning_rate": 4.865860638145658e-05, "loss": 1.5631, "step": 37500 }, { "epoch": 2.72, "learning_rate": 4.8640721133209335e-05, "loss": 1.5449, "step": 38000 }, { "epoch": 2.75, "learning_rate": 4.862283588496209e-05, "loss": 1.5287, "step": 38500 }, { "epoch": 2.79, "learning_rate": 4.860495063671484e-05, "loss": 1.5721, "step": 39000 }, { "epoch": 2.83, "learning_rate": 4.85870653884676e-05, "loss": 1.5536, "step": 39500 }, { "epoch": 2.86, "learning_rate": 4.856918014022035e-05, "loss": 1.5328, "step": 40000 }, { "epoch": 2.9, "learning_rate": 4.85512948919731e-05, "loss": 1.557, "step": 40500 }, { "epoch": 2.93, "learning_rate": 4.853340964372586e-05, "loss": 1.5637, "step": 41000 }, { "epoch": 2.97, "learning_rate": 4.851552439547861e-05, "loss": 1.552, "step": 41500 }, { "epoch": 3.0, "learning_rate": 4.849763914723136e-05, "loss": 1.5156, "step": 42000 }, { "epoch": 3.04, "learning_rate": 4.847975389898412e-05, "loss": 1.2963, "step": 42500 }, { "epoch": 3.08, "learning_rate": 4.8461868650736873e-05, "loss": 1.3098, "step": 43000 }, { "epoch": 3.11, "learning_rate": 4.844398340248963e-05, "loss": 1.3217, "step": 43500 }, { "epoch": 3.15, "learning_rate": 4.8426098154242384e-05, "loss": 1.3361, "step": 44000 }, { "epoch": 3.18, "learning_rate": 4.8408212905995136e-05, "loss": 1.3243, "step": 44500 }, { "epoch": 3.22, "learning_rate": 4.8390327657747894e-05, "loss": 1.3336, "step": 45000 }, { "epoch": 3.26, "learning_rate": 4.8372442409500646e-05, "loss": 1.3447, "step": 45500 }, { "epoch": 3.29, "learning_rate": 4.83545571612534e-05, "loss": 1.3436, "step": 46000 }, { "epoch": 3.33, "learning_rate": 4.8336671913006156e-05, "loss": 1.3384, "step": 46500 }, { "epoch": 3.36, "learning_rate": 4.831878666475891e-05, "loss": 1.3493, "step": 47000 }, { "epoch": 3.4, "learning_rate": 4.830090141651166e-05, "loss": 1.3428, "step": 47500 }, { "epoch": 3.43, "learning_rate": 4.828301616826442e-05, "loss": 1.3309, "step": 48000 }, { "epoch": 3.47, "learning_rate": 4.826513092001717e-05, "loss": 1.3716, "step": 48500 }, { "epoch": 3.51, "learning_rate": 4.824724567176992e-05, "loss": 1.326, "step": 49000 }, { "epoch": 3.54, "learning_rate": 4.822936042352268e-05, "loss": 1.3752, "step": 49500 }, { "epoch": 3.58, "learning_rate": 4.821147517527543e-05, "loss": 1.3721, "step": 50000 }, { "epoch": 3.61, "learning_rate": 4.819358992702819e-05, "loss": 1.3383, "step": 50500 }, { "epoch": 3.65, "learning_rate": 4.817570467878094e-05, "loss": 1.3797, "step": 51000 }, { "epoch": 3.68, "learning_rate": 4.8157819430533695e-05, "loss": 1.3499, "step": 51500 }, { "epoch": 3.72, "learning_rate": 4.813993418228645e-05, "loss": 1.3518, "step": 52000 }, { "epoch": 3.76, "learning_rate": 4.8122048934039205e-05, "loss": 1.3707, "step": 52500 }, { "epoch": 3.79, "learning_rate": 4.810416368579196e-05, "loss": 1.3867, "step": 53000 }, { "epoch": 3.83, "learning_rate": 4.8086278437544715e-05, "loss": 1.3884, "step": 53500 }, { "epoch": 3.86, "learning_rate": 4.806839318929747e-05, "loss": 1.3647, "step": 54000 }, { "epoch": 3.9, "learning_rate": 4.8050507941050226e-05, "loss": 1.4088, "step": 54500 }, { "epoch": 3.93, "learning_rate": 4.8032622692802984e-05, "loss": 1.375, "step": 55000 }, { "epoch": 3.97, "learning_rate": 4.8014737444555736e-05, "loss": 1.3769, "step": 55500 }, { "epoch": 4.01, "learning_rate": 4.799685219630849e-05, "loss": 1.33, "step": 56000 }, { "epoch": 4.04, "learning_rate": 4.7978966948061246e-05, "loss": 1.0742, "step": 56500 }, { "epoch": 4.08, "learning_rate": 4.7961081699814e-05, "loss": 1.1, "step": 57000 }, { "epoch": 4.11, "learning_rate": 4.794319645156675e-05, "loss": 1.1375, "step": 57500 }, { "epoch": 4.15, "learning_rate": 4.792531120331951e-05, "loss": 1.1721, "step": 58000 }, { "epoch": 4.19, "learning_rate": 4.790742595507226e-05, "loss": 1.1278, "step": 58500 }, { "epoch": 4.22, "learning_rate": 4.788954070682501e-05, "loss": 1.1652, "step": 59000 }, { "epoch": 4.26, "learning_rate": 4.787165545857777e-05, "loss": 1.177, "step": 59500 }, { "epoch": 4.29, "learning_rate": 4.785377021033052e-05, "loss": 1.1758, "step": 60000 }, { "epoch": 4.33, "learning_rate": 4.7835884962083274e-05, "loss": 1.208, "step": 60500 }, { "epoch": 4.36, "learning_rate": 4.781799971383603e-05, "loss": 1.1889, "step": 61000 }, { "epoch": 4.4, "learning_rate": 4.7800114465588785e-05, "loss": 1.1945, "step": 61500 }, { "epoch": 4.44, "learning_rate": 4.7782229217341536e-05, "loss": 1.1915, "step": 62000 }, { "epoch": 4.47, "learning_rate": 4.7764343969094295e-05, "loss": 1.1849, "step": 62500 }, { "epoch": 4.51, "learning_rate": 4.774645872084705e-05, "loss": 1.1912, "step": 63000 }, { "epoch": 4.54, "learning_rate": 4.7728573472599805e-05, "loss": 1.1983, "step": 63500 }, { "epoch": 4.58, "learning_rate": 4.771068822435256e-05, "loss": 1.2211, "step": 64000 }, { "epoch": 4.61, "learning_rate": 4.769280297610531e-05, "loss": 1.1834, "step": 64500 }, { "epoch": 4.65, "learning_rate": 4.767491772785807e-05, "loss": 1.2044, "step": 65000 }, { "epoch": 4.69, "learning_rate": 4.765703247961082e-05, "loss": 1.1973, "step": 65500 }, { "epoch": 4.72, "learning_rate": 4.763914723136357e-05, "loss": 1.2021, "step": 66000 }, { "epoch": 4.76, "learning_rate": 4.762126198311633e-05, "loss": 1.2255, "step": 66500 }, { "epoch": 4.79, "learning_rate": 4.760337673486908e-05, "loss": 1.2106, "step": 67000 }, { "epoch": 4.83, "learning_rate": 4.758549148662183e-05, "loss": 1.2529, "step": 67500 }, { "epoch": 4.86, "learning_rate": 4.756760623837459e-05, "loss": 1.2126, "step": 68000 }, { "epoch": 4.9, "learning_rate": 4.7549720990127344e-05, "loss": 1.2128, "step": 68500 }, { "epoch": 4.94, "learning_rate": 4.7531835741880095e-05, "loss": 1.2421, "step": 69000 }, { "epoch": 4.97, "learning_rate": 4.7513950493632854e-05, "loss": 1.2148, "step": 69500 }, { "epoch": 5.01, "learning_rate": 4.7496065245385606e-05, "loss": 1.1539, "step": 70000 }, { "epoch": 5.04, "learning_rate": 4.7478179997138364e-05, "loss": 0.9842, "step": 70500 }, { "epoch": 5.08, "learning_rate": 4.7460294748891116e-05, "loss": 0.9861, "step": 71000 }, { "epoch": 5.12, "learning_rate": 4.744240950064387e-05, "loss": 1.0112, "step": 71500 }, { "epoch": 5.15, "learning_rate": 4.7424524252396627e-05, "loss": 1.0102, "step": 72000 }, { "epoch": 5.19, "learning_rate": 4.740663900414938e-05, "loss": 0.9797, "step": 72500 }, { "epoch": 5.22, "learning_rate": 4.738875375590213e-05, "loss": 1.0238, "step": 73000 }, { "epoch": 5.26, "learning_rate": 4.737086850765489e-05, "loss": 0.9947, "step": 73500 }, { "epoch": 5.29, "learning_rate": 4.735298325940764e-05, "loss": 1.0288, "step": 74000 }, { "epoch": 5.33, "learning_rate": 4.733509801116039e-05, "loss": 1.0148, "step": 74500 }, { "epoch": 5.37, "learning_rate": 4.731721276291315e-05, "loss": 1.0149, "step": 75000 }, { "epoch": 5.4, "learning_rate": 4.72993275146659e-05, "loss": 1.015, "step": 75500 }, { "epoch": 5.44, "learning_rate": 4.728144226641866e-05, "loss": 1.0409, "step": 76000 }, { "epoch": 5.47, "learning_rate": 4.726355701817142e-05, "loss": 1.0343, "step": 76500 }, { "epoch": 5.51, "learning_rate": 4.724567176992417e-05, "loss": 1.039, "step": 77000 }, { "epoch": 5.54, "learning_rate": 4.7227786521676923e-05, "loss": 1.0662, "step": 77500 }, { "epoch": 5.58, "learning_rate": 4.720990127342968e-05, "loss": 1.0426, "step": 78000 }, { "epoch": 5.62, "learning_rate": 4.7192016025182434e-05, "loss": 1.0727, "step": 78500 }, { "epoch": 5.65, "learning_rate": 4.7174130776935186e-05, "loss": 1.0579, "step": 79000 }, { "epoch": 5.69, "learning_rate": 4.7156245528687944e-05, "loss": 1.0527, "step": 79500 }, { "epoch": 5.72, "learning_rate": 4.7138360280440696e-05, "loss": 1.0907, "step": 80000 }, { "epoch": 5.76, "learning_rate": 4.712047503219345e-05, "loss": 1.0797, "step": 80500 }, { "epoch": 5.79, "learning_rate": 4.7102589783946206e-05, "loss": 1.0814, "step": 81000 }, { "epoch": 5.83, "learning_rate": 4.708470453569896e-05, "loss": 1.0784, "step": 81500 }, { "epoch": 5.87, "learning_rate": 4.706681928745172e-05, "loss": 1.0639, "step": 82000 }, { "epoch": 5.9, "learning_rate": 4.704893403920447e-05, "loss": 1.0881, "step": 82500 }, { "epoch": 5.94, "learning_rate": 4.703104879095722e-05, "loss": 1.0744, "step": 83000 }, { "epoch": 5.97, "learning_rate": 4.701316354270998e-05, "loss": 1.1065, "step": 83500 }, { "epoch": 6.01, "learning_rate": 4.699527829446273e-05, "loss": 1.037, "step": 84000 }, { "epoch": 6.05, "learning_rate": 4.697739304621548e-05, "loss": 0.8384, "step": 84500 }, { "epoch": 6.08, "learning_rate": 4.695950779796824e-05, "loss": 0.8547, "step": 85000 }, { "epoch": 6.12, "learning_rate": 4.694162254972099e-05, "loss": 0.869, "step": 85500 }, { "epoch": 6.15, "learning_rate": 4.6923737301473745e-05, "loss": 0.8673, "step": 86000 }, { "epoch": 6.19, "learning_rate": 4.69058520532265e-05, "loss": 0.8863, "step": 86500 }, { "epoch": 6.22, "learning_rate": 4.6887966804979255e-05, "loss": 0.867, "step": 87000 }, { "epoch": 6.26, "learning_rate": 4.687008155673201e-05, "loss": 0.908, "step": 87500 }, { "epoch": 6.3, "learning_rate": 4.6852196308484765e-05, "loss": 0.9023, "step": 88000 }, { "epoch": 6.33, "learning_rate": 4.683431106023752e-05, "loss": 0.9064, "step": 88500 }, { "epoch": 6.37, "learning_rate": 4.681642581199027e-05, "loss": 0.8924, "step": 89000 }, { "epoch": 6.4, "learning_rate": 4.679854056374303e-05, "loss": 0.8938, "step": 89500 }, { "epoch": 6.44, "learning_rate": 4.678065531549578e-05, "loss": 0.9082, "step": 90000 }, { "epoch": 6.47, "learning_rate": 4.676277006724854e-05, "loss": 0.9036, "step": 90500 }, { "epoch": 6.51, "learning_rate": 4.674488481900129e-05, "loss": 0.9085, "step": 91000 }, { "epoch": 6.55, "learning_rate": 4.672699957075404e-05, "loss": 0.9263, "step": 91500 }, { "epoch": 6.58, "learning_rate": 4.67091143225068e-05, "loss": 0.9085, "step": 92000 }, { "epoch": 6.62, "learning_rate": 4.669122907425955e-05, "loss": 0.9289, "step": 92500 }, { "epoch": 6.65, "learning_rate": 4.6673343826012304e-05, "loss": 0.9407, "step": 93000 }, { "epoch": 6.69, "learning_rate": 4.665545857776506e-05, "loss": 0.9326, "step": 93500 }, { "epoch": 6.72, "learning_rate": 4.6637573329517814e-05, "loss": 0.9551, "step": 94000 }, { "epoch": 6.76, "learning_rate": 4.6619688081270566e-05, "loss": 0.9407, "step": 94500 }, { "epoch": 6.8, "learning_rate": 4.6601802833023324e-05, "loss": 0.9694, "step": 95000 }, { "epoch": 6.83, "learning_rate": 4.6583917584776076e-05, "loss": 0.9743, "step": 95500 }, { "epoch": 6.87, "learning_rate": 4.656603233652883e-05, "loss": 0.9643, "step": 96000 }, { "epoch": 6.9, "learning_rate": 4.6548147088281586e-05, "loss": 0.9497, "step": 96500 }, { "epoch": 6.94, "learning_rate": 4.653026184003434e-05, "loss": 0.9532, "step": 97000 }, { "epoch": 6.98, "learning_rate": 4.65123765917871e-05, "loss": 0.9539, "step": 97500 }, { "epoch": 7.01, "learning_rate": 4.6494491343539855e-05, "loss": 0.8958, "step": 98000 }, { "epoch": 7.05, "learning_rate": 4.647660609529261e-05, "loss": 0.709, "step": 98500 }, { "epoch": 7.08, "learning_rate": 4.645872084704536e-05, "loss": 0.727, "step": 99000 }, { "epoch": 7.12, "learning_rate": 4.644083559879812e-05, "loss": 0.7395, "step": 99500 }, { "epoch": 7.15, "learning_rate": 4.642295035055087e-05, "loss": 0.756, "step": 100000 }, { "epoch": 7.19, "learning_rate": 4.640506510230362e-05, "loss": 0.7668, "step": 100500 }, { "epoch": 7.23, "learning_rate": 4.638717985405638e-05, "loss": 0.7806, "step": 101000 }, { "epoch": 7.26, "learning_rate": 4.636929460580913e-05, "loss": 0.7723, "step": 101500 }, { "epoch": 7.3, "learning_rate": 4.635140935756189e-05, "loss": 0.776, "step": 102000 }, { "epoch": 7.33, "learning_rate": 4.633352410931464e-05, "loss": 0.7923, "step": 102500 }, { "epoch": 7.37, "learning_rate": 4.6315638861067394e-05, "loss": 0.7955, "step": 103000 }, { "epoch": 7.4, "learning_rate": 4.629775361282015e-05, "loss": 0.8003, "step": 103500 }, { "epoch": 7.44, "learning_rate": 4.6279868364572904e-05, "loss": 0.8045, "step": 104000 }, { "epoch": 7.48, "learning_rate": 4.6261983116325656e-05, "loss": 0.8138, "step": 104500 }, { "epoch": 7.51, "learning_rate": 4.6244097868078414e-05, "loss": 0.8075, "step": 105000 }, { "epoch": 7.55, "learning_rate": 4.6226212619831166e-05, "loss": 0.8381, "step": 105500 }, { "epoch": 7.58, "learning_rate": 4.620832737158392e-05, "loss": 0.8036, "step": 106000 }, { "epoch": 7.62, "learning_rate": 4.6190442123336676e-05, "loss": 0.8151, "step": 106500 }, { "epoch": 7.65, "learning_rate": 4.617255687508943e-05, "loss": 0.8434, "step": 107000 }, { "epoch": 7.69, "learning_rate": 4.615467162684218e-05, "loss": 0.8278, "step": 107500 }, { "epoch": 7.73, "learning_rate": 4.613678637859494e-05, "loss": 0.8461, "step": 108000 }, { "epoch": 7.76, "learning_rate": 4.611890113034769e-05, "loss": 0.8149, "step": 108500 }, { "epoch": 7.8, "learning_rate": 4.610101588210045e-05, "loss": 0.8384, "step": 109000 }, { "epoch": 7.83, "learning_rate": 4.60831306338532e-05, "loss": 0.8548, "step": 109500 }, { "epoch": 7.87, "learning_rate": 4.606524538560595e-05, "loss": 0.8473, "step": 110000 }, { "epoch": 7.91, "learning_rate": 4.604736013735871e-05, "loss": 0.8604, "step": 110500 }, { "epoch": 7.94, "learning_rate": 4.602947488911146e-05, "loss": 0.8566, "step": 111000 }, { "epoch": 7.98, "learning_rate": 4.6011589640864215e-05, "loss": 0.8816, "step": 111500 }, { "epoch": 8.01, "learning_rate": 4.599370439261697e-05, "loss": 0.7863, "step": 112000 }, { "epoch": 8.05, "learning_rate": 4.5975819144369725e-05, "loss": 0.6376, "step": 112500 }, { "epoch": 8.08, "learning_rate": 4.595793389612248e-05, "loss": 0.6403, "step": 113000 }, { "epoch": 8.12, "learning_rate": 4.5940048647875235e-05, "loss": 0.6446, "step": 113500 }, { "epoch": 8.16, "learning_rate": 4.592216339962799e-05, "loss": 0.6589, "step": 114000 }, { "epoch": 8.19, "learning_rate": 4.590427815138074e-05, "loss": 0.6604, "step": 114500 }, { "epoch": 8.23, "learning_rate": 4.58863929031335e-05, "loss": 0.6791, "step": 115000 }, { "epoch": 8.26, "learning_rate": 4.586850765488625e-05, "loss": 0.6944, "step": 115500 }, { "epoch": 8.3, "learning_rate": 4.5850622406639e-05, "loss": 0.682, "step": 116000 }, { "epoch": 8.33, "learning_rate": 4.583273715839176e-05, "loss": 0.6984, "step": 116500 }, { "epoch": 8.37, "learning_rate": 4.581485191014451e-05, "loss": 0.6765, "step": 117000 }, { "epoch": 8.41, "learning_rate": 4.579696666189727e-05, "loss": 0.7103, "step": 117500 }, { "epoch": 8.44, "learning_rate": 4.577908141365002e-05, "loss": 0.7166, "step": 118000 }, { "epoch": 8.48, "learning_rate": 4.5761196165402774e-05, "loss": 0.7078, "step": 118500 }, { "epoch": 8.51, "learning_rate": 4.574331091715553e-05, "loss": 0.7309, "step": 119000 }, { "epoch": 8.55, "learning_rate": 4.572542566890829e-05, "loss": 0.7273, "step": 119500 }, { "epoch": 8.58, "learning_rate": 4.570754042066104e-05, "loss": 0.73, "step": 120000 }, { "epoch": 8.62, "learning_rate": 4.5689655172413794e-05, "loss": 0.7467, "step": 120500 }, { "epoch": 8.66, "learning_rate": 4.567176992416655e-05, "loss": 0.7282, "step": 121000 }, { "epoch": 8.69, "learning_rate": 4.5653884675919305e-05, "loss": 0.7432, "step": 121500 }, { "epoch": 8.73, "learning_rate": 4.5635999427672063e-05, "loss": 0.7357, "step": 122000 }, { "epoch": 8.76, "learning_rate": 4.5618114179424815e-05, "loss": 0.7583, "step": 122500 }, { "epoch": 8.8, "learning_rate": 4.560022893117757e-05, "loss": 0.7458, "step": 123000 }, { "epoch": 8.84, "learning_rate": 4.5582343682930326e-05, "loss": 0.7393, "step": 123500 }, { "epoch": 8.87, "learning_rate": 4.556445843468308e-05, "loss": 0.7569, "step": 124000 }, { "epoch": 8.91, "learning_rate": 4.554657318643583e-05, "loss": 0.7656, "step": 124500 }, { "epoch": 8.94, "learning_rate": 4.552868793818859e-05, "loss": 0.7781, "step": 125000 }, { "epoch": 8.98, "learning_rate": 4.551080268994134e-05, "loss": 0.7802, "step": 125500 }, { "epoch": 9.01, "learning_rate": 4.549291744169409e-05, "loss": 0.6816, "step": 126000 }, { "epoch": 9.05, "learning_rate": 4.547503219344685e-05, "loss": 0.5608, "step": 126500 }, { "epoch": 9.09, "learning_rate": 4.54571469451996e-05, "loss": 0.5721, "step": 127000 }, { "epoch": 9.12, "learning_rate": 4.5439261696952353e-05, "loss": 0.5815, "step": 127500 }, { "epoch": 9.16, "learning_rate": 4.542137644870511e-05, "loss": 0.5897, "step": 128000 }, { "epoch": 9.19, "learning_rate": 4.5403491200457864e-05, "loss": 0.5972, "step": 128500 }, { "epoch": 9.23, "learning_rate": 4.538560595221062e-05, "loss": 0.5935, "step": 129000 }, { "epoch": 9.26, "learning_rate": 4.5367720703963374e-05, "loss": 0.6069, "step": 129500 }, { "epoch": 9.3, "learning_rate": 4.5349835455716126e-05, "loss": 0.6047, "step": 130000 }, { "epoch": 9.34, "learning_rate": 4.5331950207468885e-05, "loss": 0.6132, "step": 130500 }, { "epoch": 9.37, "learning_rate": 4.5314064959221636e-05, "loss": 0.6186, "step": 131000 }, { "epoch": 9.41, "learning_rate": 4.529617971097439e-05, "loss": 0.6303, "step": 131500 }, { "epoch": 9.44, "learning_rate": 4.527829446272715e-05, "loss": 0.6312, "step": 132000 }, { "epoch": 9.48, "learning_rate": 4.52604092144799e-05, "loss": 0.6482, "step": 132500 }, { "epoch": 9.51, "learning_rate": 4.524252396623265e-05, "loss": 0.6371, "step": 133000 }, { "epoch": 9.55, "learning_rate": 4.522463871798541e-05, "loss": 0.6346, "step": 133500 }, { "epoch": 9.59, "learning_rate": 4.520675346973816e-05, "loss": 0.6522, "step": 134000 }, { "epoch": 9.62, "learning_rate": 4.518886822149091e-05, "loss": 0.6564, "step": 134500 }, { "epoch": 9.66, "learning_rate": 4.517098297324367e-05, "loss": 0.668, "step": 135000 }, { "epoch": 9.69, "learning_rate": 4.515309772499642e-05, "loss": 0.6653, "step": 135500 }, { "epoch": 9.73, "learning_rate": 4.5135212476749175e-05, "loss": 0.6668, "step": 136000 }, { "epoch": 9.77, "learning_rate": 4.511732722850193e-05, "loss": 0.6692, "step": 136500 }, { "epoch": 9.8, "learning_rate": 4.5099441980254685e-05, "loss": 0.678, "step": 137000 }, { "epoch": 9.84, "learning_rate": 4.5081556732007444e-05, "loss": 0.647, "step": 137500 }, { "epoch": 9.87, "learning_rate": 4.5063671483760195e-05, "loss": 0.6806, "step": 138000 }, { "epoch": 9.91, "learning_rate": 4.504578623551295e-05, "loss": 0.6787, "step": 138500 }, { "epoch": 9.94, "learning_rate": 4.5027900987265706e-05, "loss": 0.6978, "step": 139000 }, { "epoch": 9.98, "learning_rate": 4.501001573901846e-05, "loss": 0.6912, "step": 139500 }, { "epoch": 10.02, "learning_rate": 4.499213049077121e-05, "loss": 0.6072, "step": 140000 }, { "epoch": 10.05, "learning_rate": 4.497424524252397e-05, "loss": 0.5022, "step": 140500 }, { "epoch": 10.09, "learning_rate": 4.495635999427672e-05, "loss": 0.5045, "step": 141000 }, { "epoch": 10.12, "learning_rate": 4.493847474602948e-05, "loss": 0.5298, "step": 141500 }, { "epoch": 10.16, "learning_rate": 4.492058949778224e-05, "loss": 0.5233, "step": 142000 }, { "epoch": 10.19, "learning_rate": 4.490270424953499e-05, "loss": 0.5314, "step": 142500 }, { "epoch": 10.23, "learning_rate": 4.488481900128774e-05, "loss": 0.5469, "step": 143000 }, { "epoch": 10.27, "learning_rate": 4.48669337530405e-05, "loss": 0.5494, "step": 143500 }, { "epoch": 10.3, "learning_rate": 4.484904850479325e-05, "loss": 0.5448, "step": 144000 }, { "epoch": 10.34, "learning_rate": 4.4831163256546e-05, "loss": 0.5507, "step": 144500 }, { "epoch": 10.37, "learning_rate": 4.481327800829876e-05, "loss": 0.5609, "step": 145000 }, { "epoch": 10.41, "learning_rate": 4.479539276005151e-05, "loss": 0.5691, "step": 145500 }, { "epoch": 10.44, "learning_rate": 4.4777507511804265e-05, "loss": 0.5656, "step": 146000 }, { "epoch": 10.48, "learning_rate": 4.475962226355702e-05, "loss": 0.5652, "step": 146500 }, { "epoch": 10.52, "learning_rate": 4.4741737015309775e-05, "loss": 0.5794, "step": 147000 }, { "epoch": 10.55, "learning_rate": 4.472385176706253e-05, "loss": 0.5787, "step": 147500 }, { "epoch": 10.59, "learning_rate": 4.4705966518815285e-05, "loss": 0.5883, "step": 148000 }, { "epoch": 10.62, "learning_rate": 4.468808127056804e-05, "loss": 0.5821, "step": 148500 }, { "epoch": 10.66, "learning_rate": 4.4670196022320796e-05, "loss": 0.5918, "step": 149000 }, { "epoch": 10.7, "learning_rate": 4.465231077407355e-05, "loss": 0.6038, "step": 149500 }, { "epoch": 10.73, "learning_rate": 4.46344255258263e-05, "loss": 0.6114, "step": 150000 }, { "epoch": 10.77, "learning_rate": 4.461654027757906e-05, "loss": 0.6023, "step": 150500 }, { "epoch": 10.8, "learning_rate": 4.459865502933181e-05, "loss": 0.6016, "step": 151000 }, { "epoch": 10.84, "learning_rate": 4.458076978108456e-05, "loss": 0.6057, "step": 151500 }, { "epoch": 10.87, "learning_rate": 4.456288453283732e-05, "loss": 0.6218, "step": 152000 }, { "epoch": 10.91, "learning_rate": 4.454499928459007e-05, "loss": 0.6226, "step": 152500 }, { "epoch": 10.95, "learning_rate": 4.4527114036342824e-05, "loss": 0.6037, "step": 153000 }, { "epoch": 10.98, "learning_rate": 4.450922878809558e-05, "loss": 0.6197, "step": 153500 }, { "epoch": 11.02, "learning_rate": 4.4491343539848334e-05, "loss": 0.539, "step": 154000 }, { "epoch": 11.05, "learning_rate": 4.4473458291601086e-05, "loss": 0.4517, "step": 154500 }, { "epoch": 11.09, "learning_rate": 4.4455573043353844e-05, "loss": 0.4682, "step": 155000 }, { "epoch": 11.12, "learning_rate": 4.4437687795106596e-05, "loss": 0.4675, "step": 155500 }, { "epoch": 11.16, "learning_rate": 4.4419802546859355e-05, "loss": 0.4814, "step": 156000 }, { "epoch": 11.2, "learning_rate": 4.4401917298612107e-05, "loss": 0.4774, "step": 156500 }, { "epoch": 11.23, "learning_rate": 4.438403205036486e-05, "loss": 0.4855, "step": 157000 }, { "epoch": 11.27, "learning_rate": 4.436614680211762e-05, "loss": 0.494, "step": 157500 }, { "epoch": 11.3, "learning_rate": 4.434826155387037e-05, "loss": 0.5003, "step": 158000 }, { "epoch": 11.34, "learning_rate": 4.433037630562312e-05, "loss": 0.5193, "step": 158500 }, { "epoch": 11.38, "learning_rate": 4.431249105737588e-05, "loss": 0.5116, "step": 159000 }, { "epoch": 11.41, "learning_rate": 4.429460580912863e-05, "loss": 0.5133, "step": 159500 }, { "epoch": 11.45, "learning_rate": 4.427672056088138e-05, "loss": 0.531, "step": 160000 }, { "epoch": 11.48, "learning_rate": 4.425883531263414e-05, "loss": 0.5267, "step": 160500 }, { "epoch": 11.52, "learning_rate": 4.424095006438689e-05, "loss": 0.5286, "step": 161000 }, { "epoch": 11.55, "learning_rate": 4.4223064816139645e-05, "loss": 0.5306, "step": 161500 }, { "epoch": 11.59, "learning_rate": 4.4205179567892403e-05, "loss": 0.5261, "step": 162000 }, { "epoch": 11.63, "learning_rate": 4.4187294319645155e-05, "loss": 0.5332, "step": 162500 }, { "epoch": 11.66, "learning_rate": 4.4169409071397914e-05, "loss": 0.5393, "step": 163000 }, { "epoch": 11.7, "learning_rate": 4.415152382315067e-05, "loss": 0.5249, "step": 163500 }, { "epoch": 11.73, "learning_rate": 4.4133638574903424e-05, "loss": 0.5566, "step": 164000 }, { "epoch": 11.77, "learning_rate": 4.4115753326656176e-05, "loss": 0.5509, "step": 164500 }, { "epoch": 11.8, "learning_rate": 4.4097868078408934e-05, "loss": 0.5398, "step": 165000 }, { "epoch": 11.84, "learning_rate": 4.4079982830161686e-05, "loss": 0.5641, "step": 165500 }, { "epoch": 11.88, "learning_rate": 4.406209758191444e-05, "loss": 0.5623, "step": 166000 }, { "epoch": 11.91, "learning_rate": 4.40442123336672e-05, "loss": 0.5612, "step": 166500 }, { "epoch": 11.95, "learning_rate": 4.402632708541995e-05, "loss": 0.576, "step": 167000 }, { "epoch": 11.98, "learning_rate": 4.400844183717271e-05, "loss": 0.5653, "step": 167500 }, { "epoch": 12.02, "learning_rate": 4.399055658892546e-05, "loss": 0.4821, "step": 168000 }, { "epoch": 12.05, "learning_rate": 4.397267134067821e-05, "loss": 0.416, "step": 168500 }, { "epoch": 12.09, "learning_rate": 4.395478609243097e-05, "loss": 0.4239, "step": 169000 }, { "epoch": 12.13, "learning_rate": 4.393690084418372e-05, "loss": 0.4336, "step": 169500 }, { "epoch": 12.16, "learning_rate": 4.391901559593647e-05, "loss": 0.4388, "step": 170000 }, { "epoch": 12.2, "learning_rate": 4.390113034768923e-05, "loss": 0.453, "step": 170500 }, { "epoch": 12.23, "learning_rate": 4.388324509944198e-05, "loss": 0.4524, "step": 171000 }, { "epoch": 12.27, "learning_rate": 4.3865359851194735e-05, "loss": 0.4563, "step": 171500 }, { "epoch": 12.31, "learning_rate": 4.3847474602947493e-05, "loss": 0.4651, "step": 172000 }, { "epoch": 12.34, "learning_rate": 4.3829589354700245e-05, "loss": 0.4644, "step": 172500 }, { "epoch": 12.38, "learning_rate": 4.3811704106453e-05, "loss": 0.4749, "step": 173000 }, { "epoch": 12.41, "learning_rate": 4.3793818858205756e-05, "loss": 0.4855, "step": 173500 }, { "epoch": 12.45, "learning_rate": 4.377593360995851e-05, "loss": 0.4845, "step": 174000 }, { "epoch": 12.48, "learning_rate": 4.375804836171126e-05, "loss": 0.4799, "step": 174500 }, { "epoch": 12.52, "learning_rate": 4.374016311346402e-05, "loss": 0.4844, "step": 175000 }, { "epoch": 12.56, "learning_rate": 4.372227786521677e-05, "loss": 0.4894, "step": 175500 }, { "epoch": 12.59, "learning_rate": 4.370439261696953e-05, "loss": 0.4855, "step": 176000 }, { "epoch": 12.63, "learning_rate": 4.368650736872228e-05, "loss": 0.4963, "step": 176500 }, { "epoch": 12.66, "learning_rate": 4.366862212047503e-05, "loss": 0.5021, "step": 177000 }, { "epoch": 12.7, "learning_rate": 4.365073687222779e-05, "loss": 0.5055, "step": 177500 }, { "epoch": 12.73, "learning_rate": 4.363285162398054e-05, "loss": 0.5034, "step": 178000 }, { "epoch": 12.77, "learning_rate": 4.3614966375733294e-05, "loss": 0.508, "step": 178500 }, { "epoch": 12.81, "learning_rate": 4.359708112748605e-05, "loss": 0.5115, "step": 179000 }, { "epoch": 12.84, "learning_rate": 4.3579195879238804e-05, "loss": 0.5159, "step": 179500 }, { "epoch": 12.88, "learning_rate": 4.3561310630991556e-05, "loss": 0.5193, "step": 180000 }, { "epoch": 12.91, "learning_rate": 4.3543425382744315e-05, "loss": 0.5171, "step": 180500 }, { "epoch": 12.95, "learning_rate": 4.3525540134497066e-05, "loss": 0.5272, "step": 181000 }, { "epoch": 12.98, "learning_rate": 4.350765488624982e-05, "loss": 0.5217, "step": 181500 }, { "epoch": 13.02, "learning_rate": 4.348976963800258e-05, "loss": 0.4477, "step": 182000 }, { "epoch": 13.06, "learning_rate": 4.347188438975533e-05, "loss": 0.3926, "step": 182500 }, { "epoch": 13.09, "learning_rate": 4.345399914150809e-05, "loss": 0.4036, "step": 183000 }, { "epoch": 13.13, "learning_rate": 4.343611389326084e-05, "loss": 0.4139, "step": 183500 }, { "epoch": 13.16, "learning_rate": 4.341822864501359e-05, "loss": 0.4115, "step": 184000 }, { "epoch": 13.2, "learning_rate": 4.340034339676635e-05, "loss": 0.4216, "step": 184500 }, { "epoch": 13.24, "learning_rate": 4.338245814851911e-05, "loss": 0.4288, "step": 185000 }, { "epoch": 13.27, "learning_rate": 4.336457290027186e-05, "loss": 0.4333, "step": 185500 }, { "epoch": 13.31, "learning_rate": 4.334668765202461e-05, "loss": 0.4295, "step": 186000 }, { "epoch": 13.34, "learning_rate": 4.332880240377737e-05, "loss": 0.4312, "step": 186500 }, { "epoch": 13.38, "learning_rate": 4.331091715553012e-05, "loss": 0.4358, "step": 187000 }, { "epoch": 13.41, "learning_rate": 4.329303190728288e-05, "loss": 0.4436, "step": 187500 }, { "epoch": 13.45, "learning_rate": 4.327514665903563e-05, "loss": 0.4472, "step": 188000 }, { "epoch": 13.49, "learning_rate": 4.3257261410788384e-05, "loss": 0.4484, "step": 188500 }, { "epoch": 13.52, "learning_rate": 4.323937616254114e-05, "loss": 0.455, "step": 189000 }, { "epoch": 13.56, "learning_rate": 4.3221490914293894e-05, "loss": 0.4617, "step": 189500 }, { "epoch": 13.59, "learning_rate": 4.3203605666046646e-05, "loss": 0.4521, "step": 190000 }, { "epoch": 13.63, "learning_rate": 4.3185720417799405e-05, "loss": 0.4618, "step": 190500 }, { "epoch": 13.66, "learning_rate": 4.3167835169552156e-05, "loss": 0.4668, "step": 191000 }, { "epoch": 13.7, "learning_rate": 4.314994992130491e-05, "loss": 0.4688, "step": 191500 }, { "epoch": 13.74, "learning_rate": 4.313206467305767e-05, "loss": 0.4778, "step": 192000 }, { "epoch": 13.77, "learning_rate": 4.311417942481042e-05, "loss": 0.4733, "step": 192500 }, { "epoch": 13.81, "learning_rate": 4.309629417656317e-05, "loss": 0.4715, "step": 193000 }, { "epoch": 13.84, "learning_rate": 4.307840892831593e-05, "loss": 0.477, "step": 193500 }, { "epoch": 13.88, "learning_rate": 4.306052368006868e-05, "loss": 0.4782, "step": 194000 }, { "epoch": 13.91, "learning_rate": 4.304263843182143e-05, "loss": 0.4848, "step": 194500 }, { "epoch": 13.95, "learning_rate": 4.302475318357419e-05, "loss": 0.4871, "step": 195000 }, { "epoch": 13.99, "learning_rate": 4.300686793532694e-05, "loss": 0.4858, "step": 195500 }, { "epoch": 14.02, "learning_rate": 4.29889826870797e-05, "loss": 0.4106, "step": 196000 }, { "epoch": 14.06, "learning_rate": 4.297109743883245e-05, "loss": 0.376, "step": 196500 }, { "epoch": 14.09, "learning_rate": 4.2953212190585205e-05, "loss": 0.3836, "step": 197000 }, { "epoch": 14.13, "learning_rate": 4.2935326942337964e-05, "loss": 0.3844, "step": 197500 }, { "epoch": 14.17, "learning_rate": 4.2917441694090715e-05, "loss": 0.3944, "step": 198000 }, { "epoch": 14.2, "learning_rate": 4.289955644584347e-05, "loss": 0.3978, "step": 198500 }, { "epoch": 14.24, "learning_rate": 4.2881671197596226e-05, "loss": 0.4, "step": 199000 }, { "epoch": 14.27, "learning_rate": 4.286378594934898e-05, "loss": 0.4034, "step": 199500 }, { "epoch": 14.31, "learning_rate": 4.284590070110173e-05, "loss": 0.4116, "step": 200000 }, { "epoch": 14.34, "learning_rate": 4.282801545285449e-05, "loss": 0.4154, "step": 200500 }, { "epoch": 14.38, "learning_rate": 4.281013020460724e-05, "loss": 0.4204, "step": 201000 }, { "epoch": 14.42, "learning_rate": 4.279224495635999e-05, "loss": 0.4219, "step": 201500 }, { "epoch": 14.45, "learning_rate": 4.277435970811275e-05, "loss": 0.4204, "step": 202000 }, { "epoch": 14.49, "learning_rate": 4.27564744598655e-05, "loss": 0.4196, "step": 202500 }, { "epoch": 14.52, "learning_rate": 4.273858921161826e-05, "loss": 0.4194, "step": 203000 }, { "epoch": 14.56, "learning_rate": 4.272070396337101e-05, "loss": 0.4289, "step": 203500 }, { "epoch": 14.59, "learning_rate": 4.2702818715123764e-05, "loss": 0.4321, "step": 204000 }, { "epoch": 14.63, "learning_rate": 4.268493346687652e-05, "loss": 0.4371, "step": 204500 }, { "epoch": 14.67, "learning_rate": 4.2667048218629274e-05, "loss": 0.4447, "step": 205000 }, { "epoch": 14.7, "learning_rate": 4.2649162970382026e-05, "loss": 0.4428, "step": 205500 }, { "epoch": 14.74, "learning_rate": 4.2631277722134785e-05, "loss": 0.4365, "step": 206000 }, { "epoch": 14.77, "learning_rate": 4.2613392473887543e-05, "loss": 0.4416, "step": 206500 }, { "epoch": 14.81, "learning_rate": 4.2595507225640295e-05, "loss": 0.4451, "step": 207000 }, { "epoch": 14.84, "learning_rate": 4.2577621977393054e-05, "loss": 0.4488, "step": 207500 }, { "epoch": 14.88, "learning_rate": 4.2559736729145806e-05, "loss": 0.4489, "step": 208000 }, { "epoch": 14.92, "learning_rate": 4.254185148089856e-05, "loss": 0.4529, "step": 208500 }, { "epoch": 14.95, "learning_rate": 4.2523966232651316e-05, "loss": 0.45, "step": 209000 }, { "epoch": 14.99, "learning_rate": 4.250608098440407e-05, "loss": 0.4585, "step": 209500 }, { "epoch": 15.02, "learning_rate": 4.248819573615682e-05, "loss": 0.387, "step": 210000 }, { "epoch": 15.06, "learning_rate": 4.247031048790958e-05, "loss": 0.3537, "step": 210500 }, { "epoch": 15.1, "learning_rate": 4.245242523966233e-05, "loss": 0.362, "step": 211000 }, { "epoch": 15.13, "learning_rate": 4.243453999141508e-05, "loss": 0.3754, "step": 211500 }, { "epoch": 15.17, "learning_rate": 4.241665474316784e-05, "loss": 0.3778, "step": 212000 }, { "epoch": 15.2, "learning_rate": 4.239876949492059e-05, "loss": 0.374, "step": 212500 }, { "epoch": 15.24, "learning_rate": 4.2380884246673344e-05, "loss": 0.3781, "step": 213000 }, { "epoch": 15.27, "learning_rate": 4.23629989984261e-05, "loss": 0.391, "step": 213500 }, { "epoch": 15.31, "learning_rate": 4.2345113750178854e-05, "loss": 0.3932, "step": 214000 }, { "epoch": 15.35, "learning_rate": 4.232722850193161e-05, "loss": 0.3911, "step": 214500 }, { "epoch": 15.38, "learning_rate": 4.2309343253684365e-05, "loss": 0.3946, "step": 215000 }, { "epoch": 15.42, "learning_rate": 4.2291458005437116e-05, "loss": 0.4036, "step": 215500 }, { "epoch": 15.45, "learning_rate": 4.2273572757189875e-05, "loss": 0.4014, "step": 216000 }, { "epoch": 15.49, "learning_rate": 4.225568750894263e-05, "loss": 0.3999, "step": 216500 }, { "epoch": 15.52, "learning_rate": 4.223780226069538e-05, "loss": 0.4079, "step": 217000 }, { "epoch": 15.56, "learning_rate": 4.221991701244814e-05, "loss": 0.4132, "step": 217500 }, { "epoch": 15.6, "learning_rate": 4.220203176420089e-05, "loss": 0.4144, "step": 218000 }, { "epoch": 15.63, "learning_rate": 4.218414651595364e-05, "loss": 0.4104, "step": 218500 }, { "epoch": 15.67, "learning_rate": 4.21662612677064e-05, "loss": 0.4113, "step": 219000 }, { "epoch": 15.7, "learning_rate": 4.214837601945915e-05, "loss": 0.4185, "step": 219500 }, { "epoch": 15.74, "learning_rate": 4.21304907712119e-05, "loss": 0.4216, "step": 220000 }, { "epoch": 15.77, "learning_rate": 4.211260552296466e-05, "loss": 0.419, "step": 220500 }, { "epoch": 15.81, "learning_rate": 4.209472027471741e-05, "loss": 0.4235, "step": 221000 }, { "epoch": 15.85, "learning_rate": 4.2076835026470165e-05, "loss": 0.4206, "step": 221500 }, { "epoch": 15.88, "learning_rate": 4.2058949778222924e-05, "loss": 0.4276, "step": 222000 }, { "epoch": 15.92, "learning_rate": 4.2041064529975675e-05, "loss": 0.4292, "step": 222500 }, { "epoch": 15.95, "learning_rate": 4.2023179281728434e-05, "loss": 0.4214, "step": 223000 }, { "epoch": 15.99, "learning_rate": 4.2005294033481186e-05, "loss": 0.4331, "step": 223500 }, { "epoch": 16.03, "learning_rate": 4.198740878523394e-05, "loss": 0.3667, "step": 224000 }, { "epoch": 16.06, "learning_rate": 4.1969523536986696e-05, "loss": 0.3454, "step": 224500 }, { "epoch": 16.1, "learning_rate": 4.195163828873945e-05, "loss": 0.3508, "step": 225000 }, { "epoch": 16.13, "learning_rate": 4.19337530404922e-05, "loss": 0.3603, "step": 225500 }, { "epoch": 16.17, "learning_rate": 4.191586779224496e-05, "loss": 0.3652, "step": 226000 }, { "epoch": 16.2, "learning_rate": 4.189798254399771e-05, "loss": 0.3647, "step": 226500 }, { "epoch": 16.24, "learning_rate": 4.188009729575046e-05, "loss": 0.3712, "step": 227000 }, { "epoch": 16.28, "learning_rate": 4.186221204750322e-05, "loss": 0.3706, "step": 227500 }, { "epoch": 16.31, "learning_rate": 4.184432679925598e-05, "loss": 0.371, "step": 228000 }, { "epoch": 16.35, "learning_rate": 4.182644155100873e-05, "loss": 0.3764, "step": 228500 }, { "epoch": 16.38, "learning_rate": 4.180855630276149e-05, "loss": 0.3832, "step": 229000 }, { "epoch": 16.42, "learning_rate": 4.179067105451424e-05, "loss": 0.3803, "step": 229500 }, { "epoch": 16.45, "learning_rate": 4.177278580626699e-05, "loss": 0.3902, "step": 230000 }, { "epoch": 16.49, "learning_rate": 4.175490055801975e-05, "loss": 0.3894, "step": 230500 }, { "epoch": 16.53, "learning_rate": 4.17370153097725e-05, "loss": 0.3893, "step": 231000 }, { "epoch": 16.56, "learning_rate": 4.1719130061525255e-05, "loss": 0.3876, "step": 231500 }, { "epoch": 16.6, "learning_rate": 4.1701244813278014e-05, "loss": 0.4002, "step": 232000 }, { "epoch": 16.63, "learning_rate": 4.1683359565030765e-05, "loss": 0.397, "step": 232500 }, { "epoch": 16.67, "learning_rate": 4.166547431678352e-05, "loss": 0.3983, "step": 233000 }, { "epoch": 16.7, "learning_rate": 4.1647589068536276e-05, "loss": 0.3978, "step": 233500 }, { "epoch": 16.74, "learning_rate": 4.162970382028903e-05, "loss": 0.4041, "step": 234000 }, { "epoch": 16.78, "learning_rate": 4.1611818572041786e-05, "loss": 0.3996, "step": 234500 }, { "epoch": 16.81, "learning_rate": 4.159393332379454e-05, "loss": 0.4062, "step": 235000 }, { "epoch": 16.85, "learning_rate": 4.157604807554729e-05, "loss": 0.4031, "step": 235500 }, { "epoch": 16.88, "learning_rate": 4.155816282730005e-05, "loss": 0.4089, "step": 236000 }, { "epoch": 16.92, "learning_rate": 4.15402775790528e-05, "loss": 0.406, "step": 236500 }, { "epoch": 16.96, "learning_rate": 4.152239233080555e-05, "loss": 0.4121, "step": 237000 }, { "epoch": 16.99, "learning_rate": 4.150450708255831e-05, "loss": 0.4119, "step": 237500 }, { "epoch": 17.03, "learning_rate": 4.148662183431106e-05, "loss": 0.3483, "step": 238000 }, { "epoch": 17.06, "learning_rate": 4.1468736586063814e-05, "loss": 0.3395, "step": 238500 }, { "epoch": 17.1, "learning_rate": 4.145085133781657e-05, "loss": 0.3376, "step": 239000 }, { "epoch": 17.13, "learning_rate": 4.1432966089569324e-05, "loss": 0.3422, "step": 239500 }, { "epoch": 17.17, "learning_rate": 4.1415080841322076e-05, "loss": 0.3459, "step": 240000 }, { "epoch": 17.21, "learning_rate": 4.1397195593074835e-05, "loss": 0.3527, "step": 240500 }, { "epoch": 17.24, "learning_rate": 4.1379310344827587e-05, "loss": 0.356, "step": 241000 }, { "epoch": 17.28, "learning_rate": 4.1361425096580345e-05, "loss": 0.3553, "step": 241500 }, { "epoch": 17.31, "learning_rate": 4.13435398483331e-05, "loss": 0.359, "step": 242000 }, { "epoch": 17.35, "learning_rate": 4.132565460008585e-05, "loss": 0.3635, "step": 242500 }, { "epoch": 17.38, "learning_rate": 4.130776935183861e-05, "loss": 0.3632, "step": 243000 }, { "epoch": 17.42, "learning_rate": 4.128988410359136e-05, "loss": 0.3686, "step": 243500 }, { "epoch": 17.46, "learning_rate": 4.127199885534411e-05, "loss": 0.3738, "step": 244000 }, { "epoch": 17.49, "learning_rate": 4.125411360709687e-05, "loss": 0.372, "step": 244500 }, { "epoch": 17.53, "learning_rate": 4.123622835884962e-05, "loss": 0.3742, "step": 245000 }, { "epoch": 17.56, "learning_rate": 4.121834311060237e-05, "loss": 0.3711, "step": 245500 }, { "epoch": 17.6, "learning_rate": 4.120045786235513e-05, "loss": 0.3755, "step": 246000 }, { "epoch": 17.63, "learning_rate": 4.118257261410788e-05, "loss": 0.3787, "step": 246500 }, { "epoch": 17.67, "learning_rate": 4.1164687365860635e-05, "loss": 0.3793, "step": 247000 }, { "epoch": 17.71, "learning_rate": 4.1146802117613394e-05, "loss": 0.3878, "step": 247500 }, { "epoch": 17.74, "learning_rate": 4.1128916869366146e-05, "loss": 0.3891, "step": 248000 }, { "epoch": 17.78, "learning_rate": 4.11110316211189e-05, "loss": 0.3875, "step": 248500 }, { "epoch": 17.81, "learning_rate": 4.1093146372871656e-05, "loss": 0.3858, "step": 249000 }, { "epoch": 17.85, "learning_rate": 4.107526112462441e-05, "loss": 0.3906, "step": 249500 }, { "epoch": 17.89, "learning_rate": 4.1057375876377166e-05, "loss": 0.3943, "step": 250000 }, { "epoch": 17.92, "learning_rate": 4.1039490628129925e-05, "loss": 0.3953, "step": 250500 }, { "epoch": 17.96, "learning_rate": 4.102160537988268e-05, "loss": 0.4001, "step": 251000 }, { "epoch": 17.99, "learning_rate": 4.100372013163543e-05, "loss": 0.3985, "step": 251500 }, { "epoch": 18.03, "learning_rate": 4.098583488338819e-05, "loss": 0.3321, "step": 252000 }, { "epoch": 18.06, "learning_rate": 4.096794963514094e-05, "loss": 0.3254, "step": 252500 }, { "epoch": 18.1, "learning_rate": 4.095006438689369e-05, "loss": 0.3344, "step": 253000 }, { "epoch": 18.14, "learning_rate": 4.093217913864645e-05, "loss": 0.3348, "step": 253500 }, { "epoch": 18.17, "learning_rate": 4.09142938903992e-05, "loss": 0.3461, "step": 254000 }, { "epoch": 18.21, "learning_rate": 4.089640864215196e-05, "loss": 0.3354, "step": 254500 }, { "epoch": 18.24, "learning_rate": 4.087852339390471e-05, "loss": 0.3484, "step": 255000 }, { "epoch": 18.28, "learning_rate": 4.086063814565746e-05, "loss": 0.3487, "step": 255500 }, { "epoch": 18.31, "learning_rate": 4.084275289741022e-05, "loss": 0.3496, "step": 256000 }, { "epoch": 18.35, "learning_rate": 4.0824867649162973e-05, "loss": 0.3552, "step": 256500 }, { "epoch": 18.39, "learning_rate": 4.0806982400915725e-05, "loss": 0.3553, "step": 257000 }, { "epoch": 18.42, "learning_rate": 4.0789097152668484e-05, "loss": 0.3562, "step": 257500 }, { "epoch": 18.46, "learning_rate": 4.0771211904421236e-05, "loss": 0.3692, "step": 258000 }, { "epoch": 18.49, "learning_rate": 4.075332665617399e-05, "loss": 0.3625, "step": 258500 }, { "epoch": 18.53, "learning_rate": 4.0735441407926746e-05, "loss": 0.3625, "step": 259000 }, { "epoch": 18.56, "learning_rate": 4.07175561596795e-05, "loss": 0.3605, "step": 259500 }, { "epoch": 18.6, "learning_rate": 4.069967091143225e-05, "loss": 0.3614, "step": 260000 }, { "epoch": 18.64, "learning_rate": 4.068178566318501e-05, "loss": 0.3658, "step": 260500 }, { "epoch": 18.67, "learning_rate": 4.066390041493776e-05, "loss": 0.3677, "step": 261000 }, { "epoch": 18.71, "learning_rate": 4.064601516669052e-05, "loss": 0.3718, "step": 261500 }, { "epoch": 18.74, "learning_rate": 4.062812991844327e-05, "loss": 0.3752, "step": 262000 }, { "epoch": 18.78, "learning_rate": 4.061024467019602e-05, "loss": 0.3833, "step": 262500 }, { "epoch": 18.82, "learning_rate": 4.059235942194878e-05, "loss": 0.3696, "step": 263000 }, { "epoch": 18.85, "learning_rate": 4.057447417370153e-05, "loss": 0.3756, "step": 263500 }, { "epoch": 18.89, "learning_rate": 4.0556588925454284e-05, "loss": 0.3803, "step": 264000 }, { "epoch": 18.92, "learning_rate": 4.053870367720704e-05, "loss": 0.3803, "step": 264500 }, { "epoch": 18.96, "learning_rate": 4.0520818428959795e-05, "loss": 0.382, "step": 265000 }, { "epoch": 18.99, "learning_rate": 4.0502933180712546e-05, "loss": 0.3811, "step": 265500 }, { "epoch": 19.03, "learning_rate": 4.0485047932465305e-05, "loss": 0.3226, "step": 266000 }, { "epoch": 19.07, "learning_rate": 4.046716268421806e-05, "loss": 0.3189, "step": 266500 }, { "epoch": 19.1, "learning_rate": 4.044927743597081e-05, "loss": 0.3253, "step": 267000 }, { "epoch": 19.14, "learning_rate": 4.043139218772357e-05, "loss": 0.3247, "step": 267500 }, { "epoch": 19.17, "learning_rate": 4.041350693947632e-05, "loss": 0.3269, "step": 268000 }, { "epoch": 19.21, "learning_rate": 4.039562169122908e-05, "loss": 0.3356, "step": 268500 }, { "epoch": 19.24, "learning_rate": 4.037773644298183e-05, "loss": 0.3369, "step": 269000 }, { "epoch": 19.28, "learning_rate": 4.035985119473458e-05, "loss": 0.3366, "step": 269500 }, { "epoch": 19.32, "learning_rate": 4.034196594648734e-05, "loss": 0.339, "step": 270000 }, { "epoch": 19.35, "learning_rate": 4.032408069824009e-05, "loss": 0.3418, "step": 270500 }, { "epoch": 19.39, "learning_rate": 4.030619544999284e-05, "loss": 0.3432, "step": 271000 }, { "epoch": 19.42, "learning_rate": 4.02883102017456e-05, "loss": 0.3492, "step": 271500 }, { "epoch": 19.46, "learning_rate": 4.027042495349836e-05, "loss": 0.3447, "step": 272000 }, { "epoch": 19.49, "learning_rate": 4.025253970525111e-05, "loss": 0.3545, "step": 272500 }, { "epoch": 19.53, "learning_rate": 4.023465445700387e-05, "loss": 0.3492, "step": 273000 }, { "epoch": 19.57, "learning_rate": 4.021676920875662e-05, "loss": 0.3497, "step": 273500 }, { "epoch": 19.6, "learning_rate": 4.0198883960509374e-05, "loss": 0.3532, "step": 274000 }, { "epoch": 19.64, "learning_rate": 4.018099871226213e-05, "loss": 0.3543, "step": 274500 }, { "epoch": 19.67, "learning_rate": 4.0163113464014885e-05, "loss": 0.3614, "step": 275000 }, { "epoch": 19.71, "learning_rate": 4.0145228215767636e-05, "loss": 0.3606, "step": 275500 }, { "epoch": 19.75, "learning_rate": 4.0127342967520395e-05, "loss": 0.3653, "step": 276000 }, { "epoch": 19.78, "learning_rate": 4.010945771927315e-05, "loss": 0.3695, "step": 276500 }, { "epoch": 19.82, "learning_rate": 4.00915724710259e-05, "loss": 0.361, "step": 277000 }, { "epoch": 19.85, "learning_rate": 4.007368722277866e-05, "loss": 0.3617, "step": 277500 }, { "epoch": 19.89, "learning_rate": 4.005580197453141e-05, "loss": 0.365, "step": 278000 }, { "epoch": 19.92, "learning_rate": 4.003791672628416e-05, "loss": 0.3664, "step": 278500 }, { "epoch": 19.96, "learning_rate": 4.002003147803692e-05, "loss": 0.3689, "step": 279000 }, { "epoch": 20.0, "learning_rate": 4.000214622978967e-05, "loss": 0.3769, "step": 279500 }, { "epoch": 20.03, "learning_rate": 3.998426098154242e-05, "loss": 0.3129, "step": 280000 }, { "epoch": 20.07, "learning_rate": 3.996637573329518e-05, "loss": 0.3107, "step": 280500 }, { "epoch": 20.1, "learning_rate": 3.994849048504793e-05, "loss": 0.3165, "step": 281000 }, { "epoch": 20.14, "learning_rate": 3.993060523680069e-05, "loss": 0.319, "step": 281500 }, { "epoch": 20.17, "learning_rate": 3.9912719988553444e-05, "loss": 0.3204, "step": 282000 }, { "epoch": 20.21, "learning_rate": 3.9894834740306195e-05, "loss": 0.3273, "step": 282500 }, { "epoch": 20.25, "learning_rate": 3.9876949492058954e-05, "loss": 0.3274, "step": 283000 }, { "epoch": 20.28, "learning_rate": 3.9859064243811706e-05, "loss": 0.3288, "step": 283500 }, { "epoch": 20.32, "learning_rate": 3.984117899556446e-05, "loss": 0.3309, "step": 284000 }, { "epoch": 20.35, "learning_rate": 3.9823293747317216e-05, "loss": 0.3272, "step": 284500 }, { "epoch": 20.39, "learning_rate": 3.980540849906997e-05, "loss": 0.336, "step": 285000 }, { "epoch": 20.42, "learning_rate": 3.978752325082272e-05, "loss": 0.3403, "step": 285500 }, { "epoch": 20.46, "learning_rate": 3.976963800257548e-05, "loss": 0.3471, "step": 286000 }, { "epoch": 20.5, "learning_rate": 3.975175275432823e-05, "loss": 0.3415, "step": 286500 }, { "epoch": 20.53, "learning_rate": 3.973386750608098e-05, "loss": 0.3453, "step": 287000 }, { "epoch": 20.57, "learning_rate": 3.971598225783374e-05, "loss": 0.3416, "step": 287500 }, { "epoch": 20.6, "learning_rate": 3.969809700958649e-05, "loss": 0.3482, "step": 288000 }, { "epoch": 20.64, "learning_rate": 3.968021176133925e-05, "loss": 0.3491, "step": 288500 }, { "epoch": 20.68, "learning_rate": 3.9662326513092e-05, "loss": 0.3517, "step": 289000 }, { "epoch": 20.71, "learning_rate": 3.9644441264844754e-05, "loss": 0.3593, "step": 289500 }, { "epoch": 20.75, "learning_rate": 3.962655601659751e-05, "loss": 0.3568, "step": 290000 }, { "epoch": 20.78, "learning_rate": 3.9608670768350265e-05, "loss": 0.3547, "step": 290500 }, { "epoch": 20.82, "learning_rate": 3.959078552010302e-05, "loss": 0.3511, "step": 291000 }, { "epoch": 20.85, "learning_rate": 3.9572900271855775e-05, "loss": 0.3517, "step": 291500 }, { "epoch": 20.89, "learning_rate": 3.955501502360853e-05, "loss": 0.3564, "step": 292000 }, { "epoch": 20.93, "learning_rate": 3.953712977536128e-05, "loss": 0.3566, "step": 292500 }, { "epoch": 20.96, "learning_rate": 3.951924452711404e-05, "loss": 0.3621, "step": 293000 }, { "epoch": 21.0, "learning_rate": 3.9501359278866796e-05, "loss": 0.362, "step": 293500 }, { "epoch": 21.03, "learning_rate": 3.948347403061955e-05, "loss": 0.3025, "step": 294000 }, { "epoch": 21.07, "learning_rate": 3.9465588782372306e-05, "loss": 0.2993, "step": 294500 }, { "epoch": 21.1, "learning_rate": 3.944770353412506e-05, "loss": 0.3111, "step": 295000 }, { "epoch": 21.14, "learning_rate": 3.942981828587781e-05, "loss": 0.3089, "step": 295500 }, { "epoch": 21.18, "learning_rate": 3.941193303763057e-05, "loss": 0.3137, "step": 296000 }, { "epoch": 21.21, "learning_rate": 3.939404778938332e-05, "loss": 0.3169, "step": 296500 }, { "epoch": 21.25, "learning_rate": 3.937616254113607e-05, "loss": 0.3173, "step": 297000 }, { "epoch": 21.28, "learning_rate": 3.935827729288883e-05, "loss": 0.3182, "step": 297500 }, { "epoch": 21.32, "learning_rate": 3.934039204464158e-05, "loss": 0.3248, "step": 298000 }, { "epoch": 21.35, "learning_rate": 3.9322506796394334e-05, "loss": 0.3279, "step": 298500 }, { "epoch": 21.39, "learning_rate": 3.930462154814709e-05, "loss": 0.3251, "step": 299000 }, { "epoch": 21.43, "learning_rate": 3.9286736299899845e-05, "loss": 0.325, "step": 299500 }, { "epoch": 21.46, "learning_rate": 3.92688510516526e-05, "loss": 0.3341, "step": 300000 }, { "epoch": 21.5, "learning_rate": 3.9250965803405355e-05, "loss": 0.3362, "step": 300500 }, { "epoch": 21.53, "learning_rate": 3.923308055515811e-05, "loss": 0.3396, "step": 301000 }, { "epoch": 21.57, "learning_rate": 3.9215195306910865e-05, "loss": 0.3381, "step": 301500 }, { "epoch": 21.61, "learning_rate": 3.919731005866362e-05, "loss": 0.3352, "step": 302000 }, { "epoch": 21.64, "learning_rate": 3.917942481041637e-05, "loss": 0.3374, "step": 302500 }, { "epoch": 21.68, "learning_rate": 3.916153956216913e-05, "loss": 0.3379, "step": 303000 }, { "epoch": 21.71, "learning_rate": 3.914365431392188e-05, "loss": 0.3381, "step": 303500 }, { "epoch": 21.75, "learning_rate": 3.912576906567463e-05, "loss": 0.3405, "step": 304000 }, { "epoch": 21.78, "learning_rate": 3.910788381742739e-05, "loss": 0.3459, "step": 304500 }, { "epoch": 21.82, "learning_rate": 3.908999856918014e-05, "loss": 0.3475, "step": 305000 }, { "epoch": 21.86, "learning_rate": 3.907211332093289e-05, "loss": 0.3469, "step": 305500 }, { "epoch": 21.89, "learning_rate": 3.905422807268565e-05, "loss": 0.3519, "step": 306000 }, { "epoch": 21.93, "learning_rate": 3.9036342824438404e-05, "loss": 0.352, "step": 306500 }, { "epoch": 21.96, "learning_rate": 3.9018457576191155e-05, "loss": 0.3554, "step": 307000 }, { "epoch": 22.0, "learning_rate": 3.9000572327943914e-05, "loss": 0.3475, "step": 307500 }, { "epoch": 22.03, "learning_rate": 3.8982687079696666e-05, "loss": 0.2957, "step": 308000 }, { "epoch": 22.07, "learning_rate": 3.8964801831449424e-05, "loss": 0.2972, "step": 308500 }, { "epoch": 22.11, "learning_rate": 3.8946916583202176e-05, "loss": 0.3036, "step": 309000 }, { "epoch": 22.14, "learning_rate": 3.892903133495493e-05, "loss": 0.3058, "step": 309500 }, { "epoch": 22.18, "learning_rate": 3.8911146086707686e-05, "loss": 0.3062, "step": 310000 }, { "epoch": 22.21, "learning_rate": 3.889326083846044e-05, "loss": 0.3097, "step": 310500 }, { "epoch": 22.25, "learning_rate": 3.887537559021319e-05, "loss": 0.3151, "step": 311000 }, { "epoch": 22.29, "learning_rate": 3.885749034196595e-05, "loss": 0.3191, "step": 311500 }, { "epoch": 22.32, "learning_rate": 3.88396050937187e-05, "loss": 0.313, "step": 312000 }, { "epoch": 22.36, "learning_rate": 3.882171984547145e-05, "loss": 0.3222, "step": 312500 }, { "epoch": 22.39, "learning_rate": 3.880383459722421e-05, "loss": 0.3276, "step": 313000 }, { "epoch": 22.43, "learning_rate": 3.878594934897696e-05, "loss": 0.3218, "step": 313500 }, { "epoch": 22.46, "learning_rate": 3.8768064100729714e-05, "loss": 0.3268, "step": 314000 }, { "epoch": 22.5, "learning_rate": 3.875017885248247e-05, "loss": 0.3251, "step": 314500 }, { "epoch": 22.54, "learning_rate": 3.873229360423523e-05, "loss": 0.3252, "step": 315000 }, { "epoch": 22.57, "learning_rate": 3.871440835598798e-05, "loss": 0.3233, "step": 315500 }, { "epoch": 22.61, "learning_rate": 3.869652310774074e-05, "loss": 0.3311, "step": 316000 }, { "epoch": 22.64, "learning_rate": 3.8678637859493494e-05, "loss": 0.3344, "step": 316500 }, { "epoch": 22.68, "learning_rate": 3.8660752611246245e-05, "loss": 0.3332, "step": 317000 }, { "epoch": 22.71, "learning_rate": 3.8642867362999004e-05, "loss": 0.3341, "step": 317500 }, { "epoch": 22.75, "learning_rate": 3.8624982114751756e-05, "loss": 0.3355, "step": 318000 }, { "epoch": 22.79, "learning_rate": 3.860709686650451e-05, "loss": 0.3344, "step": 318500 }, { "epoch": 22.82, "learning_rate": 3.8589211618257266e-05, "loss": 0.3376, "step": 319000 }, { "epoch": 22.86, "learning_rate": 3.857132637001002e-05, "loss": 0.3361, "step": 319500 }, { "epoch": 22.89, "learning_rate": 3.8553441121762776e-05, "loss": 0.3411, "step": 320000 }, { "epoch": 22.93, "learning_rate": 3.853555587351553e-05, "loss": 0.3404, "step": 320500 }, { "epoch": 22.96, "learning_rate": 3.851767062526828e-05, "loss": 0.346, "step": 321000 }, { "epoch": 23.0, "learning_rate": 3.849978537702104e-05, "loss": 0.3427, "step": 321500 }, { "epoch": 23.04, "learning_rate": 3.848190012877379e-05, "loss": 0.2876, "step": 322000 }, { "epoch": 23.07, "learning_rate": 3.846401488052654e-05, "loss": 0.2948, "step": 322500 }, { "epoch": 23.11, "learning_rate": 3.84461296322793e-05, "loss": 0.3026, "step": 323000 }, { "epoch": 23.14, "learning_rate": 3.842824438403205e-05, "loss": 0.2998, "step": 323500 }, { "epoch": 23.18, "learning_rate": 3.8410359135784804e-05, "loss": 0.3039, "step": 324000 }, { "epoch": 23.22, "learning_rate": 3.839247388753756e-05, "loss": 0.3022, "step": 324500 }, { "epoch": 23.25, "learning_rate": 3.8374588639290315e-05, "loss": 0.3082, "step": 325000 }, { "epoch": 23.29, "learning_rate": 3.8356703391043067e-05, "loss": 0.3123, "step": 325500 }, { "epoch": 23.32, "learning_rate": 3.8338818142795825e-05, "loss": 0.3179, "step": 326000 }, { "epoch": 23.36, "learning_rate": 3.832093289454858e-05, "loss": 0.3118, "step": 326500 }, { "epoch": 23.39, "learning_rate": 3.8303047646301335e-05, "loss": 0.3164, "step": 327000 }, { "epoch": 23.43, "learning_rate": 3.828516239805409e-05, "loss": 0.3102, "step": 327500 }, { "epoch": 23.47, "learning_rate": 3.826727714980684e-05, "loss": 0.3231, "step": 328000 }, { "epoch": 23.5, "learning_rate": 3.82493919015596e-05, "loss": 0.3154, "step": 328500 }, { "epoch": 23.54, "learning_rate": 3.823150665331235e-05, "loss": 0.319, "step": 329000 }, { "epoch": 23.57, "learning_rate": 3.82136214050651e-05, "loss": 0.3192, "step": 329500 }, { "epoch": 23.61, "learning_rate": 3.819573615681786e-05, "loss": 0.3166, "step": 330000 }, { "epoch": 23.64, "learning_rate": 3.817785090857061e-05, "loss": 0.3197, "step": 330500 }, { "epoch": 23.68, "learning_rate": 3.815996566032336e-05, "loss": 0.3218, "step": 331000 }, { "epoch": 23.72, "learning_rate": 3.814208041207612e-05, "loss": 0.3324, "step": 331500 }, { "epoch": 23.75, "learning_rate": 3.8124195163828874e-05, "loss": 0.3284, "step": 332000 }, { "epoch": 23.79, "learning_rate": 3.8106309915581626e-05, "loss": 0.3291, "step": 332500 }, { "epoch": 23.82, "learning_rate": 3.8088424667334384e-05, "loss": 0.3295, "step": 333000 }, { "epoch": 23.86, "learning_rate": 3.8070539419087136e-05, "loss": 0.332, "step": 333500 }, { "epoch": 23.89, "learning_rate": 3.805265417083989e-05, "loss": 0.3307, "step": 334000 }, { "epoch": 23.93, "learning_rate": 3.8034768922592646e-05, "loss": 0.3313, "step": 334500 }, { "epoch": 23.97, "learning_rate": 3.80168836743454e-05, "loss": 0.3375, "step": 335000 }, { "epoch": 24.0, "learning_rate": 3.799899842609816e-05, "loss": 0.3292, "step": 335500 }, { "epoch": 24.04, "learning_rate": 3.798111317785091e-05, "loss": 0.2779, "step": 336000 }, { "epoch": 24.07, "learning_rate": 3.796322792960367e-05, "loss": 0.2897, "step": 336500 }, { "epoch": 24.11, "learning_rate": 3.794534268135642e-05, "loss": 0.2903, "step": 337000 }, { "epoch": 24.15, "learning_rate": 3.792745743310918e-05, "loss": 0.2981, "step": 337500 }, { "epoch": 24.18, "learning_rate": 3.790957218486193e-05, "loss": 0.2931, "step": 338000 }, { "epoch": 24.22, "learning_rate": 3.789168693661468e-05, "loss": 0.3005, "step": 338500 }, { "epoch": 24.25, "learning_rate": 3.787380168836744e-05, "loss": 0.3019, "step": 339000 }, { "epoch": 24.29, "learning_rate": 3.785591644012019e-05, "loss": 0.3112, "step": 339500 }, { "epoch": 24.32, "learning_rate": 3.783803119187295e-05, "loss": 0.3052, "step": 340000 }, { "epoch": 24.36, "learning_rate": 3.78201459436257e-05, "loss": 0.3052, "step": 340500 }, { "epoch": 24.4, "learning_rate": 3.7802260695378453e-05, "loss": 0.3081, "step": 341000 }, { "epoch": 24.43, "learning_rate": 3.778437544713121e-05, "loss": 0.3086, "step": 341500 }, { "epoch": 24.47, "learning_rate": 3.7766490198883964e-05, "loss": 0.3115, "step": 342000 }, { "epoch": 24.5, "learning_rate": 3.7748604950636716e-05, "loss": 0.3115, "step": 342500 }, { "epoch": 24.54, "learning_rate": 3.7730719702389474e-05, "loss": 0.3175, "step": 343000 }, { "epoch": 24.57, "learning_rate": 3.7712834454142226e-05, "loss": 0.3134, "step": 343500 }, { "epoch": 24.61, "learning_rate": 3.769494920589498e-05, "loss": 0.3181, "step": 344000 }, { "epoch": 24.65, "learning_rate": 3.7677063957647736e-05, "loss": 0.3161, "step": 344500 }, { "epoch": 24.68, "learning_rate": 3.765917870940049e-05, "loss": 0.3205, "step": 345000 }, { "epoch": 24.72, "learning_rate": 3.764129346115324e-05, "loss": 0.3182, "step": 345500 }, { "epoch": 24.75, "learning_rate": 3.7623408212906e-05, "loss": 0.3232, "step": 346000 }, { "epoch": 24.79, "learning_rate": 3.760552296465875e-05, "loss": 0.3235, "step": 346500 }, { "epoch": 24.82, "learning_rate": 3.758763771641151e-05, "loss": 0.3194, "step": 347000 }, { "epoch": 24.86, "learning_rate": 3.756975246816426e-05, "loss": 0.3172, "step": 347500 }, { "epoch": 24.9, "learning_rate": 3.755186721991701e-05, "loss": 0.3244, "step": 348000 }, { "epoch": 24.93, "learning_rate": 3.753398197166977e-05, "loss": 0.3262, "step": 348500 }, { "epoch": 24.97, "learning_rate": 3.751609672342252e-05, "loss": 0.3262, "step": 349000 }, { "epoch": 25.0, "learning_rate": 3.7498211475175275e-05, "loss": 0.3218, "step": 349500 }, { "epoch": 25.04, "learning_rate": 3.748032622692803e-05, "loss": 0.2829, "step": 350000 }, { "epoch": 25.08, "learning_rate": 3.7462440978680785e-05, "loss": 0.2893, "step": 350500 }, { "epoch": 25.11, "learning_rate": 3.744455573043354e-05, "loss": 0.2878, "step": 351000 }, { "epoch": 25.15, "learning_rate": 3.7426670482186295e-05, "loss": 0.2876, "step": 351500 }, { "epoch": 25.18, "learning_rate": 3.740878523393905e-05, "loss": 0.292, "step": 352000 }, { "epoch": 25.22, "learning_rate": 3.73908999856918e-05, "loss": 0.292, "step": 352500 }, { "epoch": 25.25, "learning_rate": 3.737301473744456e-05, "loss": 0.3027, "step": 353000 }, { "epoch": 25.29, "learning_rate": 3.735512948919731e-05, "loss": 0.299, "step": 353500 }, { "epoch": 25.33, "learning_rate": 3.733724424095006e-05, "loss": 0.3027, "step": 354000 }, { "epoch": 25.36, "learning_rate": 3.731935899270282e-05, "loss": 0.302, "step": 354500 }, { "epoch": 25.4, "learning_rate": 3.730147374445557e-05, "loss": 0.2994, "step": 355000 }, { "epoch": 25.43, "learning_rate": 3.728358849620833e-05, "loss": 0.3042, "step": 355500 }, { "epoch": 25.47, "learning_rate": 3.726570324796108e-05, "loss": 0.3068, "step": 356000 }, { "epoch": 25.5, "learning_rate": 3.7247817999713834e-05, "loss": 0.3123, "step": 356500 }, { "epoch": 25.54, "learning_rate": 3.722993275146659e-05, "loss": 0.3098, "step": 357000 }, { "epoch": 25.58, "learning_rate": 3.7212047503219344e-05, "loss": 0.3083, "step": 357500 }, { "epoch": 25.61, "learning_rate": 3.7194162254972096e-05, "loss": 0.3115, "step": 358000 }, { "epoch": 25.65, "learning_rate": 3.717627700672486e-05, "loss": 0.3047, "step": 358500 }, { "epoch": 25.68, "learning_rate": 3.715839175847761e-05, "loss": 0.3141, "step": 359000 }, { "epoch": 25.72, "learning_rate": 3.7140506510230365e-05, "loss": 0.3159, "step": 359500 }, { "epoch": 25.75, "learning_rate": 3.712262126198312e-05, "loss": 0.3112, "step": 360000 }, { "epoch": 25.79, "learning_rate": 3.7104736013735875e-05, "loss": 0.3096, "step": 360500 }, { "epoch": 25.83, "learning_rate": 3.708685076548863e-05, "loss": 0.3173, "step": 361000 }, { "epoch": 25.86, "learning_rate": 3.7068965517241385e-05, "loss": 0.3201, "step": 361500 }, { "epoch": 25.9, "learning_rate": 3.705108026899414e-05, "loss": 0.3185, "step": 362000 }, { "epoch": 25.93, "learning_rate": 3.703319502074689e-05, "loss": 0.3176, "step": 362500 }, { "epoch": 25.97, "learning_rate": 3.701530977249965e-05, "loss": 0.3206, "step": 363000 }, { "epoch": 26.01, "learning_rate": 3.69974245242524e-05, "loss": 0.3193, "step": 363500 }, { "epoch": 26.04, "learning_rate": 3.697953927600515e-05, "loss": 0.274, "step": 364000 }, { "epoch": 26.08, "learning_rate": 3.696165402775791e-05, "loss": 0.2791, "step": 364500 }, { "epoch": 26.11, "learning_rate": 3.694376877951066e-05, "loss": 0.2841, "step": 365000 }, { "epoch": 26.15, "learning_rate": 3.692588353126341e-05, "loss": 0.279, "step": 365500 }, { "epoch": 26.18, "learning_rate": 3.690799828301617e-05, "loss": 0.2893, "step": 366000 }, { "epoch": 26.22, "learning_rate": 3.6890113034768924e-05, "loss": 0.2937, "step": 366500 }, { "epoch": 26.26, "learning_rate": 3.687222778652168e-05, "loss": 0.2902, "step": 367000 }, { "epoch": 26.29, "learning_rate": 3.6854342538274434e-05, "loss": 0.2968, "step": 367500 }, { "epoch": 26.33, "learning_rate": 3.6836457290027186e-05, "loss": 0.295, "step": 368000 }, { "epoch": 26.36, "learning_rate": 3.6818572041779944e-05, "loss": 0.2923, "step": 368500 }, { "epoch": 26.4, "learning_rate": 3.6800686793532696e-05, "loss": 0.2989, "step": 369000 }, { "epoch": 26.43, "learning_rate": 3.678280154528545e-05, "loss": 0.3007, "step": 369500 }, { "epoch": 26.47, "learning_rate": 3.6764916297038207e-05, "loss": 0.3044, "step": 370000 }, { "epoch": 26.51, "learning_rate": 3.674703104879096e-05, "loss": 0.2978, "step": 370500 }, { "epoch": 26.54, "learning_rate": 3.672914580054371e-05, "loss": 0.2971, "step": 371000 }, { "epoch": 26.58, "learning_rate": 3.671126055229647e-05, "loss": 0.3058, "step": 371500 }, { "epoch": 26.61, "learning_rate": 3.669337530404922e-05, "loss": 0.3078, "step": 372000 }, { "epoch": 26.65, "learning_rate": 3.667549005580197e-05, "loss": 0.3016, "step": 372500 }, { "epoch": 26.68, "learning_rate": 3.665760480755473e-05, "loss": 0.308, "step": 373000 }, { "epoch": 26.72, "learning_rate": 3.663971955930748e-05, "loss": 0.3086, "step": 373500 }, { "epoch": 26.76, "learning_rate": 3.662183431106024e-05, "loss": 0.3112, "step": 374000 }, { "epoch": 26.79, "learning_rate": 3.660394906281299e-05, "loss": 0.3071, "step": 374500 }, { "epoch": 26.83, "learning_rate": 3.6586063814565745e-05, "loss": 0.313, "step": 375000 }, { "epoch": 26.86, "learning_rate": 3.65681785663185e-05, "loss": 0.3117, "step": 375500 }, { "epoch": 26.9, "learning_rate": 3.6550293318071255e-05, "loss": 0.3135, "step": 376000 }, { "epoch": 26.94, "learning_rate": 3.653240806982401e-05, "loss": 0.3138, "step": 376500 }, { "epoch": 26.97, "learning_rate": 3.6514522821576766e-05, "loss": 0.3138, "step": 377000 }, { "epoch": 27.01, "learning_rate": 3.649663757332952e-05, "loss": 0.3043, "step": 377500 }, { "epoch": 27.04, "learning_rate": 3.647875232508227e-05, "loss": 0.2736, "step": 378000 }, { "epoch": 27.08, "learning_rate": 3.646086707683503e-05, "loss": 0.2799, "step": 378500 }, { "epoch": 27.11, "learning_rate": 3.644298182858778e-05, "loss": 0.278, "step": 379000 }, { "epoch": 27.15, "learning_rate": 3.642509658034053e-05, "loss": 0.2836, "step": 379500 }, { "epoch": 27.19, "learning_rate": 3.64072113320933e-05, "loss": 0.282, "step": 380000 }, { "epoch": 27.22, "learning_rate": 3.638932608384605e-05, "loss": 0.2837, "step": 380500 }, { "epoch": 27.26, "learning_rate": 3.63714408355988e-05, "loss": 0.2837, "step": 381000 }, { "epoch": 27.29, "learning_rate": 3.635355558735156e-05, "loss": 0.2897, "step": 381500 }, { "epoch": 27.33, "learning_rate": 3.633567033910431e-05, "loss": 0.2914, "step": 382000 }, { "epoch": 27.36, "learning_rate": 3.631778509085706e-05, "loss": 0.2928, "step": 382500 }, { "epoch": 27.4, "learning_rate": 3.629989984260982e-05, "loss": 0.2941, "step": 383000 }, { "epoch": 27.44, "learning_rate": 3.628201459436257e-05, "loss": 0.293, "step": 383500 }, { "epoch": 27.47, "learning_rate": 3.6264129346115325e-05, "loss": 0.2942, "step": 384000 }, { "epoch": 27.51, "learning_rate": 3.624624409786808e-05, "loss": 0.2941, "step": 384500 }, { "epoch": 27.54, "learning_rate": 3.6228358849620835e-05, "loss": 0.2965, "step": 385000 }, { "epoch": 27.58, "learning_rate": 3.6210473601373593e-05, "loss": 0.3013, "step": 385500 }, { "epoch": 27.61, "learning_rate": 3.6192588353126345e-05, "loss": 0.2992, "step": 386000 }, { "epoch": 27.65, "learning_rate": 3.61747031048791e-05, "loss": 0.2991, "step": 386500 }, { "epoch": 27.69, "learning_rate": 3.6156817856631856e-05, "loss": 0.2993, "step": 387000 }, { "epoch": 27.72, "learning_rate": 3.613893260838461e-05, "loss": 0.3044, "step": 387500 }, { "epoch": 27.76, "learning_rate": 3.612104736013736e-05, "loss": 0.3013, "step": 388000 }, { "epoch": 27.79, "learning_rate": 3.610316211189012e-05, "loss": 0.3072, "step": 388500 }, { "epoch": 27.83, "learning_rate": 3.608527686364287e-05, "loss": 0.3065, "step": 389000 }, { "epoch": 27.87, "learning_rate": 3.606739161539562e-05, "loss": 0.3077, "step": 389500 }, { "epoch": 27.9, "learning_rate": 3.604950636714838e-05, "loss": 0.3092, "step": 390000 }, { "epoch": 27.94, "learning_rate": 3.603162111890113e-05, "loss": 0.3065, "step": 390500 }, { "epoch": 27.97, "learning_rate": 3.6013735870653884e-05, "loss": 0.3074, "step": 391000 }, { "epoch": 28.01, "learning_rate": 3.599585062240664e-05, "loss": 0.2985, "step": 391500 }, { "epoch": 28.04, "learning_rate": 3.5977965374159394e-05, "loss": 0.2652, "step": 392000 }, { "epoch": 28.08, "learning_rate": 3.5960080125912146e-05, "loss": 0.2739, "step": 392500 }, { "epoch": 28.12, "learning_rate": 3.5942194877664904e-05, "loss": 0.2782, "step": 393000 }, { "epoch": 28.15, "learning_rate": 3.5924309629417656e-05, "loss": 0.2794, "step": 393500 }, { "epoch": 28.19, "learning_rate": 3.5906424381170415e-05, "loss": 0.2835, "step": 394000 }, { "epoch": 28.22, "learning_rate": 3.5888539132923166e-05, "loss": 0.2797, "step": 394500 }, { "epoch": 28.26, "learning_rate": 3.587065388467592e-05, "loss": 0.2821, "step": 395000 }, { "epoch": 28.29, "learning_rate": 3.585276863642868e-05, "loss": 0.2846, "step": 395500 }, { "epoch": 28.33, "learning_rate": 3.583488338818143e-05, "loss": 0.2871, "step": 396000 }, { "epoch": 28.37, "learning_rate": 3.581699813993418e-05, "loss": 0.2847, "step": 396500 }, { "epoch": 28.4, "learning_rate": 3.579911289168694e-05, "loss": 0.2892, "step": 397000 }, { "epoch": 28.44, "learning_rate": 3.578122764343969e-05, "loss": 0.2895, "step": 397500 }, { "epoch": 28.47, "learning_rate": 3.576334239519244e-05, "loss": 0.2917, "step": 398000 }, { "epoch": 28.51, "learning_rate": 3.57454571469452e-05, "loss": 0.2965, "step": 398500 }, { "epoch": 28.54, "learning_rate": 3.572757189869795e-05, "loss": 0.2958, "step": 399000 }, { "epoch": 28.58, "learning_rate": 3.5709686650450705e-05, "loss": 0.2948, "step": 399500 }, { "epoch": 28.62, "learning_rate": 3.569180140220346e-05, "loss": 0.3015, "step": 400000 }, { "epoch": 28.65, "learning_rate": 3.5673916153956215e-05, "loss": 0.2949, "step": 400500 }, { "epoch": 28.69, "learning_rate": 3.5656030905708974e-05, "loss": 0.3006, "step": 401000 }, { "epoch": 28.72, "learning_rate": 3.5638145657461725e-05, "loss": 0.3004, "step": 401500 }, { "epoch": 28.76, "learning_rate": 3.5620260409214484e-05, "loss": 0.301, "step": 402000 }, { "epoch": 28.8, "learning_rate": 3.5602375160967236e-05, "loss": 0.2974, "step": 402500 }, { "epoch": 28.83, "learning_rate": 3.5584489912719994e-05, "loss": 0.2978, "step": 403000 }, { "epoch": 28.87, "learning_rate": 3.5566604664472746e-05, "loss": 0.2994, "step": 403500 }, { "epoch": 28.9, "learning_rate": 3.55487194162255e-05, "loss": 0.3041, "step": 404000 }, { "epoch": 28.94, "learning_rate": 3.5530834167978256e-05, "loss": 0.3043, "step": 404500 }, { "epoch": 28.97, "learning_rate": 3.551294891973101e-05, "loss": 0.3016, "step": 405000 }, { "epoch": 29.01, "learning_rate": 3.549506367148377e-05, "loss": 0.2955, "step": 405500 }, { "epoch": 29.05, "learning_rate": 3.547717842323652e-05, "loss": 0.2647, "step": 406000 }, { "epoch": 29.08, "learning_rate": 3.545929317498927e-05, "loss": 0.2694, "step": 406500 }, { "epoch": 29.12, "learning_rate": 3.544140792674203e-05, "loss": 0.2691, "step": 407000 }, { "epoch": 29.15, "learning_rate": 3.542352267849478e-05, "loss": 0.2695, "step": 407500 }, { "epoch": 29.19, "learning_rate": 3.540563743024753e-05, "loss": 0.2747, "step": 408000 }, { "epoch": 29.22, "learning_rate": 3.538775218200029e-05, "loss": 0.2794, "step": 408500 }, { "epoch": 29.26, "learning_rate": 3.536986693375304e-05, "loss": 0.2796, "step": 409000 }, { "epoch": 29.3, "learning_rate": 3.5351981685505795e-05, "loss": 0.2837, "step": 409500 }, { "epoch": 29.33, "learning_rate": 3.533409643725855e-05, "loss": 0.2849, "step": 410000 }, { "epoch": 29.37, "learning_rate": 3.5316211189011305e-05, "loss": 0.2858, "step": 410500 }, { "epoch": 29.4, "learning_rate": 3.529832594076406e-05, "loss": 0.2839, "step": 411000 }, { "epoch": 29.44, "learning_rate": 3.5280440692516815e-05, "loss": 0.2864, "step": 411500 }, { "epoch": 29.47, "learning_rate": 3.526255544426957e-05, "loss": 0.2858, "step": 412000 }, { "epoch": 29.51, "learning_rate": 3.5244670196022326e-05, "loss": 0.29, "step": 412500 }, { "epoch": 29.55, "learning_rate": 3.522678494777508e-05, "loss": 0.2869, "step": 413000 }, { "epoch": 29.58, "learning_rate": 3.520889969952783e-05, "loss": 0.2902, "step": 413500 }, { "epoch": 29.62, "learning_rate": 3.519101445128059e-05, "loss": 0.2906, "step": 414000 }, { "epoch": 29.65, "learning_rate": 3.517312920303334e-05, "loss": 0.2997, "step": 414500 }, { "epoch": 29.69, "learning_rate": 3.515524395478609e-05, "loss": 0.2961, "step": 415000 }, { "epoch": 29.73, "learning_rate": 3.513735870653885e-05, "loss": 0.2937, "step": 415500 }, { "epoch": 29.76, "learning_rate": 3.51194734582916e-05, "loss": 0.2919, "step": 416000 }, { "epoch": 29.8, "learning_rate": 3.5101588210044354e-05, "loss": 0.293, "step": 416500 }, { "epoch": 29.83, "learning_rate": 3.508370296179711e-05, "loss": 0.2976, "step": 417000 }, { "epoch": 29.87, "learning_rate": 3.5065817713549864e-05, "loss": 0.2962, "step": 417500 }, { "epoch": 29.9, "learning_rate": 3.5047932465302616e-05, "loss": 0.299, "step": 418000 }, { "epoch": 29.94, "learning_rate": 3.5030047217055374e-05, "loss": 0.2975, "step": 418500 }, { "epoch": 29.98, "learning_rate": 3.5012161968808126e-05, "loss": 0.2969, "step": 419000 }, { "epoch": 30.01, "learning_rate": 3.499427672056088e-05, "loss": 0.2894, "step": 419500 }, { "epoch": 30.05, "learning_rate": 3.497639147231364e-05, "loss": 0.2614, "step": 420000 }, { "epoch": 30.08, "learning_rate": 3.495850622406639e-05, "loss": 0.2653, "step": 420500 }, { "epoch": 30.12, "learning_rate": 3.494062097581915e-05, "loss": 0.272, "step": 421000 }, { "epoch": 30.15, "learning_rate": 3.49227357275719e-05, "loss": 0.2693, "step": 421500 }, { "epoch": 30.19, "learning_rate": 3.490485047932465e-05, "loss": 0.2741, "step": 422000 }, { "epoch": 30.23, "learning_rate": 3.488696523107741e-05, "loss": 0.2695, "step": 422500 }, { "epoch": 30.26, "learning_rate": 3.486907998283016e-05, "loss": 0.2766, "step": 423000 }, { "epoch": 30.3, "learning_rate": 3.485119473458292e-05, "loss": 0.2787, "step": 423500 }, { "epoch": 30.33, "learning_rate": 3.483330948633567e-05, "loss": 0.2804, "step": 424000 }, { "epoch": 30.37, "learning_rate": 3.481542423808843e-05, "loss": 0.2745, "step": 424500 }, { "epoch": 30.4, "learning_rate": 3.479753898984118e-05, "loss": 0.2776, "step": 425000 }, { "epoch": 30.44, "learning_rate": 3.477965374159394e-05, "loss": 0.2767, "step": 425500 }, { "epoch": 30.48, "learning_rate": 3.476176849334669e-05, "loss": 0.2842, "step": 426000 }, { "epoch": 30.51, "learning_rate": 3.4743883245099444e-05, "loss": 0.2832, "step": 426500 }, { "epoch": 30.55, "learning_rate": 3.47259979968522e-05, "loss": 0.2852, "step": 427000 }, { "epoch": 30.58, "learning_rate": 3.4708112748604954e-05, "loss": 0.2922, "step": 427500 }, { "epoch": 30.62, "learning_rate": 3.4690227500357706e-05, "loss": 0.2883, "step": 428000 }, { "epoch": 30.66, "learning_rate": 3.4672342252110465e-05, "loss": 0.2925, "step": 428500 }, { "epoch": 30.69, "learning_rate": 3.4654457003863216e-05, "loss": 0.2925, "step": 429000 }, { "epoch": 30.73, "learning_rate": 3.463657175561597e-05, "loss": 0.2914, "step": 429500 }, { "epoch": 30.76, "learning_rate": 3.461868650736873e-05, "loss": 0.2897, "step": 430000 }, { "epoch": 30.8, "learning_rate": 3.460080125912148e-05, "loss": 0.2925, "step": 430500 }, { "epoch": 30.83, "learning_rate": 3.458291601087423e-05, "loss": 0.2948, "step": 431000 }, { "epoch": 30.87, "learning_rate": 3.456503076262699e-05, "loss": 0.2928, "step": 431500 }, { "epoch": 30.91, "learning_rate": 3.454714551437974e-05, "loss": 0.2896, "step": 432000 }, { "epoch": 30.94, "learning_rate": 3.45292602661325e-05, "loss": 0.2935, "step": 432500 }, { "epoch": 30.98, "learning_rate": 3.451137501788525e-05, "loss": 0.2906, "step": 433000 }, { "epoch": 31.01, "learning_rate": 3.4493489769638e-05, "loss": 0.2811, "step": 433500 }, { "epoch": 31.05, "learning_rate": 3.447560452139076e-05, "loss": 0.2524, "step": 434000 }, { "epoch": 31.08, "learning_rate": 3.445771927314351e-05, "loss": 0.2664, "step": 434500 }, { "epoch": 31.12, "learning_rate": 3.4439834024896265e-05, "loss": 0.2655, "step": 435000 }, { "epoch": 31.16, "learning_rate": 3.4421948776649024e-05, "loss": 0.2686, "step": 435500 }, { "epoch": 31.19, "learning_rate": 3.4404063528401775e-05, "loss": 0.2724, "step": 436000 }, { "epoch": 31.23, "learning_rate": 3.438617828015453e-05, "loss": 0.2699, "step": 436500 }, { "epoch": 31.26, "learning_rate": 3.4368293031907286e-05, "loss": 0.2718, "step": 437000 }, { "epoch": 31.3, "learning_rate": 3.435040778366004e-05, "loss": 0.2763, "step": 437500 }, { "epoch": 31.33, "learning_rate": 3.433252253541279e-05, "loss": 0.2759, "step": 438000 }, { "epoch": 31.37, "learning_rate": 3.431463728716555e-05, "loss": 0.2789, "step": 438500 }, { "epoch": 31.41, "learning_rate": 3.42967520389183e-05, "loss": 0.2791, "step": 439000 }, { "epoch": 31.44, "learning_rate": 3.427886679067105e-05, "loss": 0.2783, "step": 439500 }, { "epoch": 31.48, "learning_rate": 3.426098154242381e-05, "loss": 0.2806, "step": 440000 }, { "epoch": 31.51, "learning_rate": 3.424309629417656e-05, "loss": 0.2834, "step": 440500 }, { "epoch": 31.55, "learning_rate": 3.422521104592932e-05, "loss": 0.2781, "step": 441000 }, { "epoch": 31.59, "learning_rate": 3.420732579768207e-05, "loss": 0.2836, "step": 441500 }, { "epoch": 31.62, "learning_rate": 3.4189440549434824e-05, "loss": 0.2841, "step": 442000 }, { "epoch": 31.66, "learning_rate": 3.417155530118758e-05, "loss": 0.2825, "step": 442500 }, { "epoch": 31.69, "learning_rate": 3.4153670052940334e-05, "loss": 0.286, "step": 443000 }, { "epoch": 31.73, "learning_rate": 3.4135784804693086e-05, "loss": 0.2892, "step": 443500 }, { "epoch": 31.76, "learning_rate": 3.4117899556445845e-05, "loss": 0.286, "step": 444000 }, { "epoch": 31.8, "learning_rate": 3.4100014308198596e-05, "loss": 0.2922, "step": 444500 }, { "epoch": 31.84, "learning_rate": 3.4082129059951355e-05, "loss": 0.2836, "step": 445000 }, { "epoch": 31.87, "learning_rate": 3.4064243811704114e-05, "loss": 0.2906, "step": 445500 }, { "epoch": 31.91, "learning_rate": 3.4046358563456865e-05, "loss": 0.2913, "step": 446000 }, { "epoch": 31.94, "learning_rate": 3.402847331520962e-05, "loss": 0.2911, "step": 446500 }, { "epoch": 31.98, "learning_rate": 3.4010588066962376e-05, "loss": 0.2941, "step": 447000 }, { "epoch": 32.01, "learning_rate": 3.399270281871513e-05, "loss": 0.2761, "step": 447500 }, { "epoch": 32.05, "learning_rate": 3.397481757046788e-05, "loss": 0.2583, "step": 448000 }, { "epoch": 32.09, "learning_rate": 3.395693232222064e-05, "loss": 0.2592, "step": 448500 }, { "epoch": 32.12, "learning_rate": 3.393904707397339e-05, "loss": 0.2603, "step": 449000 }, { "epoch": 32.16, "learning_rate": 3.392116182572614e-05, "loss": 0.2613, "step": 449500 }, { "epoch": 32.19, "learning_rate": 3.39032765774789e-05, "loss": 0.2671, "step": 450000 }, { "epoch": 32.23, "learning_rate": 3.388539132923165e-05, "loss": 0.2727, "step": 450500 }, { "epoch": 32.26, "learning_rate": 3.3867506080984404e-05, "loss": 0.2692, "step": 451000 }, { "epoch": 32.3, "learning_rate": 3.384962083273716e-05, "loss": 0.2671, "step": 451500 }, { "epoch": 32.34, "learning_rate": 3.3831735584489914e-05, "loss": 0.276, "step": 452000 }, { "epoch": 32.37, "learning_rate": 3.381385033624267e-05, "loss": 0.2731, "step": 452500 }, { "epoch": 32.41, "learning_rate": 3.3795965087995424e-05, "loss": 0.2734, "step": 453000 }, { "epoch": 32.44, "learning_rate": 3.3778079839748176e-05, "loss": 0.2755, "step": 453500 }, { "epoch": 32.48, "learning_rate": 3.3760194591500935e-05, "loss": 0.2786, "step": 454000 }, { "epoch": 32.52, "learning_rate": 3.3742309343253687e-05, "loss": 0.28, "step": 454500 }, { "epoch": 32.55, "learning_rate": 3.372442409500644e-05, "loss": 0.2768, "step": 455000 }, { "epoch": 32.59, "learning_rate": 3.37065388467592e-05, "loss": 0.2833, "step": 455500 }, { "epoch": 32.62, "learning_rate": 3.368865359851195e-05, "loss": 0.2782, "step": 456000 }, { "epoch": 32.66, "learning_rate": 3.36707683502647e-05, "loss": 0.2823, "step": 456500 }, { "epoch": 32.69, "learning_rate": 3.365288310201746e-05, "loss": 0.2795, "step": 457000 }, { "epoch": 32.73, "learning_rate": 3.363499785377021e-05, "loss": 0.2782, "step": 457500 }, { "epoch": 32.77, "learning_rate": 3.361711260552296e-05, "loss": 0.285, "step": 458000 }, { "epoch": 32.8, "learning_rate": 3.359922735727572e-05, "loss": 0.2854, "step": 458500 }, { "epoch": 32.84, "learning_rate": 3.358134210902847e-05, "loss": 0.2825, "step": 459000 }, { "epoch": 32.87, "learning_rate": 3.356345686078123e-05, "loss": 0.2834, "step": 459500 }, { "epoch": 32.91, "learning_rate": 3.354557161253398e-05, "loss": 0.2848, "step": 460000 }, { "epoch": 32.94, "learning_rate": 3.3527686364286735e-05, "loss": 0.2843, "step": 460500 }, { "epoch": 32.98, "learning_rate": 3.3509801116039494e-05, "loss": 0.2864, "step": 461000 }, { "epoch": 33.02, "learning_rate": 3.3491915867792246e-05, "loss": 0.2755, "step": 461500 }, { "epoch": 33.05, "learning_rate": 3.3474030619545e-05, "loss": 0.2541, "step": 462000 }, { "epoch": 33.09, "learning_rate": 3.3456145371297756e-05, "loss": 0.2543, "step": 462500 }, { "epoch": 33.12, "learning_rate": 3.343826012305051e-05, "loss": 0.2596, "step": 463000 }, { "epoch": 33.16, "learning_rate": 3.342037487480326e-05, "loss": 0.2639, "step": 463500 }, { "epoch": 33.2, "learning_rate": 3.340248962655602e-05, "loss": 0.2669, "step": 464000 }, { "epoch": 33.23, "learning_rate": 3.338460437830877e-05, "loss": 0.2641, "step": 464500 }, { "epoch": 33.27, "learning_rate": 3.336671913006152e-05, "loss": 0.2689, "step": 465000 }, { "epoch": 33.3, "learning_rate": 3.334883388181428e-05, "loss": 0.262, "step": 465500 }, { "epoch": 33.34, "learning_rate": 3.333094863356703e-05, "loss": 0.2656, "step": 466000 }, { "epoch": 33.37, "learning_rate": 3.3313063385319784e-05, "loss": 0.2665, "step": 466500 }, { "epoch": 33.41, "learning_rate": 3.329517813707255e-05, "loss": 0.2705, "step": 467000 }, { "epoch": 33.45, "learning_rate": 3.32772928888253e-05, "loss": 0.2744, "step": 467500 }, { "epoch": 33.48, "learning_rate": 3.325940764057805e-05, "loss": 0.266, "step": 468000 }, { "epoch": 33.52, "learning_rate": 3.324152239233081e-05, "loss": 0.2723, "step": 468500 }, { "epoch": 33.55, "learning_rate": 3.322363714408356e-05, "loss": 0.275, "step": 469000 }, { "epoch": 33.59, "learning_rate": 3.3205751895836315e-05, "loss": 0.2751, "step": 469500 }, { "epoch": 33.62, "learning_rate": 3.3187866647589073e-05, "loss": 0.2801, "step": 470000 }, { "epoch": 33.66, "learning_rate": 3.3169981399341825e-05, "loss": 0.2795, "step": 470500 }, { "epoch": 33.7, "learning_rate": 3.3152096151094584e-05, "loss": 0.2764, "step": 471000 }, { "epoch": 33.73, "learning_rate": 3.3134210902847336e-05, "loss": 0.2799, "step": 471500 }, { "epoch": 33.77, "learning_rate": 3.311632565460009e-05, "loss": 0.2812, "step": 472000 }, { "epoch": 33.8, "learning_rate": 3.3098440406352846e-05, "loss": 0.2851, "step": 472500 }, { "epoch": 33.84, "learning_rate": 3.30805551581056e-05, "loss": 0.2855, "step": 473000 }, { "epoch": 33.87, "learning_rate": 3.306266990985835e-05, "loss": 0.2858, "step": 473500 }, { "epoch": 33.91, "learning_rate": 3.304478466161111e-05, "loss": 0.2815, "step": 474000 }, { "epoch": 33.95, "learning_rate": 3.302689941336386e-05, "loss": 0.2766, "step": 474500 }, { "epoch": 33.98, "learning_rate": 3.300901416511661e-05, "loss": 0.2831, "step": 475000 }, { "epoch": 34.02, "learning_rate": 3.299112891686937e-05, "loss": 0.2673, "step": 475500 }, { "epoch": 34.05, "learning_rate": 3.297324366862212e-05, "loss": 0.2541, "step": 476000 }, { "epoch": 34.09, "learning_rate": 3.2955358420374874e-05, "loss": 0.2556, "step": 476500 }, { "epoch": 34.13, "learning_rate": 3.293747317212763e-05, "loss": 0.2588, "step": 477000 }, { "epoch": 34.16, "learning_rate": 3.2919587923880384e-05, "loss": 0.2612, "step": 477500 }, { "epoch": 34.2, "learning_rate": 3.2901702675633136e-05, "loss": 0.262, "step": 478000 }, { "epoch": 34.23, "learning_rate": 3.2883817427385895e-05, "loss": 0.2604, "step": 478500 }, { "epoch": 34.27, "learning_rate": 3.2865932179138646e-05, "loss": 0.2644, "step": 479000 }, { "epoch": 34.3, "learning_rate": 3.2848046930891405e-05, "loss": 0.2628, "step": 479500 }, { "epoch": 34.34, "learning_rate": 3.283016168264416e-05, "loss": 0.2678, "step": 480000 }, { "epoch": 34.38, "learning_rate": 3.281227643439691e-05, "loss": 0.2623, "step": 480500 }, { "epoch": 34.41, "learning_rate": 3.279439118614967e-05, "loss": 0.2665, "step": 481000 }, { "epoch": 34.45, "learning_rate": 3.277650593790242e-05, "loss": 0.2633, "step": 481500 }, { "epoch": 34.48, "learning_rate": 3.275862068965517e-05, "loss": 0.2692, "step": 482000 }, { "epoch": 34.52, "learning_rate": 3.274073544140793e-05, "loss": 0.2678, "step": 482500 }, { "epoch": 34.55, "learning_rate": 3.272285019316068e-05, "loss": 0.2725, "step": 483000 }, { "epoch": 34.59, "learning_rate": 3.270496494491343e-05, "loss": 0.2722, "step": 483500 }, { "epoch": 34.63, "learning_rate": 3.268707969666619e-05, "loss": 0.2747, "step": 484000 }, { "epoch": 34.66, "learning_rate": 3.266919444841894e-05, "loss": 0.277, "step": 484500 }, { "epoch": 34.7, "learning_rate": 3.2651309200171695e-05, "loss": 0.2744, "step": 485000 }, { "epoch": 34.73, "learning_rate": 3.2633423951924454e-05, "loss": 0.273, "step": 485500 }, { "epoch": 34.77, "learning_rate": 3.2615538703677205e-05, "loss": 0.2784, "step": 486000 }, { "epoch": 34.8, "learning_rate": 3.2597653455429964e-05, "loss": 0.2808, "step": 486500 }, { "epoch": 34.84, "learning_rate": 3.2579768207182716e-05, "loss": 0.274, "step": 487000 }, { "epoch": 34.88, "learning_rate": 3.256188295893547e-05, "loss": 0.2794, "step": 487500 }, { "epoch": 34.91, "learning_rate": 3.2543997710688226e-05, "loss": 0.2818, "step": 488000 }, { "epoch": 34.95, "learning_rate": 3.2526112462440985e-05, "loss": 0.2789, "step": 488500 }, { "epoch": 34.98, "learning_rate": 3.2508227214193736e-05, "loss": 0.2775, "step": 489000 }, { "epoch": 35.02, "learning_rate": 3.249034196594649e-05, "loss": 0.2633, "step": 489500 }, { "epoch": 35.06, "learning_rate": 3.247245671769925e-05, "loss": 0.2477, "step": 490000 }, { "epoch": 35.09, "learning_rate": 3.2454571469452e-05, "loss": 0.25, "step": 490500 }, { "epoch": 35.13, "learning_rate": 3.243668622120476e-05, "loss": 0.2536, "step": 491000 }, { "epoch": 35.16, "learning_rate": 3.241880097295751e-05, "loss": 0.254, "step": 491500 }, { "epoch": 35.2, "learning_rate": 3.240091572471026e-05, "loss": 0.2561, "step": 492000 }, { "epoch": 35.23, "learning_rate": 3.238303047646302e-05, "loss": 0.2616, "step": 492500 }, { "epoch": 35.27, "learning_rate": 3.236514522821577e-05, "loss": 0.2619, "step": 493000 }, { "epoch": 35.31, "learning_rate": 3.234725997996852e-05, "loss": 0.2596, "step": 493500 }, { "epoch": 35.34, "learning_rate": 3.232937473172128e-05, "loss": 0.2612, "step": 494000 }, { "epoch": 35.38, "learning_rate": 3.231148948347403e-05, "loss": 0.2629, "step": 494500 }, { "epoch": 35.41, "learning_rate": 3.2293604235226785e-05, "loss": 0.2649, "step": 495000 }, { "epoch": 35.45, "learning_rate": 3.2275718986979544e-05, "loss": 0.2672, "step": 495500 }, { "epoch": 35.48, "learning_rate": 3.2257833738732295e-05, "loss": 0.2701, "step": 496000 }, { "epoch": 35.52, "learning_rate": 3.223994849048505e-05, "loss": 0.268, "step": 496500 }, { "epoch": 35.56, "learning_rate": 3.2222063242237806e-05, "loss": 0.2701, "step": 497000 }, { "epoch": 35.59, "learning_rate": 3.220417799399056e-05, "loss": 0.2734, "step": 497500 }, { "epoch": 35.63, "learning_rate": 3.218629274574331e-05, "loss": 0.2714, "step": 498000 }, { "epoch": 35.66, "learning_rate": 3.216840749749607e-05, "loss": 0.2679, "step": 498500 }, { "epoch": 35.7, "learning_rate": 3.215052224924882e-05, "loss": 0.2712, "step": 499000 }, { "epoch": 35.73, "learning_rate": 3.213263700100158e-05, "loss": 0.2728, "step": 499500 }, { "epoch": 35.77, "learning_rate": 3.211475175275433e-05, "loss": 0.2749, "step": 500000 }, { "epoch": 35.81, "learning_rate": 3.209686650450708e-05, "loss": 0.2746, "step": 500500 }, { "epoch": 35.84, "learning_rate": 3.207898125625984e-05, "loss": 0.2742, "step": 501000 }, { "epoch": 35.88, "learning_rate": 3.206109600801259e-05, "loss": 0.2765, "step": 501500 }, { "epoch": 35.91, "learning_rate": 3.2043210759765344e-05, "loss": 0.2739, "step": 502000 }, { "epoch": 35.95, "learning_rate": 3.20253255115181e-05, "loss": 0.2808, "step": 502500 }, { "epoch": 35.99, "learning_rate": 3.2007440263270854e-05, "loss": 0.2783, "step": 503000 }, { "epoch": 36.02, "learning_rate": 3.1989555015023606e-05, "loss": 0.2615, "step": 503500 }, { "epoch": 36.06, "learning_rate": 3.1971669766776365e-05, "loss": 0.25, "step": 504000 }, { "epoch": 36.09, "learning_rate": 3.195378451852912e-05, "loss": 0.2464, "step": 504500 }, { "epoch": 36.13, "learning_rate": 3.193589927028187e-05, "loss": 0.2554, "step": 505000 }, { "epoch": 36.16, "learning_rate": 3.191801402203463e-05, "loss": 0.2573, "step": 505500 }, { "epoch": 36.2, "learning_rate": 3.190012877378738e-05, "loss": 0.258, "step": 506000 }, { "epoch": 36.24, "learning_rate": 3.188224352554014e-05, "loss": 0.2633, "step": 506500 }, { "epoch": 36.27, "learning_rate": 3.186435827729289e-05, "loss": 0.262, "step": 507000 }, { "epoch": 36.31, "learning_rate": 3.184647302904564e-05, "loss": 0.2673, "step": 507500 }, { "epoch": 36.34, "learning_rate": 3.18285877807984e-05, "loss": 0.2617, "step": 508000 }, { "epoch": 36.38, "learning_rate": 3.181070253255115e-05, "loss": 0.2615, "step": 508500 }, { "epoch": 36.41, "learning_rate": 3.17928172843039e-05, "loss": 0.2644, "step": 509000 }, { "epoch": 36.45, "learning_rate": 3.177493203605666e-05, "loss": 0.2632, "step": 509500 }, { "epoch": 36.49, "learning_rate": 3.1757046787809413e-05, "loss": 0.2633, "step": 510000 }, { "epoch": 36.52, "learning_rate": 3.173916153956217e-05, "loss": 0.2657, "step": 510500 }, { "epoch": 36.56, "learning_rate": 3.172127629131493e-05, "loss": 0.2652, "step": 511000 }, { "epoch": 36.59, "learning_rate": 3.170339104306768e-05, "loss": 0.2676, "step": 511500 }, { "epoch": 36.63, "learning_rate": 3.1685505794820434e-05, "loss": 0.2654, "step": 512000 }, { "epoch": 36.66, "learning_rate": 3.166762054657319e-05, "loss": 0.2714, "step": 512500 }, { "epoch": 36.7, "learning_rate": 3.1649735298325945e-05, "loss": 0.2712, "step": 513000 }, { "epoch": 36.74, "learning_rate": 3.1631850050078696e-05, "loss": 0.2687, "step": 513500 }, { "epoch": 36.77, "learning_rate": 3.1613964801831455e-05, "loss": 0.2659, "step": 514000 }, { "epoch": 36.81, "learning_rate": 3.159607955358421e-05, "loss": 0.2666, "step": 514500 }, { "epoch": 36.84, "learning_rate": 3.157819430533696e-05, "loss": 0.2734, "step": 515000 }, { "epoch": 36.88, "learning_rate": 3.156030905708972e-05, "loss": 0.2696, "step": 515500 }, { "epoch": 36.92, "learning_rate": 3.154242380884247e-05, "loss": 0.2714, "step": 516000 }, { "epoch": 36.95, "learning_rate": 3.152453856059522e-05, "loss": 0.2701, "step": 516500 }, { "epoch": 36.99, "learning_rate": 3.150665331234798e-05, "loss": 0.2733, "step": 517000 }, { "epoch": 37.02, "learning_rate": 3.148876806410073e-05, "loss": 0.2569, "step": 517500 }, { "epoch": 37.06, "learning_rate": 3.147088281585349e-05, "loss": 0.2466, "step": 518000 }, { "epoch": 37.09, "learning_rate": 3.145299756760624e-05, "loss": 0.2461, "step": 518500 }, { "epoch": 37.13, "learning_rate": 3.143511231935899e-05, "loss": 0.2507, "step": 519000 }, { "epoch": 37.17, "learning_rate": 3.141722707111175e-05, "loss": 0.2533, "step": 519500 }, { "epoch": 37.2, "learning_rate": 3.1399341822864504e-05, "loss": 0.256, "step": 520000 }, { "epoch": 37.24, "learning_rate": 3.1381456574617255e-05, "loss": 0.2479, "step": 520500 }, { "epoch": 37.27, "learning_rate": 3.1363571326370014e-05, "loss": 0.2547, "step": 521000 }, { "epoch": 37.31, "learning_rate": 3.1345686078122766e-05, "loss": 0.2567, "step": 521500 }, { "epoch": 37.34, "learning_rate": 3.132780082987552e-05, "loss": 0.2581, "step": 522000 }, { "epoch": 37.38, "learning_rate": 3.1309915581628276e-05, "loss": 0.2609, "step": 522500 }, { "epoch": 37.42, "learning_rate": 3.129203033338103e-05, "loss": 0.2594, "step": 523000 }, { "epoch": 37.45, "learning_rate": 3.127414508513378e-05, "loss": 0.2607, "step": 523500 }, { "epoch": 37.49, "learning_rate": 3.125625983688654e-05, "loss": 0.2675, "step": 524000 }, { "epoch": 37.52, "learning_rate": 3.123837458863929e-05, "loss": 0.2652, "step": 524500 }, { "epoch": 37.56, "learning_rate": 3.122048934039204e-05, "loss": 0.2616, "step": 525000 }, { "epoch": 37.59, "learning_rate": 3.12026040921448e-05, "loss": 0.2635, "step": 525500 }, { "epoch": 37.63, "learning_rate": 3.118471884389755e-05, "loss": 0.2699, "step": 526000 }, { "epoch": 37.67, "learning_rate": 3.116683359565031e-05, "loss": 0.2641, "step": 526500 }, { "epoch": 37.7, "learning_rate": 3.114894834740306e-05, "loss": 0.2685, "step": 527000 }, { "epoch": 37.74, "learning_rate": 3.1131063099155814e-05, "loss": 0.2675, "step": 527500 }, { "epoch": 37.77, "learning_rate": 3.111317785090857e-05, "loss": 0.2666, "step": 528000 }, { "epoch": 37.81, "learning_rate": 3.1095292602661325e-05, "loss": 0.2665, "step": 528500 }, { "epoch": 37.85, "learning_rate": 3.1077407354414076e-05, "loss": 0.2671, "step": 529000 }, { "epoch": 37.88, "learning_rate": 3.1059522106166835e-05, "loss": 0.2667, "step": 529500 }, { "epoch": 37.92, "learning_rate": 3.104163685791959e-05, "loss": 0.2695, "step": 530000 }, { "epoch": 37.95, "learning_rate": 3.102375160967234e-05, "loss": 0.2651, "step": 530500 }, { "epoch": 37.99, "learning_rate": 3.10058663614251e-05, "loss": 0.2732, "step": 531000 }, { "epoch": 38.02, "learning_rate": 3.098798111317785e-05, "loss": 0.2519, "step": 531500 }, { "epoch": 38.06, "learning_rate": 3.097009586493061e-05, "loss": 0.238, "step": 532000 }, { "epoch": 38.1, "learning_rate": 3.0952210616683366e-05, "loss": 0.2483, "step": 532500 }, { "epoch": 38.13, "learning_rate": 3.093432536843612e-05, "loss": 0.2463, "step": 533000 }, { "epoch": 38.17, "learning_rate": 3.091644012018887e-05, "loss": 0.2484, "step": 533500 }, { "epoch": 38.2, "learning_rate": 3.089855487194163e-05, "loss": 0.2522, "step": 534000 }, { "epoch": 38.24, "learning_rate": 3.088066962369438e-05, "loss": 0.2464, "step": 534500 }, { "epoch": 38.27, "learning_rate": 3.086278437544713e-05, "loss": 0.2515, "step": 535000 }, { "epoch": 38.31, "learning_rate": 3.084489912719989e-05, "loss": 0.261, "step": 535500 }, { "epoch": 38.35, "learning_rate": 3.082701387895264e-05, "loss": 0.255, "step": 536000 }, { "epoch": 38.38, "learning_rate": 3.0809128630705394e-05, "loss": 0.2597, "step": 536500 }, { "epoch": 38.42, "learning_rate": 3.079124338245815e-05, "loss": 0.2584, "step": 537000 }, { "epoch": 38.45, "learning_rate": 3.0773358134210904e-05, "loss": 0.2596, "step": 537500 }, { "epoch": 38.49, "learning_rate": 3.075547288596366e-05, "loss": 0.2641, "step": 538000 }, { "epoch": 38.52, "learning_rate": 3.0737587637716415e-05, "loss": 0.265, "step": 538500 }, { "epoch": 38.56, "learning_rate": 3.0719702389469167e-05, "loss": 0.2587, "step": 539000 }, { "epoch": 38.6, "learning_rate": 3.0701817141221925e-05, "loss": 0.2596, "step": 539500 }, { "epoch": 38.63, "learning_rate": 3.068393189297468e-05, "loss": 0.2658, "step": 540000 }, { "epoch": 38.67, "learning_rate": 3.066604664472743e-05, "loss": 0.2638, "step": 540500 }, { "epoch": 38.7, "learning_rate": 3.064816139648019e-05, "loss": 0.2663, "step": 541000 }, { "epoch": 38.74, "learning_rate": 3.063027614823294e-05, "loss": 0.2677, "step": 541500 }, { "epoch": 38.78, "learning_rate": 3.061239089998569e-05, "loss": 0.2613, "step": 542000 }, { "epoch": 38.81, "learning_rate": 3.059450565173845e-05, "loss": 0.2654, "step": 542500 }, { "epoch": 38.85, "learning_rate": 3.05766204034912e-05, "loss": 0.2592, "step": 543000 }, { "epoch": 38.88, "learning_rate": 3.055873515524395e-05, "loss": 0.264, "step": 543500 }, { "epoch": 38.92, "learning_rate": 3.054084990699671e-05, "loss": 0.263, "step": 544000 }, { "epoch": 38.95, "learning_rate": 3.052296465874946e-05, "loss": 0.2662, "step": 544500 }, { "epoch": 38.99, "learning_rate": 3.050507941050222e-05, "loss": 0.2636, "step": 545000 }, { "epoch": 39.03, "learning_rate": 3.0487194162254974e-05, "loss": 0.2439, "step": 545500 }, { "epoch": 39.06, "learning_rate": 3.0469308914007726e-05, "loss": 0.2415, "step": 546000 }, { "epoch": 39.1, "learning_rate": 3.045142366576048e-05, "loss": 0.2504, "step": 546500 }, { "epoch": 39.13, "learning_rate": 3.0433538417513236e-05, "loss": 0.2458, "step": 547000 }, { "epoch": 39.17, "learning_rate": 3.041565316926599e-05, "loss": 0.251, "step": 547500 }, { "epoch": 39.2, "learning_rate": 3.0397767921018743e-05, "loss": 0.248, "step": 548000 }, { "epoch": 39.24, "learning_rate": 3.0379882672771498e-05, "loss": 0.2464, "step": 548500 }, { "epoch": 39.28, "learning_rate": 3.0361997424524253e-05, "loss": 0.2522, "step": 549000 }, { "epoch": 39.31, "learning_rate": 3.0344112176277005e-05, "loss": 0.2542, "step": 549500 }, { "epoch": 39.35, "learning_rate": 3.032622692802976e-05, "loss": 0.2563, "step": 550000 }, { "epoch": 39.38, "learning_rate": 3.0308341679782515e-05, "loss": 0.2619, "step": 550500 }, { "epoch": 39.42, "learning_rate": 3.029045643153527e-05, "loss": 0.26, "step": 551000 }, { "epoch": 39.45, "learning_rate": 3.0272571183288022e-05, "loss": 0.2595, "step": 551500 }, { "epoch": 39.49, "learning_rate": 3.0254685935040778e-05, "loss": 0.2562, "step": 552000 }, { "epoch": 39.53, "learning_rate": 3.0236800686793533e-05, "loss": 0.2542, "step": 552500 }, { "epoch": 39.56, "learning_rate": 3.0218915438546285e-05, "loss": 0.2604, "step": 553000 }, { "epoch": 39.6, "learning_rate": 3.0201030190299046e-05, "loss": 0.2613, "step": 553500 }, { "epoch": 39.63, "learning_rate": 3.0183144942051798e-05, "loss": 0.2621, "step": 554000 }, { "epoch": 39.67, "learning_rate": 3.0165259693804553e-05, "loss": 0.2626, "step": 554500 }, { "epoch": 39.71, "learning_rate": 3.014737444555731e-05, "loss": 0.2608, "step": 555000 }, { "epoch": 39.74, "learning_rate": 3.0129489197310064e-05, "loss": 0.2594, "step": 555500 }, { "epoch": 39.78, "learning_rate": 3.0111603949062816e-05, "loss": 0.2638, "step": 556000 }, { "epoch": 39.81, "learning_rate": 3.009371870081557e-05, "loss": 0.2637, "step": 556500 }, { "epoch": 39.85, "learning_rate": 3.0075833452568326e-05, "loss": 0.262, "step": 557000 }, { "epoch": 39.88, "learning_rate": 3.0057948204321078e-05, "loss": 0.26, "step": 557500 }, { "epoch": 39.92, "learning_rate": 3.0040062956073833e-05, "loss": 0.2621, "step": 558000 }, { "epoch": 39.96, "learning_rate": 3.0022177707826588e-05, "loss": 0.2602, "step": 558500 }, { "epoch": 39.99, "learning_rate": 3.0004292459579343e-05, "loss": 0.2648, "step": 559000 }, { "epoch": 40.03, "learning_rate": 2.9986407211332095e-05, "loss": 0.2462, "step": 559500 }, { "epoch": 40.06, "learning_rate": 2.996852196308485e-05, "loss": 0.239, "step": 560000 }, { "epoch": 40.1, "learning_rate": 2.9950636714837605e-05, "loss": 0.2381, "step": 560500 }, { "epoch": 40.13, "learning_rate": 2.9932751466590357e-05, "loss": 0.2468, "step": 561000 }, { "epoch": 40.17, "learning_rate": 2.9914866218343112e-05, "loss": 0.2426, "step": 561500 }, { "epoch": 40.21, "learning_rate": 2.9896980970095868e-05, "loss": 0.2468, "step": 562000 }, { "epoch": 40.24, "learning_rate": 2.9879095721848623e-05, "loss": 0.2513, "step": 562500 }, { "epoch": 40.28, "learning_rate": 2.9861210473601375e-05, "loss": 0.2499, "step": 563000 }, { "epoch": 40.31, "learning_rate": 2.984332522535413e-05, "loss": 0.2448, "step": 563500 }, { "epoch": 40.35, "learning_rate": 2.9825439977106885e-05, "loss": 0.2501, "step": 564000 }, { "epoch": 40.38, "learning_rate": 2.9807554728859637e-05, "loss": 0.2494, "step": 564500 }, { "epoch": 40.42, "learning_rate": 2.9789669480612392e-05, "loss": 0.2553, "step": 565000 }, { "epoch": 40.46, "learning_rate": 2.9771784232365147e-05, "loss": 0.2581, "step": 565500 }, { "epoch": 40.49, "learning_rate": 2.97538989841179e-05, "loss": 0.251, "step": 566000 }, { "epoch": 40.53, "learning_rate": 2.9736013735870654e-05, "loss": 0.2581, "step": 566500 }, { "epoch": 40.56, "learning_rate": 2.971812848762341e-05, "loss": 0.2608, "step": 567000 }, { "epoch": 40.6, "learning_rate": 2.9700243239376164e-05, "loss": 0.258, "step": 567500 }, { "epoch": 40.64, "learning_rate": 2.9682357991128916e-05, "loss": 0.2545, "step": 568000 }, { "epoch": 40.67, "learning_rate": 2.966447274288167e-05, "loss": 0.2585, "step": 568500 }, { "epoch": 40.71, "learning_rate": 2.9646587494634427e-05, "loss": 0.2617, "step": 569000 }, { "epoch": 40.74, "learning_rate": 2.962870224638718e-05, "loss": 0.2595, "step": 569500 }, { "epoch": 40.78, "learning_rate": 2.9610816998139934e-05, "loss": 0.2563, "step": 570000 }, { "epoch": 40.81, "learning_rate": 2.959293174989269e-05, "loss": 0.2641, "step": 570500 }, { "epoch": 40.85, "learning_rate": 2.9575046501645444e-05, "loss": 0.2613, "step": 571000 }, { "epoch": 40.89, "learning_rate": 2.9557161253398196e-05, "loss": 0.2599, "step": 571500 }, { "epoch": 40.92, "learning_rate": 2.953927600515095e-05, "loss": 0.2633, "step": 572000 }, { "epoch": 40.96, "learning_rate": 2.9521390756903706e-05, "loss": 0.263, "step": 572500 }, { "epoch": 40.99, "learning_rate": 2.9503505508656458e-05, "loss": 0.266, "step": 573000 }, { "epoch": 41.03, "learning_rate": 2.9485620260409213e-05, "loss": 0.2452, "step": 573500 }, { "epoch": 41.06, "learning_rate": 2.9467735012161968e-05, "loss": 0.2366, "step": 574000 }, { "epoch": 41.1, "learning_rate": 2.9449849763914723e-05, "loss": 0.241, "step": 574500 }, { "epoch": 41.14, "learning_rate": 2.9431964515667475e-05, "loss": 0.243, "step": 575000 }, { "epoch": 41.17, "learning_rate": 2.9414079267420237e-05, "loss": 0.2443, "step": 575500 }, { "epoch": 41.21, "learning_rate": 2.939619401917299e-05, "loss": 0.2498, "step": 576000 }, { "epoch": 41.24, "learning_rate": 2.9378308770925744e-05, "loss": 0.2469, "step": 576500 }, { "epoch": 41.28, "learning_rate": 2.93604235226785e-05, "loss": 0.249, "step": 577000 }, { "epoch": 41.31, "learning_rate": 2.934253827443125e-05, "loss": 0.2507, "step": 577500 }, { "epoch": 41.35, "learning_rate": 2.9324653026184006e-05, "loss": 0.247, "step": 578000 }, { "epoch": 41.39, "learning_rate": 2.930676777793676e-05, "loss": 0.2545, "step": 578500 }, { "epoch": 41.42, "learning_rate": 2.9288882529689517e-05, "loss": 0.2523, "step": 579000 }, { "epoch": 41.46, "learning_rate": 2.927099728144227e-05, "loss": 0.253, "step": 579500 }, { "epoch": 41.49, "learning_rate": 2.9253112033195024e-05, "loss": 0.2498, "step": 580000 }, { "epoch": 41.53, "learning_rate": 2.923522678494778e-05, "loss": 0.2473, "step": 580500 }, { "epoch": 41.57, "learning_rate": 2.921734153670053e-05, "loss": 0.2541, "step": 581000 }, { "epoch": 41.6, "learning_rate": 2.9199456288453286e-05, "loss": 0.2509, "step": 581500 }, { "epoch": 41.64, "learning_rate": 2.918157104020604e-05, "loss": 0.259, "step": 582000 }, { "epoch": 41.67, "learning_rate": 2.9163685791958796e-05, "loss": 0.2557, "step": 582500 }, { "epoch": 41.71, "learning_rate": 2.9145800543711548e-05, "loss": 0.2578, "step": 583000 }, { "epoch": 41.74, "learning_rate": 2.9127915295464303e-05, "loss": 0.2598, "step": 583500 }, { "epoch": 41.78, "learning_rate": 2.911003004721706e-05, "loss": 0.2557, "step": 584000 }, { "epoch": 41.82, "learning_rate": 2.909214479896981e-05, "loss": 0.2593, "step": 584500 }, { "epoch": 41.85, "learning_rate": 2.9074259550722565e-05, "loss": 0.2664, "step": 585000 }, { "epoch": 41.89, "learning_rate": 2.905637430247532e-05, "loss": 0.2594, "step": 585500 }, { "epoch": 41.92, "learning_rate": 2.9038489054228076e-05, "loss": 0.2563, "step": 586000 }, { "epoch": 41.96, "learning_rate": 2.9020603805980827e-05, "loss": 0.2558, "step": 586500 }, { "epoch": 41.99, "learning_rate": 2.9002718557733583e-05, "loss": 0.2603, "step": 587000 }, { "epoch": 42.03, "learning_rate": 2.8984833309486338e-05, "loss": 0.2398, "step": 587500 }, { "epoch": 42.07, "learning_rate": 2.896694806123909e-05, "loss": 0.2339, "step": 588000 }, { "epoch": 42.1, "learning_rate": 2.8949062812991845e-05, "loss": 0.2406, "step": 588500 }, { "epoch": 42.14, "learning_rate": 2.89311775647446e-05, "loss": 0.24, "step": 589000 }, { "epoch": 42.17, "learning_rate": 2.8913292316497355e-05, "loss": 0.2433, "step": 589500 }, { "epoch": 42.21, "learning_rate": 2.8895407068250107e-05, "loss": 0.2432, "step": 590000 }, { "epoch": 42.24, "learning_rate": 2.8877521820002862e-05, "loss": 0.2432, "step": 590500 }, { "epoch": 42.28, "learning_rate": 2.8859636571755617e-05, "loss": 0.2468, "step": 591000 }, { "epoch": 42.32, "learning_rate": 2.884175132350837e-05, "loss": 0.2476, "step": 591500 }, { "epoch": 42.35, "learning_rate": 2.8823866075261124e-05, "loss": 0.2459, "step": 592000 }, { "epoch": 42.39, "learning_rate": 2.880598082701388e-05, "loss": 0.2457, "step": 592500 }, { "epoch": 42.42, "learning_rate": 2.878809557876663e-05, "loss": 0.2484, "step": 593000 }, { "epoch": 42.46, "learning_rate": 2.8770210330519386e-05, "loss": 0.2493, "step": 593500 }, { "epoch": 42.5, "learning_rate": 2.875232508227214e-05, "loss": 0.2507, "step": 594000 }, { "epoch": 42.53, "learning_rate": 2.8734439834024897e-05, "loss": 0.2537, "step": 594500 }, { "epoch": 42.57, "learning_rate": 2.871655458577765e-05, "loss": 0.2551, "step": 595000 }, { "epoch": 42.6, "learning_rate": 2.8698669337530404e-05, "loss": 0.2578, "step": 595500 }, { "epoch": 42.64, "learning_rate": 2.868078408928316e-05, "loss": 0.2527, "step": 596000 }, { "epoch": 42.67, "learning_rate": 2.866289884103591e-05, "loss": 0.2514, "step": 596500 }, { "epoch": 42.71, "learning_rate": 2.8645013592788673e-05, "loss": 0.256, "step": 597000 }, { "epoch": 42.75, "learning_rate": 2.8627128344541428e-05, "loss": 0.2561, "step": 597500 }, { "epoch": 42.78, "learning_rate": 2.860924309629418e-05, "loss": 0.2579, "step": 598000 }, { "epoch": 42.82, "learning_rate": 2.8591357848046935e-05, "loss": 0.2582, "step": 598500 }, { "epoch": 42.85, "learning_rate": 2.857347259979969e-05, "loss": 0.2539, "step": 599000 }, { "epoch": 42.89, "learning_rate": 2.8555587351552442e-05, "loss": 0.2577, "step": 599500 }, { "epoch": 42.92, "learning_rate": 2.8537702103305197e-05, "loss": 0.2621, "step": 600000 }, { "epoch": 42.96, "learning_rate": 2.8519816855057952e-05, "loss": 0.2553, "step": 600500 }, { "epoch": 43.0, "learning_rate": 2.8501931606810704e-05, "loss": 0.2562, "step": 601000 }, { "epoch": 43.03, "learning_rate": 2.848404635856346e-05, "loss": 0.2319, "step": 601500 }, { "epoch": 43.07, "learning_rate": 2.8466161110316214e-05, "loss": 0.2392, "step": 602000 }, { "epoch": 43.1, "learning_rate": 2.844827586206897e-05, "loss": 0.241, "step": 602500 }, { "epoch": 43.14, "learning_rate": 2.843039061382172e-05, "loss": 0.2375, "step": 603000 }, { "epoch": 43.17, "learning_rate": 2.8412505365574477e-05, "loss": 0.237, "step": 603500 }, { "epoch": 43.21, "learning_rate": 2.8394620117327232e-05, "loss": 0.2426, "step": 604000 }, { "epoch": 43.25, "learning_rate": 2.8376734869079984e-05, "loss": 0.2474, "step": 604500 }, { "epoch": 43.28, "learning_rate": 2.835884962083274e-05, "loss": 0.2445, "step": 605000 }, { "epoch": 43.32, "learning_rate": 2.8340964372585494e-05, "loss": 0.2448, "step": 605500 }, { "epoch": 43.35, "learning_rate": 2.832307912433825e-05, "loss": 0.2425, "step": 606000 }, { "epoch": 43.39, "learning_rate": 2.8305193876091e-05, "loss": 0.2461, "step": 606500 }, { "epoch": 43.43, "learning_rate": 2.8287308627843756e-05, "loss": 0.243, "step": 607000 }, { "epoch": 43.46, "learning_rate": 2.826942337959651e-05, "loss": 0.2498, "step": 607500 }, { "epoch": 43.5, "learning_rate": 2.8251538131349263e-05, "loss": 0.2506, "step": 608000 }, { "epoch": 43.53, "learning_rate": 2.8233652883102018e-05, "loss": 0.2472, "step": 608500 }, { "epoch": 43.57, "learning_rate": 2.8215767634854773e-05, "loss": 0.2504, "step": 609000 }, { "epoch": 43.6, "learning_rate": 2.819788238660753e-05, "loss": 0.2483, "step": 609500 }, { "epoch": 43.64, "learning_rate": 2.817999713836028e-05, "loss": 0.2513, "step": 610000 }, { "epoch": 43.68, "learning_rate": 2.8162111890113036e-05, "loss": 0.2544, "step": 610500 }, { "epoch": 43.71, "learning_rate": 2.814422664186579e-05, "loss": 0.2489, "step": 611000 }, { "epoch": 43.75, "learning_rate": 2.8126341393618543e-05, "loss": 0.2535, "step": 611500 }, { "epoch": 43.78, "learning_rate": 2.8108456145371298e-05, "loss": 0.2552, "step": 612000 }, { "epoch": 43.82, "learning_rate": 2.8090570897124053e-05, "loss": 0.2566, "step": 612500 }, { "epoch": 43.85, "learning_rate": 2.8072685648876808e-05, "loss": 0.2534, "step": 613000 }, { "epoch": 43.89, "learning_rate": 2.805480040062956e-05, "loss": 0.2544, "step": 613500 }, { "epoch": 43.93, "learning_rate": 2.8036915152382315e-05, "loss": 0.2547, "step": 614000 }, { "epoch": 43.96, "learning_rate": 2.801902990413507e-05, "loss": 0.2552, "step": 614500 }, { "epoch": 44.0, "learning_rate": 2.8001144655887822e-05, "loss": 0.2602, "step": 615000 }, { "epoch": 44.03, "learning_rate": 2.7983259407640577e-05, "loss": 0.2329, "step": 615500 }, { "epoch": 44.07, "learning_rate": 2.7965374159393332e-05, "loss": 0.2327, "step": 616000 }, { "epoch": 44.11, "learning_rate": 2.7947488911146084e-05, "loss": 0.2426, "step": 616500 }, { "epoch": 44.14, "learning_rate": 2.792960366289884e-05, "loss": 0.2345, "step": 617000 }, { "epoch": 44.18, "learning_rate": 2.7911718414651595e-05, "loss": 0.2345, "step": 617500 }, { "epoch": 44.21, "learning_rate": 2.789383316640435e-05, "loss": 0.2434, "step": 618000 }, { "epoch": 44.25, "learning_rate": 2.78759479181571e-05, "loss": 0.242, "step": 618500 }, { "epoch": 44.28, "learning_rate": 2.7858062669909863e-05, "loss": 0.2386, "step": 619000 }, { "epoch": 44.32, "learning_rate": 2.7840177421662615e-05, "loss": 0.2414, "step": 619500 }, { "epoch": 44.36, "learning_rate": 2.782229217341537e-05, "loss": 0.2461, "step": 620000 }, { "epoch": 44.39, "learning_rate": 2.7804406925168126e-05, "loss": 0.244, "step": 620500 }, { "epoch": 44.43, "learning_rate": 2.778652167692088e-05, "loss": 0.2448, "step": 621000 }, { "epoch": 44.46, "learning_rate": 2.7768636428673633e-05, "loss": 0.2493, "step": 621500 }, { "epoch": 44.5, "learning_rate": 2.7750751180426388e-05, "loss": 0.2479, "step": 622000 }, { "epoch": 44.53, "learning_rate": 2.7732865932179143e-05, "loss": 0.2464, "step": 622500 }, { "epoch": 44.57, "learning_rate": 2.7714980683931895e-05, "loss": 0.247, "step": 623000 }, { "epoch": 44.61, "learning_rate": 2.769709543568465e-05, "loss": 0.2493, "step": 623500 }, { "epoch": 44.64, "learning_rate": 2.7679210187437405e-05, "loss": 0.2492, "step": 624000 }, { "epoch": 44.68, "learning_rate": 2.7661324939190157e-05, "loss": 0.2452, "step": 624500 }, { "epoch": 44.71, "learning_rate": 2.7643439690942912e-05, "loss": 0.2474, "step": 625000 }, { "epoch": 44.75, "learning_rate": 2.7625554442695667e-05, "loss": 0.2558, "step": 625500 }, { "epoch": 44.78, "learning_rate": 2.7607669194448422e-05, "loss": 0.2558, "step": 626000 }, { "epoch": 44.82, "learning_rate": 2.7589783946201174e-05, "loss": 0.2494, "step": 626500 }, { "epoch": 44.86, "learning_rate": 2.757189869795393e-05, "loss": 0.2515, "step": 627000 }, { "epoch": 44.89, "learning_rate": 2.7554013449706685e-05, "loss": 0.2508, "step": 627500 }, { "epoch": 44.93, "learning_rate": 2.7536128201459436e-05, "loss": 0.2533, "step": 628000 }, { "epoch": 44.96, "learning_rate": 2.751824295321219e-05, "loss": 0.2569, "step": 628500 }, { "epoch": 45.0, "learning_rate": 2.7500357704964947e-05, "loss": 0.2562, "step": 629000 }, { "epoch": 45.04, "learning_rate": 2.7482472456717702e-05, "loss": 0.2325, "step": 629500 }, { "epoch": 45.07, "learning_rate": 2.7464587208470454e-05, "loss": 0.2268, "step": 630000 }, { "epoch": 45.11, "learning_rate": 2.744670196022321e-05, "loss": 0.2312, "step": 630500 }, { "epoch": 45.14, "learning_rate": 2.7428816711975964e-05, "loss": 0.2372, "step": 631000 }, { "epoch": 45.18, "learning_rate": 2.7410931463728716e-05, "loss": 0.2392, "step": 631500 }, { "epoch": 45.21, "learning_rate": 2.739304621548147e-05, "loss": 0.2398, "step": 632000 }, { "epoch": 45.25, "learning_rate": 2.7375160967234226e-05, "loss": 0.239, "step": 632500 }, { "epoch": 45.29, "learning_rate": 2.735727571898698e-05, "loss": 0.2435, "step": 633000 }, { "epoch": 45.32, "learning_rate": 2.7339390470739733e-05, "loss": 0.2401, "step": 633500 }, { "epoch": 45.36, "learning_rate": 2.732150522249249e-05, "loss": 0.2465, "step": 634000 }, { "epoch": 45.39, "learning_rate": 2.7303619974245244e-05, "loss": 0.2471, "step": 634500 }, { "epoch": 45.43, "learning_rate": 2.7285734725997995e-05, "loss": 0.2449, "step": 635000 }, { "epoch": 45.46, "learning_rate": 2.726784947775075e-05, "loss": 0.2466, "step": 635500 }, { "epoch": 45.5, "learning_rate": 2.7249964229503506e-05, "loss": 0.2436, "step": 636000 }, { "epoch": 45.54, "learning_rate": 2.723207898125626e-05, "loss": 0.2419, "step": 636500 }, { "epoch": 45.57, "learning_rate": 2.7214193733009013e-05, "loss": 0.2493, "step": 637000 }, { "epoch": 45.61, "learning_rate": 2.7196308484761768e-05, "loss": 0.251, "step": 637500 }, { "epoch": 45.64, "learning_rate": 2.7178423236514523e-05, "loss": 0.2458, "step": 638000 }, { "epoch": 45.68, "learning_rate": 2.7160537988267275e-05, "loss": 0.2484, "step": 638500 }, { "epoch": 45.71, "learning_rate": 2.714265274002003e-05, "loss": 0.2491, "step": 639000 }, { "epoch": 45.75, "learning_rate": 2.7124767491772785e-05, "loss": 0.2513, "step": 639500 }, { "epoch": 45.79, "learning_rate": 2.710688224352554e-05, "loss": 0.2521, "step": 640000 }, { "epoch": 45.82, "learning_rate": 2.70889969952783e-05, "loss": 0.2504, "step": 640500 }, { "epoch": 45.86, "learning_rate": 2.7071111747031054e-05, "loss": 0.2476, "step": 641000 }, { "epoch": 45.89, "learning_rate": 2.7053226498783806e-05, "loss": 0.2465, "step": 641500 }, { "epoch": 45.93, "learning_rate": 2.703534125053656e-05, "loss": 0.2506, "step": 642000 }, { "epoch": 45.97, "learning_rate": 2.7017456002289316e-05, "loss": 0.2446, "step": 642500 }, { "epoch": 46.0, "learning_rate": 2.6999570754042068e-05, "loss": 0.2535, "step": 643000 }, { "epoch": 46.04, "learning_rate": 2.6981685505794823e-05, "loss": 0.2261, "step": 643500 }, { "epoch": 46.07, "learning_rate": 2.696380025754758e-05, "loss": 0.2312, "step": 644000 }, { "epoch": 46.11, "learning_rate": 2.6945915009300334e-05, "loss": 0.2356, "step": 644500 }, { "epoch": 46.14, "learning_rate": 2.6928029761053085e-05, "loss": 0.2361, "step": 645000 }, { "epoch": 46.18, "learning_rate": 2.691014451280584e-05, "loss": 0.2377, "step": 645500 }, { "epoch": 46.22, "learning_rate": 2.6892259264558596e-05, "loss": 0.235, "step": 646000 }, { "epoch": 46.25, "learning_rate": 2.6874374016311348e-05, "loss": 0.2389, "step": 646500 }, { "epoch": 46.29, "learning_rate": 2.6856488768064103e-05, "loss": 0.2435, "step": 647000 }, { "epoch": 46.32, "learning_rate": 2.6838603519816858e-05, "loss": 0.2377, "step": 647500 }, { "epoch": 46.36, "learning_rate": 2.6820718271569613e-05, "loss": 0.2389, "step": 648000 }, { "epoch": 46.39, "learning_rate": 2.6802833023322365e-05, "loss": 0.2455, "step": 648500 }, { "epoch": 46.43, "learning_rate": 2.678494777507512e-05, "loss": 0.243, "step": 649000 }, { "epoch": 46.47, "learning_rate": 2.6767062526827875e-05, "loss": 0.2384, "step": 649500 }, { "epoch": 46.5, "learning_rate": 2.6749177278580627e-05, "loss": 0.2427, "step": 650000 }, { "epoch": 46.54, "learning_rate": 2.6731292030333382e-05, "loss": 0.2447, "step": 650500 }, { "epoch": 46.57, "learning_rate": 2.6713406782086137e-05, "loss": 0.2443, "step": 651000 }, { "epoch": 46.61, "learning_rate": 2.669552153383889e-05, "loss": 0.2427, "step": 651500 }, { "epoch": 46.64, "learning_rate": 2.6677636285591644e-05, "loss": 0.25, "step": 652000 }, { "epoch": 46.68, "learning_rate": 2.66597510373444e-05, "loss": 0.2472, "step": 652500 }, { "epoch": 46.72, "learning_rate": 2.6641865789097155e-05, "loss": 0.2537, "step": 653000 }, { "epoch": 46.75, "learning_rate": 2.6623980540849907e-05, "loss": 0.2502, "step": 653500 }, { "epoch": 46.79, "learning_rate": 2.6606095292602662e-05, "loss": 0.2447, "step": 654000 }, { "epoch": 46.82, "learning_rate": 2.6588210044355417e-05, "loss": 0.244, "step": 654500 }, { "epoch": 46.86, "learning_rate": 2.657032479610817e-05, "loss": 0.2479, "step": 655000 }, { "epoch": 46.9, "learning_rate": 2.6552439547860924e-05, "loss": 0.2476, "step": 655500 }, { "epoch": 46.93, "learning_rate": 2.653455429961368e-05, "loss": 0.2506, "step": 656000 }, { "epoch": 46.97, "learning_rate": 2.6516669051366434e-05, "loss": 0.2504, "step": 656500 }, { "epoch": 47.0, "learning_rate": 2.6498783803119186e-05, "loss": 0.2449, "step": 657000 }, { "epoch": 47.04, "learning_rate": 2.648089855487194e-05, "loss": 0.2255, "step": 657500 }, { "epoch": 47.07, "learning_rate": 2.6463013306624696e-05, "loss": 0.2274, "step": 658000 }, { "epoch": 47.11, "learning_rate": 2.6445128058377448e-05, "loss": 0.2333, "step": 658500 }, { "epoch": 47.15, "learning_rate": 2.6427242810130203e-05, "loss": 0.2355, "step": 659000 }, { "epoch": 47.18, "learning_rate": 2.640935756188296e-05, "loss": 0.238, "step": 659500 }, { "epoch": 47.22, "learning_rate": 2.6391472313635714e-05, "loss": 0.2366, "step": 660000 }, { "epoch": 47.25, "learning_rate": 2.6373587065388466e-05, "loss": 0.2376, "step": 660500 }, { "epoch": 47.29, "learning_rate": 2.635570181714122e-05, "loss": 0.236, "step": 661000 }, { "epoch": 47.32, "learning_rate": 2.6337816568893976e-05, "loss": 0.2394, "step": 661500 }, { "epoch": 47.36, "learning_rate": 2.6319931320646728e-05, "loss": 0.2385, "step": 662000 }, { "epoch": 47.4, "learning_rate": 2.630204607239949e-05, "loss": 0.2375, "step": 662500 }, { "epoch": 47.43, "learning_rate": 2.628416082415224e-05, "loss": 0.2388, "step": 663000 }, { "epoch": 47.47, "learning_rate": 2.6266275575904997e-05, "loss": 0.238, "step": 663500 }, { "epoch": 47.5, "learning_rate": 2.6248390327657752e-05, "loss": 0.2389, "step": 664000 }, { "epoch": 47.54, "learning_rate": 2.6230505079410507e-05, "loss": 0.2468, "step": 664500 }, { "epoch": 47.57, "learning_rate": 2.621261983116326e-05, "loss": 0.2439, "step": 665000 }, { "epoch": 47.61, "learning_rate": 2.6194734582916014e-05, "loss": 0.2458, "step": 665500 }, { "epoch": 47.65, "learning_rate": 2.617684933466877e-05, "loss": 0.2526, "step": 666000 }, { "epoch": 47.68, "learning_rate": 2.615896408642152e-05, "loss": 0.2443, "step": 666500 }, { "epoch": 47.72, "learning_rate": 2.6141078838174276e-05, "loss": 0.2496, "step": 667000 }, { "epoch": 47.75, "learning_rate": 2.612319358992703e-05, "loss": 0.2489, "step": 667500 }, { "epoch": 47.79, "learning_rate": 2.6105308341679787e-05, "loss": 0.2431, "step": 668000 }, { "epoch": 47.83, "learning_rate": 2.608742309343254e-05, "loss": 0.2453, "step": 668500 }, { "epoch": 47.86, "learning_rate": 2.6069537845185294e-05, "loss": 0.2484, "step": 669000 }, { "epoch": 47.9, "learning_rate": 2.605165259693805e-05, "loss": 0.2515, "step": 669500 }, { "epoch": 47.93, "learning_rate": 2.60337673486908e-05, "loss": 0.2473, "step": 670000 }, { "epoch": 47.97, "learning_rate": 2.6015882100443556e-05, "loss": 0.2507, "step": 670500 }, { "epoch": 48.0, "learning_rate": 2.599799685219631e-05, "loss": 0.2447, "step": 671000 }, { "epoch": 48.04, "learning_rate": 2.5980111603949066e-05, "loss": 0.2267, "step": 671500 }, { "epoch": 48.08, "learning_rate": 2.5962226355701818e-05, "loss": 0.2293, "step": 672000 }, { "epoch": 48.11, "learning_rate": 2.5944341107454573e-05, "loss": 0.2335, "step": 672500 }, { "epoch": 48.15, "learning_rate": 2.5926455859207328e-05, "loss": 0.2307, "step": 673000 }, { "epoch": 48.18, "learning_rate": 2.590857061096008e-05, "loss": 0.2302, "step": 673500 }, { "epoch": 48.22, "learning_rate": 2.5890685362712835e-05, "loss": 0.236, "step": 674000 }, { "epoch": 48.25, "learning_rate": 2.587280011446559e-05, "loss": 0.2328, "step": 674500 }, { "epoch": 48.29, "learning_rate": 2.5854914866218342e-05, "loss": 0.2393, "step": 675000 }, { "epoch": 48.33, "learning_rate": 2.5837029617971097e-05, "loss": 0.2397, "step": 675500 }, { "epoch": 48.36, "learning_rate": 2.5819144369723853e-05, "loss": 0.2359, "step": 676000 }, { "epoch": 48.4, "learning_rate": 2.5801259121476608e-05, "loss": 0.2399, "step": 676500 }, { "epoch": 48.43, "learning_rate": 2.578337387322936e-05, "loss": 0.2389, "step": 677000 }, { "epoch": 48.47, "learning_rate": 2.5765488624982115e-05, "loss": 0.2423, "step": 677500 }, { "epoch": 48.5, "learning_rate": 2.574760337673487e-05, "loss": 0.2386, "step": 678000 }, { "epoch": 48.54, "learning_rate": 2.572971812848762e-05, "loss": 0.2459, "step": 678500 }, { "epoch": 48.58, "learning_rate": 2.5711832880240377e-05, "loss": 0.2391, "step": 679000 }, { "epoch": 48.61, "learning_rate": 2.5693947631993132e-05, "loss": 0.2481, "step": 679500 }, { "epoch": 48.65, "learning_rate": 2.5676062383745887e-05, "loss": 0.2418, "step": 680000 }, { "epoch": 48.68, "learning_rate": 2.565817713549864e-05, "loss": 0.2395, "step": 680500 }, { "epoch": 48.72, "learning_rate": 2.5640291887251394e-05, "loss": 0.2446, "step": 681000 }, { "epoch": 48.76, "learning_rate": 2.562240663900415e-05, "loss": 0.2462, "step": 681500 }, { "epoch": 48.79, "learning_rate": 2.56045213907569e-05, "loss": 0.244, "step": 682000 }, { "epoch": 48.83, "learning_rate": 2.5586636142509656e-05, "loss": 0.2481, "step": 682500 }, { "epoch": 48.86, "learning_rate": 2.556875089426241e-05, "loss": 0.2437, "step": 683000 }, { "epoch": 48.9, "learning_rate": 2.5550865646015167e-05, "loss": 0.2446, "step": 683500 }, { "epoch": 48.93, "learning_rate": 2.5532980397767925e-05, "loss": 0.2463, "step": 684000 }, { "epoch": 48.97, "learning_rate": 2.551509514952068e-05, "loss": 0.2451, "step": 684500 }, { "epoch": 49.01, "learning_rate": 2.5497209901273432e-05, "loss": 0.2429, "step": 685000 }, { "epoch": 49.04, "learning_rate": 2.5479324653026187e-05, "loss": 0.2262, "step": 685500 }, { "epoch": 49.08, "learning_rate": 2.5461439404778943e-05, "loss": 0.2276, "step": 686000 }, { "epoch": 49.11, "learning_rate": 2.5443554156531694e-05, "loss": 0.2299, "step": 686500 }, { "epoch": 49.15, "learning_rate": 2.542566890828445e-05, "loss": 0.2301, "step": 687000 }, { "epoch": 49.18, "learning_rate": 2.5407783660037205e-05, "loss": 0.2305, "step": 687500 }, { "epoch": 49.22, "learning_rate": 2.538989841178996e-05, "loss": 0.2322, "step": 688000 }, { "epoch": 49.26, "learning_rate": 2.5372013163542712e-05, "loss": 0.2372, "step": 688500 }, { "epoch": 49.29, "learning_rate": 2.5354127915295467e-05, "loss": 0.23, "step": 689000 }, { "epoch": 49.33, "learning_rate": 2.5336242667048222e-05, "loss": 0.2373, "step": 689500 }, { "epoch": 49.36, "learning_rate": 2.5318357418800974e-05, "loss": 0.2361, "step": 690000 }, { "epoch": 49.4, "learning_rate": 2.530047217055373e-05, "loss": 0.239, "step": 690500 }, { "epoch": 49.43, "learning_rate": 2.5282586922306484e-05, "loss": 0.239, "step": 691000 }, { "epoch": 49.47, "learning_rate": 2.526470167405924e-05, "loss": 0.2447, "step": 691500 }, { "epoch": 49.51, "learning_rate": 2.524681642581199e-05, "loss": 0.2379, "step": 692000 }, { "epoch": 49.54, "learning_rate": 2.5228931177564746e-05, "loss": 0.2372, "step": 692500 }, { "epoch": 49.58, "learning_rate": 2.52110459293175e-05, "loss": 0.2403, "step": 693000 }, { "epoch": 49.61, "learning_rate": 2.5193160681070253e-05, "loss": 0.2406, "step": 693500 }, { "epoch": 49.65, "learning_rate": 2.517527543282301e-05, "loss": 0.2407, "step": 694000 }, { "epoch": 49.69, "learning_rate": 2.5157390184575764e-05, "loss": 0.2424, "step": 694500 }, { "epoch": 49.72, "learning_rate": 2.513950493632852e-05, "loss": 0.2448, "step": 695000 }, { "epoch": 49.76, "learning_rate": 2.512161968808127e-05, "loss": 0.2398, "step": 695500 }, { "epoch": 49.79, "learning_rate": 2.5103734439834026e-05, "loss": 0.2426, "step": 696000 }, { "epoch": 49.83, "learning_rate": 2.508584919158678e-05, "loss": 0.2468, "step": 696500 }, { "epoch": 49.86, "learning_rate": 2.5067963943339533e-05, "loss": 0.2444, "step": 697000 }, { "epoch": 49.9, "learning_rate": 2.5050078695092288e-05, "loss": 0.2428, "step": 697500 }, { "epoch": 49.94, "learning_rate": 2.5032193446845043e-05, "loss": 0.2418, "step": 698000 }, { "epoch": 49.97, "learning_rate": 2.50143081985978e-05, "loss": 0.2439, "step": 698500 }, { "epoch": 50.01, "learning_rate": 2.499642295035055e-05, "loss": 0.2434, "step": 699000 }, { "epoch": 50.04, "learning_rate": 2.4978537702103305e-05, "loss": 0.2212, "step": 699500 }, { "epoch": 50.08, "learning_rate": 2.496065245385606e-05, "loss": 0.2255, "step": 700000 }, { "epoch": 50.11, "learning_rate": 2.4942767205608816e-05, "loss": 0.228, "step": 700500 }, { "epoch": 50.15, "learning_rate": 2.492488195736157e-05, "loss": 0.2311, "step": 701000 }, { "epoch": 50.19, "learning_rate": 2.4906996709114326e-05, "loss": 0.2328, "step": 701500 }, { "epoch": 50.22, "learning_rate": 2.4889111460867078e-05, "loss": 0.2318, "step": 702000 }, { "epoch": 50.26, "learning_rate": 2.4871226212619833e-05, "loss": 0.2308, "step": 702500 }, { "epoch": 50.29, "learning_rate": 2.4853340964372588e-05, "loss": 0.2352, "step": 703000 }, { "epoch": 50.33, "learning_rate": 2.483545571612534e-05, "loss": 0.2306, "step": 703500 }, { "epoch": 50.36, "learning_rate": 2.4817570467878095e-05, "loss": 0.2304, "step": 704000 }, { "epoch": 50.4, "learning_rate": 2.479968521963085e-05, "loss": 0.2363, "step": 704500 }, { "epoch": 50.44, "learning_rate": 2.4781799971383606e-05, "loss": 0.2379, "step": 705000 }, { "epoch": 50.47, "learning_rate": 2.4763914723136357e-05, "loss": 0.2329, "step": 705500 }, { "epoch": 50.51, "learning_rate": 2.4746029474889113e-05, "loss": 0.2426, "step": 706000 }, { "epoch": 50.54, "learning_rate": 2.4728144226641868e-05, "loss": 0.2365, "step": 706500 }, { "epoch": 50.58, "learning_rate": 2.471025897839462e-05, "loss": 0.2417, "step": 707000 }, { "epoch": 50.62, "learning_rate": 2.4692373730147375e-05, "loss": 0.2406, "step": 707500 }, { "epoch": 50.65, "learning_rate": 2.467448848190013e-05, "loss": 0.2399, "step": 708000 }, { "epoch": 50.69, "learning_rate": 2.4656603233652885e-05, "loss": 0.2408, "step": 708500 }, { "epoch": 50.72, "learning_rate": 2.4638717985405637e-05, "loss": 0.2423, "step": 709000 }, { "epoch": 50.76, "learning_rate": 2.4620832737158392e-05, "loss": 0.2411, "step": 709500 }, { "epoch": 50.79, "learning_rate": 2.4602947488911147e-05, "loss": 0.241, "step": 710000 }, { "epoch": 50.83, "learning_rate": 2.45850622406639e-05, "loss": 0.2471, "step": 710500 }, { "epoch": 50.87, "learning_rate": 2.4567176992416658e-05, "loss": 0.2425, "step": 711000 }, { "epoch": 50.9, "learning_rate": 2.4549291744169413e-05, "loss": 0.243, "step": 711500 }, { "epoch": 50.94, "learning_rate": 2.4531406495922165e-05, "loss": 0.2454, "step": 712000 }, { "epoch": 50.97, "learning_rate": 2.451352124767492e-05, "loss": 0.2426, "step": 712500 }, { "epoch": 51.01, "learning_rate": 2.4495635999427675e-05, "loss": 0.2365, "step": 713000 }, { "epoch": 51.04, "learning_rate": 2.4477750751180427e-05, "loss": 0.2231, "step": 713500 }, { "epoch": 51.08, "learning_rate": 2.4459865502933182e-05, "loss": 0.2296, "step": 714000 }, { "epoch": 51.12, "learning_rate": 2.4441980254685937e-05, "loss": 0.2256, "step": 714500 }, { "epoch": 51.15, "learning_rate": 2.4424095006438692e-05, "loss": 0.2299, "step": 715000 }, { "epoch": 51.19, "learning_rate": 2.4406209758191444e-05, "loss": 0.2304, "step": 715500 }, { "epoch": 51.22, "learning_rate": 2.43883245099442e-05, "loss": 0.2285, "step": 716000 }, { "epoch": 51.26, "learning_rate": 2.4370439261696954e-05, "loss": 0.2318, "step": 716500 }, { "epoch": 51.29, "learning_rate": 2.4352554013449706e-05, "loss": 0.2296, "step": 717000 }, { "epoch": 51.33, "learning_rate": 2.433466876520246e-05, "loss": 0.235, "step": 717500 }, { "epoch": 51.37, "learning_rate": 2.4316783516955217e-05, "loss": 0.2347, "step": 718000 }, { "epoch": 51.4, "learning_rate": 2.4298898268707972e-05, "loss": 0.2316, "step": 718500 }, { "epoch": 51.44, "learning_rate": 2.4281013020460724e-05, "loss": 0.236, "step": 719000 }, { "epoch": 51.47, "learning_rate": 2.426312777221348e-05, "loss": 0.2393, "step": 719500 }, { "epoch": 51.51, "learning_rate": 2.4245242523966234e-05, "loss": 0.2388, "step": 720000 }, { "epoch": 51.55, "learning_rate": 2.4227357275718986e-05, "loss": 0.2338, "step": 720500 }, { "epoch": 51.58, "learning_rate": 2.420947202747174e-05, "loss": 0.2367, "step": 721000 }, { "epoch": 51.62, "learning_rate": 2.4191586779224496e-05, "loss": 0.2388, "step": 721500 }, { "epoch": 51.65, "learning_rate": 2.417370153097725e-05, "loss": 0.2295, "step": 722000 }, { "epoch": 51.69, "learning_rate": 2.4155816282730006e-05, "loss": 0.2405, "step": 722500 }, { "epoch": 51.72, "learning_rate": 2.413793103448276e-05, "loss": 0.2402, "step": 723000 }, { "epoch": 51.76, "learning_rate": 2.4120045786235513e-05, "loss": 0.2407, "step": 723500 }, { "epoch": 51.8, "learning_rate": 2.410216053798827e-05, "loss": 0.2367, "step": 724000 }, { "epoch": 51.83, "learning_rate": 2.4084275289741024e-05, "loss": 0.2437, "step": 724500 }, { "epoch": 51.87, "learning_rate": 2.406639004149378e-05, "loss": 0.2356, "step": 725000 }, { "epoch": 51.9, "learning_rate": 2.404850479324653e-05, "loss": 0.2409, "step": 725500 }, { "epoch": 51.94, "learning_rate": 2.4030619544999286e-05, "loss": 0.2458, "step": 726000 }, { "epoch": 51.97, "learning_rate": 2.401273429675204e-05, "loss": 0.2438, "step": 726500 }, { "epoch": 52.01, "learning_rate": 2.3994849048504793e-05, "loss": 0.2385, "step": 727000 }, { "epoch": 52.05, "learning_rate": 2.3976963800257548e-05, "loss": 0.2212, "step": 727500 }, { "epoch": 52.08, "learning_rate": 2.3959078552010303e-05, "loss": 0.2228, "step": 728000 }, { "epoch": 52.12, "learning_rate": 2.394119330376306e-05, "loss": 0.2285, "step": 728500 }, { "epoch": 52.15, "learning_rate": 2.392330805551581e-05, "loss": 0.2295, "step": 729000 }, { "epoch": 52.19, "learning_rate": 2.3905422807268565e-05, "loss": 0.2284, "step": 729500 }, { "epoch": 52.22, "learning_rate": 2.388753755902132e-05, "loss": 0.2295, "step": 730000 }, { "epoch": 52.26, "learning_rate": 2.3869652310774072e-05, "loss": 0.237, "step": 730500 }, { "epoch": 52.3, "learning_rate": 2.3851767062526828e-05, "loss": 0.2283, "step": 731000 }, { "epoch": 52.33, "learning_rate": 2.3833881814279583e-05, "loss": 0.2308, "step": 731500 }, { "epoch": 52.37, "learning_rate": 2.3815996566032338e-05, "loss": 0.2321, "step": 732000 }, { "epoch": 52.4, "learning_rate": 2.379811131778509e-05, "loss": 0.2337, "step": 732500 }, { "epoch": 52.44, "learning_rate": 2.378022606953785e-05, "loss": 0.2367, "step": 733000 }, { "epoch": 52.48, "learning_rate": 2.37623408212906e-05, "loss": 0.2342, "step": 733500 }, { "epoch": 52.51, "learning_rate": 2.3744455573043355e-05, "loss": 0.2352, "step": 734000 }, { "epoch": 52.55, "learning_rate": 2.372657032479611e-05, "loss": 0.2311, "step": 734500 }, { "epoch": 52.58, "learning_rate": 2.3708685076548866e-05, "loss": 0.2338, "step": 735000 }, { "epoch": 52.62, "learning_rate": 2.3690799828301617e-05, "loss": 0.2335, "step": 735500 }, { "epoch": 52.65, "learning_rate": 2.3672914580054373e-05, "loss": 0.2377, "step": 736000 }, { "epoch": 52.69, "learning_rate": 2.3655029331807128e-05, "loss": 0.237, "step": 736500 }, { "epoch": 52.73, "learning_rate": 2.363714408355988e-05, "loss": 0.2356, "step": 737000 }, { "epoch": 52.76, "learning_rate": 2.3619258835312635e-05, "loss": 0.2399, "step": 737500 }, { "epoch": 52.8, "learning_rate": 2.360137358706539e-05, "loss": 0.2417, "step": 738000 }, { "epoch": 52.83, "learning_rate": 2.3583488338818145e-05, "loss": 0.2382, "step": 738500 }, { "epoch": 52.87, "learning_rate": 2.3565603090570897e-05, "loss": 0.2356, "step": 739000 }, { "epoch": 52.9, "learning_rate": 2.3547717842323652e-05, "loss": 0.243, "step": 739500 }, { "epoch": 52.94, "learning_rate": 2.3529832594076407e-05, "loss": 0.2427, "step": 740000 }, { "epoch": 52.98, "learning_rate": 2.351194734582916e-05, "loss": 0.2347, "step": 740500 }, { "epoch": 53.01, "learning_rate": 2.3494062097581914e-05, "loss": 0.2362, "step": 741000 }, { "epoch": 53.05, "learning_rate": 2.347617684933467e-05, "loss": 0.2206, "step": 741500 }, { "epoch": 53.08, "learning_rate": 2.3458291601087425e-05, "loss": 0.2206, "step": 742000 }, { "epoch": 53.12, "learning_rate": 2.3440406352840176e-05, "loss": 0.2229, "step": 742500 }, { "epoch": 53.15, "learning_rate": 2.342252110459293e-05, "loss": 0.2269, "step": 743000 }, { "epoch": 53.19, "learning_rate": 2.340463585634569e-05, "loss": 0.2252, "step": 743500 }, { "epoch": 53.23, "learning_rate": 2.3386750608098442e-05, "loss": 0.2302, "step": 744000 }, { "epoch": 53.26, "learning_rate": 2.3368865359851197e-05, "loss": 0.2293, "step": 744500 }, { "epoch": 53.3, "learning_rate": 2.3350980111603952e-05, "loss": 0.2288, "step": 745000 }, { "epoch": 53.33, "learning_rate": 2.3333094863356704e-05, "loss": 0.2309, "step": 745500 }, { "epoch": 53.37, "learning_rate": 2.331520961510946e-05, "loss": 0.2347, "step": 746000 }, { "epoch": 53.41, "learning_rate": 2.3297324366862215e-05, "loss": 0.2333, "step": 746500 }, { "epoch": 53.44, "learning_rate": 2.3279439118614966e-05, "loss": 0.2318, "step": 747000 }, { "epoch": 53.48, "learning_rate": 2.326155387036772e-05, "loss": 0.2356, "step": 747500 }, { "epoch": 53.51, "learning_rate": 2.3243668622120477e-05, "loss": 0.233, "step": 748000 }, { "epoch": 53.55, "learning_rate": 2.3225783373873232e-05, "loss": 0.2354, "step": 748500 }, { "epoch": 53.58, "learning_rate": 2.3207898125625984e-05, "loss": 0.2326, "step": 749000 }, { "epoch": 53.62, "learning_rate": 2.319001287737874e-05, "loss": 0.2323, "step": 749500 }, { "epoch": 53.66, "learning_rate": 2.3172127629131494e-05, "loss": 0.2354, "step": 750000 }, { "epoch": 53.69, "learning_rate": 2.3154242380884246e-05, "loss": 0.2332, "step": 750500 }, { "epoch": 53.73, "learning_rate": 2.3136357132637e-05, "loss": 0.2374, "step": 751000 }, { "epoch": 53.76, "learning_rate": 2.3118471884389756e-05, "loss": 0.2415, "step": 751500 }, { "epoch": 53.8, "learning_rate": 2.310058663614251e-05, "loss": 0.2372, "step": 752000 }, { "epoch": 53.83, "learning_rate": 2.3082701387895263e-05, "loss": 0.2362, "step": 752500 }, { "epoch": 53.87, "learning_rate": 2.306481613964802e-05, "loss": 0.2349, "step": 753000 }, { "epoch": 53.91, "learning_rate": 2.3046930891400774e-05, "loss": 0.2444, "step": 753500 }, { "epoch": 53.94, "learning_rate": 2.3029045643153525e-05, "loss": 0.2417, "step": 754000 }, { "epoch": 53.98, "learning_rate": 2.3011160394906284e-05, "loss": 0.2428, "step": 754500 }, { "epoch": 54.01, "learning_rate": 2.299327514665904e-05, "loss": 0.2304, "step": 755000 }, { "epoch": 54.05, "learning_rate": 2.297538989841179e-05, "loss": 0.2201, "step": 755500 }, { "epoch": 54.08, "learning_rate": 2.2957504650164546e-05, "loss": 0.2235, "step": 756000 }, { "epoch": 54.12, "learning_rate": 2.29396194019173e-05, "loss": 0.2288, "step": 756500 }, { "epoch": 54.16, "learning_rate": 2.2921734153670056e-05, "loss": 0.2296, "step": 757000 }, { "epoch": 54.19, "learning_rate": 2.2903848905422808e-05, "loss": 0.2258, "step": 757500 }, { "epoch": 54.23, "learning_rate": 2.2885963657175563e-05, "loss": 0.2265, "step": 758000 }, { "epoch": 54.26, "learning_rate": 2.286807840892832e-05, "loss": 0.2308, "step": 758500 }, { "epoch": 54.3, "learning_rate": 2.285019316068107e-05, "loss": 0.2284, "step": 759000 }, { "epoch": 54.34, "learning_rate": 2.2832307912433826e-05, "loss": 0.2238, "step": 759500 }, { "epoch": 54.37, "learning_rate": 2.281442266418658e-05, "loss": 0.2287, "step": 760000 }, { "epoch": 54.41, "learning_rate": 2.2796537415939333e-05, "loss": 0.2337, "step": 760500 }, { "epoch": 54.44, "learning_rate": 2.2778652167692088e-05, "loss": 0.2303, "step": 761000 }, { "epoch": 54.48, "learning_rate": 2.2760766919444843e-05, "loss": 0.228, "step": 761500 }, { "epoch": 54.51, "learning_rate": 2.2742881671197598e-05, "loss": 0.2354, "step": 762000 }, { "epoch": 54.55, "learning_rate": 2.272499642295035e-05, "loss": 0.2308, "step": 762500 }, { "epoch": 54.59, "learning_rate": 2.2707111174703105e-05, "loss": 0.2365, "step": 763000 }, { "epoch": 54.62, "learning_rate": 2.268922592645586e-05, "loss": 0.2382, "step": 763500 }, { "epoch": 54.66, "learning_rate": 2.2671340678208612e-05, "loss": 0.2351, "step": 764000 }, { "epoch": 54.69, "learning_rate": 2.2653455429961367e-05, "loss": 0.2302, "step": 764500 }, { "epoch": 54.73, "learning_rate": 2.2635570181714122e-05, "loss": 0.2303, "step": 765000 }, { "epoch": 54.76, "learning_rate": 2.2617684933466878e-05, "loss": 0.2356, "step": 765500 }, { "epoch": 54.8, "learning_rate": 2.2599799685219633e-05, "loss": 0.2352, "step": 766000 }, { "epoch": 54.84, "learning_rate": 2.2581914436972388e-05, "loss": 0.2344, "step": 766500 }, { "epoch": 54.87, "learning_rate": 2.2564029188725143e-05, "loss": 0.2404, "step": 767000 }, { "epoch": 54.91, "learning_rate": 2.2546143940477895e-05, "loss": 0.2363, "step": 767500 }, { "epoch": 54.94, "learning_rate": 2.252825869223065e-05, "loss": 0.239, "step": 768000 }, { "epoch": 54.98, "learning_rate": 2.2510373443983405e-05, "loss": 0.2356, "step": 768500 }, { "epoch": 55.02, "learning_rate": 2.2492488195736157e-05, "loss": 0.2273, "step": 769000 }, { "epoch": 55.05, "learning_rate": 2.2474602947488912e-05, "loss": 0.2172, "step": 769500 }, { "epoch": 55.09, "learning_rate": 2.2456717699241667e-05, "loss": 0.2258, "step": 770000 }, { "epoch": 55.12, "learning_rate": 2.243883245099442e-05, "loss": 0.222, "step": 770500 }, { "epoch": 55.16, "learning_rate": 2.2420947202747174e-05, "loss": 0.2234, "step": 771000 }, { "epoch": 55.19, "learning_rate": 2.240306195449993e-05, "loss": 0.2272, "step": 771500 }, { "epoch": 55.23, "learning_rate": 2.2385176706252685e-05, "loss": 0.2247, "step": 772000 }, { "epoch": 55.27, "learning_rate": 2.2367291458005437e-05, "loss": 0.226, "step": 772500 }, { "epoch": 55.3, "learning_rate": 2.2349406209758192e-05, "loss": 0.2293, "step": 773000 }, { "epoch": 55.34, "learning_rate": 2.2331520961510947e-05, "loss": 0.2277, "step": 773500 }, { "epoch": 55.37, "learning_rate": 2.23136357132637e-05, "loss": 0.2258, "step": 774000 }, { "epoch": 55.41, "learning_rate": 2.2295750465016454e-05, "loss": 0.2301, "step": 774500 }, { "epoch": 55.44, "learning_rate": 2.227786521676921e-05, "loss": 0.2339, "step": 775000 }, { "epoch": 55.48, "learning_rate": 2.2259979968521964e-05, "loss": 0.2279, "step": 775500 }, { "epoch": 55.52, "learning_rate": 2.224209472027472e-05, "loss": 0.2362, "step": 776000 }, { "epoch": 55.55, "learning_rate": 2.2224209472027475e-05, "loss": 0.2362, "step": 776500 }, { "epoch": 55.59, "learning_rate": 2.220632422378023e-05, "loss": 0.2249, "step": 777000 }, { "epoch": 55.62, "learning_rate": 2.218843897553298e-05, "loss": 0.238, "step": 777500 }, { "epoch": 55.66, "learning_rate": 2.2170553727285737e-05, "loss": 0.2312, "step": 778000 }, { "epoch": 55.69, "learning_rate": 2.2152668479038492e-05, "loss": 0.2401, "step": 778500 }, { "epoch": 55.73, "learning_rate": 2.2134783230791244e-05, "loss": 0.232, "step": 779000 }, { "epoch": 55.77, "learning_rate": 2.2116897982544e-05, "loss": 0.2295, "step": 779500 }, { "epoch": 55.8, "learning_rate": 2.2099012734296754e-05, "loss": 0.2361, "step": 780000 }, { "epoch": 55.84, "learning_rate": 2.208112748604951e-05, "loss": 0.2346, "step": 780500 }, { "epoch": 55.87, "learning_rate": 2.206324223780226e-05, "loss": 0.2333, "step": 781000 }, { "epoch": 55.91, "learning_rate": 2.2045356989555016e-05, "loss": 0.2397, "step": 781500 }, { "epoch": 55.95, "learning_rate": 2.202747174130777e-05, "loss": 0.2283, "step": 782000 }, { "epoch": 55.98, "learning_rate": 2.2009586493060523e-05, "loss": 0.2348, "step": 782500 }, { "epoch": 56.02, "learning_rate": 2.199170124481328e-05, "loss": 0.2286, "step": 783000 }, { "epoch": 56.05, "learning_rate": 2.1973815996566034e-05, "loss": 0.2158, "step": 783500 }, { "epoch": 56.09, "learning_rate": 2.1955930748318785e-05, "loss": 0.2168, "step": 784000 }, { "epoch": 56.12, "learning_rate": 2.193804550007154e-05, "loss": 0.2196, "step": 784500 }, { "epoch": 56.16, "learning_rate": 2.1920160251824296e-05, "loss": 0.2221, "step": 785000 }, { "epoch": 56.2, "learning_rate": 2.190227500357705e-05, "loss": 0.2247, "step": 785500 }, { "epoch": 56.23, "learning_rate": 2.1884389755329803e-05, "loss": 0.2241, "step": 786000 }, { "epoch": 56.27, "learning_rate": 2.1866504507082558e-05, "loss": 0.2252, "step": 786500 }, { "epoch": 56.3, "learning_rate": 2.1848619258835316e-05, "loss": 0.2271, "step": 787000 }, { "epoch": 56.34, "learning_rate": 2.1830734010588068e-05, "loss": 0.2289, "step": 787500 }, { "epoch": 56.37, "learning_rate": 2.1812848762340823e-05, "loss": 0.2297, "step": 788000 }, { "epoch": 56.41, "learning_rate": 2.179496351409358e-05, "loss": 0.2346, "step": 788500 }, { "epoch": 56.45, "learning_rate": 2.177707826584633e-05, "loss": 0.2285, "step": 789000 }, { "epoch": 56.48, "learning_rate": 2.1759193017599086e-05, "loss": 0.2272, "step": 789500 }, { "epoch": 56.52, "learning_rate": 2.174130776935184e-05, "loss": 0.2298, "step": 790000 }, { "epoch": 56.55, "learning_rate": 2.1723422521104596e-05, "loss": 0.2334, "step": 790500 }, { "epoch": 56.59, "learning_rate": 2.1705537272857348e-05, "loss": 0.2305, "step": 791000 }, { "epoch": 56.62, "learning_rate": 2.1687652024610103e-05, "loss": 0.2349, "step": 791500 }, { "epoch": 56.66, "learning_rate": 2.1669766776362858e-05, "loss": 0.2294, "step": 792000 }, { "epoch": 56.7, "learning_rate": 2.165188152811561e-05, "loss": 0.2373, "step": 792500 }, { "epoch": 56.73, "learning_rate": 2.1633996279868365e-05, "loss": 0.2319, "step": 793000 }, { "epoch": 56.77, "learning_rate": 2.161611103162112e-05, "loss": 0.2341, "step": 793500 }, { "epoch": 56.8, "learning_rate": 2.1598225783373875e-05, "loss": 0.2311, "step": 794000 }, { "epoch": 56.84, "learning_rate": 2.1580340535126627e-05, "loss": 0.234, "step": 794500 }, { "epoch": 56.88, "learning_rate": 2.1562455286879382e-05, "loss": 0.235, "step": 795000 }, { "epoch": 56.91, "learning_rate": 2.1544570038632138e-05, "loss": 0.2416, "step": 795500 }, { "epoch": 56.95, "learning_rate": 2.152668479038489e-05, "loss": 0.236, "step": 796000 }, { "epoch": 56.98, "learning_rate": 2.1508799542137645e-05, "loss": 0.2375, "step": 796500 }, { "epoch": 57.02, "learning_rate": 2.14909142938904e-05, "loss": 0.2239, "step": 797000 }, { "epoch": 57.05, "learning_rate": 2.147302904564315e-05, "loss": 0.2109, "step": 797500 }, { "epoch": 57.09, "learning_rate": 2.145514379739591e-05, "loss": 0.2177, "step": 798000 }, { "epoch": 57.13, "learning_rate": 2.1437258549148665e-05, "loss": 0.2187, "step": 798500 }, { "epoch": 57.16, "learning_rate": 2.1419373300901417e-05, "loss": 0.2258, "step": 799000 }, { "epoch": 57.2, "learning_rate": 2.1401488052654172e-05, "loss": 0.2228, "step": 799500 }, { "epoch": 57.23, "learning_rate": 2.1383602804406927e-05, "loss": 0.2236, "step": 800000 }, { "epoch": 57.27, "learning_rate": 2.1365717556159683e-05, "loss": 0.2232, "step": 800500 }, { "epoch": 57.3, "learning_rate": 2.1347832307912434e-05, "loss": 0.2246, "step": 801000 }, { "epoch": 57.34, "learning_rate": 2.132994705966519e-05, "loss": 0.2273, "step": 801500 }, { "epoch": 57.38, "learning_rate": 2.1312061811417945e-05, "loss": 0.2282, "step": 802000 }, { "epoch": 57.41, "learning_rate": 2.1294176563170697e-05, "loss": 0.2269, "step": 802500 }, { "epoch": 57.45, "learning_rate": 2.1276291314923452e-05, "loss": 0.2298, "step": 803000 }, { "epoch": 57.48, "learning_rate": 2.1258406066676207e-05, "loss": 0.2284, "step": 803500 }, { "epoch": 57.52, "learning_rate": 2.1240520818428962e-05, "loss": 0.2275, "step": 804000 }, { "epoch": 57.55, "learning_rate": 2.1222635570181714e-05, "loss": 0.2373, "step": 804500 }, { "epoch": 57.59, "learning_rate": 2.120475032193447e-05, "loss": 0.23, "step": 805000 }, { "epoch": 57.63, "learning_rate": 2.1186865073687224e-05, "loss": 0.2297, "step": 805500 }, { "epoch": 57.66, "learning_rate": 2.1168979825439976e-05, "loss": 0.2306, "step": 806000 }, { "epoch": 57.7, "learning_rate": 2.115109457719273e-05, "loss": 0.2331, "step": 806500 }, { "epoch": 57.73, "learning_rate": 2.1133209328945486e-05, "loss": 0.2343, "step": 807000 }, { "epoch": 57.77, "learning_rate": 2.111532408069824e-05, "loss": 0.2298, "step": 807500 }, { "epoch": 57.81, "learning_rate": 2.1097438832450993e-05, "loss": 0.2313, "step": 808000 }, { "epoch": 57.84, "learning_rate": 2.107955358420375e-05, "loss": 0.2329, "step": 808500 }, { "epoch": 57.88, "learning_rate": 2.1061668335956504e-05, "loss": 0.232, "step": 809000 }, { "epoch": 57.91, "learning_rate": 2.104378308770926e-05, "loss": 0.2298, "step": 809500 }, { "epoch": 57.95, "learning_rate": 2.1025897839462014e-05, "loss": 0.2349, "step": 810000 }, { "epoch": 57.98, "learning_rate": 2.100801259121477e-05, "loss": 0.2345, "step": 810500 }, { "epoch": 58.02, "learning_rate": 2.099012734296752e-05, "loss": 0.2235, "step": 811000 }, { "epoch": 58.06, "learning_rate": 2.0972242094720276e-05, "loss": 0.2161, "step": 811500 }, { "epoch": 58.09, "learning_rate": 2.095435684647303e-05, "loss": 0.2149, "step": 812000 }, { "epoch": 58.13, "learning_rate": 2.0936471598225783e-05, "loss": 0.2213, "step": 812500 }, { "epoch": 58.16, "learning_rate": 2.091858634997854e-05, "loss": 0.2232, "step": 813000 }, { "epoch": 58.2, "learning_rate": 2.0900701101731294e-05, "loss": 0.221, "step": 813500 }, { "epoch": 58.23, "learning_rate": 2.088281585348405e-05, "loss": 0.2233, "step": 814000 }, { "epoch": 58.27, "learning_rate": 2.08649306052368e-05, "loss": 0.2231, "step": 814500 }, { "epoch": 58.31, "learning_rate": 2.0847045356989556e-05, "loss": 0.2225, "step": 815000 }, { "epoch": 58.34, "learning_rate": 2.082916010874231e-05, "loss": 0.2299, "step": 815500 }, { "epoch": 58.38, "learning_rate": 2.0811274860495063e-05, "loss": 0.2217, "step": 816000 }, { "epoch": 58.41, "learning_rate": 2.0793389612247818e-05, "loss": 0.2294, "step": 816500 }, { "epoch": 58.45, "learning_rate": 2.0775504364000573e-05, "loss": 0.2272, "step": 817000 }, { "epoch": 58.48, "learning_rate": 2.075761911575333e-05, "loss": 0.2274, "step": 817500 }, { "epoch": 58.52, "learning_rate": 2.073973386750608e-05, "loss": 0.2324, "step": 818000 }, { "epoch": 58.56, "learning_rate": 2.0721848619258835e-05, "loss": 0.2313, "step": 818500 }, { "epoch": 58.59, "learning_rate": 2.070396337101159e-05, "loss": 0.2285, "step": 819000 }, { "epoch": 58.63, "learning_rate": 2.0686078122764346e-05, "loss": 0.2287, "step": 819500 }, { "epoch": 58.66, "learning_rate": 2.06681928745171e-05, "loss": 0.225, "step": 820000 }, { "epoch": 58.7, "learning_rate": 2.0650307626269856e-05, "loss": 0.233, "step": 820500 }, { "epoch": 58.74, "learning_rate": 2.0632422378022608e-05, "loss": 0.232, "step": 821000 }, { "epoch": 58.77, "learning_rate": 2.0614537129775363e-05, "loss": 0.234, "step": 821500 }, { "epoch": 58.81, "learning_rate": 2.0596651881528118e-05, "loss": 0.232, "step": 822000 }, { "epoch": 58.84, "learning_rate": 2.057876663328087e-05, "loss": 0.2346, "step": 822500 }, { "epoch": 58.88, "learning_rate": 2.0560881385033625e-05, "loss": 0.2309, "step": 823000 }, { "epoch": 58.91, "learning_rate": 2.054299613678638e-05, "loss": 0.2332, "step": 823500 }, { "epoch": 58.95, "learning_rate": 2.0525110888539136e-05, "loss": 0.2293, "step": 824000 }, { "epoch": 58.99, "learning_rate": 2.0507225640291887e-05, "loss": 0.2305, "step": 824500 }, { "epoch": 59.02, "learning_rate": 2.0489340392044643e-05, "loss": 0.2231, "step": 825000 }, { "epoch": 59.06, "learning_rate": 2.0471455143797398e-05, "loss": 0.216, "step": 825500 }, { "epoch": 59.09, "learning_rate": 2.045356989555015e-05, "loss": 0.215, "step": 826000 }, { "epoch": 59.13, "learning_rate": 2.0435684647302905e-05, "loss": 0.2172, "step": 826500 }, { "epoch": 59.16, "learning_rate": 2.041779939905566e-05, "loss": 0.2196, "step": 827000 }, { "epoch": 59.2, "learning_rate": 2.0399914150808415e-05, "loss": 0.2188, "step": 827500 }, { "epoch": 59.24, "learning_rate": 2.0382028902561167e-05, "loss": 0.2201, "step": 828000 }, { "epoch": 59.27, "learning_rate": 2.0364143654313922e-05, "loss": 0.2238, "step": 828500 }, { "epoch": 59.31, "learning_rate": 2.0346258406066677e-05, "loss": 0.2251, "step": 829000 }, { "epoch": 59.34, "learning_rate": 2.032837315781943e-05, "loss": 0.2217, "step": 829500 }, { "epoch": 59.38, "learning_rate": 2.0310487909572184e-05, "loss": 0.2241, "step": 830000 }, { "epoch": 59.41, "learning_rate": 2.0292602661324943e-05, "loss": 0.2246, "step": 830500 }, { "epoch": 59.45, "learning_rate": 2.0274717413077695e-05, "loss": 0.2285, "step": 831000 }, { "epoch": 59.49, "learning_rate": 2.025683216483045e-05, "loss": 0.2288, "step": 831500 }, { "epoch": 59.52, "learning_rate": 2.0238946916583205e-05, "loss": 0.2267, "step": 832000 }, { "epoch": 59.56, "learning_rate": 2.0221061668335957e-05, "loss": 0.2266, "step": 832500 }, { "epoch": 59.59, "learning_rate": 2.0203176420088712e-05, "loss": 0.2269, "step": 833000 }, { "epoch": 59.63, "learning_rate": 2.0185291171841467e-05, "loss": 0.2297, "step": 833500 }, { "epoch": 59.67, "learning_rate": 2.0167405923594222e-05, "loss": 0.2321, "step": 834000 }, { "epoch": 59.7, "learning_rate": 2.0149520675346974e-05, "loss": 0.2337, "step": 834500 }, { "epoch": 59.74, "learning_rate": 2.013163542709973e-05, "loss": 0.2355, "step": 835000 }, { "epoch": 59.77, "learning_rate": 2.0113750178852484e-05, "loss": 0.2271, "step": 835500 }, { "epoch": 59.81, "learning_rate": 2.0095864930605236e-05, "loss": 0.2323, "step": 836000 }, { "epoch": 59.84, "learning_rate": 2.007797968235799e-05, "loss": 0.2307, "step": 836500 }, { "epoch": 59.88, "learning_rate": 2.0060094434110747e-05, "loss": 0.2296, "step": 837000 }, { "epoch": 59.92, "learning_rate": 2.0042209185863502e-05, "loss": 0.229, "step": 837500 }, { "epoch": 59.95, "learning_rate": 2.0024323937616254e-05, "loss": 0.2335, "step": 838000 }, { "epoch": 59.99, "learning_rate": 2.000643868936901e-05, "loss": 0.2288, "step": 838500 }, { "epoch": 60.02, "learning_rate": 1.9988553441121764e-05, "loss": 0.2223, "step": 839000 }, { "epoch": 60.06, "learning_rate": 1.9970668192874516e-05, "loss": 0.2112, "step": 839500 }, { "epoch": 60.09, "learning_rate": 1.995278294462727e-05, "loss": 0.2118, "step": 840000 }, { "epoch": 60.13, "learning_rate": 1.9934897696380026e-05, "loss": 0.2179, "step": 840500 }, { "epoch": 60.17, "learning_rate": 1.991701244813278e-05, "loss": 0.2205, "step": 841000 }, { "epoch": 60.2, "learning_rate": 1.9899127199885536e-05, "loss": 0.2214, "step": 841500 }, { "epoch": 60.24, "learning_rate": 1.988124195163829e-05, "loss": 0.2216, "step": 842000 }, { "epoch": 60.27, "learning_rate": 1.9863356703391043e-05, "loss": 0.2264, "step": 842500 }, { "epoch": 60.31, "learning_rate": 1.98454714551438e-05, "loss": 0.2233, "step": 843000 }, { "epoch": 60.34, "learning_rate": 1.9827586206896554e-05, "loss": 0.2307, "step": 843500 }, { "epoch": 60.38, "learning_rate": 1.980970095864931e-05, "loss": 0.2233, "step": 844000 }, { "epoch": 60.42, "learning_rate": 1.979181571040206e-05, "loss": 0.2245, "step": 844500 }, { "epoch": 60.45, "learning_rate": 1.9773930462154816e-05, "loss": 0.226, "step": 845000 }, { "epoch": 60.49, "learning_rate": 1.975604521390757e-05, "loss": 0.2275, "step": 845500 }, { "epoch": 60.52, "learning_rate": 1.9738159965660323e-05, "loss": 0.2276, "step": 846000 }, { "epoch": 60.56, "learning_rate": 1.9720274717413078e-05, "loss": 0.2228, "step": 846500 }, { "epoch": 60.6, "learning_rate": 1.9702389469165833e-05, "loss": 0.226, "step": 847000 }, { "epoch": 60.63, "learning_rate": 1.968450422091859e-05, "loss": 0.2277, "step": 847500 }, { "epoch": 60.67, "learning_rate": 1.966661897267134e-05, "loss": 0.2318, "step": 848000 }, { "epoch": 60.7, "learning_rate": 1.9648733724424095e-05, "loss": 0.2249, "step": 848500 }, { "epoch": 60.74, "learning_rate": 1.963084847617685e-05, "loss": 0.2243, "step": 849000 }, { "epoch": 60.77, "learning_rate": 1.9612963227929602e-05, "loss": 0.2293, "step": 849500 }, { "epoch": 60.81, "learning_rate": 1.9595077979682358e-05, "loss": 0.2277, "step": 850000 }, { "epoch": 60.85, "learning_rate": 1.9577192731435113e-05, "loss": 0.2305, "step": 850500 }, { "epoch": 60.88, "learning_rate": 1.9559307483187868e-05, "loss": 0.232, "step": 851000 }, { "epoch": 60.92, "learning_rate": 1.954142223494062e-05, "loss": 0.2284, "step": 851500 }, { "epoch": 60.95, "learning_rate": 1.9523536986693378e-05, "loss": 0.2268, "step": 852000 }, { "epoch": 60.99, "learning_rate": 1.9505651738446133e-05, "loss": 0.2282, "step": 852500 }, { "epoch": 61.02, "learning_rate": 1.9487766490198885e-05, "loss": 0.2163, "step": 853000 }, { "epoch": 61.06, "learning_rate": 1.946988124195164e-05, "loss": 0.2177, "step": 853500 }, { "epoch": 61.1, "learning_rate": 1.9451995993704396e-05, "loss": 0.2179, "step": 854000 }, { "epoch": 61.13, "learning_rate": 1.9434110745457147e-05, "loss": 0.2188, "step": 854500 }, { "epoch": 61.17, "learning_rate": 1.9416225497209903e-05, "loss": 0.2208, "step": 855000 }, { "epoch": 61.2, "learning_rate": 1.9398340248962658e-05, "loss": 0.2166, "step": 855500 }, { "epoch": 61.24, "learning_rate": 1.938045500071541e-05, "loss": 0.2196, "step": 856000 }, { "epoch": 61.27, "learning_rate": 1.9362569752468165e-05, "loss": 0.2162, "step": 856500 }, { "epoch": 61.31, "learning_rate": 1.934468450422092e-05, "loss": 0.2179, "step": 857000 }, { "epoch": 61.35, "learning_rate": 1.9326799255973675e-05, "loss": 0.2201, "step": 857500 }, { "epoch": 61.38, "learning_rate": 1.9308914007726427e-05, "loss": 0.2275, "step": 858000 }, { "epoch": 61.42, "learning_rate": 1.9291028759479182e-05, "loss": 0.2219, "step": 858500 }, { "epoch": 61.45, "learning_rate": 1.9273143511231937e-05, "loss": 0.223, "step": 859000 }, { "epoch": 61.49, "learning_rate": 1.925525826298469e-05, "loss": 0.2248, "step": 859500 }, { "epoch": 61.53, "learning_rate": 1.9237373014737444e-05, "loss": 0.2247, "step": 860000 }, { "epoch": 61.56, "learning_rate": 1.92194877664902e-05, "loss": 0.2284, "step": 860500 }, { "epoch": 61.6, "learning_rate": 1.9201602518242955e-05, "loss": 0.2274, "step": 861000 }, { "epoch": 61.63, "learning_rate": 1.9183717269995706e-05, "loss": 0.2285, "step": 861500 }, { "epoch": 61.67, "learning_rate": 1.916583202174846e-05, "loss": 0.2239, "step": 862000 }, { "epoch": 61.7, "learning_rate": 1.9147946773501217e-05, "loss": 0.2276, "step": 862500 }, { "epoch": 61.74, "learning_rate": 1.9130061525253972e-05, "loss": 0.2247, "step": 863000 }, { "epoch": 61.78, "learning_rate": 1.9112176277006727e-05, "loss": 0.2242, "step": 863500 }, { "epoch": 61.81, "learning_rate": 1.9094291028759482e-05, "loss": 0.2299, "step": 864000 }, { "epoch": 61.85, "learning_rate": 1.9076405780512234e-05, "loss": 0.2314, "step": 864500 }, { "epoch": 61.88, "learning_rate": 1.905852053226499e-05, "loss": 0.2283, "step": 865000 }, { "epoch": 61.92, "learning_rate": 1.9040635284017744e-05, "loss": 0.2293, "step": 865500 }, { "epoch": 61.95, "learning_rate": 1.90227500357705e-05, "loss": 0.2288, "step": 866000 }, { "epoch": 61.99, "learning_rate": 1.900486478752325e-05, "loss": 0.2341, "step": 866500 }, { "epoch": 62.03, "learning_rate": 1.8986979539276007e-05, "loss": 0.2178, "step": 867000 }, { "epoch": 62.06, "learning_rate": 1.8969094291028762e-05, "loss": 0.2113, "step": 867500 }, { "epoch": 62.1, "learning_rate": 1.8951209042781514e-05, "loss": 0.2171, "step": 868000 }, { "epoch": 62.13, "learning_rate": 1.893332379453427e-05, "loss": 0.217, "step": 868500 }, { "epoch": 62.17, "learning_rate": 1.8915438546287024e-05, "loss": 0.2164, "step": 869000 }, { "epoch": 62.2, "learning_rate": 1.8897553298039776e-05, "loss": 0.2217, "step": 869500 }, { "epoch": 62.24, "learning_rate": 1.887966804979253e-05, "loss": 0.2215, "step": 870000 } ], "max_steps": 1397800, "num_train_epochs": 100, "total_flos": 6.2780322161664e+16, "trial_name": null, "trial_params": null }