|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 10.0, |
|
"global_step": 182810, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.98632459931076e-05, |
|
"loss": 2.0893, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.97264919862152e-05, |
|
"loss": 2.0282, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9589737979322795e-05, |
|
"loss": 2.0359, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.94529839724304e-05, |
|
"loss": 2.0573, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.931622996553799e-05, |
|
"loss": 2.0498, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.9179475958645595e-05, |
|
"loss": 1.9906, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.904272195175319e-05, |
|
"loss": 2.0297, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.8905967944860786e-05, |
|
"loss": 1.9994, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.876921393796838e-05, |
|
"loss": 2.0443, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.863245993107598e-05, |
|
"loss": 1.9891, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.849570592418358e-05, |
|
"loss": 1.9978, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.835895191729118e-05, |
|
"loss": 2.0001, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.8222197910398776e-05, |
|
"loss": 1.9579, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.808544390350638e-05, |
|
"loss": 1.9862, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.7948689896613974e-05, |
|
"loss": 1.959, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.781193588972157e-05, |
|
"loss": 1.9861, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.7675181882829165e-05, |
|
"loss": 1.9626, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.753842787593677e-05, |
|
"loss": 1.9609, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.740167386904437e-05, |
|
"loss": 1.9949, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.7264919862151964e-05, |
|
"loss": 1.9398, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.712816585525956e-05, |
|
"loss": 1.9657, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.699141184836716e-05, |
|
"loss": 1.9369, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.685465784147476e-05, |
|
"loss": 1.9916, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.671790383458235e-05, |
|
"loss": 1.9725, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.6581149827689955e-05, |
|
"loss": 1.9161, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.644439582079756e-05, |
|
"loss": 1.9733, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.630764181390515e-05, |
|
"loss": 1.952, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.617088780701275e-05, |
|
"loss": 1.9321, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.603413380012034e-05, |
|
"loss": 1.935, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.5897379793227945e-05, |
|
"loss": 1.9755, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.576062578633554e-05, |
|
"loss": 1.9573, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.5623871779443136e-05, |
|
"loss": 1.9531, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.548711777255074e-05, |
|
"loss": 1.9465, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.535036376565834e-05, |
|
"loss": 1.9641, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.5213609758765936e-05, |
|
"loss": 1.9157, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.507685575187353e-05, |
|
"loss": 1.9518, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.4940101744981127e-05, |
|
"loss": 1.7654, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.480334773808873e-05, |
|
"loss": 1.6299, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.4666593731196324e-05, |
|
"loss": 1.6508, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.4529839724303926e-05, |
|
"loss": 1.6008, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.439308571741152e-05, |
|
"loss": 1.6127, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.4256331710519124e-05, |
|
"loss": 1.5774, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.411957770362672e-05, |
|
"loss": 1.6349, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.3982823696734315e-05, |
|
"loss": 1.6484, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.384606968984191e-05, |
|
"loss": 1.6607, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.370931568294951e-05, |
|
"loss": 1.6338, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.3572561676057114e-05, |
|
"loss": 1.6342, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.343580766916471e-05, |
|
"loss": 1.6453, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.3299053662272305e-05, |
|
"loss": 1.6547, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.316229965537991e-05, |
|
"loss": 1.6587, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 4.30255456484875e-05, |
|
"loss": 1.6449, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.28887916415951e-05, |
|
"loss": 1.6473, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.275203763470269e-05, |
|
"loss": 1.6378, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.2615283627810296e-05, |
|
"loss": 1.6495, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.24785296209179e-05, |
|
"loss": 1.695, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.234177561402549e-05, |
|
"loss": 1.7073, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.220502160713309e-05, |
|
"loss": 1.7791, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 4.206826760024069e-05, |
|
"loss": 1.7924, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.1931513593348286e-05, |
|
"loss": 1.8115, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.179475958645588e-05, |
|
"loss": 1.7199, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.1658005579563484e-05, |
|
"loss": 1.7923, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 4.1521251572671086e-05, |
|
"loss": 1.6858, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.138449756577868e-05, |
|
"loss": 1.6759, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 4.1247743558886276e-05, |
|
"loss": 1.6905, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 4.111098955199388e-05, |
|
"loss": 1.6696, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 4.0974235545101474e-05, |
|
"loss": 1.7242, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.083748153820907e-05, |
|
"loss": 1.7341, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.0700727531316665e-05, |
|
"loss": 1.7026, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 4.056397352442427e-05, |
|
"loss": 1.7249, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 4.042721951753187e-05, |
|
"loss": 1.6447, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 4.0290465510639464e-05, |
|
"loss": 1.6748, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.015371150374706e-05, |
|
"loss": 1.6952, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.001695749685466e-05, |
|
"loss": 1.6569, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.988020348996226e-05, |
|
"loss": 1.4236, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 3.974344948306985e-05, |
|
"loss": 1.4146, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.9606695476177455e-05, |
|
"loss": 1.4398, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.946994146928506e-05, |
|
"loss": 1.433, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.933318746239265e-05, |
|
"loss": 1.4019, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.919643345550025e-05, |
|
"loss": 1.4358, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.905967944860784e-05, |
|
"loss": 1.4445, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.8922925441715445e-05, |
|
"loss": 1.3795, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.878617143482304e-05, |
|
"loss": 1.4617, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 3.864941742793064e-05, |
|
"loss": 1.399, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 3.851266342103824e-05, |
|
"loss": 1.4227, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.837590941414584e-05, |
|
"loss": 1.406, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 3.8239155407253436e-05, |
|
"loss": 1.4229, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 3.810240140036103e-05, |
|
"loss": 1.4309, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.796564739346863e-05, |
|
"loss": 1.4129, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 3.782889338657623e-05, |
|
"loss": 1.4409, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.7692139379683824e-05, |
|
"loss": 1.3927, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 3.7555385372791426e-05, |
|
"loss": 1.4553, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 3.741863136589902e-05, |
|
"loss": 1.4204, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.7281877359006624e-05, |
|
"loss": 1.4391, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 3.714512335211422e-05, |
|
"loss": 1.4732, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 3.7008369345221815e-05, |
|
"loss": 1.4435, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.687161533832941e-05, |
|
"loss": 1.4206, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.673486133143701e-05, |
|
"loss": 1.4552, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 3.6598107324544614e-05, |
|
"loss": 1.4611, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 3.646135331765221e-05, |
|
"loss": 1.4487, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.6324599310759805e-05, |
|
"loss": 1.4679, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.618784530386741e-05, |
|
"loss": 1.4245, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.6051091296975e-05, |
|
"loss": 1.4871, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.59143372900826e-05, |
|
"loss": 1.4553, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 3.57775832831902e-05, |
|
"loss": 1.4775, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.56408292762978e-05, |
|
"loss": 1.4399, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.55040752694054e-05, |
|
"loss": 1.4695, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 3.536732126251299e-05, |
|
"loss": 1.4722, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 3.523056725562059e-05, |
|
"loss": 1.4315, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.509381324872819e-05, |
|
"loss": 1.4428, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.4957059241835786e-05, |
|
"loss": 1.4064, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.482030523494338e-05, |
|
"loss": 1.216, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 3.4683551228050984e-05, |
|
"loss": 1.2302, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 3.4546797221158586e-05, |
|
"loss": 1.217, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.441004321426618e-05, |
|
"loss": 1.2273, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.427328920737378e-05, |
|
"loss": 1.2445, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 3.413653520048137e-05, |
|
"loss": 1.276, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.3999781193588974e-05, |
|
"loss": 1.2173, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 3.386302718669657e-05, |
|
"loss": 1.2329, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 3.372627317980417e-05, |
|
"loss": 1.2375, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 3.358951917291177e-05, |
|
"loss": 1.231, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 3.345276516601937e-05, |
|
"loss": 1.2456, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 3.34, |
|
"learning_rate": 3.3316011159126965e-05, |
|
"loss": 1.2182, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 3.317925715223456e-05, |
|
"loss": 1.2585, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 3.3042503145342155e-05, |
|
"loss": 1.2544, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 3.290574913844976e-05, |
|
"loss": 1.2676, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.276899513155735e-05, |
|
"loss": 1.2598, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3.2632241124664955e-05, |
|
"loss": 1.235, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 3.249548711777256e-05, |
|
"loss": 1.2327, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 3.235873311088015e-05, |
|
"loss": 1.2675, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 3.222197910398775e-05, |
|
"loss": 1.2959, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 3.2085225097095343e-05, |
|
"loss": 1.259, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 3.1948471090202946e-05, |
|
"loss": 1.2785, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 3.181171708331054e-05, |
|
"loss": 1.2611, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 3.167496307641814e-05, |
|
"loss": 1.2528, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.153820906952574e-05, |
|
"loss": 1.2584, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 3.140145506263334e-05, |
|
"loss": 1.2675, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 3.1264701055740936e-05, |
|
"loss": 1.2754, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 3.112794704884853e-05, |
|
"loss": 1.2927, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 3.099119304195613e-05, |
|
"loss": 1.2775, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 3.085443903506373e-05, |
|
"loss": 1.2686, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 3.071768502817133e-05, |
|
"loss": 1.2631, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 3.0580931021278927e-05, |
|
"loss": 1.2756, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 3.0444177014386522e-05, |
|
"loss": 1.2529, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.0307423007494124e-05, |
|
"loss": 1.2656, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 3.017066900060172e-05, |
|
"loss": 1.2597, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"learning_rate": 3.0033914993709318e-05, |
|
"loss": 1.2928, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 2.9897160986816914e-05, |
|
"loss": 1.1113, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 2.9760406979924516e-05, |
|
"loss": 1.0677, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 2.962365297303211e-05, |
|
"loss": 1.054, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.948689896613971e-05, |
|
"loss": 1.0708, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 2.9350144959247305e-05, |
|
"loss": 1.0741, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 2.9213390952354908e-05, |
|
"loss": 1.0736, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 2.9076636945462503e-05, |
|
"loss": 1.0618, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 2.8939882938570102e-05, |
|
"loss": 1.1291, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.8803128931677697e-05, |
|
"loss": 1.0901, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.86663749247853e-05, |
|
"loss": 1.0895, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 2.8529620917892898e-05, |
|
"loss": 1.0702, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 2.8392866911000493e-05, |
|
"loss": 1.0729, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 2.825611290410809e-05, |
|
"loss": 1.0829, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.811935889721569e-05, |
|
"loss": 1.105, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.798260489032329e-05, |
|
"loss": 1.1153, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 2.7845850883430885e-05, |
|
"loss": 1.0886, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 2.770909687653848e-05, |
|
"loss": 1.112, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 2.7572342869646083e-05, |
|
"loss": 1.1041, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 2.743558886275368e-05, |
|
"loss": 1.096, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 2.7298834855861277e-05, |
|
"loss": 1.1532, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 2.7162080848968872e-05, |
|
"loss": 1.127, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 2.7025326842076474e-05, |
|
"loss": 1.1254, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 2.6888572835184073e-05, |
|
"loss": 1.1162, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.675181882829167e-05, |
|
"loss": 1.0954, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 2.6615064821399267e-05, |
|
"loss": 1.1377, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 2.647831081450687e-05, |
|
"loss": 1.1012, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.6341556807614465e-05, |
|
"loss": 1.1177, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.620480280072206e-05, |
|
"loss": 1.1125, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 2.606804879382966e-05, |
|
"loss": 1.1555, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 2.593129478693726e-05, |
|
"loss": 1.1127, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 2.5794540780044857e-05, |
|
"loss": 1.1456, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.5657786773152452e-05, |
|
"loss": 1.1249, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 2.552103276626005e-05, |
|
"loss": 1.1422, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 2.5384278759367653e-05, |
|
"loss": 1.1422, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 2.5247524752475248e-05, |
|
"loss": 1.1592, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 2.5110770745582847e-05, |
|
"loss": 1.1493, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.4974016738690446e-05, |
|
"loss": 1.0908, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 2.483726273179804e-05, |
|
"loss": 0.9386, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 2.470050872490564e-05, |
|
"loss": 0.9452, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"learning_rate": 2.456375471801324e-05, |
|
"loss": 0.98, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 2.4427000711120838e-05, |
|
"loss": 0.9672, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 2.4290246704228433e-05, |
|
"loss": 0.9567, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"learning_rate": 2.415349269733603e-05, |
|
"loss": 0.9555, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 2.401673869044363e-05, |
|
"loss": 0.9843, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 2.387998468355123e-05, |
|
"loss": 0.977, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 2.3743230676658828e-05, |
|
"loss": 0.9672, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 2.3606476669766427e-05, |
|
"loss": 0.9683, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 2.3469722662874026e-05, |
|
"loss": 0.9976, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 2.333296865598162e-05, |
|
"loss": 0.9912, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"learning_rate": 2.319621464908922e-05, |
|
"loss": 0.9843, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 2.305946064219682e-05, |
|
"loss": 0.9622, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 2.2922706635304417e-05, |
|
"loss": 0.9723, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 2.2785952628412013e-05, |
|
"loss": 0.9783, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 2.264919862151961e-05, |
|
"loss": 0.956, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 2.251244461462721e-05, |
|
"loss": 0.9982, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 2.237569060773481e-05, |
|
"loss": 0.9763, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 2.2238936600842404e-05, |
|
"loss": 0.9901, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 2.2102182593950007e-05, |
|
"loss": 0.966, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 2.1965428587057602e-05, |
|
"loss": 0.9847, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"learning_rate": 2.18286745801652e-05, |
|
"loss": 0.9981, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 2.1691920573272796e-05, |
|
"loss": 0.9539, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 2.1555166566380398e-05, |
|
"loss": 0.9897, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 2.1418412559487994e-05, |
|
"loss": 1.0071, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 2.1281658552595592e-05, |
|
"loss": 0.9948, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 2.114490454570319e-05, |
|
"loss": 1.0028, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 2.100815053881079e-05, |
|
"loss": 1.0115, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 2.0871396531918385e-05, |
|
"loss": 0.978, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 2.0734642525025984e-05, |
|
"loss": 0.9922, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 5.88, |
|
"learning_rate": 2.0597888518133583e-05, |
|
"loss": 1.0203, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 2.046113451124118e-05, |
|
"loss": 1.0288, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 2.0324380504348777e-05, |
|
"loss": 0.9899, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 2.0187626497456376e-05, |
|
"loss": 0.9988, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 2.0050872490563975e-05, |
|
"loss": 1.0026, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 1.9914118483671573e-05, |
|
"loss": 0.9024, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 6.04, |
|
"learning_rate": 1.977736447677917e-05, |
|
"loss": 0.849, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 1.964061046988677e-05, |
|
"loss": 0.8861, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 1.9503856462994366e-05, |
|
"loss": 0.8508, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 1.9367102456101965e-05, |
|
"loss": 0.8596, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 1.923034844920956e-05, |
|
"loss": 0.8596, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 1.9093594442317163e-05, |
|
"loss": 0.8316, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 1.8956840435424758e-05, |
|
"loss": 0.8432, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 1.8820086428532357e-05, |
|
"loss": 0.8965, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 1.8683332421639956e-05, |
|
"loss": 0.8633, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 1.8546578414747554e-05, |
|
"loss": 0.8741, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 1.840982440785515e-05, |
|
"loss": 0.859, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 6.35, |
|
"learning_rate": 1.827307040096275e-05, |
|
"loss": 0.8788, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"learning_rate": 1.8136316394070347e-05, |
|
"loss": 0.8782, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 1.7999562387177946e-05, |
|
"loss": 0.8872, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 1.786280838028554e-05, |
|
"loss": 0.9024, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 1.772605437339314e-05, |
|
"loss": 0.8986, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 6.48, |
|
"learning_rate": 1.758930036650074e-05, |
|
"loss": 0.8831, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"learning_rate": 1.7452546359608338e-05, |
|
"loss": 0.8677, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 1.7315792352715933e-05, |
|
"loss": 0.8984, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"learning_rate": 1.7179038345823535e-05, |
|
"loss": 0.8661, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 1.704228433893113e-05, |
|
"loss": 0.9107, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 1.690553033203873e-05, |
|
"loss": 0.9041, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.6768776325146325e-05, |
|
"loss": 0.8869, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 1.6632022318253927e-05, |
|
"loss": 0.9012, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 1.6495268311361522e-05, |
|
"loss": 0.8737, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 1.635851430446912e-05, |
|
"loss": 0.8927, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 1.622176029757672e-05, |
|
"loss": 0.8836, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 1.608500629068432e-05, |
|
"loss": 0.8753, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 1.5948252283791914e-05, |
|
"loss": 0.885, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 1.5811498276899513e-05, |
|
"loss": 0.8998, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.567474427000711e-05, |
|
"loss": 0.916, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 1.553799026311471e-05, |
|
"loss": 0.8755, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 1.5401236256222306e-05, |
|
"loss": 0.8842, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 1.5264482249329905e-05, |
|
"loss": 0.905, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 1.5127728242437505e-05, |
|
"loss": 0.9004, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 1.4990974235545102e-05, |
|
"loss": 0.8866, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.48542202286527e-05, |
|
"loss": 0.7771, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 1.4717466221760298e-05, |
|
"loss": 0.8103, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.4580712214867898e-05, |
|
"loss": 0.8078, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"learning_rate": 1.4443958207975494e-05, |
|
"loss": 0.7961, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.4307204201083094e-05, |
|
"loss": 0.7902, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 1.417045019419069e-05, |
|
"loss": 0.8105, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 1.403369618729829e-05, |
|
"loss": 0.7907, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 1.3896942180405887e-05, |
|
"loss": 0.792, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 1.3760188173513486e-05, |
|
"loss": 0.7866, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 1.3623434166621083e-05, |
|
"loss": 0.8102, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1.3486680159728682e-05, |
|
"loss": 0.7984, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 1.3349926152836279e-05, |
|
"loss": 0.8041, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 1.3213172145943878e-05, |
|
"loss": 0.8205, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 1.3076418139051475e-05, |
|
"loss": 0.7662, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 1.2939664132159074e-05, |
|
"loss": 0.7829, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 1.280291012526667e-05, |
|
"loss": 0.7997, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 1.266615611837427e-05, |
|
"loss": 0.8083, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 1.2529402111481866e-05, |
|
"loss": 0.8018, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 1.2392648104589465e-05, |
|
"loss": 0.789, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 1.2255894097697062e-05, |
|
"loss": 0.7978, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 1.2119140090804661e-05, |
|
"loss": 0.7952, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 1.1982386083912258e-05, |
|
"loss": 0.7908, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 1.1845632077019857e-05, |
|
"loss": 0.8163, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 1.1708878070127454e-05, |
|
"loss": 0.7917, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 1.1572124063235053e-05, |
|
"loss": 0.7945, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 1.1435370056342652e-05, |
|
"loss": 0.8289, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 1.1298616049450249e-05, |
|
"loss": 0.7807, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 1.1161862042557847e-05, |
|
"loss": 0.8143, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 1.1025108035665444e-05, |
|
"loss": 0.79, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 7.82, |
|
"learning_rate": 1.0888354028773043e-05, |
|
"loss": 0.7928, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 1.0751600021880642e-05, |
|
"loss": 0.8083, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 1.061484601498824e-05, |
|
"loss": 0.8019, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 1.0478092008095838e-05, |
|
"loss": 0.7891, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 1.0341338001203437e-05, |
|
"loss": 0.8143, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 1.0204583994311034e-05, |
|
"loss": 0.82, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"learning_rate": 1.0067829987418633e-05, |
|
"loss": 0.8235, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.931075980526231e-06, |
|
"loss": 0.7471, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"learning_rate": 9.794321973633828e-06, |
|
"loss": 0.7027, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 9.657567966741427e-06, |
|
"loss": 0.7308, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 9.520813959849024e-06, |
|
"loss": 0.726, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 9.384059952956623e-06, |
|
"loss": 0.7165, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 9.24730594606422e-06, |
|
"loss": 0.7214, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 9.110551939171819e-06, |
|
"loss": 0.7191, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 8.973797932279416e-06, |
|
"loss": 0.7499, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 8.837043925387015e-06, |
|
"loss": 0.7111, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 8.700289918494613e-06, |
|
"loss": 0.7301, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 8.56353591160221e-06, |
|
"loss": 0.7363, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 8.42678190470981e-06, |
|
"loss": 0.7207, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 8.290027897817406e-06, |
|
"loss": 0.7344, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 8.153273890925005e-06, |
|
"loss": 0.7097, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 8.016519884032602e-06, |
|
"loss": 0.7385, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 7.879765877140201e-06, |
|
"loss": 0.7314, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 7.743011870247798e-06, |
|
"loss": 0.7595, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 8.48, |
|
"learning_rate": 7.606257863355397e-06, |
|
"loss": 0.7482, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 8.51, |
|
"learning_rate": 7.469503856462995e-06, |
|
"loss": 0.7449, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 8.53, |
|
"learning_rate": 7.332749849570593e-06, |
|
"loss": 0.7366, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.195995842678191e-06, |
|
"loss": 0.7261, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"learning_rate": 7.059241835785789e-06, |
|
"loss": 0.7209, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 6.922487828893387e-06, |
|
"loss": 0.7439, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 6.785733822000985e-06, |
|
"loss": 0.7301, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 6.648979815108583e-06, |
|
"loss": 0.7198, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 6.512225808216181e-06, |
|
"loss": 0.7509, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 6.375471801323779e-06, |
|
"loss": 0.749, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 6.238717794431377e-06, |
|
"loss": 0.7126, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 6.101963787538975e-06, |
|
"loss": 0.7409, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 8.81, |
|
"learning_rate": 5.965209780646573e-06, |
|
"loss": 0.7593, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 5.828455773754171e-06, |
|
"loss": 0.7404, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 5.6917017668617695e-06, |
|
"loss": 0.7502, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 5.5549477599693675e-06, |
|
"loss": 0.7489, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 8.92, |
|
"learning_rate": 5.418193753076965e-06, |
|
"loss": 0.7303, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 5.281439746184563e-06, |
|
"loss": 0.7215, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 5.144685739292161e-06, |
|
"loss": 0.7592, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 5.007931732399759e-06, |
|
"loss": 0.7516, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 4.871177725507357e-06, |
|
"loss": 0.6963, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 4.734423718614955e-06, |
|
"loss": 0.7041, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 9.08, |
|
"learning_rate": 4.597669711722554e-06, |
|
"loss": 0.6961, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 4.460915704830152e-06, |
|
"loss": 0.6921, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 4.32416169793775e-06, |
|
"loss": 0.6988, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 4.1874076910453484e-06, |
|
"loss": 0.676, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"learning_rate": 4.050653684152946e-06, |
|
"loss": 0.6658, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 9.22, |
|
"learning_rate": 3.913899677260544e-06, |
|
"loss": 0.6926, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 3.777145670368142e-06, |
|
"loss": 0.68, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 3.64039166347574e-06, |
|
"loss": 0.6911, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 3.5036376565833385e-06, |
|
"loss": 0.7205, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"learning_rate": 3.3668836496909364e-06, |
|
"loss": 0.711, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 3.2301296427985343e-06, |
|
"loss": 0.6982, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 3.0933756359061323e-06, |
|
"loss": 0.677, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 9.41, |
|
"learning_rate": 2.95662162901373e-06, |
|
"loss": 0.6964, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 2.8198676221213285e-06, |
|
"loss": 0.6738, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 2.6831136152289265e-06, |
|
"loss": 0.6774, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 9.49, |
|
"learning_rate": 2.5463596083365244e-06, |
|
"loss": 0.6757, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 9.52, |
|
"learning_rate": 2.4096056014441223e-06, |
|
"loss": 0.6856, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 2.2728515945517207e-06, |
|
"loss": 0.6794, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 2.1360975876593186e-06, |
|
"loss": 0.6813, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"learning_rate": 1.9993435807669165e-06, |
|
"loss": 0.7041, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 1.8625895738745147e-06, |
|
"loss": 0.7096, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 1.7258355669821126e-06, |
|
"loss": 0.6898, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 1.5890815600897107e-06, |
|
"loss": 0.685, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 9.71, |
|
"learning_rate": 1.4523275531973089e-06, |
|
"loss": 0.7295, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 1.3155735463049068e-06, |
|
"loss": 0.6815, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 1.178819539412505e-06, |
|
"loss": 0.6813, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 1.0420655325201029e-06, |
|
"loss": 0.6895, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 9.053115256277009e-07, |
|
"loss": 0.6738, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 7.685575187352989e-07, |
|
"loss": 0.6672, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"learning_rate": 6.318035118428971e-07, |
|
"loss": 0.7159, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 4.950495049504951e-07, |
|
"loss": 0.6866, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 3.582954980580931e-07, |
|
"loss": 0.6515, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 2.2154149116569118e-07, |
|
"loss": 0.6941, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 8.478748427328921e-08, |
|
"loss": 0.6977, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 182810, |
|
"total_flos": 1.6977577396666368e+17, |
|
"train_loss": 1.1540878025462047, |
|
"train_runtime": 36258.7506, |
|
"train_samples_per_second": 5.042, |
|
"train_steps_per_second": 5.042 |
|
} |
|
], |
|
"max_steps": 182810, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.6977577396666368e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|