{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 18.0,
  "global_step": 98676,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {"epoch": 0.0, "learning_rate": 5e-09, "loss": 10.532, "step": 1},
    {"epoch": 0.09, "learning_rate": 2.5e-06, "loss": 9.367, "step": 500},
    {"epoch": 0.18, "learning_rate": 5e-06, "loss": 7.4277, "step": 1000},
    {"epoch": 0.27, "learning_rate": 7.5e-06, "loss": 6.1401, "step": 1500},
    {"epoch": 0.36, "learning_rate": 1e-05, "loss": 5.8709, "step": 2000},
    {"epoch": 0.46, "learning_rate": 1.25e-05, "loss": 5.753, "step": 2500},
    {"epoch": 0.55, "learning_rate": 1.5e-05, "loss": 5.6777, "step": 3000},
    {"epoch": 0.64, "learning_rate": 1.75e-05, "loss": 5.6151, "step": 3500},
    {"epoch": 0.73, "learning_rate": 2e-05, "loss": 5.5717, "step": 4000},
    {"epoch": 0.82, "learning_rate": 2.25e-05, "loss": 5.5305, "step": 4500},
    {"epoch": 0.91, "learning_rate": 2.5e-05, "loss": 5.4947, "step": 5000},
    {"epoch": 1.0, "learning_rate": 2.7500000000000004e-05, "loss": 5.4688, "step": 5500},
    {"epoch": 1.09, "learning_rate": 3e-05, "loss": 5.4406, "step": 6000},
    {"epoch": 1.19, "learning_rate": 3.2500000000000004e-05, "loss": 5.4163, "step": 6500},
    {"epoch": 1.28, "learning_rate": 3.5e-05, "loss": 5.3942, "step": 7000},
    {"epoch": 1.37, "learning_rate": 3.7500000000000003e-05, "loss": 5.3762, "step": 7500},
    {"epoch": 1.46, "learning_rate": 4e-05, "loss": 5.3524, "step": 8000},
    {"epoch": 1.55, "learning_rate": 4.2495e-05, "loss": 5.338, "step": 8500},
    {"epoch": 1.64, "learning_rate": 4.4995000000000005e-05, "loss": 5.3205, "step": 9000},
    {"epoch": 1.73, "learning_rate": 4.7495e-05, "loss": 5.3096, "step": 9500},
    {"epoch": 1.82, "learning_rate": 4.9995000000000005e-05, "loss": 5.2971, "step": 10000},
    {"epoch": 1.92, "learning_rate": 4.998859263331501e-05, "loss": 5.2822, "step": 10500},
    {"epoch": 2.01, "learning_rate": 4.9977139453912406e-05, "loss": 5.2716, "step": 11000},
    {"epoch": 2.1, "learning_rate": 4.99656862745098e-05, "loss": 5.2599, "step": 11500},
    {"epoch": 2.19, "learning_rate": 4.9954233095107204e-05, "loss": 5.2516, "step": 12000},
    {"epoch": 2.28, "learning_rate": 4.99427799157046e-05, "loss": 5.2391, "step": 12500},
    {"epoch": 2.37, "learning_rate": 4.9931326736301996e-05, "loss": 5.2352, "step": 13000},
    {"epoch": 2.46, "learning_rate": 4.99198964632582e-05, "loss": 5.2247, "step": 13500},
    {"epoch": 2.55, "learning_rate": 4.99084432838556e-05, "loss": 5.2168, "step": 14000},
    {"epoch": 2.65, "learning_rate": 4.9896990104453e-05, "loss": 5.2091, "step": 14500},
    {"epoch": 2.74, "learning_rate": 4.9885536925050395e-05, "loss": 5.2037, "step": 15000},
    {"epoch": 2.83, "learning_rate": 4.98741066520066e-05, "loss": 5.196, "step": 15500},
    {"epoch": 2.92, "learning_rate": 4.9862653472603996e-05, "loss": 5.1892, "step": 16000},
    {"epoch": 3.01, "learning_rate": 4.985120029320139e-05, "loss": 5.1825, "step": 16500},
    {"epoch": 3.1, "learning_rate": 4.98397700201576e-05, "loss": 5.1753, "step": 17000},
    {"epoch": 3.19, "learning_rate": 4.9828316840755e-05, "loss": 5.1722, "step": 17500},
    {"epoch": 3.28, "learning_rate": 4.9816863661352395e-05, "loss": 5.1653, "step": 18000},
    {"epoch": 3.37, "learning_rate": 4.980541048194979e-05, "loss": 5.1603, "step": 18500},
    {"epoch": 3.47, "learning_rate": 4.9793980208905996e-05, "loss": 5.1547, "step": 19000},
    {"epoch": 3.56, "learning_rate": 4.97825270295034e-05, "loss": 5.151, "step": 19500},
    {"epoch": 3.65, "learning_rate": 4.9771073850100794e-05, "loss": 5.1468, "step": 20000},
    {"epoch": 3.74, "learning_rate": 4.975962067069819e-05, "loss": 5.1434, "step": 20500},
    {"epoch": 3.83, "learning_rate": 4.974816749129559e-05, "loss": 5.1388, "step": 21000},
    {"epoch": 3.92, "learning_rate": 4.973671431189299e-05, "loss": 5.133, "step": 21500},
    {"epoch": 4.01, "learning_rate": 4.9725284038849186e-05, "loss": 5.1264, "step": 22000},
    {"epoch": 4.1, "learning_rate": 4.971383085944659e-05, "loss": 5.1248, "step": 22500},
    {"epoch": 4.2, "learning_rate": 4.9702377680043984e-05, "loss": 5.1185, "step": 23000},
    {"epoch": 4.29, "learning_rate": 4.969092450064138e-05, "loss": 5.1139, "step": 23500},
    {"epoch": 4.38, "learning_rate": 4.967947132123878e-05, "loss": 5.1114, "step": 24000},
    {"epoch": 4.47, "learning_rate": 4.966806395455378e-05, "loss": 5.1084, "step": 24500},
    {"epoch": 4.56, "learning_rate": 4.9656610775151186e-05, "loss": 5.1041, "step": 25000},
    {"epoch": 4.65, "learning_rate": 4.964515759574858e-05, "loss": 5.102, "step": 25500},
    {"epoch": 4.74, "learning_rate": 4.963370441634598e-05, "loss": 5.1012, "step": 26000},
    {"epoch": 4.83, "learning_rate": 4.962225123694338e-05, "loss": 5.0961, "step": 26500},
    {"epoch": 4.93, "learning_rate": 4.9610798057540775e-05, "loss": 5.0918, "step": 27000},
    {"epoch": 5.02, "learning_rate": 4.959934487813817e-05, "loss": 5.0823, "step": 27500},
    {"epoch": 5.11, "learning_rate": 4.958789169873557e-05, "loss": 4.7898, "step": 28000},
    {"epoch": 5.2, "learning_rate": 4.957643851933297e-05, "loss": 4.47, "step": 28500},
    {"epoch": 5.29, "learning_rate": 4.9564985339930364e-05, "loss": 4.1839, "step": 29000},
    {"epoch": 5.38, "learning_rate": 4.9553532160527766e-05, "loss": 3.9283, "step": 29500},
    {"epoch": 5.47, "learning_rate": 4.9542101887483965e-05, "loss": 3.3536, "step": 30000},
    {"epoch": 5.56, "learning_rate": 4.953067161444017e-05, "loss": 2.7553, "step": 30500},
    {"epoch": 5.65, "learning_rate": 4.9519218435037566e-05, "loss": 2.3501, "step": 31000},
    {"epoch": 5.75, "learning_rate": 4.950776525563497e-05, "loss": 1.9139, "step": 31500},
    {"epoch": 5.84, "learning_rate": 4.9496312076232364e-05, "loss": 1.6857, "step": 32000},
    {"epoch": 5.93, "learning_rate": 4.948485889682976e-05, "loss": 1.5418, "step": 32500},
    {"epoch": 6.02, "learning_rate": 4.947340571742716e-05, "loss": 1.4448, "step": 33000},
    {"epoch": 6.11, "learning_rate": 4.946195253802456e-05, "loss": 1.3729, "step": 33500},
    {"epoch": 6.2, "learning_rate": 4.945049935862195e-05, "loss": 1.3178, "step": 34000},
    {"epoch": 6.29, "learning_rate": 4.9439046179219356e-05, "loss": 1.2557, "step": 34500},
    {"epoch": 6.38, "learning_rate": 4.942759299981675e-05, "loss": 1.1956, "step": 35000},
    {"epoch": 6.48, "learning_rate": 4.941613982041415e-05, "loss": 1.1306, "step": 35500},
    {"epoch": 6.57, "learning_rate": 4.940470954737035e-05, "loss": 1.0845, "step": 36000},
    {"epoch": 6.66, "learning_rate": 4.939325636796775e-05, "loss": 1.0483, "step": 36500},
    {"epoch": 6.75, "learning_rate": 4.9381803188565143e-05, "loss": 1.0169, "step": 37000},
    {"epoch": 6.84, "learning_rate": 4.9370350009162546e-05, "loss": 0.9886, "step": 37500},
    {"epoch": 6.93, "learning_rate": 4.9358919736118744e-05, "loss": 0.966, "step": 38000},
    {"epoch": 7.02, "learning_rate": 4.934746655671615e-05, "loss": 0.9429, "step": 38500},
    {"epoch": 7.11, "learning_rate": 4.933601337731354e-05, "loss": 0.9261, "step": 39000},
    {"epoch": 7.21, "learning_rate": 4.932456019791094e-05, "loss": 0.908, "step": 39500},
    {"epoch": 7.3, "learning_rate": 4.9313129924867143e-05, "loss": 0.8914, "step": 40000},
    {"epoch": 7.39, "learning_rate": 4.930167674546454e-05, "loss": 0.8789, "step": 40500},
    {"epoch": 7.48, "learning_rate": 4.929022356606194e-05, "loss": 0.8643, "step": 41000},
    {"epoch": 7.57, "learning_rate": 4.927877038665934e-05, "loss": 0.8531, "step": 41500},
    {"epoch": 7.66, "learning_rate": 4.926731720725673e-05, "loss": 0.8418, "step": 42000},
    {"epoch": 7.75, "learning_rate": 4.925588693421294e-05, "loss": 0.83, "step": 42500},
    {"epoch": 7.84, "learning_rate": 4.924443375481034e-05, "loss": 0.8201, "step": 43000},
    {"epoch": 7.94, "learning_rate": 4.9232980575407736e-05, "loss": 0.8115, "step": 43500},
    {"epoch": 8.03, "learning_rate": 4.922152739600513e-05, "loss": 0.803, "step": 44000},
    {"epoch": 8.12, "learning_rate": 4.9210074216602534e-05, "loss": 0.7935, "step": 44500},
    {"epoch": 8.21, "learning_rate": 4.919862103719993e-05, "loss": 0.7857, "step": 45000},
    {"epoch": 8.3, "learning_rate": 4.9187190764156135e-05, "loss": 0.7753, "step": 45500},
    {"epoch": 8.39, "learning_rate": 4.917573758475353e-05, "loss": 0.7683, "step": 46000},
    {"epoch": 8.48, "learning_rate": 4.916428440535093e-05, "loss": 0.761, "step": 46500},
    {"epoch": 8.57, "learning_rate": 4.915283122594833e-05, "loss": 0.7549, "step": 47000},
    {"epoch": 8.66, "learning_rate": 4.914140095290453e-05, "loss": 0.7478, "step": 47500},
    {"epoch": 8.76, "learning_rate": 4.912994777350193e-05, "loss": 0.7404, "step": 48000},
    {"epoch": 8.85, "learning_rate": 4.9118494594099325e-05, "loss": 0.7335, "step": 48500},
    {"epoch": 8.94, "learning_rate": 4.910704141469672e-05, "loss": 0.7283, "step": 49000},
    {"epoch": 9.03, "learning_rate": 4.9095611141652926e-05, "loss": 0.7231, "step": 49500},
    {"epoch": 9.12, "learning_rate": 4.908415796225032e-05, "loss": 0.7156, "step": 50000},
    {"epoch": 9.21, "learning_rate": 4.9072704782847724e-05, "loss": 0.7099, "step": 50500},
    {"epoch": 9.3, "learning_rate": 4.906125160344512e-05, "loss": 0.7071, "step": 51000},
    {"epoch": 9.39, "learning_rate": 4.9049798424042515e-05, "loss": 0.702, "step": 51500},
    {"epoch": 9.49, "learning_rate": 4.903836815099872e-05, "loss": 0.6982, "step": 52000},
    {"epoch": 9.58, "learning_rate": 4.9026914971596116e-05, "loss": 0.6933, "step": 52500},
    {"epoch": 9.67, "learning_rate": 4.901546179219352e-05, "loss": 0.6901, "step": 53000},
    {"epoch": 9.76, "learning_rate": 4.9004008612790914e-05, "loss": 0.6854, "step": 53500},
    {"epoch": 9.85, "learning_rate": 4.899255543338831e-05, "loss": 0.6813, "step": 54000},
    {"epoch": 9.94, "learning_rate": 4.8981125160344515e-05, "loss": 0.6762, "step": 54500},
    {"epoch": 10.03, "learning_rate": 4.896967198094191e-05, "loss": 0.6708, "step": 55000},
    {"epoch": 10.12, "learning_rate": 4.8958218801539307e-05, "loss": 0.6687, "step": 55500},
    {"epoch": 10.22, "learning_rate": 4.894676562213671e-05, "loss": 0.6641, "step": 56000},
    {"epoch": 10.31, "learning_rate": 4.8935312442734105e-05, "loss": 0.6594, "step": 56500},
    {"epoch": 10.4, "learning_rate": 4.892388216969031e-05, "loss": 0.6578, "step": 57000},
    {"epoch": 10.49, "learning_rate": 4.8912428990287706e-05, "loss": 0.6542, "step": 57500},
    {"epoch": 10.58, "learning_rate": 4.89009758108851e-05, "loss": 0.6509, "step": 58000},
    {"epoch": 10.67, "learning_rate": 4.8889522631482504e-05, "loss": 0.6471, "step": 58500},
    {"epoch": 10.76, "learning_rate": 4.88780694520799e-05, "loss": 0.6463, "step": 59000},
    {"epoch": 10.85, "learning_rate": 4.8866616272677295e-05, "loss": 0.6418, "step": 59500},
    {"epoch": 10.94, "learning_rate": 4.88551859996335e-05, "loss": 0.6391, "step": 60000},
    {"epoch": 11.04, "learning_rate": 4.8843732820230896e-05, "loss": 0.6354, "step": 60500},
    {"epoch": 11.13, "learning_rate": 4.88322796408283e-05, "loss": 0.6327, "step": 61000},
    {"epoch": 11.22, "learning_rate": 4.8820826461425694e-05, "loss": 0.6292, "step": 61500},
    {"epoch": 11.31, "learning_rate": 4.88094190947407e-05, "loss": 0.6258, "step": 62000},
    {"epoch": 11.4, "learning_rate": 4.879798882169691e-05, "loss": 0.6257, "step": 62500},
    {"epoch": 11.49, "learning_rate": 4.87865356422943e-05, "loss": 0.6221, "step": 63000},
    {"epoch": 11.58, "learning_rate": 4.8775082462891706e-05, "loss": 0.618, "step": 63500},
    {"epoch": 11.67, "learning_rate": 4.87636292834891e-05, "loss": 0.6156, "step": 64000},
    {"epoch": 11.77, "learning_rate": 4.87521761040865e-05, "loss": 0.614, "step": 64500},
    {"epoch": 11.86, "learning_rate": 4.87407229246839e-05, "loss": 0.612, "step": 65000},
    {"epoch": 11.95, "learning_rate": 4.8729269745281295e-05, "loss": 0.6096, "step": 65500},
    {"epoch": 12.04, "learning_rate": 4.871781656587869e-05, "loss": 0.6073, "step": 66000},
    {"epoch": 12.13, "learning_rate": 4.8706386292834896e-05, "loss": 0.6039, "step": 66500},
    {"epoch": 12.22, "learning_rate": 4.869493311343229e-05, "loss": 0.6033, "step": 67000},
    {"epoch": 12.31, "learning_rate": 4.8683479934029694e-05, "loss": 0.6005, "step": 67500},
    {"epoch": 12.4, "learning_rate": 4.867202675462709e-05, "loss": 0.5971, "step": 68000},
    {"epoch": 12.5, "learning_rate": 4.866059648158329e-05, "loss": 0.5933, "step": 68500},
    {"epoch": 12.59, "learning_rate": 4.864914330218069e-05, "loss": 0.5947, "step": 69000},
    {"epoch": 12.68, "learning_rate": 4.8637690122778086e-05, "loss": 0.5928, "step": 69500},
    {"epoch": 12.77, "learning_rate": 4.862623694337549e-05, "loss": 0.5897, "step": 70000},
    {"epoch": 12.86, "learning_rate": 4.8614783763972884e-05, "loss": 0.588, "step": 70500},
    {"epoch": 12.95, "learning_rate": 4.860333058457028e-05, "loss": 0.5862, "step": 71000},
    {"epoch": 13.04, "learning_rate": 4.859187740516768e-05, "loss": 0.5835, "step": 71500},
    {"epoch": 13.13, "learning_rate": 4.858042422576508e-05, "loss": 0.5827, "step": 72000},
    {"epoch": 13.23, "learning_rate": 4.8569016859080086e-05, "loss": 0.581, "step": 72500},
    {"epoch": 13.32, "learning_rate": 4.855756367967748e-05, "loss": 0.5791, "step": 73000},
    {"epoch": 13.41, "learning_rate": 4.854611050027488e-05, "loss": 0.5764, "step": 73500},
    {"epoch": 13.5, "learning_rate": 4.853465732087228e-05, "loss": 0.5749, "step": 74000},
    {"epoch": 13.59, "learning_rate": 4.8523204141469675e-05, "loss": 0.5747, "step": 74500},
    {"epoch": 13.68, "learning_rate": 4.8511773868425874e-05, "loss": 0.5717, "step": 75000},
    {"epoch": 13.77, "learning_rate": 4.8500320689023276e-05, "loss": 0.5716, "step": 75500},
    {"epoch": 13.86, "learning_rate": 4.848886750962067e-05, "loss": 0.5691, "step": 76000},
    {"epoch": 13.95, "learning_rate": 4.847741433021807e-05, "loss": 0.5661, "step": 76500},
    {"epoch": 14.05, "learning_rate": 4.846596115081547e-05, "loss": 0.565, "step": 77000},
    {"epoch": 14.14, "learning_rate": 4.845453087777167e-05, "loss": 0.5627, "step": 77500},
    {"epoch": 14.23, "learning_rate": 4.844307769836907e-05, "loss": 0.563, "step": 78000},
    {"epoch": 14.32, "learning_rate": 4.8431624518966466e-05, "loss": 0.561, "step": 78500},
    {"epoch": 14.41, "learning_rate": 4.842017133956386e-05, "loss": 0.5599, "step": 79000},
    {"epoch": 14.5, "learning_rate": 4.8408718160161264e-05, "loss": 0.5573, "step": 79500},
    {"epoch": 14.59, "learning_rate": 4.839726498075866e-05, "loss": 0.5558, "step": 80000},
    {"epoch": 14.68, "learning_rate": 4.8385834707714865e-05, "loss": 0.5559, "step": 80500},
    {"epoch": 14.78, "learning_rate": 4.837438152831226e-05, "loss": 0.5546, "step": 81000},
    {"epoch": 14.87, "learning_rate": 4.8362928348909656e-05, "loss": 0.5531, "step": 81500},
    {"epoch": 14.96, "learning_rate": 4.835147516950706e-05, "loss": 0.5493, "step": 82000},
    {"epoch": 15.05, "learning_rate": 4.8340021990104454e-05, "loss": 0.5483, "step": 82500},
    {"epoch": 15.14, "learning_rate": 4.832859171706065e-05, "loss": 0.548, "step": 83000},
    {"epoch": 15.23, "learning_rate": 4.8317138537658055e-05, "loss": 0.5462, "step": 83500},
    {"epoch": 15.32, "learning_rate": 4.830568535825545e-05, "loss": 0.5454, "step": 84000},
    {"epoch": 15.41, "learning_rate": 4.829423217885285e-05, "loss": 0.5428, "step": 84500},
    {"epoch": 15.51, "learning_rate": 4.828277899945025e-05, "loss": 0.5428, "step": 85000},
    {"epoch": 15.6, "learning_rate": 4.8271325820047645e-05, "loss": 0.5424, "step": 85500},
    {"epoch": 15.69, "learning_rate": 4.825987264064504e-05, "loss": 0.5421, "step": 86000},
    {"epoch": 15.78, "learning_rate": 4.824841946124244e-05, "loss": 0.5382, "step": 86500},
    {"epoch": 15.87, "learning_rate": 4.823698918819865e-05, "loss": 0.5373, "step": 87000},
    {"epoch": 15.96, "learning_rate": 4.8225536008796044e-05, "loss": 0.5359, "step": 87500},
    {"epoch": 16.05, "learning_rate": 4.821408282939344e-05, "loss": 0.5357, "step": 88000},
    {"epoch": 16.14, "learning_rate": 4.8202629649990835e-05, "loss": 0.5351, "step": 88500},
    {"epoch": 16.23, "learning_rate": 4.819117647058824e-05, "loss": 0.5317, "step": 89000},
    {"epoch": 16.33, "learning_rate": 4.817972329118563e-05, "loss": 0.5333, "step": 89500},
    {"epoch": 16.42, "learning_rate": 4.816829301814184e-05, "loss": 0.5309, "step": 90000},
    {"epoch": 16.51, "learning_rate": 4.815683983873924e-05, "loss": 0.5308, "step": 90500},
    {"epoch": 16.6, "learning_rate": 4.8145386659336636e-05, "loss": 0.5289, "step": 91000},
    {"epoch": 16.69, "learning_rate": 4.813393347993403e-05, "loss": 0.5275, "step": 91500},
    {"epoch": 16.78, "learning_rate": 4.812248030053143e-05, "loss": 0.5269, "step": 92000},
    {"epoch": 16.87, "learning_rate": 4.811102712112883e-05, "loss": 0.5248, "step": 92500},
    {"epoch": 16.96, "learning_rate": 4.8099573941726225e-05, "loss": 0.5242, "step": 93000},
    {"epoch": 17.06, "learning_rate": 4.808814366868243e-05, "loss": 0.5238, "step": 93500},
    {"epoch": 17.15, "learning_rate": 4.8076690489279826e-05, "loss": 0.5239, "step": 94000},
    {"epoch": 17.24, "learning_rate": 4.806523730987723e-05, "loss": 0.5221, "step": 94500},
    {"epoch": 17.33, "learning_rate": 4.8053784130474624e-05, "loss": 0.52, "step": 95000},
    {"epoch": 17.42, "learning_rate": 4.804233095107202e-05, "loss": 0.5184, "step": 95500},
    {"epoch": 17.51, "learning_rate": 4.8030900678028225e-05, "loss": 0.5186, "step": 96000},
    {"epoch": 17.6, "learning_rate": 4.801944749862562e-05, "loss": 0.5176, "step": 96500},
    {"epoch": 17.69, "learning_rate": 4.8008017225581826e-05, "loss": 0.5182, "step": 97000},
    {"epoch": 17.79, "learning_rate": 4.799656404617922e-05, "loss": 0.5148, "step": 97500},
    {"epoch": 17.88, "learning_rate": 4.798511086677662e-05, "loss": 0.5157, "step": 98000},
    {"epoch": 17.97, "learning_rate": 4.797365768737402e-05, "loss": 0.5131, "step": 98500}
  ],
  "max_steps": 2192800,
  "num_train_epochs": 400,
  "total_flos": 2.659426878192668e+19,
  "trial_name": null,
  "trial_params": null
}