|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 709692, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9964773451018196e-05, |
|
"loss": 7.3047, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.9929546902036376e-05, |
|
"loss": 6.5184, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.989432035305457e-05, |
|
"loss": 6.2814, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.985909380407276e-05, |
|
"loss": 6.1168, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9823867255090944e-05, |
|
"loss": 6.0133, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.978864070610913e-05, |
|
"loss": 5.8313, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.975341415712732e-05, |
|
"loss": 5.625, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.9718187608145504e-05, |
|
"loss": 5.3774, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.96829610591637e-05, |
|
"loss": 5.1476, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.964773451018188e-05, |
|
"loss": 4.9479, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.961250796120007e-05, |
|
"loss": 4.7909, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9577281412218266e-05, |
|
"loss": 4.6501, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9542054863236446e-05, |
|
"loss": 4.5042, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.950682831425464e-05, |
|
"loss": 4.3819, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9471601765272826e-05, |
|
"loss": 4.2813, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.943637521629101e-05, |
|
"loss": 4.1623, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.94011486673092e-05, |
|
"loss": 4.0686, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.936592211832739e-05, |
|
"loss": 3.9636, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9330695569345574e-05, |
|
"loss": 3.8418, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.929546902036377e-05, |
|
"loss": 3.7808, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.9260242471381954e-05, |
|
"loss": 3.697, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.922501592240014e-05, |
|
"loss": 3.6477, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9189789373418335e-05, |
|
"loss": 3.5502, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9154562824436515e-05, |
|
"loss": 3.5132, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.911933627545471e-05, |
|
"loss": 3.4574, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.9084109726472896e-05, |
|
"loss": 3.4167, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.904888317749108e-05, |
|
"loss": 3.3762, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.901365662850927e-05, |
|
"loss": 3.33, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.8978430079527456e-05, |
|
"loss": 3.2696, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.894320353054564e-05, |
|
"loss": 3.2309, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.890797698156384e-05, |
|
"loss": 3.1656, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8872750432582024e-05, |
|
"loss": 3.135, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.883752388360021e-05, |
|
"loss": 3.1137, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8802297334618405e-05, |
|
"loss": 3.0699, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.8767070785636585e-05, |
|
"loss": 3.0313, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.873184423665478e-05, |
|
"loss": 3.0078, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.8696617687672965e-05, |
|
"loss": 2.9916, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.866139113869115e-05, |
|
"loss": 2.9576, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.862616458970934e-05, |
|
"loss": 2.9201, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.859093804072753e-05, |
|
"loss": 2.9107, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.855571149174572e-05, |
|
"loss": 2.8871, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8520484942763907e-05, |
|
"loss": 2.8691, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.8485258393782093e-05, |
|
"loss": 2.8645, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.845003184480028e-05, |
|
"loss": 2.8233, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8414805295818474e-05, |
|
"loss": 2.7953, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8379578746836654e-05, |
|
"loss": 2.7669, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.834435219785485e-05, |
|
"loss": 2.7489, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.8309125648873035e-05, |
|
"loss": 2.7754, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.827389909989122e-05, |
|
"loss": 2.7525, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.823867255090941e-05, |
|
"loss": 2.7249, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.82034460019276e-05, |
|
"loss": 2.687, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.816821945294579e-05, |
|
"loss": 2.6729, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8132992903963976e-05, |
|
"loss": 2.6922, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.809776635498216e-05, |
|
"loss": 2.6631, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.806253980600035e-05, |
|
"loss": 2.6465, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8027313257018543e-05, |
|
"loss": 2.6219, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7992086708036724e-05, |
|
"loss": 2.6152, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.795686015905492e-05, |
|
"loss": 2.6077, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.7921633610073104e-05, |
|
"loss": 2.6222, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.788640706109129e-05, |
|
"loss": 2.5918, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.785118051210948e-05, |
|
"loss": 2.5734, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.781595396312767e-05, |
|
"loss": 2.5571, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.778072741414586e-05, |
|
"loss": 2.5387, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7745500865164045e-05, |
|
"loss": 2.5273, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.771027431618223e-05, |
|
"loss": 2.5061, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.767504776720042e-05, |
|
"loss": 2.514, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.763982121821861e-05, |
|
"loss": 2.4995, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.760459466923679e-05, |
|
"loss": 2.4845, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.756936812025499e-05, |
|
"loss": 2.4921, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7534141571273174e-05, |
|
"loss": 2.4816, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.749891502229136e-05, |
|
"loss": 2.4777, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7463688473309554e-05, |
|
"loss": 2.4449, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.742846192432774e-05, |
|
"loss": 2.439, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.739323537534593e-05, |
|
"loss": 2.4522, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7358008826364115e-05, |
|
"loss": 2.4519, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.73227822773823e-05, |
|
"loss": 2.404, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.728755572840049e-05, |
|
"loss": 2.4217, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.725232917941868e-05, |
|
"loss": 2.3985, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.721710263043686e-05, |
|
"loss": 2.3941, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.7181876081455056e-05, |
|
"loss": 2.3889, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.714664953247324e-05, |
|
"loss": 2.4091, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.711142298349143e-05, |
|
"loss": 2.3649, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7076196434509624e-05, |
|
"loss": 2.3702, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.704096988552781e-05, |
|
"loss": 2.3551, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.7005743336546e-05, |
|
"loss": 2.3655, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.6970516787564184e-05, |
|
"loss": 2.3533, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.693529023858237e-05, |
|
"loss": 2.3228, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.690006368960056e-05, |
|
"loss": 2.3521, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.686483714061875e-05, |
|
"loss": 2.3226, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.682961059163694e-05, |
|
"loss": 2.3249, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.6794384042655126e-05, |
|
"loss": 2.3061, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.675915749367332e-05, |
|
"loss": 2.2967, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.67239309446915e-05, |
|
"loss": 2.2972, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.668870439570969e-05, |
|
"loss": 2.319, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.665347784672788e-05, |
|
"loss": 2.3123, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.661825129774607e-05, |
|
"loss": 2.2893, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.6583024748764254e-05, |
|
"loss": 2.2594, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.654779819978244e-05, |
|
"loss": 2.2802, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.651257165080063e-05, |
|
"loss": 2.2758, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.647734510181882e-05, |
|
"loss": 2.2651, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.644211855283701e-05, |
|
"loss": 2.2667, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.6406892003855195e-05, |
|
"loss": 2.2427, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.637166545487339e-05, |
|
"loss": 2.2468, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.633643890589157e-05, |
|
"loss": 2.2605, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.630121235690976e-05, |
|
"loss": 2.2575, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.626598580792795e-05, |
|
"loss": 2.2217, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.6230759258946136e-05, |
|
"loss": 2.2353, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.619553270996432e-05, |
|
"loss": 2.2534, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.616030616098252e-05, |
|
"loss": 2.2287, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.61250796120007e-05, |
|
"loss": 2.2149, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.608985306301889e-05, |
|
"loss": 2.2124, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.605462651403708e-05, |
|
"loss": 2.2191, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.6019399965055265e-05, |
|
"loss": 2.1982, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.598417341607346e-05, |
|
"loss": 2.195, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.594894686709164e-05, |
|
"loss": 2.1845, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.591372031810983e-05, |
|
"loss": 2.1877, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.587849376912802e-05, |
|
"loss": 2.1718, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5843267220146206e-05, |
|
"loss": 2.1686, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.580804067116439e-05, |
|
"loss": 2.1746, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.5772814122182587e-05, |
|
"loss": 2.1597, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.573758757320077e-05, |
|
"loss": 2.1625, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.570236102421896e-05, |
|
"loss": 2.1407, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.566713447523715e-05, |
|
"loss": 2.1433, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.5631907926255334e-05, |
|
"loss": 2.1689, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.559668137727353e-05, |
|
"loss": 2.1465, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.556145482829171e-05, |
|
"loss": 2.1502, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.55262282793099e-05, |
|
"loss": 2.1188, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.549100173032809e-05, |
|
"loss": 2.1514, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5455775181346275e-05, |
|
"loss": 2.1102, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.542054863236446e-05, |
|
"loss": 2.1252, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5385322083382656e-05, |
|
"loss": 2.1449, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.5350095534400836e-05, |
|
"loss": 2.1094, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.531486898541903e-05, |
|
"loss": 2.1142, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.527964243643722e-05, |
|
"loss": 2.1201, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.5244415887455404e-05, |
|
"loss": 2.098, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.52091893384736e-05, |
|
"loss": 2.0912, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.517396278949178e-05, |
|
"loss": 2.1082, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.513873624050997e-05, |
|
"loss": 2.1016, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.510350969152816e-05, |
|
"loss": 2.1133, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.5068283142546345e-05, |
|
"loss": 2.0862, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.503305659356453e-05, |
|
"loss": 2.0723, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.4997830044582725e-05, |
|
"loss": 2.0975, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.496260349560091e-05, |
|
"loss": 2.0834, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.49273769466191e-05, |
|
"loss": 2.0644, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.4892150397637286e-05, |
|
"loss": 2.088, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.485692384865547e-05, |
|
"loss": 2.0726, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.482169729967367e-05, |
|
"loss": 2.074, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.478647075069185e-05, |
|
"loss": 2.0736, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.475124420171004e-05, |
|
"loss": 2.056, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.471601765272823e-05, |
|
"loss": 2.0633, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4680791103746414e-05, |
|
"loss": 2.067, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.46455645547646e-05, |
|
"loss": 2.0556, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.4610338005782795e-05, |
|
"loss": 2.0451, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.457511145680098e-05, |
|
"loss": 2.0471, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.453988490781917e-05, |
|
"loss": 2.0763, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4504658358837356e-05, |
|
"loss": 2.0215, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.446943180985554e-05, |
|
"loss": 2.0481, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.4434205260873736e-05, |
|
"loss": 2.0259, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.439897871189192e-05, |
|
"loss": 2.036, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.436375216291011e-05, |
|
"loss": 2.0332, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.43285256139283e-05, |
|
"loss": 2.0387, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.4293299064946484e-05, |
|
"loss": 2.0454, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.425807251596467e-05, |
|
"loss": 2.0148, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.4222845966982864e-05, |
|
"loss": 2.0229, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.418761941800105e-05, |
|
"loss": 2.0375, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.415239286901924e-05, |
|
"loss": 2.0239, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.411716632003743e-05, |
|
"loss": 2.0194, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.408193977105561e-05, |
|
"loss": 2.0271, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.4046713222073806e-05, |
|
"loss": 2.012, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.401148667309199e-05, |
|
"loss": 2.0198, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.397626012411018e-05, |
|
"loss": 1.9945, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.3941033575128366e-05, |
|
"loss": 2.0004, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.390580702614655e-05, |
|
"loss": 2.0044, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.387058047716475e-05, |
|
"loss": 1.9829, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.3835353928182934e-05, |
|
"loss": 1.9796, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.380012737920112e-05, |
|
"loss": 1.9778, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.376490083021931e-05, |
|
"loss": 1.9972, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.37296742812375e-05, |
|
"loss": 2.0046, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.369444773225568e-05, |
|
"loss": 1.9844, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.3659221183273875e-05, |
|
"loss": 1.9935, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.362399463429206e-05, |
|
"loss": 1.9695, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.358876808531025e-05, |
|
"loss": 1.9717, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3553541536328436e-05, |
|
"loss": 1.9795, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.351831498734662e-05, |
|
"loss": 1.9717, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3483088438364816e-05, |
|
"loss": 1.9668, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.3447861889383003e-05, |
|
"loss": 1.9674, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.341263534040119e-05, |
|
"loss": 1.9854, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.337740879141938e-05, |
|
"loss": 1.9695, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.334218224243757e-05, |
|
"loss": 1.9542, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.330695569345575e-05, |
|
"loss": 1.9721, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.3271729144473945e-05, |
|
"loss": 1.9545, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.323650259549213e-05, |
|
"loss": 1.9559, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.320127604651032e-05, |
|
"loss": 1.9466, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.316604949752851e-05, |
|
"loss": 1.9532, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.313082294854669e-05, |
|
"loss": 1.9488, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.3095596399564886e-05, |
|
"loss": 1.9559, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.306036985058307e-05, |
|
"loss": 1.9372, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.302514330160126e-05, |
|
"loss": 1.944, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.298991675261945e-05, |
|
"loss": 1.9516, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.295469020363764e-05, |
|
"loss": 1.9562, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.291946365465582e-05, |
|
"loss": 1.9357, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.2884237105674014e-05, |
|
"loss": 1.9445, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.28490105566922e-05, |
|
"loss": 1.9514, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.281378400771039e-05, |
|
"loss": 1.9441, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.277855745872858e-05, |
|
"loss": 1.9301, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.274333090974676e-05, |
|
"loss": 1.9395, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2708104360764955e-05, |
|
"loss": 1.9468, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.267287781178314e-05, |
|
"loss": 1.9377, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.263765126280133e-05, |
|
"loss": 1.9116, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.2602424713819516e-05, |
|
"loss": 1.9144, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.256719816483771e-05, |
|
"loss": 1.922, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.253197161585589e-05, |
|
"loss": 1.9184, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.2496745066874084e-05, |
|
"loss": 1.9227, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.246151851789227e-05, |
|
"loss": 1.9251, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.242629196891046e-05, |
|
"loss": 1.8982, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.239106541992865e-05, |
|
"loss": 1.8947, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.235583887094683e-05, |
|
"loss": 1.9032, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.2320612321965025e-05, |
|
"loss": 1.9185, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.228538577298321e-05, |
|
"loss": 1.9126, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.22501592240014e-05, |
|
"loss": 1.8936, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.2214932675019586e-05, |
|
"loss": 1.9053, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.217970612603778e-05, |
|
"loss": 1.9096, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.214447957705596e-05, |
|
"loss": 1.9072, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.210925302807415e-05, |
|
"loss": 1.8868, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.207402647909234e-05, |
|
"loss": 1.8924, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.203879993011053e-05, |
|
"loss": 1.8976, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.200357338112872e-05, |
|
"loss": 1.8753, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.196834683214691e-05, |
|
"loss": 1.907, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.1933120283165094e-05, |
|
"loss": 1.8758, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.189789373418328e-05, |
|
"loss": 1.885, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.186266718520147e-05, |
|
"loss": 1.8507, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1827440636219655e-05, |
|
"loss": 1.8878, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.179221408723785e-05, |
|
"loss": 1.8847, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.175698753825603e-05, |
|
"loss": 1.8745, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.172176098927422e-05, |
|
"loss": 1.8778, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1686534440292416e-05, |
|
"loss": 1.8803, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1651307891310596e-05, |
|
"loss": 1.881, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.161608134232879e-05, |
|
"loss": 1.8846, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.158085479334698e-05, |
|
"loss": 1.8718, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1545628244365164e-05, |
|
"loss": 1.8804, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.151040169538335e-05, |
|
"loss": 1.8753, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.147517514640154e-05, |
|
"loss": 1.8816, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.1439948597419725e-05, |
|
"loss": 1.8634, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.140472204843792e-05, |
|
"loss": 1.8694, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1369495499456105e-05, |
|
"loss": 1.8779, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.133426895047429e-05, |
|
"loss": 1.8669, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1299042401492486e-05, |
|
"loss": 1.8656, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1263815852510666e-05, |
|
"loss": 1.8387, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.122858930352886e-05, |
|
"loss": 1.8518, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.1193362754547046e-05, |
|
"loss": 1.8348, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.115813620556523e-05, |
|
"loss": 1.8483, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.112290965658342e-05, |
|
"loss": 1.853, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.108768310760161e-05, |
|
"loss": 1.8376, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.1052456558619794e-05, |
|
"loss": 1.8561, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.101723000963799e-05, |
|
"loss": 1.8326, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.0982003460656175e-05, |
|
"loss": 1.8506, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.094677691167436e-05, |
|
"loss": 1.8433, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0911550362692555e-05, |
|
"loss": 1.8508, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0876323813710735e-05, |
|
"loss": 1.8493, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.084109726472893e-05, |
|
"loss": 1.8302, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0805870715747116e-05, |
|
"loss": 1.8398, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.07706441667653e-05, |
|
"loss": 1.8376, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.073541761778349e-05, |
|
"loss": 1.8452, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.070019106880168e-05, |
|
"loss": 1.8554, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0664964519819864e-05, |
|
"loss": 1.8459, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.062973797083806e-05, |
|
"loss": 1.84, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.0594511421856244e-05, |
|
"loss": 1.8279, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.055928487287443e-05, |
|
"loss": 1.8303, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0524058323892625e-05, |
|
"loss": 1.8323, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0488831774910805e-05, |
|
"loss": 1.8017, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0453605225929e-05, |
|
"loss": 1.8268, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.0418378676947185e-05, |
|
"loss": 1.8221, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.038315212796537e-05, |
|
"loss": 1.832, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.034792557898356e-05, |
|
"loss": 1.8366, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.0312699030001746e-05, |
|
"loss": 1.8313, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.027747248101994e-05, |
|
"loss": 1.8124, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.024224593203813e-05, |
|
"loss": 1.8144, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0207019383056314e-05, |
|
"loss": 1.8164, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.01717928340745e-05, |
|
"loss": 1.8316, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0136566285092694e-05, |
|
"loss": 1.8105, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.0101339736110874e-05, |
|
"loss": 1.8119, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.006611318712907e-05, |
|
"loss": 1.7914, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.0030886638147255e-05, |
|
"loss": 1.8251, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.999566008916544e-05, |
|
"loss": 1.8176, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.996043354018363e-05, |
|
"loss": 1.7962, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.9925206991201816e-05, |
|
"loss": 1.8147, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.988998044222001e-05, |
|
"loss": 1.8182, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.9854753893238196e-05, |
|
"loss": 1.7926, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.981952734425638e-05, |
|
"loss": 1.8024, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.978430079527457e-05, |
|
"loss": 1.7953, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9749074246292764e-05, |
|
"loss": 1.7986, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9713847697310944e-05, |
|
"loss": 1.7843, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.967862114832914e-05, |
|
"loss": 1.8076, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.9643394599347324e-05, |
|
"loss": 1.8062, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.960816805036551e-05, |
|
"loss": 1.7963, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9572941501383705e-05, |
|
"loss": 1.7824, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.953771495240189e-05, |
|
"loss": 1.7936, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.950248840342008e-05, |
|
"loss": 1.7937, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.9467261854438266e-05, |
|
"loss": 1.7844, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.943203530545645e-05, |
|
"loss": 1.7965, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.939680875647464e-05, |
|
"loss": 1.7957, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.936158220749283e-05, |
|
"loss": 1.7802, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.932635565851101e-05, |
|
"loss": 1.7885, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.929112910952921e-05, |
|
"loss": 1.7663, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.9255902560547394e-05, |
|
"loss": 1.7824, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.922067601156558e-05, |
|
"loss": 1.7829, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.9185449462583774e-05, |
|
"loss": 1.7797, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.915022291360196e-05, |
|
"loss": 1.7706, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.911499636462015e-05, |
|
"loss": 1.8029, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.9079769815638335e-05, |
|
"loss": 1.772, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.904454326665652e-05, |
|
"loss": 1.7736, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.900931671767471e-05, |
|
"loss": 1.7655, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.89740901686929e-05, |
|
"loss": 1.7748, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.893886361971108e-05, |
|
"loss": 1.791, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8903637070729276e-05, |
|
"loss": 1.7676, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.886841052174746e-05, |
|
"loss": 1.7681, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.883318397276565e-05, |
|
"loss": 1.7815, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.8797957423783844e-05, |
|
"loss": 1.7793, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.876273087480203e-05, |
|
"loss": 1.7727, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.872750432582022e-05, |
|
"loss": 1.7707, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.8692277776838405e-05, |
|
"loss": 1.7681, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.865705122785659e-05, |
|
"loss": 1.7605, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.862182467887478e-05, |
|
"loss": 1.7668, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.858659812989297e-05, |
|
"loss": 1.7638, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.855137158091115e-05, |
|
"loss": 1.7543, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.8516145031929346e-05, |
|
"loss": 1.7586, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.848091848294754e-05, |
|
"loss": 1.7403, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.844569193396572e-05, |
|
"loss": 1.769, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.841046538498391e-05, |
|
"loss": 1.7605, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.83752388360021e-05, |
|
"loss": 1.7572, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.834001228702029e-05, |
|
"loss": 1.7739, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.8304785738038474e-05, |
|
"loss": 1.7649, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.826955918905666e-05, |
|
"loss": 1.748, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.823433264007485e-05, |
|
"loss": 1.7544, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.819910609109304e-05, |
|
"loss": 1.7466, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.816387954211122e-05, |
|
"loss": 1.7447, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.8128652993129415e-05, |
|
"loss": 1.7424, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.809342644414761e-05, |
|
"loss": 1.7502, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.805819989516579e-05, |
|
"loss": 1.7462, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.802297334618398e-05, |
|
"loss": 1.7351, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.798774679720217e-05, |
|
"loss": 1.7531, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.795252024822036e-05, |
|
"loss": 1.7257, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7917293699238544e-05, |
|
"loss": 1.7429, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.788206715025673e-05, |
|
"loss": 1.7488, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.784684060127492e-05, |
|
"loss": 1.7516, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.781161405229311e-05, |
|
"loss": 1.741, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.77763875033113e-05, |
|
"loss": 1.7334, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.7741160954329485e-05, |
|
"loss": 1.7122, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.770593440534768e-05, |
|
"loss": 1.7641, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.767070785636586e-05, |
|
"loss": 1.7368, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.763548130738405e-05, |
|
"loss": 1.742, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.760025475840224e-05, |
|
"loss": 1.738, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.7565028209420426e-05, |
|
"loss": 1.7385, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.752980166043861e-05, |
|
"loss": 1.7339, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.749457511145681e-05, |
|
"loss": 1.7203, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.745934856247499e-05, |
|
"loss": 1.7333, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.742412201349318e-05, |
|
"loss": 1.734, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.738889546451137e-05, |
|
"loss": 1.736, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.7353668915529554e-05, |
|
"loss": 1.7323, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.731844236654775e-05, |
|
"loss": 1.7213, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.728321581756593e-05, |
|
"loss": 1.7312, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.724798926858412e-05, |
|
"loss": 1.7008, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.721276271960231e-05, |
|
"loss": 1.7275, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7177536170620496e-05, |
|
"loss": 1.7313, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.714230962163868e-05, |
|
"loss": 1.7141, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 3.7107083072656876e-05, |
|
"loss": 1.7279, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.7071856523675056e-05, |
|
"loss": 1.7198, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.703662997469325e-05, |
|
"loss": 1.696, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.700140342571144e-05, |
|
"loss": 1.714, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.6966176876729624e-05, |
|
"loss": 1.7134, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.693095032774782e-05, |
|
"loss": 1.7247, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6895723778766e-05, |
|
"loss": 1.7243, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.686049722978419e-05, |
|
"loss": 1.7078, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.682527068080238e-05, |
|
"loss": 1.7228, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.6790044131820565e-05, |
|
"loss": 1.715, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.675481758283875e-05, |
|
"loss": 1.6951, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6719591033856946e-05, |
|
"loss": 1.6982, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.668436448487513e-05, |
|
"loss": 1.7091, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.664913793589332e-05, |
|
"loss": 1.712, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.6613911386911506e-05, |
|
"loss": 1.7082, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.657868483792969e-05, |
|
"loss": 1.711, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.654345828894789e-05, |
|
"loss": 1.7219, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.650823173996607e-05, |
|
"loss": 1.7177, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.647300519098426e-05, |
|
"loss": 1.7017, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.643777864200245e-05, |
|
"loss": 1.7206, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6402552093020635e-05, |
|
"loss": 1.7188, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.636732554403882e-05, |
|
"loss": 1.6969, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6332098995057015e-05, |
|
"loss": 1.7136, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.62968724460752e-05, |
|
"loss": 1.7186, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.626164589709339e-05, |
|
"loss": 1.7099, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6226419348111576e-05, |
|
"loss": 1.7033, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.619119279912976e-05, |
|
"loss": 1.6896, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6155966250147956e-05, |
|
"loss": 1.7101, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6120739701166137e-05, |
|
"loss": 1.7098, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.608551315218433e-05, |
|
"loss": 1.6815, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.605028660320252e-05, |
|
"loss": 1.6913, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.6015060054220704e-05, |
|
"loss": 1.7059, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.59798335052389e-05, |
|
"loss": 1.6873, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.5944606956257085e-05, |
|
"loss": 1.6798, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.590938040727527e-05, |
|
"loss": 1.6992, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.587415385829346e-05, |
|
"loss": 1.6907, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5838927309311645e-05, |
|
"loss": 1.6852, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.580370076032983e-05, |
|
"loss": 1.6826, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.5768474211348026e-05, |
|
"loss": 1.6713, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5733247662366206e-05, |
|
"loss": 1.692, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.56980211133844e-05, |
|
"loss": 1.7061, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.566279456440259e-05, |
|
"loss": 1.6876, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.5627568015420774e-05, |
|
"loss": 1.6913, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.559234146643897e-05, |
|
"loss": 1.6888, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5557114917457154e-05, |
|
"loss": 1.6828, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.552188836847534e-05, |
|
"loss": 1.6915, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.548666181949353e-05, |
|
"loss": 1.6778, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5451435270511715e-05, |
|
"loss": 1.6868, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.54162087215299e-05, |
|
"loss": 1.6895, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5380982172548095e-05, |
|
"loss": 1.6719, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.534575562356628e-05, |
|
"loss": 1.6942, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.531052907458447e-05, |
|
"loss": 1.6821, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.5275302525602656e-05, |
|
"loss": 1.6694, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.524007597662084e-05, |
|
"loss": 1.6886, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.520484942763904e-05, |
|
"loss": 1.6635, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.5169622878657224e-05, |
|
"loss": 1.6591, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.513439632967541e-05, |
|
"loss": 1.6875, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.50991697806936e-05, |
|
"loss": 1.66, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.506394323171179e-05, |
|
"loss": 1.6815, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.502871668272997e-05, |
|
"loss": 1.6714, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.4993490133748165e-05, |
|
"loss": 1.6703, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.495826358476635e-05, |
|
"loss": 1.6772, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 3.492303703578454e-05, |
|
"loss": 1.6698, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.488781048680273e-05, |
|
"loss": 1.6638, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.485258393782091e-05, |
|
"loss": 1.6613, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.4817357388839106e-05, |
|
"loss": 1.6832, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.478213083985729e-05, |
|
"loss": 1.6443, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.474690429087548e-05, |
|
"loss": 1.6696, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.471167774189367e-05, |
|
"loss": 1.6726, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.467645119291186e-05, |
|
"loss": 1.6643, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.464122464393004e-05, |
|
"loss": 1.6555, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4605998094948234e-05, |
|
"loss": 1.6469, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.457077154596642e-05, |
|
"loss": 1.6534, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.453554499698461e-05, |
|
"loss": 1.6406, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.45003184480028e-05, |
|
"loss": 1.6616, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.446509189902098e-05, |
|
"loss": 1.6385, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.4429865350039176e-05, |
|
"loss": 1.6491, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.439463880105736e-05, |
|
"loss": 1.6511, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.435941225207555e-05, |
|
"loss": 1.6546, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4324185703093736e-05, |
|
"loss": 1.6623, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.428895915411193e-05, |
|
"loss": 1.6536, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.425373260513011e-05, |
|
"loss": 1.6487, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.4218506056148304e-05, |
|
"loss": 1.6546, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.418327950716649e-05, |
|
"loss": 1.6564, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.414805295818468e-05, |
|
"loss": 1.655, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.411282640920287e-05, |
|
"loss": 1.6562, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.407759986022105e-05, |
|
"loss": 1.645, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.4042373311239245e-05, |
|
"loss": 1.6406, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.400714676225743e-05, |
|
"loss": 1.6181, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.397192021327562e-05, |
|
"loss": 1.648, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 3.3936693664293806e-05, |
|
"loss": 1.6429, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.3901467115312e-05, |
|
"loss": 1.6285, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.386624056633018e-05, |
|
"loss": 1.6624, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.383101401734837e-05, |
|
"loss": 1.6395, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.379578746836656e-05, |
|
"loss": 1.6601, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.376056091938475e-05, |
|
"loss": 1.6492, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.372533437040294e-05, |
|
"loss": 1.6444, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.369010782142112e-05, |
|
"loss": 1.6685, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.3654881272439315e-05, |
|
"loss": 1.644, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.36196547234575e-05, |
|
"loss": 1.6401, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.358442817447569e-05, |
|
"loss": 1.6424, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.3549201625493875e-05, |
|
"loss": 1.6277, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.351397507651207e-05, |
|
"loss": 1.6495, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.347874852753025e-05, |
|
"loss": 1.6405, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.344352197854844e-05, |
|
"loss": 1.6408, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.340829542956663e-05, |
|
"loss": 1.6469, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.3373068880584817e-05, |
|
"loss": 1.6347, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.333784233160301e-05, |
|
"loss": 1.6348, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.330261578262119e-05, |
|
"loss": 1.6232, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.3267389233639384e-05, |
|
"loss": 1.6322, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.323216268465757e-05, |
|
"loss": 1.6263, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.319693613567576e-05, |
|
"loss": 1.6207, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.3161709586693945e-05, |
|
"loss": 1.6282, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.312648303771214e-05, |
|
"loss": 1.6293, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.3091256488730325e-05, |
|
"loss": 1.6323, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.305602993974851e-05, |
|
"loss": 1.6343, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.30208033907667e-05, |
|
"loss": 1.6286, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.2985576841784886e-05, |
|
"loss": 1.6234, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.295035029280308e-05, |
|
"loss": 1.6215, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.291512374382127e-05, |
|
"loss": 1.6327, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.2879897194839454e-05, |
|
"loss": 1.6202, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.284467064585764e-05, |
|
"loss": 1.6296, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.280944409687583e-05, |
|
"loss": 1.6309, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.2774217547894014e-05, |
|
"loss": 1.6442, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.273899099891221e-05, |
|
"loss": 1.6292, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.2703764449930395e-05, |
|
"loss": 1.6409, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.266853790094858e-05, |
|
"loss": 1.6236, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.2633311351966775e-05, |
|
"loss": 1.6235, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.2598084802984956e-05, |
|
"loss": 1.6319, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.256285825400315e-05, |
|
"loss": 1.6215, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.2527631705021336e-05, |
|
"loss": 1.6065, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.249240515603952e-05, |
|
"loss": 1.6236, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.245717860705771e-05, |
|
"loss": 1.5967, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.24219520580759e-05, |
|
"loss": 1.6094, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.238672550909409e-05, |
|
"loss": 1.6157, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.235149896011228e-05, |
|
"loss": 1.6229, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.2316272411130464e-05, |
|
"loss": 1.6157, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.228104586214865e-05, |
|
"loss": 1.6239, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.2245819313166845e-05, |
|
"loss": 1.6115, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.2210592764185025e-05, |
|
"loss": 1.613, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.217536621520322e-05, |
|
"loss": 1.6138, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.2140139666221406e-05, |
|
"loss": 1.6095, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.210491311723959e-05, |
|
"loss": 1.6144, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.206968656825778e-05, |
|
"loss": 1.6083, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.2034460019275966e-05, |
|
"loss": 1.6129, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.199923347029416e-05, |
|
"loss": 1.6139, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.196400692131235e-05, |
|
"loss": 1.612, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.1928780372330534e-05, |
|
"loss": 1.6092, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.189355382334872e-05, |
|
"loss": 1.6196, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.1858327274366914e-05, |
|
"loss": 1.6083, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.1823100725385094e-05, |
|
"loss": 1.6137, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.178787417640329e-05, |
|
"loss": 1.6011, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 3.1752647627421475e-05, |
|
"loss": 1.5961, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.171742107843966e-05, |
|
"loss": 1.5967, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.168219452945785e-05, |
|
"loss": 1.6007, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.1646967980476036e-05, |
|
"loss": 1.5962, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.161174143149423e-05, |
|
"loss": 1.5978, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.1576514882512416e-05, |
|
"loss": 1.6136, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.15412883335306e-05, |
|
"loss": 1.599, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.150606178454879e-05, |
|
"loss": 1.6165, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.1470835235566984e-05, |
|
"loss": 1.6045, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.1435608686585164e-05, |
|
"loss": 1.6059, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.140038213760336e-05, |
|
"loss": 1.6107, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.1365155588621545e-05, |
|
"loss": 1.6015, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.132992903963973e-05, |
|
"loss": 1.5839, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.1294702490657925e-05, |
|
"loss": 1.6052, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.1259475941676105e-05, |
|
"loss": 1.6105, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.12242493926943e-05, |
|
"loss": 1.5993, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.1189022843712486e-05, |
|
"loss": 1.5978, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.115379629473067e-05, |
|
"loss": 1.5906, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.111856974574886e-05, |
|
"loss": 1.6163, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.108334319676705e-05, |
|
"loss": 1.5865, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.1048116647785233e-05, |
|
"loss": 1.5982, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.101289009880343e-05, |
|
"loss": 1.5857, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.0977663549821614e-05, |
|
"loss": 1.5938, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.09424370008398e-05, |
|
"loss": 1.5846, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.0907210451857995e-05, |
|
"loss": 1.5827, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.087198390287618e-05, |
|
"loss": 1.585, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.083675735389437e-05, |
|
"loss": 1.6006, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.0801530804912555e-05, |
|
"loss": 1.5951, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.076630425593074e-05, |
|
"loss": 1.5691, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.073107770694893e-05, |
|
"loss": 1.6121, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.069585115796712e-05, |
|
"loss": 1.5855, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.06606246089853e-05, |
|
"loss": 1.5982, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.0625398060003497e-05, |
|
"loss": 1.5991, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.0590171511021684e-05, |
|
"loss": 1.5902, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.055494496203987e-05, |
|
"loss": 1.5886, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.0519718413058064e-05, |
|
"loss": 1.578, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.0484491864076248e-05, |
|
"loss": 1.584, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 3.0449265315094438e-05, |
|
"loss": 1.5859, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0414038766112625e-05, |
|
"loss": 1.5647, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0378812217130815e-05, |
|
"loss": 1.5826, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0343585668149e-05, |
|
"loss": 1.5689, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.030835911916719e-05, |
|
"loss": 1.5738, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0273132570185376e-05, |
|
"loss": 1.5774, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0237906021203566e-05, |
|
"loss": 1.5727, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0202679472221756e-05, |
|
"loss": 1.5823, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0167452923239943e-05, |
|
"loss": 1.59, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0132226374258134e-05, |
|
"loss": 1.5751, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0096999825276317e-05, |
|
"loss": 1.5672, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.0061773276294507e-05, |
|
"loss": 1.571, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.0026546727312694e-05, |
|
"loss": 1.5825, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.9991320178330885e-05, |
|
"loss": 1.5836, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.9956093629349068e-05, |
|
"loss": 1.5874, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.992086708036726e-05, |
|
"loss": 1.5686, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9885640531385445e-05, |
|
"loss": 1.5852, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9850413982403636e-05, |
|
"loss": 1.5594, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9815187433421826e-05, |
|
"loss": 1.5611, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.9779960884440013e-05, |
|
"loss": 1.5729, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9744734335458203e-05, |
|
"loss": 1.5689, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9709507786476387e-05, |
|
"loss": 1.5816, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9674281237494577e-05, |
|
"loss": 1.5728, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9639054688512764e-05, |
|
"loss": 1.5673, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9603828139530954e-05, |
|
"loss": 1.5657, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9568601590549138e-05, |
|
"loss": 1.564, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9533375041567328e-05, |
|
"loss": 1.5606, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.949814849258552e-05, |
|
"loss": 1.5582, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9462921943603705e-05, |
|
"loss": 1.5718, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 2.9427695394621895e-05, |
|
"loss": 1.5769, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9392468845640082e-05, |
|
"loss": 1.5695, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9357242296658273e-05, |
|
"loss": 1.5549, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9322015747676456e-05, |
|
"loss": 1.5715, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9286789198694646e-05, |
|
"loss": 1.5507, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9251562649712833e-05, |
|
"loss": 1.5762, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9216336100731024e-05, |
|
"loss": 1.5684, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.9181109551749207e-05, |
|
"loss": 1.5616, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.91458830027674e-05, |
|
"loss": 1.5824, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.911065645378559e-05, |
|
"loss": 1.5609, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9075429904803775e-05, |
|
"loss": 1.5675, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9040203355821965e-05, |
|
"loss": 1.5785, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9004976806840152e-05, |
|
"loss": 1.5612, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.8969750257858342e-05, |
|
"loss": 1.5634, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.8934523708876526e-05, |
|
"loss": 1.5721, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8899297159894716e-05, |
|
"loss": 1.5641, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8864070610912903e-05, |
|
"loss": 1.5598, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8828844061931093e-05, |
|
"loss": 1.5645, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8793617512949283e-05, |
|
"loss": 1.5563, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.875839096396747e-05, |
|
"loss": 1.5488, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.872316441498566e-05, |
|
"loss": 1.565, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.8687937866003844e-05, |
|
"loss": 1.5612, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.8652711317022034e-05, |
|
"loss": 1.5537, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.861748476804022e-05, |
|
"loss": 1.5709, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.858225821905841e-05, |
|
"loss": 1.5456, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8547031670076595e-05, |
|
"loss": 1.5473, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8511805121094785e-05, |
|
"loss": 1.5592, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8476578572112972e-05, |
|
"loss": 1.5503, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8441352023131162e-05, |
|
"loss": 1.5577, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8406125474149353e-05, |
|
"loss": 1.5548, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.837089892516754e-05, |
|
"loss": 1.5601, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.833567237618573e-05, |
|
"loss": 1.556, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8300445827203913e-05, |
|
"loss": 1.5495, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8265219278222104e-05, |
|
"loss": 1.5504, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.822999272924029e-05, |
|
"loss": 1.5581, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.819476618025848e-05, |
|
"loss": 1.5516, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.8159539631276664e-05, |
|
"loss": 1.543, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.8124313082294855e-05, |
|
"loss": 1.555, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.808908653331304e-05, |
|
"loss": 1.5422, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.8053859984331232e-05, |
|
"loss": 1.5572, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.8018633435349422e-05, |
|
"loss": 1.5592, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.798340688636761e-05, |
|
"loss": 1.5585, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.79481803373858e-05, |
|
"loss": 1.54, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7912953788403983e-05, |
|
"loss": 1.5433, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7877727239422173e-05, |
|
"loss": 1.5444, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.784250069044036e-05, |
|
"loss": 1.5474, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.780727414145855e-05, |
|
"loss": 1.557, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 2.7772047592476734e-05, |
|
"loss": 1.5388, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7736821043494928e-05, |
|
"loss": 1.5422, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7701594494513118e-05, |
|
"loss": 1.55, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.76663679455313e-05, |
|
"loss": 1.5508, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.7631141396549492e-05, |
|
"loss": 1.5562, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.759591484756768e-05, |
|
"loss": 1.5494, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.756068829858587e-05, |
|
"loss": 1.5501, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7525461749604052e-05, |
|
"loss": 1.5343, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7490235200622243e-05, |
|
"loss": 1.5422, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.745500865164043e-05, |
|
"loss": 1.5431, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.741978210265862e-05, |
|
"loss": 1.5465, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7384555553676803e-05, |
|
"loss": 1.5319, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7349329004694997e-05, |
|
"loss": 1.5447, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.7314102455713187e-05, |
|
"loss": 1.5526, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.727887590673137e-05, |
|
"loss": 1.5406, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.724364935774956e-05, |
|
"loss": 1.5548, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7208422808767748e-05, |
|
"loss": 1.5251, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.717319625978594e-05, |
|
"loss": 1.5489, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7137969710804122e-05, |
|
"loss": 1.5399, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.7102743161822312e-05, |
|
"loss": 1.5326, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.70675166128405e-05, |
|
"loss": 1.539, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.703229006385869e-05, |
|
"loss": 1.533, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.6997063514876876e-05, |
|
"loss": 1.5366, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.6961836965895067e-05, |
|
"loss": 1.5315, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.6926610416913257e-05, |
|
"loss": 1.5349, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.689138386793144e-05, |
|
"loss": 1.5236, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.685615731894963e-05, |
|
"loss": 1.529, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6820930769967818e-05, |
|
"loss": 1.5363, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.6785704220986008e-05, |
|
"loss": 1.5335, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.675047767200419e-05, |
|
"loss": 1.5194, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6715251123022385e-05, |
|
"loss": 1.5217, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.668002457404057e-05, |
|
"loss": 1.527, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.664479802505876e-05, |
|
"loss": 1.5272, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.660957147607695e-05, |
|
"loss": 1.5252, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6574344927095136e-05, |
|
"loss": 1.527, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6539118378113326e-05, |
|
"loss": 1.5201, |
|
"step": 333000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.650389182913151e-05, |
|
"loss": 1.53, |
|
"step": 333500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.64686652801497e-05, |
|
"loss": 1.5257, |
|
"step": 334000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.6433438731167887e-05, |
|
"loss": 1.5265, |
|
"step": 334500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6398212182186077e-05, |
|
"loss": 1.5212, |
|
"step": 335000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.636298563320426e-05, |
|
"loss": 1.528, |
|
"step": 335500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6327759084222455e-05, |
|
"loss": 1.5323, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.6292532535240638e-05, |
|
"loss": 1.5255, |
|
"step": 336500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.625730598625883e-05, |
|
"loss": 1.5186, |
|
"step": 337000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.622207943727702e-05, |
|
"loss": 1.5431, |
|
"step": 337500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6186852888295206e-05, |
|
"loss": 1.5162, |
|
"step": 338000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6151626339313396e-05, |
|
"loss": 1.5148, |
|
"step": 338500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.611639979033158e-05, |
|
"loss": 1.5157, |
|
"step": 339000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.608117324134977e-05, |
|
"loss": 1.5377, |
|
"step": 339500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6045946692367957e-05, |
|
"loss": 1.5207, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.6010720143386147e-05, |
|
"loss": 1.5013, |
|
"step": 340500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.5975493594404334e-05, |
|
"loss": 1.5212, |
|
"step": 341000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 2.5940267045422524e-05, |
|
"loss": 1.531, |
|
"step": 341500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5905040496440714e-05, |
|
"loss": 1.5161, |
|
"step": 342000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5869813947458898e-05, |
|
"loss": 1.5135, |
|
"step": 342500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5834587398477088e-05, |
|
"loss": 1.5253, |
|
"step": 343000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5799360849495275e-05, |
|
"loss": 1.5274, |
|
"step": 343500 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5764134300513465e-05, |
|
"loss": 1.5188, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.572890775153165e-05, |
|
"loss": 1.5449, |
|
"step": 344500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.569368120254984e-05, |
|
"loss": 1.51, |
|
"step": 345000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5658454653568026e-05, |
|
"loss": 1.5042, |
|
"step": 345500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5623228104586216e-05, |
|
"loss": 1.5027, |
|
"step": 346000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.5588001555604403e-05, |
|
"loss": 1.522, |
|
"step": 346500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5552775006622593e-05, |
|
"loss": 1.5235, |
|
"step": 347000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5517548457640784e-05, |
|
"loss": 1.5245, |
|
"step": 347500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5482321908658967e-05, |
|
"loss": 1.5253, |
|
"step": 348000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.5447095359677158e-05, |
|
"loss": 1.5275, |
|
"step": 348500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5411868810695344e-05, |
|
"loss": 1.5279, |
|
"step": 349000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5376642261713535e-05, |
|
"loss": 1.5291, |
|
"step": 349500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5341415712731718e-05, |
|
"loss": 1.5053, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5306189163749912e-05, |
|
"loss": 1.5128, |
|
"step": 350500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5270962614768095e-05, |
|
"loss": 1.5131, |
|
"step": 351000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5235736065786286e-05, |
|
"loss": 1.5201, |
|
"step": 351500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5200509516804473e-05, |
|
"loss": 1.5065, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5165282967822663e-05, |
|
"loss": 1.5378, |
|
"step": 352500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5130056418840853e-05, |
|
"loss": 1.5125, |
|
"step": 353000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5094829869859037e-05, |
|
"loss": 1.5002, |
|
"step": 353500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5059603320877227e-05, |
|
"loss": 1.511, |
|
"step": 354000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5024376771895414e-05, |
|
"loss": 1.5093, |
|
"step": 354500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.4989150222913604e-05, |
|
"loss": 1.4916, |
|
"step": 355000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.495392367393179e-05, |
|
"loss": 1.5165, |
|
"step": 355500 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.491869712494998e-05, |
|
"loss": 1.5244, |
|
"step": 356000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.488347057596817e-05, |
|
"loss": 1.5193, |
|
"step": 356500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4848244026986355e-05, |
|
"loss": 1.5006, |
|
"step": 357000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4813017478004542e-05, |
|
"loss": 1.5149, |
|
"step": 357500 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4777790929022732e-05, |
|
"loss": 1.5209, |
|
"step": 358000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.474256438004092e-05, |
|
"loss": 1.5119, |
|
"step": 358500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4707337831059106e-05, |
|
"loss": 1.5099, |
|
"step": 359000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4672111282077297e-05, |
|
"loss": 1.5027, |
|
"step": 359500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4636884733095487e-05, |
|
"loss": 1.5085, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.4601658184113674e-05, |
|
"loss": 1.5088, |
|
"step": 360500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.456643163513186e-05, |
|
"loss": 1.5116, |
|
"step": 361000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.453120508615005e-05, |
|
"loss": 1.5279, |
|
"step": 361500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4495978537168238e-05, |
|
"loss": 1.5016, |
|
"step": 362000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4460751988186425e-05, |
|
"loss": 1.5017, |
|
"step": 362500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4425525439204615e-05, |
|
"loss": 1.5148, |
|
"step": 363000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4390298890222802e-05, |
|
"loss": 1.4919, |
|
"step": 363500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.435507234124099e-05, |
|
"loss": 1.5061, |
|
"step": 364000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.4319845792259176e-05, |
|
"loss": 1.5023, |
|
"step": 364500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.428461924327737e-05, |
|
"loss": 1.4962, |
|
"step": 365000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4249392694295556e-05, |
|
"loss": 1.4906, |
|
"step": 365500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4214166145313743e-05, |
|
"loss": 1.5112, |
|
"step": 366000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.417893959633193e-05, |
|
"loss": 1.5041, |
|
"step": 366500 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.414371304735012e-05, |
|
"loss": 1.4872, |
|
"step": 367000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 2.4108486498368307e-05, |
|
"loss": 1.5038, |
|
"step": 367500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4073259949386494e-05, |
|
"loss": 1.4959, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4038033400404684e-05, |
|
"loss": 1.4873, |
|
"step": 368500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.400280685142287e-05, |
|
"loss": 1.5008, |
|
"step": 369000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.3967580302441058e-05, |
|
"loss": 1.5, |
|
"step": 369500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.393235375345925e-05, |
|
"loss": 1.5061, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.389712720447744e-05, |
|
"loss": 1.4817, |
|
"step": 370500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3861900655495626e-05, |
|
"loss": 1.5042, |
|
"step": 371000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3826674106513813e-05, |
|
"loss": 1.4949, |
|
"step": 371500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.3791447557532e-05, |
|
"loss": 1.4928, |
|
"step": 372000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.375622100855019e-05, |
|
"loss": 1.4913, |
|
"step": 372500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3720994459568377e-05, |
|
"loss": 1.4902, |
|
"step": 373000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3685767910586564e-05, |
|
"loss": 1.4975, |
|
"step": 373500 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.3650541361604754e-05, |
|
"loss": 1.4784, |
|
"step": 374000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 2.361531481262294e-05, |
|
"loss": 1.4918, |
|
"step": 374500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.358008826364113e-05, |
|
"loss": 1.4811, |
|
"step": 375000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3544861714659318e-05, |
|
"loss": 1.4877, |
|
"step": 375500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.350963516567751e-05, |
|
"loss": 1.4916, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3474408616695695e-05, |
|
"loss": 1.4884, |
|
"step": 376500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.3439182067713882e-05, |
|
"loss": 1.496, |
|
"step": 377000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3403955518732072e-05, |
|
"loss": 1.4891, |
|
"step": 377500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.336872896975026e-05, |
|
"loss": 1.4932, |
|
"step": 378000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3333502420768446e-05, |
|
"loss": 1.4884, |
|
"step": 378500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3298275871786633e-05, |
|
"loss": 1.4978, |
|
"step": 379000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.3263049322804823e-05, |
|
"loss": 1.4961, |
|
"step": 379500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.322782277382301e-05, |
|
"loss": 1.4952, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.31925962248412e-05, |
|
"loss": 1.4837, |
|
"step": 380500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3157369675859388e-05, |
|
"loss": 1.4911, |
|
"step": 381000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3122143126877578e-05, |
|
"loss": 1.486, |
|
"step": 381500 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3086916577895765e-05, |
|
"loss": 1.4972, |
|
"step": 382000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.305169002891395e-05, |
|
"loss": 1.489, |
|
"step": 382500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3016463479932142e-05, |
|
"loss": 1.4957, |
|
"step": 383000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.298123693095033e-05, |
|
"loss": 1.4988, |
|
"step": 383500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.2946010381968516e-05, |
|
"loss": 1.4827, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.2910783832986703e-05, |
|
"loss": 1.4883, |
|
"step": 384500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.2875557284004893e-05, |
|
"loss": 1.49, |
|
"step": 385000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.2840330735023083e-05, |
|
"loss": 1.4924, |
|
"step": 385500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.280510418604127e-05, |
|
"loss": 1.4747, |
|
"step": 386000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 2.2769877637059457e-05, |
|
"loss": 1.4842, |
|
"step": 386500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2734651088077647e-05, |
|
"loss": 1.4707, |
|
"step": 387000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.2699424539095834e-05, |
|
"loss": 1.472, |
|
"step": 387500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.266419799011402e-05, |
|
"loss": 1.4679, |
|
"step": 388000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.262897144113221e-05, |
|
"loss": 1.4705, |
|
"step": 388500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.25937448921504e-05, |
|
"loss": 1.4907, |
|
"step": 389000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2558518343168585e-05, |
|
"loss": 1.4825, |
|
"step": 389500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2523291794186776e-05, |
|
"loss": 1.4465, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2488065245204966e-05, |
|
"loss": 1.4926, |
|
"step": 390500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2452838696223153e-05, |
|
"loss": 1.4968, |
|
"step": 391000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.241761214724134e-05, |
|
"loss": 1.4676, |
|
"step": 391500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.2382385598259527e-05, |
|
"loss": 1.4883, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.2347159049277717e-05, |
|
"loss": 1.4776, |
|
"step": 392500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.2311932500295904e-05, |
|
"loss": 1.4942, |
|
"step": 393000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 2.227670595131409e-05, |
|
"loss": 1.48, |
|
"step": 393500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.224147940233228e-05, |
|
"loss": 1.4812, |
|
"step": 394000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2206252853350468e-05, |
|
"loss": 1.4918, |
|
"step": 394500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2171026304368655e-05, |
|
"loss": 1.4789, |
|
"step": 395000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2135799755386845e-05, |
|
"loss": 1.4723, |
|
"step": 395500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2100573206405035e-05, |
|
"loss": 1.4703, |
|
"step": 396000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.2065346657423222e-05, |
|
"loss": 1.474, |
|
"step": 396500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.203012010844141e-05, |
|
"loss": 1.4634, |
|
"step": 397000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.19948935594596e-05, |
|
"loss": 1.4582, |
|
"step": 397500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.1959667010477786e-05, |
|
"loss": 1.4758, |
|
"step": 398000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.1924440461495973e-05, |
|
"loss": 1.4639, |
|
"step": 398500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.188921391251416e-05, |
|
"loss": 1.4959, |
|
"step": 399000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.185398736353235e-05, |
|
"loss": 1.4849, |
|
"step": 399500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.1818760814550537e-05, |
|
"loss": 1.4696, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.1783534265568728e-05, |
|
"loss": 1.4885, |
|
"step": 400500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1748307716586914e-05, |
|
"loss": 1.4798, |
|
"step": 401000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1713081167605105e-05, |
|
"loss": 1.4811, |
|
"step": 401500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.167785461862329e-05, |
|
"loss": 1.4584, |
|
"step": 402000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.164262806964148e-05, |
|
"loss": 1.4638, |
|
"step": 402500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.160740152065967e-05, |
|
"loss": 1.4589, |
|
"step": 403000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1572174971677856e-05, |
|
"loss": 1.4767, |
|
"step": 403500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1536948422696043e-05, |
|
"loss": 1.4618, |
|
"step": 404000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.150172187371423e-05, |
|
"loss": 1.4772, |
|
"step": 404500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.146649532473242e-05, |
|
"loss": 1.4819, |
|
"step": 405000 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 2.1431268775750607e-05, |
|
"loss": 1.466, |
|
"step": 405500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1396042226768797e-05, |
|
"loss": 1.4859, |
|
"step": 406000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1360815677786984e-05, |
|
"loss": 1.4642, |
|
"step": 406500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1325589128805174e-05, |
|
"loss": 1.4733, |
|
"step": 407000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.129036257982336e-05, |
|
"loss": 1.4645, |
|
"step": 407500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.1255136030841548e-05, |
|
"loss": 1.4748, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.121990948185974e-05, |
|
"loss": 1.4548, |
|
"step": 408500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1184682932877925e-05, |
|
"loss": 1.4783, |
|
"step": 409000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1149456383896112e-05, |
|
"loss": 1.4683, |
|
"step": 409500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1114229834914302e-05, |
|
"loss": 1.4649, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.107900328593249e-05, |
|
"loss": 1.4684, |
|
"step": 410500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.104377673695068e-05, |
|
"loss": 1.4618, |
|
"step": 411000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.1008550187968867e-05, |
|
"loss": 1.445, |
|
"step": 411500 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.0973323638987057e-05, |
|
"loss": 1.4678, |
|
"step": 412000 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 2.0938097090005244e-05, |
|
"loss": 1.4766, |
|
"step": 412500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.090287054102343e-05, |
|
"loss": 1.4685, |
|
"step": 413000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.0867643992041618e-05, |
|
"loss": 1.4731, |
|
"step": 413500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.0832417443059808e-05, |
|
"loss": 1.4512, |
|
"step": 414000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.0797190894077995e-05, |
|
"loss": 1.4641, |
|
"step": 414500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.076196434509618e-05, |
|
"loss": 1.4738, |
|
"step": 415000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.0726737796114372e-05, |
|
"loss": 1.4527, |
|
"step": 415500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.0691511247132562e-05, |
|
"loss": 1.4633, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.065628469815075e-05, |
|
"loss": 1.4489, |
|
"step": 416500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.0621058149168936e-05, |
|
"loss": 1.4547, |
|
"step": 417000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 2.0585831600187126e-05, |
|
"loss": 1.4494, |
|
"step": 417500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0550605051205313e-05, |
|
"loss": 1.46, |
|
"step": 418000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.05153785022235e-05, |
|
"loss": 1.4733, |
|
"step": 418500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0480151953241687e-05, |
|
"loss": 1.4869, |
|
"step": 419000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.0444925404259877e-05, |
|
"loss": 1.4579, |
|
"step": 419500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0409698855278064e-05, |
|
"loss": 1.4617, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.037447230629625e-05, |
|
"loss": 1.4677, |
|
"step": 420500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.033924575731444e-05, |
|
"loss": 1.4645, |
|
"step": 421000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.030401920833263e-05, |
|
"loss": 1.4448, |
|
"step": 421500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.026879265935082e-05, |
|
"loss": 1.4605, |
|
"step": 422000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0233566110369005e-05, |
|
"loss": 1.4556, |
|
"step": 422500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0198339561387196e-05, |
|
"loss": 1.4718, |
|
"step": 423000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.0163113012405383e-05, |
|
"loss": 1.468, |
|
"step": 423500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.012788646342357e-05, |
|
"loss": 1.461, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 2.009265991444176e-05, |
|
"loss": 1.4572, |
|
"step": 424500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0057433365459947e-05, |
|
"loss": 1.4566, |
|
"step": 425000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0022206816478134e-05, |
|
"loss": 1.4311, |
|
"step": 425500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9986980267496324e-05, |
|
"loss": 1.4517, |
|
"step": 426000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.9951753718514514e-05, |
|
"loss": 1.4416, |
|
"step": 426500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.99165271695327e-05, |
|
"loss": 1.4488, |
|
"step": 427000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9881300620550888e-05, |
|
"loss": 1.4693, |
|
"step": 427500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9846074071569075e-05, |
|
"loss": 1.4516, |
|
"step": 428000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9810847522587265e-05, |
|
"loss": 1.4516, |
|
"step": 428500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.9775620973605452e-05, |
|
"loss": 1.4366, |
|
"step": 429000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.974039442462364e-05, |
|
"loss": 1.4532, |
|
"step": 429500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.970516787564183e-05, |
|
"loss": 1.4574, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.9669941326660016e-05, |
|
"loss": 1.4513, |
|
"step": 430500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.9634714777678203e-05, |
|
"loss": 1.4371, |
|
"step": 431000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 1.9599488228696393e-05, |
|
"loss": 1.4585, |
|
"step": 431500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9564261679714584e-05, |
|
"loss": 1.4605, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.952903513073277e-05, |
|
"loss": 1.4374, |
|
"step": 432500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9493808581750958e-05, |
|
"loss": 1.4419, |
|
"step": 433000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9458582032769144e-05, |
|
"loss": 1.4401, |
|
"step": 433500 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9423355483787335e-05, |
|
"loss": 1.4515, |
|
"step": 434000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.938812893480552e-05, |
|
"loss": 1.4376, |
|
"step": 434500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.935290238582371e-05, |
|
"loss": 1.4439, |
|
"step": 435000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.93176758368419e-05, |
|
"loss": 1.4305, |
|
"step": 435500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9282449287860086e-05, |
|
"loss": 1.4611, |
|
"step": 436000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9247222738878276e-05, |
|
"loss": 1.4564, |
|
"step": 436500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9211996189896463e-05, |
|
"loss": 1.4424, |
|
"step": 437000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9176769640914653e-05, |
|
"loss": 1.4541, |
|
"step": 437500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.914154309193284e-05, |
|
"loss": 1.4338, |
|
"step": 438000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.9106316542951027e-05, |
|
"loss": 1.4419, |
|
"step": 438500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9071089993969214e-05, |
|
"loss": 1.4363, |
|
"step": 439000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.9035863444987404e-05, |
|
"loss": 1.4404, |
|
"step": 439500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.900063689600559e-05, |
|
"loss": 1.4442, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.8965410347023778e-05, |
|
"loss": 1.442, |
|
"step": 440500 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.8930183798041968e-05, |
|
"loss": 1.4436, |
|
"step": 441000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.889495724906016e-05, |
|
"loss": 1.4399, |
|
"step": 441500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8859730700078345e-05, |
|
"loss": 1.4423, |
|
"step": 442000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8824504151096532e-05, |
|
"loss": 1.4486, |
|
"step": 442500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.8789277602114723e-05, |
|
"loss": 1.443, |
|
"step": 443000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 1.875405105313291e-05, |
|
"loss": 1.4428, |
|
"step": 443500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8718824504151096e-05, |
|
"loss": 1.4387, |
|
"step": 444000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8683597955169287e-05, |
|
"loss": 1.44, |
|
"step": 444500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8648371406187474e-05, |
|
"loss": 1.4466, |
|
"step": 445000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.861314485720566e-05, |
|
"loss": 1.434, |
|
"step": 445500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8577918308223847e-05, |
|
"loss": 1.4469, |
|
"step": 446000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.854269175924204e-05, |
|
"loss": 1.44, |
|
"step": 446500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8507465210260228e-05, |
|
"loss": 1.4498, |
|
"step": 447000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8472238661278415e-05, |
|
"loss": 1.4347, |
|
"step": 447500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8437012112296602e-05, |
|
"loss": 1.4467, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8401785563314792e-05, |
|
"loss": 1.4052, |
|
"step": 448500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.836655901433298e-05, |
|
"loss": 1.4406, |
|
"step": 449000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8331332465351166e-05, |
|
"loss": 1.4246, |
|
"step": 449500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8296105916369356e-05, |
|
"loss": 1.4468, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.8260879367387543e-05, |
|
"loss": 1.4283, |
|
"step": 450500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.822565281840573e-05, |
|
"loss": 1.4526, |
|
"step": 451000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.819042626942392e-05, |
|
"loss": 1.4453, |
|
"step": 451500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.815519972044211e-05, |
|
"loss": 1.4303, |
|
"step": 452000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8119973171460298e-05, |
|
"loss": 1.4492, |
|
"step": 452500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.8084746622478484e-05, |
|
"loss": 1.4505, |
|
"step": 453000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.804952007349667e-05, |
|
"loss": 1.4485, |
|
"step": 453500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.801429352451486e-05, |
|
"loss": 1.4376, |
|
"step": 454000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.797906697553305e-05, |
|
"loss": 1.4448, |
|
"step": 454500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.7943840426551235e-05, |
|
"loss": 1.4302, |
|
"step": 455000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7908613877569426e-05, |
|
"loss": 1.4317, |
|
"step": 455500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7873387328587613e-05, |
|
"loss": 1.431, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.78381607796058e-05, |
|
"loss": 1.426, |
|
"step": 456500 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.780293423062399e-05, |
|
"loss": 1.4361, |
|
"step": 457000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.776770768164218e-05, |
|
"loss": 1.4313, |
|
"step": 457500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7732481132660367e-05, |
|
"loss": 1.4232, |
|
"step": 458000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7697254583678554e-05, |
|
"loss": 1.4349, |
|
"step": 458500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7662028034696744e-05, |
|
"loss": 1.4386, |
|
"step": 459000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.762680148571493e-05, |
|
"loss": 1.4444, |
|
"step": 459500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7591574936733118e-05, |
|
"loss": 1.4377, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7556348387751305e-05, |
|
"loss": 1.4426, |
|
"step": 460500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7521121838769495e-05, |
|
"loss": 1.4318, |
|
"step": 461000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7485895289787682e-05, |
|
"loss": 1.4234, |
|
"step": 461500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.7450668740805872e-05, |
|
"loss": 1.4333, |
|
"step": 462000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.741544219182406e-05, |
|
"loss": 1.4448, |
|
"step": 462500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.738021564284225e-05, |
|
"loss": 1.4262, |
|
"step": 463000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7344989093860436e-05, |
|
"loss": 1.44, |
|
"step": 463500 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7309762544878623e-05, |
|
"loss": 1.4228, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7274535995896814e-05, |
|
"loss": 1.4315, |
|
"step": 464500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7239309446915e-05, |
|
"loss": 1.4252, |
|
"step": 465000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7204082897933187e-05, |
|
"loss": 1.4299, |
|
"step": 465500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7168856348951374e-05, |
|
"loss": 1.4385, |
|
"step": 466000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7133629799969565e-05, |
|
"loss": 1.4411, |
|
"step": 466500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7098403250987755e-05, |
|
"loss": 1.4311, |
|
"step": 467000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.7063176702005942e-05, |
|
"loss": 1.4331, |
|
"step": 467500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.702795015302413e-05, |
|
"loss": 1.4274, |
|
"step": 468000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.699272360404232e-05, |
|
"loss": 1.4265, |
|
"step": 468500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.6957497055060506e-05, |
|
"loss": 1.4212, |
|
"step": 469000 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 1.6922270506078693e-05, |
|
"loss": 1.4217, |
|
"step": 469500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6887043957096883e-05, |
|
"loss": 1.4398, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.685181740811507e-05, |
|
"loss": 1.4163, |
|
"step": 470500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6816590859133257e-05, |
|
"loss": 1.4322, |
|
"step": 471000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6781364310151447e-05, |
|
"loss": 1.42, |
|
"step": 471500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6746137761169638e-05, |
|
"loss": 1.4289, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.6710911212187824e-05, |
|
"loss": 1.4228, |
|
"step": 472500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.667568466320601e-05, |
|
"loss": 1.4242, |
|
"step": 473000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.66404581142242e-05, |
|
"loss": 1.4102, |
|
"step": 473500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.660523156524239e-05, |
|
"loss": 1.4383, |
|
"step": 474000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6570005016260575e-05, |
|
"loss": 1.4212, |
|
"step": 474500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6534778467278762e-05, |
|
"loss": 1.4139, |
|
"step": 475000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6499551918296953e-05, |
|
"loss": 1.4221, |
|
"step": 475500 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.646432536931514e-05, |
|
"loss": 1.4325, |
|
"step": 476000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 1.6429098820333326e-05, |
|
"loss": 1.4284, |
|
"step": 476500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.6393872271351517e-05, |
|
"loss": 1.4145, |
|
"step": 477000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.6358645722369707e-05, |
|
"loss": 1.4103, |
|
"step": 477500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.6323419173387894e-05, |
|
"loss": 1.428, |
|
"step": 478000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.628819262440608e-05, |
|
"loss": 1.4146, |
|
"step": 478500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.625296607542427e-05, |
|
"loss": 1.3936, |
|
"step": 479000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6217739526442458e-05, |
|
"loss": 1.4026, |
|
"step": 479500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6182512977460645e-05, |
|
"loss": 1.409, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6147286428478832e-05, |
|
"loss": 1.4209, |
|
"step": 480500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.6112059879497022e-05, |
|
"loss": 1.3949, |
|
"step": 481000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.607683333051521e-05, |
|
"loss": 1.4167, |
|
"step": 481500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.6041606781533396e-05, |
|
"loss": 1.4049, |
|
"step": 482000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.6006380232551586e-05, |
|
"loss": 1.4066, |
|
"step": 482500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.5971153683569776e-05, |
|
"loss": 1.417, |
|
"step": 483000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.5935927134587963e-05, |
|
"loss": 1.4069, |
|
"step": 483500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.590070058560615e-05, |
|
"loss": 1.4171, |
|
"step": 484000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.586547403662434e-05, |
|
"loss": 1.4185, |
|
"step": 484500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.5830247487642527e-05, |
|
"loss": 1.416, |
|
"step": 485000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.5795020938660714e-05, |
|
"loss": 1.4088, |
|
"step": 485500 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.57597943896789e-05, |
|
"loss": 1.4141, |
|
"step": 486000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.572456784069709e-05, |
|
"loss": 1.413, |
|
"step": 486500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.568934129171528e-05, |
|
"loss": 1.4033, |
|
"step": 487000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.565411474273347e-05, |
|
"loss": 1.4191, |
|
"step": 487500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.5618888193751656e-05, |
|
"loss": 1.4068, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 1.5583661644769846e-05, |
|
"loss": 1.3959, |
|
"step": 488500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5548435095788033e-05, |
|
"loss": 1.4259, |
|
"step": 489000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.551320854680622e-05, |
|
"loss": 1.4033, |
|
"step": 489500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.547798199782441e-05, |
|
"loss": 1.4058, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5442755448842597e-05, |
|
"loss": 1.3953, |
|
"step": 490500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5407528899860784e-05, |
|
"loss": 1.3968, |
|
"step": 491000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5372302350878974e-05, |
|
"loss": 1.4198, |
|
"step": 491500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.533707580189716e-05, |
|
"loss": 1.413, |
|
"step": 492000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.530184925291535e-05, |
|
"loss": 1.399, |
|
"step": 492500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.5266622703933538e-05, |
|
"loss": 1.4051, |
|
"step": 493000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5231396154951727e-05, |
|
"loss": 1.4136, |
|
"step": 493500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5196169605969915e-05, |
|
"loss": 1.4133, |
|
"step": 494000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5160943056988102e-05, |
|
"loss": 1.4004, |
|
"step": 494500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5125716508006291e-05, |
|
"loss": 1.3953, |
|
"step": 495000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 1.5090489959024478e-05, |
|
"loss": 1.4181, |
|
"step": 495500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.5055263410042666e-05, |
|
"loss": 1.4009, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.5020036861060855e-05, |
|
"loss": 1.3945, |
|
"step": 496500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.4984810312079042e-05, |
|
"loss": 1.4047, |
|
"step": 497000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.4949583763097232e-05, |
|
"loss": 1.4041, |
|
"step": 497500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.491435721411542e-05, |
|
"loss": 1.4129, |
|
"step": 498000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.487913066513361e-05, |
|
"loss": 1.4148, |
|
"step": 498500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4843904116151796e-05, |
|
"loss": 1.4022, |
|
"step": 499000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4808677567169985e-05, |
|
"loss": 1.4081, |
|
"step": 499500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 1.4773451018188172e-05, |
|
"loss": 1.4085, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.473822446920636e-05, |
|
"loss": 1.3954, |
|
"step": 500500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4702997920224549e-05, |
|
"loss": 1.3933, |
|
"step": 501000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4667771371242736e-05, |
|
"loss": 1.415, |
|
"step": 501500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4632544822260925e-05, |
|
"loss": 1.4132, |
|
"step": 502000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4597318273279111e-05, |
|
"loss": 1.3878, |
|
"step": 502500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4562091724297303e-05, |
|
"loss": 1.3973, |
|
"step": 503000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.452686517531549e-05, |
|
"loss": 1.3923, |
|
"step": 503500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4491638626333679e-05, |
|
"loss": 1.398, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4456412077351866e-05, |
|
"loss": 1.3871, |
|
"step": 504500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4421185528370054e-05, |
|
"loss": 1.3744, |
|
"step": 505000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.4385958979388241e-05, |
|
"loss": 1.4026, |
|
"step": 505500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.435073243040643e-05, |
|
"loss": 1.3924, |
|
"step": 506000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.4315505881424619e-05, |
|
"loss": 1.4021, |
|
"step": 506500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.4280279332442805e-05, |
|
"loss": 1.3925, |
|
"step": 507000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4245052783460994e-05, |
|
"loss": 1.3867, |
|
"step": 507500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4209826234479184e-05, |
|
"loss": 1.391, |
|
"step": 508000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4174599685497373e-05, |
|
"loss": 1.3814, |
|
"step": 508500 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.413937313651556e-05, |
|
"loss": 1.3976, |
|
"step": 509000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4104146587533748e-05, |
|
"loss": 1.4071, |
|
"step": 509500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4068920038551935e-05, |
|
"loss": 1.3871, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.4033693489570124e-05, |
|
"loss": 1.3904, |
|
"step": 510500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.3998466940588312e-05, |
|
"loss": 1.3951, |
|
"step": 511000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.39632403916065e-05, |
|
"loss": 1.4098, |
|
"step": 511500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.3928013842624688e-05, |
|
"loss": 1.385, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3892787293642875e-05, |
|
"loss": 1.3975, |
|
"step": 512500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3857560744661067e-05, |
|
"loss": 1.3953, |
|
"step": 513000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3822334195679254e-05, |
|
"loss": 1.4071, |
|
"step": 513500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.3787107646697442e-05, |
|
"loss": 1.4085, |
|
"step": 514000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 1.375188109771563e-05, |
|
"loss": 1.3907, |
|
"step": 514500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3716654548733818e-05, |
|
"loss": 1.3906, |
|
"step": 515000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3681427999752006e-05, |
|
"loss": 1.4069, |
|
"step": 515500 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3646201450770193e-05, |
|
"loss": 1.3958, |
|
"step": 516000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3610974901788382e-05, |
|
"loss": 1.3909, |
|
"step": 516500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3575748352806569e-05, |
|
"loss": 1.3943, |
|
"step": 517000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3540521803824757e-05, |
|
"loss": 1.3954, |
|
"step": 517500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3505295254842948e-05, |
|
"loss": 1.3823, |
|
"step": 518000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3470068705861136e-05, |
|
"loss": 1.3804, |
|
"step": 518500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3434842156879323e-05, |
|
"loss": 1.3977, |
|
"step": 519000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3399615607897512e-05, |
|
"loss": 1.395, |
|
"step": 519500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3364389058915699e-05, |
|
"loss": 1.4002, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3329162509933887e-05, |
|
"loss": 1.3977, |
|
"step": 520500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3293935960952076e-05, |
|
"loss": 1.4033, |
|
"step": 521000 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.3258709411970263e-05, |
|
"loss": 1.3972, |
|
"step": 521500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3223482862988451e-05, |
|
"loss": 1.3856, |
|
"step": 522000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3188256314006638e-05, |
|
"loss": 1.3869, |
|
"step": 522500 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.315302976502483e-05, |
|
"loss": 1.3738, |
|
"step": 523000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3117803216043017e-05, |
|
"loss": 1.3967, |
|
"step": 523500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.3082576667061206e-05, |
|
"loss": 1.3648, |
|
"step": 524000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.3047350118079393e-05, |
|
"loss": 1.3963, |
|
"step": 524500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.3012123569097581e-05, |
|
"loss": 1.3893, |
|
"step": 525000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.297689702011577e-05, |
|
"loss": 1.387, |
|
"step": 525500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 1.2941670471133957e-05, |
|
"loss": 1.3915, |
|
"step": 526000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2906443922152145e-05, |
|
"loss": 1.3801, |
|
"step": 526500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2871217373170332e-05, |
|
"loss": 1.3754, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2835990824188521e-05, |
|
"loss": 1.4019, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.2800764275206708e-05, |
|
"loss": 1.3726, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.27655377262249e-05, |
|
"loss": 1.3976, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2730311177243087e-05, |
|
"loss": 1.3837, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2695084628261275e-05, |
|
"loss": 1.3865, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2659858079279462e-05, |
|
"loss": 1.3897, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.262463153029765e-05, |
|
"loss": 1.381, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.258940498131584e-05, |
|
"loss": 1.3918, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2554178432334026e-05, |
|
"loss": 1.3859, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2518951883352215e-05, |
|
"loss": 1.3831, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2483725334370403e-05, |
|
"loss": 1.3788, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 1.2448498785388592e-05, |
|
"loss": 1.3823, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2413272236406779e-05, |
|
"loss": 1.3675, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2378045687424968e-05, |
|
"loss": 1.3848, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2342819138443156e-05, |
|
"loss": 1.3831, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2307592589461345e-05, |
|
"loss": 1.3887, |
|
"step": 535000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2272366040479533e-05, |
|
"loss": 1.3719, |
|
"step": 535500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.223713949149772e-05, |
|
"loss": 1.3636, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2201912942515909e-05, |
|
"loss": 1.3885, |
|
"step": 536500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2166686393534097e-05, |
|
"loss": 1.3739, |
|
"step": 537000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2131459844552286e-05, |
|
"loss": 1.3801, |
|
"step": 537500 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.2096233295570473e-05, |
|
"loss": 1.3783, |
|
"step": 538000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.2061006746588662e-05, |
|
"loss": 1.3687, |
|
"step": 538500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.202578019760685e-05, |
|
"loss": 1.3806, |
|
"step": 539000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1990553648625039e-05, |
|
"loss": 1.3965, |
|
"step": 539500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1955327099643226e-05, |
|
"loss": 1.3663, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 1.1920100550661414e-05, |
|
"loss": 1.3683, |
|
"step": 540500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1884874001679603e-05, |
|
"loss": 1.3619, |
|
"step": 541000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.184964745269779e-05, |
|
"loss": 1.3862, |
|
"step": 541500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1814420903715978e-05, |
|
"loss": 1.3779, |
|
"step": 542000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1779194354734167e-05, |
|
"loss": 1.3827, |
|
"step": 542500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1743967805752356e-05, |
|
"loss": 1.3755, |
|
"step": 543000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1708741256770542e-05, |
|
"loss": 1.3804, |
|
"step": 543500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1673514707788731e-05, |
|
"loss": 1.3846, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.163828815880692e-05, |
|
"loss": 1.3882, |
|
"step": 544500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.1603061609825108e-05, |
|
"loss": 1.3576, |
|
"step": 545000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1567835060843297e-05, |
|
"loss": 1.3771, |
|
"step": 545500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1532608511861484e-05, |
|
"loss": 1.388, |
|
"step": 546000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1497381962879672e-05, |
|
"loss": 1.3631, |
|
"step": 546500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.146215541389786e-05, |
|
"loss": 1.3813, |
|
"step": 547000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.142692886491605e-05, |
|
"loss": 1.3712, |
|
"step": 547500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1391702315934236e-05, |
|
"loss": 1.3714, |
|
"step": 548000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1356475766952425e-05, |
|
"loss": 1.3843, |
|
"step": 548500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.1321249217970614e-05, |
|
"loss": 1.3849, |
|
"step": 549000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.12860226689888e-05, |
|
"loss": 1.3717, |
|
"step": 549500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.125079612000699e-05, |
|
"loss": 1.3724, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1215569571025178e-05, |
|
"loss": 1.3741, |
|
"step": 550500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1180343022043366e-05, |
|
"loss": 1.3816, |
|
"step": 551000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1145116473061553e-05, |
|
"loss": 1.3747, |
|
"step": 551500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 1.1109889924079742e-05, |
|
"loss": 1.3786, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.107466337509793e-05, |
|
"loss": 1.3658, |
|
"step": 552500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.1039436826116119e-05, |
|
"loss": 1.3784, |
|
"step": 553000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.1004210277134306e-05, |
|
"loss": 1.373, |
|
"step": 553500 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.0968983728152494e-05, |
|
"loss": 1.3742, |
|
"step": 554000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.0933757179170683e-05, |
|
"loss": 1.3781, |
|
"step": 554500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0898530630188872e-05, |
|
"loss": 1.3748, |
|
"step": 555000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.086330408120706e-05, |
|
"loss": 1.3886, |
|
"step": 555500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0828077532225247e-05, |
|
"loss": 1.3756, |
|
"step": 556000 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0792850983243436e-05, |
|
"loss": 1.3639, |
|
"step": 556500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0757624434261623e-05, |
|
"loss": 1.3748, |
|
"step": 557000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0722397885279813e-05, |
|
"loss": 1.3738, |
|
"step": 557500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0687171336298e-05, |
|
"loss": 1.3697, |
|
"step": 558000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0651944787316188e-05, |
|
"loss": 1.3655, |
|
"step": 558500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 1.0616718238334377e-05, |
|
"loss": 1.374, |
|
"step": 559000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0581491689352564e-05, |
|
"loss": 1.3787, |
|
"step": 559500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0546265140370754e-05, |
|
"loss": 1.3725, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0511038591388941e-05, |
|
"loss": 1.3597, |
|
"step": 560500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.047581204240713e-05, |
|
"loss": 1.3592, |
|
"step": 561000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.0440585493425317e-05, |
|
"loss": 1.3705, |
|
"step": 561500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0405358944443505e-05, |
|
"loss": 1.3599, |
|
"step": 562000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0370132395461694e-05, |
|
"loss": 1.3682, |
|
"step": 562500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0334905846479882e-05, |
|
"loss": 1.3778, |
|
"step": 563000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.029967929749807e-05, |
|
"loss": 1.3691, |
|
"step": 563500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.0264452748516258e-05, |
|
"loss": 1.3772, |
|
"step": 564000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0229226199534447e-05, |
|
"loss": 1.3628, |
|
"step": 564500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0193999650552633e-05, |
|
"loss": 1.3647, |
|
"step": 565000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0158773101570824e-05, |
|
"loss": 1.3719, |
|
"step": 565500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.012354655258901e-05, |
|
"loss": 1.3603, |
|
"step": 566000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.00883200036072e-05, |
|
"loss": 1.3517, |
|
"step": 566500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0053093454625386e-05, |
|
"loss": 1.359, |
|
"step": 567000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.0017866905643575e-05, |
|
"loss": 1.3818, |
|
"step": 567500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.982640356661763e-06, |
|
"loss": 1.362, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.947413807679952e-06, |
|
"loss": 1.3738, |
|
"step": 568500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.91218725869814e-06, |
|
"loss": 1.3643, |
|
"step": 569000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.876960709716327e-06, |
|
"loss": 1.3711, |
|
"step": 569500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.841734160734516e-06, |
|
"loss": 1.353, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.806507611752705e-06, |
|
"loss": 1.3638, |
|
"step": 570500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 9.771281062770893e-06, |
|
"loss": 1.3618, |
|
"step": 571000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.73605451378908e-06, |
|
"loss": 1.3599, |
|
"step": 571500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.700827964807269e-06, |
|
"loss": 1.3479, |
|
"step": 572000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.665601415825457e-06, |
|
"loss": 1.3701, |
|
"step": 572500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.630374866843646e-06, |
|
"loss": 1.3582, |
|
"step": 573000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.595148317861835e-06, |
|
"loss": 1.3629, |
|
"step": 573500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.559921768880021e-06, |
|
"loss": 1.3558, |
|
"step": 574000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.52469521989821e-06, |
|
"loss": 1.3495, |
|
"step": 574500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.489468670916397e-06, |
|
"loss": 1.369, |
|
"step": 575000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.454242121934587e-06, |
|
"loss": 1.3572, |
|
"step": 575500 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.419015572952774e-06, |
|
"loss": 1.3634, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.383789023970963e-06, |
|
"loss": 1.3584, |
|
"step": 576500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.34856247498915e-06, |
|
"loss": 1.3633, |
|
"step": 577000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.313335926007338e-06, |
|
"loss": 1.3648, |
|
"step": 577500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 9.278109377025527e-06, |
|
"loss": 1.3428, |
|
"step": 578000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.242882828043715e-06, |
|
"loss": 1.37, |
|
"step": 578500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.207656279061904e-06, |
|
"loss": 1.3493, |
|
"step": 579000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.172429730080091e-06, |
|
"loss": 1.357, |
|
"step": 579500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.13720318109828e-06, |
|
"loss": 1.3635, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.101976632116468e-06, |
|
"loss": 1.3587, |
|
"step": 580500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.066750083134657e-06, |
|
"loss": 1.3605, |
|
"step": 581000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 9.031523534152844e-06, |
|
"loss": 1.3651, |
|
"step": 581500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.996296985171032e-06, |
|
"loss": 1.369, |
|
"step": 582000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.96107043618922e-06, |
|
"loss": 1.3484, |
|
"step": 582500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.92584388720741e-06, |
|
"loss": 1.3682, |
|
"step": 583000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.890617338225598e-06, |
|
"loss": 1.364, |
|
"step": 583500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.855390789243785e-06, |
|
"loss": 1.3634, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.820164240261973e-06, |
|
"loss": 1.354, |
|
"step": 584500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 8.78493769128016e-06, |
|
"loss": 1.3498, |
|
"step": 585000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.74971114229835e-06, |
|
"loss": 1.3625, |
|
"step": 585500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.714484593316538e-06, |
|
"loss": 1.3587, |
|
"step": 586000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.679258044334726e-06, |
|
"loss": 1.3632, |
|
"step": 586500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.644031495352913e-06, |
|
"loss": 1.3528, |
|
"step": 587000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.608804946371102e-06, |
|
"loss": 1.3698, |
|
"step": 587500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.57357839738929e-06, |
|
"loss": 1.3604, |
|
"step": 588000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.538351848407479e-06, |
|
"loss": 1.361, |
|
"step": 588500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.503125299425667e-06, |
|
"loss": 1.3603, |
|
"step": 589000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.467898750443854e-06, |
|
"loss": 1.3618, |
|
"step": 589500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 8.432672201462043e-06, |
|
"loss": 1.3444, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.39744565248023e-06, |
|
"loss": 1.3396, |
|
"step": 590500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.36221910349842e-06, |
|
"loss": 1.3527, |
|
"step": 591000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.326992554516607e-06, |
|
"loss": 1.3589, |
|
"step": 591500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.291766005534796e-06, |
|
"loss": 1.3463, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.256539456552984e-06, |
|
"loss": 1.3477, |
|
"step": 592500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.221312907571171e-06, |
|
"loss": 1.3451, |
|
"step": 593000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.186086358589361e-06, |
|
"loss": 1.3586, |
|
"step": 593500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.150859809607548e-06, |
|
"loss": 1.3506, |
|
"step": 594000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.115633260625737e-06, |
|
"loss": 1.3519, |
|
"step": 594500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.080406711643924e-06, |
|
"loss": 1.3391, |
|
"step": 595000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.045180162662112e-06, |
|
"loss": 1.3489, |
|
"step": 595500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 8.009953613680301e-06, |
|
"loss": 1.3505, |
|
"step": 596000 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.97472706469849e-06, |
|
"loss": 1.3527, |
|
"step": 596500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 7.939500515716678e-06, |
|
"loss": 1.3402, |
|
"step": 597000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.904273966734865e-06, |
|
"loss": 1.3514, |
|
"step": 597500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.869047417753054e-06, |
|
"loss": 1.3529, |
|
"step": 598000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.833820868771242e-06, |
|
"loss": 1.361, |
|
"step": 598500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.798594319789431e-06, |
|
"loss": 1.3407, |
|
"step": 599000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.763367770807618e-06, |
|
"loss": 1.3368, |
|
"step": 599500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.728141221825806e-06, |
|
"loss": 1.3491, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.692914672843993e-06, |
|
"loss": 1.3582, |
|
"step": 600500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.657688123862184e-06, |
|
"loss": 1.354, |
|
"step": 601000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.622461574880371e-06, |
|
"loss": 1.3631, |
|
"step": 601500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.587235025898559e-06, |
|
"loss": 1.354, |
|
"step": 602000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.552008476916747e-06, |
|
"loss": 1.3508, |
|
"step": 602500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.516781927934935e-06, |
|
"loss": 1.3408, |
|
"step": 603000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.481555378953124e-06, |
|
"loss": 1.3407, |
|
"step": 603500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.446328829971312e-06, |
|
"loss": 1.3504, |
|
"step": 604000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.4111022809895e-06, |
|
"loss": 1.3384, |
|
"step": 604500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.375875732007688e-06, |
|
"loss": 1.3482, |
|
"step": 605000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.340649183025876e-06, |
|
"loss": 1.3527, |
|
"step": 605500 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.305422634044065e-06, |
|
"loss": 1.3469, |
|
"step": 606000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.270196085062253e-06, |
|
"loss": 1.3572, |
|
"step": 606500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.234969536080441e-06, |
|
"loss": 1.3329, |
|
"step": 607000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.1997429870986286e-06, |
|
"loss": 1.3468, |
|
"step": 607500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.164516438116816e-06, |
|
"loss": 1.3374, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.129289889135006e-06, |
|
"loss": 1.3521, |
|
"step": 608500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 7.0940633401531935e-06, |
|
"loss": 1.3566, |
|
"step": 609000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.058836791171382e-06, |
|
"loss": 1.3346, |
|
"step": 609500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.02361024218957e-06, |
|
"loss": 1.3446, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 6.988383693207758e-06, |
|
"loss": 1.3393, |
|
"step": 610500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 6.953157144225947e-06, |
|
"loss": 1.3335, |
|
"step": 611000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 6.917930595244135e-06, |
|
"loss": 1.3398, |
|
"step": 611500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.8827040462623225e-06, |
|
"loss": 1.3617, |
|
"step": 612000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.84747749728051e-06, |
|
"loss": 1.333, |
|
"step": 612500 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.812250948298699e-06, |
|
"loss": 1.3367, |
|
"step": 613000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.7770243993168875e-06, |
|
"loss": 1.3456, |
|
"step": 613500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.741797850335075e-06, |
|
"loss": 1.3506, |
|
"step": 614000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.706571301353264e-06, |
|
"loss": 1.3499, |
|
"step": 614500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.671344752371452e-06, |
|
"loss": 1.3481, |
|
"step": 615000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.636118203389639e-06, |
|
"loss": 1.3379, |
|
"step": 615500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 6.600891654407827e-06, |
|
"loss": 1.3317, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.5656651054260165e-06, |
|
"loss": 1.3541, |
|
"step": 616500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.530438556444204e-06, |
|
"loss": 1.34, |
|
"step": 617000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.495212007462392e-06, |
|
"loss": 1.3567, |
|
"step": 617500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.459985458480581e-06, |
|
"loss": 1.3509, |
|
"step": 618000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.424758909498768e-06, |
|
"loss": 1.3261, |
|
"step": 618500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.389532360516958e-06, |
|
"loss": 1.3413, |
|
"step": 619000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.3543058115351456e-06, |
|
"loss": 1.337, |
|
"step": 619500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.319079262553333e-06, |
|
"loss": 1.3517, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.283852713571521e-06, |
|
"loss": 1.3456, |
|
"step": 620500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.24862616458971e-06, |
|
"loss": 1.3509, |
|
"step": 621000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.213399615607897e-06, |
|
"loss": 1.3291, |
|
"step": 621500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.178173066626086e-06, |
|
"loss": 1.3451, |
|
"step": 622000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.142946517644274e-06, |
|
"loss": 1.3332, |
|
"step": 622500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.107719968662462e-06, |
|
"loss": 1.3295, |
|
"step": 623000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.072493419680651e-06, |
|
"loss": 1.3317, |
|
"step": 623500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.037266870698839e-06, |
|
"loss": 1.3144, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.002040321717027e-06, |
|
"loss": 1.349, |
|
"step": 624500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.966813772735215e-06, |
|
"loss": 1.3405, |
|
"step": 625000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 5.931587223753403e-06, |
|
"loss": 1.345, |
|
"step": 625500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.896360674771591e-06, |
|
"loss": 1.3336, |
|
"step": 626000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.861134125789779e-06, |
|
"loss": 1.3559, |
|
"step": 626500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.825907576807968e-06, |
|
"loss": 1.3467, |
|
"step": 627000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.7906810278261555e-06, |
|
"loss": 1.3416, |
|
"step": 627500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 5.755454478844344e-06, |
|
"loss": 1.3367, |
|
"step": 628000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.720227929862533e-06, |
|
"loss": 1.3412, |
|
"step": 628500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.68500138088072e-06, |
|
"loss": 1.331, |
|
"step": 629000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.649774831898909e-06, |
|
"loss": 1.3346, |
|
"step": 629500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.614548282917097e-06, |
|
"loss": 1.3458, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.5793217339352845e-06, |
|
"loss": 1.3439, |
|
"step": 630500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.544095184953473e-06, |
|
"loss": 1.3298, |
|
"step": 631000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.508868635971661e-06, |
|
"loss": 1.3438, |
|
"step": 631500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.4736420869898495e-06, |
|
"loss": 1.3493, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.438415538008037e-06, |
|
"loss": 1.3392, |
|
"step": 632500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.403188989026226e-06, |
|
"loss": 1.3413, |
|
"step": 633000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.367962440044414e-06, |
|
"loss": 1.3222, |
|
"step": 633500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.332735891062602e-06, |
|
"loss": 1.3396, |
|
"step": 634000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.29750934208079e-06, |
|
"loss": 1.3346, |
|
"step": 634500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 5.2622827930989785e-06, |
|
"loss": 1.3346, |
|
"step": 635000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.227056244117166e-06, |
|
"loss": 1.3347, |
|
"step": 635500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.191829695135355e-06, |
|
"loss": 1.3412, |
|
"step": 636000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.156603146153543e-06, |
|
"loss": 1.3337, |
|
"step": 636500 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.121376597171731e-06, |
|
"loss": 1.3399, |
|
"step": 637000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.086150048189919e-06, |
|
"loss": 1.3279, |
|
"step": 637500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.0509234992081075e-06, |
|
"loss": 1.3261, |
|
"step": 638000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5.015696950226296e-06, |
|
"loss": 1.3432, |
|
"step": 638500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.980470401244484e-06, |
|
"loss": 1.3482, |
|
"step": 639000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.945243852262672e-06, |
|
"loss": 1.3417, |
|
"step": 639500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.91001730328086e-06, |
|
"loss": 1.3162, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.874790754299048e-06, |
|
"loss": 1.3282, |
|
"step": 640500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.8395642053172366e-06, |
|
"loss": 1.3295, |
|
"step": 641000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.804337656335424e-06, |
|
"loss": 1.3344, |
|
"step": 641500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 4.769111107353613e-06, |
|
"loss": 1.3251, |
|
"step": 642000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.7338845583718015e-06, |
|
"loss": 1.334, |
|
"step": 642500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.698658009389989e-06, |
|
"loss": 1.3487, |
|
"step": 643000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.663431460408178e-06, |
|
"loss": 1.3221, |
|
"step": 643500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.628204911426366e-06, |
|
"loss": 1.3351, |
|
"step": 644000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.592978362444553e-06, |
|
"loss": 1.3307, |
|
"step": 644500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.557751813462742e-06, |
|
"loss": 1.3378, |
|
"step": 645000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.52252526448093e-06, |
|
"loss": 1.3297, |
|
"step": 645500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.487298715499118e-06, |
|
"loss": 1.3219, |
|
"step": 646000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.452072166517306e-06, |
|
"loss": 1.32, |
|
"step": 646500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 4.416845617535495e-06, |
|
"loss": 1.3336, |
|
"step": 647000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.381619068553683e-06, |
|
"loss": 1.3265, |
|
"step": 647500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.346392519571871e-06, |
|
"loss": 1.3393, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.311165970590059e-06, |
|
"loss": 1.3331, |
|
"step": 648500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.275939421608247e-06, |
|
"loss": 1.3354, |
|
"step": 649000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.240712872626435e-06, |
|
"loss": 1.3338, |
|
"step": 649500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.205486323644624e-06, |
|
"loss": 1.3358, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.1702597746628114e-06, |
|
"loss": 1.3439, |
|
"step": 650500 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.135033225680999e-06, |
|
"loss": 1.3401, |
|
"step": 651000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.099806676699188e-06, |
|
"loss": 1.3163, |
|
"step": 651500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.064580127717376e-06, |
|
"loss": 1.3393, |
|
"step": 652000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 4.029353578735565e-06, |
|
"loss": 1.3251, |
|
"step": 652500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.994127029753753e-06, |
|
"loss": 1.3301, |
|
"step": 653000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.9589004807719405e-06, |
|
"loss": 1.3143, |
|
"step": 653500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.923673931790129e-06, |
|
"loss": 1.3458, |
|
"step": 654000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.888447382808317e-06, |
|
"loss": 1.3413, |
|
"step": 654500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.853220833826505e-06, |
|
"loss": 1.3274, |
|
"step": 655000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.817994284844693e-06, |
|
"loss": 1.324, |
|
"step": 655500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.7827677358628813e-06, |
|
"loss": 1.3265, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.74754118688107e-06, |
|
"loss": 1.3116, |
|
"step": 656500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.7123146378992577e-06, |
|
"loss": 1.3404, |
|
"step": 657000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.6770880889174463e-06, |
|
"loss": 1.3302, |
|
"step": 657500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.6418615399356344e-06, |
|
"loss": 1.3296, |
|
"step": 658000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.606634990953822e-06, |
|
"loss": 1.3141, |
|
"step": 658500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.571408441972011e-06, |
|
"loss": 1.3288, |
|
"step": 659000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.5361818929901985e-06, |
|
"loss": 1.3337, |
|
"step": 659500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.5009553440083867e-06, |
|
"loss": 1.321, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.4657287950265753e-06, |
|
"loss": 1.3244, |
|
"step": 660500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.430502246044763e-06, |
|
"loss": 1.3255, |
|
"step": 661000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.3952756970629517e-06, |
|
"loss": 1.3326, |
|
"step": 661500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.3600491480811394e-06, |
|
"loss": 1.3278, |
|
"step": 662000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.3248225990993276e-06, |
|
"loss": 1.3335, |
|
"step": 662500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.289596050117516e-06, |
|
"loss": 1.3376, |
|
"step": 663000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.254369501135704e-06, |
|
"loss": 1.3085, |
|
"step": 663500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.2191429521538925e-06, |
|
"loss": 1.3203, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.1839164031720803e-06, |
|
"loss": 1.3281, |
|
"step": 664500 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.1486898541902684e-06, |
|
"loss": 1.3187, |
|
"step": 665000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.113463305208457e-06, |
|
"loss": 1.3373, |
|
"step": 665500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.0782367562266448e-06, |
|
"loss": 1.3192, |
|
"step": 666000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.043010207244833e-06, |
|
"loss": 1.3173, |
|
"step": 666500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 3.007783658263021e-06, |
|
"loss": 1.307, |
|
"step": 667000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.9725571092812097e-06, |
|
"loss": 1.3271, |
|
"step": 667500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.937330560299398e-06, |
|
"loss": 1.3362, |
|
"step": 668000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.9021040113175857e-06, |
|
"loss": 1.3479, |
|
"step": 668500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.866877462335774e-06, |
|
"loss": 1.3371, |
|
"step": 669000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.831650913353962e-06, |
|
"loss": 1.3295, |
|
"step": 669500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.7964243643721506e-06, |
|
"loss": 1.3291, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.7611978153903383e-06, |
|
"loss": 1.3149, |
|
"step": 670500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.7259712664085265e-06, |
|
"loss": 1.325, |
|
"step": 671000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.6907447174267147e-06, |
|
"loss": 1.3295, |
|
"step": 671500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.655518168444903e-06, |
|
"loss": 1.3222, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.6202916194630915e-06, |
|
"loss": 1.3174, |
|
"step": 672500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.585065070481279e-06, |
|
"loss": 1.3242, |
|
"step": 673000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.5498385214994674e-06, |
|
"loss": 1.3136, |
|
"step": 673500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.5146119725176556e-06, |
|
"loss": 1.3288, |
|
"step": 674000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.479385423535844e-06, |
|
"loss": 1.3329, |
|
"step": 674500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.444158874554032e-06, |
|
"loss": 1.3102, |
|
"step": 675000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.40893232557222e-06, |
|
"loss": 1.3158, |
|
"step": 675500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.3737057765904082e-06, |
|
"loss": 1.3143, |
|
"step": 676000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.3384792276085964e-06, |
|
"loss": 1.3325, |
|
"step": 676500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.303252678626785e-06, |
|
"loss": 1.3158, |
|
"step": 677000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.2680261296449728e-06, |
|
"loss": 1.3207, |
|
"step": 677500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.232799580663161e-06, |
|
"loss": 1.3307, |
|
"step": 678000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.197573031681349e-06, |
|
"loss": 1.3179, |
|
"step": 678500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.1623464826995373e-06, |
|
"loss": 1.3118, |
|
"step": 679000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.127119933717726e-06, |
|
"loss": 1.344, |
|
"step": 679500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.0918933847359136e-06, |
|
"loss": 1.3192, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.056666835754102e-06, |
|
"loss": 1.3186, |
|
"step": 680500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.02144028677229e-06, |
|
"loss": 1.3112, |
|
"step": 681000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.986213737790478e-06, |
|
"loss": 1.3161, |
|
"step": 681500 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.9509871888086663e-06, |
|
"loss": 1.3313, |
|
"step": 682000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.9157606398268545e-06, |
|
"loss": 1.3213, |
|
"step": 682500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8805340908450427e-06, |
|
"loss": 1.3171, |
|
"step": 683000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.845307541863231e-06, |
|
"loss": 1.3279, |
|
"step": 683500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8100809928814192e-06, |
|
"loss": 1.3207, |
|
"step": 684000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.7748544438996072e-06, |
|
"loss": 1.3084, |
|
"step": 684500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7396278949177954e-06, |
|
"loss": 1.3133, |
|
"step": 685000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.7044013459359835e-06, |
|
"loss": 1.3074, |
|
"step": 685500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.669174796954172e-06, |
|
"loss": 1.3178, |
|
"step": 686000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.63394824797236e-06, |
|
"loss": 1.3199, |
|
"step": 686500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.598721698990548e-06, |
|
"loss": 1.3263, |
|
"step": 687000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.5634951500087362e-06, |
|
"loss": 1.3253, |
|
"step": 687500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.5282686010269244e-06, |
|
"loss": 1.3111, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.4930420520451126e-06, |
|
"loss": 1.3213, |
|
"step": 688500 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.4578155030633007e-06, |
|
"loss": 1.3083, |
|
"step": 689000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.422588954081489e-06, |
|
"loss": 1.3245, |
|
"step": 689500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.387362405099677e-06, |
|
"loss": 1.3098, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.3521358561178653e-06, |
|
"loss": 1.3275, |
|
"step": 690500 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.3169093071360534e-06, |
|
"loss": 1.3205, |
|
"step": 691000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.2816827581542416e-06, |
|
"loss": 1.3193, |
|
"step": 691500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2464562091724298e-06, |
|
"loss": 1.3155, |
|
"step": 692000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.211229660190618e-06, |
|
"loss": 1.3161, |
|
"step": 692500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1760031112088061e-06, |
|
"loss": 1.324, |
|
"step": 693000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1407765622269943e-06, |
|
"loss": 1.3231, |
|
"step": 693500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.1055500132451825e-06, |
|
"loss": 1.3215, |
|
"step": 694000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0703234642633706e-06, |
|
"loss": 1.3236, |
|
"step": 694500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0350969152815588e-06, |
|
"loss": 1.3201, |
|
"step": 695000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 9.99870366299747e-07, |
|
"loss": 1.3197, |
|
"step": 695500 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 9.646438173179352e-07, |
|
"loss": 1.3218, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 9.294172683361232e-07, |
|
"loss": 1.3271, |
|
"step": 696500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.941907193543115e-07, |
|
"loss": 1.3214, |
|
"step": 697000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.589641703724998e-07, |
|
"loss": 1.3101, |
|
"step": 697500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 8.237376213906878e-07, |
|
"loss": 1.3195, |
|
"step": 698000 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 7.88511072408876e-07, |
|
"loss": 1.3184, |
|
"step": 698500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 7.532845234270642e-07, |
|
"loss": 1.3235, |
|
"step": 699000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.180579744452524e-07, |
|
"loss": 1.3289, |
|
"step": 699500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.828314254634405e-07, |
|
"loss": 1.3122, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.476048764816287e-07, |
|
"loss": 1.3339, |
|
"step": 700500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 6.123783274998169e-07, |
|
"loss": 1.3216, |
|
"step": 701000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.77151778518005e-07, |
|
"loss": 1.312, |
|
"step": 701500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.419252295361931e-07, |
|
"loss": 1.3186, |
|
"step": 702000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.066986805543813e-07, |
|
"loss": 1.3179, |
|
"step": 702500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.714721315725696e-07, |
|
"loss": 1.3172, |
|
"step": 703000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.3624558259075775e-07, |
|
"loss": 1.314, |
|
"step": 703500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 4.0101903360894587e-07, |
|
"loss": 1.3203, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.6579248462713404e-07, |
|
"loss": 1.3128, |
|
"step": 704500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 3.305659356453222e-07, |
|
"loss": 1.3217, |
|
"step": 705000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.953393866635104e-07, |
|
"loss": 1.318, |
|
"step": 705500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.6011283768169856e-07, |
|
"loss": 1.3209, |
|
"step": 706000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.248862886998867e-07, |
|
"loss": 1.3108, |
|
"step": 706500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.896597397180749e-07, |
|
"loss": 1.3117, |
|
"step": 707000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.5443319073626305e-07, |
|
"loss": 1.3277, |
|
"step": 707500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.1920664175445124e-07, |
|
"loss": 1.3182, |
|
"step": 708000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 8.39800927726394e-08, |
|
"loss": 1.3135, |
|
"step": 708500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.875354379082757e-08, |
|
"loss": 1.3087, |
|
"step": 709000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.3526994809015742e-08, |
|
"loss": 1.3195, |
|
"step": 709500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 709692, |
|
"total_flos": 6.168001493481062e+18, |
|
"train_runtime": 388027.9373, |
|
"train_samples_per_second": 1.829 |
|
} |
|
], |
|
"max_steps": 709692, |
|
"num_train_epochs": 3, |
|
"total_flos": 6.168001493481062e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|