{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9999981318415465,
  "eval_steps": 500,
  "global_step": 535286,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0018681584534637058, "grad_norm": 7.906698703765869, "learning_rate": 9.999978643682902e-05, "loss": 3.2962, "step": 500 },
    { "epoch": 0.0037363169069274116, "grad_norm": 8.97018051147461, "learning_rate": 9.99991423149794e-05, "loss": 3.1699, "step": 1000 },
    { "epoch": 0.005604475360391117, "grad_norm": 15.646541595458984, "learning_rate": 9.999806763655335e-05, "loss": 3.0952, "step": 1500 },
    { "epoch": 0.007472633813854823, "grad_norm": 7.570132732391357, "learning_rate": 9.999656241080522e-05, "loss": 3.0763, "step": 2000 },
    { "epoch": 0.009340792267318529, "grad_norm": 5.853657245635986, "learning_rate": 9.999462665069693e-05, "loss": 3.0355, "step": 2500 },
    { "epoch": 0.011208950720782235, "grad_norm": 6.587205410003662, "learning_rate": 9.999226553509718e-05, "loss": 3.0291, "step": 3000 },
    { "epoch": 0.01307710917424594, "grad_norm": 6.537953853607178, "learning_rate": 9.998946962095583e-05, "loss": 2.9902, "step": 3500 },
    { "epoch": 0.014945267627709646, "grad_norm": 5.199572563171387, "learning_rate": 9.998624323353232e-05, "loss": 2.9598, "step": 4000 },
    { "epoch": 0.016813426081173352, "grad_norm": 6.676388263702393, "learning_rate": 9.998258640060996e-05, "loss": 2.9677, "step": 4500 },
    { "epoch": 0.018681584534637058, "grad_norm": 5.827391147613525, "learning_rate": 9.997849915367876e-05, "loss": 2.9525, "step": 5000 },
    { "epoch": 0.020549742988100764, "grad_norm": 7.3195953369140625, "learning_rate": 9.997398152793517e-05, "loss": 2.8857, "step": 5500 },
    { "epoch": 0.02241790144156447, "grad_norm": 5.648918151855469, "learning_rate": 9.99690335622817e-05, "loss": 2.8672, "step": 6000 },
    { "epoch": 0.024286059895028175, "grad_norm": 5.5112528800964355, "learning_rate": 9.996366648525912e-05, "loss": 2.9211, "step": 6500 },
    { "epoch": 0.02615421834849188, "grad_norm": 5.33510684967041, "learning_rate": 9.995785883176955e-05, "loss": 2.8746, "step": 7000 },
    { "epoch": 0.028022376801955587, "grad_norm": 4.034855365753174, "learning_rate": 9.995162097720716e-05, "loss": 2.8976, "step": 7500 },
    { "epoch": 0.029890535255419293, "grad_norm": 6.461133003234863, "learning_rate": 9.994495297528784e-05, "loss": 2.8834, "step": 8000 },
    { "epoch": 0.031758693708883, "grad_norm": 5.551321029663086, "learning_rate": 9.993785488343162e-05, "loss": 2.7976, "step": 8500 },
    { "epoch": 0.033626852162346704, "grad_norm": 4.432355880737305, "learning_rate": 9.993032676276217e-05, "loss": 2.8252, "step": 9000 },
    { "epoch": 0.03549501061581041, "grad_norm": 4.1577839851379395, "learning_rate": 9.99223686781062e-05, "loss": 2.8277, "step": 9500 },
    { "epoch": 0.037363169069274116, "grad_norm": 4.840776443481445, "learning_rate": 9.991398069799303e-05, "loss": 2.8151, "step": 10000 },
    { "epoch": 0.03923132752273782, "grad_norm": 5.0539093017578125, "learning_rate": 9.99051628946539e-05, "loss": 2.8342, "step": 10500 },
    { "epoch": 0.04109948597620153, "grad_norm": 4.669162750244141, "learning_rate": 9.989593426795811e-05, "loss": 2.8473, "step": 11000 },
    { "epoch": 0.04296764442966523, "grad_norm": 6.490626811981201, "learning_rate": 9.98862579089188e-05, "loss": 2.7918, "step": 11500 },
    { "epoch": 0.04483580288312894, "grad_norm": 5.523123741149902, "learning_rate": 9.98761519653822e-05, "loss": 2.8058, "step": 12000 },
    { "epoch": 0.046703961336592645, "grad_norm": 4.8026227951049805, "learning_rate": 9.98656165243734e-05, "loss": 2.76, "step": 12500 },
    { "epoch": 0.04857211979005635, "grad_norm": 4.3018083572387695, "learning_rate": 9.985467403479736e-05, "loss": 2.7533, "step": 13000 },
    { "epoch": 0.050440278243520056, "grad_norm": 3.7984395027160645, "learning_rate": 9.984330394823319e-05, "loss": 2.7928, "step": 13500 },
    { "epoch": 0.05230843669698376, "grad_norm": 3.971073627471924, "learning_rate": 9.983148229059621e-05, "loss": 2.7542, "step": 14000 },
    { "epoch": 0.05417659515044747, "grad_norm": 4.415925979614258, "learning_rate": 9.98192315201501e-05, "loss": 2.7767, "step": 14500 },
    { "epoch": 0.056044753603911174, "grad_norm": 4.695183277130127, "learning_rate": 9.980655174238964e-05, "loss": 2.7724, "step": 15000 },
    { "epoch": 0.05791291205737488, "grad_norm": 5.4851484298706055, "learning_rate": 9.979344306650395e-05, "loss": 2.7768, "step": 15500 },
    { "epoch": 0.059781070510838585, "grad_norm": 4.120709419250488, "learning_rate": 9.977990560537549e-05, "loss": 2.7775, "step": 16000 },
    { "epoch": 0.06164922896430229, "grad_norm": 3.63053560256958, "learning_rate": 9.976593947557912e-05, "loss": 2.7329, "step": 16500 },
    { "epoch": 0.063517387417766, "grad_norm": 4.178781509399414, "learning_rate": 9.97515447973811e-05, "loss": 2.7428, "step": 17000 },
    { "epoch": 0.0653855458712297, "grad_norm": 3.8429136276245117, "learning_rate": 9.973675176842667e-05, "loss": 2.7136, "step": 17500 },
    { "epoch": 0.06725370432469341, "grad_norm": 3.6935720443725586, "learning_rate": 9.972150122544814e-05, "loss": 2.6918, "step": 18000 },
    { "epoch": 0.06912186277815711, "grad_norm": 4.678779125213623, "learning_rate": 9.970582251673812e-05, "loss": 2.686, "step": 18500 },
    { "epoch": 0.07099002123162082, "grad_norm": 5.219886779785156, "learning_rate": 9.968971577731036e-05, "loss": 2.7664, "step": 19000 },
    { "epoch": 0.07285817968508453, "grad_norm": 3.985466241836548, "learning_rate": 9.967318114586451e-05, "loss": 2.7409, "step": 19500 },
    { "epoch": 0.07472633813854823, "grad_norm": 5.018237590789795, "learning_rate": 9.965621876478483e-05, "loss": 2.7278, "step": 20000 },
    { "epoch": 0.07659449659201194, "grad_norm": 4.305635452270508, "learning_rate": 9.963882878013921e-05, "loss": 2.7453, "step": 20500 },
    { "epoch": 0.07846265504547564, "grad_norm": 3.6431195735931396, "learning_rate": 9.962101134167761e-05, "loss": 2.6693, "step": 21000 },
    { "epoch": 0.08033081349893935, "grad_norm": 3.750077962875366, "learning_rate": 9.960280351865064e-05, "loss": 2.7108, "step": 21500 },
    { "epoch": 0.08219897195240305, "grad_norm": 3.730613946914673, "learning_rate": 9.95841324906568e-05, "loss": 2.6607, "step": 22000 },
    { "epoch": 0.08406713040586676, "grad_norm": 4.009971618652344, "learning_rate": 9.956503447985205e-05, "loss": 2.7232, "step": 22500 },
    { "epoch": 0.08593528885933047, "grad_norm": 3.1298115253448486, "learning_rate": 9.954550965069465e-05, "loss": 2.6655, "step": 23000 },
    { "epoch": 0.08780344731279417, "grad_norm": 3.9897303581237793, "learning_rate": 9.952555817131835e-05, "loss": 2.6755, "step": 23500 },
    { "epoch": 0.08967160576625788, "grad_norm": 5.565286636352539, "learning_rate": 9.950522139495593e-05, "loss": 2.6854, "step": 24000 },
    { "epoch": 0.09153976421972158, "grad_norm": 3.5274269580841064, "learning_rate": 9.948441798666596e-05, "loss": 2.6821, "step": 24500 },
    { "epoch": 0.09340792267318529, "grad_norm": 4.026999473571777, "learning_rate": 9.946323133845033e-05, "loss": 2.6389, "step": 25000 },
    { "epoch": 0.095276081126649, "grad_norm": 4.1627326011657715, "learning_rate": 9.944157671638854e-05, "loss": 2.6786, "step": 25500 },
    { "epoch": 0.0971442395801127, "grad_norm": 3.341585159301758, "learning_rate": 9.94194963391034e-05, "loss": 2.6419, "step": 26000 },
    { "epoch": 0.09901239803357641, "grad_norm": 3.5735983848571777, "learning_rate": 9.939699039673516e-05, "loss": 2.652, "step": 26500 },
    { "epoch": 0.10088055648704011, "grad_norm": 3.736764669418335, "learning_rate": 9.937405908308882e-05, "loss": 2.701, "step": 27000 },
    { "epoch": 0.10274871494050382, "grad_norm": 3.172218084335327, "learning_rate": 9.935070259563231e-05, "loss": 2.6086, "step": 27500 },
    { "epoch": 0.10461687339396752, "grad_norm": 3.945516347885132, "learning_rate": 9.932692113549484e-05, "loss": 2.6714, "step": 28000 },
    { "epoch": 0.10648503184743123, "grad_norm": 2.7730209827423096, "learning_rate": 9.930271490746525e-05, "loss": 2.6346, "step": 28500 },
    { "epoch": 0.10835319030089494, "grad_norm": 3.7872776985168457, "learning_rate": 9.92780841199901e-05, "loss": 2.6376, "step": 29000 },
    { "epoch": 0.11022134875435864, "grad_norm": 3.9411559104919434, "learning_rate": 9.925302898517198e-05, "loss": 2.6674, "step": 29500 },
    { "epoch": 0.11208950720782235, "grad_norm": 4.368437767028809, "learning_rate": 9.922760110043857e-05, "loss": 2.6232, "step": 30000 },
    { "epoch": 0.11395766566128605, "grad_norm": 4.385318279266357, "learning_rate": 9.920169876946009e-05, "loss": 2.595, "step": 30500 },
    { "epoch": 0.11582582411474976, "grad_norm": 3.4636647701263428, "learning_rate": 9.917537274891421e-05, "loss": 2.6073, "step": 31000 },
    { "epoch": 0.11769398256821346, "grad_norm": 2.474412202835083, "learning_rate": 9.914862326550168e-05, "loss": 2.655, "step": 31500 },
    { "epoch": 0.11956214102167717, "grad_norm": 3.5162529945373535, "learning_rate": 9.912145054956974e-05, "loss": 2.6259, "step": 32000 },
    { "epoch": 0.12143029947514088, "grad_norm": 3.149369716644287, "learning_rate": 9.909385483511026e-05, "loss": 2.6045, "step": 32500 },
    { "epoch": 0.12329845792860458, "grad_norm": 3.873689651489258, "learning_rate": 9.906583635975763e-05, "loss": 2.6476, "step": 33000 },
    { "epoch": 0.1251666163820683, "grad_norm": 4.371992588043213, "learning_rate": 9.90374526682891e-05, "loss": 2.6149, "step": 33500 },
    { "epoch": 0.127034774835532, "grad_norm": 4.554148197174072, "learning_rate": 9.900859024291592e-05, "loss": 2.6146, "step": 34000 },
    { "epoch": 0.1289029332889957, "grad_norm": 4.277965545654297, "learning_rate": 9.897930579088681e-05, "loss": 2.5902, "step": 34500 },
    { "epoch": 0.1307710917424594, "grad_norm": 4.317843914031982, "learning_rate": 9.894959956437835e-05, "loss": 2.6276, "step": 35000 },
    { "epoch": 0.13263925019592313, "grad_norm": 3.6088337898254395, "learning_rate": 9.891953249519332e-05, "loss": 2.5647, "step": 35500 },
    { "epoch": 0.13450740864938682, "grad_norm": 2.6994011402130127, "learning_rate": 9.888898433303897e-05, "loss": 2.6306, "step": 36000 },
    { "epoch": 0.13637556710285054, "grad_norm": 3.670053005218506, "learning_rate": 9.885801517418857e-05, "loss": 2.6103, "step": 36500 },
    { "epoch": 0.13824372555631423, "grad_norm": 3.3493151664733887, "learning_rate": 9.882662528532621e-05, "loss": 2.5293, "step": 37000 },
    { "epoch": 0.14011188400977795, "grad_norm": 4.308838844299316, "learning_rate": 9.879481493675895e-05, "loss": 2.5701, "step": 37500 },
    { "epoch": 0.14198004246324164, "grad_norm": 3.550856828689575, "learning_rate": 9.876258440241463e-05, "loss": 2.5949, "step": 38000 },
    { "epoch": 0.14384820091670536, "grad_norm": 3.9775218963623047, "learning_rate": 9.872999967960666e-05, "loss": 2.5844, "step": 38500 },
    { "epoch": 0.14571635937016905, "grad_norm": 3.936997413635254, "learning_rate": 9.869693044893364e-05, "loss": 2.5558, "step": 39000 },
    { "epoch": 0.14758451782363277, "grad_norm": 4.209615707397461, "learning_rate": 9.866344187539423e-05, "loss": 2.5605, "step": 39500 },
    { "epoch": 0.14945267627709646, "grad_norm": 4.603176116943359, "learning_rate": 9.862960248064681e-05, "loss": 2.6045, "step": 40000 },
    { "epoch": 0.15132083473056018, "grad_norm": 3.0863678455352783, "learning_rate": 9.859527692735271e-05, "loss": 2.5638, "step": 40500 },
    { "epoch": 0.15318899318402387, "grad_norm": 3.8357596397399902, "learning_rate": 9.856053290655904e-05, "loss": 2.5569, "step": 41000 },
    { "epoch": 0.1550571516374876, "grad_norm": 3.3822269439697266, "learning_rate": 9.85253707174563e-05, "loss": 2.5459, "step": 41500 },
    { "epoch": 0.1569253100909513, "grad_norm": 4.058901309967041, "learning_rate": 9.848979066283589e-05, "loss": 2.6128, "step": 42000 },
    { "epoch": 0.158793468544415, "grad_norm": 4.78932523727417, "learning_rate": 9.84537930490876e-05, "loss": 2.5862, "step": 42500 },
    { "epoch": 0.1606616269978787, "grad_norm": 3.3654229640960693, "learning_rate": 9.841737818619692e-05, "loss": 2.5509, "step": 43000 },
    { "epoch": 0.16252978545134242, "grad_norm": 3.9686570167541504, "learning_rate": 9.838054638774244e-05, "loss": 2.5089, "step": 43500 },
    { "epoch": 0.1643979439048061, "grad_norm": 2.973649740219116, "learning_rate": 9.834329797089303e-05, "loss": 2.5321, "step": 44000 },
    { "epoch": 0.16626610235826983, "grad_norm": 2.5326201915740967, "learning_rate": 9.83056332564052e-05, "loss": 2.5408, "step": 44500 },
    { "epoch": 0.16813426081173352, "grad_norm": 3.884883165359497, "learning_rate": 9.826762914491992e-05, "loss": 2.5352, "step": 45000 },
    { "epoch": 0.17000241926519724, "grad_norm": 3.9567508697509766, "learning_rate": 9.822913364272259e-05, "loss": 2.5619, "step": 45500 },
    { "epoch": 0.17187057771866093, "grad_norm": 3.041057825088501, "learning_rate": 9.819022282598776e-05, "loss": 2.555, "step": 46000 },
    { "epoch": 0.17373873617212465, "grad_norm": 3.1877288818359375, "learning_rate": 9.815089702978735e-05, "loss": 2.5458, "step": 46500 },
    { "epoch": 0.17560689462558834, "grad_norm": 3.142703056335449, "learning_rate": 9.811115659276677e-05, "loss": 2.5607, "step": 47000 },
    { "epoch": 0.17747505307905206, "grad_norm": 3.609555959701538, "learning_rate": 9.807100185714202e-05, "loss": 2.5683, "step": 47500 },
    { "epoch": 0.17934321153251576, "grad_norm": 3.200345277786255, "learning_rate": 9.803051471896693e-05, "loss": 2.5496, "step": 48000 },
    { "epoch": 0.18121136998597948, "grad_norm": 3.56850266456604, "learning_rate": 9.798953325390536e-05, "loss": 2.5425, "step": 48500 },
    { "epoch": 0.18307952843944317, "grad_norm": 3.4314849376678467, "learning_rate": 9.794813853757214e-05, "loss": 2.5238, "step": 49000 },
    { "epoch": 0.1849476868929069, "grad_norm": 3.024343967437744, "learning_rate": 9.790633092642875e-05, "loss": 2.5786, "step": 49500 },
    { "epoch": 0.18681584534637058, "grad_norm": 3.2595534324645996, "learning_rate": 9.786419563225273e-05, "loss": 2.5386, "step": 50000 },
    { "epoch": 0.1886840037998343, "grad_norm": 3.6985089778900146, "learning_rate": 9.782156413906974e-05, "loss": 2.5338, "step": 50500 },
    { "epoch": 0.190552162253298, "grad_norm": 2.9342880249023438, "learning_rate": 9.777852084104404e-05, "loss": 2.4992, "step": 51000 },
    { "epoch": 0.1924203207067617, "grad_norm": 2.8690543174743652, "learning_rate": 9.773506610883352e-05, "loss": 2.571, "step": 51500 },
    { "epoch": 0.1942884791602254, "grad_norm": 2.8353734016418457, "learning_rate": 9.769120031663902e-05, "loss": 2.4895, "step": 52000 },
    { "epoch": 0.19615663761368912, "grad_norm": 3.6773738861083984, "learning_rate": 9.764692384220111e-05, "loss": 2.5121, "step": 52500 },
    { "epoch": 0.19802479606715281, "grad_norm": 3.3569443225860596, "learning_rate": 9.760223706679688e-05, "loss": 2.527, "step": 53000 },
    { "epoch": 0.19989295452061653, "grad_norm": 2.970712184906006, "learning_rate": 9.755714037523662e-05, "loss": 2.5337, "step": 53500 },
    { "epoch": 0.20176111297408023, "grad_norm": 3.2004318237304688, "learning_rate": 9.751172557674817e-05, "loss": 2.5342, "step": 54000 },
    { "epoch": 0.20362927142754395, "grad_norm": 3.16782546043396, "learning_rate": 9.746581103930153e-05, "loss": 2.524, "step": 54500 },
    { "epoch": 0.20549742988100764, "grad_norm": 3.3260490894317627, "learning_rate": 9.741948776050147e-05, "loss": 2.4701, "step": 55000 },
    { "epoch": 0.20736558833447136, "grad_norm": 3.6631577014923096, "learning_rate": 9.737275613925072e-05, "loss": 2.5314, "step": 55500 },
    { "epoch": 0.20923374678793505, "grad_norm": 2.5733258724212646, "learning_rate": 9.732561657796828e-05, "loss": 2.5362, "step": 56000 },
    { "epoch": 0.21110190524139877, "grad_norm": 3.8227956295013428, "learning_rate": 9.727816498322433e-05, "loss": 2.4807, "step": 56500 },
    { "epoch": 0.21297006369486246, "grad_norm": 3.5182738304138184, "learning_rate": 9.723021157702207e-05, "loss": 2.5263, "step": 57000 },
    { "epoch": 0.21483822214832618, "grad_norm": 3.405224084854126, "learning_rate": 9.71818514582792e-05, "loss": 2.5105, "step": 57500 },
    { "epoch": 0.21670638060178987, "grad_norm": 2.988802671432495, "learning_rate": 9.713308504343815e-05, "loss": 2.5297, "step": 58000 },
    { "epoch": 0.2185745390552536, "grad_norm": 2.3862366676330566, "learning_rate": 9.708391275244016e-05, "loss": 2.5006, "step": 58500 },
    { "epoch": 0.22044269750871728, "grad_norm": 3.3643691539764404, "learning_rate": 9.703433500872156e-05, "loss": 2.5255, "step": 59000 },
    { "epoch": 0.222310855962181, "grad_norm": 3.6664035320281982, "learning_rate": 9.698435223921016e-05, "loss": 2.4421, "step": 59500 },
    { "epoch": 0.2241790144156447, "grad_norm": 3.3508718013763428, "learning_rate": 9.693396487432153e-05, "loss": 2.4893, "step": 60000 },
    { "epoch": 0.22604717286910841, "grad_norm": 3.5202372074127197, "learning_rate": 9.688337731857194e-05, "loss": 2.505, "step": 60500 },
    { "epoch": 0.2279153313225721, "grad_norm": 4.265177249908447, "learning_rate": 9.683218368212872e-05, "loss": 2.5134, "step": 61000 },
    { "epoch": 0.22978348977603583, "grad_norm": 3.761479377746582, "learning_rate": 9.67805867606742e-05, "loss": 2.477, "step": 61500 },
    { "epoch": 0.23165164822949952, "grad_norm": 3.254711866378784, "learning_rate": 9.67285869985239e-05, "loss": 2.4894, "step": 62000 },
    { "epoch": 0.23351980668296324, "grad_norm": 3.4447569847106934, "learning_rate": 9.667629004906115e-05, "loss": 2.5338, "step": 62500 },
    { "epoch": 0.23538796513642693, "grad_norm": 3.283677577972412, "learning_rate": 9.662348675576849e-05, "loss": 2.5028, "step": 63000 },
    { "epoch": 0.23725612358989065, "grad_norm": 3.641008138656616, "learning_rate": 9.657028197461201e-05, "loss": 2.5102, "step": 63500 },
    { "epoch": 0.23912428204335434, "grad_norm": 2.3239517211914062, "learning_rate": 9.651667616375301e-05, "loss": 2.4692, "step": 64000 },
    { "epoch": 0.24099244049681806, "grad_norm": 2.590287446975708, "learning_rate": 9.646266978480605e-05, "loss": 2.4753, "step": 64500 },
    { "epoch": 0.24286059895028175, "grad_norm": 3.5106756687164307, "learning_rate": 9.640826330283514e-05, "loss": 2.4541, "step": 65000 },
    { "epoch": 0.24472875740374547, "grad_norm": 2.9911463260650635, "learning_rate": 9.635345718634972e-05, "loss": 2.5228, "step": 65500 },
    { "epoch": 0.24659691585720916, "grad_norm": 3.7811479568481445, "learning_rate": 9.629825190730053e-05, "loss": 2.468, "step": 66000 },
    { "epoch": 0.24846507431067288, "grad_norm": 3.073608875274658, "learning_rate": 9.624275954658023e-05, "loss": 2.5416, "step": 66500 },
    { "epoch": 0.2503332327641366, "grad_norm": 2.943208932876587, "learning_rate": 9.618675816793752e-05, "loss": 2.4685, "step": 67000 },
    { "epoch": 0.25220139121760027, "grad_norm": 2.2683610916137695, "learning_rate": 9.613047225704368e-05, "loss": 2.4953, "step": 67500 },
    { "epoch": 0.254069549671064, "grad_norm": 3.0341203212738037, "learning_rate": 9.607367670392133e-05, "loss": 2.4601, "step": 68000 },
    { "epoch": 0.2559377081245277, "grad_norm": 3.2594239711761475, "learning_rate": 9.60164843975031e-05, "loss": 2.4339, "step": 68500 },
    { "epoch": 0.2578058665779914, "grad_norm": 3.045818328857422, "learning_rate": 9.595889583028791e-05, "loss": 2.4237, "step": 69000 },
    { "epoch": 0.2596740250314551, "grad_norm": 3.0980165004730225, "learning_rate": 9.590091149818697e-05, "loss": 2.5111, "step": 69500 },
    { "epoch": 0.2615421834849188, "grad_norm": 2.206389904022217, "learning_rate": 9.584253190051957e-05, "loss": 2.4885, "step": 70000 },
    { "epoch": 0.26341034193838253, "grad_norm": 3.909090518951416, "learning_rate": 9.578387548236723e-05, "loss": 2.4945, "step": 70500 },
    { "epoch": 0.26527850039184625, "grad_norm": 3.3355019092559814, "learning_rate": 9.572470765314143e-05, "loss": 2.4225, "step": 71000 },
    { "epoch": 0.2671466588453099, "grad_norm": 2.9104554653167725, "learning_rate": 9.56651460756897e-05, "loss": 2.4666, "step": 71500 },
    { "epoch": 0.26901481729877363, "grad_norm": 2.195571184158325, "learning_rate": 9.560519126291337e-05, "loss": 2.4738, "step": 72000 },
    { "epoch": 0.27088297575223735, "grad_norm": 2.8600668907165527, "learning_rate": 9.554484373110011e-05, "loss": 2.3982, "step": 72500 },
    { "epoch": 0.2727511342057011, "grad_norm": 2.985612630844116, "learning_rate": 9.54842258704496e-05, "loss": 2.4708, "step": 73000 },
    { "epoch": 0.27461929265916474, "grad_norm": 2.609339475631714, "learning_rate": 9.542309524577655e-05, "loss": 2.4385, "step": 73500 },
    { "epoch": 0.27648745111262846, "grad_norm": 2.9328203201293945, "learning_rate": 9.536157347014623e-05, "loss": 2.3942, "step": 74000 },
    { "epoch": 0.2783556095660922, "grad_norm": 3.242722511291504, "learning_rate": 9.529966107333978e-05, "loss": 2.4568, "step": 74500 },
    { "epoch": 0.2802237680195559, "grad_norm": 2.90252423286438, "learning_rate": 9.523735858850218e-05, "loss": 2.4495, "step": 75000 },
    { "epoch": 0.2820919264730196, "grad_norm": 2.491132974624634, "learning_rate": 9.517466655213752e-05, "loss": 2.4401, "step": 75500 },
    { "epoch": 0.2839600849264833, "grad_norm": 2.714989185333252, "learning_rate": 9.511171205407364e-05, "loss": 2.4607, "step": 76000 },
    { "epoch": 0.285828243379947, "grad_norm": 3.1541576385498047, "learning_rate": 9.50482433139732e-05, "loss": 2.4522, "step": 76500 },
    { "epoch": 0.2876964018334107, "grad_norm": 3.280564546585083, "learning_rate": 9.498438665087013e-05, "loss": 2.4696, "step": 77000 },
    { "epoch": 0.28956456028687444, "grad_norm": 3.0421793460845947, "learning_rate": 9.492014261465201e-05, "loss": 2.482, "step": 77500 },
    { "epoch": 0.2914327187403381, "grad_norm": 2.658756971359253, "learning_rate": 9.485551175854214e-05, "loss": 2.4464, "step": 78000 },
    { "epoch": 0.2933008771938018, "grad_norm": 4.537105083465576, "learning_rate": 9.479049463909488e-05, "loss": 2.444, "step": 78500 },
    { "epoch": 0.29516903564726554, "grad_norm": 2.9097115993499756, "learning_rate": 9.472509181619083e-05, "loss": 2.4631, "step": 79000 },
    { "epoch": 0.29703719410072926, "grad_norm": 2.133843421936035, "learning_rate": 9.465943581295223e-05, "loss": 2.4159, "step": 79500 },
    { "epoch": 0.2989053525541929, "grad_norm": 2.5699055194854736, "learning_rate": 9.459326404463687e-05, "loss": 2.4392, "step": 80000 },
    { "epoch": 0.30077351100765665, "grad_norm": 2.927656412124634, "learning_rate": 9.452684176567582e-05, "loss": 2.4121, "step": 80500 },
    { "epoch": 0.30264166946112037, "grad_norm": 3.3542892932891846, "learning_rate": 9.44599033266823e-05, "loss": 2.4138, "step": 81000 },
    { "epoch": 0.3045098279145841, "grad_norm": 2.9518256187438965, "learning_rate": 9.439258203104611e-05, "loss": 2.4193, "step": 81500 },
    { "epoch": 0.30637798636804775, "grad_norm": 2.9476184844970703, "learning_rate": 9.432487845848965e-05, "loss": 2.3944, "step": 82000 },
    { "epoch": 0.30824614482151147, "grad_norm": 2.688512086868286, "learning_rate": 9.425679319202733e-05, "loss": 2.4331, "step": 82500 },
    { "epoch": 0.3101143032749752, "grad_norm": 2.971700429916382, "learning_rate": 9.418832681796042e-05, "loss": 2.4513, "step": 83000 },
    { "epoch": 0.3119824617284389, "grad_norm": 2.495612382888794, "learning_rate": 9.411947992587194e-05, "loss": 2.3972, "step": 83500 },
    { "epoch": 0.3138506201819026, "grad_norm": 3.071038246154785, "learning_rate": 9.405025310862172e-05, "loss": 2.4309, "step": 84000 },
    { "epoch": 0.3157187786353663, "grad_norm": 3.627650260925293, "learning_rate": 9.398064696234121e-05, "loss": 2.4297, "step": 84500 },
    { "epoch": 0.31758693708883, "grad_norm": 2.077777147293091, "learning_rate": 9.391066208642838e-05, "loss": 2.4245, "step": 85000 },
    { "epoch": 0.31945509554229373, "grad_norm": 3.0603654384613037, "learning_rate": 9.384044018651683e-05, "loss": 2.4145, "step": 85500 },
    { "epoch": 0.3213232539957574, "grad_norm": 2.993283271789551, "learning_rate": 9.37697004170087e-05, "loss": 2.4095, "step": 86000 },
    { "epoch": 0.3231914124492211, "grad_norm": 2.8521878719329834, "learning_rate": 9.369858373438785e-05, "loss": 2.3967, "step": 86500 },
    { "epoch": 0.32505957090268484, "grad_norm": 3.297847032546997, "learning_rate": 9.362709075105988e-05, "loss": 2.4343, "step": 87000 },
    { "epoch": 0.32692772935614856, "grad_norm": 2.3240292072296143, "learning_rate": 9.355522208267086e-05, "loss": 2.3947, "step": 87500 },
    { "epoch": 0.3287958878096122, "grad_norm": 3.8041253089904785, "learning_rate": 9.348297834810195e-05, "loss": 2.4111, "step": 88000 },
    { "epoch": 0.33066404626307594, "grad_norm": 2.6961183547973633, "learning_rate": 9.341036016946413e-05, "loss": 2.4159, "step": 88500 },
    { "epoch": 0.33253220471653966, "grad_norm": 3.0299246311187744, "learning_rate": 9.33373681720928e-05, "loss": 2.4012, "step": 89000 },
    { "epoch": 0.3344003631700034, "grad_norm": 2.75026273727417, "learning_rate": 9.326415008694199e-05, "loss": 2.3755, "step": 89500 },
    { "epoch": 0.33626852162346704, "grad_norm": 2.4696195125579834, "learning_rate": 9.319056093086089e-05, "loss": 2.3953, "step": 90000 },
    { "epoch": 0.33813668007693076, "grad_norm": 2.428610324859619, "learning_rate": 9.311645274788967e-05, "loss": 2.4433, "step": 90500 },
    { "epoch": 0.3400048385303945, "grad_norm": 2.851217269897461, "learning_rate": 9.304197327710381e-05, "loss": 2.429, "step": 91000 },
    { "epoch": 0.3418729969838582, "grad_norm": 3.0488922595977783, "learning_rate": 9.296712315986686e-05, "loss": 2.417, "step": 91500 },
    { "epoch": 0.34374115543732187, "grad_norm": 2.7306880950927734, "learning_rate": 9.289190304073406e-05, "loss": 2.4539, "step": 92000 },
    { "epoch": 0.3456093138907856, "grad_norm": 3.2483866214752197, "learning_rate": 9.281631356744687e-05, "loss": 2.3616, "step": 92500 },
    { "epoch": 0.3474774723442493, "grad_norm": 2.66874098777771, "learning_rate": 9.274035539092736e-05, "loss": 2.3984, "step": 93000 },
    { "epoch": 0.349345630797713, "grad_norm": 2.5911643505096436, "learning_rate": 9.266402916527259e-05, "loss": 2.4403, "step": 93500 },
    { "epoch": 0.3512137892511767, "grad_norm": 3.084787607192993, "learning_rate": 9.258748930120269e-05, "loss": 2.3685, "step": 94000 },
    { "epoch": 0.3530819477046404, "grad_norm": 3.077162742614746, "learning_rate": 9.251042968504211e-05, "loss": 2.4033, "step": 94500 },
    { "epoch": 0.35495010615810413, "grad_norm": 2.7327165603637695, "learning_rate": 9.243300399970075e-05, "loss": 2.357, "step": 95000 },
    { "epoch": 0.35681826461156785, "grad_norm": 2.942444324493408, "learning_rate": 9.235521291191276e-05, "loss": 2.4114, "step": 95500 },
    { "epoch": 0.3586864230650315, "grad_norm": 2.504429817199707, "learning_rate": 9.227705709155896e-05, "loss": 2.3763, "step": 96000 },
    { "epoch": 0.36055458151849523, "grad_norm": 3.322981119155884, "learning_rate": 9.219853721166094e-05, "loss": 2.4037, "step": 96500 },
    { "epoch": 0.36242273997195895, "grad_norm": 2.8509936332702637, "learning_rate": 9.21196539483753e-05, "loss": 2.4089, "step": 97000 },
    { "epoch": 0.36429089842542267, "grad_norm": 3.585662603378296, "learning_rate": 9.204040798098783e-05, "loss": 2.4132, "step": 97500 },
    { "epoch": 0.36615905687888634, "grad_norm": 2.8213889598846436, "learning_rate": 9.196095956872841e-05, "loss": 2.3647, "step": 98000 },
    { "epoch": 0.36802721533235006, "grad_norm": 3.3626108169555664, "learning_rate": 9.188099096546838e-05, "loss": 2.4143, "step": 98500 },
    { "epoch": 0.3698953737858138, "grad_norm": 2.993591785430908, "learning_rate": 9.180066171330013e-05, "loss": 2.3806, "step": 99000 },
    { "epoch": 0.3717635322392775, "grad_norm": 2.9788472652435303, "learning_rate": 9.171997250396128e-05, "loss": 2.3571, "step": 99500 },
    { "epoch": 0.37363169069274116, "grad_norm": 2.3888766765594482, "learning_rate": 9.163908648731292e-05, "loss": 2.3841, "step": 100000 },
    { "epoch": 0.3754998491462049, "grad_norm": 3.0424160957336426, "learning_rate": 9.155768016766876e-05, "loss": 2.4152, "step": 100500 },
    { "epoch": 0.3773680075996686, "grad_norm": 2.592036724090576, "learning_rate": 9.147591598323593e-05, "loss": 2.3465, "step": 101000 },
    { "epoch": 0.3792361660531323, "grad_norm": 2.8690261840820312, "learning_rate": 9.139379463810866e-05, "loss": 2.3974, "step": 101500 },
    { "epoch": 0.381104324506596, "grad_norm": 2.7227180004119873, "learning_rate": 9.131148215032317e-05, "loss": 2.3688, "step": 102000 },
    { "epoch": 0.3829724829600597, "grad_norm": 2.856623888015747, "learning_rate": 9.12286493191618e-05, "loss": 2.4341, "step": 102500 },
    { "epoch": 0.3848406414135234, "grad_norm": 2.56028151512146, "learning_rate": 9.114546145658827e-05, "loss": 2.427, "step": 103000 },
    { "epoch": 0.38670879986698714, "grad_norm": 3.3118507862091064, "learning_rate": 9.106208671644056e-05, "loss": 2.3166, "step": 103500 },
    { "epoch": 0.3885769583204508, "grad_norm": 3.2025699615478516, "learning_rate": 9.097819164962692e-05, "loss": 2.4462, "step": 104000 },
    { "epoch": 0.3904451167739145, "grad_norm": 3.240300416946411, "learning_rate": 9.089394370816208e-05, "loss": 2.4285, "step": 104500 },
    { "epoch": 0.39231327522737824, "grad_norm": 3.5723962783813477, "learning_rate": 9.080934361752857e-05, "loss": 2.355, "step": 105000 },
    { "epoch": 0.39418143368084196, "grad_norm": 3.186774253845215, "learning_rate": 9.072456235949608e-05, "loss": 2.4029, "step": 105500 },
    { "epoch": 0.39604959213430563, "grad_norm": 2.629359006881714, "learning_rate": 9.063926085974259e-05, "loss": 2.3459, "step": 106000 },
    { "epoch": 0.39791775058776935, "grad_norm": 3.2429652214050293, "learning_rate": 9.055360940396558e-05, "loss": 2.3847, "step": 106500 },
    { "epoch": 0.39978590904123307, "grad_norm": 2.427645206451416, "learning_rate": 9.046760872973364e-05, "loss": 2.3435, "step": 107000 },
    { "epoch": 0.4016540674946968, "grad_norm": 2.556652784347534, "learning_rate": 9.038143262321399e-05, "loss": 2.4121, "step": 107500 },
    { "epoch": 0.40352222594816045, "grad_norm": 2.9563798904418945, "learning_rate": 9.029473643152501e-05, "loss": 2.3786, "step": 108000 },
    { "epoch": 0.40539038440162417, "grad_norm": 2.457141876220703, "learning_rate": 9.020769325060857e-05, "loss": 2.3734, "step": 108500 },
    { "epoch": 0.4072585428550879, "grad_norm": 2.489871025085449, "learning_rate": 9.012030383001778e-05, "loss": 2.3934, "step": 109000 },
    { "epoch": 0.4091267013085516, "grad_norm": 2.9061882495880127, "learning_rate": 9.003256892228738e-05, "loss": 2.3507, "step": 109500 },
    { "epoch": 0.4109948597620153, "grad_norm": 3.2263598442077637, "learning_rate": 8.994448928292711e-05, "loss": 2.3866, "step": 110000 },
    { "epoch": 0.412863018215479, "grad_norm": 2.9006874561309814, "learning_rate": 8.985606567041537e-05, "loss": 2.3546, "step": 110500 },
    { "epoch": 0.4147311766689427, "grad_norm": 2.51509428024292, "learning_rate": 8.976747672185874e-05, "loss": 2.3669, "step": 111000 },
    { "epoch": 0.41659933512240643, "grad_norm": 2.6938908100128174, "learning_rate": 8.967836813445061e-05, "loss": 2.3485, "step": 111500 },
    { "epoch": 0.4184674935758701, "grad_norm": 2.7218174934387207, "learning_rate": 8.958891786553452e-05, "loss": 2.3798, "step": 112000 },
    { "epoch": 0.4203356520293338, "grad_norm": 3.0031161308288574, "learning_rate": 8.949912668539173e-05, "loss": 2.3501, "step": 112500 },
    { "epoch": 0.42220381048279754, "grad_norm": 2.5878889560699463, "learning_rate": 8.940899536723916e-05, "loss": 2.3512, "step": 113000 },
    { "epoch": 0.42407196893626126, "grad_norm": 2.7273967266082764, "learning_rate": 8.931852468722277e-05, "loss": 2.3394, "step": 113500 },
    { "epoch": 0.4259401273897249, "grad_norm": 2.3990983963012695, "learning_rate": 8.922771542441081e-05, "loss": 2.3104, "step": 114000 },
    { "epoch": 0.42780828584318864, "grad_norm": 3.0549476146698, "learning_rate": 8.913656836078725e-05, "loss": 2.3557, "step": 114500 },
    { "epoch": 0.42967644429665236, "grad_norm": 2.417224168777466, "learning_rate": 8.904508428124488e-05, "loss": 2.32, "step": 115000 },
    { "epoch": 0.4315446027501161, "grad_norm": 2.56392502784729, "learning_rate": 8.895363192352878e-05, "loss": 2.3651, "step": 115500 },
    { "epoch": 0.43341276120357974, "grad_norm": 2.027083396911621, "learning_rate": 8.886147751859986e-05, "loss": 2.3277, "step": 116000 },
    { "epoch": 0.43528091965704346, "grad_norm": 1.902034044265747, "learning_rate": 8.876898846663621e-05, "loss": 2.3185, "step": 116500 },
    { "epoch": 0.4371490781105072, "grad_norm": 2.7564985752105713, "learning_rate": 8.867616556408684e-05, "loss": 2.3674, "step": 117000 },
    { "epoch": 0.4390172365639709, "grad_norm": 3.024198532104492, "learning_rate": 8.858300961027575e-05, "loss": 2.3832, "step": 117500 },
    { "epoch": 0.44088539501743457, "grad_norm": 2.2952866554260254, "learning_rate": 8.84895214073948e-05, "loss": 2.3799, "step": 118000 },
    { "epoch": 0.4427535534708983, "grad_norm": 2.352498769760132, "learning_rate": 8.839570176049705e-05, "loss": 2.3958, "step": 118500 },
    { "epoch": 0.444621711924362, "grad_norm": 3.565748453140259, "learning_rate": 8.830155147748969e-05, "loss": 2.3614, "step": 119000 },
    { "epoch": 0.4464898703778257, "grad_norm": 3.0577287673950195, "learning_rate": 8.82072606579692e-05, "loss": 2.3458, "step": 119500 },
    { "epoch": 0.4483580288312894, "grad_norm": 2.6253695487976074, "learning_rate": 8.81124521950556e-05, "loss": 2.3273, "step": 120000 },
    { "epoch": 0.4502261872847531, "grad_norm": 2.1585161685943604, "learning_rate": 8.801731553517346e-05, "loss": 2.3298, "step": 120500 },
    { "epoch": 0.45209434573821683, "grad_norm": 2.5908641815185547, "learning_rate": 8.792185149757116e-05, "loss": 2.323, "step": 121000 },
    { "epoch": 0.45396250419168055, "grad_norm": 1.9700515270233154, "learning_rate": 8.78262528108574e-05, "loss": 2.3285, "step": 121500 },
    { "epoch": 0.4558306626451442, "grad_norm": 2.0091867446899414, "learning_rate": 8.773013713746569e-05, "loss": 2.3353, "step": 122000 },
    { "epoch": 0.45769882109860793, "grad_norm": 3.026522159576416, "learning_rate": 8.763369655932719e-05, "loss": 2.3478, "step": 122500 },
    { "epoch": 0.45956697955207165, "grad_norm": 2.7834973335266113, "learning_rate": 8.753693190691863e-05, "loss": 2.3256, "step": 123000 },
    { "epoch": 0.4614351380055354, "grad_norm": 3.004798173904419, "learning_rate": 8.743984401350747e-05, "loss": 2.3466, "step": 123500 },
    { "epoch": 0.46330329645899904, "grad_norm": 2.611668586730957, "learning_rate": 8.734262885694443e-05, "loss": 2.3222, "step": 124000 },
    { "epoch": 0.46517145491246276, "grad_norm": 2.902439594268799, "learning_rate": 8.72448976347505e-05, "loss": 2.3485, "step": 124500 },
    { "epoch": 0.4670396133659265, "grad_norm": 2.932037353515625, "learning_rate": 8.714684568634262e-05, "loss": 2.3258, "step": 125000 },
    { "epoch": 0.4689077718193902, "grad_norm": 2.526458263397217, "learning_rate": 8.70484738560735e-05, "loss": 2.3549, "step": 125500 },
    { "epoch": 0.47077593027285386, "grad_norm": 2.8670670986175537, "learning_rate": 8.694978299105044e-05, "loss": 2.3685, "step": 126000 },
    { "epoch": 0.4726440887263176, "grad_norm": 2.95123553276062, "learning_rate": 8.685077394112803e-05, "loss": 2.327, "step": 126500 },
    { "epoch": 0.4745122471797813, "grad_norm": 3.010820150375366, "learning_rate": 8.675164652779493e-05, "loss": 2.3247, "step": 127000 },
    { "epoch": 0.476380405633245, "grad_norm": 1.896767258644104, "learning_rate": 8.665200430068873e-05, "loss": 2.3158, "step": 127500 },
    { "epoch": 0.4782485640867087, "grad_norm": 2.559565305709839, "learning_rate": 8.655204645293866e-05, "loss": 2.3425, "step": 128000 },
    { "epoch": 0.4801167225401724, "grad_norm": 2.658048391342163, "learning_rate": 8.645177384530965e-05, "loss": 2.3565, "step": 128500 },
    { "epoch": 0.4819848809936361, "grad_norm": 1.818748116493225, "learning_rate": 8.635118734127712e-05, "loss": 2.3441, "step": 129000 },
    { "epoch": 0.48385303944709984, "grad_norm": 2.627014398574829, "learning_rate": 8.625028780701953e-05, "loss": 2.3296, "step": 129500 },
    { "epoch": 0.4857211979005635, "grad_norm": 2.687391519546509, "learning_rate": 8.614907611141099e-05, "loss": 2.3334, "step": 130000 },
    { "epoch": 0.4875893563540272, "grad_norm": 3.092353582382202, "learning_rate": 8.604755312601363e-05, "loss": 2.3278, "step": 130500 },
    { "epoch": 0.48945751480749095, "grad_norm": 3.0431768894195557, "learning_rate": 8.59459237010844e-05, "loss": 2.299, "step": 131000 },
    { "epoch": 0.49132567326095467, "grad_norm": 2.2302520275115967, "learning_rate": 8.584378137971116e-05, "loss": 2.2837, "step": 131500 },
    { "epoch": 0.49319383171441833, "grad_norm": 2.7669031620025635, "learning_rate": 8.574133039752728e-05, "loss": 2.3202, "step": 132000 },
    { "epoch": 0.49506199016788205, "grad_norm": 2.6957993507385254, "learning_rate": 8.563857163676681e-05, "loss": 2.3214, "step": 132500 },
    { "epoch": 0.49693014862134577, "grad_norm": 2.662504196166992, "learning_rate": 8.553571241931346e-05, "loss": 2.2907, "step": 133000 },
    { "epoch": 0.4987983070748095, "grad_norm": 2.6600215435028076, "learning_rate": 8.54323413698205e-05, "loss": 2.2866, "step": 133500 },
    { "epoch": 0.5006664655282732, "grad_norm": 1.6196849346160889, "learning_rate": 8.532866520254174e-05, "loss": 2.3064, "step": 134000 },
    { "epoch": 0.5025346239817369, "grad_norm": 2.3502981662750244, "learning_rate": 8.522468481026161e-05, "loss": 2.3447, "step": 134500 },
    { "epoch": 0.5044027824352005, "grad_norm": 2.94901442527771, "learning_rate": 8.512040108838428e-05, "loss": 2.3602, "step": 135000 },
    { "epoch": 0.5062709408886643, "grad_norm": 2.749366283416748, "learning_rate": 8.501581493492603e-05, "loss": 2.3389, "step": 135500 },
    { "epoch": 0.508139099342128, "grad_norm": 3.2299070358276367, "learning_rate": 8.491113732620424e-05, "loss": 2.3348, "step": 136000 },
    { "epoch": 0.5100072577955918, "grad_norm": 2.3727314472198486, "learning_rate": 8.480616028924504e-05, "loss": 2.2864, "step": 136500 },
    { "epoch": 0.5118754162490554, "grad_norm": 1.8499844074249268, "learning_rate": 8.470067345222588e-05, "loss": 2.271, "step": 137000 },
    { "epoch": 0.5137435747025191, "grad_norm": 3.1945462226867676, "learning_rate": 8.459488779801767e-05, "loss": 2.2967, "step": 137500 },
    { "epoch": 0.5156117331559829, "grad_norm": 2.6457462310791016, "learning_rate": 8.448880423757021e-05, "loss": 2.2784, "step": 138000 },
    { "epoch": 0.5174798916094465, "grad_norm": 2.016098976135254, "learning_rate": 8.438242368439869e-05, "loss": 2.3013, "step": 138500 },
    { "epoch": 0.5193480500629102, "grad_norm": 1.97508704662323, "learning_rate": 8.42757470545757e-05, "loss": 2.3232, "step": 139000 },
    { "epoch": 0.521216208516374, "grad_norm": 2.349184274673462, "learning_rate": 8.416877526672355e-05, "loss": 2.3266, "step": 139500 },
    { "epoch": 0.5230843669698376, "grad_norm": 2.6522152423858643, "learning_rate": 8.406150924200616e-05, "loss": 2.2941, "step": 140000 },
    { "epoch": 0.5249525254233014, "grad_norm": 3.5393903255462646, "learning_rate": 8.395394990412121e-05, "loss": 2.3459, "step": 140500 },
    { "epoch": 0.5268206838767651, "grad_norm": 2.5476553440093994, "learning_rate": 8.38460981792922e-05, "loss": 2.2942, "step": 141000 },
    { "epoch": 0.5286888423302287, "grad_norm": 2.8197927474975586, "learning_rate": 8.373817157288324e-05, "loss": 2.3426, "step": 141500 },
    { "epoch": 0.5305570007836925, "grad_norm": 2.1316707134246826, "learning_rate": 8.362973844302275e-05, "loss": 2.2985, "step": 142000 },
    { "epoch": 0.5324251592371562, "grad_norm": 1.9890694618225098, "learning_rate": 8.352101571809362e-05, "loss": 2.2896, "step": 142500 },
    { "epoch": 0.5342933176906198, "grad_norm": 3.057724952697754, "learning_rate": 8.34120043343376e-05, "loss": 2.3079, "step": 143000 },
    { "epoch": 0.5361614761440836, "grad_norm": 2.373011350631714, "learning_rate": 8.330270523048216e-05, "loss": 2.3294, "step": 143500 },
    { "epoch": 0.5380296345975473, "grad_norm": 2.1205389499664307, "learning_rate": 8.31931193477324e-05, "loss": 2.2969, "step": 144000 },
    { "epoch": 0.539897793051011, "grad_norm": 2.767277956008911, "learning_rate": 8.308324762976294e-05, "loss": 2.2901, "step": 144500 },
    { "epoch": 0.5417659515044747, "grad_norm": 2.847618579864502, "learning_rate": 8.297309102270986e-05, "loss": 2.3128, "step": 145000 },
    { "epoch": 0.5436341099579384, "grad_norm": 2.3643147945404053, "learning_rate": 8.286287163899844e-05, "loss": 2.2991, "step": 145500 },
    { "epoch": 0.5455022684114021, "grad_norm": 3.874725103378296, "learning_rate": 8.275214866701926e-05, "loss": 2.2602, "step": 146000 },
    { "epoch": 0.5473704268648658, "grad_norm": 2.4457411766052246, "learning_rate": 8.264114365714206e-05, "loss": 2.3038, "step": 146500 },
    { "epoch": 0.5492385853183295, "grad_norm": 2.56156063079834, "learning_rate": 8.252985756526198e-05, "loss": 2.3193, "step": 147000 },
    { "epoch": 0.5511067437717933, "grad_norm": 3.2425754070281982, "learning_rate": 8.241851476105105e-05, "loss": 2.294, "step": 147500 },
    { "epoch": 0.5529749022252569, "grad_norm": 3.299207925796509, "learning_rate": 8.23066699398898e-05, "loss": 2.2933, "step": 148000 },
    { "epoch": 0.5548430606787207, "grad_norm": 2.3422181606292725, "learning_rate": 8.219454691697226e-05, "loss": 2.3066, "step": 148500 },
    { "epoch": 0.5567112191321844, "grad_norm": 2.9155092239379883, "learning_rate": 8.208214665782109e-05, "loss": 2.2698, "step": 149000 },
    { "epoch": 0.558579377585648, "grad_norm": 3.0940420627593994, "learning_rate": 8.196969575847251e-05, "loss": 2.2787, "step": 149500 },
    { "epoch": 0.5604475360391118, "grad_norm": 3.761610507965088, "learning_rate": 8.185674448258929e-05, "loss": 2.3008, "step": 150000 },
    { "epoch": 0.5623156944925755, "grad_norm": 2.735173463821411, "learning_rate": 8.174374560372093e-05, "loss": 2.3122, "step": 150500 },
    { "epoch": 0.5641838529460392, "grad_norm": 2.3430800437927246, "learning_rate": 8.163024719393988e-05, "loss": 2.2645, "step": 151000 },
    { "epoch": 0.5660520113995029, "grad_norm": 2.489206314086914, "learning_rate": 8.151647640726769e-05, "loss": 2.2695, "step": 151500 },
    { "epoch": 0.5679201698529666, "grad_norm": 3.2072606086730957, "learning_rate": 8.140243422341638e-05, "loss": 2.2641, "step": 152000 },
    { "epoch": 0.5697883283064303, "grad_norm": 3.0480380058288574, "learning_rate": 8.128812162443502e-05, "loss": 2.3294, "step": 152500 },
    { "epoch": 0.571656486759894, "grad_norm": 3.000128746032715, "learning_rate": 8.117353959470134e-05, "loss": 2.2637, "step": 153000 },
    { "epoch": 0.5735246452133577, "grad_norm": 3.1820998191833496, "learning_rate": 8.105868912091317e-05, "loss": 2.2759, "step": 153500 },
    { "epoch": 0.5753928036668214, "grad_norm": 2.6837666034698486, "learning_rate": 8.094357119208004e-05, "loss": 2.2549, "step": 154000 },
    { "epoch": 0.5772609621202851, "grad_norm": 2.4082396030426025, "learning_rate": 8.082841783357048e-05, "loss": 2.3007, "step": 154500 },
    { "epoch": 0.5791291205737489, "grad_norm": 2.461305618286133, "learning_rate": 8.0712768500827e-05, "loss": 2.2654, "step": 155000 },
    { "epoch": 0.5809972790272125, "grad_norm": 2.9279286861419678, "learning_rate": 8.059708678275976e-05, "loss": 2.2669, "step": 155500 },
    { "epoch": 0.5828654374806762, "grad_norm": 2.3760006427764893, "learning_rate": 8.048091002168906e-05, "loss": 2.2429, "step": 156000 },
    { "epoch": 0.58473359593414, "grad_norm": 2.879556894302368, "learning_rate": 8.036447078099056e-05, "loss": 2.2694, "step": 156500 },
    { "epoch": 0.5866017543876036, "grad_norm": 1.9433120489120483, "learning_rate": 8.024777006335506e-05, "loss": 2.243, "step": 157000 },
    { "epoch": 0.5884699128410673, "grad_norm": 2.5363948345184326, "learning_rate": 8.013080887372506e-05, "loss": 2.267, "step": 157500 },
    { "epoch": 0.5903380712945311, "grad_norm": 2.3004775047302246, "learning_rate": 8.001358821928599e-05, "loss": 2.2711, "step": 158000 },
    { "epoch": 0.5922062297479948, "grad_norm": 2.1187326908111572, "learning_rate": 7.989610910945766e-05, "loss": 2.2733, "step": 158500 },
    { "epoch": 0.5940743882014585, "grad_norm": 2.612976312637329, "learning_rate": 7.977860828524794e-05, "loss": 2.2617, "step": 159000 },
    { "epoch": 0.5959425466549222, "grad_norm": 2.5254204273223877, "learning_rate": 7.96606158136407e-05, "loss": 2.2624, "step": 159500 },
    { "epoch": 0.5978107051083859, "grad_norm": 2.352216958999634, "learning_rate": 7.954236792618814e-05, "loss": 2.2923, "step": 160000 },
    { "epoch": 0.5996788635618496, "grad_norm": 2.5276451110839844, "learning_rate": 7.942386564115584e-05, "loss": 2.281, "step": 160500 },
    { "epoch": 0.6015470220153133, "grad_norm": 2.3592355251312256, "learning_rate": 7.930510997900007e-05, "loss": 2.252, "step": 161000 },
    { "epoch": 0.603415180468777, "grad_norm": 3.495464324951172, "learning_rate": 7.918610196235899e-05, "loss": 2.2379, "step": 161500 },
    { "epoch": 0.6052833389222407, "grad_norm": 2.2157094478607178, "learning_rate": 7.906684261604388e-05, "loss": 2.2813, "step": 162000 },
    { "epoch": 0.6071514973757044, "grad_norm": 3.170558452606201, "learning_rate": 7.894733296703025e-05, "loss": 2.2457, "step": 162500 },
    { "epoch": 0.6090196558291682, "grad_norm": 3.1325762271881104, "learning_rate": 7.882781381038415e-05, "loss": 2.2531, "step": 163000 },
    { "epoch": 0.6108878142826318, "grad_norm": 2.3855438232421875, "learning_rate": 7.87078071409669e-05, "loss": 2.2665, "step": 163500 },
    { "epoch": 0.6127559727360955, "grad_norm": 2.261495351791382, "learning_rate": 7.858755326060588e-05, "loss": 2.2769, "step": 164000 },
    { "epoch": 0.6146241311895593, "grad_norm": 3.212700128555298, "learning_rate": 7.846705320484082e-05, "loss": 2.2719, "step": 164500 },
    { "epoch": 0.6164922896430229, "grad_norm": 2.875687837600708, "learning_rate": 7.83465497456751e-05, "loss": 2.2756, "step": 165000 },
    { "epoch": 0.6183604480964866, "grad_norm": 3.213188886642456, "learning_rate": 7.822556094134869e-05, "loss": 2.2475, "step": 165500 },
    { "epoch": 0.6202286065499504, "grad_norm": 2.9114816188812256, "learning_rate": 7.81043290788352e-05, "loss": 2.2411, "step": 166000 },
    { "epoch": 0.622096765003414, "grad_norm": 2.960690498352051, "learning_rate": 7.798285520209603e-05, "loss": 2.2823, "step": 166500 },
    { "epoch": 0.6239649234568778, "grad_norm": 2.9522547721862793, "learning_rate": 7.786138402665644e-05, "loss": 2.2186, "step": 167000 },
    { "epoch": 0.6258330819103415, "grad_norm": 2.8541057109832764, "learning_rate": 7.773942974047013e-05, "loss": 2.2735, "step": 167500 },
    { "epoch": 0.6277012403638051, "grad_norm": 2.182999849319458, "learning_rate": 7.761723658230827e-05, "loss": 2.2556, "step": 168000 },
    { "epoch": 0.6295693988172689, "grad_norm": 2.0711419582366943, "learning_rate": 7.749480560441025e-05, "loss": 2.2949, "step": 168500 },
    { "epoch": 0.6314375572707326, "grad_norm": 2.7931690216064453, "learning_rate": 7.737238343214024e-05, "loss": 2.2579, "step": 169000 },
    { "epoch": 0.6333057157241962, "grad_norm": 2.2357709407806396, "learning_rate": 7.724948045003347e-05, "loss": 2.2145, "step": 169500 },
    { "epoch": 0.63517387417766, "grad_norm": 2.4123311042785645, "learning_rate": 7.712634281504125e-05, "loss": 2.2908, "step": 170000 },
    { "epoch": 0.6370420326311237, "grad_norm": 3.390855312347412, "learning_rate": 7.700321856241075e-05, "loss": 2.1975, "step": 170500 },
    { "epoch": 0.6389101910845875, "grad_norm": 2.8016293048858643, "learning_rate": 7.687961526877562e-05, "loss": 2.2842, "step": 171000 },
    { "epoch": 0.6407783495380511, "grad_norm": 2.734112501144409, "learning_rate": 7.675578050726744e-05, "loss": 2.2881, "step": 171500 },
    { "epoch": 0.6426465079915148, "grad_norm": 2.7221627235412598, "learning_rate": 7.66317153442619e-05, "loss": 2.2748, "step": 172000 },
    { "epoch": 0.6445146664449786, "grad_norm": 2.9320507049560547, "learning_rate": 7.650766966527448e-05, "loss": 2.2157, "step": 172500 },
    { "epoch": 0.6463828248984422, "grad_norm": 2.428924798965454, "learning_rate": 7.638314736178451e-05, "loss": 2.2613, "step": 173000 },
    { "epoch": 0.6482509833519059, "grad_norm": 2.5038206577301025, "learning_rate": 7.62583978656453e-05, "loss": 2.2606, "step": 173500 },
    { "epoch": 0.6501191418053697, "grad_norm": 2.3970868587493896, "learning_rate": 7.613342225110954e-05, "loss": 2.2383, "step": 174000 },
    { "epoch": 0.6519873002588333, "grad_norm": 2.124425172805786, "learning_rate": 7.60082215943772e-05, "loss": 2.2513, "step": 174500 },
    { "epoch": 0.6538554587122971, "grad_norm": 3.180497884750366, "learning_rate": 7.58830480456262e-05, "loss": 2.2722, "step": 175000 },
    { "epoch": 0.6557236171657608, "grad_norm": 2.8902299404144287, "learning_rate": 7.575740098553152e-05, "loss": 2.2439, "step": 175500 },
    { "epoch": 0.6575917756192244, "grad_norm": 2.987680196762085, "learning_rate": 7.563153212126435e-05, "loss": 2.233, "step": 176000 },
    { "epoch": 0.6594599340726882, "grad_norm": 2.5328335762023926, "learning_rate": 7.550544253671663e-05, "loss": 2.2434, "step": 176500 },
    { "epoch": 0.6613280925261519, "grad_norm": 2.5823991298675537, "learning_rate": 7.537913331768098e-05, "loss": 2.2261, "step": 177000 },
    { "epoch": 0.6631962509796155, "grad_norm": 3.252668619155884, "learning_rate": 7.525260555184135e-05, "loss": 2.2626, "step": 177500 },
    { "epoch": 0.6650644094330793, "grad_norm": 2.427614688873291, "learning_rate": 7.512586032876367e-05, "loss": 2.2249, "step": 178000 },
    { "epoch": 0.666932567886543, "grad_norm": 2.6210880279541016, "learning_rate": 7.49988987398865e-05, "loss": 2.2602, "step": 178500 },
    { "epoch": 0.6688007263400068, "grad_norm": 2.7572479248046875, "learning_rate": 7.487223101332892e-05, "loss": 2.2325, "step": 179000 },
    { "epoch": 0.6706688847934704, "grad_norm": 3.2144672870635986, "learning_rate": 7.474484082913688e-05, "loss": 2.2835, "step": 179500 },
    { "epoch": 0.6725370432469341, "grad_norm": 2.4524009227752686, "learning_rate": 7.461723756021062e-05, "loss": 2.274, "step": 180000 },
    { "epoch": 0.6744052017003979, "grad_norm": 2.676546335220337, "learning_rate": 7.44894223053775e-05, "loss": 2.2941, "step": 180500 },
    { "epoch": 0.6762733601538615, "grad_norm": 3.0090246200561523, "learning_rate": 7.43613961652904e-05, "loss": 2.2545, "step": 181000 },
    { "epoch": 0.6781415186073252, "grad_norm": 2.6397953033447266, "learning_rate": 7.423316024241814e-05, "loss": 2.2541, "step": 181500 },
    { "epoch": 0.680009677060789, "grad_norm": 3.0165371894836426, "learning_rate": 7.410471564103606e-05, "loss": 2.2319, "step": 182000 },
    { "epoch": 0.6818778355142526, "grad_norm": 2.1070499420166016, "learning_rate": 7.39760634672165e-05,
|
"loss": 2.2617, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 0.6837459939677164, |
|
"grad_norm": 2.777233123779297, |
|
"learning_rate": 7.384746275141047e-05, |
|
"loss": 2.2206, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 0.6856141524211801, |
|
"grad_norm": 2.188089370727539, |
|
"learning_rate": 7.371839916767453e-05, |
|
"loss": 2.2428, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 0.6874823108746437, |
|
"grad_norm": 2.427400827407837, |
|
"learning_rate": 7.358913133818016e-05, |
|
"loss": 2.2161, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.6893504693281075, |
|
"grad_norm": 2.542616605758667, |
|
"learning_rate": 7.34596603760887e-05, |
|
"loss": 2.266, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 0.6912186277815712, |
|
"grad_norm": 2.6249241828918457, |
|
"learning_rate": 7.333024694314207e-05, |
|
"loss": 2.2383, |
|
"step": 185000 |
|
}, |
|
{ |
|
"epoch": 0.6930867862350348, |
|
"grad_norm": 2.5798895359039307, |
|
"learning_rate": 7.320037346301442e-05, |
|
"loss": 2.2524, |
|
"step": 185500 |
|
}, |
|
{ |
|
"epoch": 0.6949549446884986, |
|
"grad_norm": 2.9020352363586426, |
|
"learning_rate": 7.307030019799232e-05, |
|
"loss": 2.2251, |
|
"step": 186000 |
|
}, |
|
{ |
|
"epoch": 0.6968231031419623, |
|
"grad_norm": 3.3277840614318848, |
|
"learning_rate": 7.294002826817298e-05, |
|
"loss": 2.2608, |
|
"step": 186500 |
|
}, |
|
{ |
|
"epoch": 0.698691261595426, |
|
"grad_norm": 2.6658146381378174, |
|
"learning_rate": 7.280955879536435e-05, |
|
"loss": 2.2689, |
|
"step": 187000 |
|
}, |
|
{ |
|
"epoch": 0.7005594200488897, |
|
"grad_norm": 2.736542224884033, |
|
"learning_rate": 7.267915443013911e-05, |
|
"loss": 2.2004, |
|
"step": 187500 |
|
}, |
|
{ |
|
"epoch": 0.7024275785023534, |
|
"grad_norm": 2.440765619277954, |
|
"learning_rate": 7.254829363303503e-05, |
|
"loss": 2.2541, |
|
"step": 188000 |
|
}, |
|
{ |
|
"epoch": 0.7042957369558172, |
|
"grad_norm": 2.6804561614990234, |
|
"learning_rate": 7.241723866627799e-05, |
|
"loss": 2.2647, |
|
"step": 188500 |
|
}, |
|
{ |
|
"epoch": 0.7061638954092808, |
|
"grad_norm": 2.6702585220336914, |
|
"learning_rate": 7.228599065841891e-05, |
|
"loss": 2.2004, |
|
"step": 189000 |
|
}, |
|
{ |
|
"epoch": 0.7080320538627445, |
|
"grad_norm": 2.5987019538879395, |
|
"learning_rate": 7.215481381028357e-05, |
|
"loss": 2.2509, |
|
"step": 189500 |
|
}, |
|
{ |
|
"epoch": 0.7099002123162083, |
|
"grad_norm": 2.9680731296539307, |
|
"learning_rate": 7.20231834929401e-05, |
|
"loss": 2.2262, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.7117683707696719, |
|
"grad_norm": 3.8419201374053955, |
|
"learning_rate": 7.189136352781376e-05, |
|
"loss": 2.2313, |
|
"step": 190500 |
|
}, |
|
{ |
|
"epoch": 0.7136365292231357, |
|
"grad_norm": 2.6179468631744385, |
|
"learning_rate": 7.175935505004304e-05, |
|
"loss": 2.2466, |
|
"step": 191000 |
|
}, |
|
{ |
|
"epoch": 0.7155046876765994, |
|
"grad_norm": 1.9412791728973389, |
|
"learning_rate": 7.162742377434187e-05, |
|
"loss": 2.2336, |
|
"step": 191500 |
|
}, |
|
{ |
|
"epoch": 0.717372846130063, |
|
"grad_norm": 2.312648057937622, |
|
"learning_rate": 7.149504205451939e-05, |
|
"loss": 2.2124, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.7192410045835268, |
|
"grad_norm": 2.4080445766448975, |
|
"learning_rate": 7.136247523488743e-05, |
|
"loss": 2.2103, |
|
"step": 192500 |
|
}, |
|
{ |
|
"epoch": 0.7211091630369905, |
|
"grad_norm": 3.0859153270721436, |
|
"learning_rate": 7.122972445701587e-05, |
|
"loss": 2.1961, |
|
"step": 193000 |
|
}, |
|
{ |
|
"epoch": 0.7229773214904541, |
|
"grad_norm": 3.438227415084839, |
|
"learning_rate": 7.10970569129335e-05, |
|
"loss": 2.2128, |
|
"step": 193500 |
|
}, |
|
{ |
|
"epoch": 0.7248454799439179, |
|
"grad_norm": 2.6577913761138916, |
|
"learning_rate": 7.096394201181632e-05, |
|
"loss": 2.2254, |
|
"step": 194000 |
|
}, |
|
{ |
|
"epoch": 0.7267136383973816, |
|
"grad_norm": 2.579580068588257, |
|
"learning_rate": 7.083064658434042e-05, |
|
"loss": 2.2562, |
|
"step": 194500 |
|
}, |
|
{ |
|
"epoch": 0.7285817968508453, |
|
"grad_norm": 2.957392454147339, |
|
"learning_rate": 7.069717177834997e-05, |
|
"loss": 2.2762, |
|
"step": 195000 |
|
}, |
|
{ |
|
"epoch": 0.730449955304309, |
|
"grad_norm": 1.9975017309188843, |
|
"learning_rate": 7.056378622641193e-05, |
|
"loss": 2.2385, |
|
"step": 195500 |
|
}, |
|
{ |
|
"epoch": 0.7323181137577727, |
|
"grad_norm": 3.1538219451904297, |
|
"learning_rate": 7.042995646610036e-05, |
|
"loss": 2.2086, |
|
"step": 196000 |
|
}, |
|
{ |
|
"epoch": 0.7341862722112364, |
|
"grad_norm": 2.2817578315734863, |
|
"learning_rate": 7.02959507777287e-05, |
|
"loss": 2.2153, |
|
"step": 196500 |
|
}, |
|
{ |
|
"epoch": 0.7360544306647001, |
|
"grad_norm": 2.5474236011505127, |
|
"learning_rate": 7.016177031525738e-05, |
|
"loss": 2.2388, |
|
"step": 197000 |
|
}, |
|
{ |
|
"epoch": 0.7379225891181638, |
|
"grad_norm": 2.5271482467651367, |
|
"learning_rate": 7.002795399479169e-05, |
|
"loss": 2.2344, |
|
"step": 197500 |
|
}, |
|
{ |
|
"epoch": 0.7397907475716275, |
|
"grad_norm": 1.9711894989013672, |
|
"learning_rate": 6.989342813955246e-05, |
|
"loss": 2.1875, |
|
"step": 198000 |
|
}, |
|
{ |
|
"epoch": 0.7416589060250912, |
|
"grad_norm": 2.832296133041382, |
|
"learning_rate": 6.97587309764484e-05, |
|
"loss": 2.2378, |
|
"step": 198500 |
|
}, |
|
{ |
|
"epoch": 0.743527064478555, |
|
"grad_norm": 3.224106788635254, |
|
"learning_rate": 6.962386366539439e-05, |
|
"loss": 2.1749, |
|
"step": 199000 |
|
}, |
|
{ |
|
"epoch": 0.7453952229320187, |
|
"grad_norm": 2.2426908016204834, |
|
"learning_rate": 6.948882736777054e-05, |
|
"loss": 2.1997, |
|
"step": 199500 |
|
}, |
|
{ |
|
"epoch": 0.7472633813854823, |
|
"grad_norm": 2.7945656776428223, |
|
"learning_rate": 6.935362324641206e-05, |
|
"loss": 2.2217, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.7491315398389461, |
|
"grad_norm": 2.7567574977874756, |
|
"learning_rate": 6.921825246559942e-05, |
|
"loss": 2.2296, |
|
"step": 200500 |
|
}, |
|
{ |
|
"epoch": 0.7509996982924098, |
|
"grad_norm": 2.5919723510742188, |
|
"learning_rate": 6.908298742798458e-05, |
|
"loss": 2.2364, |
|
"step": 201000 |
|
}, |
|
{ |
|
"epoch": 0.7528678567458734, |
|
"grad_norm": 2.993880271911621, |
|
"learning_rate": 6.894728715432299e-05, |
|
"loss": 2.2065, |
|
"step": 201500 |
|
}, |
|
{ |
|
"epoch": 0.7547360151993372, |
|
"grad_norm": 2.4301109313964844, |
|
"learning_rate": 6.881142372028077e-05, |
|
"loss": 2.2457, |
|
"step": 202000 |
|
}, |
|
{ |
|
"epoch": 0.7566041736528009, |
|
"grad_norm": 2.623084783554077, |
|
"learning_rate": 6.867539829581595e-05, |
|
"loss": 2.1742, |
|
"step": 202500 |
|
}, |
|
{ |
|
"epoch": 0.7584723321062646, |
|
"grad_norm": 3.4304981231689453, |
|
"learning_rate": 6.853921205228139e-05, |
|
"loss": 2.2292, |
|
"step": 203000 |
|
}, |
|
{ |
|
"epoch": 0.7603404905597283, |
|
"grad_norm": 1.7889618873596191, |
|
"learning_rate": 6.84028661624149e-05, |
|
"loss": 2.217, |
|
"step": 203500 |
|
}, |
|
{ |
|
"epoch": 0.762208649013192, |
|
"grad_norm": 2.954709053039551, |
|
"learning_rate": 6.8266361800329e-05, |
|
"loss": 2.2491, |
|
"step": 204000 |
|
}, |
|
{ |
|
"epoch": 0.7640768074666557, |
|
"grad_norm": 2.892221212387085, |
|
"learning_rate": 6.812970014150086e-05, |
|
"loss": 2.2431, |
|
"step": 204500 |
|
}, |
|
{ |
|
"epoch": 0.7659449659201194, |
|
"grad_norm": 1.9717577695846558, |
|
"learning_rate": 6.799315615334446e-05, |
|
"loss": 2.2397, |
|
"step": 205000 |
|
}, |
|
{ |
|
"epoch": 0.7678131243735831, |
|
"grad_norm": 2.904269218444824, |
|
"learning_rate": 6.785618374157811e-05, |
|
"loss": 2.1972, |
|
"step": 205500 |
|
}, |
|
{ |
|
"epoch": 0.7696812828270468, |
|
"grad_norm": 3.807295083999634, |
|
"learning_rate": 6.771933197025247e-05, |
|
"loss": 2.2292, |
|
"step": 206000 |
|
}, |
|
{ |
|
"epoch": 0.7715494412805105, |
|
"grad_norm": 3.4538333415985107, |
|
"learning_rate": 6.758205351413722e-05, |
|
"loss": 2.1935, |
|
"step": 206500 |
|
}, |
|
{ |
|
"epoch": 0.7734175997339743, |
|
"grad_norm": 2.769444227218628, |
|
"learning_rate": 6.744462365404948e-05, |
|
"loss": 2.1709, |
|
"step": 207000 |
|
}, |
|
{ |
|
"epoch": 0.775285758187438, |
|
"grad_norm": 3.002584934234619, |
|
"learning_rate": 6.730704357343616e-05, |
|
"loss": 2.1863, |
|
"step": 207500 |
|
}, |
|
{ |
|
"epoch": 0.7771539166409016, |
|
"grad_norm": 2.559108257293701, |
|
"learning_rate": 6.716959006322012e-05, |
|
"loss": 2.2118, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.7790220750943654, |
|
"grad_norm": 3.1521153450012207, |
|
"learning_rate": 6.703171339157552e-05, |
|
"loss": 2.19, |
|
"step": 208500 |
|
}, |
|
{ |
|
"epoch": 0.780890233547829, |
|
"grad_norm": 2.7111008167266846, |
|
"learning_rate": 6.689369005509088e-05, |
|
"loss": 2.2044, |
|
"step": 209000 |
|
}, |
|
{ |
|
"epoch": 0.7827583920012927, |
|
"grad_norm": 2.8580000400543213, |
|
"learning_rate": 6.675552124232371e-05, |
|
"loss": 2.2458, |
|
"step": 209500 |
|
}, |
|
{ |
|
"epoch": 0.7846265504547565, |
|
"grad_norm": 2.7248494625091553, |
|
"learning_rate": 6.661720814308425e-05, |
|
"loss": 2.2096, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.7864947089082202, |
|
"grad_norm": 3.5847723484039307, |
|
"learning_rate": 6.647875194842521e-05, |
|
"loss": 2.2238, |
|
"step": 210500 |
|
}, |
|
{ |
|
"epoch": 0.7883628673616839, |
|
"grad_norm": 3.013185977935791, |
|
"learning_rate": 6.634015385063155e-05, |
|
"loss": 2.2128, |
|
"step": 211000 |
|
}, |
|
{ |
|
"epoch": 0.7902310258151476, |
|
"grad_norm": 3.160470962524414, |
|
"learning_rate": 6.620141504321021e-05, |
|
"loss": 2.2604, |
|
"step": 211500 |
|
}, |
|
{ |
|
"epoch": 0.7920991842686113, |
|
"grad_norm": 3.009772300720215, |
|
"learning_rate": 6.606281461596562e-05, |
|
"loss": 2.2169, |
|
"step": 212000 |
|
}, |
|
{ |
|
"epoch": 0.793967342722075, |
|
"grad_norm": 2.7089791297912598, |
|
"learning_rate": 6.592379825008977e-05, |
|
"loss": 2.1894, |
|
"step": 212500 |
|
}, |
|
{ |
|
"epoch": 0.7958355011755387, |
|
"grad_norm": 2.2874131202697754, |
|
"learning_rate": 6.578492320297462e-05, |
|
"loss": 2.2472, |
|
"step": 213000 |
|
}, |
|
{ |
|
"epoch": 0.7977036596290024, |
|
"grad_norm": 3.115208864212036, |
|
"learning_rate": 6.564563405749691e-05, |
|
"loss": 2.1696, |
|
"step": 213500 |
|
}, |
|
{ |
|
"epoch": 0.7995718180824661, |
|
"grad_norm": 3.074309825897217, |
|
"learning_rate": 6.550621018309538e-05, |
|
"loss": 2.2022, |
|
"step": 214000 |
|
}, |
|
{ |
|
"epoch": 0.8014399765359298, |
|
"grad_norm": 2.6160593032836914, |
|
"learning_rate": 6.536665278038796e-05, |
|
"loss": 2.2136, |
|
"step": 214500 |
|
}, |
|
{ |
|
"epoch": 0.8033081349893936, |
|
"grad_norm": 2.875887155532837, |
|
"learning_rate": 6.522696305114238e-05, |
|
"loss": 2.222, |
|
"step": 215000 |
|
}, |
|
{ |
|
"epoch": 0.8051762934428572, |
|
"grad_norm": 1.9582101106643677, |
|
"learning_rate": 6.508714219826595e-05, |
|
"loss": 2.1975, |
|
"step": 215500 |
|
}, |
|
{ |
|
"epoch": 0.8070444518963209, |
|
"grad_norm": 3.11397647857666, |
|
"learning_rate": 6.494719142579506e-05, |
|
"loss": 2.2285, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.8089126103497847, |
|
"grad_norm": 2.7110836505889893, |
|
"learning_rate": 6.480711193888488e-05, |
|
"loss": 2.1638, |
|
"step": 216500 |
|
}, |
|
{ |
|
"epoch": 0.8107807688032483, |
|
"grad_norm": 2.2085702419281006, |
|
"learning_rate": 6.4666904943799e-05, |
|
"loss": 2.2144, |
|
"step": 217000 |
|
}, |
|
{ |
|
"epoch": 0.812648927256712, |
|
"grad_norm": 3.44262957572937, |
|
"learning_rate": 6.452657164789899e-05, |
|
"loss": 2.2248, |
|
"step": 217500 |
|
}, |
|
{ |
|
"epoch": 0.8145170857101758, |
|
"grad_norm": 2.770791530609131, |
|
"learning_rate": 6.438639430044904e-05, |
|
"loss": 2.1861, |
|
"step": 218000 |
|
}, |
|
{ |
|
"epoch": 0.8163852441636394, |
|
"grad_norm": 3.2068679332733154, |
|
"learning_rate": 6.424581227590346e-05, |
|
"loss": 2.1691, |
|
"step": 218500 |
|
}, |
|
{ |
|
"epoch": 0.8182534026171032, |
|
"grad_norm": 3.264312744140625, |
|
"learning_rate": 6.410510757669032e-05, |
|
"loss": 2.159, |
|
"step": 219000 |
|
}, |
|
{ |
|
"epoch": 0.8201215610705669, |
|
"grad_norm": 3.264051675796509, |
|
"learning_rate": 6.396428141445709e-05, |
|
"loss": 2.1775, |
|
"step": 219500 |
|
}, |
|
{ |
|
"epoch": 0.8219897195240305, |
|
"grad_norm": 2.961418867111206, |
|
"learning_rate": 6.382333500189714e-05, |
|
"loss": 2.1851, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.8238578779774943, |
|
"grad_norm": 4.034390449523926, |
|
"learning_rate": 6.368226955273941e-05, |
|
"loss": 2.1552, |
|
"step": 220500 |
|
}, |
|
{ |
|
"epoch": 0.825726036430958, |
|
"grad_norm": 2.0030012130737305, |
|
"learning_rate": 6.354136876505816e-05, |
|
"loss": 2.1762, |
|
"step": 221000 |
|
}, |
|
{ |
|
"epoch": 0.8275941948844217, |
|
"grad_norm": 2.7552449703216553, |
|
"learning_rate": 6.340006911997954e-05, |
|
"loss": 2.1758, |
|
"step": 221500 |
|
}, |
|
{ |
|
"epoch": 0.8294623533378854, |
|
"grad_norm": 2.4928476810455322, |
|
"learning_rate": 6.325865408316381e-05, |
|
"loss": 2.1951, |
|
"step": 222000 |
|
}, |
|
{ |
|
"epoch": 0.8313305117913491, |
|
"grad_norm": 2.8218753337860107, |
|
"learning_rate": 6.311712487237538e-05, |
|
"loss": 2.1348, |
|
"step": 222500 |
|
}, |
|
{ |
|
"epoch": 0.8331986702448129, |
|
"grad_norm": 3.4085326194763184, |
|
"learning_rate": 6.297548270636179e-05, |
|
"loss": 2.2058, |
|
"step": 223000 |
|
}, |
|
{ |
|
"epoch": 0.8350668286982765, |
|
"grad_norm": 3.3644134998321533, |
|
"learning_rate": 6.283372880484332e-05, |
|
"loss": 2.1574, |
|
"step": 223500 |
|
}, |
|
{ |
|
"epoch": 0.8369349871517402, |
|
"grad_norm": 3.0675761699676514, |
|
"learning_rate": 6.269186438850234e-05, |
|
"loss": 2.1725, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.838803145605204, |
|
"grad_norm": 2.6877012252807617, |
|
"learning_rate": 6.2549890678973e-05, |
|
"loss": 2.1889, |
|
"step": 224500 |
|
}, |
|
{ |
|
"epoch": 0.8406713040586676, |
|
"grad_norm": 3.4169256687164307, |
|
"learning_rate": 6.240837743960651e-05, |
|
"loss": 2.1423, |
|
"step": 225000 |
|
}, |
|
{ |
|
"epoch": 0.8425394625121313, |
|
"grad_norm": 3.0024383068084717, |
|
"learning_rate": 6.22661892373068e-05, |
|
"loss": 2.178, |
|
"step": 225500 |
|
}, |
|
{ |
|
"epoch": 0.8444076209655951, |
|
"grad_norm": 3.079028606414795, |
|
"learning_rate": 6.212389540742632e-05, |
|
"loss": 2.2295, |
|
"step": 226000 |
|
}, |
|
{ |
|
"epoch": 0.8462757794190587, |
|
"grad_norm": 2.90077805519104, |
|
"learning_rate": 6.198149717529692e-05, |
|
"loss": 2.1684, |
|
"step": 226500 |
|
}, |
|
{ |
|
"epoch": 0.8481439378725225, |
|
"grad_norm": 3.053629159927368, |
|
"learning_rate": 6.18389957671496e-05, |
|
"loss": 2.1738, |
|
"step": 227000 |
|
}, |
|
{ |
|
"epoch": 0.8500120963259862, |
|
"grad_norm": 3.0925843715667725, |
|
"learning_rate": 6.16963924101038e-05, |
|
"loss": 2.1551, |
|
"step": 227500 |
|
}, |
|
{ |
|
"epoch": 0.8518802547794498, |
|
"grad_norm": 3.0221009254455566, |
|
"learning_rate": 6.155368833215677e-05, |
|
"loss": 2.1966, |
|
"step": 228000 |
|
}, |
|
{ |
|
"epoch": 0.8537484132329136, |
|
"grad_norm": 2.5803329944610596, |
|
"learning_rate": 6.141088476217323e-05, |
|
"loss": 2.164, |
|
"step": 228500 |
|
}, |
|
{ |
|
"epoch": 0.8556165716863773, |
|
"grad_norm": 3.4956555366516113, |
|
"learning_rate": 6.126826883078718e-05, |
|
"loss": 2.1776, |
|
"step": 229000 |
|
}, |
|
{ |
|
"epoch": 0.8574847301398411, |
|
"grad_norm": 2.8954169750213623, |
|
"learning_rate": 6.112527015957583e-05, |
|
"loss": 2.1944, |
|
"step": 229500 |
|
}, |
|
{ |
|
"epoch": 0.8593528885933047, |
|
"grad_norm": 3.2150614261627197, |
|
"learning_rate": 6.0982175685556475e-05, |
|
"loss": 2.1942, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.8612210470467684, |
|
"grad_norm": 2.8969147205352783, |
|
"learning_rate": 6.083898664095558e-05, |
|
"loss": 2.152, |
|
"step": 230500 |
|
}, |
|
{ |
|
"epoch": 0.8630892055002322, |
|
"grad_norm": 2.898751974105835, |
|
"learning_rate": 6.069599091590918e-05, |
|
"loss": 2.1624, |
|
"step": 231000 |
|
}, |
|
{ |
|
"epoch": 0.8649573639536958, |
|
"grad_norm": 3.5042660236358643, |
|
"learning_rate": 6.05529034527542e-05, |
|
"loss": 2.1428, |
|
"step": 231500 |
|
}, |
|
{ |
|
"epoch": 0.8668255224071595, |
|
"grad_norm": 3.0192151069641113, |
|
"learning_rate": 6.040943845887397e-05, |
|
"loss": 2.1942, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 0.8686936808606233, |
|
"grad_norm": 3.0444955825805664, |
|
"learning_rate": 6.026588382641243e-05, |
|
"loss": 2.1533, |
|
"step": 232500 |
|
}, |
|
{ |
|
"epoch": 0.8705618393140869, |
|
"grad_norm": 3.1138992309570312, |
|
"learning_rate": 6.012224079155855e-05, |
|
"loss": 2.1841, |
|
"step": 233000 |
|
}, |
|
{ |
|
"epoch": 0.8724299977675507, |
|
"grad_norm": 2.3980443477630615, |
|
"learning_rate": 5.997879813783181e-05, |
|
"loss": 2.1724, |
|
"step": 233500 |
|
}, |
|
{ |
|
"epoch": 0.8742981562210144, |
|
"grad_norm": 2.9543912410736084, |
|
"learning_rate": 5.9834982180414524e-05, |
|
"loss": 2.1502, |
|
"step": 234000 |
|
}, |
|
{ |
|
"epoch": 0.876166314674478, |
|
"grad_norm": 2.555027961730957, |
|
"learning_rate": 5.969108153121932e-05, |
|
"loss": 2.1499, |
|
"step": 234500 |
|
}, |
|
{ |
|
"epoch": 0.8780344731279418, |
|
"grad_norm": 2.4806180000305176, |
|
"learning_rate": 5.954709742941489e-05, |
|
"loss": 2.1733, |
|
"step": 235000 |
|
}, |
|
{ |
|
"epoch": 0.8799026315814055, |
|
"grad_norm": 2.855769634246826, |
|
"learning_rate": 5.9403031114888505e-05, |
|
"loss": 2.1783, |
|
"step": 235500 |
|
}, |
|
{ |
|
"epoch": 0.8817707900348691, |
|
"grad_norm": 2.85447359085083, |
|
"learning_rate": 5.9258883828235466e-05, |
|
"loss": 2.1684, |
|
"step": 236000 |
|
}, |
|
{ |
|
"epoch": 0.8836389484883329, |
|
"grad_norm": 3.5129261016845703, |
|
"learning_rate": 5.911494534352925e-05, |
|
"loss": 2.1825, |
|
"step": 236500 |
|
}, |
|
{ |
|
"epoch": 0.8855071069417966, |
|
"grad_norm": 3.9751412868499756, |
|
"learning_rate": 5.8970639992924826e-05, |
|
"loss": 2.1827, |
|
"step": 237000 |
|
}, |
|
{ |
|
"epoch": 0.8873752653952603, |
|
"grad_norm": 3.1551120281219482, |
|
"learning_rate": 5.882625739363443e-05, |
|
"loss": 2.2232, |
|
"step": 237500 |
|
}, |
|
{ |
|
"epoch": 0.889243423848724, |
|
"grad_norm": 3.2931878566741943, |
|
"learning_rate": 5.868179878897693e-05, |
|
"loss": 2.1291, |
|
"step": 238000 |
|
}, |
|
{ |
|
"epoch": 0.8911115823021877, |
|
"grad_norm": 3.2662160396575928, |
|
"learning_rate": 5.853726542292572e-05, |
|
"loss": 2.1776, |
|
"step": 238500 |
|
}, |
|
{ |
|
"epoch": 0.8929797407556515, |
|
"grad_norm": 2.764841079711914, |
|
"learning_rate": 5.8392658540097975e-05, |
|
"loss": 2.1069, |
|
"step": 239000 |
|
}, |
|
{ |
|
"epoch": 0.8948478992091151, |
|
"grad_norm": 1.903836965560913, |
|
"learning_rate": 5.8247979385743945e-05, |
|
"loss": 2.1436, |
|
"step": 239500 |
|
}, |
|
{ |
|
"epoch": 0.8967160576625788, |
|
"grad_norm": 2.859905481338501, |
|
"learning_rate": 5.8103229205736235e-05, |
|
"loss": 2.1426, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.8985842161160426, |
|
"grad_norm": 3.1984663009643555, |
|
"learning_rate": 5.79586989552882e-05, |
|
"loss": 2.1798, |
|
"step": 240500 |
|
}, |
|
{ |
|
"epoch": 0.9004523745695062, |
|
"grad_norm": 2.157151222229004, |
|
"learning_rate": 5.781381059984584e-05, |
|
"loss": 2.1766, |
|
"step": 241000 |
|
}, |
|
{ |
|
"epoch": 0.90232053302297, |
|
"grad_norm": 3.674839973449707, |
|
"learning_rate": 5.7668854957498444e-05, |
|
"loss": 2.1925, |
|
"step": 241500 |
|
}, |
|
{ |
|
"epoch": 0.9041886914764337, |
|
"grad_norm": 2.9118549823760986, |
|
"learning_rate": 5.752383327649953e-05, |
|
"loss": 2.1655, |
|
"step": 242000 |
|
}, |
|
{ |
|
"epoch": 0.9060568499298973, |
|
"grad_norm": 3.0006792545318604, |
|
"learning_rate": 5.737903704244284e-05, |
|
"loss": 2.1639, |
|
"step": 242500 |
|
}, |
|
{ |
|
"epoch": 0.9079250083833611, |
|
"grad_norm": 3.3966879844665527, |
|
"learning_rate": 5.723388715699902e-05, |
|
"loss": 2.1106, |
|
"step": 243000 |
|
}, |
|
{ |
|
"epoch": 0.9097931668368248, |
|
"grad_norm": 3.6091904640197754, |
|
"learning_rate": 5.708896546422721e-05, |
|
"loss": 2.1847, |
|
"step": 243500 |
|
}, |
|
{ |
|
"epoch": 0.9116613252902884, |
|
"grad_norm": 2.7571775913238525, |
|
"learning_rate": 5.694369236403816e-05, |
|
"loss": 2.1453, |
|
"step": 244000 |
|
}, |
|
{ |
|
"epoch": 0.9135294837437522, |
|
"grad_norm": 3.4625306129455566, |
|
"learning_rate": 5.6798359469775195e-05, |
|
"loss": 2.1599, |
|
"step": 244500 |
|
}, |
|
{ |
|
"epoch": 0.9153976421972159, |
|
"grad_norm": 2.573812246322632, |
|
"learning_rate": 5.665296803294042e-05, |
|
"loss": 2.1393, |
|
"step": 245000 |
|
}, |
|
{ |
|
"epoch": 0.9172658006506796, |
|
"grad_norm": 2.3979828357696533, |
|
"learning_rate": 5.650751930554011e-05, |
|
"loss": 2.1714, |
|
"step": 245500 |
|
}, |
|
{ |
|
"epoch": 0.9191339591041433, |
|
"grad_norm": 3.1871445178985596, |
|
"learning_rate": 5.6362014540073884e-05, |
|
"loss": 2.1164, |
|
"step": 246000 |
|
}, |
|
{ |
|
"epoch": 0.921002117557607, |
|
"grad_norm": 2.8169736862182617, |
|
"learning_rate": 5.6216454989523906e-05, |
|
"loss": 2.1343, |
|
"step": 246500 |
|
}, |
|
{ |
|
"epoch": 0.9228702760110707, |
|
"grad_norm": 3.2970011234283447, |
|
"learning_rate": 5.607113318609965e-05, |
|
"loss": 2.1403, |
|
"step": 247000 |
|
}, |
|
{ |
|
"epoch": 0.9247384344645344, |
|
"grad_norm": 2.7862350940704346, |
|
"learning_rate": 5.5925467929508655e-05, |
|
"loss": 2.148, |
|
"step": 247500 |
|
}, |
|
{ |
|
"epoch": 0.9266065929179981, |
|
"grad_norm": 2.888575553894043, |
|
"learning_rate": 5.5779751647058663e-05, |
|
"loss": 2.184, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 0.9284747513714618, |
|
"grad_norm": 2.52675199508667, |
|
"learning_rate": 5.56339855935533e-05, |
|
"loss": 2.078, |
|
"step": 248500 |
|
}, |
|
{ |
|
"epoch": 0.9303429098249255, |
|
"grad_norm": 2.9500951766967773, |
|
"learning_rate": 5.54881710242247e-05, |
|
"loss": 2.1206, |
|
"step": 249000 |
|
}, |
|
{ |
|
"epoch": 0.9322110682783893, |
|
"grad_norm": 2.5412566661834717, |
|
"learning_rate": 5.5342309194722885e-05, |
|
"loss": 2.1395, |
|
"step": 249500 |
|
}, |
|
{ |
|
"epoch": 0.934079226731853, |
|
"grad_norm": 2.3108468055725098, |
|
"learning_rate": 5.519640136110478e-05, |
|
"loss": 2.1498, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.9359473851853166, |
|
"grad_norm": 2.373042345046997, |
|
"learning_rate": 5.505044877982351e-05, |
|
"loss": 2.1532, |
|
"step": 250500 |
|
}, |
|
{ |
|
"epoch": 0.9378155436387804, |
|
"grad_norm": 2.997445821762085, |
|
"learning_rate": 5.490474474242996e-05, |
|
"loss": 2.1451, |
|
"step": 251000 |
|
}, |
|
{ |
|
"epoch": 0.939683702092244, |
|
"grad_norm": 2.837625741958618, |
|
"learning_rate": 5.4758706519924406e-05, |
|
"loss": 2.1425, |
|
"step": 251500 |
|
}, |
|
{ |
|
"epoch": 0.9415518605457077, |
|
"grad_norm": 2.954401731491089, |
|
"learning_rate": 5.461262731886816e-05, |
|
"loss": 2.1568, |
|
"step": 252000 |
|
}, |
|
{ |
|
"epoch": 0.9434200189991715, |
|
"grad_norm": 3.2825334072113037, |
|
"learning_rate": 5.446650839719003e-05, |
|
"loss": 2.15, |
|
"step": 252500 |
|
}, |
|
{ |
|
"epoch": 0.9452881774526352, |
|
"grad_norm": 3.196861505508423, |
|
"learning_rate": 5.4320643365477844e-05, |
|
"loss": 2.1278, |
|
"step": 253000 |
|
}, |
|
{ |
|
"epoch": 0.9471563359060989, |
|
"grad_norm": 2.7488534450531006, |
|
"learning_rate": 5.417444885085084e-05, |
|
"loss": 2.1859, |
|
"step": 253500 |
|
}, |
|
{ |
|
"epoch": 0.9490244943595626, |
|
"grad_norm": 2.5847301483154297, |
|
"learning_rate": 5.4028218388879116e-05, |
|
"loss": 2.1445, |
|
"step": 254000 |
|
}, |
|
{ |
|
"epoch": 0.9508926528130263, |
|
"grad_norm": 3.6500895023345947, |
|
"learning_rate": 5.388195323879396e-05, |
|
"loss": 2.1439, |
|
"step": 254500 |
|
}, |
|
{ |
|
"epoch": 0.95276081126649, |
|
"grad_norm": 2.848147392272949, |
|
"learning_rate": 5.373594728980722e-05, |
|
"loss": 2.1709, |
|
"step": 255000 |
|
}, |
|
{ |
|
"epoch": 0.9546289697199537, |
|
"grad_norm": 2.592301368713379, |
|
"learning_rate": 5.35899092980915e-05, |
|
"loss": 2.1306, |
|
"step": 255500 |
|
}, |
|
{ |
|
"epoch": 0.9564971281734174, |
|
"grad_norm": 1.9539679288864136, |
|
"learning_rate": 5.344354776311128e-05, |
|
"loss": 2.115, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 0.9583652866268811, |
|
"grad_norm": 3.211258888244629, |
|
"learning_rate": 5.329715657477968e-05, |
|
"loss": 2.166, |
|
"step": 256500 |
|
}, |
|
{ |
|
"epoch": 0.9602334450803448, |
|
"grad_norm": 2.754812240600586, |
|
"learning_rate": 5.31507369937121e-05, |
|
"loss": 2.1639, |
|
"step": 257000 |
|
}, |
|
{ |
|
"epoch": 0.9621016035338086, |
|
"grad_norm": 2.349533796310425, |
|
"learning_rate": 5.300458320043379e-05, |
|
"loss": 2.155, |
|
"step": 257500 |
|
}, |
|
{ |
|
"epoch": 0.9639697619872722, |
|
"grad_norm": 3.3088858127593994, |
|
"learning_rate": 5.285811066719044e-05, |
|
"loss": 2.1429, |
|
"step": 258000 |
|
}, |
|
{ |
|
"epoch": 0.9658379204407359, |
|
"grad_norm": 3.420562505722046, |
|
"learning_rate": 5.2711613521958034e-05, |
|
"loss": 2.133, |
|
"step": 258500 |
|
}, |
|
{ |
|
"epoch": 0.9677060788941997, |
|
"grad_norm": 2.4579176902770996, |
|
"learning_rate": 5.256509302626437e-05, |
|
"loss": 2.1483, |
|
"step": 259000 |
|
}, |
|
{ |
|
"epoch": 0.9695742373476633, |
|
"grad_norm": 3.574404239654541, |
|
"learning_rate": 5.241855044183839e-05, |
|
"loss": 2.1599, |
|
"step": 259500 |
|
}, |
|
{ |
|
"epoch": 0.971442395801127, |
|
"grad_norm": 2.763312816619873, |
|
"learning_rate": 5.227198703059918e-05, |
|
"loss": 2.1175, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.9733105542545908, |
|
"grad_norm": 3.4662206172943115, |
|
"learning_rate": 5.2125404054645224e-05, |
|
"loss": 2.1439, |
|
"step": 260500 |
|
}, |
|
{ |
|
"epoch": 0.9751787127080545, |
|
"grad_norm": 2.4736666679382324, |
|
"learning_rate": 5.197880277624344e-05, |
|
"loss": 2.166, |
|
"step": 261000 |
|
}, |
|
{ |
|
"epoch": 0.9770468711615182, |
|
"grad_norm": 2.448014974594116, |
|
"learning_rate": 5.1832184457818365e-05, |
|
"loss": 2.1184, |
|
"step": 261500 |
|
}, |
|
{ |
|
"epoch": 0.9789150296149819, |
|
"grad_norm": 2.605496644973755, |
|
"learning_rate": 5.168584364503971e-05, |
|
"loss": 2.0694, |
|
"step": 262000 |
|
}, |
|
{ |
|
"epoch": 0.9807831880684456, |
|
"grad_norm": 2.6576755046844482, |
|
"learning_rate": 5.153919506218703e-05, |
|
"loss": 2.1525, |
|
"step": 262500 |
|
}, |
|
{ |
|
"epoch": 0.9826513465219093, |
|
"grad_norm": 3.0602567195892334, |
|
"learning_rate": 5.139253322489586e-05, |
|
"loss": 2.12, |
|
"step": 263000 |
|
}, |
|
{ |
|
"epoch": 0.984519504975373, |
|
"grad_norm": 2.233271598815918, |
|
"learning_rate": 5.124585939611224e-05, |
|
"loss": 2.124, |
|
"step": 263500 |
|
}, |
|
{ |
|
"epoch": 0.9863876634288367, |
|
"grad_norm": 3.0819501876831055, |
|
"learning_rate": 5.109946821786733e-05, |
|
"loss": 2.1361, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 0.9882558218823004, |
|
"grad_norm": 2.7308757305145264, |
|
"learning_rate": 5.0952774213009e-05, |
|
"loss": 2.1196, |
|
"step": 264500 |
|
}, |
|
{ |
|
"epoch": 0.9901239803357641, |
|
"grad_norm": 2.309229612350464, |
|
"learning_rate": 5.080607200354588e-05, |
|
"loss": 2.071, |
|
"step": 265000 |
|
}, |
|
{ |
|
"epoch": 0.9919921387892279, |
|
"grad_norm": 3.331204652786255, |
|
"learning_rate": 5.065965627716091e-05, |
|
"loss": 2.0675, |
|
"step": 265500 |
|
}, |
|
{ |
|
"epoch": 0.9938602972426915, |
|
"grad_norm": 3.6821019649505615, |
|
"learning_rate": 5.051294145852407e-05, |
|
"loss": 2.1329, |
|
"step": 266000 |
|
}, |
|
{ |
|
"epoch": 0.9957284556961552, |
|
"grad_norm": 1.9205609560012817, |
|
"learning_rate": 5.036622222280509e-05, |
|
"loss": 2.1563, |
|
"step": 266500 |
|
}, |
|
{ |
|
"epoch": 0.997596614149619, |
|
"grad_norm": 3.6985223293304443, |
|
"learning_rate": 5.021949983344428e-05, |
|
"loss": 2.139, |
|
"step": 267000 |
|
}, |
|
{ |
|
"epoch": 0.9994647726030826, |
|
"grad_norm": 3.8483798503875732, |
|
"learning_rate": 5.007277555390912e-05, |
|
"loss": 2.1531, |
|
"step": 267500 |
|
}, |
|
{ |
|
"epoch": 1.0013329310565464, |
|
"grad_norm": 2.758868932723999, |
|
"learning_rate": 4.992605064768335e-05, |
|
"loss": 2.0257, |
|
"step": 268000 |
|
}, |
|
{ |
|
"epoch": 1.00320108951001, |
|
"grad_norm": 2.7047057151794434, |
|
"learning_rate": 4.9779619825319616e-05, |
|
"loss": 1.9918, |
|
"step": 268500 |
|
}, |
|
{ |
|
"epoch": 1.0050692479634737, |
|
"grad_norm": 3.4775989055633545, |
|
"learning_rate": 4.963289745111303e-05, |
|
"loss": 1.9841, |
|
"step": 269000 |
|
}, |
|
{ |
|
"epoch": 1.0069374064169374, |
|
"grad_norm": 3.1174392700195312, |
|
"learning_rate": 4.9486178238129e-05, |
|
"loss": 1.9998, |
|
"step": 269500 |
|
}, |
|
{ |
|
"epoch": 1.008805564870401, |
|
"grad_norm": 3.418029546737671, |
|
"learning_rate": 4.933946344980765e-05, |
|
"loss": 2.0305, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 1.010673723323865, |
|
"grad_norm": 4.21517276763916, |
|
"learning_rate": 4.919275434955098e-05, |
|
"loss": 1.9349, |
|
"step": 270500 |
|
}, |
|
{ |
|
"epoch": 1.0125418817773286, |
|
"grad_norm": 3.2260196208953857, |
|
"learning_rate": 4.904605220071203e-05, |
|
"loss": 1.9659, |
|
"step": 271000 |
|
}, |
|
{ |
|
"epoch": 1.0144100402307923, |
|
"grad_norm": 2.354206085205078, |
|
"learning_rate": 4.889935826658396e-05, |
|
"loss": 1.9459, |
|
"step": 271500 |
|
}, |
|
{ |
|
"epoch": 1.016278198684256, |
|
"grad_norm": 2.399245262145996, |
|
"learning_rate": 4.8752967169003024e-05, |
|
"loss": 1.9669, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.0181463571377196, |
|
"grad_norm": 2.836991786956787, |
|
"learning_rate": 4.8606293431139685e-05, |
|
"loss": 1.9754, |
|
"step": 272500 |
|
}, |
|
{ |
|
"epoch": 1.0200145155911835, |
|
"grad_norm": 2.369506597518921, |
|
"learning_rate": 4.845963169487281e-05, |
|
"loss": 1.9748, |
|
"step": 273000 |
|
}, |
|
{ |
|
"epoch": 1.0218826740446472, |
|
"grad_norm": 4.3176140785217285, |
|
"learning_rate": 4.831298322314752e-05, |
|
"loss": 1.9874, |
|
"step": 273500 |
|
}, |
|
{ |
|
"epoch": 1.0237508324981108, |
|
"grad_norm": 2.473726749420166, |
|
"learning_rate": 4.8166349278794803e-05, |
|
"loss": 1.9784, |
|
"step": 274000 |
|
}, |
|
{ |
|
"epoch": 1.0256189909515745, |
|
"grad_norm": 3.3185558319091797, |
|
"learning_rate": 4.8019731124520506e-05, |
|
"loss": 2.0007, |
|
"step": 274500 |
|
}, |
|
{ |
|
"epoch": 1.0274871494050382, |
|
"grad_norm": 3.276498317718506, |
|
"learning_rate": 4.787313002289445e-05, |
|
"loss": 1.9758, |
|
"step": 275000 |
|
}, |
|
{ |
|
"epoch": 1.029355307858502, |
|
"grad_norm": 3.0989725589752197, |
|
"learning_rate": 4.772654723633967e-05, |
|
"loss": 2.0042, |
|
"step": 275500 |
|
}, |
|
{ |
|
"epoch": 1.0312234663119657, |
|
"grad_norm": 2.4186153411865234, |
|
"learning_rate": 4.7580277133162835e-05, |
|
"loss": 2.0053, |
|
"step": 276000 |
|
}, |
|
{ |
|
"epoch": 1.0330916247654294, |
|
"grad_norm": 2.4179837703704834, |
|
"learning_rate": 4.74340277836311e-05, |
|
"loss": 1.9908, |
|
"step": 276500 |
|
}, |
|
{ |
|
"epoch": 1.034959783218893, |
|
"grad_norm": 3.3896212577819824, |
|
"learning_rate": 4.728750742427794e-05, |
|
"loss": 1.9604, |
|
"step": 277000 |
|
}, |
|
{ |
|
"epoch": 1.0368279416723567, |
|
"grad_norm": 2.6385319232940674, |
|
"learning_rate": 4.714101042295578e-05, |
|
"loss": 1.9896, |
|
"step": 277500 |
|
}, |
|
{ |
|
"epoch": 1.0386961001258204, |
|
"grad_norm": 3.6427805423736572, |
|
"learning_rate": 4.6994538041191235e-05, |
|
"loss": 2.0044, |
|
"step": 278000 |
|
}, |
|
{ |
|
"epoch": 1.0405642585792843, |
|
"grad_norm": 3.0906810760498047, |
|
"learning_rate": 4.684809154029888e-05, |
|
"loss": 2.0074, |
|
"step": 278500 |
|
}, |
|
{ |
|
"epoch": 1.042432417032748, |
|
"grad_norm": 3.357675313949585, |
|
"learning_rate": 4.67019649921625e-05, |
|
"loss": 2.0337, |
|
"step": 279000 |
|
}, |
|
{ |
|
"epoch": 1.0443005754862116, |
|
"grad_norm": 3.163966655731201, |
|
"learning_rate": 4.655557397799212e-05, |
|
"loss": 1.9936, |
|
"step": 279500 |
|
}, |
|
{ |
|
"epoch": 1.0461687339396752, |
|
"grad_norm": 2.073416233062744, |
|
"learning_rate": 4.640921262473603e-05, |
|
"loss": 1.9917, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.048036892393139, |
|
"grad_norm": 4.012736797332764, |
|
"learning_rate": 4.626288219275275e-05, |
|
"loss": 1.9811, |
|
"step": 280500 |
|
}, |
|
{ |
|
"epoch": 1.0499050508466028, |
|
"grad_norm": 3.065397262573242, |
|
"learning_rate": 4.611658394213446e-05, |
|
"loss": 2.0052, |
|
"step": 281000 |
|
}, |
|
{ |
|
"epoch": 1.0517732093000665, |
|
"grad_norm": 3.3266775608062744, |
|
"learning_rate": 4.597061162810362e-05, |
|
"loss": 1.997, |
|
"step": 281500 |
|
}, |
|
{ |
|
"epoch": 1.0536413677535301, |
|
"grad_norm": 2.940035820007324, |
|
"learning_rate": 4.582438144871442e-05, |
|
"loss": 1.9267, |
|
"step": 282000 |
|
}, |
|
{ |
|
"epoch": 1.0555095262069938, |
|
"grad_norm": 3.5627119541168213, |
|
"learning_rate": 4.567818722674258e-05, |
|
"loss": 1.973, |
|
"step": 282500 |
|
}, |
|
{ |
|
"epoch": 1.0573776846604575, |
|
"grad_norm": 2.702580213546753, |
|
"learning_rate": 4.553203022110738e-05, |
|
"loss": 1.9818, |
|
"step": 283000 |
|
}, |
|
{ |
|
"epoch": 1.0592458431139213, |
|
"grad_norm": 3.027751922607422, |
|
"learning_rate": 4.538591169040759e-05, |
|
"loss": 2.0195, |
|
"step": 283500 |
|
}, |
|
{ |
|
"epoch": 1.061114001567385, |
|
"grad_norm": 2.598694086074829, |
|
"learning_rate": 4.5239832892910685e-05, |
|
"loss": 1.9988, |
|
"step": 284000 |
|
}, |
|
{ |
|
"epoch": 1.0629821600208487, |
|
"grad_norm": 2.5287024974823, |
|
"learning_rate": 4.5093795086541985e-05, |
|
"loss": 1.9794, |
|
"step": 284500 |
|
}, |
|
{ |
|
"epoch": 1.0648503184743123, |
|
"grad_norm": 2.937054395675659, |
|
"learning_rate": 4.494779952887383e-05, |
|
"loss": 1.9804, |
|
"step": 285000 |
|
}, |
|
{ |
|
"epoch": 1.066718476927776, |
|
"grad_norm": 2.625366687774658, |
|
"learning_rate": 4.48021393369639e-05, |
|
"loss": 2.002, |
|
"step": 285500 |
|
}, |
|
{ |
|
"epoch": 1.0685866353812399, |
|
"grad_norm": 2.97308349609375, |
|
"learning_rate": 4.465623195716817e-05, |
|
"loss": 1.974, |
|
"step": 286000 |
|
}, |
|
{ |
|
"epoch": 1.0704547938347035, |
|
"grad_norm": 2.940298080444336, |
|
"learning_rate": 4.4510370594051275e-05, |
|
"loss": 1.9722, |
|
"step": 286500 |
|
}, |
|
{ |
|
"epoch": 1.0723229522881672, |
|
"grad_norm": 2.5476973056793213, |
|
"learning_rate": 4.436455650366615e-05, |
|
"loss": 2.0061, |
|
"step": 287000 |
|
}, |
|
{ |
|
"epoch": 1.0741911107416309, |
|
"grad_norm": 3.88171124458313, |
|
"learning_rate": 4.4218790941658633e-05, |
|
"loss": 1.9859, |
|
"step": 287500 |
|
}, |
|
{ |
|
"epoch": 1.0760592691950945, |
|
"grad_norm": 2.958958864212036, |
|
"learning_rate": 4.407307516325668e-05, |
|
"loss": 1.9929, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.0779274276485582, |
|
"grad_norm": 3.2626969814300537, |
|
"learning_rate": 4.3927410423259555e-05, |
|
"loss": 2.0427, |
|
"step": 288500 |
|
}, |
|
{ |
|
"epoch": 1.079795586102022, |
|
"grad_norm": 2.726310968399048, |
|
"learning_rate": 4.378208914789977e-05, |
|
"loss": 1.9826, |
|
"step": 289000 |
|
}, |
|
{ |
|
"epoch": 1.0816637445554858, |
|
"grad_norm": 3.683236598968506, |
|
"learning_rate": 4.36365301389968e-05, |
|
"loss": 2.006, |
|
"step": 289500 |
|
}, |
|
{ |
|
"epoch": 1.0835319030089494, |
|
"grad_norm": 3.4819111824035645, |
|
"learning_rate": 4.349102592770976e-05, |
|
"loss": 1.9865, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 1.085400061462413, |
|
"grad_norm": 3.417532444000244, |
|
"learning_rate": 4.334557776701607e-05, |
|
"loss": 1.9988, |
|
"step": 290500 |
|
}, |
|
{ |
|
"epoch": 1.0872682199158767, |
|
"grad_norm": 2.9879865646362305, |
|
"learning_rate": 4.3200477633104895e-05, |
|
"loss": 1.9888, |
|
"step": 291000 |
|
}, |
|
{ |
|
"epoch": 1.0891363783693406, |
|
"grad_norm": 2.8864903450012207, |
|
"learning_rate": 4.305514521222923e-05, |
|
"loss": 1.9602, |
|
"step": 291500 |
|
}, |
|
{ |
|
"epoch": 1.0910045368228043, |
|
"grad_norm": 3.8783183097839355, |
|
"learning_rate": 4.290987259543744e-05, |
|
"loss": 2.0115, |
|
"step": 292000 |
|
}, |
|
{ |
|
"epoch": 1.092872695276268, |
|
"grad_norm": 3.2339043617248535, |
|
"learning_rate": 4.2764661033712623e-05, |
|
"loss": 2.016, |
|
"step": 292500 |
|
}, |
|
{ |
|
"epoch": 1.0947408537297316, |
|
"grad_norm": 3.942629337310791, |
|
"learning_rate": 4.261951177751206e-05, |
|
"loss": 1.9975, |
|
"step": 293000 |
|
}, |
|
{ |
|
"epoch": 1.0966090121831953, |
|
"grad_norm": 5.084557056427002, |
|
"learning_rate": 4.2474426076756546e-05, |
|
"loss": 1.9484, |
|
"step": 293500 |
|
}, |
|
{ |
|
"epoch": 1.098477170636659, |
|
"grad_norm": 3.621943473815918, |
|
"learning_rate": 4.2329405180819554e-05, |
|
"loss": 1.9364, |
|
"step": 294000 |
|
}, |
|
{ |
|
"epoch": 1.1003453290901228, |
|
"grad_norm": 3.5090487003326416, |
|
"learning_rate": 4.2184450338516527e-05, |
|
"loss": 2.0112, |
|
"step": 294500 |
|
}, |
|
{ |
|
"epoch": 1.1022134875435865, |
|
"grad_norm": 4.1997246742248535, |
|
"learning_rate": 4.204014221253661e-05, |
|
"loss": 1.9631, |
|
"step": 295000 |
|
}, |
|
{ |
|
"epoch": 1.1040816459970502, |
|
"grad_norm": 3.7712690830230713, |
|
"learning_rate": 4.189532294497906e-05, |
|
"loss": 1.9428, |
|
"step": 295500 |
|
}, |
|
{ |
|
"epoch": 1.1059498044505138, |
|
"grad_norm": 4.392169952392578, |
|
"learning_rate": 4.175057346905878e-05, |
|
"loss": 2.0024, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.1078179629039775, |
|
"grad_norm": 3.103431463241577, |
|
"learning_rate": 4.160589503125397e-05, |
|
"loss": 1.9671, |
|
"step": 296500 |
|
}, |
|
{ |
|
"epoch": 1.1096861213574414, |
|
"grad_norm": 2.2490739822387695, |
|
"learning_rate": 4.1461288877431045e-05, |
|
"loss": 1.9978, |
|
"step": 297000 |
|
}, |
|
{ |
|
"epoch": 1.111554279810905, |
|
"grad_norm": 3.9997470378875732, |
|
"learning_rate": 4.1317045243873654e-05, |
|
"loss": 1.9756, |
|
"step": 297500 |
|
}, |
|
{ |
|
"epoch": 1.1134224382643687, |
|
"grad_norm": 3.8243391513824463, |
|
"learning_rate": 4.117258724232387e-05, |
|
"loss": 1.9927, |
|
"step": 298000 |
|
}, |
|
{ |
|
"epoch": 1.1152905967178324, |
|
"grad_norm": 3.207801342010498, |
|
"learning_rate": 4.102820525609035e-05, |
|
"loss": 1.9807, |
|
"step": 298500 |
|
}, |
|
{ |
|
"epoch": 1.117158755171296, |
|
"grad_norm": 2.981112480163574, |
|
"learning_rate": 4.08839005284867e-05, |
|
"loss": 1.9757, |
|
"step": 299000 |
|
}, |
|
{ |
|
"epoch": 1.11902691362476, |
|
"grad_norm": 2.8603618144989014, |
|
"learning_rate": 4.0739674302161204e-05, |
|
"loss": 1.9882, |
|
"step": 299500 |
|
}, |
|
{ |
|
"epoch": 1.1208950720782236, |
|
"grad_norm": 3.422062635421753, |
|
"learning_rate": 4.059552781908619e-05, |
|
"loss": 1.9883, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 1.1227632305316873, |
|
"grad_norm": 3.2499775886535645, |
|
"learning_rate": 4.045146232054726e-05, |
|
"loss": 1.9715, |
|
"step": 300500 |
|
}, |
|
{ |
|
"epoch": 1.124631388985151, |
|
"grad_norm": 3.5448482036590576, |
|
"learning_rate": 4.030776693079458e-05, |
|
"loss": 1.9895, |
|
"step": 301000 |
|
}, |
|
{ |
|
"epoch": 1.1264995474386146, |
|
"grad_norm": 3.52693510055542, |
|
"learning_rate": 4.016386695421753e-05, |
|
"loss": 1.9936, |
|
"step": 301500 |
|
}, |
|
{ |
|
"epoch": 1.1283677058920785, |
|
"grad_norm": 3.247986078262329, |
|
"learning_rate": 4.002005167932884e-05, |
|
"loss": 1.9916, |
|
"step": 302000 |
|
}, |
|
{ |
|
"epoch": 1.1302358643455421, |
|
"grad_norm": 3.287041425704956, |
|
"learning_rate": 3.987632234456198e-05, |
|
"loss": 1.971, |
|
"step": 302500 |
|
}, |
|
{ |
|
"epoch": 1.1321040227990058, |
|
"grad_norm": 2.758507251739502, |
|
"learning_rate": 3.9732680187610403e-05, |
|
"loss": 2.0091, |
|
"step": 303000 |
|
}, |
|
{ |
|
"epoch": 1.1339721812524695, |
|
"grad_norm": 2.9558610916137695, |
|
"learning_rate": 3.958912644541679e-05, |
|
"loss": 2.0046, |
|
"step": 303500 |
|
}, |
|
{ |
|
"epoch": 1.1358403397059331, |
|
"grad_norm": 3.0163705348968506, |
|
"learning_rate": 3.944566235416254e-05, |
|
"loss": 1.9902, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.1377084981593968, |
|
"grad_norm": 2.4738314151763916, |
|
"learning_rate": 3.9302289149256985e-05, |
|
"loss": 1.969, |
|
"step": 304500 |
|
}, |
|
{ |
|
"epoch": 1.1395766566128607, |
|
"grad_norm": 3.352306604385376, |
|
"learning_rate": 3.915929453473775e-05, |
|
"loss": 1.9639, |
|
"step": 305000 |
|
}, |
|
{ |
|
"epoch": 1.1414448150663243, |
|
"grad_norm": 3.9805781841278076, |
|
"learning_rate": 3.9016106617675985e-05, |
|
"loss": 1.9703, |
|
"step": 305500 |
|
}, |
|
{ |
|
"epoch": 1.143312973519788, |
|
"grad_norm": 2.410222291946411, |
|
"learning_rate": 3.8873013285987326e-05, |
|
"loss": 1.9836, |
|
"step": 306000 |
|
}, |
|
{ |
|
"epoch": 1.1451811319732517, |
|
"grad_norm": 3.830815076828003, |
|
"learning_rate": 3.873030167047204e-05, |
|
"loss": 1.9474, |
|
"step": 306500 |
|
}, |
|
{ |
|
"epoch": 1.1470492904267153, |
|
"grad_norm": 3.884229898452759, |
|
"learning_rate": 3.858740101002805e-05, |
|
"loss": 1.9912, |
|
"step": 307000 |
|
}, |
|
{ |
|
"epoch": 1.1489174488801792, |
|
"grad_norm": 3.097529172897339, |
|
"learning_rate": 3.8444598626660855e-05, |
|
"loss": 1.9851, |
|
"step": 307500 |
|
}, |
|
{ |
|
"epoch": 1.1507856073336429, |
|
"grad_norm": 3.3618969917297363, |
|
"learning_rate": 3.8301895750081664e-05, |
|
"loss": 1.9897, |
|
"step": 308000 |
|
}, |
|
{ |
|
"epoch": 1.1526537657871065, |
|
"grad_norm": 2.846202850341797, |
|
"learning_rate": 3.8159293609144794e-05, |
|
"loss": 1.9649, |
|
"step": 308500 |
|
}, |
|
{ |
|
"epoch": 1.1545219242405702, |
|
"grad_norm": 3.3975071907043457, |
|
"learning_rate": 3.801679343183709e-05, |
|
"loss": 1.9611, |
|
"step": 309000 |
|
}, |
|
{ |
|
"epoch": 1.1563900826940339, |
|
"grad_norm": 3.390746831893921, |
|
"learning_rate": 3.787468113544101e-05, |
|
"loss": 1.9809, |
|
"step": 309500 |
|
}, |
|
{ |
|
"epoch": 1.1582582411474975, |
|
"grad_norm": 3.883208990097046, |
|
"learning_rate": 3.773238835577244e-05, |
|
"loss": 1.9741, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 1.1601263996009614, |
|
"grad_norm": 2.655240535736084, |
|
"learning_rate": 3.7590201215933385e-05, |
|
"loss": 1.9929, |
|
"step": 310500 |
|
}, |
|
{ |
|
"epoch": 1.161994558054425, |
|
"grad_norm": 3.561328649520874, |
|
"learning_rate": 3.7448120940337014e-05, |
|
"loss": 1.9941, |
|
"step": 311000 |
|
}, |
|
{ |
|
"epoch": 1.1638627165078888, |
|
"grad_norm": 4.378994464874268, |
|
"learning_rate": 3.7306148752476284e-05, |
|
"loss": 1.9692, |
|
"step": 311500 |
|
}, |
|
{ |
|
"epoch": 1.1657308749613524, |
|
"grad_norm": 2.515988826751709, |
|
"learning_rate": 3.716428587491332e-05, |
|
"loss": 1.9721, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.1675990334148163, |
|
"grad_norm": 2.2535147666931152, |
|
"learning_rate": 3.702253352926898e-05, |
|
"loss": 1.9904, |
|
"step": 312500 |
|
}, |
|
{ |
|
"epoch": 1.16946719186828, |
|
"grad_norm": 3.65279483795166, |
|
"learning_rate": 3.688117610505848e-05, |
|
"loss": 1.8969, |
|
"step": 313000 |
|
}, |
|
{ |
|
"epoch": 1.1713353503217436, |
|
"grad_norm": 3.5840914249420166, |
|
"learning_rate": 3.6739648257134945e-05, |
|
"loss": 1.9981, |
|
"step": 313500 |
|
}, |
|
{ |
|
"epoch": 1.1732035087752073, |
|
"grad_norm": 4.6728973388671875, |
|
"learning_rate": 3.659823459780314e-05, |
|
"loss": 2.0034, |
|
"step": 314000 |
|
}, |
|
{ |
|
"epoch": 1.175071667228671, |
|
"grad_norm": 3.8465287685394287, |
|
"learning_rate": 3.6456936344815585e-05, |
|
"loss": 1.9575, |
|
"step": 314500 |
|
}, |
|
{ |
|
"epoch": 1.1769398256821346, |
|
"grad_norm": 3.005547046661377, |
|
"learning_rate": 3.631603696099265e-05, |
|
"loss": 1.9799, |
|
"step": 315000 |
|
}, |
|
{ |
|
"epoch": 1.1788079841355985, |
|
"grad_norm": 3.0555107593536377, |
|
"learning_rate": 3.617497293307507e-05, |
|
"loss": 1.9681, |
|
"step": 315500 |
|
}, |
|
{ |
|
"epoch": 1.1806761425890622, |
|
"grad_norm": 3.1861069202423096, |
|
"learning_rate": 3.6034027956326125e-05, |
|
"loss": 2.0004, |
|
"step": 316000 |
|
}, |
|
{ |
|
"epoch": 1.1825443010425258, |
|
"grad_norm": 3.5906646251678467, |
|
"learning_rate": 3.589320324446236e-05, |
|
"loss": 1.984, |
|
"step": 316500 |
|
}, |
|
{ |
|
"epoch": 1.1844124594959895, |
|
"grad_norm": 3.118577480316162, |
|
"learning_rate": 3.5752500010164694e-05, |
|
"loss": 2.0166, |
|
"step": 317000 |
|
}, |
|
{ |
|
"epoch": 1.1862806179494532, |
|
"grad_norm": 3.639019727706909, |
|
"learning_rate": 3.561220050290951e-05, |
|
"loss": 1.9152, |
|
"step": 317500 |
|
}, |
|
{ |
|
"epoch": 1.188148776402917, |
|
"grad_norm": 2.516979455947876, |
|
"learning_rate": 3.547174360858504e-05, |
|
"loss": 1.9838, |
|
"step": 318000 |
|
}, |
|
{ |
|
"epoch": 1.1900169348563807, |
|
"grad_norm": 4.030247688293457, |
|
"learning_rate": 3.5331411821133284e-05, |
|
"loss": 1.9957, |
|
"step": 318500 |
|
}, |
|
{ |
|
"epoch": 1.1918850933098444, |
|
"grad_norm": 2.944655656814575, |
|
"learning_rate": 3.519120634899048e-05, |
|
"loss": 1.9557, |
|
"step": 319000 |
|
}, |
|
{ |
|
"epoch": 1.193753251763308, |
|
"grad_norm": 2.9035158157348633, |
|
"learning_rate": 3.505112839950505e-05, |
|
"loss": 1.9852, |
|
"step": 319500 |
|
}, |
|
{ |
|
"epoch": 1.1956214102167717, |
|
"grad_norm": 4.2154364585876465, |
|
"learning_rate": 3.491117917892734e-05, |
|
"loss": 1.9863, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.1974895686702354, |
|
"grad_norm": 3.7261621952056885, |
|
"learning_rate": 3.4771359892399204e-05, |
|
"loss": 1.9478, |
|
"step": 320500 |
|
}, |
|
{ |
|
"epoch": 1.1993577271236993, |
|
"grad_norm": 4.7101240158081055, |
|
"learning_rate": 3.463195098856492e-05, |
|
"loss": 1.9688, |
|
"step": 321000 |
|
}, |
|
{ |
|
"epoch": 1.201225885577163, |
|
"grad_norm": 3.4447665214538574, |
|
"learning_rate": 3.44923949151937e-05, |
|
"loss": 1.9768, |
|
"step": 321500 |
|
}, |
|
{ |
|
"epoch": 1.2030940440306266, |
|
"grad_norm": 2.6960058212280273, |
|
"learning_rate": 3.4352972382140294e-05, |
|
"loss": 1.9639, |
|
"step": 322000 |
|
}, |
|
{ |
|
"epoch": 1.2049622024840903, |
|
"grad_norm": 3.2135891914367676, |
|
"learning_rate": 3.421368459001103e-05, |
|
"loss": 2.0298, |
|
"step": 322500 |
|
}, |
|
{ |
|
"epoch": 1.206830360937554, |
|
"grad_norm": 3.953632116317749, |
|
"learning_rate": 3.4074532738252e-05, |
|
"loss": 2.0028, |
|
"step": 323000 |
|
}, |
|
{ |
|
"epoch": 1.2086985193910178, |
|
"grad_norm": 3.091557025909424, |
|
"learning_rate": 3.393551802513865e-05, |
|
"loss": 1.9353, |
|
"step": 323500 |
|
}, |
|
{ |
|
"epoch": 1.2105666778444815, |
|
"grad_norm": 3.2774996757507324, |
|
"learning_rate": 3.379664164776548e-05, |
|
"loss": 1.9976, |
|
"step": 324000 |
|
}, |
|
{ |
|
"epoch": 1.2124348362979451, |
|
"grad_norm": 4.057534694671631, |
|
"learning_rate": 3.365790480203579e-05, |
|
"loss": 1.9577, |
|
"step": 324500 |
|
}, |
|
{ |
|
"epoch": 1.2143029947514088, |
|
"grad_norm": 3.725080728530884, |
|
"learning_rate": 3.351958573365166e-05, |
|
"loss": 1.9619, |
|
"step": 325000 |
|
}, |
|
{ |
|
"epoch": 1.2161711532048725, |
|
"grad_norm": 2.542310953140259, |
|
"learning_rate": 3.338140801561512e-05, |
|
"loss": 1.9413, |
|
"step": 325500 |
|
}, |
|
{ |
|
"epoch": 1.2180393116583361, |
|
"grad_norm": 3.8798625469207764, |
|
"learning_rate": 3.324309635334674e-05, |
|
"loss": 1.9272, |
|
"step": 326000 |
|
}, |
|
{ |
|
"epoch": 1.2199074701118, |
|
"grad_norm": 2.8388006687164307, |
|
"learning_rate": 3.310492898945492e-05, |
|
"loss": 1.9717, |
|
"step": 326500 |
|
}, |
|
{ |
|
"epoch": 1.2217756285652637, |
|
"grad_norm": 3.845374822616577, |
|
"learning_rate": 3.296690711373742e-05, |
|
"loss": 1.9995, |
|
"step": 327000 |
|
}, |
|
{ |
|
"epoch": 1.2236437870187273, |
|
"grad_norm": 3.3350958824157715, |
|
"learning_rate": 3.282903191473914e-05, |
|
"loss": 1.9505, |
|
"step": 327500 |
|
}, |
|
{ |
|
"epoch": 1.225511945472191, |
|
"grad_norm": 3.514188289642334, |
|
"learning_rate": 3.2691304579741944e-05, |
|
"loss": 1.9493, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.2273801039256549, |
|
"grad_norm": 4.140675067901611, |
|
"learning_rate": 3.255372629475436e-05, |
|
"loss": 1.9381, |
|
"step": 328500 |
|
}, |
|
{ |
|
"epoch": 1.2292482623791186, |
|
"grad_norm": 3.2821719646453857, |
|
"learning_rate": 3.241629824450141e-05, |
|
"loss": 1.9647, |
|
"step": 329000 |
|
}, |
|
{ |
|
"epoch": 1.2311164208325822, |
|
"grad_norm": 3.671809434890747, |
|
"learning_rate": 3.227929601377734e-05, |
|
"loss": 1.948, |
|
"step": 329500 |
|
}, |
|
{ |
|
"epoch": 1.2329845792860459, |
|
"grad_norm": 4.461349010467529, |
|
"learning_rate": 3.214244577120278e-05, |
|
"loss": 1.9533, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 1.2348527377395095, |
|
"grad_norm": 4.116054058074951, |
|
"learning_rate": 3.200547490304101e-05, |
|
"loss": 1.9278, |
|
"step": 330500 |
|
}, |
|
{ |
|
"epoch": 1.2367208961929732, |
|
"grad_norm": 3.0734941959381104, |
|
"learning_rate": 3.1868658990759734e-05, |
|
"loss": 1.9038, |
|
"step": 331000 |
|
}, |
|
{ |
|
"epoch": 1.238589054646437, |
|
"grad_norm": 4.233485698699951, |
|
"learning_rate": 3.173199921251894e-05, |
|
"loss": 1.9466, |
|
"step": 331500 |
|
}, |
|
{ |
|
"epoch": 1.2404572130999008, |
|
"grad_norm": 3.6610071659088135, |
|
"learning_rate": 3.159549674513415e-05, |
|
"loss": 1.9437, |
|
"step": 332000 |
|
}, |
|
{ |
|
"epoch": 1.2423253715533644, |
|
"grad_norm": 3.757662773132324, |
|
"learning_rate": 3.145915276406623e-05, |
|
"loss": 1.9695, |
|
"step": 332500 |
|
}, |
|
{ |
|
"epoch": 1.244193530006828, |
|
"grad_norm": 4.0608062744140625, |
|
"learning_rate": 3.1322968443411296e-05, |
|
"loss": 1.9398, |
|
"step": 333000 |
|
}, |
|
    {
      "epoch": 1.2460616884602917,
      "grad_norm": 3.5959203243255615,
      "learning_rate": 3.118694495589054e-05,
      "loss": 1.9154,
      "step": 333500
    },
    {
      "epoch": 1.2479298469137556,
      "grad_norm": 4.01427698135376,
      "learning_rate": 3.105135503334797e-05,
      "loss": 1.9268,
      "step": 334000
    },
    {
      "epoch": 1.2497980053672193,
      "grad_norm": 4.18043851852417,
      "learning_rate": 3.091565639719372e-05,
      "loss": 1.9349,
      "step": 334500
    },
    {
      "epoch": 1.251666163820683,
      "grad_norm": 3.132768154144287,
      "learning_rate": 3.0780122101651435e-05,
      "loss": 1.9476,
      "step": 335000
    },
    {
      "epoch": 1.2535343222741466,
      "grad_norm": 2.99275803565979,
      "learning_rate": 3.0644753313844755e-05,
      "loss": 1.9625,
      "step": 335500
    },
    {
      "epoch": 1.2554024807276103,
      "grad_norm": 3.58479380607605,
      "learning_rate": 3.0509551199472118e-05,
      "loss": 1.9545,
      "step": 336000
    },
    {
      "epoch": 1.257270639181074,
      "grad_norm": 3.13480544090271,
      "learning_rate": 3.0374786823074896e-05,
      "loss": 1.9398,
      "step": 336500
    },
    {
      "epoch": 1.2591387976345378,
      "grad_norm": 3.130760431289673,
      "learning_rate": 3.0239921207753986e-05,
      "loss": 1.9582,
      "step": 337000
    },
    {
      "epoch": 1.2610069560880015,
      "grad_norm": 3.4282748699188232,
      "learning_rate": 3.0105225751989453e-05,
      "loss": 1.9285,
      "step": 337500
    },
    {
      "epoch": 1.2628751145414652,
      "grad_norm": 3.996558666229248,
      "learning_rate": 2.9970701615681463e-05,
      "loss": 1.9397,
      "step": 338000
    },
    {
      "epoch": 1.2647432729949288,
      "grad_norm": 3.9144933223724365,
      "learning_rate": 2.9836349957254927e-05,
      "loss": 1.9361,
      "step": 338500
    },
    {
      "epoch": 1.2666114314483927,
      "grad_norm": 2.7201411724090576,
      "learning_rate": 2.9702171933649482e-05,
      "loss": 1.9221,
      "step": 339000
    },
    {
      "epoch": 1.2684795899018564,
      "grad_norm": 3.485480785369873,
      "learning_rate": 2.956843653156831e-05,
      "loss": 1.951,
      "step": 339500
    },
    {
      "epoch": 1.27034774835532,
      "grad_norm": 4.514249324798584,
      "learning_rate": 2.943460888939414e-05,
      "loss": 1.9556,
      "step": 340000
    },
    {
      "epoch": 1.2722159068087837,
      "grad_norm": 3.043680429458618,
      "learning_rate": 2.930095834154558e-05,
      "loss": 1.9673,
      "step": 340500
    },
    {
      "epoch": 1.2740840652622474,
      "grad_norm": 2.636143207550049,
      "learning_rate": 2.9167486038924823e-05,
      "loss": 1.9492,
      "step": 341000
    },
    {
      "epoch": 1.275952223715711,
      "grad_norm": 3.6190054416656494,
      "learning_rate": 2.9034193130899155e-05,
      "loss": 1.9648,
      "step": 341500
    },
    {
      "epoch": 1.2778203821691747,
      "grad_norm": 4.245516777038574,
      "learning_rate": 2.890108076529099e-05,
      "loss": 1.9589,
      "step": 342000
    },
    {
      "epoch": 1.2796885406226386,
      "grad_norm": 3.619927406311035,
      "learning_rate": 2.876841576763556e-05,
      "loss": 1.9439,
      "step": 342500
    },
    {
      "epoch": 1.2815566990761023,
      "grad_norm": 3.657912015914917,
      "learning_rate": 2.863566755729298e-05,
      "loss": 1.9564,
      "step": 343000
    },
    {
      "epoch": 1.283424857529566,
      "grad_norm": 3.4643499851226807,
      "learning_rate": 2.8503103321182943e-05,
      "loss": 1.9754,
      "step": 343500
    },
    {
      "epoch": 1.2852930159830296,
      "grad_norm": 4.774941444396973,
      "learning_rate": 2.8370724200853072e-05,
      "loss": 1.9406,
      "step": 344000
    },
    {
      "epoch": 1.2871611744364935,
      "grad_norm": 3.5722765922546387,
      "learning_rate": 2.8238531336256975e-05,
      "loss": 1.9708,
      "step": 344500
    },
    {
      "epoch": 1.2890293328899571,
      "grad_norm": 3.9576704502105713,
      "learning_rate": 2.8106525865744272e-05,
      "loss": 1.9503,
      "step": 345000
    },
    {
      "epoch": 1.2908974913434208,
      "grad_norm": 4.773796558380127,
      "learning_rate": 2.7974972371021873e-05,
      "loss": 1.967,
      "step": 345500
    },
    {
      "epoch": 1.2927656497968845,
      "grad_norm": 3.749734401702881,
      "learning_rate": 2.784334471679681e-05,
      "loss": 1.9484,
      "step": 346000
    },
    {
      "epoch": 1.2946338082503481,
      "grad_norm": 4.330195903778076,
      "learning_rate": 2.7711907859717524e-05,
      "loss": 1.9094,
      "step": 346500
    },
    {
      "epoch": 1.2965019667038118,
      "grad_norm": 3.0685718059539795,
      "learning_rate": 2.758066293162346e-05,
      "loss": 1.9195,
      "step": 347000
    },
    {
      "epoch": 1.2983701251572755,
      "grad_norm": 3.8571877479553223,
      "learning_rate": 2.7449611062701342e-05,
      "loss": 1.9457,
      "step": 347500
    },
    {
      "epoch": 1.3002382836107393,
      "grad_norm": 3.673949718475342,
      "learning_rate": 2.731875338147545e-05,
      "loss": 1.9046,
      "step": 348000
    },
    {
      "epoch": 1.302106442064203,
      "grad_norm": 3.5845327377319336,
      "learning_rate": 2.7188091014797774e-05,
      "loss": 1.9871,
      "step": 348500
    },
    {
      "epoch": 1.3039746005176667,
      "grad_norm": 5.045246124267578,
      "learning_rate": 2.7057885822898532e-05,
      "loss": 1.9445,
      "step": 349000
    },
    {
      "epoch": 1.3058427589711303,
      "grad_norm": 4.416993141174316,
      "learning_rate": 2.692761706288961e-05,
      "loss": 1.9242,
      "step": 349500
    },
    {
      "epoch": 1.3077109174245942,
      "grad_norm": 5.05975341796875,
      "learning_rate": 2.6797546985612997e-05,
      "loss": 1.9729,
      "step": 350000
    },
    {
      "epoch": 1.3095790758780579,
      "grad_norm": 3.4689128398895264,
      "learning_rate": 2.6667676711138423e-05,
      "loss": 1.9479,
      "step": 350500
    },
    {
      "epoch": 1.3114472343315215,
      "grad_norm": 3.177008628845215,
      "learning_rate": 2.6538266495259985e-05,
      "loss": 1.9456,
      "step": 351000
    },
    {
      "epoch": 1.3133153927849852,
      "grad_norm": 3.6939172744750977,
      "learning_rate": 2.6408798774518146e-05,
      "loss": 1.934,
      "step": 351500
    },
    {
      "epoch": 1.3151835512384489,
      "grad_norm": 4.592978477478027,
      "learning_rate": 2.6279534204197788e-05,
      "loss": 1.8931,
      "step": 352000
    },
    {
      "epoch": 1.3170517096919125,
      "grad_norm": 4.249555587768555,
      "learning_rate": 2.6150473897432166e-05,
      "loss": 1.9352,
      "step": 352500
    },
    {
      "epoch": 1.3189198681453764,
      "grad_norm": 3.4636592864990234,
      "learning_rate": 2.6021876469757334e-05,
      "loss": 1.9227,
      "step": 353000
    },
    {
      "epoch": 1.32078802659884,
      "grad_norm": 3.9055769443511963,
      "learning_rate": 2.5893227608380464e-05,
      "loss": 2.0114,
      "step": 353500
    },
    {
      "epoch": 1.3226561850523038,
      "grad_norm": 3.659078359603882,
      "learning_rate": 2.576478633715232e-05,
      "loss": 1.9675,
      "step": 354000
    },
    {
      "epoch": 1.3245243435057674,
      "grad_norm": 4.109720230102539,
      "learning_rate": 2.563655376211658e-05,
      "loss": 1.9515,
      "step": 354500
    },
    {
      "epoch": 1.3263925019592313,
      "grad_norm": 3.4679160118103027,
      "learning_rate": 2.550853098751974e-05,
      "loss": 1.965,
      "step": 355000
    },
    {
      "epoch": 1.328260660412695,
      "grad_norm": 3.3445444107055664,
      "learning_rate": 2.538097452833215e-05,
      "loss": 1.9422,
      "step": 355500
    },
    {
      "epoch": 1.3301288188661586,
      "grad_norm": 4.475471496582031,
      "learning_rate": 2.5253374235012317e-05,
      "loss": 1.9533,
      "step": 356000
    },
    {
      "epoch": 1.3319969773196223,
      "grad_norm": 3.064134359359741,
      "learning_rate": 2.5125987041797306e-05,
      "loss": 1.9263,
      "step": 356500
    },
    {
      "epoch": 1.333865135773086,
      "grad_norm": 3.313082218170166,
      "learning_rate": 2.4998814045653785e-05,
      "loss": 1.8802,
      "step": 357000
    },
    {
      "epoch": 1.3357332942265496,
      "grad_norm": 5.206328392028809,
      "learning_rate": 2.4872110041523282e-05,
      "loss": 1.8967,
      "step": 357500
    },
    {
      "epoch": 1.3376014526800133,
      "grad_norm": 4.334334373474121,
      "learning_rate": 2.4745368289174596e-05,
      "loss": 1.9429,
      "step": 358000
    },
    {
      "epoch": 1.3394696111334772,
      "grad_norm": 5.680240154266357,
      "learning_rate": 2.4618844011511794e-05,
      "loss": 1.9209,
      "step": 358500
    },
    {
      "epoch": 1.3413377695869408,
      "grad_norm": 3.261059284210205,
      "learning_rate": 2.449253829807073e-05,
      "loss": 1.9251,
      "step": 359000
    },
    {
      "epoch": 1.3432059280404045,
      "grad_norm": 3.2310187816619873,
      "learning_rate": 2.4366704188693773e-05,
      "loss": 1.9056,
      "step": 359500
    },
    {
      "epoch": 1.3450740864938682,
      "grad_norm": 4.145471096038818,
      "learning_rate": 2.424083842220842e-05,
      "loss": 1.926,
      "step": 360000
    },
    {
      "epoch": 1.346942244947332,
      "grad_norm": 4.704455852508545,
      "learning_rate": 2.411519447505653e-05,
      "loss": 1.9485,
      "step": 360500
    },
    {
      "epoch": 1.3488104034007957,
      "grad_norm": 3.9618282318115234,
      "learning_rate": 2.3989773429193175e-05,
      "loss": 1.9304,
      "step": 361000
    },
    {
      "epoch": 1.3506785618542594,
      "grad_norm": 3.921598434448242,
      "learning_rate": 2.3864576364654012e-05,
      "loss": 1.91,
      "step": 361500
    },
    {
      "epoch": 1.352546720307723,
      "grad_norm": 4.026153087615967,
      "learning_rate": 2.3739604359545953e-05,
      "loss": 1.9588,
      "step": 362000
    },
    {
      "epoch": 1.3544148787611867,
      "grad_norm": 3.6452534198760986,
      "learning_rate": 2.3615107755379164e-05,
      "loss": 1.9613,
      "step": 362500
    },
    {
      "epoch": 1.3562830372146504,
      "grad_norm": 3.757392406463623,
      "learning_rate": 2.349058864020204e-05,
      "loss": 1.9386,
      "step": 363000
    },
    {
      "epoch": 1.358151195668114,
      "grad_norm": 4.3105902671813965,
      "learning_rate": 2.3366297804968707e-05,
      "loss": 1.9171,
      "step": 363500
    },
    {
      "epoch": 1.360019354121578,
      "grad_norm": 4.3953938484191895,
      "learning_rate": 2.3242236319982296e-05,
      "loss": 1.9274,
      "step": 364000
    },
    {
      "epoch": 1.3618875125750416,
      "grad_norm": 3.9918718338012695,
      "learning_rate": 2.3118652685036857e-05,
      "loss": 1.9505,
      "step": 364500
    },
    {
      "epoch": 1.3637556710285053,
      "grad_norm": 4.170524597167969,
      "learning_rate": 2.2995052639511584e-05,
      "loss": 1.9666,
      "step": 365000
    },
    {
      "epoch": 1.365623829481969,
      "grad_norm": 2.33520245552063,
      "learning_rate": 2.2871685141129013e-05,
      "loss": 1.8909,
      "step": 365500
    },
    {
      "epoch": 1.3674919879354328,
      "grad_norm": 3.8575286865234375,
      "learning_rate": 2.2748551252241096e-05,
      "loss": 1.9036,
      "step": 366000
    },
    {
      "epoch": 1.3693601463888965,
      "grad_norm": 3.738067150115967,
      "learning_rate": 2.262589759672201e-05,
      "loss": 1.9242,
      "step": 366500
    },
    {
      "epoch": 1.3712283048423601,
      "grad_norm": 3.2097079753875732,
      "learning_rate": 2.2503233633312364e-05,
      "loss": 1.9669,
      "step": 367000
    },
    {
      "epoch": 1.3730964632958238,
      "grad_norm": 4.111919403076172,
      "learning_rate": 2.2380806452236224e-05,
      "loss": 1.9115,
      "step": 367500
    },
    {
      "epoch": 1.3749646217492875,
      "grad_norm": 3.6487059593200684,
      "learning_rate": 2.2258617107748202e-05,
      "loss": 1.9221,
      "step": 368000
    },
    {
      "epoch": 1.3768327802027511,
      "grad_norm": 3.9140658378601074,
      "learning_rate": 2.213666665205488e-05,
      "loss": 1.9077,
      "step": 368500
    },
    {
      "epoch": 1.378700938656215,
      "grad_norm": 4.236271858215332,
      "learning_rate": 2.2015199316183162e-05,
      "loss": 1.9248,
      "step": 369000
    },
    {
      "epoch": 1.3805690971096787,
      "grad_norm": 3.9722940921783447,
      "learning_rate": 2.189372930344269e-05,
      "loss": 1.9075,
      "step": 369500
    },
    {
      "epoch": 1.3824372555631423,
      "grad_norm": 3.9439289569854736,
      "learning_rate": 2.1772501321647675e-05,
      "loss": 1.9325,
      "step": 370000
    },
    {
      "epoch": 1.384305414016606,
      "grad_norm": 3.183210611343384,
      "learning_rate": 2.1651516414726137e-05,
      "loss": 1.9372,
      "step": 370500
    },
    {
      "epoch": 1.38617357247007,
      "grad_norm": 4.380889892578125,
      "learning_rate": 2.1530775624512915e-05,
      "loss": 1.9119,
      "step": 371000
    },
    {
      "epoch": 1.3880417309235336,
      "grad_norm": 3.137747049331665,
      "learning_rate": 2.1410520736652044e-05,
      "loss": 1.8852,
      "step": 371500
    },
    {
      "epoch": 1.3899098893769972,
      "grad_norm": 4.502001762390137,
      "learning_rate": 2.129027080352e-05,
      "loss": 1.9157,
      "step": 372000
    },
    {
      "epoch": 1.3917780478304609,
      "grad_norm": 3.3394224643707275,
      "learning_rate": 2.1170268097883096e-05,
      "loss": 1.9329,
      "step": 372500
    },
    {
      "epoch": 1.3936462062839245,
      "grad_norm": 3.0865299701690674,
      "learning_rate": 2.1050513653118137e-05,
      "loss": 1.9178,
      "step": 373000
    },
    {
      "epoch": 1.3955143647373882,
      "grad_norm": 4.535000324249268,
      "learning_rate": 2.0931247261291493e-05,
      "loss": 1.9163,
      "step": 373500
    },
    {
      "epoch": 1.3973825231908519,
      "grad_norm": 3.5877630710601807,
      "learning_rate": 2.0811991928172553e-05,
      "loss": 1.9437,
      "step": 374000
    },
    {
      "epoch": 1.3992506816443158,
      "grad_norm": 4.446563243865967,
      "learning_rate": 2.0692987941141717e-05,
      "loss": 1.9458,
      "step": 374500
    },
    {
      "epoch": 1.4011188400977794,
      "grad_norm": 3.427525758743286,
      "learning_rate": 2.0574236324975526e-05,
      "loss": 1.9163,
      "step": 375000
    },
    {
      "epoch": 1.402986998551243,
      "grad_norm": 4.324997901916504,
      "learning_rate": 2.0455974845157404e-05,
      "loss": 1.9447,
      "step": 375500
    },
    {
      "epoch": 1.4048551570047068,
      "grad_norm": 4.460984706878662,
      "learning_rate": 2.0337730526503722e-05,
      "loss": 1.8936,
      "step": 376000
    },
    {
      "epoch": 1.4067233154581706,
      "grad_norm": 3.0335512161254883,
      "learning_rate": 2.0219741637935503e-05,
      "loss": 1.9274,
      "step": 376500
    },
    {
      "epoch": 1.4085914739116343,
      "grad_norm": 3.983215808868408,
      "learning_rate": 2.010200919548798e-05,
      "loss": 1.9456,
      "step": 377000
    },
    {
      "epoch": 1.410459632365098,
      "grad_norm": 4.645228385925293,
      "learning_rate": 1.9984534212988126e-05,
      "loss": 1.8914,
      "step": 377500
    },
    {
      "epoch": 1.4123277908185616,
      "grad_norm": 4.4612250328063965,
      "learning_rate": 1.986755187644178e-05,
      "loss": 1.9379,
      "step": 378000
    },
    {
      "epoch": 1.4141959492720253,
      "grad_norm": 3.9466419219970703,
      "learning_rate": 1.9750594326473332e-05,
      "loss": 1.9053,
      "step": 378500
    },
    {
      "epoch": 1.416064107725489,
      "grad_norm": 3.384223461151123,
      "learning_rate": 1.9633897262584083e-05,
      "loss": 1.9777,
      "step": 379000
    },
    {
      "epoch": 1.4179322661789528,
      "grad_norm": 3.591265916824341,
      "learning_rate": 1.9517461689685075e-05,
      "loss": 1.9357,
      "step": 379500
    },
    {
      "epoch": 1.4198004246324165,
      "grad_norm": 4.8993730545043945,
      "learning_rate": 1.9401520693960035e-05,
      "loss": 1.9063,
      "step": 380000
    },
    {
      "epoch": 1.4216685830858802,
      "grad_norm": 4.398604869842529,
      "learning_rate": 1.9285610580773773e-05,
      "loss": 1.8615,
      "step": 380500
    },
    {
      "epoch": 1.4235367415393438,
      "grad_norm": 3.6538774967193604,
      "learning_rate": 1.916996495777159e-05,
      "loss": 1.9166,
      "step": 381000
    },
    {
      "epoch": 1.4254048999928077,
      "grad_norm": 3.730799436569214,
      "learning_rate": 1.905458482081028e-05,
      "loss": 1.8853,
      "step": 381500
    },
    {
      "epoch": 1.4272730584462714,
      "grad_norm": 5.199082851409912,
      "learning_rate": 1.8939701124169172e-05,
      "loss": 1.8736,
      "step": 382000
    },
    {
      "epoch": 1.429141216899735,
      "grad_norm": 4.507551670074463,
      "learning_rate": 1.8824854401777008e-05,
      "loss": 1.9045,
      "step": 382500
    },
    {
      "epoch": 1.4310093753531987,
      "grad_norm": 2.917692184448242,
      "learning_rate": 1.8710276137269065e-05,
      "loss": 1.8737,
      "step": 383000
    },
    {
      "epoch": 1.4328775338066624,
      "grad_norm": 4.9208221435546875,
      "learning_rate": 1.8595967317310803e-05,
      "loss": 1.8852,
      "step": 383500
    },
    {
      "epoch": 1.434745692260126,
      "grad_norm": 4.914313793182373,
      "learning_rate": 1.8481928926247323e-05,
      "loss": 1.9188,
      "step": 384000
    },
    {
      "epoch": 1.4366138507135897,
      "grad_norm": 4.2889556884765625,
      "learning_rate": 1.836838920853576e-05,
      "loss": 1.9626,
      "step": 384500
    },
    {
      "epoch": 1.4384820091670536,
      "grad_norm": 4.040252208709717,
      "learning_rate": 1.8254894073216665e-05,
      "loss": 1.9157,
      "step": 385000
    },
    {
      "epoch": 1.4403501676205173,
      "grad_norm": 4.800929546356201,
      "learning_rate": 1.8141672303869356e-05,
      "loss": 1.8893,
      "step": 385500
    },
    {
      "epoch": 1.442218326073981,
      "grad_norm": 3.5540807247161865,
      "learning_rate": 1.8028724875478063e-05,
      "loss": 1.9504,
      "step": 386000
    },
    {
      "epoch": 1.4440864845274446,
      "grad_norm": 3.3006908893585205,
      "learning_rate": 1.791627782948606e-05,
      "loss": 1.9409,
      "step": 386500
    },
    {
      "epoch": 1.4459546429809085,
      "grad_norm": 2.976499080657959,
      "learning_rate": 1.7803881444967192e-05,
      "loss": 1.9083,
      "step": 387000
    },
    {
      "epoch": 1.4478228014343721,
      "grad_norm": 4.687767505645752,
      "learning_rate": 1.7691762310215786e-05,
      "loss": 1.9419,
      "step": 387500
    },
    {
      "epoch": 1.4496909598878358,
      "grad_norm": 4.436933517456055,
      "learning_rate": 1.7579921390721e-05,
      "loss": 1.9205,
      "step": 388000
    },
    {
      "epoch": 1.4515591183412995,
      "grad_norm": 4.451811790466309,
      "learning_rate": 1.7468582493799596e-05,
      "loss": 1.9,
      "step": 388500
    },
    {
      "epoch": 1.4534272767947631,
      "grad_norm": 4.564020156860352,
      "learning_rate": 1.7357300330458897e-05,
      "loss": 1.8913,
      "step": 389000
    },
    {
      "epoch": 1.4552954352482268,
      "grad_norm": 3.211652994155884,
      "learning_rate": 1.724629926252035e-05,
      "loss": 1.8884,
      "step": 389500
    },
    {
      "epoch": 1.4571635937016905,
      "grad_norm": 4.224535942077637,
      "learning_rate": 1.7135580245845107e-05,
      "loss": 1.9185,
      "step": 390000
    },
    {
      "epoch": 1.4590317521551543,
      "grad_norm": 3.9640257358551025,
      "learning_rate": 1.7025364822818328e-05,
      "loss": 1.9193,
      "step": 390500
    },
    {
      "epoch": 1.460899910608618,
      "grad_norm": 3.1013686656951904,
      "learning_rate": 1.6915212197670978e-05,
      "loss": 1.9274,
      "step": 391000
    },
    {
      "epoch": 1.4627680690620817,
      "grad_norm": 5.020761966705322,
      "learning_rate": 1.68053444748701e-05,
      "loss": 1.8856,
      "step": 391500
    },
    {
      "epoch": 1.4646362275155453,
      "grad_norm": 3.306040048599243,
      "learning_rate": 1.6695762600517374e-05,
      "loss": 1.9403,
      "step": 392000
    },
    {
      "epoch": 1.4665043859690092,
      "grad_norm": 4.234299182891846,
      "learning_rate": 1.658668582157294e-05,
      "loss": 1.8777,
      "step": 392500
    },
    {
      "epoch": 1.468372544422473,
      "grad_norm": 6.068370342254639,
      "learning_rate": 1.6477677896163034e-05,
      "loss": 1.8937,
      "step": 393000
    },
    {
      "epoch": 1.4702407028759366,
      "grad_norm": 4.372175216674805,
      "learning_rate": 1.636895864082966e-05,
      "loss": 1.9034,
      "step": 393500
    },
    {
      "epoch": 1.4721088613294002,
      "grad_norm": 4.099493980407715,
      "learning_rate": 1.6260528991784696e-05,
      "loss": 1.9204,
      "step": 394000
    },
    {
      "epoch": 1.4739770197828639,
      "grad_norm": 3.7667877674102783,
      "learning_rate": 1.6152389882746138e-05,
      "loss": 1.9014,
      "step": 394500
    },
    {
      "epoch": 1.4758451782363275,
      "grad_norm": 2.797348976135254,
      "learning_rate": 1.60447576486997e-05,
      "loss": 1.9077,
      "step": 395000
    },
    {
      "epoch": 1.4777133366897914,
      "grad_norm": 4.806083679199219,
      "learning_rate": 1.593720182508714e-05,
      "loss": 1.9239,
      "step": 395500
    },
    {
      "epoch": 1.479581495143255,
      "grad_norm": 4.35167121887207,
      "learning_rate": 1.58299393257415e-05,
      "loss": 1.9147,
      "step": 396000
    },
    {
      "epoch": 1.4814496535967188,
      "grad_norm": 7.256587982177734,
      "learning_rate": 1.5722971074330122e-05,
      "loss": 1.9101,
      "step": 396500
    },
    {
      "epoch": 1.4833178120501824,
      "grad_norm": 4.269795894622803,
      "learning_rate": 1.5616511042961456e-05,
      "loss": 1.9253,
      "step": 397000
    },
    {
      "epoch": 1.4851859705036463,
      "grad_norm": 3.5930633544921875,
      "learning_rate": 1.551013345518685e-05,
      "loss": 1.9399,
      "step": 397500
    },
    {
      "epoch": 1.48705412895711,
      "grad_norm": 4.802802085876465,
      "learning_rate": 1.5404052869284143e-05,
      "loss": 1.924,
      "step": 398000
    },
    {
      "epoch": 1.4889222874105736,
      "grad_norm": 5.457955360412598,
      "learning_rate": 1.5298270198742908e-05,
      "loss": 1.925,
      "step": 398500
    },
    {
      "epoch": 1.4907904458640373,
      "grad_norm": 4.350592613220215,
      "learning_rate": 1.5192997023342925e-05,
      "loss": 1.9841,
      "step": 399000
    },
    {
      "epoch": 1.492658604317501,
      "grad_norm": 3.5578579902648926,
      "learning_rate": 1.5087812313349553e-05,
      "loss": 1.8914,
      "step": 399500
    },
    {
      "epoch": 1.4945267627709646,
      "grad_norm": 4.802867412567139,
      "learning_rate": 1.4982928241953386e-05,
      "loss": 1.8969,
      "step": 400000
    },
    {
      "epoch": 1.4963949212244283,
      "grad_norm": 4.002582550048828,
      "learning_rate": 1.4878345712340435e-05,
      "loss": 1.904,
      "step": 400500
    },
    {
      "epoch": 1.4982630796778922,
      "grad_norm": 4.3025665283203125,
      "learning_rate": 1.4774273882839745e-05,
      "loss": 1.916,
      "step": 401000
    },
    {
      "epoch": 1.5001312381313558,
      "grad_norm": 4.821669101715088,
      "learning_rate": 1.4670296528381727e-05,
      "loss": 1.8837,
      "step": 401500
    },
    {
      "epoch": 1.5019993965848195,
      "grad_norm": 3.655703067779541,
      "learning_rate": 1.456662340786592e-05,
      "loss": 1.95,
      "step": 402000
    },
    {
      "epoch": 1.5038675550382834,
      "grad_norm": 3.852405548095703,
      "learning_rate": 1.4463255414050487e-05,
      "loss": 1.8723,
      "step": 402500
    },
    {
      "epoch": 1.505735713491747,
      "grad_norm": 4.878715515136719,
      "learning_rate": 1.4360193437066122e-05,
      "loss": 1.8876,
      "step": 403000
    },
    {
      "epoch": 1.5076038719452107,
      "grad_norm": 4.768284320831299,
      "learning_rate": 1.4257643567674483e-05,
      "loss": 1.9061,
      "step": 403500
    },
    {
      "epoch": 1.5094720303986744,
      "grad_norm": 4.845045566558838,
      "learning_rate": 1.4155195667736094e-05,
      "loss": 1.8932,
      "step": 404000
    },
    {
      "epoch": 1.511340188852138,
      "grad_norm": 3.8661012649536133,
      "learning_rate": 1.4053056437417239e-05,
      "loss": 1.9518,
      "step": 404500
    },
    {
      "epoch": 1.5132083473056017,
      "grad_norm": 4.624420166015625,
      "learning_rate": 1.3951226756267382e-05,
      "loss": 1.8403,
      "step": 405000
    },
    {
      "epoch": 1.5150765057590654,
      "grad_norm": 3.6633214950561523,
      "learning_rate": 1.3849910229293806e-05,
      "loss": 1.8943,
      "step": 405500
    },
    {
      "epoch": 1.516944664212529,
      "grad_norm": 5.2839155197143555,
      "learning_rate": 1.3748701650989005e-05,
      "loss": 1.8692,
      "step": 406000
    },
    {
      "epoch": 1.518812822665993,
      "grad_norm": 3.8412556648254395,
      "learning_rate": 1.3647805242737227e-05,
      "loss": 1.8699,
      "step": 406500
    },
    {
      "epoch": 1.5206809811194566,
      "grad_norm": 3.3254265785217285,
      "learning_rate": 1.3547221873385652e-05,
      "loss": 1.8909,
      "step": 407000
    },
    {
      "epoch": 1.5225491395729203,
      "grad_norm": 3.2033207416534424,
      "learning_rate": 1.3446952409085728e-05,
      "loss": 1.8986,
      "step": 407500
    },
    {
      "epoch": 1.5244172980263841,
      "grad_norm": 4.760767459869385,
      "learning_rate": 1.334719730796591e-05,
      "loss": 1.8756,
      "step": 408000
    },
    {
      "epoch": 1.5262854564798478,
      "grad_norm": 4.965844631195068,
      "learning_rate": 1.3247557609288142e-05,
      "loss": 1.8743,
      "step": 408500
    },
    {
      "epoch": 1.5281536149333115,
      "grad_norm": 4.014163494110107,
      "learning_rate": 1.314823439615473e-05,
      "loss": 1.9219,
      "step": 409000
    },
    {
      "epoch": 1.5300217733867751,
      "grad_norm": 4.178042888641357,
      "learning_rate": 1.3049228523865536e-05,
      "loss": 1.881,
      "step": 409500
    },
    {
      "epoch": 1.5318899318402388,
      "grad_norm": 4.607501983642578,
      "learning_rate": 1.2950737902223226e-05,
      "loss": 1.9469,
      "step": 410000
    },
    {
      "epoch": 1.5337580902937025,
      "grad_norm": 4.652303695678711,
      "learning_rate": 1.2852368627651334e-05,
      "loss": 1.8881,
      "step": 410500
    },
    {
      "epoch": 1.5356262487471661,
      "grad_norm": 4.992543697357178,
      "learning_rate": 1.2754319241706458e-05,
      "loss": 1.9569,
      "step": 411000
    },
    {
      "epoch": 1.5374944072006298,
      "grad_norm": 3.5058271884918213,
      "learning_rate": 1.2656590588719214e-05,
      "loss": 1.9032,
      "step": 411500
    },
    {
      "epoch": 1.5393625656540937,
      "grad_norm": 3.973353147506714,
      "learning_rate": 1.2559183510258338e-05,
      "loss": 1.8669,
      "step": 412000
    },
    {
      "epoch": 1.5412307241075573,
      "grad_norm": 4.776645660400391,
      "learning_rate": 1.2462292692129003e-05,
      "loss": 1.8993,
      "step": 412500
    },
    {
      "epoch": 1.543098882561021,
      "grad_norm": 4.160543441772461,
      "learning_rate": 1.2365530629011917e-05,
      "loss": 1.9269,
      "step": 413000
    },
    {
      "epoch": 1.544967041014485,
      "grad_norm": 4.14699125289917,
      "learning_rate": 1.226909264681978e-05,
      "loss": 1.9139,
      "step": 413500
    },
    {
      "epoch": 1.5468351994679486,
      "grad_norm": 4.639766693115234,
      "learning_rate": 1.2172979576006998e-05,
      "loss": 1.8844,
      "step": 414000
    },
    {
      "epoch": 1.5487033579214122,
      "grad_norm": 3.771737575531006,
      "learning_rate": 1.207719224423004e-05,
      "loss": 1.8961,
      "step": 414500
    },
    {
      "epoch": 1.550571516374876,
      "grad_norm": 4.165931701660156,
      "learning_rate": 1.1981922071418567e-05,
      "loss": 1.891,
      "step": 415000
    },
    {
      "epoch": 1.5524396748283396,
      "grad_norm": 5.3882341384887695,
      "learning_rate": 1.1886788033865165e-05,
      "loss": 1.8854,
      "step": 415500
    },
    {
      "epoch": 1.5543078332818032,
      "grad_norm": 4.879900932312012,
      "learning_rate": 1.1791982199822898e-05,
      "loss": 1.8817,
      "step": 416000
    },
    {
      "epoch": 1.5561759917352669,
      "grad_norm": 4.769500732421875,
      "learning_rate": 1.169750538569126e-05,
      "loss": 1.9078,
      "step": 416500
    },
    {
      "epoch": 1.5580441501887305,
      "grad_norm": 5.184789657592773,
      "learning_rate": 1.1603546369284646e-05,
      "loss": 1.864,
      "step": 417000
    },
    {
      "epoch": 1.5599123086421944,
      "grad_norm": 3.5462260246276855,
      "learning_rate": 1.1509729370737072e-05,
      "loss": 1.9012,
      "step": 417500
    },
    {
      "epoch": 1.561780467095658,
      "grad_norm": 4.478038311004639,
      "learning_rate": 1.1416243822658057e-05,
      "loss": 1.8541,
      "step": 418000
    },
    {
      "epoch": 1.563648625549122,
      "grad_norm": 4.2772650718688965,
      "learning_rate": 1.1323090530077756e-05,
      "loss": 1.9176,
      "step": 418500
    },
    {
      "epoch": 1.5655167840025856,
      "grad_norm": 4.45164155960083,
      "learning_rate": 1.123045560271172e-05,
      "loss": 1.9191,
      "step": 419000
    },
    {
      "epoch": 1.5673849424560493,
      "grad_norm": 4.31321382522583,
      "learning_rate": 1.1137968556258127e-05,
      "loss": 1.9104,
      "step": 419500
    },
    {
      "epoch": 1.569253100909513,
      "grad_norm": 3.313171625137329,
      "learning_rate": 1.1045816161609301e-05,
      "loss": 1.8969,
      "step": 420000
    },
    {
      "epoch": 1.5711212593629766,
      "grad_norm": 5.630086898803711,
      "learning_rate": 1.0953999212315213e-05,
      "loss": 1.8921,
      "step": 420500
    },
    {
      "epoch": 1.5729894178164403,
      "grad_norm": 4.993584632873535,
      "learning_rate": 1.0862518499037283e-05,
      "loss": 1.8845,
      "step": 421000
    },
    {
      "epoch": 1.574857576269904,
      "grad_norm": 5.677700996398926,
      "learning_rate": 1.077155676004855e-05,
      "loss": 1.8988,
      "step": 421500
    },
    {
      "epoch": 1.5767257347233676,
      "grad_norm": 4.58486795425415,
      "learning_rate": 1.068075020279995e-05,
      "loss": 1.9101,
      "step": 422000
    },
    {
      "epoch": 1.5785938931768315,
      "grad_norm": 4.042180061340332,
      "learning_rate": 1.0590282234591004e-05,
      "loss": 1.9224,
      "step": 422500
    },
    {
      "epoch": 1.5804620516302952,
      "grad_norm": 3.4549098014831543,
      "learning_rate": 1.0500153634466675e-05,
      "loss": 1.8885,
      "step": 423000
    },
    {
      "epoch": 1.5823302100837588,
      "grad_norm": 4.782561302185059,
      "learning_rate": 1.0410544415482986e-05,
      "loss": 1.9126,
      "step": 423500
    },
    {
      "epoch": 1.5841983685372227,
      "grad_norm": 4.326170921325684,
      "learning_rate": 1.0321096194361922e-05,
      "loss": 1.8519,
      "step": 424000
    },
    {
      "epoch": 1.5860665269906864,
      "grad_norm": 4.411458492279053,
      "learning_rate": 1.0231989659361606e-05,
      "loss": 1.8756,
      "step": 424500
    },
    {
      "epoch": 1.58793468544415,
      "grad_norm": 4.059584140777588,
      "learning_rate": 1.0143225577803328e-05,
      "loss": 1.897,
      "step": 425000
    },
    {
      "epoch": 1.5898028438976137,
      "grad_norm": 4.62555456161499,
      "learning_rate": 1.0054981212748877e-05,
      "loss": 1.9044,
      "step": 425500
    },
    {
      "epoch": 1.5916710023510774,
      "grad_norm": 3.3062992095947266,
      "learning_rate": 9.966903639519581e-06,
      "loss": 1.8671,
      "step": 426000
    },
    {
      "epoch": 1.593539160804541,
      "grad_norm": 3.750192880630493,
      "learning_rate": 9.879170802462034e-06,
      "loss": 1.9024,
      "step": 426500
    },
    {
      "epoch": 1.5954073192580047,
      "grad_norm": 3.6934866905212402,
      "learning_rate": 9.791783457068221e-06,
      "loss": 1.8972,
      "step": 427000
    },
    {
      "epoch": 1.5972754777114684,
      "grad_norm": 4.577314376831055,
      "learning_rate": 9.704916092006999e-06,
      "loss": 1.9391,
      "step": 427500
    },
    {
      "epoch": 1.5991436361649323,
      "grad_norm": 4.8952226638793945,
      "learning_rate": 9.618221289776025e-06,
      "loss": 1.8756,
      "step": 428000
    },
    {
      "epoch": 1.601011794618396,
      "grad_norm": 5.817446231842041,
      "learning_rate": 9.531874226317888e-06,
      "loss": 1.8756,
      "step": 428500
    },
    {
      "epoch": 1.6028799530718596,
      "grad_norm": 3.9412033557891846,
      "learning_rate": 9.445875645191288e-06,
      "loss": 1.912,
      "step": 429000
    },
    {
      "epoch": 1.6047481115253235,
      "grad_norm": 4.50702428817749,
      "learning_rate": 9.360397236655304e-06,
      "loss": 1.8652,
      "step": 429500
    },
    {
      "epoch": 1.6066162699787871,
      "grad_norm": 4.587414741516113,
      "learning_rate": 9.27509713820291e-06,
      "loss": 1.9097,
      "step": 430000
    },
    {
      "epoch": 1.6084844284322508,
      "grad_norm": 6.312617301940918,
      "learning_rate": 9.190147733261234e-06,
      "loss": 1.8736,
      "step": 430500
    },
    {
      "epoch": 1.6103525868857145,
      "grad_norm": 5.86572790145874,
      "learning_rate": 9.105549753353348e-06,
      "loss": 1.8866,
      "step": 431000
    },
    {
      "epoch": 1.6122207453391781,
      "grad_norm": 4.819661617279053,
      "learning_rate": 9.021303926976055e-06,
      "loss": 1.8648,
      "step": 431500
    },
    {
      "epoch": 1.6140889037926418,
      "grad_norm": 4.977511882781982,
      "learning_rate": 8.937578412834564e-06,
      "loss": 1.8504,
      "step": 432000
    },
    {
      "epoch": 1.6159570622461055,
      "grad_norm": 3.8270606994628906,
      "learning_rate": 8.85403835895094e-06,
      "loss": 1.9031,
      "step": 432500
    },
    {
      "epoch": 1.6178252206995691,
      "grad_norm": 3.582000255584717,
      "learning_rate": 8.770852624432785e-06,
      "loss": 1.9016,
      "step": 433000
    },
    {
      "epoch": 1.619693379153033,
      "grad_norm": 4.828258037567139,
      "learning_rate": 8.688021925615658e-06,
      "loss": 1.9003,
      "step": 433500
    },
    {
      "epoch": 1.6215615376064967,
      "grad_norm": 4.899356842041016,
      "learning_rate": 8.60571157016748e-06,
      "loss": 1.902,
      "step": 434000
    },
    {
      "epoch": 1.6234296960599606,
      "grad_norm": 3.5516891479492188,
      "learning_rate": 8.523592365898686e-06,
      "loss": 1.8574,
      "step": 434500
    },
    {
      "epoch": 1.6252978545134242,
      "grad_norm": 4.53317928314209,
      "learning_rate": 8.441830326558064e-06,
      "loss": 1.8844,
      "step": 435000
    },
    {
      "epoch": 1.627166012966888,
      "grad_norm": 6.883234977722168,
      "learning_rate": 8.360426156221358e-06,
      "loss": 1.859,
      "step": 435500
    },
    {
      "epoch": 1.6290341714203516,
      "grad_norm": 5.441802024841309,
      "learning_rate": 8.279542288766052e-06,
      "loss": 1.9012,
      "step": 436000
    },
    {
      "epoch": 1.6309023298738152,
      "grad_norm": 3.1804521083831787,
      "learning_rate": 8.198855237101328e-06,
      "loss": 1.8847,
      "step": 436500
    },
    {
      "epoch": 1.632770488327279,
      "grad_norm": 4.132668972015381,
      "learning_rate": 8.118528146766863e-06,
      "loss": 1.8517,
      "step": 437000
    },
    {
      "epoch": 1.6346386467807426,
      "grad_norm": 4.795321464538574,
      "learning_rate": 8.038561709481684e-06,
      "loss": 1.9175,
      "step": 437500
    },
    {
      "epoch": 1.6365068052342062,
      "grad_norm": 4.67226505279541,
      "learning_rate": 7.959115462975215e-06,
      "loss": 1.857,
      "step": 438000
    },
    {
      "epoch": 1.63837496368767,
      "grad_norm": 5.205322742462158,
      "learning_rate": 7.879871669780554e-06,
      "loss": 1.8824,
      "step": 438500
    },
    {
      "epoch": 1.6402431221411338,
      "grad_norm": 5.369668960571289,
      "learning_rate": 7.800990584772722e-06,
      "loss": 1.876,
      "step": 439000
    },
    {
      "epoch": 1.6421112805945974,
      "grad_norm": 4.469278335571289,
      "learning_rate": 7.722472887218802e-06,
      "loss": 1.8871,
      "step": 439500
    },
    {
      "epoch": 1.6439794390480613,
      "grad_norm": 4.810849189758301,
      "learning_rate": 7.644319253256577e-06,
      "loss": 1.892,
      "step": 440000
    },
    {
      "epoch": 1.645847597501525,
      "grad_norm": 5.1172027587890625,
      "learning_rate": 7.5666855692307025e-06,
      "loss": 1.9003,
      "step": 440500
    },
    {
      "epoch": 1.6477157559549886,
      "grad_norm": 5.264705181121826,
      "learning_rate": 7.48926134684001e-06,
      "loss": 1.866,
      "step": 441000
    },
    {
      "epoch": 1.6495839144084523,
      "grad_norm": 3.7140793800354004,
      "learning_rate": 7.41220319629074e-06,
      "loss": 1.8958,
      "step": 441500
    },
    {
      "epoch": 1.651452072861916,
      "grad_norm": 4.509251117706299,
      "learning_rate": 7.335511781152121e-06,
      "loss": 1.8784,
      "step": 442000
    },
    {
      "epoch": 1.6533202313153796,
      "grad_norm": 4.2154388427734375,
      "learning_rate": 7.259340042775581e-06,
      "loss": 1.8476,
      "step": 442500
    },
    {
      "epoch": 1.6551883897688433,
      "grad_norm": 6.030950546264648,
      "learning_rate": 7.183383339768157e-06,
      "loss": 1.9157,
      "step": 443000
    },
    {
      "epoch": 1.657056548222307,
      "grad_norm": 4.760791301727295,
      "learning_rate": 7.107795342603074e-06,
      "loss": 1.8709,
      "step": 443500
    },
    {
      "epoch": 1.6589247066757709,
      "grad_norm": 4.554337978363037,
      "learning_rate": 7.032576702189675e-06,
      "loss": 1.8865,
      "step": 444000
    },
    {
      "epoch": 1.6607928651292345,
      "grad_norm": 5.714734077453613,
      "learning_rate": 6.9578773938351495e-06,
      "loss": 1.8687,
      "step": 444500
    },
    {
      "epoch": 1.6626610235826982,
      "grad_norm": 4.749231338500977,
      "learning_rate": 6.883398664985902e-06,
      "loss": 1.8953,
      "step": 445000
    },
    {
      "epoch": 1.664529182036162,
      "grad_norm": 2.8103106021881104,
      "learning_rate": 6.809291225230813e-06,
      "loss": 1.8854,
      "step": 445500
    },
    {
      "epoch": 1.6663973404896257,
      "grad_norm": 6.017327308654785,
      "learning_rate": 6.735555712729713e-06,
      "loss": 1.8829,
      "step": 446000
    },
    {
      "epoch": 1.6682654989430894,
      "grad_norm": 5.306553363800049,
      "learning_rate": 6.662339116102778e-06,
      "loss": 1.8542,
      "step": 446500
    },
    {
      "epoch": 1.670133657396553,
      "grad_norm": 5.078936576843262,
      "learning_rate": 6.5893486127564465e-06,
      "loss": 1.9077,
      "step": 447000
    },
    {
      "epoch": 1.6720018158500167,
      "grad_norm": 5.262309551239014,
      "learning_rate": 6.516731930651387e-06,
      "loss": 1.8863,
      "step": 447500
    },
    {
      "epoch": 1.6738699743034804,
      "grad_norm": 5.343240261077881,
      "learning_rate": 6.444489695110101e-06,
      "loss": 1.8784,
      "step": 448000
    },
    {
      "epoch": 1.675738132756944,
      "grad_norm": 4.112715244293213,
      "learning_rate": 6.372622528230676e-06,
      "loss": 1.8559,
      "step": 448500
    },
    {
      "epoch": 1.6776062912104077,
      "grad_norm": 3.1489148139953613,
      "learning_rate": 6.301273656494144e-06,
      "loss": 1.8633,
      "step": 449000
    },
    {
      "epoch": 1.6794744496638716,
      "grad_norm": 5.503724575042725,
      "learning_rate": 6.230157727089419e-06,
      "loss": 1.8898,
      "step": 449500
    },
    {
      "epoch": 1.6813426081173353,
      "grad_norm": 4.443988800048828,
      "learning_rate": 6.159418712018961e-06,
      "loss": 1.881,
      "step": 450000
    },
    {
      "epoch": 1.6832107665707992,
      "grad_norm": 3.3895161151885986,
      "learning_rate": 6.089057220436195e-06,
      "loss": 1.8802,
      "step": 450500
    },
    {
      "epoch": 1.6850789250242628,
      "grad_norm": 4.960055828094482,
      "learning_rate": 6.0192134471937224e-06,
      "loss": 1.8593,
      "step": 451000
    },
    {
      "epoch": 1.6869470834777265,
      "grad_norm": 4.596670150756836,
      "learning_rate": 5.949608058974171e-06,
      "loss": 1.8924,
      "step": 451500
    },
    {
      "epoch": 1.6888152419311901,
      "grad_norm": 3.810817003250122,
      "learning_rate": 5.8803820009804165e-06,
      "loss": 1.8412,
      "step": 452000
    },
    {
      "epoch": 1.6906834003846538,
      "grad_norm": 6.2422380447387695,
      "learning_rate": 5.8115358693374035e-06,
      "loss": 1.875,
      "step": 452500
    },
    {
      "epoch": 1.6925515588381175,
      "grad_norm": 4.921154499053955,
      "learning_rate": 5.7432068079726676e-06,
      "loss": 1.8729,
      "step": 453000
    },
    {
      "epoch": 1.6944197172915811,
      "grad_norm": 5.331964015960693,
      "learning_rate": 5.675121541510353e-06,
      "loss": 1.8726,
      "step": 453500
    },
    {
      "epoch": 1.6962878757450448,
      "grad_norm": 4.561686038970947,
      "learning_rate": 5.607417968953904e-06,
      "loss": 1.8597,
      "step": 454000
    },
    {
      "epoch": 1.6981560341985087,
      "grad_norm": 5.06734037399292,
      "learning_rate": 5.5400966733176905e-06,
      "loss": 1.8741,
      "step": 454500
    },
    {
      "epoch": 1.7000241926519724,
      "grad_norm": 6.29988956451416,
      "learning_rate": 5.473291728727564e-06,
      "loss": 1.9034,
      "step": 455000
    },
    {
      "epoch": 1.701892351105436,
      "grad_norm": 5.206850051879883,
      "learning_rate": 5.406735955363129e-06,
      "loss": 1.8556,
      "step": 455500
    },
    {
      "epoch": 1.7037605095589,
      "grad_norm": 3.8202433586120605,
      "learning_rate": 5.340564187047786e-06,
      "loss": 1.8677,
      "step": 456000
    },
    {
      "epoch": 1.7056286680123636,
      "grad_norm": 3.6107611656188965,
      "learning_rate": 5.2747769936051125e-06,
      "loss": 1.8593,
      "step": 456500
    },
    {
      "epoch": 1.7074968264658272,
      "grad_norm": 4.204036235809326,
      "learning_rate": 5.20937494154699e-06,
      "loss": 1.8571,
      "step": 457000
    },
    {
      "epoch": 1.709364984919291,
      "grad_norm": 5.234120845794678,
      "learning_rate": 5.1444882414578675e-06,
      "loss": 1.8433,
      "step": 457500
    },
    {
      "epoch": 1.7112331433727546,
      "grad_norm": 3.4716298580169678,
      "learning_rate": 5.079857385347997e-06,
      "loss": 1.8765,
      "step": 458000
    },
    {
      "epoch": 1.7131013018262182,
      "grad_norm": 5.14175271987915,
      "learning_rate": 5.015613349129866e-06,
      "loss": 1.9206,
      "step": 458500
    },
    {
      "epoch": 1.714969460279682,
      "grad_norm": 4.21678352355957,
      "learning_rate": 4.951756686026798e-06,
      "loss": 1.8835,
      "step": 459000
    },
    {
      "epoch": 1.7168376187331456,
      "grad_norm": 3.8663065433502197,
      "learning_rate": 4.888414495895577e-06,
      "loss": 1.8974,
      "step": 459500
    },
    {
      "epoch": 1.7187057771866094,
      "grad_norm": 4.44641637802124,
      "learning_rate": 4.825333447862485e-06,
      "loss": 1.8963,
      "step": 460000
    },
    {
      "epoch": 1.720573935640073,
      "grad_norm": 4.290149211883545,
      "learning_rate": 4.762641411497825e-06,
      "loss": 1.8818,
      "step": 460500
    },
    {
      "epoch": 1.722442094093537,
      "grad_norm": 3.1460719108581543,
      "learning_rate": 4.700338926660225e-06,
      "loss": 1.8916,
      "step": 461000
    },
    {
      "epoch": 1.7243102525470007,
      "grad_norm": 3.602639675140381,
      "learning_rate": 4.63842652985379e-06,
      "loss": 1.8656,
      "step": 461500
    },
    {
      "epoch": 1.7261784110004643,
      "grad_norm": 4.454497337341309,
      "learning_rate": 4.577027407582085e-06,
      "loss": 1.8377,
      "step": 462000
    },
    {
      "epoch": 1.728046569453928,
      "grad_norm": 4.91801118850708,
      "learning_rate": 4.5158960000806275e-06,
      "loss": 1.8708,
      "step": 462500
    },
    {
      "epoch": 1.7299147279073916,
      "grad_norm": 5.951587200164795,
      "learning_rate": 4.45515626889988e-06,
      "loss": 1.8598,
      "step": 463000
    },
    {
      "epoch": 1.7317828863608553,
      "grad_norm": 3.9829583168029785,
      "learning_rate": 4.394808737086631e-06,
      "loss": 1.8637,
      "step": 463500
    },
    {
      "epoch": 1.733651044814319,
      "grad_norm": 4.84136962890625,
      "learning_rate": 4.334973441658552e-06,
      "loss": 1.849,
      "step": 464000
    },
    {
      "epoch": 1.7355192032677826,
      "grad_norm": 5.9698991775512695,
      "learning_rate": 4.275411077223152e-06,
      "loss": 1.8716,
      "step": 464500
    },
    {
      "epoch": 1.7373873617212465,
      "grad_norm": 6.253756046295166,
      "learning_rate": 4.216242459991293e-06,
      "loss": 1.877,
      "step": 465000
    },
    {
      "epoch": 1.7392555201747102,
      "grad_norm": 4.6036152839660645,
      "learning_rate": 4.157468099480438e-06,
      "loss": 1.8532,
      "step": 465500
    },
    {
      "epoch": 1.7411236786281739,
      "grad_norm": 4.482430934906006,
      "learning_rate": 4.099204866700346e-06,
      "loss": 1.858,
      "step": 466000
    },
    {
      "epoch": 1.7429918370816377,
      "grad_norm": 4.4797749519348145,
      "learning_rate": 4.041219743568814e-06,
      "loss": 1.8436,
      "step": 466500
    },
    {
      "epoch": 1.7448599955351014,
      "grad_norm": 5.49769926071167,
      "learning_rate": 3.983630384327791e-06,
      "loss": 1.8767,
      "step": 467000
    },
    {
      "epoch": 1.746728153988565,
      "grad_norm": 5.328680038452148,
      "learning_rate": 3.9264372848953125e-06,
      "loss": 1.8929,
      "step": 467500
    },
    {
      "epoch": 1.7485963124420287,
      "grad_norm": 3.2703754901885986,
      "learning_rate": 3.869640937777136e-06,
      "loss": 1.7657,
      "step": 468000
    },
    {
      "epoch": 1.7504644708954924,
      "grad_norm": 4.710208892822266,
      "learning_rate": 3.813241832062481e-06,
      "loss": 1.868,
      "step": 468500
    },
    {
      "epoch": 1.752332629348956,
      "grad_norm": 3.9908735752105713,
      "learning_rate": 3.7572404534197746e-06,
      "loss": 1.9306,
      "step": 469000
    },
    {
      "epoch": 1.7542007878024197,
      "grad_norm": 5.898683071136475,
      "learning_rate": 3.701637284092546e-06,
      "loss": 1.8756,
      "step": 469500
    },
    {
      "epoch": 1.7560689462558834,
      "grad_norm": 5.575063705444336,
      "learning_rate": 3.6465428136502942e-06,
      "loss": 1.8415,
      "step": 470000
    },
    {
      "epoch": 1.7579371047093473,
      "grad_norm": 3.8220248222351074,
      "learning_rate": 3.591736697164866e-06,
      "loss": 1.8549,
      "step": 470500
    },
    {
      "epoch": 1.759805263162811,
      "grad_norm": 4.483773708343506,
      "learning_rate": 3.5373302151939625e-06,
      "loss": 1.8414,
      "step": 471000
    },
    {
      "epoch": 1.7616734216162746,
      "grad_norm": 5.593682289123535,
      "learning_rate": 3.4833238362470044e-06,
      "loss": 1.8729,
      "step": 471500
    },
    {
      "epoch": 1.7635415800697385,
      "grad_norm": 3.2169010639190674,
      "learning_rate": 3.4298248369353582e-06,
      "loss": 1.8556,
      "step": 472000
    },
    {
      "epoch": 1.7654097385232022,
      "grad_norm": 5.516305923461914,
      "learning_rate": 3.3766192532610986e-06,
      "loss": 1.8855,
      "step": 472500
    },
    {
      "epoch": 1.7672778969766658,
      "grad_norm": 5.06584358215332,
      "learning_rate": 3.3239203637443983e-06,
      "loss": 1.8967,
      "step": 473000
    },
    {
      "epoch": 1.7691460554301295,
      "grad_norm": 4.666677474975586,
      "learning_rate": 3.271517404347946e-06,
      "loss": 1.8351,
      "step": 473500
    },
    {
      "epoch": 1.7710142138835931,
      "grad_norm": 5.4451823234558105,
      "learning_rate": 3.2195168369637765e-06,
      "loss": 1.8405,
      "step": 474000
    },
    {
      "epoch": 1.7728823723370568,
      "grad_norm": 4.598884582519531,
      "learning_rate": 3.1679191093832883e-06,
      "loss": 1.8774,
      "step": 474500
    },
    {
      "epoch": 1.7747505307905205,
      "grad_norm": 5.018040657043457,
      "learning_rate": 3.1167246659289217e-06,
      "loss": 1.8544,
      "step": 475000
    },
    {
      "epoch": 1.7766186892439841,
      "grad_norm": 5.349071502685547,
      "learning_rate": 3.065933947450339e-06,
      "loss": 1.8779,
      "step": 475500
    },
    {
      "epoch": 1.778486847697448,
      "grad_norm": 4.253110408782959,
      "learning_rate": 3.015547391320589e-06,
      "loss": 1.8161,
      "step": 476000
    },
    {
      "epoch": 1.7803550061509117,
      "grad_norm": 3.6783599853515625,
      "learning_rate": 2.9655654314323655e-06,
      "loss": 1.8395,
      "step": 476500
    },
    {
      "epoch": 1.7822231646043756,
      "grad_norm": 4.650113582611084,
      "learning_rate": 2.916185998547194e-06,
      "loss": 1.8573,
      "step": 477000
    },
    {
      "epoch": 1.7840913230578392,
      "grad_norm": 4.785963535308838,
      "learning_rate": 2.8670128962200117e-06,
      "loss": 1.839,
      "step": 477500
    },
    {
      "epoch": 1.785959481511303,
      "grad_norm": 4.258472442626953,
      "learning_rate": 2.818245669206393e-06,
      "loss": 1.8937,
      "step": 478000
    },
    {
      "epoch": 1.7878276399647666,
      "grad_norm": 5.702148914337158,
      "learning_rate": 2.7698847374545255e-06,
      "loss": 1.8767,
      "step": 478500
    },
    {
      "epoch": 1.7896957984182302,
      "grad_norm": 5.909474849700928,
      "learning_rate": 2.7219305174139067e-06,
      "loss": 1.8927,
      "step": 479000
    },
    {
      "epoch": 1.791563956871694,
      "grad_norm": 4.348086357116699,
      "learning_rate": 2.6743834220317286e-06,
      "loss": 1.8478,
      "step": 479500
    },
    {
      "epoch": 1.7934321153251576,
      "grad_norm": 4.148903846740723,
      "learning_rate": 2.62724386074929e-06,
      "loss": 1.855,
      "step": 480000
    },
    {
      "epoch": 1.7953002737786212,
      "grad_norm": 4.32988977432251,
      "learning_rate": 2.580512239498528e-06,
      "loss": 1.8551,
      "step": 480500
    },
    {
      "epoch": 1.7971684322320851,
      "grad_norm": 4.866036415100098,
      "learning_rate": 2.534188960698475e-06,
      "loss": 1.8938,
      "step": 481000
    },
    {
      "epoch": 1.7990365906855488,
      "grad_norm": 4.053302764892578,
      "learning_rate": 2.4883658441394673e-06,
      "loss": 1.8759,
      "step": 481500
    },
    {
      "epoch": 1.8009047491390124,
      "grad_norm": 5.242681980133057,
      "learning_rate": 2.4428596247633885e-06,
      "loss": 1.8914,
      "step": 482000
    },
    {
      "epoch": 1.8027729075924763,
      "grad_norm": 5.018854141235352,
      "learning_rate": 2.3977629332031404e-06,
      "loss": 1.8592,
      "step": 482500
    },
    {
      "epoch": 1.80464106604594,
      "grad_norm": 4.828859329223633,
      "learning_rate": 2.3530761577989e-06,
      "loss": 1.8676,
      "step": 483000
    },
    {
      "epoch": 1.8065092244994037,
      "grad_norm": 3.3137731552124023,
      "learning_rate": 2.3088878265754845e-06,
      "loss": 1.8182,
      "step": 483500
    },
    {
      "epoch": 1.8083773829528673,
      "grad_norm": 6.416788101196289,
      "learning_rate": 2.2650212126383242e-06,
      "loss": 1.8656,
      "step": 484000
    },
    {
      "epoch": 1.810245541406331,
      "grad_norm": 4.340769290924072,
      "learning_rate": 2.2215656579332167e-06,
      "loss": 1.9075,
      "step": 484500
    },
    {
      "epoch": 1.8121136998597946,
      "grad_norm": 4.634076118469238,
      "learning_rate": 2.17852153666806e-06,
      "loss": 1.8799,
      "step": 485000
    },
    {
      "epoch": 1.8139818583132583,
      "grad_norm": 4.349535942077637,
      "learning_rate": 2.1359740729170296e-06,
      "loss": 1.8522,
      "step": 485500
    },
    {
      "epoch": 1.815850016766722,
      "grad_norm": 4.439642429351807,
      "learning_rate": 2.0937531022739987e-06,
      "loss": 1.8578,
      "step": 486000
    },
    {
      "epoch": 1.8177181752201859,
      "grad_norm": 4.639336585998535,
      "learning_rate": 2.051944665700545e-06,
      "loss": 1.883,
      "step": 486500
    },
    {
      "epoch": 1.8195863336736495,
      "grad_norm": 4.625245571136475,
      "learning_rate": 2.010549123220773e-06,
      "loss": 1.8886,
      "step": 487000
    },
    {
      "epoch": 1.8214544921271132,
      "grad_norm": 4.0239667892456055,
      "learning_rate": 1.9696483832278845e-06,
      "loss": 1.8653,
      "step": 487500
    },
    {
      "epoch": 1.823322650580577,
      "grad_norm": 4.363647937774658,
      "learning_rate": 1.92907886722582e-06,
      "loss": 1.8718,
      "step": 488000
    },
    {
      "epoch": 1.8251908090340407,
      "grad_norm": 4.025300025939941,
      "learning_rate": 1.8889233033491493e-06,
      "loss": 1.8352,
      "step": 488500
    },
    {
      "epoch": 1.8270589674875044,
      "grad_norm": 6.883707046508789,
      "learning_rate": 1.8491820373886358e-06,
      "loss": 1.9056,
      "step": 489000
    },
    {
      "epoch": 1.828927125940968,
      "grad_norm": 5.169373512268066,
      "learning_rate": 1.8098554115674292e-06,
      "loss": 1.8994,
      "step": 489500
    },
    {
      "epoch": 1.8307952843944317,
      "grad_norm": 5.691972255706787,
      "learning_rate": 1.7710985840431572e-06,
      "loss": 1.8602,
      "step": 490000
    },
    {
      "epoch": 1.8326634428478954,
      "grad_norm": 4.719027042388916,
      "learning_rate": 1.7326005889664986e-06,
      "loss": 1.8645,
      "step": 490500
    },
    {
      "epoch": 1.834531601301359,
      "grad_norm": 5.3066816329956055,
      "learning_rate": 1.6945182379445534e-06,
      "loss": 1.879,
      "step": 491000
    },
    {
      "epoch": 1.8363997597548227,
      "grad_norm": 5.338113307952881,
      "learning_rate": 1.6568518589150705e-06,
      "loss": 1.8811,
      "step": 491500
    },
    {
      "epoch": 1.8382679182082866,
      "grad_norm": 3.351616382598877,
      "learning_rate": 1.61960177623377e-06,
      "loss": 1.8459,
      "step": 492000
    },
    {
      "epoch": 1.8401360766617503,
      "grad_norm": 5.075439929962158,
      "learning_rate": 1.5827683106715008e-06,
      "loss": 1.8515,
      "step": 492500
    },
    {
      "epoch": 1.8420042351152142,
      "grad_norm": 4.089956283569336,
      "learning_rate": 1.5463517794115367e-06,
      "loss": 1.8624,
      "step": 493000
    },
    {
      "epoch": 1.8438723935686778,
      "grad_norm": 6.492163181304932,
      "learning_rate": 1.5103524960467908e-06,
      "loss": 1.8245,
      "step": 493500
    },
    {
      "epoch": 1.8457405520221415,
      "grad_norm": 6.452279567718506,
      "learning_rate": 1.4748415171010387e-06,
      "loss": 1.8406,
      "step": 494000
    },
    {
      "epoch": 1.8476087104756052,
      "grad_norm": 3.7838053703308105,
      "learning_rate": 1.4396768198986554e-06,
      "loss": 1.8508,
      "step": 494500
    },
    {
      "epoch": 1.8494768689290688,
      "grad_norm": 3.706258535385132,
      "learning_rate": 1.4049302891993631e-06,
      "loss": 1.8484,
      "step": 495000
    },
    {
      "epoch": 1.8513450273825325,
      "grad_norm": 4.734787940979004,
      "learning_rate": 1.3706022242152227e-06,
      "loss": 1.8616,
      "step": 495500
    },
    {
      "epoch": 1.8532131858359961,
      "grad_norm": 5.525266170501709,
      "learning_rate": 1.336760321043634e-06,
      "loss": 1.8696,
      "step": 496000
    },
    {
      "epoch": 1.8550813442894598,
      "grad_norm": 3.555717706680298,
      "learning_rate": 1.3032692323137307e-06,
      "loss": 1.8539,
      "step": 496500
    },
    {
      "epoch": 1.8569495027429237,
      "grad_norm": 4.906459331512451,
      "learning_rate": 1.2701974847307452e-06,
      "loss": 1.8555,
      "step": 497000
    },
    {
      "epoch": 1.8588176611963874,
      "grad_norm": 5.703590393066406,
      "learning_rate": 1.2375453630847134e-06,
      "loss": 1.8088,
      "step": 497500
    },
    {
      "epoch": 1.860685819649851,
      "grad_norm": 4.265283107757568,
      "learning_rate": 1.2053771937288626e-06,
      "loss": 1.8823,
      "step": 498000
    },
    {
      "epoch": 1.862553978103315,
      "grad_norm": 4.899601936340332,
      "learning_rate": 1.1735643232264836e-06,
      "loss": 1.8687,
      "step": 498500
    },
    {
      "epoch": 1.8644221365567786,
      "grad_norm": 4.975470542907715,
      "learning_rate": 1.1422342758236281e-06,
      "loss": 1.871,
      "step": 499000
    },
    {
      "epoch": 1.8662902950102422,
      "grad_norm": 4.806349754333496,
      "learning_rate": 1.1112617500700973e-06,
      "loss": 1.8244,
      "step": 499500
    },
    {
      "epoch": 1.868158453463706,
      "grad_norm": 5.105782508850098,
      "learning_rate": 1.0807102188935214e-06,
      "loss": 1.8867,
      "step": 500000
    },
    {
      "epoch": 1.8700266119171696,
      "grad_norm": 5.97845458984375,
      "learning_rate": 1.050579945381669e-06,
      "loss": 1.8339,
      "step": 500500
    },
    {
      "epoch": 1.8718947703706332,
      "grad_norm": 4.778586387634277,
      "learning_rate": 1.0208711889947376e-06,
      "loss": 1.8423,
      "step": 501000
    },
    {
      "epoch": 1.873762928824097,
      "grad_norm": 4.4693169593811035,
      "learning_rate": 9.915842055631286e-07,
      "loss": 1.8629,
      "step": 501500
    },
    {
      "epoch": 1.8756310872775606,
      "grad_norm": 5.0336222648620605,
      "learning_rate": 9.62719247285221e-07,
      "loss": 1.8386,
      "step": 502000
    },
    {
      "epoch": 1.8774992457310244,
      "grad_norm": 4.51587438583374,
      "learning_rate": 9.342765627252504e-07,
      "loss": 1.8566,
      "step": 502500
    },
    {
      "epoch": 1.879367404184488,
      "grad_norm": 4.207951068878174,
      "learning_rate": 9.062563968110948e-07,
      "loss": 1.8517,
      "step": 503000
    },
    {
      "epoch": 1.8812355626379518,
      "grad_norm": 3.8609273433685303,
      "learning_rate": 8.787137635712206e-07,
      "loss": 1.8727,
      "step": 503500
    },
    {
      "epoch": 1.8831037210914157,
      "grad_norm": 4.1626877784729,
      "learning_rate": 8.515385089467198e-07,
      "loss": 1.89,
      "step": 504000
    },
    {
      "epoch": 1.8849718795448793,
      "grad_norm": 3.9561331272125244,
      "learning_rate": 8.247864854485199e-07,
      "loss": 1.8863,
      "step": 504500
    },
    {
      "epoch": 1.886840037998343,
      "grad_norm": 4.846907138824463,
      "learning_rate": 7.98457923445789e-07,
      "loss": 1.8208,
      "step": 505000
    },
    {
      "epoch": 1.8887081964518067,
      "grad_norm": 4.7613911628723145,
      "learning_rate": 7.726044364189499e-07,
      "loss": 1.8515,
      "step": 505500
    },
    {
      "epoch": 1.8905763549052703,
      "grad_norm": 5.021259307861328,
      "learning_rate": 7.47122625883645e-07,
      "loss": 1.8398,
      "step": 506000
    },
    {
      "epoch": 1.892444513358734,
      "grad_norm": 6.04338264465332,
      "learning_rate": 7.220649456289641e-07,
      "loss": 1.8433,
      "step": 506500
    },
    {
      "epoch": 1.8943126718121976,
      "grad_norm": 4.8739094734191895,
      "learning_rate": 6.974316114336077e-07,
      "loss": 1.8352,
      "step": 507000
    },
    {
      "epoch": 1.8961808302656613,
      "grad_norm": 4.441490650177002,
      "learning_rate": 6.732708291258827e-07,
      "loss": 1.8887,
      "step": 507500
    },
    {
      "epoch": 1.8980489887191252,
      "grad_norm": 3.811279058456421,
      "learning_rate": 6.494859700278133e-07,
      "loss": 1.8689,
      "step": 508000
    },
    {
      "epoch": 1.8999171471725889,
      "grad_norm": 2.8529744148254395,
      "learning_rate": 6.26126081986883e-07,
      "loss": 1.9027,
      "step": 508500
    },
    {
      "epoch": 1.9017853056260527,
      "grad_norm": 4.631827354431152,
      "learning_rate": 6.031913661616207e-07,
      "loss": 1.848,
      "step": 509000
    },
    {
      "epoch": 1.9036534640795164,
      "grad_norm": 3.616713762283325,
      "learning_rate": 5.807266140930689e-07,
      "loss": 1.8911,
      "step": 509500
    },
    {
      "epoch": 1.90552162253298,
      "grad_norm": 5.187899112701416,
      "learning_rate": 5.586419802097898e-07,
      "loss": 1.8309,
      "step": 510000
    },
    {
      "epoch": 1.9073897809864437,
      "grad_norm": 5.249440670013428,
      "learning_rate": 5.369830996666103e-07,
      "loss": 1.8542,
      "step": 510500
    },
    {
      "epoch": 1.9092579394399074,
      "grad_norm": 5.117617607116699,
      "learning_rate": 5.157501589742042e-07,
      "loss": 1.8459,
      "step": 511000
    },
    {
      "epoch": 1.911126097893371,
      "grad_norm": 5.904655456542969,
      "learning_rate": 4.949433409753679e-07,
      "loss": 1.8495,
      "step": 511500
    },
    {
      "epoch": 1.9129942563468347,
      "grad_norm": 6.1428632736206055,
      "learning_rate": 4.7460316030914495e-07,
      "loss": 1.8274,
      "step": 512000
    },
    {
      "epoch": 1.9148624148002984,
      "grad_norm": 4.737666130065918,
      "learning_rate": 4.546482684189279e-07,
      "loss": 1.8814,
      "step": 512500
    },
    {
      "epoch": 1.9167305732537623,
      "grad_norm": 5.555963516235352,
      "learning_rate": 4.351200253877141e-07,
      "loss": 1.8644,
      "step": 513000
    },
    {
      "epoch": 1.918598731707226,
      "grad_norm": 4.281107425689697,
      "learning_rate": 4.160185993786592e-07,
      "loss": 1.8685,
      "step": 513500
    },
    {
      "epoch": 1.9204668901606896,
      "grad_norm": 4.849224090576172,
      "learning_rate": 3.973441548794699e-07,
      "loss": 1.8921,
      "step": 514000
    },
    {
      "epoch": 1.9223350486141535,
      "grad_norm": 5.799472332000732,
      "learning_rate": 3.791329209122674e-07,
      "loss": 1.8326,
      "step": 514500
    },
    {
      "epoch": 1.9242032070676172,
      "grad_norm": 5.754580020904541,
      "learning_rate": 3.613120634338663e-07,
      "loss": 1.8677,
      "step": 515000
    },
    {
      "epoch": 1.9260713655210808,
      "grad_norm": 4.404658317565918,
      "learning_rate": 3.4391865855858406e-07,
      "loss": 1.8637,
      "step": 515500
    },
    {
      "epoch": 1.9279395239745445,
      "grad_norm": 4.911507606506348,
      "learning_rate": 3.2695285606589856e-07,
      "loss": 1.85,
      "step": 516000
    },
    {
      "epoch": 1.9298076824280082,
      "grad_norm": 4.071664333343506,
      "learning_rate": 3.1044745117284056e-07,
      "loss": 1.8303,
      "step": 516500
    },
    {
      "epoch": 1.9316758408814718,
      "grad_norm": 5.3374223709106445,
      "learning_rate": 2.9433643213220284e-07,
      "loss": 1.8384,
      "step": 517000
    },
    {
      "epoch": 1.9335439993349355,
      "grad_norm": 5.541077613830566,
      "learning_rate": 2.7865344244054625e-07,
      "loss": 1.8562,
      "step": 517500
    },
    {
      "epoch": 1.9354121577883991,
      "grad_norm": 4.992559432983398,
      "learning_rate": 2.6339861714849144e-07,
      "loss": 1.8563,
      "step": 518000
    },
    {
      "epoch": 1.937280316241863,
      "grad_norm": 3.9907846450805664,
      "learning_rate": 2.486013131539955e-07,
      "loss": 1.8736,
      "step": 518500
    },
    {
      "epoch": 1.9391484746953267,
      "grad_norm": 3.9517438411712646,
      "learning_rate": 2.3420235009178893e-07,
      "loss": 1.859,
      "step": 519000
    },
    {
      "epoch": 1.9410166331487904,
      "grad_norm": 4.987946510314941,
      "learning_rate": 2.2023193420994125e-07,
      "loss": 1.8258,
      "step": 519500
    },
    {
      "epoch": 1.9428847916022542,
      "grad_norm": 4.550879955291748,
      "learning_rate": 2.0669018581160883e-07,
      "loss": 1.8678,
      "step": 520000
    },
    {
      "epoch": 1.944752950055718,
      "grad_norm": 3.339261293411255,
      "learning_rate": 1.936030194349736e-07,
      "loss": 1.8278,
      "step": 520500
    },
    {
      "epoch": 1.9466211085091816,
      "grad_norm": 5.5620951652526855,
      "learning_rate": 1.8091809424235495e-07,
      "loss": 1.8996,
      "step": 521000
    },
    {
      "epoch": 1.9484892669626452,
      "grad_norm": 3.614462375640869,
      "learning_rate": 1.6866217507570114e-07,
      "loss": 1.8478,
      "step": 521500
    },
    {
      "epoch": 1.950357425416109,
      "grad_norm": 4.48366117477417,
      "learning_rate": 1.5683536747416184e-07,
      "loss": 1.8555,
      "step": 522000
    },
    {
      "epoch": 1.9522255838695726,
      "grad_norm": 5.737336158752441,
      "learning_rate": 1.454601400492306e-07,
      "loss": 1.8463,
      "step": 522500
    },
    {
      "epoch": 1.9540937423230362,
      "grad_norm": 3.779061794281006,
      "learning_rate": 1.3449099869505266e-07,
      "loss": 1.8293,
      "step": 523000
    },
    {
      "epoch": 1.9559619007765,
      "grad_norm": 5.098133087158203,
      "learning_rate": 1.239512631635298e-07,
      "loss": 1.8594,
      "step": 523500
    },
    {
      "epoch": 1.9578300592299638,
      "grad_norm": 4.416299343109131,
      "learning_rate": 1.1384102421526654e-07,
      "loss": 1.8593,
      "step": 524000
    },
    {
      "epoch": 1.9596982176834274,
      "grad_norm": 3.656932830810547,
      "learning_rate": 1.0417930144245858e-07,
      "loss": 1.836,
      "step": 524500
    },
    {
      "epoch": 1.9615663761368913,
      "grad_norm": 5.132260322570801,
      "learning_rate": 9.492745373296808e-08,
      "loss": 1.8943,
      "step": 525000
    },
    {
      "epoch": 1.963434534590355,
      "grad_norm": 4.663350582122803,
      "learning_rate": 8.61053525388622e-08,
      "loss": 1.8534,
      "step": 525500
    },
    {
      "epoch": 1.9653026930438187,
      "grad_norm": 6.682803153991699,
      "learning_rate": 7.77130738297216e-08,
      "loss": 1.8735,
      "step": 526000
    },
    {
      "epoch": 1.9671708514972823,
      "grad_norm": 6.555516719818115,
      "learning_rate": 6.976618556056025e-08,
      "loss": 1.88,
      "step": 526500
    },
|
{ |
|
"epoch": 1.969039009950746, |
|
"grad_norm": 5.245980739593506, |
|
"learning_rate": 6.223290493156397e-08, |
|
"loss": 1.8565, |
|
"step": 527000 |
|
}, |
|
{ |
|
"epoch": 1.9709071684042097, |
|
"grad_norm": 3.9505879878997803, |
|
"learning_rate": 5.512965235983658e-08, |
|
"loss": 1.8449, |
|
"step": 527500 |
|
}, |
|
{ |
|
"epoch": 1.9727753268576733, |
|
"grad_norm": 6.470322132110596, |
|
"learning_rate": 4.8456489013481986e-08, |
|
"loss": 1.8588, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 1.974643485311137, |
|
"grad_norm": 5.629650592803955, |
|
"learning_rate": 4.221347235697226e-08, |
|
"loss": 1.8839, |
|
"step": 528500 |
|
}, |
|
{ |
|
"epoch": 1.9765116437646009, |
|
"grad_norm": 3.961327075958252, |
|
"learning_rate": 3.6411852409129475e-08, |
|
"loss": 1.8824, |
|
"step": 529000 |
|
}, |
|
{ |
|
"epoch": 1.9783798022180645, |
|
"grad_norm": 4.475338935852051, |
|
"learning_rate": 3.1028426160295554e-08, |
|
"loss": 1.8725, |
|
"step": 529500 |
|
}, |
|
{ |
|
"epoch": 1.9802479606715282, |
|
"grad_norm": 6.577774524688721, |
|
"learning_rate": 2.607529667921771e-08, |
|
"loss": 1.8575, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 1.982116119124992, |
|
"grad_norm": 6.510643005371094, |
|
"learning_rate": 2.1552506618677248e-08, |
|
"loss": 1.8503, |
|
"step": 530500 |
|
}, |
|
{ |
|
"epoch": 1.9839842775784557, |
|
"grad_norm": 4.923540115356445, |
|
"learning_rate": 1.746785020741437e-08, |
|
"loss": 1.8607, |
|
"step": 531000 |
|
}, |
|
{ |
|
"epoch": 1.9858524360319194, |
|
"grad_norm": 4.267704486846924, |
|
"learning_rate": 1.3804991262938994e-08, |
|
"loss": 1.8248, |
|
"step": 531500 |
|
}, |
|
{ |
|
"epoch": 1.987720594485383, |
|
"grad_norm": 5.18399715423584, |
|
"learning_rate": 1.0572577402029326e-08, |
|
"loss": 1.8468, |
|
"step": 532000 |
|
}, |
|
{ |
|
"epoch": 1.9895887529388467, |
|
"grad_norm": 4.5753045082092285, |
|
"learning_rate": 7.770636459902836e-09, |
|
"loss": 1.8354, |
|
"step": 532500 |
|
}, |
|
{ |
|
"epoch": 1.9914569113923104, |
|
"grad_norm": 4.492304801940918, |
|
"learning_rate": 5.403505802398234e-09, |
|
"loss": 1.8668, |
|
"step": 533000 |
|
}, |
|
{ |
|
"epoch": 1.993325069845774, |
|
"grad_norm": 6.207240104675293, |
|
"learning_rate": 3.461718322739227e-09, |
|
"loss": 1.8532, |
|
"step": 533500 |
|
}, |
|
{ |
|
"epoch": 1.9951932282992377, |
|
"grad_norm": 6.569146156311035, |
|
"learning_rate": 1.9504649954538156e-09, |
|
"loss": 1.8313, |
|
"step": 534000 |
|
}, |
|
{ |
|
"epoch": 1.9970613867527016, |
|
"grad_norm": 3.274258852005005, |
|
"learning_rate": 8.69758834370904e-10, |
|
"loss": 1.9104, |
|
"step": 534500 |
|
}, |
|
{ |
|
"epoch": 1.9989295452061653, |
|
"grad_norm": 4.226444721221924, |
|
"learning_rate": 2.2047974543304427e-10, |
|
"loss": 1.8681, |
|
"step": 535000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 535286, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.4321334103279616e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |