{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7499861462585662,
  "eval_steps": 500,
  "global_step": 20301,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0007388662098010602,
      "grad_norm": 12.062633265062304,
      "learning_rate": 4.999999984845559e-07,
      "loss": 1.9234,
      "step": 20
    },
    {
      "epoch": 0.0014777324196021205,
      "grad_norm": 7.366383713647709,
      "learning_rate": 4.999993938226169e-07,
      "loss": 1.8338,
      "step": 40
    },
    {
      "epoch": 0.002216598629403181,
      "grad_norm": 1.7977538453913595,
      "learning_rate": 4.999975752937336e-07,
      "loss": 1.7526,
      "step": 60
    },
    {
      "epoch": 0.002955464839204241,
      "grad_norm": 1.6391792506527756,
      "learning_rate": 4.999945444231491e-07,
      "loss": 1.7305,
      "step": 80
    },
    {
      "epoch": 0.0036943310490053015,
      "grad_norm": 2.046829887141379,
      "learning_rate": 4.999903012271942e-07,
      "loss": 1.741,
      "step": 100
    },
    {
      "epoch": 0.004433197258806362,
      "grad_norm": 1.8506603870837903,
      "learning_rate": 4.999848457287324e-07,
      "loss": 1.7129,
      "step": 120
    },
    {
      "epoch": 0.005172063468607422,
      "grad_norm": 1.532512758048568,
      "learning_rate": 4.999781779571592e-07,
      "loss": 1.6774,
      "step": 140
    },
    {
      "epoch": 0.005910929678408482,
      "grad_norm": 1.5744798799071549,
      "learning_rate": 4.999702979484023e-07,
      "loss": 1.7007,
      "step": 160
    },
    {
      "epoch": 0.006649795888209542,
      "grad_norm": 3.3220978847027203,
      "learning_rate": 4.999612057449209e-07,
      "loss": 1.713,
      "step": 180
    },
    {
      "epoch": 0.007388662098010603,
      "grad_norm": 1.6192232490789547,
      "learning_rate": 4.999509013957061e-07,
      "loss": 1.7085,
      "step": 200
    },
    {
      "epoch": 0.008127528307811663,
      "grad_norm": 1.6102605979486695,
      "learning_rate": 4.999393849562803e-07,
      "loss": 1.6909,
      "step": 220
    },
    {
      "epoch": 0.008866394517612723,
      "grad_norm": 1.7856392499507323,
      "learning_rate": 4.999266564886968e-07,
      "loss": 1.7105,
      "step": 240
    },
    {
      "epoch": 0.009605260727413783,
      "grad_norm": 1.462292806282488,
      "learning_rate": 4.999127160615396e-07,
      "loss": 1.7254,
      "step": 260
    },
    {
      "epoch": 0.010344126937214844,
      "grad_norm": 1.6210572507919945,
      "learning_rate": 4.998975637499234e-07,
      "loss": 1.7228,
      "step": 280
    },
    {
      "epoch": 0.011082993147015904,
      "grad_norm": 1.5952635257667038,
      "learning_rate": 4.998811996354924e-07,
      "loss": 1.747,
      "step": 300
    },
    {
      "epoch": 0.011821859356816964,
      "grad_norm": 1.7849552493640788,
      "learning_rate": 4.998636238064202e-07,
      "loss": 1.6851,
      "step": 320
    },
    {
      "epoch": 0.012560725566618025,
      "grad_norm": 1.7748576256781579,
      "learning_rate": 4.9984483635741e-07,
      "loss": 1.7215,
      "step": 340
    },
    {
      "epoch": 0.013299591776419085,
      "grad_norm": 1.5217631531152356,
      "learning_rate": 4.998248373896929e-07,
      "loss": 1.7062,
      "step": 360
    },
    {
      "epoch": 0.014038457986220144,
      "grad_norm": 1.8070743437219547,
      "learning_rate": 4.998036270110284e-07,
      "loss": 1.7108,
      "step": 380
    },
    {
      "epoch": 0.014777324196021206,
      "grad_norm": 1.931037650128842,
      "learning_rate": 4.997812053357031e-07,
      "loss": 1.6739,
      "step": 400
    },
    {
      "epoch": 0.015516190405822266,
      "grad_norm": 2.3425787137976073,
      "learning_rate": 4.997575724845303e-07,
      "loss": 1.6501,
      "step": 420
    },
    {
      "epoch": 0.016255056615623327,
      "grad_norm": 1.5819249160473718,
      "learning_rate": 4.997327285848497e-07,
      "loss": 1.7295,
      "step": 440
    },
    {
      "epoch": 0.016993922825424387,
      "grad_norm": 1.780767344095751,
      "learning_rate": 4.997066737705263e-07,
      "loss": 1.7035,
      "step": 460
    },
    {
      "epoch": 0.017732789035225446,
      "grad_norm": 1.502517772930168,
      "learning_rate": 4.996794081819497e-07,
      "loss": 1.72,
      "step": 480
    },
    {
      "epoch": 0.018471655245026506,
      "grad_norm": 1.6230104328728192,
      "learning_rate": 4.996509319660336e-07,
      "loss": 1.7052,
      "step": 500
    },
    {
      "epoch": 0.019210521454827566,
      "grad_norm": 2.5063386321134287,
      "learning_rate": 4.996212452762147e-07,
      "loss": 1.7111,
      "step": 520
    },
    {
      "epoch": 0.01994938766462863,
      "grad_norm": 1.508194569170525,
      "learning_rate": 4.995903482724523e-07,
      "loss": 1.7116,
      "step": 540
    },
    {
      "epoch": 0.02068825387442969,
      "grad_norm": 1.5662589803980058,
      "learning_rate": 4.995582411212267e-07,
      "loss": 1.6586,
      "step": 560
    },
    {
      "epoch": 0.021427120084230748,
      "grad_norm": 1.5479171922961865,
      "learning_rate": 4.995249239955392e-07,
      "loss": 1.6605,
      "step": 580
    },
    {
      "epoch": 0.022165986294031808,
      "grad_norm": 1.4441787150001577,
      "learning_rate": 4.994903970749107e-07,
      "loss": 1.6952,
      "step": 600
    },
    {
      "epoch": 0.022904852503832868,
      "grad_norm": 1.8034952536565763,
      "learning_rate": 4.994546605453804e-07,
      "loss": 1.6928,
      "step": 620
    },
    {
      "epoch": 0.023643718713633927,
      "grad_norm": 2.0505792045813123,
      "learning_rate": 4.994177145995056e-07,
      "loss": 1.6979,
      "step": 640
    },
    {
      "epoch": 0.02438258492343499,
      "grad_norm": 1.7345451000474756,
      "learning_rate": 4.993795594363599e-07,
      "loss": 1.6827,
      "step": 660
    },
    {
      "epoch": 0.02512145113323605,
      "grad_norm": 1.7265664949693813,
      "learning_rate": 4.993401952615327e-07,
      "loss": 1.6949,
      "step": 680
    },
    {
      "epoch": 0.02586031734303711,
      "grad_norm": 1.5684431888117931,
      "learning_rate": 4.992996222871278e-07,
      "loss": 1.6725,
      "step": 700
    },
    {
      "epoch": 0.02659918355283817,
      "grad_norm": 1.8458741005435486,
      "learning_rate": 4.992578407317622e-07,
      "loss": 1.6876,
      "step": 720
    },
    {
      "epoch": 0.02733804976263923,
      "grad_norm": 1.603183220486937,
      "learning_rate": 4.992148508205652e-07,
      "loss": 1.7001,
      "step": 740
    },
    {
      "epoch": 0.02807691597244029,
      "grad_norm": 1.4656870216667528,
      "learning_rate": 4.991706527851766e-07,
      "loss": 1.6743,
      "step": 760
    },
    {
      "epoch": 0.028815782182241352,
      "grad_norm": 1.79341933069724,
      "learning_rate": 4.991252468637465e-07,
      "loss": 1.6894,
      "step": 780
    },
    {
      "epoch": 0.029554648392042412,
      "grad_norm": 1.4496770314789245,
      "learning_rate": 4.990786333009329e-07,
      "loss": 1.7038,
      "step": 800
    },
    {
      "epoch": 0.03029351460184347,
      "grad_norm": 1.757004570982493,
      "learning_rate": 4.990308123479012e-07,
      "loss": 1.7134,
      "step": 820
    },
    {
      "epoch": 0.03103238081164453,
      "grad_norm": 1.5200364379437228,
      "learning_rate": 4.98981784262322e-07,
      "loss": 1.6698,
      "step": 840
    },
    {
      "epoch": 0.03177124702144559,
      "grad_norm": 1.486100216095798,
      "learning_rate": 4.989315493083708e-07,
      "loss": 1.6896,
      "step": 860
    },
    {
      "epoch": 0.032510113231246654,
      "grad_norm": 1.6006604995588511,
      "learning_rate": 4.988801077567258e-07,
      "loss": 1.6842,
      "step": 880
    },
    {
      "epoch": 0.03324897944104771,
      "grad_norm": 1.6369118826080298,
      "learning_rate": 4.988274598845665e-07,
      "loss": 1.7129,
      "step": 900
    },
    {
      "epoch": 0.03398784565084877,
      "grad_norm": 1.594714153238538,
      "learning_rate": 4.987736059755724e-07,
      "loss": 1.6812,
      "step": 920
    },
    {
      "epoch": 0.03472671186064983,
      "grad_norm": 1.691349253313934,
      "learning_rate": 4.987185463199215e-07,
      "loss": 1.7131,
      "step": 940
    },
    {
      "epoch": 0.03546557807045089,
      "grad_norm": 2.1466962990805385,
      "learning_rate": 4.986622812142888e-07,
      "loss": 1.7217,
      "step": 960
    },
    {
      "epoch": 0.036204444280251956,
      "grad_norm": 1.6551865204895997,
      "learning_rate": 4.986048109618442e-07,
      "loss": 1.7179,
      "step": 980
    },
    {
      "epoch": 0.03694331049005301,
      "grad_norm": 1.5681769699914139,
      "learning_rate": 4.985461358722514e-07,
      "loss": 1.6897,
      "step": 1000
    },
    {
      "epoch": 0.037682176699854075,
      "grad_norm": 1.5623589901869384,
      "learning_rate": 4.984862562616661e-07,
      "loss": 1.7307,
      "step": 1020
    },
    {
      "epoch": 0.03842104290965513,
      "grad_norm": 1.710638101923504,
      "learning_rate": 4.984251724527342e-07,
      "loss": 1.6815,
      "step": 1040
    },
    {
      "epoch": 0.039159909119456195,
      "grad_norm": 2.613860202511964,
      "learning_rate": 4.983628847745904e-07,
      "loss": 1.6798,
      "step": 1060
    },
    {
      "epoch": 0.03989877532925726,
      "grad_norm": 1.9956576024499864,
      "learning_rate": 4.982993935628554e-07,
      "loss": 1.6715,
      "step": 1080
    },
    {
      "epoch": 0.040637641539058314,
      "grad_norm": 1.833965747583207,
      "learning_rate": 4.982346991596356e-07,
      "loss": 1.7044,
      "step": 1100
    },
    {
      "epoch": 0.04137650774885938,
      "grad_norm": 1.709065067682895,
      "learning_rate": 4.981688019135202e-07,
      "loss": 1.6612,
      "step": 1120
    },
    {
      "epoch": 0.04211537395866043,
      "grad_norm": 2.2904499102757767,
      "learning_rate": 4.981017021795794e-07,
      "loss": 1.6984,
      "step": 1140
    },
    {
      "epoch": 0.042854240168461497,
      "grad_norm": 1.5809892282131641,
      "learning_rate": 4.980334003193632e-07,
      "loss": 1.672,
      "step": 1160
    },
    {
      "epoch": 0.04359310637826255,
      "grad_norm": 1.4895408854624943,
      "learning_rate": 4.979638967008983e-07,
      "loss": 1.6637,
      "step": 1180
    },
    {
      "epoch": 0.044331972588063616,
      "grad_norm": 1.6294048072820626,
      "learning_rate": 4.978931916986874e-07,
      "loss": 1.6604,
      "step": 1200
    },
    {
      "epoch": 0.04507083879786468,
      "grad_norm": 2.156396711607377,
      "learning_rate": 4.978212856937062e-07,
      "loss": 1.678,
      "step": 1220
    },
    {
      "epoch": 0.045809705007665735,
      "grad_norm": 1.6010232675443634,
      "learning_rate": 4.977481790734016e-07,
      "loss": 1.6922,
      "step": 1240
    },
    {
      "epoch": 0.0465485712174668,
      "grad_norm": 1.4024504403885678,
      "learning_rate": 4.9767387223169e-07,
      "loss": 1.6538,
      "step": 1260
    },
    {
      "epoch": 0.047287437427267855,
      "grad_norm": 1.711902948101267,
      "learning_rate": 4.975983655689547e-07,
      "loss": 1.6844,
      "step": 1280
    },
    {
      "epoch": 0.04802630363706892,
      "grad_norm": 1.572788133497536,
      "learning_rate": 4.975216594920441e-07,
      "loss": 1.6773,
      "step": 1300
    },
    {
      "epoch": 0.04876516984686998,
      "grad_norm": 1.5865129712420638,
      "learning_rate": 4.974437544142691e-07,
      "loss": 1.6794,
      "step": 1320
    },
    {
      "epoch": 0.04950403605667104,
      "grad_norm": 1.5690014017001472,
      "learning_rate": 4.973646507554012e-07,
      "loss": 1.7073,
      "step": 1340
    },
    {
      "epoch": 0.0502429022664721,
      "grad_norm": 1.5937164094810738,
      "learning_rate": 4.972843489416702e-07,
      "loss": 1.6958,
      "step": 1360
    },
    {
      "epoch": 0.05098176847627316,
      "grad_norm": 1.5264901796499448,
      "learning_rate": 4.972028494057619e-07,
      "loss": 1.6452,
      "step": 1380
    },
    {
      "epoch": 0.05172063468607422,
      "grad_norm": 1.7268277514753942,
      "learning_rate": 4.971201525868155e-07,
      "loss": 1.6944,
      "step": 1400
    },
    {
      "epoch": 0.052459500895875276,
      "grad_norm": 1.6746090286211905,
      "learning_rate": 4.970362589304216e-07,
      "loss": 1.6621,
      "step": 1420
    },
    {
      "epoch": 0.05319836710567634,
      "grad_norm": 1.5009694467436718,
      "learning_rate": 4.969511688886198e-07,
      "loss": 1.6797,
      "step": 1440
    },
    {
      "epoch": 0.0539372333154774,
      "grad_norm": 1.4662106712988012,
      "learning_rate": 4.968648829198958e-07,
      "loss": 1.6664,
      "step": 1460
    },
    {
      "epoch": 0.05467609952527846,
      "grad_norm": 1.5749221565087543,
      "learning_rate": 4.967774014891796e-07,
      "loss": 1.7086,
      "step": 1480
    },
    {
      "epoch": 0.05541496573507952,
      "grad_norm": 1.482093097261866,
      "learning_rate": 4.966887250678421e-07,
      "loss": 1.7089,
      "step": 1500
    },
    {
      "epoch": 0.05615383194488058,
      "grad_norm": 1.520435320135513,
      "learning_rate": 4.965988541336936e-07,
      "loss": 1.6734,
      "step": 1520
    },
    {
      "epoch": 0.05689269815468164,
      "grad_norm": 1.5553430296104012,
      "learning_rate": 4.965077891709807e-07,
      "loss": 1.697,
      "step": 1540
    },
    {
      "epoch": 0.057631564364482704,
      "grad_norm": 1.3543792401342896,
      "learning_rate": 4.964155306703835e-07,
      "loss": 1.6997,
      "step": 1560
    },
    {
      "epoch": 0.05837043057428376,
      "grad_norm": 1.575031153656866,
      "learning_rate": 4.963220791290132e-07,
      "loss": 1.6916,
      "step": 1580
    },
    {
      "epoch": 0.059109296784084824,
      "grad_norm": 1.669401673230416,
      "learning_rate": 4.962274350504096e-07,
      "loss": 1.7042,
      "step": 1600
    },
    {
      "epoch": 0.05984816299388588,
      "grad_norm": 1.5212881661869584,
      "learning_rate": 4.961315989445378e-07,
      "loss": 1.667,
      "step": 1620
    },
    {
      "epoch": 0.06058702920368694,
      "grad_norm": 1.5762244565376538,
      "learning_rate": 4.960345713277863e-07,
      "loss": 1.6342,
      "step": 1640
    },
    {
      "epoch": 0.061325895413488006,
      "grad_norm": 1.5691787785228513,
      "learning_rate": 4.959363527229634e-07,
      "loss": 1.6525,
      "step": 1660
    },
    {
      "epoch": 0.06206476162328906,
      "grad_norm": 1.6437209138688083,
      "learning_rate": 4.958369436592948e-07,
      "loss": 1.6769,
      "step": 1680
    },
    {
      "epoch": 0.06280362783309013,
      "grad_norm": 1.553888728962609,
      "learning_rate": 4.957363446724208e-07,
      "loss": 1.6924,
      "step": 1700
    },
    {
      "epoch": 0.06354249404289118,
      "grad_norm": 1.5365170274491486,
      "learning_rate": 4.956345563043933e-07,
      "loss": 1.6894,
      "step": 1720
    },
    {
      "epoch": 0.06428136025269224,
      "grad_norm": 1.5197453947387185,
      "learning_rate": 4.955315791036727e-07,
      "loss": 1.6758,
      "step": 1740
    },
    {
      "epoch": 0.06502022646249331,
      "grad_norm": 2.464391338240643,
      "learning_rate": 4.954274136251251e-07,
      "loss": 1.6332,
      "step": 1760
    },
    {
      "epoch": 0.06575909267229436,
      "grad_norm": 1.6394501426047832,
      "learning_rate": 4.953220604300198e-07,
      "loss": 1.6879,
      "step": 1780
    },
    {
      "epoch": 0.06649795888209542,
      "grad_norm": 1.6810218422818062,
      "learning_rate": 4.952155200860251e-07,
      "loss": 1.6724,
      "step": 1800
    },
    {
      "epoch": 0.06723682509189649,
      "grad_norm": 1.6917107156739108,
      "learning_rate": 4.951077931672067e-07,
      "loss": 1.6826,
      "step": 1820
    },
    {
      "epoch": 0.06797569130169755,
      "grad_norm": 4.580444011220939,
      "learning_rate": 4.949988802540229e-07,
      "loss": 1.6581,
      "step": 1840
    },
    {
      "epoch": 0.0687145575114986,
      "grad_norm": 1.529682024928737,
      "learning_rate": 4.948887819333236e-07,
      "loss": 1.6649,
      "step": 1860
    },
    {
      "epoch": 0.06945342372129966,
      "grad_norm": 1.4414816212505979,
      "learning_rate": 4.947774987983449e-07,
      "loss": 1.6691,
      "step": 1880
    },
    {
      "epoch": 0.07019228993110073,
      "grad_norm": 1.9799877388187868,
      "learning_rate": 4.946650314487077e-07,
      "loss": 1.6937,
      "step": 1900
    },
    {
      "epoch": 0.07093115614090179,
      "grad_norm": 1.4952225950122013,
      "learning_rate": 4.945513804904131e-07,
      "loss": 1.6798,
      "step": 1920
    },
    {
      "epoch": 0.07167002235070284,
      "grad_norm": 1.5642244850204086,
      "learning_rate": 4.944365465358407e-07,
      "loss": 1.6783,
      "step": 1940
    },
    {
      "epoch": 0.07240888856050391,
      "grad_norm": 1.5048141774546024,
      "learning_rate": 4.943205302037432e-07,
      "loss": 1.6486,
      "step": 1960
    },
    {
      "epoch": 0.07314775477030497,
      "grad_norm": 1.4222420311354336,
      "learning_rate": 4.942033321192452e-07,
      "loss": 1.6868,
      "step": 1980
    },
    {
      "epoch": 0.07388662098010602,
      "grad_norm": 1.6893784971157513,
      "learning_rate": 4.940849529138383e-07,
      "loss": 1.6934,
      "step": 2000
    },
    {
      "epoch": 0.0746254871899071,
      "grad_norm": 1.9605139373755291,
      "learning_rate": 4.939653932253786e-07,
      "loss": 1.6537,
      "step": 2020
    },
    {
      "epoch": 0.07536435339970815,
      "grad_norm": 1.6497175696745814,
      "learning_rate": 4.938446536980829e-07,
      "loss": 1.7022,
      "step": 2040
    },
    {
      "epoch": 0.0761032196095092,
      "grad_norm": 1.6258237906038047,
      "learning_rate": 4.93722734982525e-07,
      "loss": 1.6925,
      "step": 2060
    },
    {
      "epoch": 0.07684208581931026,
      "grad_norm": 1.5236446480879742,
      "learning_rate": 4.935996377356326e-07,
      "loss": 1.6418,
      "step": 2080
    },
    {
      "epoch": 0.07758095202911133,
      "grad_norm": 1.5958168322212294,
      "learning_rate": 4.934753626206837e-07,
      "loss": 1.7259,
      "step": 2100
    },
    {
      "epoch": 0.07831981823891239,
      "grad_norm": 2.45551144657574,
      "learning_rate": 4.933499103073029e-07,
      "loss": 1.7141,
      "step": 2120
    },
    {
      "epoch": 0.07905868444871345,
      "grad_norm": 1.519298595383626,
      "learning_rate": 4.932232814714576e-07,
      "loss": 1.6712,
      "step": 2140
    },
    {
      "epoch": 0.07979755065851452,
      "grad_norm": 1.6278461520508305,
      "learning_rate": 4.930954767954551e-07,
      "loss": 1.6784,
      "step": 2160
    },
    {
      "epoch": 0.08053641686831557,
      "grad_norm": 1.5199514109499472,
      "learning_rate": 4.92966496967938e-07,
      "loss": 1.6842,
      "step": 2180
    },
    {
      "epoch": 0.08127528307811663,
      "grad_norm": 1.8268617588637115,
      "learning_rate": 4.928363426838808e-07,
      "loss": 1.714,
      "step": 2200
    },
    {
      "epoch": 0.08201414928791768,
      "grad_norm": 1.671569089879459,
      "learning_rate": 4.927050146445867e-07,
      "loss": 1.6693,
      "step": 2220
    },
    {
      "epoch": 0.08275301549771875,
      "grad_norm": 1.4546842764348067,
      "learning_rate": 4.92572513557683e-07,
      "loss": 1.6724,
      "step": 2240
    },
    {
      "epoch": 0.08349188170751981,
      "grad_norm": 1.5602541654624753,
      "learning_rate": 4.924388401371179e-07,
      "loss": 1.6715,
      "step": 2260
    },
    {
      "epoch": 0.08423074791732087,
      "grad_norm": 1.6408350929881408,
      "learning_rate": 4.923039951031562e-07,
      "loss": 1.6538,
      "step": 2280
    },
    {
      "epoch": 0.08496961412712194,
      "grad_norm": 1.3547990923859226,
      "learning_rate": 4.921679791823761e-07,
      "loss": 1.6639,
      "step": 2300
    },
    {
      "epoch": 0.08570848033692299,
      "grad_norm": 1.536988279196407,
      "learning_rate": 4.92030793107664e-07,
      "loss": 1.6709,
      "step": 2320
    },
    {
      "epoch": 0.08644734654672405,
      "grad_norm": 1.4484585256339257,
      "learning_rate": 4.918924376182121e-07,
      "loss": 1.6517,
      "step": 2340
    },
    {
      "epoch": 0.0871862127565251,
      "grad_norm": 1.4965077209050879,
      "learning_rate": 4.917529134595135e-07,
      "loss": 1.6956,
      "step": 2360
    },
    {
      "epoch": 0.08792507896632618,
      "grad_norm": 1.858590784934109,
      "learning_rate": 4.916122213833584e-07,
      "loss": 1.6667,
      "step": 2380
    },
    {
      "epoch": 0.08866394517612723,
      "grad_norm": 1.6845854426029852,
      "learning_rate": 4.914703621478297e-07,
      "loss": 1.6392,
      "step": 2400
    },
    {
      "epoch": 0.08940281138592829,
      "grad_norm": 1.479499322660105,
      "learning_rate": 4.913273365172998e-07,
      "loss": 1.6323,
      "step": 2420
    },
    {
      "epoch": 0.09014167759572936,
      "grad_norm": 1.4475363138688357,
      "learning_rate": 4.911831452624253e-07,
      "loss": 1.655,
      "step": 2440
    },
    {
      "epoch": 0.09088054380553041,
      "grad_norm": 1.4410524177419237,
      "learning_rate": 4.910377891601439e-07,
      "loss": 1.6977,
      "step": 2460
    },
    {
      "epoch": 0.09161941001533147,
      "grad_norm": 1.512362377905178,
      "learning_rate": 4.908912689936697e-07,
      "loss": 1.6716,
      "step": 2480
    },
    {
      "epoch": 0.09235827622513254,
      "grad_norm": 1.8697344841744916,
      "learning_rate": 4.90743585552489e-07,
      "loss": 1.6694,
      "step": 2500
    },
    {
      "epoch": 0.0930971424349336,
      "grad_norm": 1.5406884179833267,
      "learning_rate": 4.905947396323561e-07,
      "loss": 1.7013,
      "step": 2520
    },
    {
      "epoch": 0.09383600864473465,
      "grad_norm": 1.883868312926782,
      "learning_rate": 4.904447320352891e-07,
      "loss": 1.6438,
      "step": 2540
    },
    {
      "epoch": 0.09457487485453571,
      "grad_norm": 1.5063919396389938,
      "learning_rate": 4.902935635695655e-07,
      "loss": 1.6341,
      "step": 2560
    },
    {
      "epoch": 0.09531374106433678,
      "grad_norm": 2.5666944465723223,
      "learning_rate": 4.901412350497177e-07,
      "loss": 1.673,
      "step": 2580
    },
    {
      "epoch": 0.09605260727413784,
      "grad_norm": 1.6864017944187357,
      "learning_rate": 4.899877472965289e-07,
      "loss": 1.6532,
      "step": 2600
    },
    {
      "epoch": 0.09679147348393889,
      "grad_norm": 1.489838376943142,
      "learning_rate": 4.898331011370282e-07,
      "loss": 1.7006,
      "step": 2620
    },
    {
      "epoch": 0.09753033969373996,
      "grad_norm": 1.516224235445671,
      "learning_rate": 4.896772974044871e-07,
      "loss": 1.664,
      "step": 2640
    },
    {
      "epoch": 0.09826920590354102,
      "grad_norm": 1.4154240383552321,
      "learning_rate": 4.895203369384138e-07,
      "loss": 1.6749,
      "step": 2660
    },
    {
      "epoch": 0.09900807211334207,
      "grad_norm": 1.8653781111338754,
      "learning_rate": 4.893622205845498e-07,
      "loss": 1.6255,
      "step": 2680
    },
    {
      "epoch": 0.09974693832314313,
      "grad_norm": 1.6154619117281779,
      "learning_rate": 4.892029491948642e-07,
      "loss": 1.7121,
      "step": 2700
    },
    {
      "epoch": 0.1004858045329442,
      "grad_norm": 1.6240732568528131,
      "learning_rate": 4.890425236275502e-07,
      "loss": 1.687,
      "step": 2720
    },
    {
      "epoch": 0.10122467074274526,
      "grad_norm": 1.459326292962488,
      "learning_rate": 4.888809447470195e-07,
      "loss": 1.5967,
      "step": 2740
    },
    {
      "epoch": 0.10196353695254631,
      "grad_norm": 1.7582112558661527,
      "learning_rate": 4.887182134238989e-07,
      "loss": 1.7297,
      "step": 2760
    },
    {
      "epoch": 0.10270240316234738,
      "grad_norm": 1.7154567295687058,
      "learning_rate": 4.885543305350241e-07,
      "loss": 1.6881,
      "step": 2780
    },
    {
      "epoch": 0.10344126937214844,
      "grad_norm": 1.68486225816754,
      "learning_rate": 4.88389296963436e-07,
      "loss": 1.6351,
      "step": 2800
    },
    {
      "epoch": 0.1041801355819495,
      "grad_norm": 1.4658940210533413,
      "learning_rate": 4.882231135983757e-07,
      "loss": 1.6584,
      "step": 2820
    },
    {
      "epoch": 0.10491900179175055,
      "grad_norm": 1.3967168353938462,
      "learning_rate": 4.880557813352796e-07,
      "loss": 1.6811,
      "step": 2840
    },
    {
      "epoch": 0.10565786800155162,
      "grad_norm": 1.6648778148188543,
      "learning_rate": 4.878873010757747e-07,
      "loss": 1.6447,
      "step": 2860
    },
    {
      "epoch": 0.10639673421135268,
      "grad_norm": 1.6827360384506134,
      "learning_rate": 4.877176737276736e-07,
      "loss": 1.6671,
      "step": 2880
    },
    {
      "epoch": 0.10713560042115373,
      "grad_norm": 1.6125148782802161,
      "learning_rate": 4.875469002049697e-07,
      "loss": 1.6611,
      "step": 2900
    },
    {
      "epoch": 0.1078744666309548,
      "grad_norm": 3.1640996826552925,
      "learning_rate": 4.873749814278325e-07,
      "loss": 1.6914,
      "step": 2920
    },
    {
      "epoch": 0.10861333284075586,
      "grad_norm": 1.5756821875718683,
      "learning_rate": 4.87201918322602e-07,
      "loss": 1.6891,
      "step": 2940
    },
    {
      "epoch": 0.10935219905055692,
      "grad_norm": 1.508384464413988,
      "learning_rate": 4.870277118217844e-07,
      "loss": 1.6765,
      "step": 2960
    },
    {
      "epoch": 0.11009106526035799,
      "grad_norm": 1.8943879400046142,
      "learning_rate": 4.868523628640468e-07,
      "loss": 1.6718,
      "step": 2980
    },
    {
      "epoch": 0.11082993147015904,
      "grad_norm": 1.5476264075937183,
      "learning_rate": 4.86675872394212e-07,
      "loss": 1.6384,
      "step": 3000
    },
    {
      "epoch": 0.1115687976799601,
      "grad_norm": 1.7120101891654744,
      "learning_rate": 4.864982413632537e-07,
      "loss": 1.66,
      "step": 3020
    },
    {
      "epoch": 0.11230766388976116,
      "grad_norm": 1.8834789513548644,
      "learning_rate": 4.863284363107887e-07,
      "loss": 1.6453,
      "step": 3040
    },
    {
      "epoch": 0.11304653009956223,
      "grad_norm": 1.6393861847878763,
      "learning_rate": 4.861485839441465e-07,
      "loss": 1.6914,
      "step": 3060
    },
    {
      "epoch": 0.11378539630936328,
      "grad_norm": 1.548505894649462,
      "learning_rate": 4.859675938575391e-07,
      "loss": 1.6513,
      "step": 3080
    },
    {
      "epoch": 0.11452426251916434,
      "grad_norm": 1.7314160899998987,
      "learning_rate": 4.857854670261854e-07,
      "loss": 1.6652,
      "step": 3100
    },
    {
      "epoch": 0.11526312872896541,
      "grad_norm": 1.6255645061866926,
      "learning_rate": 4.856022044314289e-07,
      "loss": 1.6825,
      "step": 3120
    },
    {
      "epoch": 0.11600199493876646,
      "grad_norm": 1.7047082936180922,
      "learning_rate": 4.854178070607332e-07,
      "loss": 1.6571,
      "step": 3140
    },
    {
      "epoch": 0.11674086114856752,
      "grad_norm": 1.5937691951508997,
      "learning_rate": 4.852322759076762e-07,
      "loss": 1.6796,
      "step": 3160
    },
    {
      "epoch": 0.11747972735836858,
      "grad_norm": 1.5581038553350461,
      "learning_rate": 4.850456119719448e-07,
      "loss": 1.6237,
      "step": 3180
    },
    {
      "epoch": 0.11821859356816965,
      "grad_norm": 1.5319442885899253,
      "learning_rate": 4.848578162593298e-07,
      "loss": 1.6507,
      "step": 3200
    },
    {
      "epoch": 0.1189574597779707,
      "grad_norm": 1.7452294652307094,
      "learning_rate": 4.846783629455789e-07,
      "loss": 1.6334,
      "step": 3220
    },
    {
      "epoch": 0.11969632598777176,
      "grad_norm": 1.6423452527210813,
      "learning_rate": 4.844883631840362e-07,
      "loss": 1.6591,
      "step": 3240
    },
    {
      "epoch": 0.12043519219757283,
      "grad_norm": 2.3138017105742277,
      "learning_rate": 4.842972346482019e-07,
      "loss": 1.6693,
      "step": 3260
    },
    {
      "epoch": 0.12117405840737389,
      "grad_norm": 1.5077648756938484,
      "learning_rate": 4.841049783679233e-07,
      "loss": 1.6486,
      "step": 3280
    },
    {
      "epoch": 0.12191292461717494,
      "grad_norm": 1.4711190983794034,
      "learning_rate": 4.839115953791238e-07,
      "loss": 1.6881,
      "step": 3300
    },
    {
      "epoch": 0.12265179082697601,
      "grad_norm": 4.058044242916531,
      "learning_rate": 4.837170867237982e-07,
      "loss": 1.6469,
      "step": 3320
    },
    {
      "epoch": 0.12339065703677707,
      "grad_norm": 1.8109757223352017,
      "learning_rate": 4.835214534500064e-07,
      "loss": 1.6912,
      "step": 3340
    },
    {
      "epoch": 0.12412952324657812,
      "grad_norm": 1.5112894099167034,
      "learning_rate": 4.83324696611868e-07,
      "loss": 1.6452,
      "step": 3360
    },
    {
      "epoch": 0.12486838945637918,
      "grad_norm": 1.7532693818843224,
      "learning_rate": 4.83126817269557e-07,
      "loss": 1.6158,
      "step": 3380
    },
    {
      "epoch": 0.12560725566618025,
      "grad_norm": 1.7433921276878421,
      "learning_rate": 4.829278164892951e-07,
      "loss": 1.6684,
      "step": 3400
    },
    {
      "epoch": 0.1263461218759813,
      "grad_norm": 1.499971805431214,
      "learning_rate": 4.827276953433474e-07,
      "loss": 1.6596,
      "step": 3420
    },
    {
      "epoch": 0.12708498808578236,
      "grad_norm": 1.5392331224579805,
      "learning_rate": 4.825264549100149e-07,
      "loss": 1.6411,
      "step": 3440
    },
    {
      "epoch": 0.12782385429558343,
      "grad_norm": 1.5289257318137572,
      "learning_rate": 4.823240962736303e-07,
      "loss": 1.6759,
      "step": 3460
    },
    {
      "epoch": 0.12856272050538448,
      "grad_norm": 1.5034439532563377,
      "learning_rate": 4.82120620524551e-07,
      "loss": 1.6405,
      "step": 3480
    },
    {
      "epoch": 0.12930158671518555,
      "grad_norm": 1.4978715454221503,
      "learning_rate": 4.81916028759154e-07,
      "loss": 1.6732,
      "step": 3500
    },
    {
      "epoch": 0.13004045292498662,
      "grad_norm": 1.45790640802375,
      "learning_rate": 4.817103220798296e-07,
      "loss": 1.6649,
      "step": 3520
    },
    {
      "epoch": 0.13077931913478766,
      "grad_norm": 1.5322708095688835,
      "learning_rate": 4.815035015949754e-07,
      "loss": 1.6588,
      "step": 3540
    },
    {
      "epoch": 0.13151818534458873,
      "grad_norm": 1.540513558070265,
      "learning_rate": 4.812955684189904e-07,
      "loss": 1.6718,
      "step": 3560
    },
    {
      "epoch": 0.1322570515543898,
      "grad_norm": 1.4880225438470713,
      "learning_rate": 4.810865236722692e-07,
      "loss": 1.6313,
      "step": 3580
    },
    {
      "epoch": 0.13299591776419084,
      "grad_norm": 1.4919528959671158,
      "learning_rate": 4.808763684811959e-07,
      "loss": 1.62,
      "step": 3600
    },
    {
      "epoch": 0.1337347839739919,
      "grad_norm": 1.6101194590431924,
      "learning_rate": 4.806651039781377e-07,
      "loss": 1.6933,
      "step": 3620
    },
    {
      "epoch": 0.13447365018379298,
      "grad_norm": 1.5722737602103793,
      "learning_rate": 4.804527313014392e-07,
      "loss": 1.6555,
      "step": 3640
    },
    {
      "epoch": 0.13521251639359402,
      "grad_norm": 1.647937670204523,
      "learning_rate": 4.802392515954161e-07,
      "loss": 1.6561,
      "step": 3660
    },
    {
      "epoch": 0.1359513826033951,
      "grad_norm": 1.6527027343392149,
      "learning_rate": 4.80024666010349e-07,
      "loss": 1.6747,
      "step": 3680
    },
    {
      "epoch": 0.13669024881319616,
      "grad_norm": 1.596151179002379,
      "learning_rate": 4.798089757024773e-07,
      "loss": 1.6602,
      "step": 3700
    },
    {
      "epoch": 0.1374291150229972,
      "grad_norm": 1.6359785367644735,
      "learning_rate": 4.795921818339928e-07,
      "loss": 1.7041,
      "step": 3720
    },
    {
      "epoch": 0.13816798123279828,
      "grad_norm": 1.5303851327334592,
      "learning_rate": 4.793742855730337e-07,
      "loss": 1.6921,
      "step": 3740
    },
    {
      "epoch": 0.13890684744259932,
      "grad_norm": 1.552833624004378,
      "learning_rate": 4.79155288093678e-07,
      "loss": 1.6646,
      "step": 3760
    },
    {
      "epoch": 0.1396457136524004,
      "grad_norm": 1.5328749650552398,
      "learning_rate": 4.789351905759377e-07,
      "loss": 1.671,
      "step": 3780
    },
    {
      "epoch": 0.14038457986220146,
      "grad_norm": 1.4637618775535644,
      "learning_rate": 4.787139942057513e-07,
      "loss": 1.6826,
      "step": 3800
    },
    {
      "epoch": 0.1411234460720025,
      "grad_norm": 1.456698106912096,
      "learning_rate": 4.784917001749791e-07,
      "loss": 1.7079,
      "step": 3820
    },
    {
      "epoch": 0.14186231228180357,
      "grad_norm": 1.4778158837226694,
      "learning_rate": 4.782683096813954e-07,
      "loss": 1.6673,
      "step": 3840
    },
    {
      "epoch": 0.14260117849160464,
      "grad_norm": 1.426517743754919,
      "learning_rate": 4.780438239286824e-07,
      "loss": 1.6327,
      "step": 3860
    },
    {
      "epoch": 0.14334004470140568,
      "grad_norm": 1.7717097070454197,
      "learning_rate": 4.77818244126424e-07,
      "loss": 1.6577,
      "step": 3880
    },
    {
      "epoch": 0.14407891091120675,
      "grad_norm": 1.6008901431845195,
      "learning_rate": 4.775915714900992e-07,
      "loss": 1.6493,
      "step": 3900
    },
    {
      "epoch": 0.14481777712100782,
      "grad_norm": 1.5377457534191892,
      "learning_rate": 4.773638072410752e-07,
      "loss": 1.6668,
      "step": 3920
    },
    {
      "epoch": 0.14555664333080887,
      "grad_norm": 1.9114280227385299,
      "learning_rate": 4.771349526066014e-07,
      "loss": 1.6925,
      "step": 3940
    },
    {
      "epoch": 0.14629550954060994,
      "grad_norm": 1.803899924444919,
      "learning_rate": 4.769050088198021e-07,
      "loss": 1.6775,
      "step": 3960
    },
    {
      "epoch": 0.147034375750411,
      "grad_norm": 1.5100721777601815,
      "learning_rate": 4.7667397711967037e-07,
      "loss": 1.6181,
      "step": 3980
    },
    {
      "epoch": 0.14777324196021205,
      "grad_norm": 1.4720945445766893,
      "learning_rate": 4.764418587510615e-07,
      "loss": 1.6607,
      "step": 4000
    },
    {
      "epoch": 0.14851210817001312,
      "grad_norm": 1.569266687535282,
      "learning_rate": 4.7620865496468544e-07,
      "loss": 1.6829,
      "step": 4020
    },
    {
      "epoch": 0.1492509743798142,
      "grad_norm": 1.5799540185979453,
      "learning_rate": 4.7597436701710107e-07,
      "loss": 1.6483,
      "step": 4040
    },
    {
      "epoch": 0.14998984058961523,
      "grad_norm": 1.5804308168544465,
      "learning_rate": 4.75738996170709e-07,
      "loss": 1.6924,
      "step": 4060
    },
    {
      "epoch": 0.1507287067994163,
      "grad_norm": 1.523398154876467,
      "learning_rate": 4.7550254369374455e-07,
      "loss": 1.6519,
      "step": 4080
    },
    {
      "epoch": 0.15146757300921734,
      "grad_norm": 1.4233865381689017,
      "learning_rate": 4.752650108602712e-07,
      "loss": 1.664,
      "step": 4100
    },
    {
      "epoch": 0.1522064392190184,
      "grad_norm": 1.512734811893487,
      "learning_rate": 4.7502639895017366e-07,
      "loss": 1.7103,
      "step": 4120
    },
    {
      "epoch": 0.15294530542881948,
      "grad_norm": 1.5630800949377466,
      "learning_rate": 4.747867092491511e-07,
      "loss": 1.6531,
      "step": 4140
    },
    {
      "epoch": 0.15368417163862053,
      "grad_norm": 1.470144612554125,
      "learning_rate": 4.7454594304870977e-07,
      "loss": 1.6725,
      "step": 4160
    },
    {
      "epoch": 0.1544230378484216,
      "grad_norm": 1.6569477682445206,
      "learning_rate": 4.743041016461567e-07,
      "loss": 1.6998,
      "step": 4180
    },
    {
      "epoch": 0.15516190405822267,
      "grad_norm": 1.7296103801240361,
      "learning_rate": 4.7406118634459223e-07,
      "loss": 1.6613,
      "step": 4200
    },
    {
      "epoch": 0.1559007702680237,
      "grad_norm": 1.6463696442561442,
      "learning_rate": 4.738171984529031e-07,
      "loss": 1.6575,
      "step": 4220
    },
    {
      "epoch": 0.15663963647782478,
      "grad_norm": 1.545869558479261,
      "learning_rate": 4.7357213928575546e-07,
      "loss": 1.6741,
      "step": 4240
    },
    {
      "epoch": 0.15737850268762585,
      "grad_norm": 1.7796493147352748,
      "learning_rate": 4.7332601016358773e-07,
      "loss": 1.7046,
      "step": 4260
    },
    {
      "epoch": 0.1581173688974269,
      "grad_norm": 1.5172414763731175,
      "learning_rate": 4.7307881241260365e-07,
      "loss": 1.6365,
      "step": 4280
    },
    {
      "epoch": 0.15885623510722796,
      "grad_norm": 1.5470321540163943,
      "learning_rate": 4.7283054736476474e-07,
      "loss": 1.6844,
      "step": 4300
    },
    {
      "epoch": 0.15959510131702903,
      "grad_norm": 1.5074962263335083,
      "learning_rate": 4.725812163577835e-07,
      "loss": 1.6683,
      "step": 4320
    },
    {
      "epoch": 0.16033396752683007,
      "grad_norm": 1.5931587963454854,
      "learning_rate": 4.723308207351162e-07,
      "loss": 1.6972,
      "step": 4340
    },
    {
      "epoch": 0.16107283373663114,
      "grad_norm": 1.4335946997211053,
      "learning_rate": 4.720793618459553e-07,
      "loss": 1.6182,
      "step": 4360
    },
    {
      "epoch": 0.1618116999464322,
      "grad_norm": 1.9207877719443267,
      "learning_rate": 4.718268410452226e-07,
      "loss": 1.6777,
      "step": 4380
    },
    {
      "epoch": 0.16255056615623326,
      "grad_norm": 1.4490578223410473,
      "learning_rate": 4.7157325969356143e-07,
      "loss": 1.6911,
      "step": 4400
    },
    {
      "epoch": 0.16328943236603433,
      "grad_norm": 1.593847776562296,
      "learning_rate": 4.713186191573301e-07,
      "loss": 1.6927,
      "step": 4420
    },
    {
      "epoch": 0.16402829857583537,
      "grad_norm": 1.4739123126868083,
      "learning_rate": 4.7106292080859363e-07,
      "loss": 1.6492,
      "step": 4440
    },
    {
      "epoch": 0.16476716478563644,
      "grad_norm": 1.424511297941709,
      "learning_rate": 4.7080616602511705e-07,
      "loss": 1.6847,
      "step": 4460
    },
    {
      "epoch": 0.1655060309954375,
      "grad_norm": 1.6007681786366288,
      "learning_rate": 4.705483561903576e-07,
      "loss": 1.662,
      "step": 4480
    },
    {
      "epoch": 0.16624489720523855,
      "grad_norm": 1.55690540989863,
      "learning_rate": 4.702894926934573e-07,
      "loss": 1.6851,
      "step": 4500
    },
    {
      "epoch": 0.16698376341503962,
      "grad_norm": 2.0423474735881926,
      "learning_rate": 4.700295769292359e-07,
      "loss": 1.6604,
      "step": 4520
    },
    {
      "epoch": 0.1677226296248407,
      "grad_norm": 1.453355289637868,
      "learning_rate": 4.6976861029818264e-07,
      "loss": 1.6842,
      "step": 4540
    },
    {
      "epoch": 0.16846149583464173,
      "grad_norm": 1.5505160972568328,
      "learning_rate": 4.695065942064494e-07,
      "loss": 1.6804,
      "step": 4560
    },
    {
      "epoch": 0.1692003620444428,
      "grad_norm": 1.7608287873846744,
      "learning_rate": 4.6924353006584244e-07,
      "loss": 1.6595,
      "step": 4580
    },
    {
      "epoch": 0.16993922825424387,
      "grad_norm": 1.4685283699391545,
      "learning_rate": 4.689794192938156e-07,
      "loss": 1.6264,
      "step": 4600
    },
    {
      "epoch": 0.17067809446404492,
      "grad_norm": 1.7781661683868824,
      "learning_rate": 4.687142633134619e-07,
      "loss": 1.6875,
      "step": 4620
    },
    {
      "epoch": 0.17141696067384599,
      "grad_norm": 1.6196809334292608,
      "learning_rate": 4.6844806355350623e-07,
      "loss": 1.6753,
      "step": 4640
    },
    {
      "epoch": 0.17215582688364706,
      "grad_norm": 1.6293152376567321,
      "learning_rate": 4.6818082144829787e-07,
      "loss": 1.6665,
      "step": 4660
    },
    {
      "epoch": 0.1728946930934481,
      "grad_norm": 1.510069163173277,
      "learning_rate": 4.6791253843780217e-07,
      "loss": 1.6697,
      "step": 4680
    },
    {
      "epoch": 0.17363355930324917,
      "grad_norm": 1.4471973015401869,
      "learning_rate": 4.676432159675933e-07,
      "loss": 1.6806,
      "step": 4700
    },
    {
      "epoch": 0.1743724255130502,
      "grad_norm": 1.7753201121195747,
      "learning_rate": 4.6737285548884655e-07,
      "loss": 1.6935,
      "step": 4720
    },
    {
      "epoch": 0.17511129172285128,
      "grad_norm": 1.5222859899502188,
      "learning_rate": 4.671014584583296e-07,
      "loss": 1.6664,
      "step": 4740
    },
    {
      "epoch": 0.17585015793265235,
      "grad_norm": 1.4892529478692567,
      "learning_rate": 4.668290263383959e-07,
      "loss": 1.6669,
      "step": 4760
    },
    {
      "epoch": 0.1765890241424534,
      "grad_norm": 1.5841443455470228,
      "learning_rate": 4.66555560596976e-07,
      "loss": 1.6419,
      "step": 4780
    },
    {
      "epoch": 0.17732789035225446,
      "grad_norm": 1.5264328160932443,
      "learning_rate": 4.6628106270757e-07,
      "loss": 1.6642,
      "step": 4800
    },
    {
      "epoch": 0.17806675656205553,
      "grad_norm": 1.6887371299004348,
      "learning_rate": 4.6600553414923913e-07,
      "loss": 1.6387,
      "step": 4820
    },
    {
      "epoch": 0.17880562277185658,
      "grad_norm": 1.4594422560615166,
      "learning_rate": 4.657289764065985e-07,
      "loss": 1.6493,
      "step": 4840
    },
    {
      "epoch": 0.17954448898165765,
      "grad_norm": 1.6615232385858325,
      "learning_rate": 4.6545139096980846e-07,
      "loss": 1.6312,
      "step": 4860
    },
    {
      "epoch": 0.18028335519145872,
      "grad_norm": 1.4161658999634517,
      "learning_rate": 4.651727793345669e-07,
      "loss": 1.687,
      "step": 4880
    },
    {
      "epoch": 0.18102222140125976,
      "grad_norm": 1.4750799503852594,
      "learning_rate": 4.6489314300210117e-07,
      "loss": 1.6579,
      "step": 4900
    },
    {
      "epoch": 0.18176108761106083,
      "grad_norm": 1.5823630581751142,
      "learning_rate": 4.646124834791598e-07,
      "loss": 1.6974,
      "step": 4920
    },
    {
      "epoch": 0.1824999538208619,
      "grad_norm": 1.5953496527857909,
      "learning_rate": 4.6433080227800476e-07,
      "loss": 1.6349,
      "step": 4940
    },
    {
      "epoch": 0.18323882003066294,
      "grad_norm": 1.8088958779925088,
      "learning_rate": 4.640481009164028e-07,
      "loss": 1.7021,
      "step": 4960
    },
    {
      "epoch": 0.183977686240464,
      "grad_norm": 1.6985722383661672,
      "learning_rate": 4.6376438091761776e-07,
      "loss": 1.6835,
      "step": 4980
    },
    {
      "epoch": 0.18471655245026508,
      "grad_norm": 1.5740586459999972,
      "learning_rate": 4.63479643810402e-07,
      "loss": 1.6778,
      "step": 5000
    },
    {
      "epoch": 0.18545541866006612,
      "grad_norm": 1.5576615822168314,
      "learning_rate": 4.631938911289884e-07,
      "loss": 1.6432,
      "step": 5020
    },
    {
      "epoch": 0.1861942848698672,
      "grad_norm": 1.4882435243374539,
      "learning_rate": 4.629071244130818e-07,
      "loss": 1.697,
      "step": 5040
    },
    {
      "epoch": 0.18693315107966824,
      "grad_norm": 1.7414218611909407,
      "learning_rate": 4.6261934520785135e-07,
      "loss": 1.6472,
      "step": 5060
    },
    {
      "epoch": 0.1876720172894693,
      "grad_norm": 1.5111215790202166,
      "learning_rate": 4.623305550639212e-07,
      "loss": 1.6814,
      "step": 5080
    },
    {
      "epoch": 0.18841088349927038,
      "grad_norm": 1.4998930010938694,
      "learning_rate": 4.6204075553736317e-07,
      "loss": 1.6965,
      "step": 5100
    },
    {
      "epoch": 0.18914974970907142,
      "grad_norm": 1.6166379161449234,
      "learning_rate": 4.617499481896874e-07,
      "loss": 1.6367,
      "step": 5120
    },
    {
      "epoch": 0.1898886159188725,
      "grad_norm": 1.564060473042759,
      "learning_rate": 4.6145813458783484e-07,
      "loss": 1.6404,
      "step": 5140
    },
    {
      "epoch": 0.19062748212867356,
      "grad_norm": 1.5498475055243737,
      "learning_rate": 4.611653163041681e-07,
      "loss": 1.64,
      "step": 5160
    },
    {
      "epoch": 0.1913663483384746,
      "grad_norm": 1.6108037998097682,
      "learning_rate": 4.6087149491646343e-07,
      "loss": 1.699,
      "step": 5180
    },
    {
      "epoch": 0.19210521454827567,
      "grad_norm": 1.6995541712978521,
      "learning_rate": 4.6057667200790203e-07,
      "loss": 1.6546,
      "step": 5200
    },
    {
      "epoch": 0.19284408075807674,
      "grad_norm": 1.499037507366822,
      "learning_rate": 4.6028084916706147e-07,
      "loss": 1.6083,
      "step": 5220
    },
    {
      "epoch": 0.19358294696787778,
      "grad_norm": 1.5172594570626625,
      "learning_rate": 4.5998402798790704e-07,
      "loss": 1.6699,
      "step": 5240
    },
    {
      "epoch": 0.19432181317767885,
      "grad_norm": 1.4963740648019974,
      "learning_rate": 4.5968621006978373e-07,
      "loss": 1.6898,
      "step": 5260
    },
    {
      "epoch": 0.19506067938747992,
      "grad_norm": 2.566805183937073,
      "learning_rate": 4.5938739701740686e-07,
      "loss": 1.6694,
      "step": 5280
    },
    {
      "epoch": 0.19579954559728097,
      "grad_norm": 1.4540566793967926,
      "learning_rate": 4.590875904408539e-07,
      "loss": 1.6692,
      "step": 5300
    },
    {
      "epoch": 0.19653841180708204,
      "grad_norm": 3.9730656922103447,
      "learning_rate": 4.587867919555557e-07,
      "loss": 1.6625,
      "step": 5320
    },
    {
      "epoch": 0.1972772780168831,
      "grad_norm": 1.5142078546698041,
      "learning_rate": 4.5848500318228774e-07,
      "loss": 1.6654,
      "step": 5340
    },
    {
      "epoch": 0.19801614422668415,
      "grad_norm": 1.7032492720795371,
      "learning_rate": 4.5818222574716127e-07,
      "loss": 1.7022,
      "step": 5360
    },
    {
      "epoch": 0.19875501043648522,
      "grad_norm": 1.554191757548726,
      "learning_rate": 4.578784612816149e-07,
      "loss": 1.6811,
      "step": 5380
    },
    {
      "epoch": 0.19949387664628626,
      "grad_norm": 1.4929225978552914,
      "learning_rate": 4.5758897229313755e-07,
      "loss": 1.6509,
      "step": 5400
    },
    {
      "epoch": 0.20023274285608733,
      "grad_norm": 1.4628893559215694,
      "learning_rate": 4.5728328783083036e-07,
      "loss": 1.7302,
      "step": 5420
    },
    {
      "epoch": 0.2009716090658884,
      "grad_norm": 1.493249123165425,
      "learning_rate": 4.5699197781569844e-07,
      "loss": 1.6383,
      "step": 5440
    },
    {
      "epoch": 0.20171047527568944,
      "grad_norm": 1.600690331893774,
      "learning_rate": 4.5668437961972905e-07,
      "loss": 1.6189,
      "step": 5460
    },
    {
      "epoch": 0.2024493414854905,
      "grad_norm": 1.464802503893095,
      "learning_rate": 4.5637580246409934e-07,
      "loss": 1.65,
      "step": 5480
    },
    {
      "epoch": 0.20318820769529158,
      "grad_norm": 1.5375722464094912,
      "learning_rate": 4.5606624801149797e-07,
      "loss": 1.6546,
      "step": 5500
    },
    {
      "epoch": 0.20392707390509263,
      "grad_norm": 1.5967568446324583,
      "learning_rate": 4.5575571792987984e-07,
      "loss": 1.6286,
      "step": 5520
    },
    {
      "epoch": 0.2046659401148937,
      "grad_norm": 1.5568969231756908,
      "learning_rate": 4.5544421389245646e-07,
      "loss": 1.6278,
      "step": 5540
    },
    {
      "epoch": 0.20540480632469477,
      "grad_norm": 1.5499607650206735,
      "learning_rate": 4.5513173757768746e-07,
      "loss": 1.6755,
      "step": 5560
    },
    {
      "epoch": 0.2061436725344958,
      "grad_norm": 1.4823222337131237,
      "learning_rate": 4.548182906692714e-07,
      "loss": 1.6661,
      "step": 5580
    },
    {
      "epoch": 0.20688253874429688,
      "grad_norm": 1.507552555113675,
      "learning_rate": 4.5450387485613635e-07,
      "loss": 1.6659,
      "step": 5600
    },
    {
      "epoch": 0.20762140495409795,
      "grad_norm": 1.4811185047336115,
      "learning_rate": 4.541884918324313e-07,
      "loss": 1.656,
      "step": 5620
    },
    {
      "epoch": 0.208360271163899,
      "grad_norm": 1.576191450168426,
      "learning_rate": 4.538721432975168e-07,
      "loss": 1.6875,
      "step": 5640
    },
    {
      "epoch": 0.20909913737370006,
      "grad_norm": 1.7938635395127402,
      "learning_rate": 4.535707194370682e-07,
      "loss": 1.6646,
      "step": 5660
    },
    {
      "epoch": 0.2098380035835011,
      "grad_norm": 1.6552255449585238,
      "learning_rate": 4.532524930627744e-07,
      "loss": 1.6524,
      "step": 5680
    },
    {
      "epoch": 0.21057686979330217,
      "grad_norm": 1.7516118506092397,
      "learning_rate": 4.5293330622066034e-07,
      "loss": 1.6157,
      "step": 5700
    },
    {
      "epoch": 0.21131573600310324,
      "grad_norm": 1.4545866638005132,
      "learning_rate": 4.526131606305823e-07,
      "loss": 1.6476,
      "step": 5720
    },
    {
      "epoch": 0.2120546022129043,
      "grad_norm": 1.6248585310317667,
      "learning_rate": 4.5229205801756273e-07,
      "loss": 1.6573,
      "step": 5740
    },
    {
      "epoch": 0.21279346842270536,
      "grad_norm": 1.41925791489552,
      "learning_rate": 4.519700001117807e-07,
      "loss": 1.6685,
      "step": 5760
    },
    {
      "epoch": 0.21353233463250643,
      "grad_norm": 1.7509635950883726,
      "learning_rate": 4.5164698864856257e-07,
      "loss": 1.6812,
      "step": 5780
    },
    {
      "epoch": 0.21427120084230747,
      "grad_norm": 1.4694228842841779,
      "learning_rate": 4.5132302536837273e-07,
      "loss": 1.6556,
      "step": 5800
    },
    {
      "epoch": 0.21501006705210854,
      "grad_norm": 1.553864895417105,
      "learning_rate": 4.5099811201680416e-07,
      "loss": 1.6883,
      "step": 5820
    },
    {
      "epoch": 0.2157489332619096,
      "grad_norm": 1.491366651426128,
      "learning_rate": 4.506722503445691e-07,
      "loss": 1.6613,
      "step": 5840
    },
    {
      "epoch": 0.21648779947171065,
      "grad_norm": 1.6466798284982602,
      "learning_rate": 4.5034544210748953e-07,
      "loss": 1.6497,
      "step": 5860
    },
    {
      "epoch": 0.21722666568151172,
      "grad_norm": 1.4331846976152014,
      "learning_rate": 4.5001768906648783e-07,
      "loss": 1.6583,
      "step": 5880
    },
    {
      "epoch": 0.2179655318913128,
      "grad_norm": 2.4779046528418793,
      "learning_rate": 4.496889929875771e-07,
      "loss": 1.6456,
      "step": 5900
    },
    {
      "epoch": 0.21870439810111383,
      "grad_norm": 1.6613792185698004,
      "learning_rate": 4.493593556418519e-07,
      "loss": 1.6876,
      "step": 5920
    },
    {
      "epoch": 0.2194432643109149,
      "grad_norm": 1.5936970250540041,
      "learning_rate": 4.490287788054785e-07,
      "loss": 1.6856,
      "step": 5940
    },
    {
      "epoch": 0.22018213052071597,
      "grad_norm": 1.7774522510719284,
      "learning_rate": 4.486972642596852e-07,
      "loss": 1.6574,
      "step": 5960
    },
    {
      "epoch": 0.22092099673051702,
      "grad_norm": 1.5404871158832736,
      "learning_rate": 4.483648137907532e-07,
      "loss": 1.6637,
      "step": 5980
    },
    {
      "epoch": 0.2216598629403181,
      "grad_norm": 1.5238762502370415,
      "learning_rate": 4.4803142919000645e-07,
      "loss": 1.6526,
      "step": 6000
    },
    {
      "epoch": 0.22239872915011913,
      "grad_norm": 1.4681103098352588,
      "learning_rate": 4.4769711225380254e-07,
      "loss": 1.6538,
      "step": 6020
    },
    {
      "epoch": 0.2231375953599202,
      "grad_norm": 1.406496721553823,
      "learning_rate": 4.4736186478352225e-07,
      "loss": 1.6593,
      "step": 6040
    },
    {
      "epoch": 0.22387646156972127,
      "grad_norm": 1.6502790317877305,
      "learning_rate": 4.4702568858556063e-07,
      "loss": 1.6946,
      "step": 6060
    },
    {
      "epoch": 0.2246153277795223,
      "grad_norm": 1.5544958034860874,
      "learning_rate": 4.466885854713169e-07,
      "loss": 1.6922,
      "step": 6080
    },
    {
      "epoch": 0.22535419398932338,
      "grad_norm": 1.35257283259656,
      "learning_rate": 4.463505572571847e-07,
      "loss": 1.6646,
      "step": 6100
    },
    {
      "epoch": 0.22609306019912445,
      "grad_norm": 1.624788597950665,
      "learning_rate": 4.460116057645422e-07,
      "loss": 1.6464,
      "step": 6120
    },
    {
      "epoch": 0.2268319264089255,
      "grad_norm": 1.5573729356283417,
      "learning_rate": 4.4567173281974274e-07,
      "loss": 1.6311,
      "step": 6140
    },
    {
      "epoch": 0.22757079261872656,
      "grad_norm": 1.9342192243430807,
      "learning_rate": 4.453309402541044e-07,
      "loss": 1.6517,
      "step": 6160
    },
    {
      "epoch": 0.22830965882852763,
      "grad_norm": 1.6525422759457808,
      "learning_rate": 4.4498922990390044e-07,
      "loss": 1.6584,
      "step": 6180
    },
    {
      "epoch": 0.22904852503832868,
      "grad_norm": 1.3709737663427297,
      "learning_rate": 4.446466036103493e-07,
      "loss": 1.6552,
      "step": 6200
    },
    {
      "epoch": 0.22978739124812975,
      "grad_norm": 1.7619047090616546,
      "learning_rate": 4.44303063219605e-07,
      "loss": 1.6515,
      "step": 6220
    },
    {
      "epoch": 0.23052625745793082,
      "grad_norm": 1.425527104774275,
      "learning_rate": 4.439586105827468e-07,
      "loss": 1.7082,
      "step": 6240
    },
    {
      "epoch": 0.23126512366773186,
      "grad_norm": 2.183066565667764,
      "learning_rate": 4.436132475557693e-07,
      "loss": 1.6457,
      "step": 6260
    },
    {
      "epoch": 0.23200398987753293,
      "grad_norm": 2.5631189419788103,
      "learning_rate": 4.432669759995725e-07,
      "loss": 1.6441,
      "step": 6280
    },
    {
      "epoch": 0.232742856087334,
      "grad_norm": 1.531958854525398,
      "learning_rate": 4.4291979777995186e-07,
      "loss": 1.6597,
      "step": 6300
    },
    {
      "epoch": 0.23348172229713504,
      "grad_norm": 1.7334807358971334,
      "learning_rate": 4.4257171476758813e-07,
      "loss": 1.6189,
      "step": 6320
    },
    {
      "epoch": 0.2342205885069361,
      "grad_norm": 1.606688663391079,
      "learning_rate": 4.422227288380374e-07,
      "loss": 1.6635,
      "step": 6340
    },
    {
      "epoch": 0.23495945471673715,
      "grad_norm": 1.5504111994528522,
      "learning_rate": 4.418728418717207e-07,
      "loss": 1.6619,
      "step": 6360
    },
    {
      "epoch": 0.23569832092653822,
      "grad_norm": 1.7059923161913078,
      "learning_rate": 4.415220557539142e-07,
      "loss": 1.6518,
      "step": 6380
    },
    {
      "epoch": 0.2364371871363393,
      "grad_norm": 1.5282124634083587,
      "learning_rate": 4.411703723747389e-07,
      "loss": 1.6281,
      "step": 6400
    },
    {
      "epoch": 0.23717605334614034,
      "grad_norm": 1.817029524914551,
      "learning_rate": 4.4081779362915033e-07,
      "loss": 1.6196,
      "step": 6420
    },
    {
      "epoch": 0.2379149195559414,
      "grad_norm": 1.4287258918617316,
      "learning_rate": 4.404643214169288e-07,
      "loss": 1.6552,
      "step": 6440
    },
    {
      "epoch": 0.23865378576574248,
      "grad_norm": 1.4874633967888828,
      "learning_rate": 4.4010995764266845e-07,
      "loss": 1.6398,
      "step": 6460
    },
    {
      "epoch": 0.23939265197554352,
      "grad_norm": 1.721122957795877,
      "learning_rate": 4.3975470421576764e-07,
      "loss": 1.6512,
      "step": 6480
    },
    {
      "epoch": 0.2401315181853446,
      "grad_norm": 1.523301573082442,
      "learning_rate": 4.393985630504183e-07,
      "loss": 1.6782,
      "step": 6500
    },
    {
      "epoch": 0.24087038439514566,
      "grad_norm": 1.4599906341858953,
      "learning_rate": 4.390415360655957e-07,
      "loss": 1.6396,
      "step": 6520
    },
    {
      "epoch": 0.2416092506049467,
      "grad_norm": 1.5009190844531946,
      "learning_rate": 4.386836251850481e-07,
      "loss": 1.648,
      "step": 6540
    },
    {
      "epoch": 0.24234811681474777,
      "grad_norm": 1.3512220497620588,
      "learning_rate": 4.3832483233728654e-07,
      "loss": 1.6712,
      "step": 6560
    },
    {
      "epoch": 0.24308698302454884,
      "grad_norm": 1.6590943419842232,
      "learning_rate": 4.379651594555741e-07,
      "loss": 1.6174,
      "step": 6580
    },
    {
      "epoch": 0.24382584923434988,
      "grad_norm": 1.3956181675020793,
      "learning_rate": 4.376046084779159e-07,
      "loss": 1.6173,
      "step": 6600
    },
    {
      "epoch": 0.24456471544415095,
      "grad_norm": 1.5798276517321244,
      "learning_rate": 4.3724318134704826e-07,
      "loss": 1.6419,
      "step": 6620
    },
    {
      "epoch": 0.24530358165395202,
      "grad_norm": 1.4769865046542814,
      "learning_rate": 4.3688088001042866e-07,
      "loss": 1.6631,
      "step": 6640
    },
    {
      "epoch": 0.24604244786375307,
      "grad_norm": 1.7571296905735259,
      "learning_rate": 4.3651770642022483e-07,
      "loss": 1.6615,
      "step": 6660
    },
    {
      "epoch": 0.24678131407355414,
      "grad_norm": 10.261084539724488,
      "learning_rate": 4.361536625333045e-07,
      "loss": 1.6515,
      "step": 6680
    },
    {
      "epoch": 0.24752018028335518,
      "grad_norm": 2.7070070654149956,
      "learning_rate": 4.3578875031122466e-07,
      "loss": 1.6584,
      "step": 6700
    },
    {
      "epoch": 0.24825904649315625,
      "grad_norm": 1.54607876926978,
      "learning_rate": 4.3542297172022126e-07,
      "loss": 1.6517,
      "step": 6720
    },
    {
      "epoch": 0.24899791270295732,
      "grad_norm": 1.3861037085930092,
      "learning_rate": 4.3505632873119844e-07,
      "loss": 1.6686,
      "step": 6740
    },
    {
      "epoch": 0.24973677891275836,
      "grad_norm": 1.4161848471548175,
      "learning_rate": 4.346888233197178e-07,
      "loss": 1.6449,
      "step": 6760
    },
    {
      "epoch": 0.25047564512255943,
      "grad_norm": 1.9634719417599906,
      "learning_rate": 4.343204574659878e-07,
      "loss": 1.6586,
      "step": 6780
    },
    {
      "epoch": 0.2512145113323605,
      "grad_norm": 2.2362709149394835,
      "learning_rate": 4.339512331548535e-07,
      "loss": 1.6481,
      "step": 6800
    },
    {
      "epoch": 0.25195337754216157,
      "grad_norm": 2.435262162446439,
      "learning_rate": 4.335811523757855e-07,
      "loss": 1.6751,
      "step": 6820
    },
    {
      "epoch": 0.2526922437519626,
      "grad_norm": 1.4440630152259213,
      "learning_rate": 4.3321021712286874e-07,
      "loss": 1.6865,
      "step": 6840
    },
    {
      "epoch": 0.25343110996176366,
      "grad_norm": 1.6572017188801809,
      "learning_rate": 4.3283842939479297e-07,
      "loss": 1.6874,
      "step": 6860
    },
    {
      "epoch": 0.2541699761715647,
      "grad_norm": 1.6358091879473202,
      "learning_rate": 4.3246579119484086e-07,
      "loss": 1.6442,
      "step": 6880
    },
    {
      "epoch": 0.2549088423813658,
      "grad_norm": 1.861949731594006,
      "learning_rate": 4.3209230453087763e-07,
      "loss": 1.6596,
      "step": 6900
    },
    {
      "epoch": 0.25564770859116687,
      "grad_norm": 1.576364259347636,
      "learning_rate": 4.317179714153405e-07,
      "loss": 1.6409,
      "step": 6920
    },
    {
      "epoch": 0.25638657480096794,
      "grad_norm": 1.6344350623705748,
      "learning_rate": 4.3134279386522734e-07,
      "loss": 1.6634,
      "step": 6940
    },
    {
      "epoch": 0.25712544101076895,
      "grad_norm": 2.4484913186668056,
      "learning_rate": 4.3096677390208606e-07,
      "loss": 1.6635,
      "step": 6960
    },
    {
      "epoch": 0.25786430722057,
      "grad_norm": 1.459583448230627,
      "learning_rate": 4.3058991355200385e-07,
      "loss": 1.6437,
      "step": 6980
    },
    {
      "epoch": 0.2586031734303711,
      "grad_norm": 2.0774440428993426,
      "learning_rate": 4.302122148455959e-07,
      "loss": 1.6807,
      "step": 7000
    },
    {
      "epoch": 0.25934203964017216,
      "grad_norm": 1.4906050741171306,
      "learning_rate": 4.2983367981799484e-07,
      "loss": 1.6477,
      "step": 7020
    },
    {
      "epoch": 0.26008090584997323,
      "grad_norm": 1.6727105507446454,
      "learning_rate": 4.294543105088395e-07,
      "loss": 1.617,
      "step": 7040
    },
    {
      "epoch": 0.2608197720597743,
      "grad_norm": 1.4754199269220696,
      "learning_rate": 4.2907410896226415e-07,
      "loss": 1.6391,
      "step": 7060
    },
    {
      "epoch": 0.2615586382695753,
      "grad_norm": 1.5380802874413815,
      "learning_rate": 4.2869307722688715e-07,
      "loss": 1.687,
      "step": 7080
    },
    {
      "epoch": 0.2622975044793764,
      "grad_norm": 1.6040883755814137,
      "learning_rate": 4.283112173558003e-07,
      "loss": 1.7171,
      "step": 7100
    },
    {
      "epoch": 0.26303637068917746,
      "grad_norm": 2.822094109735399,
      "learning_rate": 4.279285314065575e-07,
      "loss": 1.6671,
      "step": 7120
    },
    {
      "epoch": 0.2637752368989785,
      "grad_norm": 1.4328096068889253,
      "learning_rate": 4.275450214411638e-07,
      "loss": 1.6475,
      "step": 7140
    },
    {
      "epoch": 0.2645141031087796,
      "grad_norm": 1.624272809516238,
      "learning_rate": 4.2716068952606424e-07,
      "loss": 1.693,
      "step": 7160
    },
    {
      "epoch": 0.2652529693185806,
      "grad_norm": 1.502383886350249,
      "learning_rate": 4.267755377321327e-07,
      "loss": 1.6592,
      "step": 7180
    },
    {
      "epoch": 0.2659918355283817,
      "grad_norm": 1.4780327874669796,
      "learning_rate": 4.2638956813466094e-07,
      "loss": 1.6273,
      "step": 7200
    },
    {
      "epoch": 0.26673070173818275,
      "grad_norm": 1.647788340317037,
      "learning_rate": 4.2600278281334683e-07,
      "loss": 1.7177,
      "step": 7220
    },
    {
      "epoch": 0.2674695679479838,
      "grad_norm": 1.4249175729696602,
      "learning_rate": 4.256151838522842e-07,
      "loss": 1.6134,
      "step": 7240
    },
    {
      "epoch": 0.2682084341577849,
      "grad_norm": 1.525640467280493,
      "learning_rate": 4.252267733399502e-07,
      "loss": 1.6279,
      "step": 7260
    },
    {
      "epoch": 0.26894730036758596,
      "grad_norm": 1.5643231773087998,
      "learning_rate": 4.2483755336919546e-07,
      "loss": 1.6319,
      "step": 7280
    },
    {
      "epoch": 0.269686166577387,
      "grad_norm": 1.5088025290660787,
      "learning_rate": 4.2444752603723185e-07,
      "loss": 1.6465,
      "step": 7300
    },
    {
      "epoch": 0.27042503278718805,
      "grad_norm": 1.690559249481047,
      "learning_rate": 4.2405669344562157e-07
|
"loss": 1.6597, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.2711638989969891, |
|
"grad_norm": 1.4158777914075165, |
|
"learning_rate": 4.236650577002658e-07, |
|
"loss": 1.6498, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.2719027652067902, |
|
"grad_norm": 1.4954788634515361, |
|
"learning_rate": 4.232726209113931e-07, |
|
"loss": 1.7073, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.27264163141659126, |
|
"grad_norm": 1.96245857269846, |
|
"learning_rate": 4.228793851935486e-07, |
|
"loss": 1.6559, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.2733804976263923, |
|
"grad_norm": 1.5534874631194424, |
|
"learning_rate": 4.22485352665582e-07, |
|
"loss": 1.6795, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.27411936383619334, |
|
"grad_norm": 1.513478614204036, |
|
"learning_rate": 4.2209052545063645e-07, |
|
"loss": 1.6598, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.2748582300459944, |
|
"grad_norm": 1.4981685008613979, |
|
"learning_rate": 4.216949056761371e-07, |
|
"loss": 1.6796, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.2755970962557955, |
|
"grad_norm": 1.453166525310124, |
|
"learning_rate": 4.212984954737796e-07, |
|
"loss": 1.6547, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.27633596246559655, |
|
"grad_norm": 1.4590359213340498, |
|
"learning_rate": 4.2090129697951865e-07, |
|
"loss": 1.668, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.2770748286753976, |
|
"grad_norm": 1.5012030999873756, |
|
"learning_rate": 4.205033123335563e-07, |
|
"loss": 1.6253, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.27781369488519864, |
|
"grad_norm": 1.605863135582104, |
|
"learning_rate": 4.2010454368033075e-07, |
|
"loss": 1.6684, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.2785525610949997, |
|
"grad_norm": 1.9991749625802369, |
|
"learning_rate": 4.197049931685046e-07, |
|
"loss": 1.6403, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.2792914273048008, |
|
"grad_norm": 1.5084206750440898, |
|
"learning_rate": 4.193046629509533e-07, |
|
"loss": 1.6673, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.28003029351460185, |
|
"grad_norm": 1.6013334792913052, |
|
"learning_rate": 4.1890355518475335e-07, |
|
"loss": 1.6483, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.2807691597244029, |
|
"grad_norm": 1.798812837038986, |
|
"learning_rate": 4.185016720311712e-07, |
|
"loss": 1.6795, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.281508025934204, |
|
"grad_norm": 1.4900500600235345, |
|
"learning_rate": 4.18099015655651e-07, |
|
"loss": 1.6807, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.282246892144005, |
|
"grad_norm": 1.6028189719479609, |
|
"learning_rate": 4.176955882278033e-07, |
|
"loss": 1.6596, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.28298575835380607, |
|
"grad_norm": 1.9939881516366833, |
|
"learning_rate": 4.1729139192139335e-07, |
|
"loss": 1.6695, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.28372462456360714, |
|
"grad_norm": 1.5127346940191255, |
|
"learning_rate": 4.168864289143291e-07, |
|
"loss": 1.7078, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.2844634907734082, |
|
"grad_norm": 1.5284950240291668, |
|
"learning_rate": 4.1648070138864993e-07, |
|
"loss": 1.7175, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.2852023569832093, |
|
"grad_norm": 1.5249438102092971, |
|
"learning_rate": 4.1607421153051454e-07, |
|
"loss": 1.6753, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.28594122319301035, |
|
"grad_norm": 1.6281345917446086, |
|
"learning_rate": 4.156669615301891e-07, |
|
"loss": 1.6455, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.28668008940281137, |
|
"grad_norm": 1.7327391694790744, |
|
"learning_rate": 4.152589535820358e-07, |
|
"loss": 1.6115, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.28741895561261244, |
|
"grad_norm": 1.8046545180697087, |
|
"learning_rate": 4.148501898845008e-07, |
|
"loss": 1.6752, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.2881578218224135, |
|
"grad_norm": 1.4479684507284691, |
|
"learning_rate": 4.144406726401024e-07, |
|
"loss": 1.7095, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.2888966880322146, |
|
"grad_norm": 1.5133767331728856, |
|
"learning_rate": 4.140304040554192e-07, |
|
"loss": 1.6637, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.28963555424201565, |
|
"grad_norm": 1.69526484807945, |
|
"learning_rate": 4.1361938634107795e-07, |
|
"loss": 1.6604, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.29037442045181666, |
|
"grad_norm": 1.5901137640996412, |
|
"learning_rate": 4.132076217117425e-07, |
|
"loss": 1.7023, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.29111328666161773, |
|
"grad_norm": 1.423118541107655, |
|
"learning_rate": 4.1279511238610075e-07, |
|
"loss": 1.6251, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.2918521528714188, |
|
"grad_norm": 1.3770610046698395, |
|
"learning_rate": 4.123818605868533e-07, |
|
"loss": 1.6859, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.29259101908121987, |
|
"grad_norm": 1.5512042035926865, |
|
"learning_rate": 4.1196786854070147e-07, |
|
"loss": 1.6682, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.29332988529102094, |
|
"grad_norm": 1.5657764052019774, |
|
"learning_rate": 4.115531384783352e-07, |
|
"loss": 1.6373, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.294068751500822, |
|
"grad_norm": 1.3977001410170469, |
|
"learning_rate": 4.11137672634421e-07, |
|
"loss": 1.623, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.294807617710623, |
|
"grad_norm": 1.5471885506840533, |
|
"learning_rate": 4.1072147324759007e-07, |
|
"loss": 1.6359, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.2955464839204241, |
|
"grad_norm": 1.9646501043093372, |
|
"learning_rate": 4.103045425604257e-07, |
|
"loss": 1.6575, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.29628535013022517, |
|
"grad_norm": 2.4554925260754192, |
|
"learning_rate": 4.098868828194523e-07, |
|
"loss": 1.6505, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.29702421634002624, |
|
"grad_norm": 1.5764440647794176, |
|
"learning_rate": 4.0946849627512194e-07, |
|
"loss": 1.6537, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.2977630825498273, |
|
"grad_norm": 1.5679031999275903, |
|
"learning_rate": 4.090493851818032e-07, |
|
"loss": 1.6678, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.2985019487596284, |
|
"grad_norm": 1.5427978270277976, |
|
"learning_rate": 4.086295517977688e-07, |
|
"loss": 1.646, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.2992408149694294, |
|
"grad_norm": 1.6159758168642673, |
|
"learning_rate": 4.082089983851831e-07, |
|
"loss": 1.6543, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.29997968117923046, |
|
"grad_norm": 1.4061897285537437, |
|
"learning_rate": 4.0778772721009036e-07, |
|
"loss": 1.6285, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.30071854738903153, |
|
"grad_norm": 1.3965741494953192, |
|
"learning_rate": 4.073657405424019e-07, |
|
"loss": 1.6656, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.3014574135988326, |
|
"grad_norm": 1.5484468689064121, |
|
"learning_rate": 4.06943040655885e-07, |
|
"loss": 1.661, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.30219627980863367, |
|
"grad_norm": 1.5843927161871971, |
|
"learning_rate": 4.065196298281493e-07, |
|
"loss": 1.6622, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.3029351460184347, |
|
"grad_norm": 1.6553065392619284, |
|
"learning_rate": 4.0609551034063555e-07, |
|
"loss": 1.6989, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.30367401222823576, |
|
"grad_norm": 1.6004229625484228, |
|
"learning_rate": 4.056706844786025e-07, |
|
"loss": 1.6673, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.3044128784380368, |
|
"grad_norm": 1.7218496726083523, |
|
"learning_rate": 4.052451545311157e-07, |
|
"loss": 1.7071, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.3051517446478379, |
|
"grad_norm": 1.4453612541643919, |
|
"learning_rate": 4.0481892279103375e-07, |
|
"loss": 1.6418, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.30589061085763897, |
|
"grad_norm": 2.0343056912272415, |
|
"learning_rate": 4.043919915549972e-07, |
|
"loss": 1.6406, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.30662947706744004, |
|
"grad_norm": 1.4141851056827188, |
|
"learning_rate": 4.0396436312341537e-07, |
|
"loss": 1.6697, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.30736834327724105, |
|
"grad_norm": 1.7030187367387806, |
|
"learning_rate": 4.0353603980045434e-07, |
|
"loss": 1.648, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.3081072094870421, |
|
"grad_norm": 1.4580931131013146, |
|
"learning_rate": 4.0310702389402455e-07, |
|
"loss": 1.6738, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.3088460756968432, |
|
"grad_norm": 1.6315260212867364, |
|
"learning_rate": 4.0267731771576795e-07, |
|
"loss": 1.6568, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.30958494190664426, |
|
"grad_norm": 1.760277165218215, |
|
"learning_rate": 4.022469235810462e-07, |
|
"loss": 1.7044, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.31032380811644533, |
|
"grad_norm": 1.5247483148379708, |
|
"learning_rate": 4.0181584380892747e-07, |
|
"loss": 1.625, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.3110626743262464, |
|
"grad_norm": 1.6055425468824278, |
|
"learning_rate": 4.0138408072217467e-07, |
|
"loss": 1.6332, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.3118015405360474, |
|
"grad_norm": 2.522263277058951, |
|
"learning_rate": 4.009516366472323e-07, |
|
"loss": 1.6795, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.3125404067458485, |
|
"grad_norm": 1.4776229994815417, |
|
"learning_rate": 4.005185139142143e-07, |
|
"loss": 1.6675, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.31327927295564956, |
|
"grad_norm": 1.458660936186841, |
|
"learning_rate": 4.000847148568915e-07, |
|
"loss": 1.661, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.3140181391654506, |
|
"grad_norm": 1.5895551714359692, |
|
"learning_rate": 3.9965024181267865e-07, |
|
"loss": 1.6474, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.3147570053752517, |
|
"grad_norm": 1.6027764846949324, |
|
"learning_rate": 3.9921509712262237e-07, |
|
"loss": 1.7055, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.3154958715850527, |
|
"grad_norm": 1.4709407841933115, |
|
"learning_rate": 3.9877928313138807e-07, |
|
"loss": 1.6721, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.3162347377948538, |
|
"grad_norm": 1.4461242455876133, |
|
"learning_rate": 3.983428021872477e-07, |
|
"loss": 1.6496, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.31697360400465485, |
|
"grad_norm": 1.4524171700785795, |
|
"learning_rate": 3.979056566420668e-07, |
|
"loss": 1.6553, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.3177124702144559, |
|
"grad_norm": 1.5057325136067627, |
|
"learning_rate": 3.974678488512921e-07, |
|
"loss": 1.6723, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.318451336424257, |
|
"grad_norm": 1.4293777770249827, |
|
"learning_rate": 3.9702938117393825e-07, |
|
"loss": 1.6586, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.31919020263405806, |
|
"grad_norm": 1.4212368243075615, |
|
"learning_rate": 3.965902559725761e-07, |
|
"loss": 1.6458, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.3199290688438591, |
|
"grad_norm": 1.4727420961415922, |
|
"learning_rate": 3.961504756133189e-07, |
|
"loss": 1.6481, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.32066793505366015, |
|
"grad_norm": 2.5900548552419895, |
|
"learning_rate": 3.9573207959028544e-07, |
|
"loss": 1.621, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.3214068012634612, |
|
"grad_norm": 1.5430259080799726, |
|
"learning_rate": 3.952910284920244e-07, |
|
"loss": 1.6812, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.3221456674732623, |
|
"grad_norm": 1.4794345694793534, |
|
"learning_rate": 3.948493292364224e-07, |
|
"loss": 1.6585, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.32288453368306336, |
|
"grad_norm": 1.4614630552620829, |
|
"learning_rate": 3.9440698420346246e-07, |
|
"loss": 1.6466, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.3236233998928644, |
|
"grad_norm": 1.4393288175430394, |
|
"learning_rate": 3.939639957766073e-07, |
|
"loss": 1.6215, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.32436226610266544, |
|
"grad_norm": 2.1230018342791532, |
|
"learning_rate": 3.9352036634278634e-07, |
|
"loss": 1.6803, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.3251011323124665, |
|
"grad_norm": 1.6164570568462948, |
|
"learning_rate": 3.9307609829238297e-07, |
|
"loss": 1.6766, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.3258399985222676, |
|
"grad_norm": 1.4370335980422504, |
|
"learning_rate": 3.9263119401922175e-07, |
|
"loss": 1.6822, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.32657886473206865, |
|
"grad_norm": 1.644081010299245, |
|
"learning_rate": 3.9218565592055486e-07, |
|
"loss": 1.6633, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.3273177309418697, |
|
"grad_norm": 2.1011988058241173, |
|
"learning_rate": 3.9173948639705027e-07, |
|
"loss": 1.6765, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.32805659715167074, |
|
"grad_norm": 2.151384135030328, |
|
"learning_rate": 3.9129268785277796e-07, |
|
"loss": 1.6465, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.3287954633614718, |
|
"grad_norm": 1.4309025880636768, |
|
"learning_rate": 3.908452626951972e-07, |
|
"loss": 1.6543, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.3295343295712729, |
|
"grad_norm": 1.8849999578121595, |
|
"learning_rate": 3.903972133351436e-07, |
|
"loss": 1.6514, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.33027319578107395, |
|
"grad_norm": 1.7164685196230511, |
|
"learning_rate": 3.8994854218681627e-07, |
|
"loss": 1.7006, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.331012061990875, |
|
"grad_norm": 1.4964402365248954, |
|
"learning_rate": 3.8949925166776454e-07, |
|
"loss": 1.6995, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.3317509282006761, |
|
"grad_norm": 1.9725561956682367, |
|
"learning_rate": 3.8904934419887493e-07, |
|
"loss": 1.634, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.3324897944104771, |
|
"grad_norm": 1.604770043849599, |
|
"learning_rate": 3.885988222043586e-07, |
|
"loss": 1.6307, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.33322866062027817, |
|
"grad_norm": 1.4014528232679808, |
|
"learning_rate": 3.881476881117376e-07, |
|
"loss": 1.6384, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.33396752683007924, |
|
"grad_norm": 1.5592294550988919, |
|
"learning_rate": 3.876959443518323e-07, |
|
"loss": 1.6893, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.3347063930398803, |
|
"grad_norm": 1.512028885113723, |
|
"learning_rate": 3.872662252925764e-07, |
|
"loss": 1.6126, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.3354452592496814, |
|
"grad_norm": 1.5167336039874841, |
|
"learning_rate": 3.868132996855423e-07, |
|
"loss": 1.6438, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.3361841254594824, |
|
"grad_norm": 1.5732905269770532, |
|
"learning_rate": 3.8635977160123356e-07, |
|
"loss": 1.6129, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.33692299166928347, |
|
"grad_norm": 1.6825164459147328, |
|
"learning_rate": 3.859056434833698e-07, |
|
"loss": 1.611, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.33766185787908454, |
|
"grad_norm": 2.3767246380889095, |
|
"learning_rate": 3.854509177789039e-07, |
|
"loss": 1.6473, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.3384007240888856, |
|
"grad_norm": 1.51475900965411, |
|
"learning_rate": 3.8499559693800866e-07, |
|
"loss": 1.6696, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.3391395902986867, |
|
"grad_norm": 2.1798994146623496, |
|
"learning_rate": 3.845396834140635e-07, |
|
"loss": 1.6272, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.33987845650848775, |
|
"grad_norm": 5.503662773520221, |
|
"learning_rate": 3.8408317966364155e-07, |
|
"loss": 1.6598, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.34061732271828876, |
|
"grad_norm": 1.4387011677124582, |
|
"learning_rate": 3.836260881464961e-07, |
|
"loss": 1.6327, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.34135618892808983, |
|
"grad_norm": 1.8647315334479582, |
|
"learning_rate": 3.831684113255475e-07, |
|
"loss": 1.6511, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.3420950551378909, |
|
"grad_norm": 1.4777808537198769, |
|
"learning_rate": 3.8271015166686987e-07, |
|
"loss": 1.6361, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.34283392134769197, |
|
"grad_norm": 2.045197276055339, |
|
"learning_rate": 3.822513116396778e-07, |
|
"loss": 1.6659, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.34357278755749304, |
|
"grad_norm": 1.7790240681877276, |
|
"learning_rate": 3.8179189371631307e-07, |
|
"loss": 1.617, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.3443116537672941, |
|
"grad_norm": 1.6594283041904447, |
|
"learning_rate": 3.813319003722312e-07, |
|
"loss": 1.6798, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.3450505199770951, |
|
"grad_norm": 1.5722518111489987, |
|
"learning_rate": 3.8087133408598837e-07, |
|
"loss": 1.6448, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.3457893861868962, |
|
"grad_norm": 1.3834190123625751, |
|
"learning_rate": 3.804101973392278e-07, |
|
"loss": 1.6937, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.34652825239669727, |
|
"grad_norm": 2.860970712860898, |
|
"learning_rate": 3.799484926166665e-07, |
|
"loss": 1.6803, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.34726711860649834, |
|
"grad_norm": 1.7303789413551895, |
|
"learning_rate": 3.794862224060819e-07, |
|
"loss": 1.6652, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.3480059848162994, |
|
"grad_norm": 1.5722357665247504, |
|
"learning_rate": 3.7902338919829854e-07, |
|
"loss": 1.6824, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.3487448510261004, |
|
"grad_norm": 1.4942909416069685, |
|
"learning_rate": 3.785599954871741e-07, |
|
"loss": 1.6334, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.3494837172359015, |
|
"grad_norm": 1.5407701751336818, |
|
"learning_rate": 3.7809604376958705e-07, |
|
"loss": 1.6147, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.35022258344570256, |
|
"grad_norm": 1.5151800327591411, |
|
"learning_rate": 3.7763153654542187e-07, |
|
"loss": 1.6591, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.35096144965550363, |
|
"grad_norm": 1.5820720313790753, |
|
"learning_rate": 3.7716647631755684e-07, |
|
"loss": 1.6267, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.3517003158653047, |
|
"grad_norm": 1.7136185539713005, |
|
"learning_rate": 3.7670086559184944e-07, |
|
"loss": 1.6443, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.3524391820751058, |
|
"grad_norm": 1.6610072999142345, |
|
"learning_rate": 3.7623470687712363e-07, |
|
"loss": 1.6391, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.3531780482849068, |
|
"grad_norm": 1.7561532016780041, |
|
"learning_rate": 3.7576800268515615e-07, |
|
"loss": 1.6403, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.35391691449470786, |
|
"grad_norm": 1.6534365111706855, |
|
"learning_rate": 3.7530075553066256e-07, |
|
"loss": 1.6604, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.3546557807045089, |
|
"grad_norm": 1.5197922636545014, |
|
"learning_rate": 3.748329679312845e-07, |
|
"loss": 1.6005, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.35539464691431, |
|
"grad_norm": 2.1221364447575635, |
|
"learning_rate": 3.743646424075753e-07, |
|
"loss": 1.6302, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.35613351312411107, |
|
"grad_norm": 1.520654127135304, |
|
"learning_rate": 3.738957814829868e-07, |
|
"loss": 1.7174, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.35687237933391214, |
|
"grad_norm": 1.5099869797232601, |
|
"learning_rate": 3.7342638768385597e-07, |
|
"loss": 1.6592, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.35761124554371315, |
|
"grad_norm": 1.8304484700278734, |
|
"learning_rate": 3.729564635393907e-07, |
|
"loss": 1.6745, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.3583501117535142, |
|
"grad_norm": 1.778696114508267, |
|
"learning_rate": 3.7248601158165674e-07, |
|
"loss": 1.6592, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.3590889779633153, |
|
"grad_norm": 1.4183327236752137, |
|
"learning_rate": 3.720150343455638e-07, |
|
"loss": 1.6637, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.35982784417311636, |
|
"grad_norm": 1.559240346976758, |
|
"learning_rate": 3.715435343688517e-07, |
|
"loss": 1.6862, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.36056671038291743, |
|
"grad_norm": 1.5461740842164586, |
|
"learning_rate": 3.710715141920772e-07, |
|
"loss": 1.6276, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.36130557659271845, |
|
"grad_norm": 1.541024781373399, |
|
"learning_rate": 3.705989763585998e-07, |
|
"loss": 1.6519, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.3620444428025195, |
|
"grad_norm": 1.568073509021964, |
|
"learning_rate": 3.7012592341456855e-07, |
|
"loss": 1.644, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.3627833090123206, |
|
"grad_norm": 7.164278419276029, |
|
"learning_rate": 3.6965235790890776e-07, |
|
"loss": 1.6649, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.36352217522212166, |
|
"grad_norm": 1.6290047071156604, |
|
"learning_rate": 3.6917828239330364e-07, |
|
"loss": 1.6321, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.3642610414319227, |
|
"grad_norm": 2.2138525137520078, |
|
"learning_rate": 3.6870369942219043e-07, |
|
"loss": 1.6623, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.3649999076417238, |
|
"grad_norm": 1.4780745550505248, |
|
"learning_rate": 3.6822861155273664e-07, |
|
"loss": 1.6303, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.3657387738515248, |
|
"grad_norm": 1.6513433655082623, |
|
"learning_rate": 3.677530213448315e-07, |
|
"loss": 1.6678, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.3664776400613259, |
|
"grad_norm": 1.4330452468765504, |
|
"learning_rate": 3.6727693136107074e-07, |
|
"loss": 1.6411, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.36721650627112695, |
|
"grad_norm": 2.1041910204234773, |
|
"learning_rate": 3.668241852955783e-07, |
|
"loss": 1.6638, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.367955372480928, |
|
"grad_norm": 1.579705325259841, |
|
"learning_rate": 3.66347128129751e-07, |
|
"loss": 1.6245, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.3686942386907291, |
|
"grad_norm": 2.2840341365356185, |
|
"learning_rate": 3.65869578763363e-07, |
|
"loss": 1.6621, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.36943310490053016, |
|
"grad_norm": 1.4886178225841975, |
|
"learning_rate": 3.6539153976956643e-07, |
|
"loss": 1.6815, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.3701719711103312, |
|
"grad_norm": 2.0581153395070952, |
|
"learning_rate": 3.6491301372415173e-07, |
|
"loss": 1.6911, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.37091083732013225, |
|
"grad_norm": 1.5433010278052928, |
|
"learning_rate": 3.6443400320553387e-07, |
|
"loss": 1.6726, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.3716497035299333, |
|
"grad_norm": 1.3650733078052242, |
|
"learning_rate": 3.6395451079473785e-07, |
|
"loss": 1.6808, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.3723885697397344, |
|
"grad_norm": 1.4829849508478619, |
|
"learning_rate": 3.634745390753857e-07, |
|
"loss": 1.638, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.37312743594953546, |
|
"grad_norm": 1.4843368467181628, |
|
"learning_rate": 3.6299409063368177e-07, |
|
"loss": 1.6608, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.37386630215933647, |
|
"grad_norm": 1.7135290138411319, |
|
"learning_rate": 3.6251316805839925e-07, |
|
"loss": 1.6201, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.37460516836913754, |
|
"grad_norm": 1.4665338261705847, |
|
"learning_rate": 3.6203177394086603e-07, |
|
"loss": 1.6576, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.3753440345789386, |
|
"grad_norm": 1.523807524784342, |
|
"learning_rate": 3.615499108749508e-07, |
|
"loss": 1.6531, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.3760829007887397, |
|
"grad_norm": 1.4605532197043567, |
|
"learning_rate": 3.6106758145704903e-07, |
|
"loss": 1.6351, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.37682176699854075, |
|
"grad_norm": 1.4767414919395185, |
|
"learning_rate": 3.6058478828606904e-07, |
|
"loss": 1.6816, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.3775606332083418, |
|
"grad_norm": 3.319352148345807, |
|
"learning_rate": 3.601015339634179e-07, |
|
"loss": 1.646, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.37829949941814284, |
|
"grad_norm": 1.6462705304843952, |
|
"learning_rate": 3.5961782109298767e-07, |
|
"loss": 1.6572, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.3790383656279439, |
|
"grad_norm": 1.987828688877245, |
|
"learning_rate": 3.5913365228114085e-07, |
|
"loss": 1.6272, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.379777231837745, |
|
"grad_norm": 1.5685525483250444, |
|
"learning_rate": 3.5864903013669696e-07, |
|
"loss": 1.629, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.38051609804754605, |
|
"grad_norm": 1.454531386924792, |
|
"learning_rate": 3.58163957270918e-07, |
|
"loss": 1.6391, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.3812549642573471, |
|
"grad_norm": 1.5741474691311197, |
|
"learning_rate": 3.5767843629749465e-07, |
|
"loss": 1.6497, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.3819938304671482, |
|
"grad_norm": 1.494255550534897, |
|
"learning_rate": 3.5719246983253227e-07, |
|
"loss": 1.6584, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.3827326966769492, |
|
"grad_norm": 1.5743114630725665, |
|
"learning_rate": 3.5670606049453624e-07, |
|
"loss": 1.6333, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.3834715628867503, |
|
"grad_norm": 1.5229234435536247, |
|
"learning_rate": 3.5621921090439856e-07, |
|
"loss": 1.651, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.38421042909655134, |
|
"grad_norm": 1.5784429804907898, |
|
"learning_rate": 3.557319236853833e-07, |
|
"loss": 1.6922, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.3849492953063524, |
|
"grad_norm": 1.581472732564025, |
|
"learning_rate": 3.552442014631125e-07, |
|
"loss": 1.6725, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.3856881615161535, |
|
"grad_norm": 1.5126802451542531, |
|
"learning_rate": 3.5475604686555246e-07, |
|
"loss": 1.6944, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.3864270277259545, |
|
"grad_norm": 1.5957042160618131, |
|
"learning_rate": 3.5426746252299876e-07, |
|
"loss": 1.6474, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.38716589393575557, |
|
"grad_norm": 1.5167798574452542, |
|
"learning_rate": 3.537784510680629e-07, |
|
"loss": 1.6269, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.38790476014555664, |
|
"grad_norm": 1.4073803006779033, |
|
"learning_rate": 3.5328901513565755e-07, |
|
"loss": 1.667, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.3886436263553577, |
|
"grad_norm": 1.5025049762633182, |
|
"learning_rate": 3.527991573629826e-07, |
|
"loss": 1.6685, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.3893824925651588, |
|
"grad_norm": 1.498817940042482, |
|
"learning_rate": 3.523088803895111e-07, |
|
"loss": 1.6693, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.39012135877495985, |
|
"grad_norm": 1.5375475807699233, |
|
"learning_rate": 3.5181818685697454e-07, |
|
"loss": 1.6257, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.39086022498476086, |
|
"grad_norm": 1.4788669954107543, |
|
"learning_rate": 3.513270794093493e-07, |
|
"loss": 1.6396, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.39159909119456193, |
|
"grad_norm": 1.8280175785471986, |
|
"learning_rate": 3.508355606928417e-07, |
|
"loss": 1.6708, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.392337957404363, |
|
"grad_norm": 1.657327382022486, |
|
"learning_rate": 3.503436333558744e-07, |
|
"loss": 1.6344, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.3930768236141641, |
|
"grad_norm": 3.2933368891799772, |
|
"learning_rate": 3.498513000490713e-07, |
|
"loss": 1.6233, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.39381568982396514, |
|
"grad_norm": 1.5787521448516106, |
|
"learning_rate": 3.4935856342524445e-07, |
|
"loss": 1.6504, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.3945545560337662, |
|
"grad_norm": 1.7273082957996757, |
|
"learning_rate": 3.488654261393786e-07, |
|
"loss": 1.6501, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.3952934222435672, |
|
"grad_norm": 1.5427159019633168, |
|
"learning_rate": 3.483718908486173e-07, |
|
"loss": 1.6213, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.3960322884533683, |
|
"grad_norm": 2.4791279004019944, |
|
"learning_rate": 3.478779602122491e-07, |
|
"loss": 1.6341, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.39677115466316937, |
|
"grad_norm": 1.5057908958686839, |
|
"learning_rate": 3.4738363689169227e-07, |
|
"loss": 1.6344, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.39751002087297044, |
|
"grad_norm": 1.6211537727930727, |
|
"learning_rate": 3.4688892355048133e-07, |
|
"loss": 1.6684, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.3982488870827715, |
|
"grad_norm": 1.7112433425010558, |
|
"learning_rate": 3.4639382285425217e-07, |
|
"loss": 1.6742, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.3989877532925725, |
|
"grad_norm": 1.7626819549867558, |
|
"learning_rate": 3.4589833747072765e-07, |
|
"loss": 1.6497, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.3997266195023736, |
|
"grad_norm": 1.536514259186305, |
|
"learning_rate": 3.4540247006970395e-07, |
|
"loss": 1.6533, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.40046548571217466, |
|
"grad_norm": 1.4352156142464503, |
|
"learning_rate": 3.449062233230351e-07, |
|
"loss": 1.6423, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.40120435192197573, |
|
"grad_norm": 1.517870844401341, |
|
"learning_rate": 3.4440959990461936e-07, |
|
"loss": 1.6888, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.4019432181317768, |
|
"grad_norm": 1.6903764999597104, |
|
"learning_rate": 3.4391260249038467e-07, |
|
"loss": 1.6242, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.4026820843415779, |
|
"grad_norm": 1.9353070894961153, |
|
"learning_rate": 3.4341523375827407e-07, |
|
"loss": 1.6219, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.4034209505513789, |
|
"grad_norm": 1.70733565978221, |
|
"learning_rate": 3.4291749638823144e-07, |
|
"loss": 1.6524, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.40415981676117996, |
|
"grad_norm": 1.3794756923120337, |
|
"learning_rate": 3.4241939306218655e-07, |
|
"loss": 1.647, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.404898682970981, |
|
"grad_norm": 1.4536895089620647, |
|
"learning_rate": 3.4192092646404166e-07, |
|
"loss": 1.6697, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.4056375491807821, |
|
"grad_norm": 1.4185925084451405, |
|
"learning_rate": 3.41422099279656e-07, |
|
"loss": 1.6916, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.40637641539058317, |
|
"grad_norm": 1.5516883391882288, |
|
"learning_rate": 3.40922914196832e-07, |
|
"loss": 1.6702, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.40711528160038424, |
|
"grad_norm": 1.500896700694977, |
|
"learning_rate": 3.4042337390530027e-07, |
|
"loss": 1.6379, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.40785414781018525, |
|
"grad_norm": 1.4488842610705819, |
|
"learning_rate": 3.399234810967055e-07, |
|
"loss": 1.6322, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.4085930140199863, |
|
"grad_norm": 1.5363179452812292, |
|
"learning_rate": 3.394232384645918e-07, |
|
"loss": 1.7085, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.4093318802297874, |
|
"grad_norm": 1.6587795154693055, |
|
"learning_rate": 3.389226487043883e-07, |
|
"loss": 1.6212, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.41007074643958846, |
|
"grad_norm": 2.185811847037595, |
|
"learning_rate": 3.3842171451339446e-07, |
|
"loss": 1.653, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.41080961264938953, |
|
"grad_norm": 1.4930598472252423, |
|
"learning_rate": 3.3792043859076556e-07, |
|
"loss": 1.6401, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.41154847885919055, |
|
"grad_norm": 1.585267885050689, |
|
"learning_rate": 3.3741882363749836e-07, |
|
"loss": 1.6081, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.4122873450689916, |
|
"grad_norm": 1.5745770350836434, |
|
"learning_rate": 3.3691687235641633e-07, |
|
"loss": 1.6657, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.4130262112787927, |
|
"grad_norm": 1.638169374979827, |
|
"learning_rate": 3.364145874521552e-07, |
|
"loss": 1.6439, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.41376507748859376, |
|
"grad_norm": 1.5771694576157802, |
|
"learning_rate": 3.3591197163114807e-07, |
|
"loss": 1.6344, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.41450394369839483, |
|
"grad_norm": 1.507624879108444, |
|
"learning_rate": 3.3540902760161153e-07, |
|
"loss": 1.6414, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.4152428099081959, |
|
"grad_norm": 1.5517359392564993, |
|
"learning_rate": 3.349057580735304e-07, |
|
"loss": 1.6103, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.4159816761179969, |
|
"grad_norm": 1.6910189529581492, |
|
"learning_rate": 3.3440216575864336e-07, |
|
"loss": 1.6097, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.416720542327798, |
|
"grad_norm": 1.4817048826322234, |
|
"learning_rate": 3.338982533704284e-07, |
|
"loss": 1.6322, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.41745940853759905, |
|
"grad_norm": 2.4572073331823843, |
|
"learning_rate": 3.3339402362408803e-07, |
|
"loss": 1.6818, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.4181982747474001, |
|
"grad_norm": 1.4690103698141457, |
|
"learning_rate": 3.32889479236535e-07, |
|
"loss": 1.6734, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.4189371409572012, |
|
"grad_norm": 1.4525562290767953, |
|
"learning_rate": 3.323846229263772e-07, |
|
"loss": 1.6777, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.4196760071670022, |
|
"grad_norm": 1.6088576080590102, |
|
"learning_rate": 3.318794574139033e-07, |
|
"loss": 1.6815, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.4204148733768033, |
|
"grad_norm": 1.658735344378412, |
|
"learning_rate": 3.3137398542106816e-07, |
|
"loss": 1.7156, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.42115373958660435, |
|
"grad_norm": 1.856711421074202, |
|
"learning_rate": 3.308682096714777e-07, |
|
"loss": 1.6056, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.4218926057964054, |
|
"grad_norm": 1.524820866790581, |
|
"learning_rate": 3.3036213289037494e-07, |
|
"loss": 1.653, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.4226314720062065, |
|
"grad_norm": 2.091225075765613, |
|
"learning_rate": 3.298557578046248e-07, |
|
"loss": 1.6344, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.42337033821600756, |
|
"grad_norm": 1.5873899994137428, |
|
"learning_rate": 3.2934908714269926e-07, |
|
"loss": 1.7056, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.4241092044258086, |
|
"grad_norm": 1.530785170405434, |
|
"learning_rate": 3.2884212363466336e-07, |
|
"loss": 1.6592, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.42484807063560964, |
|
"grad_norm": 1.4187769683759475, |
|
"learning_rate": 3.283348700121599e-07, |
|
"loss": 1.6155, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.4255869368454107, |
|
"grad_norm": 1.7098484503844666, |
|
"learning_rate": 3.278273290083948e-07, |
|
"loss": 1.6145, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.4263258030552118, |
|
"grad_norm": 1.6337855300981592, |
|
"learning_rate": 3.2731950335812245e-07, |
|
"loss": 1.6718, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.42706466926501285, |
|
"grad_norm": 1.562376692174843, |
|
"learning_rate": 3.2681139579763116e-07, |
|
"loss": 1.6299, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.4278035354748139, |
|
"grad_norm": 1.7830680336877842, |
|
"learning_rate": 3.263030090647282e-07, |
|
"loss": 1.6427, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.42854240168461494, |
|
"grad_norm": 1.67004917671626, |
|
"learning_rate": 3.2579434589872487e-07, |
|
"loss": 1.6645, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.429281267894416, |
|
"grad_norm": 1.6704228734275928, |
|
"learning_rate": 3.2528540904042226e-07, |
|
"loss": 1.6427, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.4300201341042171, |
|
"grad_norm": 1.4195450351330696, |
|
"learning_rate": 3.24776201232096e-07, |
|
"loss": 1.62, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.43075900031401815, |
|
"grad_norm": 1.5285023969215334, |
|
"learning_rate": 3.242667252174816e-07, |
|
"loss": 1.6654, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.4314978665238192, |
|
"grad_norm": 1.4602369388272751, |
|
"learning_rate": 3.2375698374176e-07, |
|
"loss": 1.6073, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.43223673273362023, |
|
"grad_norm": 1.4791375841387864, |
|
"learning_rate": 3.232469795515423e-07, |
|
"loss": 1.6277, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.4329755989434213, |
|
"grad_norm": 1.4365509577307647, |
|
"learning_rate": 3.227367153948551e-07, |
|
"loss": 1.6678, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.4337144651532224, |
|
"grad_norm": 1.4925933032216425, |
|
"learning_rate": 3.22226194021126e-07, |
|
"loss": 1.6138, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.43445333136302344, |
|
"grad_norm": 1.5965165214882902, |
|
"learning_rate": 3.2171541818116844e-07, |
|
"loss": 1.682, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.4351921975728245, |
|
"grad_norm": 1.622561586319955, |
|
"learning_rate": 3.2120439062716673e-07, |
|
"loss": 1.6685, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.4359310637826256, |
|
"grad_norm": 1.5068996818021825, |
|
"learning_rate": 3.206931141126622e-07, |
|
"loss": 1.6353, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.4366699299924266, |
|
"grad_norm": 1.5980487695346257, |
|
"learning_rate": 3.2018159139253667e-07, |
|
"loss": 1.6442, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.43740879620222767, |
|
"grad_norm": 1.9446682447819341, |
|
"learning_rate": 3.1966982522299927e-07, |
|
"loss": 1.6215, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.43814766241202874, |
|
"grad_norm": 1.3911283325778476, |
|
"learning_rate": 3.1915781836157076e-07, |
|
"loss": 1.6237, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.4388865286218298, |
|
"grad_norm": 1.7379788181506113, |
|
"learning_rate": 3.1864557356706854e-07, |
|
"loss": 1.6311, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.4396253948316309, |
|
"grad_norm": 1.5960691894661032, |
|
"learning_rate": 3.181330935995925e-07, |
|
"loss": 1.6967, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.44036426104143195, |
|
"grad_norm": 1.334622875404918, |
|
"learning_rate": 3.176203812205092e-07, |
|
"loss": 1.7151, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.44110312725123296, |
|
"grad_norm": 2.3408851593313287, |
|
"learning_rate": 3.171074391924379e-07, |
|
"loss": 1.6204, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.44184199346103403, |
|
"grad_norm": 1.517416691835459, |
|
"learning_rate": 3.16594270279235e-07, |
|
"loss": 1.647, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.4425808596708351, |
|
"grad_norm": 1.732092967222855, |
|
"learning_rate": 3.160808772459796e-07, |
|
"loss": 1.6246, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.4433197258806362, |
|
"grad_norm": 1.4748895033828555, |
|
"learning_rate": 3.155672628589582e-07, |
|
"loss": 1.6559, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.44405859209043724, |
|
"grad_norm": 1.466688995230755, |
|
"learning_rate": 3.1505342988565024e-07, |
|
"loss": 1.6631, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.44479745830023826, |
|
"grad_norm": 1.5762348950247518, |
|
"learning_rate": 3.145393810947129e-07, |
|
"loss": 1.6507, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.4455363245100393, |
|
"grad_norm": 1.5705066014221254, |
|
"learning_rate": 3.1402511925596604e-07, |
|
"loss": 1.6218, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.4462751907198404, |
|
"grad_norm": 1.5033544192166477, |
|
"learning_rate": 3.135106471403778e-07, |
|
"loss": 1.6645, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.44701405692964147, |
|
"grad_norm": 1.8660368037827004, |
|
"learning_rate": 3.1299596752004884e-07, |
|
"loss": 1.6617, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.44775292313944254, |
|
"grad_norm": 1.6278625709035912, |
|
"learning_rate": 3.124810831681987e-07, |
|
"loss": 1.6383, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.4484917893492436, |
|
"grad_norm": 1.6698134882051106, |
|
"learning_rate": 3.1196599685914916e-07, |
|
"loss": 1.6691, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.4492306555590446, |
|
"grad_norm": 1.5877476217951574, |
|
"learning_rate": 3.114507113683109e-07, |
|
"loss": 1.6091, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.4499695217688457, |
|
"grad_norm": 1.533714449161249, |
|
"learning_rate": 3.109352294721674e-07, |
|
"loss": 1.6721, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 0.45070838797864676, |
|
"grad_norm": 1.415779061176635, |
|
"learning_rate": 3.104195539482607e-07, |
|
"loss": 1.606, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.45144725418844783, |
|
"grad_norm": 1.4338589085273825, |
|
"learning_rate": 3.0990368757517605e-07, |
|
"loss": 1.6661, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 0.4521861203982489, |
|
"grad_norm": 1.8998339669584823, |
|
"learning_rate": 3.093876331325269e-07, |
|
"loss": 1.609, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 0.45292498660805, |
|
"grad_norm": 1.384458068102408, |
|
"learning_rate": 3.0889720974519455e-07, |
|
"loss": 1.6454, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 0.453663852817851, |
|
"grad_norm": 1.4452081009096462, |
|
"learning_rate": 3.083807965655827e-07, |
|
"loss": 1.6452, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 0.45440271902765206, |
|
"grad_norm": 1.5698647385968285, |
|
"learning_rate": 3.0786420352211376e-07, |
|
"loss": 1.6741, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.45514158523745313, |
|
"grad_norm": 1.9552580205602894, |
|
"learning_rate": 3.0734743339831694e-07, |
|
"loss": 1.6845, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 0.4558804514472542, |
|
"grad_norm": 1.3583889408096808, |
|
"learning_rate": 3.068304889786754e-07, |
|
"loss": 1.6744, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 0.45661931765705527, |
|
"grad_norm": 1.6780668319449847, |
|
"learning_rate": 3.063133730486116e-07, |
|
"loss": 1.6258, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 0.4573581838668563, |
|
"grad_norm": 1.627173946323959, |
|
"learning_rate": 3.057960883944719e-07, |
|
"loss": 1.6198, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 0.45809705007665735, |
|
"grad_norm": 1.3800453841054778, |
|
"learning_rate": 3.0527863780351194e-07, |
|
"loss": 1.6268, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.4588359162864584, |
|
"grad_norm": 1.5516028071383072, |
|
"learning_rate": 3.047610240638816e-07, |
|
"loss": 1.679, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 0.4595747824962595, |
|
"grad_norm": 1.546230302013408, |
|
"learning_rate": 3.0424324996460955e-07, |
|
"loss": 1.6234, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 0.46031364870606056, |
|
"grad_norm": 1.5739393391599368, |
|
"learning_rate": 3.037253182955887e-07, |
|
"loss": 1.703, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 0.46105251491586163, |
|
"grad_norm": 1.5792552039289542, |
|
"learning_rate": 3.0320723184756095e-07, |
|
"loss": 1.6453, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.46179138112566265, |
|
"grad_norm": 1.5239329095833032, |
|
"learning_rate": 3.026889934121023e-07, |
|
"loss": 1.6553, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.4625302473354637, |
|
"grad_norm": 1.4558048272931619, |
|
"learning_rate": 3.021706057816074e-07, |
|
"loss": 1.6563, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 0.4632691135452648, |
|
"grad_norm": 1.5801820167249694, |
|
"learning_rate": 3.0165207174927513e-07, |
|
"loss": 1.6645, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 0.46400797975506586, |
|
"grad_norm": 1.5560547577828236, |
|
"learning_rate": 3.01133394109093e-07, |
|
"loss": 1.6596, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 0.46474684596486693, |
|
"grad_norm": 1.6818881647492323, |
|
"learning_rate": 3.006145756558223e-07, |
|
"loss": 1.6335, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 0.465485712174668, |
|
"grad_norm": 1.6120666995517767, |
|
"learning_rate": 3.0009561918498335e-07, |
|
"loss": 1.6685, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.466224578384469, |
|
"grad_norm": 1.4949729602626867, |
|
"learning_rate": 2.995765274928398e-07, |
|
"loss": 1.6753, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 0.4669634445942701, |
|
"grad_norm": 1.5289962949889762, |
|
"learning_rate": 2.9905730337638395e-07, |
|
"loss": 1.6548, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 0.46770231080407115, |
|
"grad_norm": 1.8299373423521412, |
|
"learning_rate": 2.98537949633322e-07, |
|
"loss": 1.5999, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 0.4684411770138722, |
|
"grad_norm": 1.5948007806430553, |
|
"learning_rate": 2.9801846906205794e-07, |
|
"loss": 1.6638, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 0.4691800432236733, |
|
"grad_norm": 1.418583561219425, |
|
"learning_rate": 2.974988644616799e-07, |
|
"loss": 1.6782, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.4699189094334743, |
|
"grad_norm": 1.461318006445296, |
|
"learning_rate": 2.9700512775939907e-07, |
|
"loss": 1.6528, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 0.4706577756432754, |
|
"grad_norm": 1.5468327583259127, |
|
"learning_rate": 2.964852893556419e-07, |
|
"loss": 1.6685, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 0.47139664185307645, |
|
"grad_norm": 1.6470459204833447, |
|
"learning_rate": 2.9596533518391615e-07, |
|
"loss": 1.6733, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 0.4721355080628775, |
|
"grad_norm": 1.624503313092944, |
|
"learning_rate": 2.954452680458612e-07, |
|
"loss": 1.6737, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 0.4728743742726786, |
|
"grad_norm": 1.5728828027087576, |
|
"learning_rate": 2.949250907437256e-07, |
|
"loss": 1.6671, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.47361324048247966, |
|
"grad_norm": 1.679151732155206, |
|
"learning_rate": 2.944048060803512e-07, |
|
"loss": 1.656, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 0.4743521066922807, |
|
"grad_norm": 1.4259988112675113, |
|
"learning_rate": 2.938844168591584e-07, |
|
"loss": 1.6088, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 0.47509097290208174, |
|
"grad_norm": 2.10422922646524, |
|
"learning_rate": 2.933639258841309e-07, |
|
"loss": 1.6411, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 0.4758298391118828, |
|
"grad_norm": 1.809412517307293, |
|
"learning_rate": 2.92843335959801e-07, |
|
"loss": 1.654, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 0.4765687053216839, |
|
"grad_norm": 1.6010915209622532, |
|
"learning_rate": 2.923226498912336e-07, |
|
"loss": 1.6653, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.47730757153148495, |
|
"grad_norm": 1.7399335136485357, |
|
"learning_rate": 2.918018704840123e-07, |
|
"loss": 1.6839, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 0.478046437741286, |
|
"grad_norm": 1.9845153410774579, |
|
"learning_rate": 2.912810005442231e-07, |
|
"loss": 1.6308, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 0.47878530395108704, |
|
"grad_norm": 1.4672730941447367, |
|
"learning_rate": 2.9076004287844007e-07, |
|
"loss": 1.7158, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 0.4795241701608881, |
|
"grad_norm": 1.3537458462825016, |
|
"learning_rate": 2.9023900029371e-07, |
|
"loss": 1.5888, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 0.4802630363706892, |
|
"grad_norm": 1.585460577335508, |
|
"learning_rate": 2.8971787559753695e-07, |
|
"loss": 1.6476, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.48100190258049025, |
|
"grad_norm": 1.561928549919643, |
|
"learning_rate": 2.891966715978679e-07, |
|
"loss": 1.6339, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.4817407687902913, |
|
"grad_norm": 1.439464952580829, |
|
"learning_rate": 2.886753911030767e-07, |
|
"loss": 1.6619, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 0.48247963500009233, |
|
"grad_norm": 1.5693967956885457, |
|
"learning_rate": 2.8815403692194954e-07, |
|
"loss": 1.6443, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 0.4832185012098934, |
|
"grad_norm": 1.8445144793183739, |
|
"learning_rate": 2.8763261186366977e-07, |
|
"loss": 1.6395, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 0.4839573674196945, |
|
"grad_norm": 1.4215590880054088, |
|
"learning_rate": 2.8711111873780224e-07, |
|
"loss": 1.6583, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.48469623362949554, |
|
"grad_norm": 1.6129407222161285, |
|
"learning_rate": 2.8658956035427917e-07, |
|
"loss": 1.6579, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.4854350998392966, |
|
"grad_norm": 1.7787904262576621, |
|
"learning_rate": 2.8606793952338394e-07, |
|
"loss": 1.6387, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 0.4861739660490977, |
|
"grad_norm": 2.9317837538381384, |
|
"learning_rate": 2.8554625905573646e-07, |
|
"loss": 1.6258, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 0.4869128322588987, |
|
"grad_norm": 1.6449106895888608, |
|
"learning_rate": 2.850245217622784e-07, |
|
"loss": 1.6492, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 0.48765169846869977, |
|
"grad_norm": 1.5321621721627146, |
|
"learning_rate": 2.8450273045425677e-07, |
|
"loss": 1.6456, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.48839056467850084, |
|
"grad_norm": 1.5327848701302575, |
|
"learning_rate": 2.8398088794321054e-07, |
|
"loss": 1.6299, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 0.4891294308883019, |
|
"grad_norm": 1.5262317315528862, |
|
"learning_rate": 2.8345899704095424e-07, |
|
"loss": 1.6815, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 0.489868297098103, |
|
"grad_norm": 8.056093277940944, |
|
"learning_rate": 2.8293706055956266e-07, |
|
"loss": 1.6196, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 0.49060716330790405, |
|
"grad_norm": 1.7903474479157373, |
|
"learning_rate": 2.8241508131135704e-07, |
|
"loss": 1.6748, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 0.49134602951770506, |
|
"grad_norm": 2.3280755640085857, |
|
"learning_rate": 2.818930621088883e-07, |
|
"loss": 1.674, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.49208489572750613, |
|
"grad_norm": 1.7132266058410768, |
|
"learning_rate": 2.8137100576492324e-07, |
|
"loss": 1.6407, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 0.4928237619373072, |
|
"grad_norm": 1.652779406776925, |
|
"learning_rate": 2.808489150924283e-07, |
|
"loss": 1.6672, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 0.4935626281471083, |
|
"grad_norm": 1.597072673714322, |
|
"learning_rate": 2.8032679290455525e-07, |
|
"loss": 1.6326, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 0.49430149435690934, |
|
"grad_norm": 1.483890002284729, |
|
"learning_rate": 2.798046420146254e-07, |
|
"loss": 1.6953, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 0.49504036056671036, |
|
"grad_norm": 1.5673926854706393, |
|
"learning_rate": 2.792824652361149e-07, |
|
"loss": 1.6348, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.49577922677651143, |
|
"grad_norm": 1.3752789014048936, |
|
"learning_rate": 2.7876026538263935e-07, |
|
"loss": 1.6333, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 0.4965180929863125, |
|
"grad_norm": 1.439519752453901, |
|
"learning_rate": 2.7823804526793863e-07, |
|
"loss": 1.6322, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.49725695919611357, |
|
"grad_norm": 1.6858659909371638, |
|
"learning_rate": 2.777158077058619e-07, |
|
"loss": 1.6087, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 0.49799582540591464, |
|
"grad_norm": 1.475020677300443, |
|
"learning_rate": 2.771935555103521e-07, |
|
"loss": 1.6085, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 0.4987346916157157, |
|
"grad_norm": 1.5498271971579036, |
|
"learning_rate": 2.766712914954314e-07, |
|
"loss": 1.6546, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.4994735578255167, |
|
"grad_norm": 2.096090843883931, |
|
"learning_rate": 2.7614901847518525e-07, |
|
"loss": 1.6812, |
|
"step": 13520 |
|
}, |
|
{ |
|
"epoch": 0.5002124240353178, |
|
"grad_norm": 1.4457832913454574, |
|
"learning_rate": 2.756267392637479e-07, |
|
"loss": 1.6581, |
|
"step": 13540 |
|
}, |
|
{ |
|
"epoch": 0.5009512902451189, |
|
"grad_norm": 2.01817520318154, |
|
"learning_rate": 2.751044566752869e-07, |
|
"loss": 1.6615, |
|
"step": 13560 |
|
}, |
|
{ |
|
"epoch": 0.5016901564549199, |
|
"grad_norm": 1.4227402127659055, |
|
"learning_rate": 2.745821735239878e-07, |
|
"loss": 1.6324, |
|
"step": 13580 |
|
}, |
|
{ |
|
"epoch": 0.502429022664721, |
|
"grad_norm": 1.8405513240063371, |
|
"learning_rate": 2.7405989262403955e-07, |
|
"loss": 1.6698, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.503167888874522, |
|
"grad_norm": 1.4788179775173926, |
|
"learning_rate": 2.7353761678961865e-07, |
|
"loss": 1.6359, |
|
"step": 13620 |
|
}, |
|
{ |
|
"epoch": 0.5039067550843231, |
|
"grad_norm": 1.7223731354636942, |
|
"learning_rate": 2.730153488348744e-07, |
|
"loss": 1.6306, |
|
"step": 13640 |
|
}, |
|
{ |
|
"epoch": 0.5046456212941242, |
|
"grad_norm": 2.5321925077821406, |
|
"learning_rate": 2.724930915739137e-07, |
|
"loss": 1.6752, |
|
"step": 13660 |
|
}, |
|
{ |
|
"epoch": 0.5053844875039252, |
|
"grad_norm": 1.5208216957527443, |
|
"learning_rate": 2.7197084782078585e-07, |
|
"loss": 1.6439, |
|
"step": 13680 |
|
}, |
|
{ |
|
"epoch": 0.5061233537137263, |
|
"grad_norm": 1.4323741561095633, |
|
"learning_rate": 2.7144862038946716e-07, |
|
"loss": 1.644, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.5068622199235273, |
|
"grad_norm": 1.426194444263622, |
|
"learning_rate": 2.709264120938464e-07, |
|
"loss": 1.6383, |
|
"step": 13720 |
|
}, |
|
{ |
|
"epoch": 0.5076010861333284, |
|
"grad_norm": 1.9190094996790648, |
|
"learning_rate": 2.7040422574770866e-07, |
|
"loss": 1.6015, |
|
"step": 13740 |
|
}, |
|
{ |
|
"epoch": 0.5083399523431295, |
|
"grad_norm": 1.5070566631142777, |
|
"learning_rate": 2.698820641647212e-07, |
|
"loss": 1.6841, |
|
"step": 13760 |
|
}, |
|
{ |
|
"epoch": 0.5090788185529306, |
|
"grad_norm": 1.9970969408548236, |
|
"learning_rate": 2.693599301584179e-07, |
|
"loss": 1.6346, |
|
"step": 13780 |
|
}, |
|
{ |
|
"epoch": 0.5098176847627316, |
|
"grad_norm": 1.683784538174349, |
|
"learning_rate": 2.688378265421837e-07, |
|
"loss": 1.6829, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.5105565509725326, |
|
"grad_norm": 1.7421711729558282, |
|
"learning_rate": 2.683157561292399e-07, |
|
"loss": 1.626, |
|
"step": 13820 |
|
}, |
|
{ |
|
"epoch": 0.5112954171823337, |
|
"grad_norm": 1.6638975974760875, |
|
"learning_rate": 2.6779372173262917e-07, |
|
"loss": 1.6847, |
|
"step": 13840 |
|
}, |
|
{ |
|
"epoch": 0.5120342833921347, |
|
"grad_norm": 1.7300243765637946, |
|
"learning_rate": 2.672717261651998e-07, |
|
"loss": 1.6635, |
|
"step": 13860 |
|
}, |
|
{ |
|
"epoch": 0.5127731496019359, |
|
"grad_norm": 1.7350443481000342, |
|
"learning_rate": 2.667497722395909e-07, |
|
"loss": 1.6648, |
|
"step": 13880 |
|
}, |
|
{ |
|
"epoch": 0.5135120158117369, |
|
"grad_norm": 1.8257677624748465, |
|
"learning_rate": 2.662278627682172e-07, |
|
"loss": 1.642, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.5142508820215379, |
|
"grad_norm": 1.7828372493231617, |
|
"learning_rate": 2.657060005632543e-07, |
|
"loss": 1.6354, |
|
"step": 13920 |
|
}, |
|
{ |
|
"epoch": 0.514989748231339, |
|
"grad_norm": 1.4463498826235905, |
|
"learning_rate": 2.6518418843662256e-07, |
|
"loss": 1.6342, |
|
"step": 13940 |
|
}, |
|
{ |
|
"epoch": 0.51572861444114, |
|
"grad_norm": 1.5876083742799603, |
|
"learning_rate": 2.6466242919997263e-07, |
|
"loss": 1.6541, |
|
"step": 13960 |
|
}, |
|
{ |
|
"epoch": 0.5164674806509412, |
|
"grad_norm": 1.4658443332943762, |
|
"learning_rate": 2.641407256646705e-07, |
|
"loss": 1.6865, |
|
"step": 13980 |
|
}, |
|
{ |
|
"epoch": 0.5172063468607422, |
|
"grad_norm": 1.3991873689568013, |
|
"learning_rate": 2.636190806417817e-07, |
|
"loss": 1.6322, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.5179452130705432, |
|
"grad_norm": 2.1443694620412823, |
|
"learning_rate": 2.6309749694205643e-07, |
|
"loss": 1.6337, |
|
"step": 14020 |
|
}, |
|
{ |
|
"epoch": 0.5186840792803443, |
|
"grad_norm": 1.8812922050974208, |
|
"learning_rate": 2.6257597737591484e-07, |
|
"loss": 1.6003, |
|
"step": 14040 |
|
}, |
|
{ |
|
"epoch": 0.5194229454901453, |
|
"grad_norm": 1.4849904179267404, |
|
"learning_rate": 2.6205452475343135e-07, |
|
"loss": 1.6554, |
|
"step": 14060 |
|
}, |
|
{ |
|
"epoch": 0.5201618116999465, |
|
"grad_norm": 1.5710794059095268, |
|
"learning_rate": 2.6153314188431934e-07, |
|
"loss": 1.6585, |
|
"step": 14080 |
|
}, |
|
{ |
|
"epoch": 0.5209006779097475, |
|
"grad_norm": 1.4300979250373247, |
|
"learning_rate": 2.6101183157791687e-07, |
|
"loss": 1.6266, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.5216395441195486, |
|
"grad_norm": 1.4201641845366786, |
|
"learning_rate": 2.604905966431707e-07, |
|
"loss": 1.6278, |
|
"step": 14120 |
|
}, |
|
{ |
|
"epoch": 0.5223784103293496, |
|
"grad_norm": 1.4634294685934828, |
|
"learning_rate": 2.5996943988862136e-07, |
|
"loss": 1.6575, |
|
"step": 14140 |
|
}, |
|
{ |
|
"epoch": 0.5231172765391506, |
|
"grad_norm": 1.5428372121996694, |
|
"learning_rate": 2.594483641223885e-07, |
|
"loss": 1.6751, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 0.5238561427489518, |
|
"grad_norm": 1.738164845435304, |
|
"learning_rate": 2.5892737215215507e-07, |
|
"loss": 1.6492, |
|
"step": 14180 |
|
}, |
|
{ |
|
"epoch": 0.5245950089587528, |
|
"grad_norm": 1.5256411770058975, |
|
"learning_rate": 2.584064667851527e-07, |
|
"loss": 1.6491, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.5253338751685539, |
|
"grad_norm": 2.0408240630415513, |
|
"learning_rate": 2.578856508281461e-07, |
|
"loss": 1.6424, |
|
"step": 14220 |
|
}, |
|
{ |
|
"epoch": 0.5260727413783549, |
|
"grad_norm": 1.5107852579348091, |
|
"learning_rate": 2.573649270874187e-07, |
|
"loss": 1.6575, |
|
"step": 14240 |
|
}, |
|
{ |
|
"epoch": 0.5268116075881559, |
|
"grad_norm": 1.606923866961281, |
|
"learning_rate": 2.568442983687567e-07, |
|
"loss": 1.6678, |
|
"step": 14260 |
|
}, |
|
{ |
|
"epoch": 0.527550473797957, |
|
"grad_norm": 1.86036331527246, |
|
"learning_rate": 2.5632376747743416e-07, |
|
"loss": 1.6611, |
|
"step": 14280 |
|
}, |
|
{ |
|
"epoch": 0.5282893400077581, |
|
"grad_norm": 1.6282520348397496, |
|
"learning_rate": 2.5580333721819837e-07, |
|
"loss": 1.6887, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.5290282062175592, |
|
"grad_norm": 1.4902965967534727, |
|
"learning_rate": 2.5528301039525427e-07, |
|
"loss": 1.673, |
|
"step": 14320 |
|
}, |
|
{ |
|
"epoch": 0.5297670724273602, |
|
"grad_norm": 2.9289521410401607, |
|
"learning_rate": 2.547627898122493e-07, |
|
"loss": 1.618, |
|
"step": 14340 |
|
}, |
|
{ |
|
"epoch": 0.5305059386371612, |
|
"grad_norm": 1.5801255890460382, |
|
"learning_rate": 2.5424267827225884e-07, |
|
"loss": 1.6478, |
|
"step": 14360 |
|
}, |
|
{ |
|
"epoch": 0.5312448048469623, |
|
"grad_norm": 1.904222753922445, |
|
"learning_rate": 2.5372267857777017e-07, |
|
"loss": 1.6543, |
|
"step": 14380 |
|
}, |
|
{ |
|
"epoch": 0.5319836710567634, |
|
"grad_norm": 1.5136725876022765, |
|
"learning_rate": 2.532027935306684e-07, |
|
"loss": 1.658, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.5327225372665645, |
|
"grad_norm": 1.8648484080963088, |
|
"learning_rate": 2.5268302593222056e-07, |
|
"loss": 1.6279, |
|
"step": 14420 |
|
}, |
|
{ |
|
"epoch": 0.5334614034763655, |
|
"grad_norm": 1.4732933175166334, |
|
"learning_rate": 2.521633785830612e-07, |
|
"loss": 1.6535, |
|
"step": 14440 |
|
}, |
|
{ |
|
"epoch": 0.5342002696861666, |
|
"grad_norm": 1.7964137810644547, |
|
"learning_rate": 2.5164385428317656e-07, |
|
"loss": 1.6291, |
|
"step": 14460 |
|
}, |
|
{ |
|
"epoch": 0.5349391358959676, |
|
"grad_norm": 1.7384258178088878, |
|
"learning_rate": 2.5112445583189e-07, |
|
"loss": 1.6484, |
|
"step": 14480 |
|
}, |
|
{ |
|
"epoch": 0.5356780021057687, |
|
"grad_norm": 1.6118844731600752, |
|
"learning_rate": 2.506051860278469e-07, |
|
"loss": 1.6461, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.5364168683155698, |
|
"grad_norm": 1.612441861147252, |
|
"learning_rate": 2.500860476689993e-07, |
|
"loss": 1.6368, |
|
"step": 14520 |
|
}, |
|
{ |
|
"epoch": 0.5371557345253708, |
|
"grad_norm": 1.4719276982885592, |
|
"learning_rate": 2.4956704355259106e-07, |
|
"loss": 1.616, |
|
"step": 14540 |
|
}, |
|
{ |
|
"epoch": 0.5378946007351719, |
|
"grad_norm": 1.4849285106056183, |
|
"learning_rate": 2.4904817647514273e-07, |
|
"loss": 1.6467, |
|
"step": 14560 |
|
}, |
|
{ |
|
"epoch": 0.5386334669449729, |
|
"grad_norm": 2.0929018106610533, |
|
"learning_rate": 2.485294492324364e-07, |
|
"loss": 1.6517, |
|
"step": 14580 |
|
}, |
|
{ |
|
"epoch": 0.539372333154774, |
|
"grad_norm": 1.3910097740422103, |
|
"learning_rate": 2.480108646195006e-07, |
|
"loss": 1.6319, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.5401111993645751, |
|
"grad_norm": 1.8158803135234147, |
|
"learning_rate": 2.474924254305956e-07, |
|
"loss": 1.6902, |
|
"step": 14620 |
|
}, |
|
{ |
|
"epoch": 0.5408500655743761, |
|
"grad_norm": 1.6514040636762424, |
|
"learning_rate": 2.4697413445919785e-07, |
|
"loss": 1.6479, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 0.5415889317841772, |
|
"grad_norm": 1.5739603939688216, |
|
"learning_rate": 2.4645599449798536e-07, |
|
"loss": 1.639, |
|
"step": 14660 |
|
}, |
|
{ |
|
"epoch": 0.5423277979939782, |
|
"grad_norm": 1.5178753830207266, |
|
"learning_rate": 2.459380083388221e-07, |
|
"loss": 1.6235, |
|
"step": 14680 |
|
}, |
|
{ |
|
"epoch": 0.5430666642037792, |
|
"grad_norm": 1.52558838171546, |
|
"learning_rate": 2.4542017877274397e-07, |
|
"loss": 1.6835, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.5438055304135804, |
|
"grad_norm": 2.2408509501139533, |
|
"learning_rate": 2.4490250858994243e-07, |
|
"loss": 1.5869, |
|
"step": 14720 |
|
}, |
|
{ |
|
"epoch": 0.5445443966233814, |
|
"grad_norm": 1.6053244248684069, |
|
"learning_rate": 2.4438500057975043e-07, |
|
"loss": 1.6698, |
|
"step": 14740 |
|
}, |
|
{ |
|
"epoch": 0.5452832628331825, |
|
"grad_norm": 1.4975811830975623, |
|
"learning_rate": 2.4386765753062733e-07, |
|
"loss": 1.6337, |
|
"step": 14760 |
|
}, |
|
{ |
|
"epoch": 0.5460221290429835, |
|
"grad_norm": 1.4849817547603397, |
|
"learning_rate": 2.4335048223014316e-07, |
|
"loss": 1.6095, |
|
"step": 14780 |
|
}, |
|
{ |
|
"epoch": 0.5467609952527847, |
|
"grad_norm": 1.8454272427613772, |
|
"learning_rate": 2.4283347746496436e-07, |
|
"loss": 1.6191, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.5474998614625857, |
|
"grad_norm": 1.484721990845683, |
|
"learning_rate": 2.4231664602083857e-07, |
|
"loss": 1.6156, |
|
"step": 14820 |
|
}, |
|
{ |
|
"epoch": 0.5482387276723867, |
|
"grad_norm": 1.4970531164331227, |
|
"learning_rate": 2.4179999068257935e-07, |
|
"loss": 1.6903, |
|
"step": 14840 |
|
}, |
|
{ |
|
"epoch": 0.5489775938821878, |
|
"grad_norm": 1.60919652354879, |
|
"learning_rate": 2.412835142340513e-07, |
|
"loss": 1.6813, |
|
"step": 14860 |
|
}, |
|
{ |
|
"epoch": 0.5497164600919888, |
|
"grad_norm": 1.3606018353206684, |
|
"learning_rate": 2.4076721945815544e-07, |
|
"loss": 1.6769, |
|
"step": 14880 |
|
}, |
|
{ |
|
"epoch": 0.55045532630179, |
|
"grad_norm": 1.458693168765768, |
|
"learning_rate": 2.4025110913681355e-07, |
|
"loss": 1.6373, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.551194192511591, |
|
"grad_norm": 1.547291419668359, |
|
"learning_rate": 2.397351860509537e-07, |
|
"loss": 1.6525, |
|
"step": 14920 |
|
}, |
|
{ |
|
"epoch": 0.551933058721392, |
|
"grad_norm": 1.7224542921095407, |
|
"learning_rate": 2.392194529804951e-07, |
|
"loss": 1.6761, |
|
"step": 14940 |
|
}, |
|
{ |
|
"epoch": 0.5526719249311931, |
|
"grad_norm": 1.6677249547234672, |
|
"learning_rate": 2.38703912704333e-07, |
|
"loss": 1.625, |
|
"step": 14960 |
|
}, |
|
{ |
|
"epoch": 0.5534107911409941, |
|
"grad_norm": 1.4519952098563818, |
|
"learning_rate": 2.3818856800032395e-07, |
|
"loss": 1.6244, |
|
"step": 14980 |
|
}, |
|
{ |
|
"epoch": 0.5541496573507952, |
|
"grad_norm": 1.7967122495859562, |
|
"learning_rate": 2.3767342164527055e-07, |
|
"loss": 1.6719, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.5548885235605963, |
|
"grad_norm": 1.3751693238795433, |
|
"learning_rate": 2.3715847641490688e-07, |
|
"loss": 1.6397, |
|
"step": 15020 |
|
}, |
|
{ |
|
"epoch": 0.5556273897703973, |
|
"grad_norm": 1.5461207825297583, |
|
"learning_rate": 2.3664373508388318e-07, |
|
"loss": 1.6871, |
|
"step": 15040 |
|
}, |
|
{ |
|
"epoch": 0.5563662559801984, |
|
"grad_norm": 1.3729095610665938, |
|
"learning_rate": 2.3612920042575091e-07, |
|
"loss": 1.6568, |
|
"step": 15060 |
|
}, |
|
{ |
|
"epoch": 0.5571051221899994, |
|
"grad_norm": 1.5955595428086877, |
|
"learning_rate": 2.3561487521294814e-07, |
|
"loss": 1.6439, |
|
"step": 15080 |
|
}, |
|
{ |
|
"epoch": 0.5578439883998005, |
|
"grad_norm": 1.505255489966295, |
|
"learning_rate": 2.351007622167843e-07, |
|
"loss": 1.6114, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.5585828546096016, |
|
"grad_norm": 1.4629681148522744, |
|
"learning_rate": 2.3458686420742528e-07, |
|
"loss": 1.6114, |
|
"step": 15120 |
|
}, |
|
{ |
|
"epoch": 0.5593217208194027, |
|
"grad_norm": 1.7359961722060924, |
|
"learning_rate": 2.3407318395387875e-07, |
|
"loss": 1.6416, |
|
"step": 15140 |
|
}, |
|
{ |
|
"epoch": 0.5600605870292037, |
|
"grad_norm": 1.6390324621472498, |
|
"learning_rate": 2.3355972422397895e-07, |
|
"loss": 1.6625, |
|
"step": 15160 |
|
}, |
|
{ |
|
"epoch": 0.5607994532390047, |
|
"grad_norm": 1.7925619507510513, |
|
"learning_rate": 2.3304648778437175e-07, |
|
"loss": 1.6822, |
|
"step": 15180 |
|
}, |
|
{ |
|
"epoch": 0.5615383194488058, |
|
"grad_norm": 1.6256712121515025, |
|
"learning_rate": 2.3253347740050012e-07, |
|
"loss": 1.6793, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.5622771856586068, |
|
"grad_norm": 1.6887168187109596, |
|
"learning_rate": 2.3202069583658883e-07, |
|
"loss": 1.6403, |
|
"step": 15220 |
|
}, |
|
{ |
|
"epoch": 0.563016051868408, |
|
"grad_norm": 1.4622893380793243, |
|
"learning_rate": 2.3150814585562984e-07, |
|
"loss": 1.6256, |
|
"step": 15240 |
|
}, |
|
{ |
|
"epoch": 0.563754918078209, |
|
"grad_norm": 1.720681049824639, |
|
"learning_rate": 2.3099583021936703e-07, |
|
"loss": 1.6331, |
|
"step": 15260 |
|
}, |
|
{ |
|
"epoch": 0.56449378428801, |
|
"grad_norm": 1.6844323896773028, |
|
"learning_rate": 2.3048375168828194e-07, |
|
"loss": 1.6249, |
|
"step": 15280 |
|
}, |
|
{ |
|
"epoch": 0.5652326504978111, |
|
"grad_norm": 1.4304416297000766, |
|
"learning_rate": 2.2997191302157831e-07, |
|
"loss": 1.6476, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.5659715167076121, |
|
"grad_norm": 2.6747036703519966, |
|
"learning_rate": 2.2946031697716728e-07, |
|
"loss": 1.6704, |
|
"step": 15320 |
|
}, |
|
{ |
|
"epoch": 0.5667103829174133, |
|
"grad_norm": 1.8934913018327109, |
|
"learning_rate": 2.2894896631165312e-07, |
|
"loss": 1.6557, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 0.5674492491272143, |
|
"grad_norm": 1.5864443521535418, |
|
"learning_rate": 2.2843786378031749e-07, |
|
"loss": 1.6111, |
|
"step": 15360 |
|
}, |
|
{ |
|
"epoch": 0.5681881153370153, |
|
"grad_norm": 1.6147764207744268, |
|
"learning_rate": 2.279270121371053e-07, |
|
"loss": 1.6617, |
|
"step": 15380 |
|
}, |
|
{ |
|
"epoch": 0.5689269815468164, |
|
"grad_norm": 1.5889401903281988, |
|
"learning_rate": 2.274164141346096e-07, |
|
"loss": 1.6472, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.5696658477566174, |
|
"grad_norm": 1.8322046948313095, |
|
"learning_rate": 2.2690607252405664e-07, |
|
"loss": 1.681, |
|
"step": 15420 |
|
}, |
|
{ |
|
"epoch": 0.5704047139664186, |
|
"grad_norm": 1.319095874026253, |
|
"learning_rate": 2.2639599005529124e-07, |
|
"loss": 1.6339, |
|
"step": 15440 |
|
}, |
|
{ |
|
"epoch": 0.5711435801762196, |
|
"grad_norm": 1.568413450074265, |
|
"learning_rate": 2.258861694767619e-07, |
|
"loss": 1.6385, |
|
"step": 15460 |
|
}, |
|
{ |
|
"epoch": 0.5718824463860207, |
|
"grad_norm": 1.659163649600049, |
|
"learning_rate": 2.2537661353550603e-07, |
|
"loss": 1.6292, |
|
"step": 15480 |
|
}, |
|
{ |
|
"epoch": 0.5726213125958217, |
|
"grad_norm": 1.484851792665619, |
|
"learning_rate": 2.2486732497713507e-07, |
|
"loss": 1.6887, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.5733601788056227, |
|
"grad_norm": 1.609907878598695, |
|
"learning_rate": 2.2435830654581962e-07, |
|
"loss": 1.6266, |
|
"step": 15520 |
|
}, |
|
{ |
|
"epoch": 0.5740990450154239, |
|
"grad_norm": 1.4453575034227937, |
|
"learning_rate": 2.2387499173937125e-07, |
|
"loss": 1.6537, |
|
"step": 15540 |
|
}, |
|
{ |
|
"epoch": 0.5748379112252249, |
|
"grad_norm": 1.7710876217433056, |
|
"learning_rate": 2.2336650794320994e-07, |
|
"loss": 1.6588, |
|
"step": 15560 |
|
}, |
|
{ |
|
"epoch": 0.575576777435026, |
|
"grad_norm": 1.4085011499137292, |
|
"learning_rate": 2.2285830236087167e-07, |
|
"loss": 1.6293, |
|
"step": 15580 |
|
}, |
|
{ |
|
"epoch": 0.576315643644827, |
|
"grad_norm": 1.4053148152524308, |
|
"learning_rate": 2.2235037773069188e-07, |
|
"loss": 1.629, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.577054509854628, |
|
"grad_norm": 1.456136317052379, |
|
"learning_rate": 2.2184273678949212e-07, |
|
"loss": 1.6448, |
|
"step": 15620 |
|
}, |
|
{ |
|
"epoch": 0.5777933760644292, |
|
"grad_norm": 1.5709035364905237, |
|
"learning_rate": 2.213353822725652e-07, |
|
"loss": 1.6556, |
|
"step": 15640 |
|
}, |
|
{ |
|
"epoch": 0.5785322422742302, |
|
"grad_norm": 2.381482655936729, |
|
"learning_rate": 2.2082831691366104e-07, |
|
"loss": 1.6298, |
|
"step": 15660 |
|
}, |
|
{ |
|
"epoch": 0.5792711084840313, |
|
"grad_norm": 1.510088899026219, |
|
"learning_rate": 2.2032154344497096e-07, |
|
"loss": 1.69, |
|
"step": 15680 |
|
}, |
|
{ |
|
"epoch": 0.5800099746938323, |
|
"grad_norm": 1.4208293328335637, |
|
"learning_rate": 2.198150645971138e-07, |
|
"loss": 1.6533, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.5807488409036333, |
|
"grad_norm": 1.5394108559637645, |
|
"learning_rate": 2.1930888309912098e-07, |
|
"loss": 1.6145, |
|
"step": 15720 |
|
}, |
|
{ |
|
"epoch": 0.5814877071134344, |
|
"grad_norm": 1.8494498268185677, |
|
"learning_rate": 2.188030016784216e-07, |
|
"loss": 1.6262, |
|
"step": 15740 |
|
}, |
|
{ |
|
"epoch": 0.5822265733232355, |
|
"grad_norm": 2.390942191221342, |
|
"learning_rate": 2.1829742306082778e-07, |
|
"loss": 1.612, |
|
"step": 15760 |
|
}, |
|
{ |
|
"epoch": 0.5829654395330366, |
|
"grad_norm": 2.4364332149226446, |
|
"learning_rate": 2.1779214997052025e-07, |
|
"loss": 1.6548, |
|
"step": 15780 |
|
}, |
|
{ |
|
"epoch": 0.5837043057428376, |
|
"grad_norm": 1.7161768355514782, |
|
"learning_rate": 2.1728718513003342e-07, |
|
"loss": 1.6822, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.5844431719526387, |
|
"grad_norm": 1.6209379371159418, |
|
"learning_rate": 2.1678253126024072e-07, |
|
"loss": 1.6068, |
|
"step": 15820 |
|
}, |
|
{ |
|
"epoch": 0.5851820381624397, |
|
"grad_norm": 2.1623351366291725, |
|
"learning_rate": 2.1627819108034002e-07, |
|
"loss": 1.6138, |
|
"step": 15840 |
|
}, |
|
{ |
|
"epoch": 0.5859209043722408, |
|
"grad_norm": 1.3848518910214123, |
|
"learning_rate": 2.1577416730783904e-07, |
|
"loss": 1.6315, |
|
"step": 15860 |
|
}, |
|
{ |
|
"epoch": 0.5866597705820419, |
|
"grad_norm": 1.377598599479366, |
|
"learning_rate": 2.1527046265854049e-07, |
|
"loss": 1.6263, |
|
"step": 15880 |
|
}, |
|
{ |
|
"epoch": 0.5873986367918429, |
|
"grad_norm": 1.5951258889353628, |
|
"learning_rate": 2.1476707984652764e-07, |
|
"loss": 1.6442, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.588137503001644, |
|
"grad_norm": 1.4119428291190372, |
|
"learning_rate": 2.1426402158414964e-07, |
|
"loss": 1.6776, |
|
"step": 15920 |
|
}, |
|
{ |
|
"epoch": 0.588876369211445, |
|
"grad_norm": 1.5401792838637114, |
|
"learning_rate": 2.1376129058200687e-07, |
|
"loss": 1.6489, |
|
"step": 15940 |
|
}, |
|
{ |
|
"epoch": 0.589615235421246, |
|
"grad_norm": 1.603780373356476, |
|
"learning_rate": 2.1325888954893618e-07, |
|
"loss": 1.6525, |
|
"step": 15960 |
|
}, |
|
{ |
|
"epoch": 0.5903541016310472, |
|
"grad_norm": 1.5200619012123444, |
|
"learning_rate": 2.1275682119199674e-07, |
|
"loss": 1.6103, |
|
"step": 15980 |
|
}, |
|
{ |
|
"epoch": 0.5910929678408482, |
|
"grad_norm": 2.1303907208230637, |
|
"learning_rate": 2.122550882164552e-07, |
|
"loss": 1.6515, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.5918318340506493, |
|
"grad_norm": 1.4309458414094776, |
|
"learning_rate": 2.1175369332577075e-07, |
|
"loss": 1.6476, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 0.5925707002604503, |
|
"grad_norm": 1.3885096209200305, |
|
"learning_rate": 2.112526392215811e-07, |
|
"loss": 1.6161, |
|
"step": 16040 |
|
}, |
|
{ |
|
"epoch": 0.5933095664702513, |
|
"grad_norm": 1.4639170589501997, |
|
"learning_rate": 2.107519286036879e-07, |
|
"loss": 1.6626, |
|
"step": 16060 |
|
}, |
|
{ |
|
"epoch": 0.5940484326800525, |
|
"grad_norm": 1.5413296048888148, |
|
"learning_rate": 2.102515641700417e-07, |
|
"loss": 1.7111, |
|
"step": 16080 |
|
}, |
|
{ |
|
"epoch": 0.5947872988898535, |
|
"grad_norm": 1.477261253181655, |
|
"learning_rate": 2.0975154861672782e-07, |
|
"loss": 1.6606, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.5955261650996546, |
|
"grad_norm": 1.484117052461405, |
|
"learning_rate": 2.0925188463795195e-07, |
|
"loss": 1.6587, |
|
"step": 16120 |
|
}, |
|
{ |
|
"epoch": 0.5962650313094556, |
|
"grad_norm": 1.492261770923395, |
|
"learning_rate": 2.0875257492602505e-07, |
|
"loss": 1.629, |
|
"step": 16140 |
|
}, |
|
{ |
|
"epoch": 0.5970038975192568, |
|
"grad_norm": 1.4469424063226348, |
|
"learning_rate": 2.082536221713494e-07, |
|
"loss": 1.6496, |
|
"step": 16160 |
|
}, |
|
{ |
|
"epoch": 0.5977427637290578, |
|
"grad_norm": 1.6092362505845061, |
|
"learning_rate": 2.07755029062404e-07, |
|
"loss": 1.6664, |
|
"step": 16180 |
|
}, |
|
{ |
|
"epoch": 0.5984816299388588, |
|
"grad_norm": 1.779958420465131, |
|
"learning_rate": 2.0725679828572983e-07, |
|
"loss": 1.6212, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.5992204961486599, |
|
"grad_norm": 2.256981377181274, |
|
"learning_rate": 2.0675893252591558e-07, |
|
"loss": 1.6603, |
|
"step": 16220 |
|
}, |
|
{ |
|
"epoch": 0.5999593623584609, |
|
"grad_norm": 1.4438145967369689, |
|
"learning_rate": 2.0626143446558313e-07, |
|
"loss": 1.7086, |
|
"step": 16240 |
|
}, |
|
{ |
|
"epoch": 0.600698228568262, |
|
"grad_norm": 1.4523681015745287, |
|
"learning_rate": 2.0576430678537314e-07, |
|
"loss": 1.6363, |
|
"step": 16260 |
|
}, |
|
{ |
|
"epoch": 0.6014370947780631, |
|
"grad_norm": 2.081965836536827, |
|
"learning_rate": 2.052675521639306e-07, |
|
"loss": 1.6525, |
|
"step": 16280 |
|
}, |
|
{ |
|
"epoch": 0.6021759609878641, |
|
"grad_norm": 1.641105539346371, |
|
"learning_rate": 2.0477117327789017e-07, |
|
"loss": 1.7219, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.6029148271976652, |
|
"grad_norm": 2.1960028742429887, |
|
"learning_rate": 2.0427517280186225e-07, |
|
"loss": 1.7079, |
|
"step": 16320 |
|
}, |
|
{ |
|
"epoch": 0.6036536934074662, |
|
"grad_norm": 1.421358868551972, |
|
"learning_rate": 2.0377955340841817e-07, |
|
"loss": 1.6494, |
|
"step": 16340 |
|
}, |
|
{ |
|
"epoch": 0.6043925596172673, |
|
"grad_norm": 1.4519180712299584, |
|
"learning_rate": 2.032843177680757e-07, |
|
"loss": 1.6497, |
|
"step": 16360 |
|
}, |
|
{ |
|
"epoch": 0.6051314258270684, |
|
"grad_norm": 1.4554186364319244, |
|
"learning_rate": 2.0278946854928512e-07, |
|
"loss": 1.6623, |
|
"step": 16380 |
|
}, |
|
{ |
|
"epoch": 0.6058702920368694, |
|
"grad_norm": 1.453630709571824, |
|
"learning_rate": 2.022950084184145e-07, |
|
"loss": 1.6481, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.6066091582466705, |
|
"grad_norm": 1.504491667770329, |
|
"learning_rate": 2.018009400397353e-07, |
|
"loss": 1.677, |
|
"step": 16420 |
|
}, |
|
{ |
|
"epoch": 0.6073480244564715, |
|
"grad_norm": 1.388924417705384, |
|
"learning_rate": 2.0130726607540828e-07, |
|
"loss": 1.6496, |
|
"step": 16440 |
|
}, |
|
{ |
|
"epoch": 0.6080868906662726, |
|
"grad_norm": 1.464940095501643, |
|
"learning_rate": 2.0081398918546882e-07, |
|
"loss": 1.6999, |
|
"step": 16460 |
|
}, |
|
{ |
|
"epoch": 0.6088257568760737, |
|
"grad_norm": 1.7055463049168984, |
|
"learning_rate": 2.0032111202781282e-07, |
|
"loss": 1.6249, |
|
"step": 16480 |
|
}, |
|
{ |
|
"epoch": 0.6095646230858748, |
|
"grad_norm": 1.6279220224411552, |
|
"learning_rate": 1.9982863725818267e-07, |
|
"loss": 1.6285, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.6103034892956758, |
|
"grad_norm": 2.0351245502127404, |
|
"learning_rate": 1.9933656753015204e-07, |
|
"loss": 1.6595, |
|
"step": 16520 |
|
}, |
|
{ |
|
"epoch": 0.6110423555054768, |
|
"grad_norm": 2.018723900559302, |
|
"learning_rate": 1.9884490549511252e-07, |
|
"loss": 1.7325, |
|
"step": 16540 |
|
}, |
|
{ |
|
"epoch": 0.6117812217152779, |
|
"grad_norm": 1.4930972850593807, |
|
"learning_rate": 1.983782066004026e-07, |
|
"loss": 1.6739, |
|
"step": 16560 |
|
}, |
|
{ |
|
"epoch": 0.612520087925079, |
|
"grad_norm": 1.6719536221986355, |
|
"learning_rate": 1.9788734718442834e-07, |
|
"loss": 1.6453, |
|
"step": 16580 |
|
}, |
|
{ |
|
"epoch": 0.6132589541348801, |
|
"grad_norm": 1.5901664783269642, |
|
"learning_rate": 1.9739690327019692e-07, |
|
"loss": 1.6688, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.6139978203446811, |
|
"grad_norm": 1.5005389488409309, |
|
"learning_rate": 1.9693136881713379e-07, |
|
"loss": 1.6697, |
|
"step": 16620 |
|
}, |
|
{ |
|
"epoch": 0.6147366865544821, |
|
"grad_norm": 1.5857034959363703, |
|
"learning_rate": 1.9644174273011738e-07, |
|
"loss": 1.6639, |
|
"step": 16640 |
|
}, |
|
{ |
|
"epoch": 0.6154755527642832, |
|
"grad_norm": 1.8800052700521002, |
|
"learning_rate": 1.959525399341126e-07, |
|
"loss": 1.6406, |
|
"step": 16660 |
|
}, |
|
{ |
|
"epoch": 0.6162144189740842, |
|
"grad_norm": 1.5463318718925796, |
|
"learning_rate": 1.954637630650633e-07, |
|
"loss": 1.6456, |
|
"step": 16680 |
|
}, |
|
{ |
|
"epoch": 0.6169532851838854, |
|
"grad_norm": 1.7265411721417883, |
|
"learning_rate": 1.9497541475661822e-07, |
|
"loss": 1.6396, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.6176921513936864, |
|
"grad_norm": 1.6019332231293413, |
|
"learning_rate": 1.9448749764011674e-07, |
|
"loss": 1.6319, |
|
"step": 16720 |
|
}, |
|
{ |
|
"epoch": 0.6184310176034874, |
|
"grad_norm": 1.6078339500202126, |
|
"learning_rate": 1.940000143445753e-07, |
|
"loss": 1.6287, |
|
"step": 16740 |
|
}, |
|
{ |
|
"epoch": 0.6191698838132885, |
|
"grad_norm": 1.5200063311449286, |
|
"learning_rate": 1.9351296749667239e-07, |
|
"loss": 1.6556, |
|
"step": 16760 |
|
}, |
|
{ |
|
"epoch": 0.6199087500230895, |
|
"grad_norm": 1.5605900758303721, |
|
"learning_rate": 1.9302635972073504e-07, |
|
"loss": 1.6709, |
|
"step": 16780 |
|
}, |
|
{ |
|
"epoch": 0.6206476162328907, |
|
"grad_norm": 1.5245501861602075, |
|
"learning_rate": 1.9254019363872432e-07, |
|
"loss": 1.6744, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.6213864824426917, |
|
"grad_norm": 1.4527294863239084, |
|
"learning_rate": 1.9205447187022145e-07, |
|
"loss": 1.6564, |
|
"step": 16820 |
|
}, |
|
{ |
|
"epoch": 0.6221253486524928, |
|
"grad_norm": 2.0368137299260276, |
|
"learning_rate": 1.915691970324137e-07, |
|
"loss": 1.6289, |
|
"step": 16840 |
|
}, |
|
{ |
|
"epoch": 0.6228642148622938, |
|
"grad_norm": 2.2640348268112147, |
|
"learning_rate": 1.9108437174007967e-07, |
|
"loss": 1.667, |
|
"step": 16860 |
|
}, |
|
{ |
|
"epoch": 0.6236030810720948, |
|
"grad_norm": 1.4879411305430876, |
|
"learning_rate": 1.9059999860557635e-07, |
|
"loss": 1.6516, |
|
"step": 16880 |
|
}, |
|
{ |
|
"epoch": 0.624341947281896, |
|
"grad_norm": 1.99321589038771, |
|
"learning_rate": 1.9011608023882396e-07, |
|
"loss": 1.6617, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.625080813491697, |
|
"grad_norm": 1.4486992732108148, |
|
"learning_rate": 1.8963261924729247e-07, |
|
"loss": 1.6477, |
|
"step": 16920 |
|
}, |
|
{ |
|
"epoch": 0.6258196797014981, |
|
"grad_norm": 1.4436779823541692, |
|
"learning_rate": 1.8914961823598742e-07, |
|
"loss": 1.6276, |
|
"step": 16940 |
|
}, |
|
{ |
|
"epoch": 0.6265585459112991, |
|
"grad_norm": 1.7823515681610929, |
|
"learning_rate": 1.886670798074358e-07, |
|
"loss": 1.6722, |
|
"step": 16960 |
|
}, |
|
{ |
|
"epoch": 0.6272974121211001, |
|
"grad_norm": 1.4559994514082784, |
|
"learning_rate": 1.8818500656167198e-07, |
|
"loss": 1.6721, |
|
"step": 16980 |
|
}, |
|
{ |
|
"epoch": 0.6280362783309013, |
|
"grad_norm": 1.5502170823927217, |
|
"learning_rate": 1.8770340109622418e-07, |
|
"loss": 1.6468, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.6287751445407023, |
|
"grad_norm": 1.3693032988758314, |
|
"learning_rate": 1.8722226600609974e-07, |
|
"loss": 1.6503, |
|
"step": 17020 |
|
}, |
|
{ |
|
"epoch": 0.6295140107505034, |
|
"grad_norm": 1.8228163395950472, |
|
"learning_rate": 1.8674160388377174e-07, |
|
"loss": 1.6691, |
|
"step": 17040 |
|
}, |
|
{ |
|
"epoch": 0.6302528769603044, |
|
"grad_norm": 1.607512275964286, |
|
"learning_rate": 1.8626141731916446e-07, |
|
"loss": 1.6381, |
|
"step": 17060 |
|
}, |
|
{ |
|
"epoch": 0.6309917431701054, |
|
"grad_norm": 1.6555733853411483, |
|
"learning_rate": 1.8578170889964022e-07, |
|
"loss": 1.624, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 0.6317306093799065, |
|
"grad_norm": 1.4667357369050853, |
|
"learning_rate": 1.853024812099847e-07, |
|
"loss": 1.6233, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.6324694755897076, |
|
"grad_norm": 1.555065221242107, |
|
"learning_rate": 1.8482373683239316e-07, |
|
"loss": 1.6372, |
|
"step": 17120 |
|
}, |
|
{ |
|
"epoch": 0.6332083417995087, |
|
"grad_norm": 1.5169327799558363, |
|
"learning_rate": 1.8434547834645714e-07, |
|
"loss": 1.6738, |
|
"step": 17140 |
|
}, |
|
{ |
|
"epoch": 0.6339472080093097, |
|
"grad_norm": 1.419410682586359, |
|
"learning_rate": 1.8386770832914955e-07, |
|
"loss": 1.6677, |
|
"step": 17160 |
|
}, |
|
{ |
|
"epoch": 0.6346860742191108, |
|
"grad_norm": 1.6719841699284368, |
|
"learning_rate": 1.833904293548116e-07, |
|
"loss": 1.6821, |
|
"step": 17180 |
|
}, |
|
{ |
|
"epoch": 0.6354249404289118, |
|
"grad_norm": 1.5798183541162123, |
|
"learning_rate": 1.8291364399513864e-07, |
|
"loss": 1.7092, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.6361638066387129, |
|
"grad_norm": 1.4604030691233605, |
|
"learning_rate": 1.8243735481916611e-07, |
|
"loss": 1.662, |
|
"step": 17220 |
|
}, |
|
{ |
|
"epoch": 0.636902672848514, |
|
"grad_norm": 1.7774575653306484, |
|
"learning_rate": 1.8196156439325604e-07, |
|
"loss": 1.655, |
|
"step": 17240 |
|
}, |
|
{ |
|
"epoch": 0.637641539058315, |
|
"grad_norm": 2.062948052538768, |
|
"learning_rate": 1.8148627528108323e-07, |
|
"loss": 1.65, |
|
"step": 17260 |
|
}, |
|
{ |
|
"epoch": 0.6383804052681161, |
|
"grad_norm": 1.7560243016328074, |
|
"learning_rate": 1.8101149004362088e-07, |
|
"loss": 1.6068, |
|
"step": 17280 |
|
}, |
|
{ |
|
"epoch": 0.6391192714779171, |
|
"grad_norm": 1.589922555292764, |
|
"learning_rate": 1.8053721123912764e-07, |
|
"loss": 1.6432, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.6398581376877182, |
|
"grad_norm": 1.7855781248038047, |
|
"learning_rate": 1.8006344142313285e-07, |
|
"loss": 1.6444, |
|
"step": 17320 |
|
}, |
|
{ |
|
"epoch": 0.6405970038975193, |
|
"grad_norm": 1.462859488532895, |
|
"learning_rate": 1.7959018314842395e-07, |
|
"loss": 1.6225, |
|
"step": 17340 |
|
}, |
|
{ |
|
"epoch": 0.6413358701073203, |
|
"grad_norm": 1.5201929263554286, |
|
"learning_rate": 1.7911743896503144e-07, |
|
"loss": 1.6216, |
|
"step": 17360 |
|
}, |
|
{ |
|
"epoch": 0.6420747363171214, |
|
"grad_norm": 1.5039545520824391, |
|
"learning_rate": 1.7864521142021616e-07, |
|
"loss": 1.597, |
|
"step": 17380 |
|
}, |
|
{ |
|
"epoch": 0.6428136025269224, |
|
"grad_norm": 2.1198882531068106, |
|
"learning_rate": 1.7817350305845503e-07, |
|
"loss": 1.6762, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.6435524687367234, |
|
"grad_norm": 1.5052045132821683, |
|
"learning_rate": 1.7770231642142758e-07, |
|
"loss": 1.6459, |
|
"step": 17420 |
|
}, |
|
{ |
|
"epoch": 0.6442913349465246, |
|
"grad_norm": 1.5702310750127326, |
|
"learning_rate": 1.77231654048002e-07, |
|
"loss": 1.5676, |
|
"step": 17440 |
|
}, |
|
{ |
|
"epoch": 0.6450302011563256, |
|
"grad_norm": 1.49975631121171, |
|
"learning_rate": 1.7676151847422188e-07, |
|
"loss": 1.6558, |
|
"step": 17460 |
|
}, |
|
{ |
|
"epoch": 0.6457690673661267, |
|
"grad_norm": 1.8852376014336283, |
|
"learning_rate": 1.7629191223329188e-07, |
|
"loss": 1.6598, |
|
"step": 17480 |
|
}, |
|
{ |
|
"epoch": 0.6465079335759277, |
|
"grad_norm": 1.5809036111526213, |
|
"learning_rate": 1.7582283785556494e-07, |
|
"loss": 1.6148, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.6472467997857289, |
|
"grad_norm": 1.4247569077843545, |
|
"learning_rate": 1.75354297868528e-07, |
|
"loss": 1.6318, |
|
"step": 17520 |
|
}, |
|
{ |
|
"epoch": 0.6479856659955299, |
|
"grad_norm": 1.6577683592238937, |
|
"learning_rate": 1.748862947967885e-07, |
|
"loss": 1.6551, |
|
"step": 17540 |
|
}, |
|
{ |
|
"epoch": 0.6487245322053309, |
|
"grad_norm": 7.300032033927882, |
|
"learning_rate": 1.744188311620608e-07, |
|
"loss": 1.6892, |
|
"step": 17560 |
|
}, |
|
{ |
|
"epoch": 0.649463398415132, |
|
"grad_norm": 1.4132601163703873, |
|
"learning_rate": 1.7395190948315282e-07, |
|
"loss": 1.6817, |
|
"step": 17580 |
|
}, |
|
{ |
|
"epoch": 0.650202264624933, |
|
"grad_norm": 1.5063433467194194, |
|
"learning_rate": 1.7348553227595218e-07, |
|
"loss": 1.6158, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.6509411308347341, |
|
"grad_norm": 1.5169596981657725, |
|
"learning_rate": 1.7301970205341292e-07, |
|
"loss": 1.6779, |
|
"step": 17620 |
|
}, |
|
{ |
|
"epoch": 0.6516799970445352, |
|
"grad_norm": 1.6068564294026548, |
|
"learning_rate": 1.725544213255415e-07, |
|
"loss": 1.6179, |
|
"step": 17640 |
|
}, |
|
{ |
|
"epoch": 0.6524188632543362, |
|
"grad_norm": 1.401533779590892, |
|
"learning_rate": 1.7208969259938396e-07, |
|
"loss": 1.6992, |
|
"step": 17660 |
|
}, |
|
{ |
|
"epoch": 0.6531577294641373, |
|
"grad_norm": 1.7940271180903984, |
|
"learning_rate": 1.7162551837901149e-07, |
|
"loss": 1.6343, |
|
"step": 17680 |
|
}, |
|
{ |
|
"epoch": 0.6538965956739383, |
|
"grad_norm": 1.4503762459176361, |
|
"learning_rate": 1.7116190116550798e-07, |
|
"loss": 1.6241, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.6546354618837394, |
|
"grad_norm": 1.9129744363614924, |
|
"learning_rate": 1.7069884345695585e-07, |
|
"loss": 1.6242, |
|
"step": 17720 |
|
}, |
|
{ |
|
"epoch": 0.6553743280935405, |
|
"grad_norm": 1.4592502547252286, |
|
"learning_rate": 1.7023634774842265e-07, |
|
"loss": 1.6433, |
|
"step": 17740 |
|
}, |
|
{ |
|
"epoch": 0.6561131943033415, |
|
"grad_norm": 2.3740218695344026, |
|
"learning_rate": 1.6977441653194778e-07, |
|
"loss": 1.6407, |
|
"step": 17760 |
|
}, |
|
{ |
|
"epoch": 0.6568520605131426, |
|
"grad_norm": 1.652867656549423, |
|
"learning_rate": 1.6931305229652911e-07, |
|
"loss": 1.6571, |
|
"step": 17780 |
|
}, |
|
{ |
|
"epoch": 0.6575909267229436, |
|
"grad_norm": 1.8510532804043571, |
|
"learning_rate": 1.688522575281096e-07, |
|
"loss": 1.6393, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.6583297929327447, |
|
"grad_norm": 1.5330852891820108, |
|
"learning_rate": 1.6839203470956348e-07, |
|
"loss": 1.6181, |
|
"step": 17820 |
|
}, |
|
{ |
|
"epoch": 0.6590686591425458, |
|
"grad_norm": 2.179872107638406, |
|
"learning_rate": 1.6793238632068323e-07, |
|
"loss": 1.6467, |
|
"step": 17840 |
|
}, |
|
{ |
|
"epoch": 0.6598075253523468, |
|
"grad_norm": 1.5709625450812563, |
|
"learning_rate": 1.6747331483816645e-07, |
|
"loss": 1.6931, |
|
"step": 17860 |
|
}, |
|
{ |
|
"epoch": 0.6605463915621479, |
|
"grad_norm": 1.7454282483475967, |
|
"learning_rate": 1.6701482273560185e-07, |
|
"loss": 1.6292, |
|
"step": 17880 |
|
}, |
|
{ |
|
"epoch": 0.6612852577719489, |
|
"grad_norm": 1.7594994883208979, |
|
"learning_rate": 1.6655691248345655e-07, |
|
"loss": 1.6171, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.66202412398175, |
|
"grad_norm": 1.5140697252908892, |
|
"learning_rate": 1.6609958654906255e-07, |
|
"loss": 1.6319, |
|
"step": 17920 |
|
}, |
|
{ |
|
"epoch": 0.662762990191551, |
|
"grad_norm": 2.248352984954327, |
|
"learning_rate": 1.6564284739660316e-07, |
|
"loss": 1.6363, |
|
"step": 17940 |
|
}, |
|
{ |
|
"epoch": 0.6635018564013522, |
|
"grad_norm": 2.0596192177611368, |
|
"learning_rate": 1.6518669748710013e-07, |
|
"loss": 1.6264, |
|
"step": 17960 |
|
}, |
|
{ |
|
"epoch": 0.6642407226111532, |
|
"grad_norm": 1.4805518708471208, |
|
"learning_rate": 1.647311392784002e-07, |
|
"loss": 1.6559, |
|
"step": 17980 |
|
}, |
|
{ |
|
"epoch": 0.6649795888209542, |
|
"grad_norm": 1.5620227618208977, |
|
"learning_rate": 1.6427617522516196e-07, |
|
"loss": 1.6528, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.6657184550307553, |
|
"grad_norm": 1.5698059903501222, |
|
"learning_rate": 1.6382180777884236e-07, |
|
"loss": 1.68, |
|
"step": 18020 |
|
}, |
|
{ |
|
"epoch": 0.6664573212405563, |
|
"grad_norm": 1.525456023190327, |
|
"learning_rate": 1.6336803938768396e-07, |
|
"loss": 1.6129, |
|
"step": 18040 |
|
}, |
|
{ |
|
"epoch": 0.6671961874503575, |
|
"grad_norm": 1.9244616810959143, |
|
"learning_rate": 1.6291487249670116e-07, |
|
"loss": 1.6074, |
|
"step": 18060 |
|
}, |
|
{ |
|
"epoch": 0.6679350536601585, |
|
"grad_norm": 1.5470316335951617, |
|
"learning_rate": 1.6246230954766744e-07, |
|
"loss": 1.6174, |
|
"step": 18080 |
|
}, |
|
{ |
|
"epoch": 0.6686739198699595, |
|
"grad_norm": 1.460047028189958, |
|
"learning_rate": 1.6201035297910215e-07, |
|
"loss": 1.6387, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.6694127860797606, |
|
"grad_norm": 1.849597715575099, |
|
"learning_rate": 1.6155900522625744e-07, |
|
"loss": 1.6357, |
|
"step": 18120 |
|
}, |
|
{ |
|
"epoch": 0.6701516522895616, |
|
"grad_norm": 1.595432962229376, |
|
"learning_rate": 1.6110826872110478e-07, |
|
"loss": 1.6175, |
|
"step": 18140 |
|
}, |
|
{ |
|
"epoch": 0.6708905184993628, |
|
"grad_norm": 1.5318757576478021, |
|
"learning_rate": 1.6065814589232206e-07, |
|
"loss": 1.6235, |
|
"step": 18160 |
|
}, |
|
{ |
|
"epoch": 0.6716293847091638, |
|
"grad_norm": 1.4152502346247018, |
|
"learning_rate": 1.602086391652807e-07, |
|
"loss": 1.6287, |
|
"step": 18180 |
|
}, |
|
{ |
|
"epoch": 0.6723682509189648, |
|
"grad_norm": 1.730605954821045, |
|
"learning_rate": 1.5975975096203248e-07, |
|
"loss": 1.6297, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.6731071171287659, |
|
"grad_norm": 1.641811600664541, |
|
"learning_rate": 1.5931148370129613e-07, |
|
"loss": 1.6575, |
|
"step": 18220 |
|
}, |
|
{ |
|
"epoch": 0.6738459833385669, |
|
"grad_norm": 1.4446876896322507, |
|
"learning_rate": 1.5886383979844492e-07, |
|
"loss": 1.6488, |
|
"step": 18240 |
|
}, |
|
{ |
|
"epoch": 0.6745848495483681, |
|
"grad_norm": 1.6489416268912538, |
|
"learning_rate": 1.5841682166549308e-07, |
|
"loss": 1.6466, |
|
"step": 18260 |
|
}, |
|
{ |
|
"epoch": 0.6753237157581691, |
|
"grad_norm": 1.6240331247999147, |
|
"learning_rate": 1.5797043171108297e-07, |
|
"loss": 1.6693, |
|
"step": 18280 |
|
}, |
|
{ |
|
"epoch": 0.6760625819679702, |
|
"grad_norm": 2.2147991050957, |
|
"learning_rate": 1.5752467234047263e-07, |
|
"loss": 1.6051, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.6768014481777712, |
|
"grad_norm": 1.5203059720344088, |
|
"learning_rate": 1.5707954595552187e-07, |
|
"loss": 1.653, |
|
"step": 18320 |
|
}, |
|
{ |
|
"epoch": 0.6775403143875722, |
|
"grad_norm": 1.5328417599383586, |
|
"learning_rate": 1.5663505495468e-07, |
|
"loss": 1.6381, |
|
"step": 18340 |
|
}, |
|
{ |
|
"epoch": 0.6782791805973734, |
|
"grad_norm": 1.5445956099646183, |
|
"learning_rate": 1.5619120173297267e-07, |
|
"loss": 1.6037, |
|
"step": 18360 |
|
}, |
|
{ |
|
"epoch": 0.6790180468071744, |
|
"grad_norm": 1.479872310550016, |
|
"learning_rate": 1.5574798868198912e-07, |
|
"loss": 1.6353, |
|
"step": 18380 |
|
}, |
|
{ |
|
"epoch": 0.6797569130169755, |
|
"grad_norm": 1.7841436633262773, |
|
"learning_rate": 1.5530541818986927e-07, |
|
"loss": 1.7364, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.6804957792267765, |
|
"grad_norm": 1.529508435392583, |
|
"learning_rate": 1.5486349264129046e-07, |
|
"loss": 1.6181, |
|
"step": 18420 |
|
}, |
|
{ |
|
"epoch": 0.6812346454365775, |
|
"grad_norm": 1.6539396952625665, |
|
"learning_rate": 1.5442221441745533e-07, |
|
"loss": 1.6985, |
|
"step": 18440 |
|
}, |
|
{ |
|
"epoch": 0.6819735116463786, |
|
"grad_norm": 1.5860780535239207, |
|
"learning_rate": 1.5398158589607813e-07, |
|
"loss": 1.6636, |
|
"step": 18460 |
|
}, |
|
{ |
|
"epoch": 0.6827123778561797, |
|
"grad_norm": 1.9353694955508953, |
|
"learning_rate": 1.5354160945137268e-07, |
|
"loss": 1.6277, |
|
"step": 18480 |
|
}, |
|
{ |
|
"epoch": 0.6834512440659808, |
|
"grad_norm": 1.4060414431962835, |
|
"learning_rate": 1.5310228745403925e-07, |
|
"loss": 1.6348, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.6841901102757818, |
|
"grad_norm": 1.9510007446700244, |
|
"learning_rate": 1.5266362227125164e-07, |
|
"loss": 1.666, |
|
"step": 18520 |
|
}, |
|
{ |
|
"epoch": 0.6849289764855828, |
|
"grad_norm": 2.5976331102164694, |
|
"learning_rate": 1.5222561626664448e-07, |
|
"loss": 1.6437, |
|
"step": 18540 |
|
}, |
|
{ |
|
"epoch": 0.6856678426953839, |
|
"grad_norm": 1.635565277090673, |
|
"learning_rate": 1.51788271800301e-07, |
|
"loss": 1.6367, |
|
"step": 18560 |
|
}, |
|
{ |
|
"epoch": 0.686406708905185, |
|
"grad_norm": 1.6414633412876904, |
|
"learning_rate": 1.5135159122873936e-07, |
|
"loss": 1.6239, |
|
"step": 18580 |
|
}, |
|
{ |
|
"epoch": 0.6871455751149861, |
|
"grad_norm": 1.972663651970077, |
|
"learning_rate": 1.5091557690490104e-07, |
|
"loss": 1.6551, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.6878844413247871, |
|
"grad_norm": 1.376913066395765, |
|
"learning_rate": 1.504802311781371e-07, |
|
"loss": 1.6494, |
|
"step": 18620 |
|
}, |
|
{ |
|
"epoch": 0.6886233075345882, |
|
"grad_norm": 1.441207784040776, |
|
"learning_rate": 1.5004555639419648e-07, |
|
"loss": 1.6697, |
|
"step": 18640 |
|
}, |
|
{ |
|
"epoch": 0.6893621737443892, |
|
"grad_norm": 2.5475644652288514, |
|
"learning_rate": 1.4961155489521253e-07, |
|
"loss": 1.6449, |
|
"step": 18660 |
|
}, |
|
{ |
|
"epoch": 0.6901010399541903, |
|
"grad_norm": 1.4330764200962958, |
|
"learning_rate": 1.4917822901969108e-07, |
|
"loss": 1.5962, |
|
"step": 18680 |
|
}, |
|
{ |
|
"epoch": 0.6908399061639914, |
|
"grad_norm": 1.5535375552432238, |
|
"learning_rate": 1.487455811024975e-07, |
|
"loss": 1.6682, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.6915787723737924, |
|
"grad_norm": 1.5430558472764233, |
|
"learning_rate": 1.4831361347484396e-07, |
|
"loss": 1.6646, |
|
"step": 18720 |
|
}, |
|
{ |
|
"epoch": 0.6923176385835935, |
|
"grad_norm": 1.5354124537032656, |
|
"learning_rate": 1.4788232846427718e-07, |
|
"loss": 1.6569, |
|
"step": 18740 |
|
}, |
|
{ |
|
"epoch": 0.6930565047933945, |
|
"grad_norm": 1.723896126450271, |
|
"learning_rate": 1.474517283946658e-07, |
|
"loss": 1.6694, |
|
"step": 18760 |
|
}, |
|
{ |
|
"epoch": 0.6937953710031955, |
|
"grad_norm": 1.4743738549149994, |
|
"learning_rate": 1.4702181558618777e-07, |
|
"loss": 1.6161, |
|
"step": 18780 |
|
}, |
|
{ |
|
"epoch": 0.6945342372129967, |
|
"grad_norm": 1.675747008439809, |
|
"learning_rate": 1.4659259235531796e-07, |
|
"loss": 1.6558, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.6952731034227977, |
|
"grad_norm": 1.760786257067446, |
|
"learning_rate": 1.4616406101481574e-07, |
|
"loss": 1.5887, |
|
"step": 18820 |
|
}, |
|
{ |
|
"epoch": 0.6960119696325988, |
|
"grad_norm": 2.8049367365120608, |
|
"learning_rate": 1.4573622387371217e-07, |
|
"loss": 1.6649, |
|
"step": 18840 |
|
}, |
|
{ |
|
"epoch": 0.6967508358423998, |
|
"grad_norm": 1.496529351669967, |
|
"learning_rate": 1.4530908323729782e-07, |
|
"loss": 1.6433, |
|
"step": 18860 |
|
}, |
|
{ |
|
"epoch": 0.6974897020522008, |
|
"grad_norm": 1.4994802420062043, |
|
"learning_rate": 1.448826414071105e-07, |
|
"loss": 1.6841, |
|
"step": 18880 |
|
}, |
|
{ |
|
"epoch": 0.698228568262002, |
|
"grad_norm": 1.420851366464802, |
|
"learning_rate": 1.4445690068092265e-07, |
|
"loss": 1.6504, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.698967434471803, |
|
"grad_norm": 1.7411191806669424, |
|
"learning_rate": 1.4403186335272888e-07, |
|
"loss": 1.6298, |
|
"step": 18920 |
|
}, |
|
{ |
|
"epoch": 0.6997063006816041, |
|
"grad_norm": 1.628227507992112, |
|
"learning_rate": 1.4360753171273364e-07, |
|
"loss": 1.673, |
|
"step": 18940 |
|
}, |
|
{ |
|
"epoch": 0.7004451668914051, |
|
"grad_norm": 1.7368645634603777, |
|
"learning_rate": 1.4318390804733927e-07, |
|
"loss": 1.6198, |
|
"step": 18960 |
|
}, |
|
{ |
|
"epoch": 0.7011840331012062, |
|
"grad_norm": 1.4616447916754742, |
|
"learning_rate": 1.4276099463913315e-07, |
|
"loss": 1.6096, |
|
"step": 18980 |
|
}, |
|
{ |
|
"epoch": 0.7019228993110073, |
|
"grad_norm": 1.517480098110094, |
|
"learning_rate": 1.4233879376687563e-07, |
|
"loss": 1.6345, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.7026617655208083, |
|
"grad_norm": 1.636195025828432, |
|
"learning_rate": 1.419173077054878e-07, |
|
"loss": 1.6119, |
|
"step": 19020 |
|
}, |
|
{ |
|
"epoch": 0.7034006317306094, |
|
"grad_norm": 1.5039586339840252, |
|
"learning_rate": 1.4149653872603917e-07, |
|
"loss": 1.7208, |
|
"step": 19040 |
|
}, |
|
{ |
|
"epoch": 0.7041394979404104, |
|
"grad_norm": 1.4728764699529369, |
|
"learning_rate": 1.410764890957353e-07, |
|
"loss": 1.6572, |
|
"step": 19060 |
|
}, |
|
{ |
|
"epoch": 0.7048783641502115, |
|
"grad_norm": 1.9218697223400836, |
|
"learning_rate": 1.406571610779059e-07, |
|
"loss": 1.6514, |
|
"step": 19080 |
|
}, |
|
{ |
|
"epoch": 0.7056172303600126, |
|
"grad_norm": 1.5761294021476189, |
|
"learning_rate": 1.4023855693199254e-07, |
|
"loss": 1.6381, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.7063560965698136, |
|
"grad_norm": 1.435908518604352, |
|
"learning_rate": 1.398206789135361e-07, |
|
"loss": 1.6126, |
|
"step": 19120 |
|
}, |
|
{ |
|
"epoch": 0.7070949627796147, |
|
"grad_norm": 4.717577212666518, |
|
"learning_rate": 1.3940352927416504e-07, |
|
"loss": 1.6647, |
|
"step": 19140 |
|
}, |
|
{ |
|
"epoch": 0.7078338289894157, |
|
"grad_norm": 2.1188904047967245, |
|
"learning_rate": 1.3898711026158323e-07, |
|
"loss": 1.6794, |
|
"step": 19160 |
|
}, |
|
{ |
|
"epoch": 0.7085726951992168, |
|
"grad_norm": 1.5687418673344722, |
|
"learning_rate": 1.3857142411955767e-07, |
|
"loss": 1.6474, |
|
"step": 19180 |
|
}, |
|
{ |
|
"epoch": 0.7093115614090179, |
|
"grad_norm": 1.6271449022527302, |
|
"learning_rate": 1.381564730879064e-07, |
|
"loss": 1.6347, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.7100504276188189, |
|
"grad_norm": 1.4693942372788273, |
|
"learning_rate": 1.377422594024867e-07, |
|
"loss": 1.6474, |
|
"step": 19220 |
|
}, |
|
{ |
|
"epoch": 0.71078929382862, |
|
"grad_norm": 1.488154969512232, |
|
"learning_rate": 1.373287852951826e-07, |
|
"loss": 1.6128, |
|
"step": 19240 |
|
}, |
|
{ |
|
"epoch": 0.711528160038421, |
|
"grad_norm": 1.5779135188256272, |
|
"learning_rate": 1.3691605299389328e-07, |
|
"loss": 1.7183, |
|
"step": 19260 |
|
}, |
|
{ |
|
"epoch": 0.7122670262482221, |
|
"grad_norm": 1.6650630460525442, |
|
"learning_rate": 1.3650406472252083e-07, |
|
"loss": 1.6683, |
|
"step": 19280 |
|
}, |
|
{ |
|
"epoch": 0.7130058924580231, |
|
"grad_norm": 1.4154650117196357, |
|
"learning_rate": 1.360928227009584e-07, |
|
"loss": 1.6717, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.7137447586678243, |
|
"grad_norm": 1.6468623503038222, |
|
"learning_rate": 1.3568232914507802e-07, |
|
"loss": 1.6348, |
|
"step": 19320 |
|
}, |
|
{ |
|
"epoch": 0.7144836248776253, |
|
"grad_norm": 1.5015397491680238, |
|
"learning_rate": 1.3527258626671898e-07, |
|
"loss": 1.6112, |
|
"step": 19340 |
|
}, |
|
{ |
|
"epoch": 0.7152224910874263, |
|
"grad_norm": 3.3400095996186865, |
|
"learning_rate": 1.348635962736755e-07, |
|
"loss": 1.6523, |
|
"step": 19360 |
|
}, |
|
{ |
|
"epoch": 0.7159613572972274, |
|
"grad_norm": 1.5139103946143873, |
|
"learning_rate": 1.344553613696854e-07, |
|
"loss": 1.6941, |
|
"step": 19380 |
|
}, |
|
{ |
|
"epoch": 0.7167002235070284, |
|
"grad_norm": 1.4051928644539238, |
|
"learning_rate": 1.340478837544175e-07, |
|
"loss": 1.6237, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.7174390897168296, |
|
"grad_norm": 1.5234389161550645, |
|
"learning_rate": 1.3364116562346055e-07, |
|
"loss": 1.6559, |
|
"step": 19420 |
|
}, |
|
{ |
|
"epoch": 0.7181779559266306, |
|
"grad_norm": 1.4205504198026582, |
|
"learning_rate": 1.3323520916831077e-07, |
|
"loss": 1.6478, |
|
"step": 19440 |
|
}, |
|
{ |
|
"epoch": 0.7189168221364316, |
|
"grad_norm": 1.5989862880917087, |
|
"learning_rate": 1.328300165763602e-07, |
|
"loss": 1.6123, |
|
"step": 19460 |
|
}, |
|
{ |
|
"epoch": 0.7196556883462327, |
|
"grad_norm": 1.6443108557654487, |
|
"learning_rate": 1.3242559003088546e-07, |
|
"loss": 1.6832, |
|
"step": 19480 |
|
}, |
|
{ |
|
"epoch": 0.7203945545560337, |
|
"grad_norm": 1.3202697054517272, |
|
"learning_rate": 1.3202193171103506e-07, |
|
"loss": 1.6339, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.7211334207658349, |
|
"grad_norm": 1.5006943077767945, |
|
"learning_rate": 1.316190437918182e-07, |
|
"loss": 1.6469, |
|
"step": 19520 |
|
}, |
|
{ |
|
"epoch": 0.7218722869756359, |
|
"grad_norm": 1.7379534891877164, |
|
"learning_rate": 1.3121692844409321e-07, |
|
"loss": 1.6797, |
|
"step": 19540 |
|
}, |
|
{ |
|
"epoch": 0.7226111531854369, |
|
"grad_norm": 1.526373724090785, |
|
"learning_rate": 1.308155878345553e-07, |
|
"loss": 1.6636, |
|
"step": 19560 |
|
}, |
|
{ |
|
"epoch": 0.723350019395238, |
|
"grad_norm": 2.0046685771285793, |
|
"learning_rate": 1.3041502412572542e-07, |
|
"loss": 1.6748, |
|
"step": 19580 |
|
}, |
|
{ |
|
"epoch": 0.724088885605039, |
|
"grad_norm": 1.4955882650728989, |
|
"learning_rate": 1.3001523947593845e-07, |
|
"loss": 1.6293, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.7248277518148402, |
|
"grad_norm": 2.4302511767713324, |
|
"learning_rate": 1.2961623603933134e-07, |
|
"loss": 1.6004, |
|
"step": 19620 |
|
}, |
|
{ |
|
"epoch": 0.7255666180246412, |
|
"grad_norm": 1.6494154347871601, |
|
"learning_rate": 1.2921801596583153e-07, |
|
"loss": 1.6136, |
|
"step": 19640 |
|
}, |
|
{ |
|
"epoch": 0.7263054842344423, |
|
"grad_norm": 1.4459727786023948, |
|
"learning_rate": 1.2882058140114594e-07, |
|
"loss": 1.6435, |
|
"step": 19660 |
|
}, |
|
{ |
|
"epoch": 0.7270443504442433, |
|
"grad_norm": 1.4490955525578755, |
|
"learning_rate": 1.2842393448674869e-07, |
|
"loss": 1.6508, |
|
"step": 19680 |
|
}, |
|
{ |
|
"epoch": 0.7277832166540443, |
|
"grad_norm": 1.7167939191812815, |
|
"learning_rate": 1.280280773598699e-07, |
|
"loss": 1.6299, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.7285220828638455, |
|
"grad_norm": 1.8412228497101617, |
|
"learning_rate": 1.2763301215348402e-07, |
|
"loss": 1.6758, |
|
"step": 19720 |
|
}, |
|
{ |
|
"epoch": 0.7292609490736465, |
|
"grad_norm": 1.6407591339864582, |
|
"learning_rate": 1.2723874099629866e-07, |
|
"loss": 1.6443, |
|
"step": 19740 |
|
}, |
|
{ |
|
"epoch": 0.7299998152834476, |
|
"grad_norm": 2.010243920808459, |
|
"learning_rate": 1.268452660127427e-07, |
|
"loss": 1.6317, |
|
"step": 19760 |
|
}, |
|
{ |
|
"epoch": 0.7307386814932486, |
|
"grad_norm": 1.521357800662826, |
|
"learning_rate": 1.2645258932295518e-07, |
|
"loss": 1.6162, |
|
"step": 19780 |
|
}, |
|
{ |
|
"epoch": 0.7314775477030496, |
|
"grad_norm": 1.5657714545631887, |
|
"learning_rate": 1.260607130427737e-07, |
|
"loss": 1.6134, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.7322164139128507, |
|
"grad_norm": 1.7902489767561236, |
|
"learning_rate": 1.2566963928372308e-07, |
|
"loss": 1.6633, |
|
"step": 19820 |
|
}, |
|
{ |
|
"epoch": 0.7329552801226518, |
|
"grad_norm": 2.0435731294538466, |
|
"learning_rate": 1.2527937015300378e-07, |
|
"loss": 1.6505, |
|
"step": 19840 |
|
}, |
|
{ |
|
"epoch": 0.7336941463324529, |
|
"grad_norm": 5.207754525218824, |
|
"learning_rate": 1.2488990775348092e-07, |
|
"loss": 1.6453, |
|
"step": 19860 |
|
}, |
|
{ |
|
"epoch": 0.7344330125422539, |
|
"grad_norm": 1.6112840529464336, |
|
"learning_rate": 1.245012541836728e-07, |
|
"loss": 1.6082, |
|
"step": 19880 |
|
}, |
|
{ |
|
"epoch": 0.7351718787520549, |
|
"grad_norm": 1.4827262765821532, |
|
"learning_rate": 1.241134115377394e-07, |
|
"loss": 1.6161, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.735910744961856, |
|
"grad_norm": 1.7219801506968755, |
|
"learning_rate": 1.2372638190547122e-07, |
|
"loss": 1.6305, |
|
"step": 19920 |
|
}, |
|
{ |
|
"epoch": 0.7366496111716571, |
|
"grad_norm": 1.3720219936046893, |
|
"learning_rate": 1.233401673722782e-07, |
|
"loss": 1.6099, |
|
"step": 19940 |
|
}, |
|
{ |
|
"epoch": 0.7373884773814582, |
|
"grad_norm": 1.7432612385035637, |
|
"learning_rate": 1.229547700191783e-07, |
|
"loss": 1.6372, |
|
"step": 19960 |
|
}, |
|
{ |
|
"epoch": 0.7381273435912592, |
|
"grad_norm": 3.2034872788326925, |
|
"learning_rate": 1.2257019192278617e-07, |
|
"loss": 1.6147, |
|
"step": 19980 |
|
}, |
|
{ |
|
"epoch": 0.7388662098010603, |
|
"grad_norm": 1.6442745462596664, |
|
"learning_rate": 1.2218643515530227e-07, |
|
"loss": 1.6344, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.7396050760108613, |
|
"grad_norm": 1.959102806007239, |
|
"learning_rate": 1.218035017845015e-07, |
|
"loss": 1.6451, |
|
"step": 20020 |
|
}, |
|
{ |
|
"epoch": 0.7403439422206624, |
|
"grad_norm": 1.6408059937998853, |
|
"learning_rate": 1.214213938737219e-07, |
|
"loss": 1.6757, |
|
"step": 20040 |
|
}, |
|
{ |
|
"epoch": 0.7410828084304635, |
|
"grad_norm": 1.5657243128524525, |
|
"learning_rate": 1.210591578161399e-07, |
|
"loss": 1.6359, |
|
"step": 20060 |
|
}, |
|
{ |
|
"epoch": 0.7418216746402645, |
|
"grad_norm": 1.4736673628427441, |
|
"learning_rate": 1.2067866547022443e-07, |
|
"loss": 1.6603, |
|
"step": 20080 |
|
}, |
|
{ |
|
"epoch": 0.7425605408500656, |
|
"grad_norm": 1.4833315219916223, |
|
"learning_rate": 1.2029900464522203e-07, |
|
"loss": 1.6342, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.7432994070598666, |
|
"grad_norm": 1.9340686772443259, |
|
"learning_rate": 1.1992017738683768e-07, |
|
"loss": 1.6416, |
|
"step": 20120 |
|
}, |
|
{ |
|
"epoch": 0.7440382732696676, |
|
"grad_norm": 1.6355654798248513, |
|
"learning_rate": 1.1954218573628499e-07, |
|
"loss": 1.6678, |
|
"step": 20140 |
|
}, |
|
{ |
|
"epoch": 0.7447771394794688, |
|
"grad_norm": 1.5624481100138734, |
|
"learning_rate": 1.1916503173027475e-07, |
|
"loss": 1.614, |
|
"step": 20160 |
|
}, |
|
{ |
|
"epoch": 0.7455160056892698, |
|
"grad_norm": 1.5029974061648055, |
|
"learning_rate": 1.1878871740100476e-07, |
|
"loss": 1.639, |
|
"step": 20180 |
|
}, |
|
{ |
|
"epoch": 0.7462548718990709, |
|
"grad_norm": 1.4683397727523646, |
|
"learning_rate": 1.1841324477614812e-07, |
|
"loss": 1.6516, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.7469937381088719, |
|
"grad_norm": 1.478703041295488, |
|
"learning_rate": 1.1803861587884268e-07, |
|
"loss": 1.7247, |
|
"step": 20220 |
|
}, |
|
{ |
|
"epoch": 0.7477326043186729, |
|
"grad_norm": 1.4765169074470068, |
|
"learning_rate": 1.1766483272768017e-07, |
|
"loss": 1.6786, |
|
"step": 20240 |
|
}, |
|
{ |
|
"epoch": 0.7484714705284741, |
|
"grad_norm": 1.3861683142566674, |
|
"learning_rate": 1.1729189733669528e-07, |
|
"loss": 1.6242, |
|
"step": 20260 |
|
}, |
|
{ |
|
"epoch": 0.7492103367382751, |
|
"grad_norm": 1.5107470749741048, |
|
"learning_rate": 1.1691981171535459e-07, |
|
"loss": 1.6476, |
|
"step": 20280 |
|
}, |
|
{ |
|
"epoch": 0.7499492029480762, |
|
"grad_norm": 1.5696138247640767, |
|
"learning_rate": 1.1654857786854591e-07, |
|
"loss": 1.6691, |
|
"step": 20300 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 27068, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 6767, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3859009492746240.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|