{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.49016360866397296,
  "eval_steps": 500,
  "global_step": 59200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0008279790686891435,
      "grad_norm": 0.2026122659444809,
      "learning_rate": 0.00019994481795043316,
      "loss": 1.9308,
      "step": 100
    },
    {
      "epoch": 0.001655958137378287,
      "grad_norm": 0.189473494887352,
      "learning_rate": 0.00019988961934128355,
      "loss": 1.796,
      "step": 200
    },
    {
      "epoch": 0.0024839372060674308,
      "grad_norm": 0.21440361440181732,
      "learning_rate": 0.00019983442073213394,
      "loss": 1.7292,
      "step": 300
    },
    {
      "epoch": 0.003311916274756574,
      "grad_norm": 0.4124305844306946,
      "learning_rate": 0.00019977922212298434,
      "loss": 1.7479,
      "step": 400
    },
    {
      "epoch": 0.004139895343445717,
      "grad_norm": 0.326767235994339,
      "learning_rate": 0.00019972402351383473,
      "loss": 1.7445,
      "step": 500
    },
    {
      "epoch": 0.0049678744121348616,
      "grad_norm": 0.23368559777736664,
      "learning_rate": 0.00019966882490468513,
      "loss": 1.7146,
      "step": 600
    },
    {
      "epoch": 0.005795853480824005,
      "grad_norm": 0.2080787867307663,
      "learning_rate": 0.00019961362629553552,
      "loss": 1.7009,
      "step": 700
    },
    {
      "epoch": 0.006623832549513148,
      "grad_norm": 0.23906764388084412,
      "learning_rate": 0.00019955842768638592,
      "loss": 1.7346,
      "step": 800
    },
    {
      "epoch": 0.0074518116182022915,
      "grad_norm": 0.21370458602905273,
      "learning_rate": 0.00019950322907723634,
      "loss": 1.7161,
      "step": 900
    },
    {
      "epoch": 0.008279790686891435,
      "grad_norm": 0.2232082337141037,
      "learning_rate": 0.0001994480304680867,
      "loss": 1.6845,
      "step": 1000
    },
    {
      "epoch": 0.009107769755580579,
      "grad_norm": 0.28137433528900146,
      "learning_rate": 0.00019939283185893713,
      "loss": 1.7064,
      "step": 1100
    },
    {
      "epoch": 0.009935748824269723,
      "grad_norm": 0.27577510476112366,
      "learning_rate": 0.00019933763324978752,
      "loss": 1.7216,
      "step": 1200
    },
    {
      "epoch": 0.010763727892958866,
      "grad_norm": 0.19732429087162018,
      "learning_rate": 0.0001992824346406379,
      "loss": 1.6785,
      "step": 1300
    },
    {
      "epoch": 0.01159170696164801,
      "grad_norm": 0.2802606523036957,
      "learning_rate": 0.0001992272360314883,
      "loss": 1.6987,
      "step": 1400
    },
    {
      "epoch": 0.012419686030337154,
      "grad_norm": 0.2615717053413391,
      "learning_rate": 0.0001991720374223387,
      "loss": 1.7157,
      "step": 1500
    },
    {
      "epoch": 0.013247665099026296,
      "grad_norm": 0.21463246643543243,
      "learning_rate": 0.00019911683881318907,
      "loss": 1.709,
      "step": 1600
    },
    {
      "epoch": 0.01407564416771544,
      "grad_norm": 0.23596777021884918,
      "learning_rate": 0.0001990616402040395,
      "loss": 1.7288,
      "step": 1700
    },
    {
      "epoch": 0.014903623236404583,
      "grad_norm": 0.26049914956092834,
      "learning_rate": 0.00019900644159488989,
      "loss": 1.7007,
      "step": 1800
    },
    {
      "epoch": 0.01573160230509373,
      "grad_norm": 0.1828412264585495,
      "learning_rate": 0.00019895124298574025,
      "loss": 1.6872,
      "step": 1900
    },
    {
      "epoch": 0.01655958137378287,
      "grad_norm": 0.30434760451316833,
      "learning_rate": 0.00019889604437659067,
      "loss": 1.7084,
      "step": 2000
    },
    {
      "epoch": 0.017387560442472014,
      "grad_norm": 0.19470958411693573,
      "learning_rate": 0.00019884084576744107,
      "loss": 1.6804,
      "step": 2100
    },
    {
      "epoch": 0.018215539511161158,
      "grad_norm": 0.23792260885238647,
      "learning_rate": 0.00019878564715829144,
      "loss": 1.6873,
      "step": 2200
    },
    {
      "epoch": 0.019043518579850302,
      "grad_norm": 0.2714627683162689,
      "learning_rate": 0.00019873044854914186,
      "loss": 1.6803,
      "step": 2300
    },
    {
      "epoch": 0.019871497648539446,
      "grad_norm": 0.2310653179883957,
      "learning_rate": 0.00019867524993999225,
      "loss": 1.7056,
      "step": 2400
    },
    {
      "epoch": 0.020699476717228587,
      "grad_norm": 0.21665313839912415,
      "learning_rate": 0.00019862005133084262,
      "loss": 1.6636,
      "step": 2500
    },
    {
      "epoch": 0.02152745578591773,
      "grad_norm": 0.26358917355537415,
      "learning_rate": 0.00019856485272169304,
      "loss": 1.6567,
      "step": 2600
    },
    {
      "epoch": 0.022355434854606875,
      "grad_norm": 0.2334413081407547,
      "learning_rate": 0.00019850965411254343,
      "loss": 1.6666,
      "step": 2700
    },
    {
      "epoch": 0.02318341392329602,
      "grad_norm": 0.20429347455501556,
      "learning_rate": 0.0001984544555033938,
      "loss": 1.7123,
      "step": 2800
    },
    {
      "epoch": 0.024011392991985164,
      "grad_norm": 0.22018109261989594,
      "learning_rate": 0.00019839925689424422,
      "loss": 1.6405,
      "step": 2900
    },
    {
      "epoch": 0.024839372060674308,
      "grad_norm": 0.2289331555366516,
      "learning_rate": 0.00019834405828509462,
      "loss": 1.6769,
      "step": 3000
    },
    {
      "epoch": 0.02566735112936345,
      "grad_norm": 0.21042221784591675,
      "learning_rate": 0.00019828885967594498,
      "loss": 1.6559,
      "step": 3100
    },
    {
      "epoch": 0.026495330198052593,
      "grad_norm": 0.24456587433815002,
      "learning_rate": 0.0001982336610667954,
      "loss": 1.6889,
      "step": 3200
    },
    {
      "epoch": 0.027323309266741737,
      "grad_norm": 0.19650936126708984,
      "learning_rate": 0.0001981784624576458,
      "loss": 1.6577,
      "step": 3300
    },
    {
      "epoch": 0.02815128833543088,
      "grad_norm": 0.2078315168619156,
      "learning_rate": 0.00019812326384849617,
      "loss": 1.6968,
      "step": 3400
    },
    {
      "epoch": 0.028979267404120025,
      "grad_norm": 0.27083057165145874,
      "learning_rate": 0.0001980680652393466,
      "loss": 1.6851,
      "step": 3500
    },
    {
      "epoch": 0.029807246472809166,
      "grad_norm": 0.19873012602329254,
      "learning_rate": 0.00019801286663019698,
      "loss": 1.6642,
      "step": 3600
    },
    {
      "epoch": 0.03063522554149831,
      "grad_norm": 0.25127413868904114,
      "learning_rate": 0.00019795766802104735,
      "loss": 1.6925,
      "step": 3700
    },
    {
      "epoch": 0.03146320461018746,
      "grad_norm": 0.28270578384399414,
      "learning_rate": 0.00019790246941189777,
      "loss": 1.6823,
      "step": 3800
    },
    {
      "epoch": 0.032291183678876595,
      "grad_norm": 0.22762413322925568,
      "learning_rate": 0.00019784727080274816,
      "loss": 1.6802,
      "step": 3900
    },
    {
      "epoch": 0.03311916274756574,
      "grad_norm": 0.24451415240764618,
      "learning_rate": 0.00019779207219359853,
      "loss": 1.6855,
      "step": 4000
    },
    {
      "epoch": 0.03394714181625488,
      "grad_norm": 0.18539206683635712,
      "learning_rate": 0.00019773687358444895,
      "loss": 1.6753,
      "step": 4100
    },
    {
      "epoch": 0.03477512088494403,
      "grad_norm": 0.20416654646396637,
      "learning_rate": 0.00019768167497529935,
      "loss": 1.6892,
      "step": 4200
    },
    {
      "epoch": 0.03560309995363317,
      "grad_norm": 0.22939777374267578,
      "learning_rate": 0.00019762647636614971,
      "loss": 1.6953,
      "step": 4300
    },
    {
      "epoch": 0.036431079022322316,
      "grad_norm": 0.21451617777347565,
      "learning_rate": 0.00019757127775700014,
      "loss": 1.6609,
      "step": 4400
    },
    {
      "epoch": 0.03725905809101146,
      "grad_norm": 0.20271216332912445,
      "learning_rate": 0.00019751607914785053,
      "loss": 1.6868,
      "step": 4500
    },
    {
      "epoch": 0.038087037159700604,
      "grad_norm": 0.23506902158260345,
      "learning_rate": 0.0001974608805387009,
      "loss": 1.7012,
      "step": 4600
    },
    {
      "epoch": 0.03891501622838975,
      "grad_norm": 0.19889087975025177,
      "learning_rate": 0.00019740568192955132,
      "loss": 1.6867,
      "step": 4700
    },
    {
      "epoch": 0.03974299529707889,
      "grad_norm": 0.21165409684181213,
      "learning_rate": 0.0001973504833204017,
      "loss": 1.6666,
      "step": 4800
    },
    {
      "epoch": 0.04057097436576804,
      "grad_norm": 0.25541654229164124,
      "learning_rate": 0.0001972952847112521,
      "loss": 1.7012,
      "step": 4900
    },
    {
      "epoch": 0.041398953434457174,
      "grad_norm": 0.2289835512638092,
      "learning_rate": 0.0001972400861021025,
      "loss": 1.6351,
      "step": 5000
    },
    {
      "epoch": 0.04222693250314632,
      "grad_norm": 0.20699161291122437,
      "learning_rate": 0.0001971848874929529,
      "loss": 1.6834,
      "step": 5100
    },
    {
      "epoch": 0.04305491157183546,
      "grad_norm": 0.21547134220600128,
      "learning_rate": 0.0001971296888838033,
      "loss": 1.6605,
      "step": 5200
    },
    {
      "epoch": 0.043882890640524606,
      "grad_norm": 0.21996253728866577,
      "learning_rate": 0.00019707449027465368,
      "loss": 1.6747,
      "step": 5300
    },
    {
      "epoch": 0.04471086970921375,
      "grad_norm": 0.19425031542778015,
      "learning_rate": 0.00019701929166550408,
      "loss": 1.6677,
      "step": 5400
    },
    {
      "epoch": 0.045538848777902895,
      "grad_norm": 0.21223612129688263,
      "learning_rate": 0.00019696409305635447,
      "loss": 1.6401,
      "step": 5500
    },
    {
      "epoch": 0.04636682784659204,
      "grad_norm": 0.21771861612796783,
      "learning_rate": 0.00019690889444720487,
      "loss": 1.6655,
      "step": 5600
    },
    {
      "epoch": 0.04719480691528118,
      "grad_norm": 0.17102624475955963,
      "learning_rate": 0.00019685369583805526,
      "loss": 1.6373,
      "step": 5700
    },
    {
      "epoch": 0.04802278598397033,
      "grad_norm": 1.002648115158081,
      "learning_rate": 0.00019679849722890565,
      "loss": 1.6578,
      "step": 5800
    },
    {
      "epoch": 0.04885076505265947,
      "grad_norm": 0.209450364112854,
      "learning_rate": 0.00019674329861975605,
      "loss": 1.7114,
      "step": 5900
    },
    {
      "epoch": 0.049678744121348616,
      "grad_norm": 0.21328583359718323,
      "learning_rate": 0.00019668810001060644,
      "loss": 1.6816,
      "step": 6000
    },
    {
      "epoch": 0.05050672319003775,
      "grad_norm": 0.2184838354587555,
      "learning_rate": 0.00019663290140145684,
      "loss": 1.6596,
      "step": 6100
    },
    {
      "epoch": 0.0513347022587269,
      "grad_norm": 0.23963460326194763,
      "learning_rate": 0.00019657825477839872,
      "loss": 1.6627,
      "step": 6200
    },
    {
      "epoch": 0.05216268132741604,
      "grad_norm": 0.2165103405714035,
      "learning_rate": 0.00019652305616924912,
      "loss": 1.6808,
      "step": 6300
    },
    {
      "epoch": 0.052990660396105185,
      "grad_norm": 0.19727864861488342,
      "learning_rate": 0.0001964678575600995,
      "loss": 1.7164,
      "step": 6400
    },
    {
      "epoch": 0.05381863946479433,
      "grad_norm": 0.23533885180950165,
      "learning_rate": 0.0001964126589509499,
      "loss": 1.6575,
      "step": 6500
    },
    {
      "epoch": 0.054646618533483474,
      "grad_norm": 0.23083338141441345,
      "learning_rate": 0.0001963574603418003,
      "loss": 1.6481,
      "step": 6600
    },
    {
      "epoch": 0.05547459760217262,
      "grad_norm": 0.18879123032093048,
      "learning_rate": 0.0001963022617326507,
      "loss": 1.6798,
      "step": 6700
    },
    {
      "epoch": 0.05630257667086176,
      "grad_norm": 0.2472197264432907,
      "learning_rate": 0.0001962470631235011,
      "loss": 1.6739,
      "step": 6800
    },
    {
      "epoch": 0.057130555739550906,
      "grad_norm": 0.19961301982402802,
      "learning_rate": 0.00019619186451435148,
      "loss": 1.6307,
      "step": 6900
    },
    {
      "epoch": 0.05795853480824005,
      "grad_norm": 0.1921355426311493,
      "learning_rate": 0.00019613666590520188,
      "loss": 1.6432,
      "step": 7000
    },
    {
      "epoch": 0.058786513876929194,
      "grad_norm": 0.29221248626708984,
      "learning_rate": 0.00019608146729605227,
      "loss": 1.6591,
      "step": 7100
    },
    {
      "epoch": 0.05961449294561833,
      "grad_norm": 0.2565169632434845,
      "learning_rate": 0.00019602626868690267,
      "loss": 1.6314,
      "step": 7200
    },
    {
      "epoch": 0.060442472014307476,
      "grad_norm": 0.19964313507080078,
      "learning_rate": 0.00019597107007775306,
      "loss": 1.6571,
      "step": 7300
    },
    {
      "epoch": 0.06127045108299662,
      "grad_norm": 0.19322757422924042,
      "learning_rate": 0.00019591587146860345,
      "loss": 1.6553,
      "step": 7400
    },
    {
      "epoch": 0.062098430151685764,
      "grad_norm": 0.2548861801624298,
      "learning_rate": 0.00019586067285945385,
      "loss": 1.6791,
      "step": 7500
    },
    {
      "epoch": 0.06292640922037492,
      "grad_norm": 0.2616177797317505,
      "learning_rate": 0.00019580547425030424,
      "loss": 1.6208,
      "step": 7600
    },
    {
      "epoch": 0.06375438828906405,
      "grad_norm": 0.2758786976337433,
      "learning_rate": 0.00019575027564115464,
      "loss": 1.6838,
      "step": 7700
    },
    {
      "epoch": 0.06458236735775319,
      "grad_norm": 0.1881067007780075,
      "learning_rate": 0.00019569507703200503,
      "loss": 1.6577,
      "step": 7800
    },
    {
      "epoch": 0.06541034642644233,
      "grad_norm": 0.21222031116485596,
      "learning_rate": 0.00019563987842285543,
      "loss": 1.6952,
      "step": 7900
    },
    {
      "epoch": 0.06623832549513148,
      "grad_norm": 0.23496469855308533,
      "learning_rate": 0.00019558467981370582,
      "loss": 1.6604,
      "step": 8000
    },
    {
      "epoch": 0.06706630456382062,
      "grad_norm": 0.21739377081394196,
      "learning_rate": 0.00019552948120455621,
      "loss": 1.6493,
      "step": 8100
    },
    {
      "epoch": 0.06789428363250977,
      "grad_norm": 0.2083568572998047,
      "learning_rate": 0.0001954742825954066,
      "loss": 1.6507,
      "step": 8200
    },
    {
      "epoch": 0.06872226270119891,
      "grad_norm": 0.2494797557592392,
      "learning_rate": 0.000195419083986257,
      "loss": 1.6718,
      "step": 8300
    },
    {
      "epoch": 0.06955024176988805,
      "grad_norm": 0.22432386875152588,
      "learning_rate": 0.0001953638853771074,
      "loss": 1.6758,
      "step": 8400
    },
    {
      "epoch": 0.0703782208385772,
      "grad_norm": 0.21674951910972595,
      "learning_rate": 0.00019530923875404928,
      "loss": 1.6437,
      "step": 8500
    },
    {
      "epoch": 0.07120619990726634,
      "grad_norm": 0.188558429479599,
      "learning_rate": 0.00019525404014489968,
      "loss": 1.6345,
      "step": 8600
    },
    {
      "epoch": 0.07203417897595549,
      "grad_norm": 0.22968587279319763,
      "learning_rate": 0.00019519884153575007,
      "loss": 1.6609,
      "step": 8700
    },
    {
      "epoch": 0.07286215804464463,
      "grad_norm": 0.2717007100582123,
      "learning_rate": 0.00019514364292660047,
      "loss": 1.6514,
      "step": 8800
    },
    {
      "epoch": 0.07369013711333378,
      "grad_norm": 0.2857931852340698,
      "learning_rate": 0.00019508844431745086,
      "loss": 1.6669,
      "step": 8900
    },
    {
      "epoch": 0.07451811618202292,
      "grad_norm": 0.23388004302978516,
      "learning_rate": 0.00019503324570830125,
      "loss": 1.6785,
      "step": 9000
    },
    {
      "epoch": 0.07534609525071206,
      "grad_norm": 0.20342163741588593,
      "learning_rate": 0.00019497804709915165,
      "loss": 1.6777,
      "step": 9100
    },
    {
      "epoch": 0.07617407431940121,
      "grad_norm": 0.2345508486032486,
      "learning_rate": 0.00019492340047609353,
      "loss": 1.6672,
      "step": 9200
    },
    {
      "epoch": 0.07700205338809035,
      "grad_norm": 0.20348191261291504,
      "learning_rate": 0.00019486820186694396,
      "loss": 1.6851,
      "step": 9300
    },
    {
      "epoch": 0.0778300324567795,
      "grad_norm": 0.2147115170955658,
      "learning_rate": 0.00019481300325779435,
      "loss": 1.6768,
      "step": 9400
    },
    {
      "epoch": 0.07865801152546864,
      "grad_norm": 0.24411408603191376,
      "learning_rate": 0.00019475780464864472,
      "loss": 1.6488,
      "step": 9500
    },
    {
      "epoch": 0.07948599059415778,
      "grad_norm": 0.2360156625509262,
      "learning_rate": 0.00019470260603949514,
      "loss": 1.6248,
      "step": 9600
    },
    {
      "epoch": 0.08031396966284693,
      "grad_norm": 0.24534212052822113,
      "learning_rate": 0.00019464740743034553,
      "loss": 1.6425,
      "step": 9700
    },
    {
      "epoch": 0.08114194873153607,
      "grad_norm": 0.26849544048309326,
      "learning_rate": 0.0001945922088211959,
      "loss": 1.668,
      "step": 9800
    },
    {
      "epoch": 0.0819699278002252,
      "grad_norm": 0.20883604884147644,
      "learning_rate": 0.00019453701021204632,
      "loss": 1.6492,
      "step": 9900
    },
    {
      "epoch": 0.08279790686891435,
      "grad_norm": 0.22967711091041565,
      "learning_rate": 0.00019448181160289672,
      "loss": 1.6667,
      "step": 10000
    },
    {
      "epoch": 0.08362588593760349,
      "grad_norm": 0.24589480459690094,
      "learning_rate": 0.00019442661299374708,
      "loss": 1.62,
      "step": 10100
    },
    {
      "epoch": 0.08445386500629264,
      "grad_norm": 0.26812034845352173,
      "learning_rate": 0.0001943714143845975,
      "loss": 1.6351,
      "step": 10200
    },
    {
      "epoch": 0.08528184407498178,
      "grad_norm": 0.24378615617752075,
      "learning_rate": 0.0001943162157754479,
      "loss": 1.6097,
      "step": 10300
    },
    {
      "epoch": 0.08610982314367092,
      "grad_norm": 0.31285640597343445,
      "learning_rate": 0.00019426101716629827,
      "loss": 1.7014,
      "step": 10400
    },
    {
      "epoch": 0.08693780221236007,
      "grad_norm": 0.277204692363739,
      "learning_rate": 0.0001942058185571487,
      "loss": 1.6482,
      "step": 10500
    },
    {
      "epoch": 0.08776578128104921,
      "grad_norm": 0.22598931193351746,
      "learning_rate": 0.00019415061994799908,
      "loss": 1.6459,
      "step": 10600
    },
    {
      "epoch": 0.08859376034973836,
      "grad_norm": 0.3453767001628876,
      "learning_rate": 0.00019409542133884945,
      "loss": 1.6549,
      "step": 10700
    },
    {
      "epoch": 0.0894217394184275,
      "grad_norm": 0.2226220667362213,
      "learning_rate": 0.00019404022272969987,
      "loss": 1.6481,
      "step": 10800
    },
    {
      "epoch": 0.09024971848711665,
      "grad_norm": 0.2280457317829132,
      "learning_rate": 0.00019398502412055026,
      "loss": 1.6587,
      "step": 10900
    },
    {
      "epoch": 0.09107769755580579,
      "grad_norm": 0.2163500040769577,
      "learning_rate": 0.00019392982551140063,
      "loss": 1.6693,
      "step": 11000
    },
    {
      "epoch": 0.09190567662449493,
      "grad_norm": 0.19597041606903076,
      "learning_rate": 0.00019387462690225105,
      "loss": 1.6313,
      "step": 11100
    },
    {
      "epoch": 0.09273365569318408,
      "grad_norm": 0.2143152803182602,
      "learning_rate": 0.00019381942829310145,
      "loss": 1.6543,
      "step": 11200
    },
    {
      "epoch": 0.09356163476187322,
      "grad_norm": 0.2231709212064743,
      "learning_rate": 0.0001937642296839518,
      "loss": 1.6534,
      "step": 11300
    },
    {
      "epoch": 0.09438961383056237,
      "grad_norm": 0.21003256738185883,
      "learning_rate": 0.00019370903107480223,
      "loss": 1.644,
      "step": 11400
    },
    {
      "epoch": 0.09521759289925151,
      "grad_norm": 0.23484201729297638,
      "learning_rate": 0.00019365383246565263,
      "loss": 1.67,
      "step": 11500
    },
    {
      "epoch": 0.09604557196794065,
      "grad_norm": 0.23727314174175262,
      "learning_rate": 0.000193598633856503,
      "loss": 1.6078,
      "step": 11600
    },
    {
      "epoch": 0.0968735510366298,
      "grad_norm": 0.2734909653663635,
      "learning_rate": 0.00019354343524735342,
      "loss": 1.6389,
      "step": 11700
    },
    {
      "epoch": 0.09770153010531894,
      "grad_norm": 0.266926646232605,
      "learning_rate": 0.0001934882366382038,
      "loss": 1.6522,
      "step": 11800
    },
    {
      "epoch": 0.09852950917400809,
      "grad_norm": 0.22209756076335907,
      "learning_rate": 0.00019343303802905418,
      "loss": 1.679,
      "step": 11900
    },
    {
      "epoch": 0.09935748824269723,
      "grad_norm": 0.20740656554698944,
      "learning_rate": 0.0001933778394199046,
      "loss": 1.6494,
      "step": 12000
    },
    {
      "epoch": 0.10018546731138636,
      "grad_norm": 0.19304044544696808,
      "learning_rate": 0.000193322640810755,
      "loss": 1.644,
      "step": 12100
    },
    {
      "epoch": 0.1010134463800755,
      "grad_norm": 0.2168605774641037,
      "learning_rate": 0.0001932674422016054,
      "loss": 1.673,
      "step": 12200
    },
    {
      "epoch": 0.10184142544876465,
      "grad_norm": 0.2477482557296753,
      "learning_rate": 0.00019321224359245578,
      "loss": 1.6912,
      "step": 12300
    },
    {
      "epoch": 0.1026694045174538,
      "grad_norm": 0.220897376537323,
      "learning_rate": 0.00019315704498330618,
      "loss": 1.6553,
      "step": 12400
    },
    {
      "epoch": 0.10349738358614294,
      "grad_norm": 0.22782349586486816,
      "learning_rate": 0.00019310184637415657,
      "loss": 1.6585,
      "step": 12500
    },
    {
      "epoch": 0.10432536265483208,
      "grad_norm": 0.22199232876300812,
      "learning_rate": 0.00019304664776500697,
      "loss": 1.6617,
      "step": 12600
    },
    {
      "epoch": 0.10515334172352123,
      "grad_norm": 0.19177618622779846,
      "learning_rate": 0.00019299144915585736,
      "loss": 1.6788,
      "step": 12700
    },
    {
      "epoch": 0.10598132079221037,
      "grad_norm": 0.20250776410102844,
      "learning_rate": 0.00019293625054670775,
      "loss": 1.6329,
      "step": 12800
    },
    {
      "epoch": 0.10680929986089951,
      "grad_norm": 0.23451390862464905,
      "learning_rate": 0.00019288105193755815,
      "loss": 1.6336,
      "step": 12900
    },
    {
      "epoch": 0.10763727892958866,
      "grad_norm": 0.20917341113090515,
      "learning_rate": 0.00019282585332840854,
      "loss": 1.6436,
      "step": 13000
    },
    {
      "epoch": 0.1084652579982778,
      "grad_norm": 0.24702110886573792,
      "learning_rate": 0.00019277065471925894,
      "loss": 1.6569,
      "step": 13100
    },
    {
      "epoch": 0.10929323706696695,
      "grad_norm": 0.2046515792608261,
      "learning_rate": 0.00019271545611010933,
      "loss": 1.6728,
      "step": 13200
    },
    {
      "epoch": 0.11012121613565609,
      "grad_norm": 0.23590846359729767,
      "learning_rate": 0.00019266025750095973,
      "loss": 1.6808,
      "step": 13300
    },
    {
      "epoch": 0.11094919520434524,
      "grad_norm": 0.2132890671491623,
      "learning_rate": 0.00019260505889181012,
      "loss": 1.6428,
      "step": 13400
    },
    {
      "epoch": 0.11177717427303438,
      "grad_norm": 0.22132565081119537,
      "learning_rate": 0.000192550412268752,
      "loss": 1.688,
      "step": 13500
    },
    {
      "epoch": 0.11260515334172352,
      "grad_norm": 0.2686227262020111,
      "learning_rate": 0.0001924952136596024,
      "loss": 1.6523,
      "step": 13600
    },
    {
      "epoch": 0.11343313241041267,
      "grad_norm": 0.2114957571029663,
      "learning_rate": 0.0001924400150504528,
      "loss": 1.643,
      "step": 13700
    },
    {
      "epoch": 0.11426111147910181,
      "grad_norm": 0.22338928282260895,
      "learning_rate": 0.0001923848164413032,
      "loss": 1.6371,
      "step": 13800
    },
    {
      "epoch": 0.11508909054779096,
      "grad_norm": 0.27738282084465027,
      "learning_rate": 0.00019232961783215358,
      "loss": 1.664,
      "step": 13900
    },
    {
      "epoch": 0.1159170696164801,
      "grad_norm": 0.211566761136055,
      "learning_rate": 0.00019227441922300398,
      "loss": 1.7005,
      "step": 14000
    },
    {
      "epoch": 0.11674504868516924,
      "grad_norm": 0.23775742948055267,
      "learning_rate": 0.00019221922061385437,
      "loss": 1.6538,
      "step": 14100
    },
    {
      "epoch": 0.11757302775385839,
      "grad_norm": 0.24485789239406586,
      "learning_rate": 0.00019216402200470477,
      "loss": 1.6969,
      "step": 14200
    },
    {
      "epoch": 0.11840100682254752,
      "grad_norm": 0.2573772966861725,
      "learning_rate": 0.00019210882339555516,
      "loss": 1.6268,
      "step": 14300
    },
    {
      "epoch": 0.11922898589123666,
      "grad_norm": 0.24128000438213348,
      "learning_rate": 0.00019205362478640555,
      "loss": 1.6765,
      "step": 14400
    },
    {
      "epoch": 0.12005696495992581,
      "grad_norm": 0.2213265597820282,
      "learning_rate": 0.00019199842617725595,
      "loss": 1.6483,
      "step": 14500
    },
    {
      "epoch": 0.12088494402861495,
      "grad_norm": 0.3523562252521515,
      "learning_rate": 0.00019194322756810634,
      "loss": 1.6394,
      "step": 14600
    },
    {
      "epoch": 0.1217129230973041,
      "grad_norm": 0.26578399538993835,
      "learning_rate": 0.00019188802895895674,
      "loss": 1.7057,
      "step": 14700
    },
    {
      "epoch": 0.12254090216599324,
      "grad_norm": 0.2317681610584259,
      "learning_rate": 0.00019183283034980713,
      "loss": 1.6244,
      "step": 14800
    },
    {
      "epoch": 0.12336888123468238,
      "grad_norm": 0.2221396416425705,
      "learning_rate": 0.00019177763174065752,
      "loss": 1.6442,
      "step": 14900
    },
    {
      "epoch": 0.12419686030337153,
      "grad_norm": 0.2175658941268921,
      "learning_rate": 0.00019172243313150792,
      "loss": 1.6643,
      "step": 15000
    },
    {
      "epoch": 0.1250248393720607,
      "grad_norm": 0.21838437020778656,
      "learning_rate": 0.0001916672345223583,
      "loss": 1.6629,
      "step": 15100
    },
    {
      "epoch": 0.12585281844074983,
      "grad_norm": 0.19501908123493195,
      "learning_rate": 0.0001916120359132087,
      "loss": 1.6466,
      "step": 15200
    },
    {
      "epoch": 0.12668079750943897,
      "grad_norm": 0.24163535237312317,
      "learning_rate": 0.0001915568373040591,
      "loss": 1.6448,
      "step": 15300
    },
    {
      "epoch": 0.1275087765781281,
      "grad_norm": 0.2042425274848938,
      "learning_rate": 0.0001915016386949095,
      "loss": 1.6639,
      "step": 15400
    },
    {
      "epoch": 0.12833675564681724,
      "grad_norm": 0.22937864065170288,
      "learning_rate": 0.0001914464400857599,
      "loss": 1.635,
      "step": 15500
    },
    {
      "epoch": 0.12916473471550638,
      "grad_norm": 0.2340078055858612,
      "learning_rate": 0.00019139124147661028,
      "loss": 1.6335,
      "step": 15600
    },
    {
      "epoch": 0.12999271378419552,
      "grad_norm": 0.24161306023597717,
      "learning_rate": 0.00019133604286746068,
      "loss": 1.634,
      "step": 15700
    },
    {
      "epoch": 0.13082069285288467,
      "grad_norm": 0.25775766372680664,
      "learning_rate": 0.00019128084425831107,
      "loss": 1.6331,
      "step": 15800
    },
    {
      "epoch": 0.1316486719215738,
      "grad_norm": 0.23609529435634613,
      "learning_rate": 0.00019122564564916147,
      "loss": 1.6431,
      "step": 15900
    },
    {
      "epoch": 0.13247665099026296,
      "grad_norm": 0.24818001687526703,
      "learning_rate": 0.00019117044704001186,
      "loss": 1.6355,
      "step": 16000
    },
    {
      "epoch": 0.1333046300589521,
      "grad_norm": 0.2805030941963196,
      "learning_rate": 0.00019111580041695375,
      "loss": 1.6459,
      "step": 16100
    },
    {
      "epoch": 0.13413260912764124,
      "grad_norm": 0.2721966505050659,
      "learning_rate": 0.00019106060180780417,
      "loss": 1.6384,
      "step": 16200
    },
    {
      "epoch": 0.1349605881963304,
      "grad_norm": 0.23994600772857666,
      "learning_rate": 0.00019100540319865454,
      "loss": 1.627,
      "step": 16300
    },
    {
      "epoch": 0.13578856726501953,
      "grad_norm": 0.20296916365623474,
      "learning_rate": 0.00019095020458950496,
      "loss": 1.6528,
      "step": 16400
    },
    {
      "epoch": 0.13661654633370868,
      "grad_norm": 0.21805709600448608,
      "learning_rate": 0.00019089500598035535,
      "loss": 1.64,
      "step": 16500
    },
    {
      "epoch": 0.13744452540239782,
      "grad_norm": 0.25123125314712524,
      "learning_rate": 0.00019083980737120572,
      "loss": 1.6889,
      "step": 16600
    },
    {
      "epoch": 0.13827250447108697,
      "grad_norm": 0.2638598680496216,
      "learning_rate": 0.00019078460876205614,
      "loss": 1.6507,
      "step": 16700
    },
    {
      "epoch": 0.1391004835397761,
      "grad_norm": 0.21334075927734375,
      "learning_rate": 0.00019072941015290653,
      "loss": 1.6428,
      "step": 16800
    },
    {
      "epoch": 0.13992846260846525,
      "grad_norm": 0.2150932401418686,
      "learning_rate": 0.0001906742115437569,
      "loss": 1.6277,
      "step": 16900
    },
    {
      "epoch": 0.1407564416771544,
      "grad_norm": 0.28504666686058044,
      "learning_rate": 0.00019061901293460732,
      "loss": 1.6609,
      "step": 17000
    },
    {
      "epoch": 0.14158442074584354,
      "grad_norm": 0.22296425700187683,
      "learning_rate": 0.00019056381432545772,
      "loss": 1.6359,
      "step": 17100
    },
    {
      "epoch": 0.1424123998145327,
      "grad_norm": 0.23885318636894226,
      "learning_rate": 0.00019050861571630808,
      "loss": 1.6262,
      "step": 17200
    },
    {
      "epoch": 0.14324037888322183,
      "grad_norm": 0.22231832146644592,
      "learning_rate": 0.0001904534171071585,
      "loss": 1.6369,
      "step": 17300
    },
    {
      "epoch": 0.14406835795191097,
      "grad_norm": 0.25279131531715393,
      "learning_rate": 0.0001903982184980089,
      "loss": 1.6376,
      "step": 17400
    },
    {
      "epoch": 0.14489633702060012,
      "grad_norm": 0.2754824459552765,
      "learning_rate": 0.00019034301988885927,
      "loss": 1.6579,
      "step": 17500
    },
    {
      "epoch": 0.14572431608928926,
      "grad_norm": 0.22775475680828094,
      "learning_rate": 0.0001902878212797097,
      "loss": 1.674,
      "step": 17600
    },
    {
      "epoch": 0.1465522951579784,
      "grad_norm": 0.21519581973552704,
      "learning_rate": 0.00019023262267056008,
      "loss": 1.654,
      "step": 17700
    },
    {
      "epoch": 0.14738027422666755,
      "grad_norm": 0.2484564632177353,
      "learning_rate": 0.00019017742406141045,
      "loss": 1.6046,
      "step": 17800
    },
    {
      "epoch": 0.1482082532953567,
      "grad_norm": 0.21091294288635254,
      "learning_rate": 0.00019012222545226087,
      "loss": 1.616,
      "step": 17900
    },
    {
      "epoch": 0.14903623236404584,
      "grad_norm": 0.27948471903800964,
      "learning_rate": 0.00019006702684311127,
      "loss": 1.6866,
      "step": 18000
    },
    {
      "epoch": 0.14986421143273498,
      "grad_norm": 0.28303253650665283,
      "learning_rate": 0.00019001182823396163,
      "loss": 1.6569,
      "step": 18100
    },
    {
      "epoch": 0.15069219050142413,
      "grad_norm": 0.2028988003730774,
      "learning_rate": 0.00018995662962481205,
      "loss": 1.6752,
      "step": 18200
    },
    {
      "epoch": 0.15152016957011327,
      "grad_norm": 0.237883523106575,
      "learning_rate": 0.0001899019830017539,
      "loss": 1.6618,
      "step": 18300
    },
    {
      "epoch": 0.15234814863880242,
      "grad_norm": 0.21337077021598816,
      "learning_rate": 0.00018984678439260433,
      "loss": 1.6397,
      "step": 18400
    },
    {
      "epoch": 0.15317612770749156,
      "grad_norm": 0.20675146579742432,
      "learning_rate": 0.00018979158578345473,
      "loss": 1.6382,
      "step": 18500
    },
    {
      "epoch": 0.1540041067761807,
      "grad_norm": 0.23405767977237701,
      "learning_rate": 0.0001897363871743051,
      "loss": 1.6112,
      "step": 18600
    },
    {
      "epoch": 0.15483208584486985,
      "grad_norm": 0.22801779210567474,
      "learning_rate": 0.00018968118856515552,
      "loss": 1.6332,
      "step": 18700
    },
    {
      "epoch": 0.155660064913559,
      "grad_norm": 0.28877681493759155,
      "learning_rate": 0.0001896259899560059,
      "loss": 1.618,
      "step": 18800
    },
    {
      "epoch": 0.15648804398224814,
      "grad_norm": 0.2644197940826416,
      "learning_rate": 0.00018957079134685628,
      "loss": 1.6617,
      "step": 18900
    },
    {
      "epoch": 0.15731602305093728,
      "grad_norm": 0.20182272791862488,
      "learning_rate": 0.0001895155927377067,
      "loss": 1.6273,
      "step": 19000
    },
    {
      "epoch": 0.15814400211962643,
      "grad_norm": 0.2702679932117462,
      "learning_rate": 0.0001894603941285571,
      "loss": 1.6573,
      "step": 19100
    },
    {
      "epoch": 0.15897198118831557,
      "grad_norm": 0.24677981436252594,
      "learning_rate": 0.00018940519551940746,
      "loss": 1.645,
      "step": 19200
    },
    {
      "epoch": 0.15979996025700471,
      "grad_norm": 0.22584903240203857,
      "learning_rate": 0.00018934999691025788,
      "loss": 1.6496,
      "step": 19300
    },
    {
      "epoch": 0.16062793932569386,
      "grad_norm": 0.25998055934906006,
      "learning_rate": 0.00018929479830110828,
      "loss": 1.6406,
      "step": 19400
    },
    {
      "epoch": 0.161455918394383,
      "grad_norm": 0.24679061770439148,
      "learning_rate": 0.00018923959969195867,
      "loss": 1.664,
      "step": 19500
    },
    {
      "epoch": 0.16228389746307215,
      "grad_norm": 0.21543079614639282,
      "learning_rate": 0.00018918440108280907,
      "loss": 1.6316,
      "step": 19600
    },
    {
      "epoch": 0.1631118765317613,
      "grad_norm": 0.20581622421741486,
      "learning_rate": 0.00018912920247365946,
      "loss": 1.6417,
      "step": 19700
    },
    {
      "epoch": 0.1639398556004504,
      "grad_norm": 0.24385277926921844,
      "learning_rate": 0.00018907400386450985,
      "loss": 1.6462,
      "step": 19800
    },
    {
      "epoch": 0.16476783466913955,
      "grad_norm": 0.21104101836681366,
      "learning_rate": 0.00018901880525536025,
      "loss": 1.6255,
      "step": 19900
    },
    {
      "epoch": 0.1655958137378287,
      "grad_norm": 0.2160460352897644,
      "learning_rate": 0.00018896360664621064,
      "loss": 1.6602,
      "step": 20000
    },
    {
      "epoch": 0.16642379280651784,
      "grad_norm": 0.27930399775505066,
      "learning_rate": 0.00018890840803706104,
      "loss": 1.676,
      "step": 20100
    },
    {
      "epoch": 0.16725177187520698,
      "grad_norm": 0.21541720628738403,
      "learning_rate": 0.00018885320942791143,
      "loss": 1.627,
      "step": 20200
    },
    {
      "epoch": 0.16807975094389613,
      "grad_norm": 0.21303394436836243,
      "learning_rate": 0.00018879801081876182,
      "loss": 1.6332,
      "step": 20300
    },
    {
      "epoch": 0.16890773001258527,
      "grad_norm": 0.22445669770240784,
      "learning_rate": 0.0001887433641957037,
      "loss": 1.6466,
      "step": 20400
    },
    {
      "epoch": 0.16973570908127442,
      "grad_norm": 0.21410098671913147,
      "learning_rate": 0.0001886881655865541,
      "loss": 1.6392,
      "step": 20500
    },
    {
      "epoch": 0.17056368814996356,
      "grad_norm": 0.19233547151088715,
      "learning_rate": 0.0001886329669774045,
      "loss": 1.6396,
      "step": 20600
    },
    {
      "epoch": 0.1713916672186527,
      "grad_norm": 0.20589858293533325,
      "learning_rate": 0.0001885777683682549,
      "loss": 1.6295,
      "step": 20700
    },
    {
      "epoch": 0.17221964628734185,
      "grad_norm": 0.25907692313194275,
      "learning_rate": 0.0001885225697591053,
      "loss": 1.6329,
      "step": 20800
    },
    {
      "epoch": 0.173047625356031,
      "grad_norm": 0.2765398919582367,
      "learning_rate": 0.00018846737114995568,
      "loss": 1.6868,
      "step": 20900
    },
    {
      "epoch": 0.17387560442472014,
      "grad_norm": 0.23948393762111664,
      "learning_rate": 0.00018841217254080608,
      "loss": 1.6664,
      "step": 21000
    },
    {
      "epoch": 0.17470358349340928,
      "grad_norm": 0.2371809184551239,
      "learning_rate": 0.00018835697393165647,
      "loss": 1.6843,
      "step": 21100
    },
    {
      "epoch": 0.17553156256209843,
      "grad_norm": 0.19910277426242828,
      "learning_rate": 0.00018830177532250686,
      "loss": 1.619,
      "step": 21200
    },
    {
      "epoch": 0.17635954163078757,
      "grad_norm": 0.24008843302726746,
      "learning_rate": 0.00018824657671335726,
      "loss": 1.6254,
      "step": 21300
    },
    {
      "epoch": 0.1771875206994767,
      "grad_norm": 0.22527045011520386,
      "learning_rate": 0.00018819137810420765,
      "loss": 1.656,
      "step": 21400
    },
    {
      "epoch": 0.17801549976816586,
      "grad_norm": 0.21581338346004486,
      "learning_rate": 0.00018813617949505805,
      "loss": 1.6297,
      "step": 21500
    },
    {
      "epoch": 0.178843478836855,
      "grad_norm": 0.23672404885292053,
      "learning_rate": 0.00018808098088590844,
      "loss": 1.6493,
      "step": 21600
    },
    {
      "epoch": 0.17967145790554415,
      "grad_norm": 0.22824163734912872,
      "learning_rate": 0.00018802578227675884,
      "loss": 1.6428,
      "step": 21700
    },
    {
      "epoch": 0.1804994369742333,
      "grad_norm": 0.21679380536079407,
      "learning_rate": 0.00018797058366760923,
      "loss": 1.6373,
      "step": 21800
    },
    {
      "epoch": 0.18132741604292243,
      "grad_norm": 0.23232513666152954,
      "learning_rate": 0.00018791538505845962,
      "loss": 1.6408,
      "step": 21900
    },
    {
      "epoch": 0.18215539511161158,
      "grad_norm": 0.21634866297245026,
      "learning_rate": 0.00018786018644931002,
      "loss": 1.6428,
      "step": 22000
    },
    {
      "epoch": 0.18298337418030072,
      "grad_norm": 0.24199073016643524,
      "learning_rate": 0.0001878049878401604,
      "loss": 1.6912,
      "step": 22100
    },
    {
      "epoch": 0.18381135324898987,
      "grad_norm": 0.24073319137096405,
      "learning_rate": 0.0001877497892310108,
      "loss": 1.6697,
      "step": 22200
    },
    {
      "epoch": 0.184639332317679,
      "grad_norm": 0.20666244626045227,
      "learning_rate": 0.0001876945906218612,
      "loss": 1.6372,
      "step": 22300
    },
    {
      "epoch": 0.18546731138636816,
      "grad_norm": 0.24439455568790436,
      "learning_rate": 0.0001876393920127116,
      "loss": 1.6562,
      "step": 22400
    },
    {
      "epoch": 0.1862952904550573,
      "grad_norm": 0.20771746337413788,
      "learning_rate": 0.000187584193403562,
      "loss": 1.6342,
      "step": 22500
    },
    {
      "epoch": 0.18712326952374644,
      "grad_norm": 0.23821604251861572,
      "learning_rate": 0.00018752899479441238,
      "loss": 1.6266,
      "step": 22600
    },
    {
      "epoch": 0.1879512485924356,
      "grad_norm": 0.20013689994812012,
      "learning_rate": 0.00018747379618526278,
      "loss": 1.6394,
      "step": 22700
    },
    {
      "epoch": 0.18877922766112473,
      "grad_norm": 0.24145907163619995,
      "learning_rate": 0.00018741859757611317,
      "loss": 1.639,
      "step": 22800
    },
    {
      "epoch": 0.18960720672981388,
      "grad_norm": 0.227446511387825,
      "learning_rate": 0.00018736395095305509,
      "loss": 1.6245,
      "step": 22900
    },
    {
      "epoch": 0.19043518579850302,
      "grad_norm": 0.23569269478321075,
      "learning_rate": 0.00018730875234390545,
      "loss": 1.6536,
      "step": 23000
    },
    {
      "epoch": 0.19126316486719216,
      "grad_norm": 0.26718559861183167,
      "learning_rate": 0.00018725355373475585,
      "loss": 1.6152,
      "step": 23100
    },
    {
      "epoch": 0.1920911439358813,
      "grad_norm": 0.2448846399784088,
      "learning_rate": 0.00018719835512560627,
      "loss": 1.6462,
      "step": 23200
    },
    {
      "epoch": 0.19291912300457045,
      "grad_norm": 0.22957266867160797,
      "learning_rate": 0.00018714315651645664,
      "loss": 1.6493,
      "step": 23300
    },
    {
      "epoch": 0.1937471020732596,
      "grad_norm": 0.24881067872047424,
      "learning_rate": 0.00018708795790730703,
      "loss": 1.6509,
      "step": 23400
    },
    {
      "epoch": 0.19457508114194874,
      "grad_norm": 0.26392731070518494,
      "learning_rate": 0.00018703275929815745,
      "loss": 1.6436,
      "step": 23500
    },
    {
      "epoch": 0.19540306021063789,
      "grad_norm": 0.23469410836696625,
      "learning_rate": 0.00018697756068900782,
      "loss": 1.6494,
      "step": 23600
    },
    {
      "epoch": 0.19623103927932703,
      "grad_norm": 0.191993847489357,
      "learning_rate": 0.00018692236207985824,
      "loss": 1.6118,
      "step": 23700
    },
    {
      "epoch": 0.19705901834801617,
      "grad_norm": 0.24076974391937256,
      "learning_rate": 0.00018686716347070863,
      "loss": 1.6414,
      "step": 23800
    },
    {
      "epoch": 0.19788699741670532,
      "grad_norm": 0.22676746547222137,
      "learning_rate": 0.000186811964861559,
      "loss": 1.6392,
      "step": 23900
    },
    {
      "epoch": 0.19871497648539446,
      "grad_norm": 0.2693268060684204,
      "learning_rate": 0.00018675676625240942,
      "loss": 1.6254,
      "step": 24000
    },
    {
      "epoch": 0.1995429555540836,
      "grad_norm": 0.23930659890174866,
      "learning_rate": 0.00018670156764325982,
      "loss": 1.6451,
      "step": 24100
    },
    {
      "epoch": 0.20037093462277272,
      "grad_norm": 0.23256762325763702,
      "learning_rate": 0.00018664636903411018,
      "loss": 1.6424,
      "step": 24200
    },
    {
      "epoch": 0.20119891369146187,
      "grad_norm": 0.2577391564846039,
      "learning_rate": 0.0001865911704249606,
      "loss": 1.6212,
      "step": 24300
    },
    {
      "epoch": 0.202026892760151,
      "grad_norm": 0.26071691513061523,
      "learning_rate": 0.000186535971815811,
      "loss": 1.6314,
      "step": 24400
    },
    {
      "epoch": 0.20285487182884016,
      "grad_norm": 0.22462689876556396,
      "learning_rate": 0.00018648077320666137,
      "loss": 1.6237,
      "step": 24500
    },
    {
      "epoch": 0.2036828508975293,
      "grad_norm": 0.24064601957798004,
      "learning_rate": 0.0001864255745975118,
      "loss": 1.6391,
      "step": 24600
    },
    {
      "epoch": 0.20451082996621844,
      "grad_norm": 0.22928814589977264,
      "learning_rate": 0.00018637037598836218,
      "loss": 1.6147,
      "step": 24700
    },
    {
      "epoch": 0.2053388090349076,
      "grad_norm": 0.22983402013778687,
      "learning_rate": 0.00018631517737921255,
      "loss": 1.6427,
      "step": 24800
    },
    {
      "epoch": 0.20616678810359673,
      "grad_norm": 0.22635537385940552,
      "learning_rate": 0.00018625997877006297,
      "loss": 1.6421,
      "step": 24900
    },
    {
      "epoch": 0.20699476717228588,
      "grad_norm": 0.23368728160858154,
      "learning_rate": 0.00018620478016091336,
      "loss": 1.6472,
      "step": 25000
    },
    {
      "epoch": 0.20782274624097502,
      "grad_norm": 0.24677674472332,
      "learning_rate": 0.00018614958155176373,
      "loss": 1.6697,
      "step": 25100
    },
    {
      "epoch": 0.20865072530966416,
      "grad_norm": 0.2160155028104782,
      "learning_rate": 0.00018609438294261415,
      "loss": 1.6327,
      "step": 25200
    },
    {
      "epoch": 0.2094787043783533,
      "grad_norm": 0.20933164656162262,
      "learning_rate": 0.00018603918433346455,
      "loss": 1.6464,
      "step": 25300
    },
    {
      "epoch": 0.21030668344704245,
      "grad_norm": 0.2508947253227234,
      "learning_rate": 0.00018598398572431491,
      "loss": 1.6531,
      "step": 25400
    },
    {
      "epoch": 0.2111346625157316,
      "grad_norm": 0.264946311712265,
      "learning_rate": 0.00018592878711516534,
      "loss": 1.6285,
      "step": 25500
    },
    {
      "epoch": 0.21196264158442074,
      "grad_norm": 0.23839199542999268,
      "learning_rate": 0.00018587358850601573,
      "loss": 1.6601,
      "step": 25600
    },
    {
      "epoch": 0.21279062065310989,
      "grad_norm": 0.20937936007976532,
      "learning_rate": 0.0001858183898968661,
      "loss": 1.6199,
      "step": 25700
    },
    {
      "epoch": 0.21361859972179903,
      "grad_norm": 0.2978517413139343,
      "learning_rate": 0.00018576319128771652,
      "loss": 1.6233,
      "step": 25800
    },
    {
      "epoch": 0.21444657879048817,
      "grad_norm": 0.22715617716312408,
      "learning_rate": 0.0001857079926785669,
      "loss": 1.6616,
      "step": 25900
    },
    {
      "epoch": 0.21527455785917732,
      "grad_norm": 0.21679271757602692,
      "learning_rate": 0.00018565279406941728,
      "loss": 1.6562,
      "step": 26000
    },
    {
      "epoch": 0.21610253692786646,
      "grad_norm": 0.2540909945964813,
      "learning_rate": 0.0001855975954602677,
      "loss": 1.6292,
      "step": 26100
    },
    {
      "epoch": 0.2169305159965556,
      "grad_norm": 0.23456817865371704,
      "learning_rate": 0.0001855423968511181,
      "loss": 1.6117,
      "step": 26200
    },
    {
      "epoch": 0.21775849506524475,
      "grad_norm": 0.2706565856933594,
      "learning_rate": 0.00018548719824196846,
      "loss": 1.6119,
      "step": 26300
    },
    {
      "epoch": 0.2185864741339339,
      "grad_norm": 0.24881504476070404,
      "learning_rate": 0.00018543199963281888,
      "loss": 1.6263,
      "step": 26400
    },
    {
      "epoch": 0.21941445320262304,
      "grad_norm": 0.3882363736629486,
      "learning_rate": 0.00018537680102366928,
      "loss": 1.6537,
      "step": 26500
    },
    {
      "epoch": 0.22024243227131218,
      "grad_norm": 0.19953063130378723,
      "learning_rate": 0.00018532160241451965,
      "loss": 1.6786,
      "step": 26600
    },
    {
      "epoch": 0.22107041134000133,
      "grad_norm": 0.21415996551513672,
      "learning_rate": 0.00018526640380537007,
      "loss": 1.6582,
      "step": 26700
    },
    {
      "epoch": 0.22189839040869047,
      "grad_norm": 0.22917871177196503,
      "learning_rate": 0.00018521120519622046,
      "loss": 1.5913,
      "step": 26800
    },
    {
      "epoch": 0.22272636947737962,
      "grad_norm": 0.23988696932792664,
      "learning_rate": 0.00018515600658707083,
      "loss": 1.6284,
      "step": 26900
    },
    {
      "epoch": 0.22355434854606876,
      "grad_norm": 0.21336591243743896,
      "learning_rate": 0.00018510080797792125,
      "loss": 1.6594,
      "step": 27000
    },
    {
      "epoch": 0.2243823276147579,
      "grad_norm": 0.22978056967258453,
      "learning_rate": 0.00018504560936877164,
      "loss": 1.653,
      "step": 27100
    },
    {
      "epoch": 0.22521030668344705,
      "grad_norm": 0.27317842841148376,
      "learning_rate": 0.00018499096274571353,
      "loss": 1.6267,
      "step": 27200
    },
    {
      "epoch": 0.2260382857521362,
      "grad_norm": 0.2396378219127655,
      "learning_rate": 0.00018493576413656392,
      "loss": 1.6452,
      "step": 27300
    },
    {
      "epoch": 0.22686626482082534,
      "grad_norm": 0.2800294756889343,
      "learning_rate": 0.00018488056552741432,
      "loss": 1.6722,
      "step": 27400
    },
    {
      "epoch": 0.22769424388951448,
      "grad_norm": 0.24665579199790955,
      "learning_rate": 0.0001848253669182647,
      "loss": 1.6473,
      "step": 27500
    },
    {
      "epoch": 0.22852222295820362,
      "grad_norm": 0.23755255341529846,
      "learning_rate": 0.0001847701683091151,
      "loss": 1.6072,
      "step": 27600
    },
    {
      "epoch": 0.22935020202689277,
      "grad_norm": 0.23274143040180206,
      "learning_rate": 0.0001847149696999655,
      "loss": 1.6547,
      "step": 27700
    },
    {
      "epoch": 0.2301781810955819,
      "grad_norm": 0.20112739503383636,
      "learning_rate": 0.0001846597710908159,
      "loss": 1.6307,
      "step": 27800
    },
    {
      "epoch": 0.23100616016427106,
      "grad_norm": 0.29942500591278076,
      "learning_rate": 0.0001846045724816663,
      "loss": 1.6384,
      "step": 27900
    },
    {
      "epoch": 0.2318341392329602,
      "grad_norm": 0.2471247911453247,
      "learning_rate": 0.00018454937387251668,
      "loss": 1.6568,
      "step": 28000
    },
    {
      "epoch": 0.23266211830164935,
      "grad_norm": 0.2586652934551239,
      "learning_rate": 0.00018449417526336708,
      "loss": 1.6167,
      "step": 28100
    },
    {
      "epoch": 0.2334900973703385,
      "grad_norm": 0.23116792738437653,
      "learning_rate": 0.00018443897665421747,
      "loss": 1.6222,
      "step": 28200
    },
    {
      "epoch": 0.23431807643902763,
      "grad_norm": 0.22467024624347687,
      "learning_rate": 0.00018438433003115936,
      "loss": 1.6343,
      "step": 28300
    },
    {
      "epoch": 0.23514605550771678,
      "grad_norm": 0.28815844655036926,
      "learning_rate": 0.00018432913142200975,
      "loss": 1.6388,
      "step": 28400
    },
    {
      "epoch": 0.23597403457640592,
      "grad_norm": 0.22312502562999725,
      "learning_rate": 0.00018427393281286017,
      "loss": 1.639,
      "step": 28500
    },
    {
      "epoch": 0.23680201364509504,
      "grad_norm": 0.2542140781879425,
      "learning_rate": 0.00018421873420371054,
      "loss": 1.6019,
      "step": 28600
    },
    {
      "epoch": 0.23762999271378418,
      "grad_norm": 0.2540210783481598,
      "learning_rate": 0.00018416353559456094,
      "loss": 1.6217,
      "step": 28700
    },
    {
      "epoch": 0.23845797178247333,
      "grad_norm": 0.2396947294473648,
      "learning_rate": 0.00018410833698541136,
      "loss": 1.6186,
      "step": 28800
    },
    {
      "epoch": 0.23928595085116247,
      "grad_norm": 0.2223241776227951,
      "learning_rate": 0.00018405313837626172,
      "loss": 1.6385,
      "step": 28900
    },
    {
      "epoch": 0.24011392991985162,
      "grad_norm": 0.21971747279167175,
      "learning_rate": 0.00018399793976711212,
      "loss": 1.6357,
      "step": 29000
    },
    {
      "epoch": 0.24094190898854076,
      "grad_norm": 0.24293990433216095,
      "learning_rate": 0.00018394274115796254,
      "loss": 1.6677,
      "step": 29100
    },
    {
      "epoch": 0.2417698880572299,
      "grad_norm": 0.2490108758211136,
      "learning_rate": 0.0001838875425488129,
      "loss": 1.6423,
      "step": 29200
    },
    {
      "epoch": 0.24259786712591905,
      "grad_norm": 0.2689921259880066,
      "learning_rate": 0.0001838323439396633,
      "loss": 1.6273,
      "step": 29300
    },
    {
      "epoch": 0.2434258461946082,
      "grad_norm": 0.21055381000041962,
      "learning_rate": 0.0001837776973166052,
      "loss": 1.5876,
      "step": 29400
    },
    {
      "epoch": 0.24425382526329734,
      "grad_norm": 0.24339735507965088,
      "learning_rate": 0.00018372249870745558,
      "loss": 1.5942,
      "step": 29500
    },
    {
      "epoch": 0.24508180433198648,
      "grad_norm": 0.22557999193668365,
      "learning_rate": 0.000183667300098306,
      "loss": 1.6407,
      "step": 29600
    },
    {
      "epoch": 0.24590978340067562,
      "grad_norm": 0.27617794275283813,
      "learning_rate": 0.00018361210148915637,
      "loss": 1.626,
      "step": 29700
    },
    {
      "epoch": 0.24673776246936477,
      "grad_norm": 0.2763831615447998,
      "learning_rate": 0.00018355690288000676,
      "loss": 1.6035,
      "step": 29800
    },
    {
      "epoch": 0.2475657415380539,
      "grad_norm": 0.23689210414886475,
      "learning_rate": 0.00018350170427085719,
      "loss": 1.6772,
      "step": 29900
    },
    {
      "epoch": 0.24839372060674306,
      "grad_norm": 0.22767847776412964,
      "learning_rate": 0.00018344650566170755,
      "loss": 1.6315,
      "step": 30000
    },
    {
      "epoch": 0.2492216996754322,
      "grad_norm": 0.23645687103271484,
      "learning_rate": 0.00018339130705255795,
      "loss": 1.6424,
      "step": 30100
    },
    {
      "epoch": 0.2500496787441214,
      "grad_norm": 0.29076236486434937,
      "learning_rate": 0.00018333610844340837,
      "loss": 1.6175,
      "step": 30200
    },
    {
      "epoch": 0.2508776578128105,
      "grad_norm": 0.22849859297275543,
      "learning_rate": 0.00018328090983425874,
      "loss": 1.6264,
      "step": 30300
    },
    {
      "epoch": 0.25170563688149966,
      "grad_norm": 0.19252796471118927,
      "learning_rate": 0.00018322571122510913,
      "loss": 1.6386,
      "step": 30400
    },
    {
      "epoch": 0.2525336159501888,
      "grad_norm": 0.22710925340652466,
      "learning_rate": 0.00018317051261595955,
      "loss": 1.6098,
      "step": 30500
    },
    {
      "epoch": 0.25336159501887795,
      "grad_norm": 0.22989875078201294,
      "learning_rate": 0.00018311531400680992,
      "loss": 1.6339,
      "step": 30600
    },
    {
      "epoch": 0.2541895740875671,
      "grad_norm": 0.25290095806121826,
      "learning_rate": 0.0001830601153976603,
      "loss": 1.634,
      "step": 30700
    },
    {
      "epoch": 0.2550175531562562,
      "grad_norm": 0.24599549174308777,
      "learning_rate": 0.00018300491678851073,
      "loss": 1.6577,
      "step": 30800
    },
    {
      "epoch": 0.2558455322249453,
      "grad_norm": 0.37027794122695923,
      "learning_rate": 0.0001829497181793611,
      "loss": 1.644,
      "step": 30900
    },
    {
      "epoch": 0.25667351129363447,
      "grad_norm": 0.2629932463169098,
      "learning_rate": 0.00018289451957021152,
      "loss": 1.622,
      "step": 31000
    },
    {
      "epoch": 0.2575014903623236,
      "grad_norm": 0.33217158913612366,
      "learning_rate": 0.00018283932096106192,
      "loss": 1.6334,
      "step": 31100
    },
    {
      "epoch": 0.25832946943101276,
      "grad_norm": 0.23134025931358337,
      "learning_rate": 0.00018278412235191228,
      "loss": 1.6411,
      "step": 31200
    },
    {
      "epoch": 0.2591574484997019,
      "grad_norm": 0.2514587342739105,
      "learning_rate": 0.0001827289237427627,
      "loss": 1.6125,
      "step": 31300
    },
    {
      "epoch": 0.25998542756839105,
      "grad_norm": 0.2340904325246811,
      "learning_rate": 0.0001826737251336131,
      "loss": 1.6567,
      "step": 31400
    },
    {
      "epoch": 0.2608134066370802,
      "grad_norm": 0.27328845858573914,
      "learning_rate": 0.00018261852652446347,
      "loss": 1.6479,
      "step": 31500
    },
    {
      "epoch": 0.26164138570576934,
      "grad_norm": 0.2700946033000946,
      "learning_rate": 0.0001825633279153139,
      "loss": 1.6262,
      "step": 31600
    },
    {
      "epoch": 0.2624693647744585,
      "grad_norm": 0.25395748019218445,
      "learning_rate": 0.00018250812930616428,
      "loss": 1.6166,
      "step": 31700
    },
    {
      "epoch": 0.2632973438431476,
      "grad_norm": 0.23837615549564362,
      "learning_rate": 0.00018245293069701465,
      "loss": 1.6378,
      "step": 31800
    },
    {
      "epoch": 0.26412532291183677,
      "grad_norm": 0.2575034201145172,
      "learning_rate": 0.00018239773208786507,
      "loss": 1.6448,
      "step": 31900
    },
    {
      "epoch": 0.2649533019805259,
      "grad_norm": 0.25627613067626953,
      "learning_rate": 0.00018234253347871546,
      "loss": 1.6674,
      "step": 32000
    },
    {
      "epoch": 0.26578128104921506,
      "grad_norm": 0.24171142280101776,
      "learning_rate": 0.00018228733486956583,
      "loss": 1.6584,
      "step": 32100
    },
    {
      "epoch": 0.2666092601179042,
      "grad_norm": 0.2696898877620697,
      "learning_rate": 0.00018223213626041625,
      "loss": 1.6346,
      "step": 32200
    },
    {
      "epoch": 0.26743723918659335,
      "grad_norm": 0.2296200692653656,
      "learning_rate": 0.00018217693765126665,
      "loss": 1.6376,
      "step": 32300
    },
    {
      "epoch": 0.2682652182552825,
      "grad_norm": 0.22668029367923737,
      "learning_rate": 0.00018212173904211701,
      "loss": 1.6568,
      "step": 32400
    },
    {
      "epoch": 0.26909319732397163,
      "grad_norm": 0.23633837699890137,
      "learning_rate": 0.00018206654043296744,
      "loss": 1.6355,
      "step": 32500
    },
    {
      "epoch": 0.2699211763926608,
      "grad_norm": 0.3272385597229004,
      "learning_rate": 0.00018201134182381783,
      "loss": 1.6333,
      "step": 32600
    },
    {
      "epoch": 0.2707491554613499,
      "grad_norm": 0.22284284234046936,
      "learning_rate": 0.0001819561432146682,
      "loss": 1.6482,
      "step": 32700
    },
    {
      "epoch": 0.27157713453003907,
      "grad_norm": 0.22121630609035492,
      "learning_rate": 0.00018190094460551862,
      "loss": 1.6496,
      "step": 32800
    },
    {
      "epoch": 0.2724051135987282,
      "grad_norm": 0.2997874319553375,
      "learning_rate": 0.000181845745996369,
      "loss": 1.6416,
      "step": 32900
    },
    {
      "epoch": 0.27323309266741735,
      "grad_norm": 0.26545023918151855,
      "learning_rate": 0.00018179054738721938,
      "loss": 1.6452,
      "step": 33000
    },
    {
      "epoch": 0.2740610717361065,
      "grad_norm": 0.2760174870491028,
      "learning_rate": 0.0001817353487780698,
      "loss": 1.6617,
      "step": 33100
    },
    {
      "epoch": 0.27488905080479564,
      "grad_norm": 0.2267056554555893,
      "learning_rate": 0.0001816801501689202,
      "loss": 1.6201,
      "step": 33200
    },
    {
      "epoch": 0.2757170298734848,
      "grad_norm": 0.2418147623538971,
      "learning_rate": 0.00018162495155977056,
      "loss": 1.6369,
      "step": 33300
    },
    {
      "epoch": 0.27654500894217393,
      "grad_norm": 0.2712232172489166,
      "learning_rate": 0.00018156975295062098,
      "loss": 1.61,
      "step": 33400
    },
    {
      "epoch": 0.2773729880108631,
      "grad_norm": 0.21989206969738007,
      "learning_rate": 0.00018151455434147138,
      "loss": 1.6385,
      "step": 33500
    },
    {
      "epoch": 0.2782009670795522,
      "grad_norm": 0.23285841941833496,
      "learning_rate": 0.00018145935573232174,
      "loss": 1.6234,
      "step": 33600
    },
    {
      "epoch": 0.27902894614824136,
      "grad_norm": 0.2723333537578583,
      "learning_rate": 0.00018140415712317217,
      "loss": 1.6265,
      "step": 33700
    },
    {
      "epoch": 0.2798569252169305,
      "grad_norm": 0.29798418283462524,
      "learning_rate": 0.00018134895851402256,
      "loss": 1.6278,
      "step": 33800
    },
    {
      "epoch": 0.28068490428561965,
      "grad_norm": 0.22149847447872162,
      "learning_rate": 0.00018129375990487293,
      "loss": 1.6347,
      "step": 33900
    },
    {
      "epoch": 0.2815128833543088,
      "grad_norm": 0.2652026116847992,
      "learning_rate": 0.00018123856129572335,
      "loss": 1.6388,
      "step": 34000
    },
    {
      "epoch": 0.28234086242299794,
      "grad_norm": 0.21799111366271973,
      "learning_rate": 0.00018118336268657374,
      "loss": 1.6031,
      "step": 34100
    },
    {
      "epoch": 0.2831688414916871,
      "grad_norm": 0.23749162256717682,
      "learning_rate": 0.0001811281640774241,
      "loss": 1.6268,
      "step": 34200
    },
    {
      "epoch": 0.28399682056037623,
      "grad_norm": 0.20693838596343994,
      "learning_rate": 0.00018107351745436602,
      "loss": 1.6406,
      "step": 34300
    },
    {
      "epoch": 0.2848247996290654,
      "grad_norm": 0.27509090304374695,
      "learning_rate": 0.00018101831884521642,
      "loss": 1.6291,
      "step": 34400
    },
    {
      "epoch": 0.2856527786977545,
      "grad_norm": 0.24260659515857697,
      "learning_rate": 0.0001809631202360668,
      "loss": 1.6418,
      "step": 34500
    },
    {
      "epoch": 0.28648075776644366,
      "grad_norm": 0.22240006923675537,
      "learning_rate": 0.0001809079216269172,
      "loss": 1.589,
      "step": 34600
    },
    {
      "epoch": 0.2873087368351328,
      "grad_norm": 0.21402128040790558,
      "learning_rate": 0.0001808527230177676,
      "loss": 1.6004,
      "step": 34700
    },
    {
      "epoch": 0.28813671590382195,
      "grad_norm": 0.23821817338466644,
      "learning_rate": 0.000180797524408618,
      "loss": 1.6203,
      "step": 34800
    },
    {
      "epoch": 0.2889646949725111,
      "grad_norm": 0.22152476012706757,
      "learning_rate": 0.0001807423257994684,
      "loss": 1.6351,
      "step": 34900
    },
    {
      "epoch": 0.28979267404120024,
      "grad_norm": 0.269619882106781,
      "learning_rate": 0.00018068712719031878,
      "loss": 1.6269,
      "step": 35000
    },
    {
      "epoch": 0.2906206531098894,
      "grad_norm": 0.2366069257259369,
      "learning_rate": 0.00018063192858116918,
      "loss": 1.6363,
      "step": 35100
    },
    {
      "epoch": 0.2914486321785785,
      "grad_norm": 0.2468317300081253,
      "learning_rate": 0.00018057672997201957,
      "loss": 1.6265,
      "step": 35200
    },
    {
      "epoch": 0.29227661124726767,
      "grad_norm": 0.2998073697090149,
      "learning_rate": 0.00018052153136286997,
      "loss": 1.6213,
      "step": 35300
    },
    {
      "epoch": 0.2931045903159568,
      "grad_norm": 0.2570279836654663,
      "learning_rate": 0.00018046633275372036,
      "loss": 1.6229,
      "step": 35400
    },
    {
      "epoch": 0.29393256938464596,
      "grad_norm": 0.29977649450302124,
      "learning_rate": 0.00018041113414457075,
      "loss": 1.5778,
      "step": 35500
    },
    {
      "epoch": 0.2947605484533351,
      "grad_norm": 0.23830650746822357,
      "learning_rate": 0.00018035593553542115,
      "loss": 1.643,
      "step": 35600
    },
    {
      "epoch": 0.29558852752202425,
      "grad_norm": 0.2635740339756012,
      "learning_rate": 0.00018030073692627154,
      "loss": 1.6369,
      "step": 35700
    },
    {
      "epoch": 0.2964165065907134,
      "grad_norm": 0.24356907606124878,
      "learning_rate": 0.00018024553831712194,
      "loss": 1.6231,
      "step": 35800
    },
    {
      "epoch": 0.29724448565940254,
      "grad_norm": 0.19405247271060944,
      "learning_rate": 0.00018019033970797233,
      "loss": 1.6504,
      "step": 35900
    },
    {
      "epoch": 0.2980724647280917,
      "grad_norm": 0.22447918355464935,
      "learning_rate": 0.00018013514109882273,
      "loss": 1.6385,
      "step": 36000
    },
    {
      "epoch": 0.2989004437967808,
      "grad_norm": 0.2469175159931183,
      "learning_rate": 0.00018007994248967312,
      "loss": 1.6285,
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.29972842286546997, |
|
"grad_norm": 0.2642788290977478, |
|
"learning_rate": 0.00018002474388052351, |
|
"loss": 1.6119, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.3005564019341591, |
|
"grad_norm": 0.2765555679798126, |
|
"learning_rate": 0.0001799695452713739, |
|
"loss": 1.6178, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.30138438100284826, |
|
"grad_norm": 0.3768673837184906, |
|
"learning_rate": 0.0001799143466622243, |
|
"loss": 1.6424, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.3022123600715374, |
|
"grad_norm": 0.26609617471694946, |
|
"learning_rate": 0.0001798591480530747, |
|
"loss": 1.6599, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.30304033914022654, |
|
"grad_norm": 0.24030715227127075, |
|
"learning_rate": 0.0001798039494439251, |
|
"loss": 1.64, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.3038683182089157, |
|
"grad_norm": 0.24733193218708038, |
|
"learning_rate": 0.00017974875083477549, |
|
"loss": 1.6097, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.30469629727760483, |
|
"grad_norm": 0.24855640530586243, |
|
"learning_rate": 0.00017969355222562588, |
|
"loss": 1.6355, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.305524276346294, |
|
"grad_norm": 0.2558852434158325, |
|
"learning_rate": 0.00017963835361647627, |
|
"loss": 1.6395, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.3063522554149831, |
|
"grad_norm": 0.252812922000885, |
|
"learning_rate": 0.00017958315500732667, |
|
"loss": 1.6019, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.30718023448367227, |
|
"grad_norm": 0.2745685279369354, |
|
"learning_rate": 0.0001795279563981771, |
|
"loss": 1.6438, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.3080082135523614, |
|
"grad_norm": 0.24301989376544952, |
|
"learning_rate": 0.00017947330977511895, |
|
"loss": 1.5943, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.30883619262105055, |
|
"grad_norm": 0.25345346331596375, |
|
"learning_rate": 0.00017941811116596937, |
|
"loss": 1.6255, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.3096641716897397, |
|
"grad_norm": 0.2757332921028137, |
|
"learning_rate": 0.00017936291255681974, |
|
"loss": 1.622, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.31049215075842884, |
|
"grad_norm": 0.2548620402812958, |
|
"learning_rate": 0.00017930771394767013, |
|
"loss": 1.6315, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.311320129827118, |
|
"grad_norm": 0.2474357783794403, |
|
"learning_rate": 0.00017925251533852055, |
|
"loss": 1.6391, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.31214810889580713, |
|
"grad_norm": 0.23792117834091187, |
|
"learning_rate": 0.00017919731672937092, |
|
"loss": 1.6349, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.3129760879644963, |
|
"grad_norm": 0.22419889271259308, |
|
"learning_rate": 0.00017914211812022131, |
|
"loss": 1.6439, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.3138040670331854, |
|
"grad_norm": 0.2642965316772461, |
|
"learning_rate": 0.00017908691951107174, |
|
"loss": 1.6347, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.31463204610187456, |
|
"grad_norm": 0.2979467511177063, |
|
"learning_rate": 0.0001790317209019221, |
|
"loss": 1.6478, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.3154600251705637, |
|
"grad_norm": 0.33403944969177246, |
|
"learning_rate": 0.0001789765222927725, |
|
"loss": 1.5852, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.31628800423925285, |
|
"grad_norm": 0.2425394505262375, |
|
"learning_rate": 0.00017892132368362292, |
|
"loss": 1.596, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.317115983307942, |
|
"grad_norm": 0.24871741235256195, |
|
"learning_rate": 0.00017886612507447328, |
|
"loss": 1.6358, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.31794396237663114, |
|
"grad_norm": 0.2535829246044159, |
|
"learning_rate": 0.00017881092646532368, |
|
"loss": 1.6175, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.3187719414453203, |
|
"grad_norm": 0.3197729289531708, |
|
"learning_rate": 0.0001787557278561741, |
|
"loss": 1.6211, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.31959992051400943, |
|
"grad_norm": 0.23783159255981445, |
|
"learning_rate": 0.00017870052924702447, |
|
"loss": 1.6677, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.32042789958269857, |
|
"grad_norm": 0.2013082504272461, |
|
"learning_rate": 0.00017864533063787486, |
|
"loss": 1.5916, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.3212558786513877, |
|
"grad_norm": 0.2517942190170288, |
|
"learning_rate": 0.00017859013202872528, |
|
"loss": 1.61, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.32208385772007686, |
|
"grad_norm": 0.22963842749595642, |
|
"learning_rate": 0.00017853493341957565, |
|
"loss": 1.5888, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.322911836788766, |
|
"grad_norm": 0.27445054054260254, |
|
"learning_rate": 0.00017847973481042604, |
|
"loss": 1.6021, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.32373981585745515, |
|
"grad_norm": 0.21975164115428925, |
|
"learning_rate": 0.00017842453620127647, |
|
"loss": 1.6261, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.3245677949261443, |
|
"grad_norm": 0.27244439721107483, |
|
"learning_rate": 0.00017836988957821835, |
|
"loss": 1.6478, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.32539577399483344, |
|
"grad_norm": 0.2596853971481323, |
|
"learning_rate": 0.00017831469096906875, |
|
"loss": 1.5833, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.3262237530635226, |
|
"grad_norm": 0.22705727815628052, |
|
"learning_rate": 0.0001782594923599191, |
|
"loss": 1.64, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.32705173213221167, |
|
"grad_norm": 0.24524278938770294, |
|
"learning_rate": 0.00017820429375076953, |
|
"loss": 1.6177, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.3278797112009008, |
|
"grad_norm": 0.2465396374464035, |
|
"learning_rate": 0.00017814909514161993, |
|
"loss": 1.6241, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.32870769026958996, |
|
"grad_norm": 0.24681350588798523, |
|
"learning_rate": 0.0001780938965324703, |
|
"loss": 1.6366, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.3295356693382791, |
|
"grad_norm": 0.2725008726119995, |
|
"learning_rate": 0.00017803869792332072, |
|
"loss": 1.6168, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.33036364840696825, |
|
"grad_norm": 0.24282367527484894, |
|
"learning_rate": 0.0001779834993141711, |
|
"loss": 1.6427, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.3311916274756574, |
|
"grad_norm": 0.22297324240207672, |
|
"learning_rate": 0.00017792830070502148, |
|
"loss": 1.6094, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.33201960654434653, |
|
"grad_norm": 0.23385342955589294, |
|
"learning_rate": 0.0001778736540819634, |
|
"loss": 1.6268, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.3328475856130357, |
|
"grad_norm": 0.24977736175060272, |
|
"learning_rate": 0.00017781845547281376, |
|
"loss": 1.6237, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.3336755646817248, |
|
"grad_norm": 0.24268653988838196, |
|
"learning_rate": 0.00017776325686366418, |
|
"loss": 1.6471, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.33450354375041397, |
|
"grad_norm": 0.1996404081583023, |
|
"learning_rate": 0.00017770805825451457, |
|
"loss": 1.6132, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.3353315228191031, |
|
"grad_norm": 0.26735562086105347, |
|
"learning_rate": 0.00017765285964536494, |
|
"loss": 1.6451, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.33615950188779226, |
|
"grad_norm": 0.24058261513710022, |
|
"learning_rate": 0.00017759766103621536, |
|
"loss": 1.6199, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.3369874809564814, |
|
"grad_norm": 0.23054581880569458, |
|
"learning_rate": 0.00017754246242706576, |
|
"loss": 1.6227, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.33781546002517054, |
|
"grad_norm": 0.245001882314682, |
|
"learning_rate": 0.00017748726381791615, |
|
"loss": 1.6422, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.3386434390938597, |
|
"grad_norm": 0.40328505635261536, |
|
"learning_rate": 0.00017743206520876655, |
|
"loss": 1.6102, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.33947141816254883, |
|
"grad_norm": 0.21955619752407074, |
|
"learning_rate": 0.00017737686659961694, |
|
"loss": 1.6439, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.340299397231238, |
|
"grad_norm": 0.2642820477485657, |
|
"learning_rate": 0.00017732166799046733, |
|
"loss": 1.6576, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.3411273762999271, |
|
"grad_norm": 0.22586101293563843, |
|
"learning_rate": 0.00017726646938131773, |
|
"loss": 1.6331, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.34195535536861627, |
|
"grad_norm": 0.22351595759391785, |
|
"learning_rate": 0.00017721127077216812, |
|
"loss": 1.6218, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.3427833344373054, |
|
"grad_norm": 0.5699514150619507, |
|
"learning_rate": 0.00017715607216301852, |
|
"loss": 1.6398, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.34361131350599455, |
|
"grad_norm": 0.3038058280944824, |
|
"learning_rate": 0.0001771008735538689, |
|
"loss": 1.6364, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.3444392925746837, |
|
"grad_norm": 0.25284209847450256, |
|
"learning_rate": 0.0001770456749447193, |
|
"loss": 1.6345, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.34526727164337284, |
|
"grad_norm": 0.5244471430778503, |
|
"learning_rate": 0.0001769904763355697, |
|
"loss": 1.6348, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.346095250712062, |
|
"grad_norm": 0.240703284740448, |
|
"learning_rate": 0.0001769352777264201, |
|
"loss": 1.6312, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.34692322978075113, |
|
"grad_norm": 0.23771056532859802, |
|
"learning_rate": 0.0001768800791172705, |
|
"loss": 1.6526, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.3477512088494403, |
|
"grad_norm": 0.25365206599235535, |
|
"learning_rate": 0.00017682488050812088, |
|
"loss": 1.6376, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.3485791879181294, |
|
"grad_norm": 0.27795979380607605, |
|
"learning_rate": 0.00017676968189897128, |
|
"loss": 1.6621, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.34940716698681856, |
|
"grad_norm": 0.24921217560768127, |
|
"learning_rate": 0.00017671448328982167, |
|
"loss": 1.622, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.3502351460555077, |
|
"grad_norm": 0.24285615980625153, |
|
"learning_rate": 0.00017665928468067207, |
|
"loss": 1.6524, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.35106312512419685, |
|
"grad_norm": 0.23597076535224915, |
|
"learning_rate": 0.00017660408607152246, |
|
"loss": 1.6257, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.351891104192886, |
|
"grad_norm": 0.2752334177494049, |
|
"learning_rate": 0.00017654888746237285, |
|
"loss": 1.6223, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.35271908326157514, |
|
"grad_norm": 0.24339468777179718, |
|
"learning_rate": 0.00017649424083931474, |
|
"loss": 1.6307, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.3535470623302643, |
|
"grad_norm": 0.22084777057170868, |
|
"learning_rate": 0.00017643904223016513, |
|
"loss": 1.6245, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.3543750413989534, |
|
"grad_norm": 0.2476748824119568, |
|
"learning_rate": 0.00017638384362101556, |
|
"loss": 1.6262, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.35520302046764257, |
|
"grad_norm": 0.2630603611469269, |
|
"learning_rate": 0.00017632864501186592, |
|
"loss": 1.6396, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.3560309995363317, |
|
"grad_norm": 0.27675458788871765, |
|
"learning_rate": 0.00017627344640271632, |
|
"loss": 1.6497, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.35685897860502086, |
|
"grad_norm": 0.23261144757270813, |
|
"learning_rate": 0.00017621824779356674, |
|
"loss": 1.6097, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.35768695767371, |
|
"grad_norm": 0.24998947978019714, |
|
"learning_rate": 0.0001761630491844171, |
|
"loss": 1.6306, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.35851493674239915, |
|
"grad_norm": 0.2858506441116333, |
|
"learning_rate": 0.0001761078505752675, |
|
"loss": 1.6167, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.3593429158110883, |
|
"grad_norm": 0.24741750955581665, |
|
"learning_rate": 0.00017605265196611792, |
|
"loss": 1.6434, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.36017089487977744, |
|
"grad_norm": 0.2413889467716217, |
|
"learning_rate": 0.0001759974533569683, |
|
"loss": 1.6175, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.3609988739484666, |
|
"grad_norm": 0.23276059329509735, |
|
"learning_rate": 0.00017594225474781868, |
|
"loss": 1.6506, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.3618268530171557, |
|
"grad_norm": 0.7301183938980103, |
|
"learning_rate": 0.0001758870561386691, |
|
"loss": 1.6602, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.36265483208584487, |
|
"grad_norm": 0.26085367798805237, |
|
"learning_rate": 0.00017583185752951947, |
|
"loss": 1.6361, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.363482811154534, |
|
"grad_norm": 0.2507553994655609, |
|
"learning_rate": 0.00017577665892036987, |
|
"loss": 1.6207, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.36431079022322316, |
|
"grad_norm": 0.2663898766040802, |
|
"learning_rate": 0.00017572146031122029, |
|
"loss": 1.6443, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.3651387692919123, |
|
"grad_norm": 0.34235909581184387, |
|
"learning_rate": 0.00017566626170207065, |
|
"loss": 1.668, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.36596674836060145, |
|
"grad_norm": 0.26196587085723877, |
|
"learning_rate": 0.00017561106309292105, |
|
"loss": 1.6544, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.3667947274292906, |
|
"grad_norm": 0.23575757443904877, |
|
"learning_rate": 0.00017555586448377147, |
|
"loss": 1.6252, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.36762270649797973, |
|
"grad_norm": 0.2926133871078491, |
|
"learning_rate": 0.00017550066587462184, |
|
"loss": 1.6282, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.3684506855666689, |
|
"grad_norm": 0.21387304365634918, |
|
"learning_rate": 0.00017544546726547223, |
|
"loss": 1.6542, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.369278664635358, |
|
"grad_norm": 0.24483337998390198, |
|
"learning_rate": 0.00017539026865632265, |
|
"loss": 1.626, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.37010664370404717, |
|
"grad_norm": 0.3214597702026367, |
|
"learning_rate": 0.00017533507004717302, |
|
"loss": 1.5933, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.3709346227727363, |
|
"grad_norm": 0.23716263473033905, |
|
"learning_rate": 0.0001752798714380234, |
|
"loss": 1.6189, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.37176260184142546, |
|
"grad_norm": 0.22332042455673218, |
|
"learning_rate": 0.00017522467282887383, |
|
"loss": 1.6488, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.3725905809101146, |
|
"grad_norm": 0.2706310749053955, |
|
"learning_rate": 0.0001751694742197242, |
|
"loss": 1.6269, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.37341855997880374, |
|
"grad_norm": 0.2572775185108185, |
|
"learning_rate": 0.0001751142756105746, |
|
"loss": 1.6432, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.3742465390474929, |
|
"grad_norm": 0.23288114368915558, |
|
"learning_rate": 0.00017505907700142502, |
|
"loss": 1.6531, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.37507451811618203, |
|
"grad_norm": 0.2673160433769226, |
|
"learning_rate": 0.00017500387839227538, |
|
"loss": 1.6047, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.3759024971848712, |
|
"grad_norm": 0.29170718789100647, |
|
"learning_rate": 0.00017494867978312578, |
|
"loss": 1.6119, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.3767304762535603, |
|
"grad_norm": 0.2626097500324249, |
|
"learning_rate": 0.0001748934811739762, |
|
"loss": 1.6329, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.37755845532224946, |
|
"grad_norm": 0.22249017655849457, |
|
"learning_rate": 0.00017483828256482657, |
|
"loss": 1.6284, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.3783864343909386, |
|
"grad_norm": 0.36833006143569946, |
|
"learning_rate": 0.00017478308395567696, |
|
"loss": 1.6079, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.37921441345962775, |
|
"grad_norm": 0.23582060635089874, |
|
"learning_rate": 0.00017472788534652738, |
|
"loss": 1.6243, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.3800423925283169, |
|
"grad_norm": 0.20202980935573578, |
|
"learning_rate": 0.00017467268673737775, |
|
"loss": 1.6312, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.38087037159700604, |
|
"grad_norm": 0.3692006468772888, |
|
"learning_rate": 0.00017461748812822814, |
|
"loss": 1.6048, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.3816983506656952, |
|
"grad_norm": 0.27726104855537415, |
|
"learning_rate": 0.00017456228951907857, |
|
"loss": 1.6396, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.38252632973438433, |
|
"grad_norm": 0.2585281431674957, |
|
"learning_rate": 0.00017450709090992893, |
|
"loss": 1.6185, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.3833543088030735, |
|
"grad_norm": 0.24427704513072968, |
|
"learning_rate": 0.00017445189230077935, |
|
"loss": 1.5996, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.3841822878717626, |
|
"grad_norm": 0.2784598469734192, |
|
"learning_rate": 0.00017439669369162975, |
|
"loss": 1.6012, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.38501026694045176, |
|
"grad_norm": 0.25094759464263916, |
|
"learning_rate": 0.00017434149508248012, |
|
"loss": 1.6043, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.3858382460091409, |
|
"grad_norm": 0.2598397731781006, |
|
"learning_rate": 0.00017428629647333054, |
|
"loss": 1.6278, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.38666622507783005, |
|
"grad_norm": 0.22646105289459229, |
|
"learning_rate": 0.00017423109786418093, |
|
"loss": 1.6297, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.3874942041465192, |
|
"grad_norm": 0.29078444838523865, |
|
"learning_rate": 0.0001741758992550313, |
|
"loss": 1.6509, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.38832218321520834, |
|
"grad_norm": 0.2686766982078552, |
|
"learning_rate": 0.00017412070064588172, |
|
"loss": 1.6145, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.3891501622838975, |
|
"grad_norm": 0.2642682194709778, |
|
"learning_rate": 0.0001740655020367321, |
|
"loss": 1.6421, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.3899781413525866, |
|
"grad_norm": 0.2528562545776367, |
|
"learning_rate": 0.000174010855413674, |
|
"loss": 1.6607, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.39080612042127577, |
|
"grad_norm": 0.2299865484237671, |
|
"learning_rate": 0.00017395620879061589, |
|
"loss": 1.6112, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.3916340994899649, |
|
"grad_norm": 0.24297955632209778, |
|
"learning_rate": 0.00017390101018146628, |
|
"loss": 1.6081, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.39246207855865406, |
|
"grad_norm": 0.243024542927742, |
|
"learning_rate": 0.00017384581157231667, |
|
"loss": 1.6703, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.3932900576273432, |
|
"grad_norm": 0.24055872857570648, |
|
"learning_rate": 0.00017379061296316707, |
|
"loss": 1.5895, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.39411803669603235, |
|
"grad_norm": 0.2664213478565216, |
|
"learning_rate": 0.00017373541435401746, |
|
"loss": 1.6173, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.3949460157647215, |
|
"grad_norm": 0.23474732041358948, |
|
"learning_rate": 0.00017368021574486786, |
|
"loss": 1.6347, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.39577399483341064, |
|
"grad_norm": 0.24745678901672363, |
|
"learning_rate": 0.00017362501713571825, |
|
"loss": 1.6112, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.3966019739020998, |
|
"grad_norm": 0.2576342523097992, |
|
"learning_rate": 0.00017356981852656865, |
|
"loss": 1.6181, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.3974299529707889, |
|
"grad_norm": 0.27308520674705505, |
|
"learning_rate": 0.00017351461991741904, |
|
"loss": 1.6037, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.39825793203947807, |
|
"grad_norm": 0.2533150315284729, |
|
"learning_rate": 0.00017345942130826943, |
|
"loss": 1.6561, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.3990859111081672, |
|
"grad_norm": 0.24679023027420044, |
|
"learning_rate": 0.00017340422269911983, |
|
"loss": 1.6129, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.3999138901768563, |
|
"grad_norm": 0.28216132521629333, |
|
"learning_rate": 0.00017334902408997022, |
|
"loss": 1.6374, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.40074186924554545, |
|
"grad_norm": 0.26568201184272766, |
|
"learning_rate": 0.00017329382548082062, |
|
"loss": 1.6395, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.4015698483142346, |
|
"grad_norm": 0.2306041121482849, |
|
"learning_rate": 0.000173238626871671, |
|
"loss": 1.6055, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.40239782738292373, |
|
"grad_norm": 0.38825201988220215, |
|
"learning_rate": 0.0001731834282625214, |
|
"loss": 1.6479, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.4032258064516129, |
|
"grad_norm": 0.2115429788827896, |
|
"learning_rate": 0.0001731282296533718, |
|
"loss": 1.616, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.404053785520302, |
|
"grad_norm": 0.2399633675813675, |
|
"learning_rate": 0.0001730730310442222, |
|
"loss": 1.644, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.40488176458899117, |
|
"grad_norm": 0.2738541066646576, |
|
"learning_rate": 0.0001730178324350726, |
|
"loss": 1.6346, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.4057097436576803, |
|
"grad_norm": 0.24475158751010895, |
|
"learning_rate": 0.00017296263382592298, |
|
"loss": 1.6143, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.40653772272636945, |
|
"grad_norm": 0.2846825122833252, |
|
"learning_rate": 0.00017290743521677338, |
|
"loss": 1.627, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.4073657017950586, |
|
"grad_norm": 0.24657665193080902, |
|
"learning_rate": 0.00017285223660762377, |
|
"loss": 1.6223, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.40819368086374774, |
|
"grad_norm": 0.24977359175682068, |
|
"learning_rate": 0.00017279703799847416, |
|
"loss": 1.6454, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.4090216599324369, |
|
"grad_norm": 0.24544388055801392, |
|
"learning_rate": 0.00017274183938932456, |
|
"loss": 1.6557, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.40984963900112603, |
|
"grad_norm": 0.23884369432926178, |
|
"learning_rate": 0.00017268664078017495, |
|
"loss": 1.6128, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.4106776180698152, |
|
"grad_norm": 0.36807382106781006, |
|
"learning_rate": 0.00017263144217102535, |
|
"loss": 1.6237, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.4115055971385043, |
|
"grad_norm": 0.291007936000824, |
|
"learning_rate": 0.00017257624356187574, |
|
"loss": 1.6439, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.41233357620719346, |
|
"grad_norm": 0.22296257317066193, |
|
"learning_rate": 0.00017252104495272614, |
|
"loss": 1.636, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.4131615552758826, |
|
"grad_norm": 0.281239777803421, |
|
"learning_rate": 0.00017246584634357653, |
|
"loss": 1.6008, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.41398953434457175, |
|
"grad_norm": 0.30486106872558594, |
|
"learning_rate": 0.00017241064773442692, |
|
"loss": 1.5891, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.4148175134132609, |
|
"grad_norm": 0.24123990535736084, |
|
"learning_rate": 0.00017235544912527732, |
|
"loss": 1.6299, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.41564549248195004, |
|
"grad_norm": 0.3727368116378784, |
|
"learning_rate": 0.0001723002505161277, |
|
"loss": 1.6166, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.4164734715506392, |
|
"grad_norm": 0.24565471708774567, |
|
"learning_rate": 0.0001722450519069781, |
|
"loss": 1.6129, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.41730145061932833, |
|
"grad_norm": 0.2552422285079956, |
|
"learning_rate": 0.00017219040528392002, |
|
"loss": 1.6117, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.4181294296880175, |
|
"grad_norm": 0.21545369923114777, |
|
"learning_rate": 0.0001721352066747704, |
|
"loss": 1.6433, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.4189574087567066, |
|
"grad_norm": 0.23316791653633118, |
|
"learning_rate": 0.00017208000806562078, |
|
"loss": 1.6806, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.41978538782539576, |
|
"grad_norm": 0.2567419409751892, |
|
"learning_rate": 0.0001720248094564712, |
|
"loss": 1.6253, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.4206133668940849, |
|
"grad_norm": 0.31419897079467773, |
|
"learning_rate": 0.00017196961084732157, |
|
"loss": 1.611, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.42144134596277405, |
|
"grad_norm": 0.26649272441864014, |
|
"learning_rate": 0.00017191441223817196, |
|
"loss": 1.6269, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.4222693250314632, |
|
"grad_norm": 0.24049483239650726, |
|
"learning_rate": 0.00017185921362902239, |
|
"loss": 1.6257, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.42309730410015234, |
|
"grad_norm": 0.22467993199825287, |
|
"learning_rate": 0.00017180401501987275, |
|
"loss": 1.5821, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.4239252831688415, |
|
"grad_norm": 0.27348917722702026, |
|
"learning_rate": 0.00017174881641072315, |
|
"loss": 1.6243, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.4247532622375306, |
|
"grad_norm": 0.23247596621513367, |
|
"learning_rate": 0.00017169361780157357, |
|
"loss": 1.6333, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.42558124130621977, |
|
"grad_norm": 0.25708910822868347, |
|
"learning_rate": 0.00017163841919242394, |
|
"loss": 1.6168, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.4264092203749089, |
|
"grad_norm": 0.2495090365409851, |
|
"learning_rate": 0.00017158322058327433, |
|
"loss": 1.6105, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.42723719944359806, |
|
"grad_norm": 0.2512340545654297, |
|
"learning_rate": 0.00017152802197412475, |
|
"loss": 1.631, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.4280651785122872, |
|
"grad_norm": 0.27026236057281494, |
|
"learning_rate": 0.00017147282336497512, |
|
"loss": 1.5874, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.42889315758097635, |
|
"grad_norm": 0.23251605033874512, |
|
"learning_rate": 0.0001714176247558255, |
|
"loss": 1.6383, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.4297211366496655, |
|
"grad_norm": 0.3068407475948334, |
|
"learning_rate": 0.00017136242614667593, |
|
"loss": 1.607, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.43054911571835464, |
|
"grad_norm": 0.22653043270111084, |
|
"learning_rate": 0.0001713072275375263, |
|
"loss": 1.5913, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.4313770947870438, |
|
"grad_norm": 0.22893564403057098, |
|
"learning_rate": 0.0001712520289283767, |
|
"loss": 1.6218, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.4322050738557329, |
|
"grad_norm": 0.23899468779563904, |
|
"learning_rate": 0.00017119683031922712, |
|
"loss": 1.6178, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.43303305292442207, |
|
"grad_norm": 0.26594260334968567, |
|
"learning_rate": 0.00017114163171007748, |
|
"loss": 1.6118, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.4338610319931112, |
|
"grad_norm": 0.29181089997291565, |
|
"learning_rate": 0.00017108643310092788, |
|
"loss": 1.6391, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.43468901106180036, |
|
"grad_norm": 0.35307735204696655, |
|
"learning_rate": 0.0001710312344917783, |
|
"loss": 1.601, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.4355169901304895, |
|
"grad_norm": 0.25828322768211365, |
|
"learning_rate": 0.00017097603588262867, |
|
"loss": 1.6421, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.43634496919917864, |
|
"grad_norm": 0.30803248286247253, |
|
"learning_rate": 0.00017092083727347906, |
|
"loss": 1.655, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.4371729482678678, |
|
"grad_norm": 0.23458120226860046, |
|
"learning_rate": 0.00017086563866432948, |
|
"loss": 1.6099, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.43800092733655693, |
|
"grad_norm": 0.23281985521316528, |
|
"learning_rate": 0.00017081044005517985, |
|
"loss": 1.6388, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.4388289064052461, |
|
"grad_norm": 0.23414187133312225, |
|
"learning_rate": 0.00017075524144603024, |
|
"loss": 1.5956, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.4396568854739352, |
|
"grad_norm": 0.25793951749801636, |
|
"learning_rate": 0.00017070004283688066, |
|
"loss": 1.6734, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.44048486454262437, |
|
"grad_norm": 0.23932301998138428, |
|
"learning_rate": 0.00017064484422773103, |
|
"loss": 1.6106, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.4413128436113135, |
|
"grad_norm": 0.24835747480392456, |
|
"learning_rate": 0.00017058964561858143, |
|
"loss": 1.6077, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.44214082268000265, |
|
"grad_norm": 0.26732298731803894, |
|
"learning_rate": 0.00017053444700943185, |
|
"loss": 1.5729, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.4429688017486918, |
|
"grad_norm": 0.2826359272003174, |
|
"learning_rate": 0.00017047924840028221, |
|
"loss": 1.6623, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.44379678081738094, |
|
"grad_norm": 0.24312160909175873, |
|
"learning_rate": 0.00017042404979113264, |
|
"loss": 1.6075, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.4446247598860701, |
|
"grad_norm": 0.293720006942749, |
|
"learning_rate": 0.00017036885118198303, |
|
"loss": 1.6293, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.44545273895475923, |
|
"grad_norm": 0.2534725069999695, |
|
"learning_rate": 0.0001703136525728334, |
|
"loss": 1.5869, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.4462807180234484, |
|
"grad_norm": 0.25833797454833984, |
|
"learning_rate": 0.00017025845396368382, |
|
"loss": 1.6359, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.4471086970921375, |
|
"grad_norm": 0.2595597207546234, |
|
"learning_rate": 0.0001702032553545342, |
|
"loss": 1.598, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.44793667616082666, |
|
"grad_norm": 0.2243652492761612, |
|
"learning_rate": 0.00017014805674538458, |
|
"loss": 1.6555, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.4487646552295158, |
|
"grad_norm": 0.2626785337924957, |
|
"learning_rate": 0.000170092858136235, |
|
"loss": 1.6327, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.44959263429820495, |
|
"grad_norm": 0.28202512860298157, |
|
"learning_rate": 0.0001700376595270854, |
|
"loss": 1.5877, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.4504206133668941, |
|
"grad_norm": 0.23521679639816284, |
|
"learning_rate": 0.00016998246091793576, |
|
"loss": 1.6357, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.45124859243558324, |
|
"grad_norm": 0.24466179311275482, |
|
"learning_rate": 0.00016992726230878618, |
|
"loss": 1.6089, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.4520765715042724, |
|
"grad_norm": 0.3313222825527191, |
|
"learning_rate": 0.00016987261568572804, |
|
"loss": 1.634, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.45290455057296153, |
|
"grad_norm": 0.21911990642547607, |
|
"learning_rate": 0.00016981741707657846, |
|
"loss": 1.6241, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.4537325296416507, |
|
"grad_norm": 0.24226447939872742, |
|
"learning_rate": 0.00016976221846742886, |
|
"loss": 1.5985, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.4545605087103398, |
|
"grad_norm": 0.25372591614723206, |
|
"learning_rate": 0.00016970701985827925, |
|
"loss": 1.5914, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.45538848777902896, |
|
"grad_norm": 0.27368178963661194, |
|
"learning_rate": 0.00016965182124912965, |
|
"loss": 1.652, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.4562164668477181, |
|
"grad_norm": 0.305602103471756, |
|
"learning_rate": 0.00016959662263998004, |
|
"loss": 1.651, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.45704444591640725, |
|
"grad_norm": 0.2739979326725006, |
|
"learning_rate": 0.00016954142403083044, |
|
"loss": 1.6583, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.4578724249850964, |
|
"grad_norm": 0.2888374924659729, |
|
"learning_rate": 0.00016948622542168083, |
|
"loss": 1.6283, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.45870040405378554, |
|
"grad_norm": 0.26104286313056946, |
|
"learning_rate": 0.00016943102681253122, |
|
"loss": 1.6222, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.4595283831224747, |
|
"grad_norm": 0.2540399134159088, |
|
"learning_rate": 0.00016937582820338162, |
|
"loss": 1.62, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.4603563621911638, |
|
"grad_norm": 0.2812783718109131, |
|
"learning_rate": 0.000169320629594232, |
|
"loss": 1.6215, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.46118434125985297, |
|
"grad_norm": 0.28794047236442566, |
|
"learning_rate": 0.0001692654309850824, |
|
"loss": 1.6361, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.4620123203285421, |
|
"grad_norm": 0.24678725004196167, |
|
"learning_rate": 0.0001692102323759328, |
|
"loss": 1.6053, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.46284029939723126, |
|
"grad_norm": 0.2598378360271454, |
|
"learning_rate": 0.0001691550337667832, |
|
"loss": 1.606, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.4636682784659204, |
|
"grad_norm": 0.3878665566444397, |
|
"learning_rate": 0.0001690998351576336, |
|
"loss": 1.6494, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.46449625753460955, |
|
"grad_norm": 0.24927765130996704, |
|
"learning_rate": 0.00016904463654848398, |
|
"loss": 1.6219, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.4653242366032987, |
|
"grad_norm": 0.2416529506444931, |
|
"learning_rate": 0.00016898943793933438, |
|
"loss": 1.6164, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.46615221567198784, |
|
"grad_norm": 0.3543456792831421, |
|
"learning_rate": 0.00016893423933018477, |
|
"loss": 1.604, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.466980194740677, |
|
"grad_norm": 0.3229213058948517, |
|
"learning_rate": 0.00016887904072103517, |
|
"loss": 1.6126, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.4678081738093661, |
|
"grad_norm": 0.24769534170627594, |
|
"learning_rate": 0.00016882384211188556, |
|
"loss": 1.6298, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.46863615287805527, |
|
"grad_norm": 0.25397947430610657, |
|
"learning_rate": 0.00016876864350273595, |
|
"loss": 1.6389, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.4694641319467444, |
|
"grad_norm": 0.2763853371143341, |
|
"learning_rate": 0.00016871399687967784, |
|
"loss": 1.602, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.47029211101543356, |
|
"grad_norm": 0.24106226861476898, |
|
"learning_rate": 0.00016865879827052824, |
|
"loss": 1.6317, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.4711200900841227, |
|
"grad_norm": 0.31650885939598083, |
|
"learning_rate": 0.00016860359966137863, |
|
"loss": 1.6287, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.47194806915281184, |
|
"grad_norm": 0.22699052095413208, |
|
"learning_rate": 0.00016854840105222902, |
|
"loss": 1.6059, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.47277604822150093, |
|
"grad_norm": 0.23599205911159515, |
|
"learning_rate": 0.00016849320244307942, |
|
"loss": 1.6359, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.4736040272901901, |
|
"grad_norm": 0.2733438014984131, |
|
"learning_rate": 0.0001684380038339298, |
|
"loss": 1.6221, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.4744320063588792, |
|
"grad_norm": 0.22067943215370178, |
|
"learning_rate": 0.0001683828052247802, |
|
"loss": 1.603, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.47525998542756837, |
|
"grad_norm": 0.25246864557266235, |
|
"learning_rate": 0.0001683276066156306, |
|
"loss": 1.5875, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.4760879644962575, |
|
"grad_norm": 0.2747368812561035, |
|
"learning_rate": 0.000168272408006481, |
|
"loss": 1.6522, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.47691594356494665, |
|
"grad_norm": 0.352892130613327, |
|
"learning_rate": 0.0001682172093973314, |
|
"loss": 1.609, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.4777439226336358, |
|
"grad_norm": 0.26428744196891785, |
|
"learning_rate": 0.00016816201078818178, |
|
"loss": 1.6191, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.47857190170232494, |
|
"grad_norm": 0.2937561571598053, |
|
"learning_rate": 0.0001681068121790322, |
|
"loss": 1.6204, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.4793998807710141, |
|
"grad_norm": 0.23604629933834076, |
|
"learning_rate": 0.00016805161356988257, |
|
"loss": 1.6416, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.48022785983970323, |
|
"grad_norm": 0.2349170446395874, |
|
"learning_rate": 0.00016799641496073297, |
|
"loss": 1.6116, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.4810558389083924, |
|
"grad_norm": 0.28929609060287476, |
|
"learning_rate": 0.0001679412163515834, |
|
"loss": 1.5956, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.4818838179770815, |
|
"grad_norm": 0.2797716557979584, |
|
"learning_rate": 0.00016788601774243375, |
|
"loss": 1.598, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.48271179704577066, |
|
"grad_norm": 0.30529841780662537, |
|
"learning_rate": 0.00016783081913328415, |
|
"loss": 1.6324, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.4835397761144598, |
|
"grad_norm": 0.35713014006614685, |
|
"learning_rate": 0.00016777562052413457, |
|
"loss": 1.624, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.48436775518314895, |
|
"grad_norm": 0.29055917263031006, |
|
"learning_rate": 0.00016772042191498494, |
|
"loss": 1.618, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.4851957342518381, |
|
"grad_norm": 0.24039526283740997, |
|
"learning_rate": 0.00016766522330583533, |
|
"loss": 1.6056, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.48602371332052724, |
|
"grad_norm": 0.26216012239456177, |
|
"learning_rate": 0.00016761002469668575, |
|
"loss": 1.6113, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.4868516923892164, |
|
"grad_norm": 0.2159920334815979, |
|
"learning_rate": 0.00016755482608753612, |
|
"loss": 1.6335, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.48767967145790553, |
|
"grad_norm": 0.26959407329559326, |
|
"learning_rate": 0.00016749962747838651, |
|
"loss": 1.6376, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.4885076505265947, |
|
"grad_norm": 0.2333938479423523, |
|
"learning_rate": 0.00016744442886923694, |
|
"loss": 1.6516, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.4893356295952838, |
|
"grad_norm": 0.3124794065952301, |
|
"learning_rate": 0.0001673892302600873, |
|
"loss": 1.6144, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.49016360866397296, |
|
"grad_norm": 0.26165392994880676, |
|
"learning_rate": 0.0001673340316509377, |
|
"loss": 1.6102, |
|
"step": 59200 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 362328, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1600, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.0468457732150804e+19, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|