|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6623832549513148, |
|
"eval_steps": 500, |
|
"global_step": 80000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0008279790686891435, |
|
"grad_norm": 0.1965542435646057, |
|
"learning_rate": 0.00019994481795043316, |
|
"loss": 1.926, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.001655958137378287, |
|
"grad_norm": 0.17061847448349, |
|
"learning_rate": 0.00019988961934128355, |
|
"loss": 1.7922, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.0024839372060674308, |
|
"grad_norm": 0.19415344297885895, |
|
"learning_rate": 0.00019983442073213394, |
|
"loss": 1.7264, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.003311916274756574, |
|
"grad_norm": 0.33725494146347046, |
|
"learning_rate": 0.00019977922212298434, |
|
"loss": 1.7463, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.004139895343445717, |
|
"grad_norm": 0.24688084423542023, |
|
"learning_rate": 0.00019972402351383473, |
|
"loss": 1.7424, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0049678744121348616, |
|
"grad_norm": 0.235696479678154, |
|
"learning_rate": 0.00019966882490468513, |
|
"loss": 1.7127, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.005795853480824005, |
|
"grad_norm": 0.20345468819141388, |
|
"learning_rate": 0.00019961362629553552, |
|
"loss": 1.6991, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.006623832549513148, |
|
"grad_norm": 0.2328668087720871, |
|
"learning_rate": 0.00019955842768638592, |
|
"loss": 1.7335, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.0074518116182022915, |
|
"grad_norm": 0.21569831669330597, |
|
"learning_rate": 0.00019950322907723634, |
|
"loss": 1.7145, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.008279790686891435, |
|
"grad_norm": 0.2174810767173767, |
|
"learning_rate": 0.0001994480304680867, |
|
"loss": 1.6832, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.009107769755580579, |
|
"grad_norm": 0.24059951305389404, |
|
"learning_rate": 0.00019939283185893713, |
|
"loss": 1.7052, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.009935748824269723, |
|
"grad_norm": 0.2508637011051178, |
|
"learning_rate": 0.00019933763324978752, |
|
"loss": 1.7207, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.010763727892958866, |
|
"grad_norm": 0.20193152129650116, |
|
"learning_rate": 0.0001992824346406379, |
|
"loss": 1.6772, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.01159170696164801, |
|
"grad_norm": 0.23993732035160065, |
|
"learning_rate": 0.0001992272360314883, |
|
"loss": 1.6975, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.012419686030337154, |
|
"grad_norm": 0.2582526206970215, |
|
"learning_rate": 0.0001991720374223387, |
|
"loss": 1.7142, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.013247665099026296, |
|
"grad_norm": 0.20497630536556244, |
|
"learning_rate": 0.00019911683881318907, |
|
"loss": 1.7078, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.01407564416771544, |
|
"grad_norm": 0.22760722041130066, |
|
"learning_rate": 0.0001990616402040395, |
|
"loss": 1.7277, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.014903623236404583, |
|
"grad_norm": 0.22834879159927368, |
|
"learning_rate": 0.00019900644159488989, |
|
"loss": 1.6996, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.01573160230509373, |
|
"grad_norm": 0.1762934923171997, |
|
"learning_rate": 0.00019895124298574025, |
|
"loss": 1.6856, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.01655958137378287, |
|
"grad_norm": 0.2605816125869751, |
|
"learning_rate": 0.00019889604437659067, |
|
"loss": 1.707, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.017387560442472014, |
|
"grad_norm": 0.190592423081398, |
|
"learning_rate": 0.00019884084576744107, |
|
"loss": 1.6793, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.018215539511161158, |
|
"grad_norm": 0.22705131769180298, |
|
"learning_rate": 0.00019878564715829144, |
|
"loss": 1.6856, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.019043518579850302, |
|
"grad_norm": 0.26750609278678894, |
|
"learning_rate": 0.00019873044854914186, |
|
"loss": 1.6791, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.019871497648539446, |
|
"grad_norm": 0.20754960179328918, |
|
"learning_rate": 0.00019867524993999225, |
|
"loss": 1.7049, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.020699476717228587, |
|
"grad_norm": 0.2278178185224533, |
|
"learning_rate": 0.00019862005133084262, |
|
"loss": 1.6614, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02152745578591773, |
|
"grad_norm": 0.2631435990333557, |
|
"learning_rate": 0.00019856485272169304, |
|
"loss": 1.6555, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.022355434854606875, |
|
"grad_norm": 0.22711312770843506, |
|
"learning_rate": 0.00019850965411254343, |
|
"loss": 1.6652, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.02318341392329602, |
|
"grad_norm": 0.204128697514534, |
|
"learning_rate": 0.0001984544555033938, |
|
"loss": 1.7111, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.024011392991985164, |
|
"grad_norm": 0.2155551165342331, |
|
"learning_rate": 0.00019839925689424422, |
|
"loss": 1.6391, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.024839372060674308, |
|
"grad_norm": 0.222556933760643, |
|
"learning_rate": 0.00019834405828509462, |
|
"loss": 1.6754, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02566735112936345, |
|
"grad_norm": 0.21225596964359283, |
|
"learning_rate": 0.00019828885967594498, |
|
"loss": 1.6551, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.026495330198052593, |
|
"grad_norm": 0.2524341642856598, |
|
"learning_rate": 0.0001982336610667954, |
|
"loss": 1.6876, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.027323309266741737, |
|
"grad_norm": 0.19305479526519775, |
|
"learning_rate": 0.0001981784624576458, |
|
"loss": 1.6574, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.02815128833543088, |
|
"grad_norm": 0.20817072689533234, |
|
"learning_rate": 0.00019812326384849617, |
|
"loss": 1.6962, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.028979267404120025, |
|
"grad_norm": 0.2652389705181122, |
|
"learning_rate": 0.0001980680652393466, |
|
"loss": 1.6845, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.029807246472809166, |
|
"grad_norm": 0.19599197804927826, |
|
"learning_rate": 0.00019801286663019698, |
|
"loss": 1.6631, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.03063522554149831, |
|
"grad_norm": 0.23823082447052002, |
|
"learning_rate": 0.00019795766802104735, |
|
"loss": 1.6919, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.03146320461018746, |
|
"grad_norm": 0.2747247517108917, |
|
"learning_rate": 0.00019790246941189777, |
|
"loss": 1.6821, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.032291183678876595, |
|
"grad_norm": 0.2241993397474289, |
|
"learning_rate": 0.00019784727080274816, |
|
"loss": 1.6776, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.03311916274756574, |
|
"grad_norm": 0.246478870511055, |
|
"learning_rate": 0.00019779207219359853, |
|
"loss": 1.6837, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.03394714181625488, |
|
"grad_norm": 0.18319016695022583, |
|
"learning_rate": 0.00019773687358444895, |
|
"loss": 1.6739, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.03477512088494403, |
|
"grad_norm": 0.19710256159305573, |
|
"learning_rate": 0.00019768167497529935, |
|
"loss": 1.6885, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.03560309995363317, |
|
"grad_norm": 0.21982429921627045, |
|
"learning_rate": 0.00019762647636614971, |
|
"loss": 1.6945, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.036431079022322316, |
|
"grad_norm": 0.20753520727157593, |
|
"learning_rate": 0.00019757127775700014, |
|
"loss": 1.6601, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.03725905809101146, |
|
"grad_norm": 0.2044544219970703, |
|
"learning_rate": 0.00019751607914785053, |
|
"loss": 1.6857, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.038087037159700604, |
|
"grad_norm": 0.22137849032878876, |
|
"learning_rate": 0.0001974608805387009, |
|
"loss": 1.6997, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.03891501622838975, |
|
"grad_norm": 0.1963178962469101, |
|
"learning_rate": 0.00019740568192955132, |
|
"loss": 1.6851, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.03974299529707889, |
|
"grad_norm": 0.20963279902935028, |
|
"learning_rate": 0.0001973504833204017, |
|
"loss": 1.6656, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.04057097436576804, |
|
"grad_norm": 0.25534650683403015, |
|
"learning_rate": 0.0001972952847112521, |
|
"loss": 1.6999, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.041398953434457174, |
|
"grad_norm": 0.22999747097492218, |
|
"learning_rate": 0.0001972400861021025, |
|
"loss": 1.634, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.04222693250314632, |
|
"grad_norm": 0.21249458193778992, |
|
"learning_rate": 0.0001971848874929529, |
|
"loss": 1.6824, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.04305491157183546, |
|
"grad_norm": 0.20595486462116241, |
|
"learning_rate": 0.0001971296888838033, |
|
"loss": 1.6587, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.043882890640524606, |
|
"grad_norm": 0.21987958252429962, |
|
"learning_rate": 0.00019707449027465368, |
|
"loss": 1.6727, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.04471086970921375, |
|
"grad_norm": 0.1945623904466629, |
|
"learning_rate": 0.00019701929166550408, |
|
"loss": 1.665, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.045538848777902895, |
|
"grad_norm": 0.20507337152957916, |
|
"learning_rate": 0.00019696409305635447, |
|
"loss": 1.6388, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.04636682784659204, |
|
"grad_norm": 0.21563030779361725, |
|
"learning_rate": 0.00019690889444720487, |
|
"loss": 1.6643, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.04719480691528118, |
|
"grad_norm": 0.15810565650463104, |
|
"learning_rate": 0.00019685369583805526, |
|
"loss": 1.6367, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.04802278598397033, |
|
"grad_norm": 0.3873724937438965, |
|
"learning_rate": 0.00019679849722890565, |
|
"loss": 1.6568, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.04885076505265947, |
|
"grad_norm": 0.1988883763551712, |
|
"learning_rate": 0.00019674329861975605, |
|
"loss": 1.7088, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.049678744121348616, |
|
"grad_norm": 0.20232902467250824, |
|
"learning_rate": 0.00019668810001060644, |
|
"loss": 1.6803, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.05050672319003775, |
|
"grad_norm": 0.2232154905796051, |
|
"learning_rate": 0.00019663290140145684, |
|
"loss": 1.6575, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.0513347022587269, |
|
"grad_norm": 0.23148497939109802, |
|
"learning_rate": 0.00019657770279230723, |
|
"loss": 1.6604, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.05216268132741604, |
|
"grad_norm": 0.21519899368286133, |
|
"learning_rate": 0.00019652250418315763, |
|
"loss": 1.6789, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.052990660396105185, |
|
"grad_norm": 0.1947392374277115, |
|
"learning_rate": 0.00019646730557400802, |
|
"loss": 1.7142, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.05381863946479433, |
|
"grad_norm": 0.22812333703041077, |
|
"learning_rate": 0.00019641210696485841, |
|
"loss": 1.6561, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.054646618533483474, |
|
"grad_norm": 0.22744423151016235, |
|
"learning_rate": 0.0001963569083557088, |
|
"loss": 1.6466, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.05547459760217262, |
|
"grad_norm": 0.18678049743175507, |
|
"learning_rate": 0.0001963017097465592, |
|
"loss": 1.6779, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.05630257667086176, |
|
"grad_norm": 0.23794223368167877, |
|
"learning_rate": 0.0001962465111374096, |
|
"loss": 1.6715, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.057130555739550906, |
|
"grad_norm": 0.2025059461593628, |
|
"learning_rate": 0.00019619131252826, |
|
"loss": 1.6296, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.05795853480824005, |
|
"grad_norm": 0.18825982511043549, |
|
"learning_rate": 0.00019613611391911039, |
|
"loss": 1.6415, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.058786513876929194, |
|
"grad_norm": 0.28439417481422424, |
|
"learning_rate": 0.00019608091530996078, |
|
"loss": 1.6574, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.05961449294561833, |
|
"grad_norm": 0.25905317068099976, |
|
"learning_rate": 0.00019602571670081117, |
|
"loss": 1.6296, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.060442472014307476, |
|
"grad_norm": 0.19414661824703217, |
|
"learning_rate": 0.00019597051809166157, |
|
"loss": 1.6556, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.06127045108299662, |
|
"grad_norm": 0.19103066623210907, |
|
"learning_rate": 0.00019591531948251196, |
|
"loss": 1.6536, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.062098430151685764, |
|
"grad_norm": 0.2532936632633209, |
|
"learning_rate": 0.00019586012087336236, |
|
"loss": 1.6773, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.06292640922037492, |
|
"grad_norm": 0.2522614300251007, |
|
"learning_rate": 0.00019580492226421275, |
|
"loss": 1.6192, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.06375438828906405, |
|
"grad_norm": 0.2658845782279968, |
|
"learning_rate": 0.00019574972365506315, |
|
"loss": 1.6819, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.06458236735775319, |
|
"grad_norm": 0.18649327754974365, |
|
"learning_rate": 0.00019569452504591354, |
|
"loss": 1.656, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.06541034642644233, |
|
"grad_norm": 0.20903462171554565, |
|
"learning_rate": 0.00019563932643676393, |
|
"loss": 1.6931, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.06623832549513148, |
|
"grad_norm": 0.23540228605270386, |
|
"learning_rate": 0.00019558412782761433, |
|
"loss": 1.6575, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.06706630456382062, |
|
"grad_norm": 0.21330572664737701, |
|
"learning_rate": 0.00019552892921846472, |
|
"loss": 1.6479, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.06789428363250977, |
|
"grad_norm": 0.19642361998558044, |
|
"learning_rate": 0.00019547373060931512, |
|
"loss": 1.649, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.06872226270119891, |
|
"grad_norm": 0.24467919766902924, |
|
"learning_rate": 0.0001954185320001655, |
|
"loss": 1.6701, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.06955024176988805, |
|
"grad_norm": 0.21666431427001953, |
|
"learning_rate": 0.0001953633333910159, |
|
"loss": 1.6745, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.0703782208385772, |
|
"grad_norm": 0.20939518511295319, |
|
"learning_rate": 0.0001953081347818663, |
|
"loss": 1.642, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.07120619990726634, |
|
"grad_norm": 0.1840800642967224, |
|
"learning_rate": 0.0001952529361727167, |
|
"loss": 1.6324, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.07203417897595549, |
|
"grad_norm": 0.22671596705913544, |
|
"learning_rate": 0.0001951977375635671, |
|
"loss": 1.6594, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.07286215804464463, |
|
"grad_norm": 0.2556142210960388, |
|
"learning_rate": 0.00019514253895441748, |
|
"loss": 1.6491, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.07369013711333378, |
|
"grad_norm": 0.2819201648235321, |
|
"learning_rate": 0.00019508734034526788, |
|
"loss": 1.665, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.07451811618202292, |
|
"grad_norm": 0.22931043803691864, |
|
"learning_rate": 0.00019503214173611827, |
|
"loss": 1.6766, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.07534609525071206, |
|
"grad_norm": 0.2069370597600937, |
|
"learning_rate": 0.00019497749511306016, |
|
"loss": 1.6753, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.07617407431940121, |
|
"grad_norm": 0.23254962265491486, |
|
"learning_rate": 0.00019492340047609353, |
|
"loss": 1.6642, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.07700205338809035, |
|
"grad_norm": 0.19698099792003632, |
|
"learning_rate": 0.00019486820186694396, |
|
"loss": 1.6838, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.0778300324567795, |
|
"grad_norm": 0.21683326363563538, |
|
"learning_rate": 0.00019481300325779435, |
|
"loss": 1.6741, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.07865801152546864, |
|
"grad_norm": 0.2396763414144516, |
|
"learning_rate": 0.00019475780464864472, |
|
"loss": 1.6475, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.07948599059415778, |
|
"grad_norm": 0.224142923951149, |
|
"learning_rate": 0.00019470260603949514, |
|
"loss": 1.6221, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.08031396966284693, |
|
"grad_norm": 0.24930523335933685, |
|
"learning_rate": 0.00019464740743034553, |
|
"loss": 1.6401, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.08114194873153607, |
|
"grad_norm": 0.26143988966941833, |
|
"learning_rate": 0.0001945922088211959, |
|
"loss": 1.6655, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.0819699278002252, |
|
"grad_norm": 0.19812200963497162, |
|
"learning_rate": 0.00019453701021204632, |
|
"loss": 1.6469, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.08279790686891435, |
|
"grad_norm": 0.23136496543884277, |
|
"learning_rate": 0.00019448181160289672, |
|
"loss": 1.6643, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.08362588593760349, |
|
"grad_norm": 0.24188588559627533, |
|
"learning_rate": 0.00019442661299374708, |
|
"loss": 1.6182, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.08445386500629264, |
|
"grad_norm": 0.2655825912952423, |
|
"learning_rate": 0.0001943714143845975, |
|
"loss": 1.6339, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.08528184407498178, |
|
"grad_norm": 0.2417413890361786, |
|
"learning_rate": 0.0001943162157754479, |
|
"loss": 1.6076, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.08610982314367092, |
|
"grad_norm": 0.2977447807788849, |
|
"learning_rate": 0.00019426101716629827, |
|
"loss": 1.6999, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.08693780221236007, |
|
"grad_norm": 0.2565799057483673, |
|
"learning_rate": 0.0001942058185571487, |
|
"loss": 1.6461, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.08776578128104921, |
|
"grad_norm": 0.2196560502052307, |
|
"learning_rate": 0.00019415061994799908, |
|
"loss": 1.6441, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.08859376034973836, |
|
"grad_norm": 0.35161322355270386, |
|
"learning_rate": 0.00019409542133884945, |
|
"loss": 1.6521, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.0894217394184275, |
|
"grad_norm": 0.21615535020828247, |
|
"learning_rate": 0.00019404022272969987, |
|
"loss": 1.6467, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.09024971848711665, |
|
"grad_norm": 0.22406460344791412, |
|
"learning_rate": 0.00019398502412055026, |
|
"loss": 1.6562, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.09107769755580579, |
|
"grad_norm": 0.21635471284389496, |
|
"learning_rate": 0.00019392982551140063, |
|
"loss": 1.6678, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.09190567662449493, |
|
"grad_norm": 0.19095148146152496, |
|
"learning_rate": 0.00019387462690225105, |
|
"loss": 1.6288, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.09273365569318408, |
|
"grad_norm": 0.2104116678237915, |
|
"learning_rate": 0.00019381942829310145, |
|
"loss": 1.6518, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.09356163476187322, |
|
"grad_norm": 0.217295840382576, |
|
"learning_rate": 0.0001937642296839518, |
|
"loss": 1.6517, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.09438961383056237, |
|
"grad_norm": 0.2044863998889923, |
|
"learning_rate": 0.00019370903107480223, |
|
"loss": 1.6424, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.09521759289925151, |
|
"grad_norm": 0.257137656211853, |
|
"learning_rate": 0.00019365383246565263, |
|
"loss": 1.6686, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.09604557196794065, |
|
"grad_norm": 0.19957780838012695, |
|
"learning_rate": 0.000193598633856503, |
|
"loss": 1.6052, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.0968735510366298, |
|
"grad_norm": 0.2578992545604706, |
|
"learning_rate": 0.00019354343524735342, |
|
"loss": 1.637, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.09770153010531894, |
|
"grad_norm": 0.25783538818359375, |
|
"learning_rate": 0.0001934882366382038, |
|
"loss": 1.6513, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.09852950917400809, |
|
"grad_norm": 0.23085756599903107, |
|
"learning_rate": 0.00019343303802905418, |
|
"loss": 1.6775, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.09935748824269723, |
|
"grad_norm": 0.20355592668056488, |
|
"learning_rate": 0.0001933778394199046, |
|
"loss": 1.6475, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.10018546731138636, |
|
"grad_norm": 0.1904350370168686, |
|
"learning_rate": 0.000193322640810755, |
|
"loss": 1.6432, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.1010134463800755, |
|
"grad_norm": 0.2179509401321411, |
|
"learning_rate": 0.0001932674422016054, |
|
"loss": 1.6708, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.10184142544876465, |
|
"grad_norm": 0.23058493435382843, |
|
"learning_rate": 0.00019321224359245578, |
|
"loss": 1.6893, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.1026694045174538, |
|
"grad_norm": 0.21472018957138062, |
|
"learning_rate": 0.00019315704498330618, |
|
"loss": 1.6534, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.10349738358614294, |
|
"grad_norm": 0.22223271429538727, |
|
"learning_rate": 0.00019310184637415657, |
|
"loss": 1.6569, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.10432536265483208, |
|
"grad_norm": 0.218324676156044, |
|
"learning_rate": 0.00019304664776500697, |
|
"loss": 1.6592, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.10515334172352123, |
|
"grad_norm": 0.18915393948554993, |
|
"learning_rate": 0.00019299144915585736, |
|
"loss": 1.6757, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.10598132079221037, |
|
"grad_norm": 0.19763177633285522, |
|
"learning_rate": 0.00019293625054670775, |
|
"loss": 1.6327, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.10680929986089951, |
|
"grad_norm": 0.20737317204475403, |
|
"learning_rate": 0.00019288105193755815, |
|
"loss": 1.6319, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.10763727892958866, |
|
"grad_norm": 0.20598194003105164, |
|
"learning_rate": 0.00019282585332840854, |
|
"loss": 1.6431, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.1084652579982778, |
|
"grad_norm": 0.24075473845005035, |
|
"learning_rate": 0.00019277065471925894, |
|
"loss": 1.6557, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.10929323706696695, |
|
"grad_norm": 0.19969555735588074, |
|
"learning_rate": 0.00019271545611010933, |
|
"loss": 1.6702, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.11012121613565609, |
|
"grad_norm": 0.2330610454082489, |
|
"learning_rate": 0.00019266025750095973, |
|
"loss": 1.6787, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.11094919520434524, |
|
"grad_norm": 0.20984840393066406, |
|
"learning_rate": 0.00019260505889181012, |
|
"loss": 1.6416, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.11177717427303438, |
|
"grad_norm": 0.2110757976770401, |
|
"learning_rate": 0.00019254986028266051, |
|
"loss": 1.686, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.11260515334172352, |
|
"grad_norm": 0.2638145685195923, |
|
"learning_rate": 0.0001924946616735109, |
|
"loss": 1.6507, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.11343313241041267, |
|
"grad_norm": 0.20532745122909546, |
|
"learning_rate": 0.0001924394630643613, |
|
"loss": 1.6409, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.11426111147910181, |
|
"grad_norm": 0.21913674473762512, |
|
"learning_rate": 0.0001923842644552117, |
|
"loss": 1.6341, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.11508909054779096, |
|
"grad_norm": 0.29349780082702637, |
|
"learning_rate": 0.0001923290658460621, |
|
"loss": 1.6626, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.1159170696164801, |
|
"grad_norm": 0.20620688796043396, |
|
"learning_rate": 0.00019227386723691248, |
|
"loss": 1.6993, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.11674504868516924, |
|
"grad_norm": 0.2426968812942505, |
|
"learning_rate": 0.00019221866862776288, |
|
"loss": 1.6519, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.11757302775385839, |
|
"grad_norm": 0.24563075602054596, |
|
"learning_rate": 0.00019216347001861327, |
|
"loss": 1.6951, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.11840100682254752, |
|
"grad_norm": 0.2335137128829956, |
|
"learning_rate": 0.00019210827140946367, |
|
"loss": 1.6252, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.11922898589123666, |
|
"grad_norm": 0.2369394600391388, |
|
"learning_rate": 0.00019205307280031406, |
|
"loss": 1.6747, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.12005696495992581, |
|
"grad_norm": 0.22131401300430298, |
|
"learning_rate": 0.00019199787419116446, |
|
"loss": 1.6462, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.12088494402861495, |
|
"grad_norm": 0.3465203046798706, |
|
"learning_rate": 0.00019194267558201485, |
|
"loss": 1.6379, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.1217129230973041, |
|
"grad_norm": 0.26189711689949036, |
|
"learning_rate": 0.00019188747697286524, |
|
"loss": 1.7039, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.12254090216599324, |
|
"grad_norm": 0.23303379118442535, |
|
"learning_rate": 0.00019183227836371564, |
|
"loss": 1.623, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.12336888123468238, |
|
"grad_norm": 0.21591255068778992, |
|
"learning_rate": 0.00019177707975456603, |
|
"loss": 1.6421, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.12419686030337153, |
|
"grad_norm": 0.21272686123847961, |
|
"learning_rate": 0.00019172188114541643, |
|
"loss": 1.6627, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.1250248393720607, |
|
"grad_norm": 0.21305076777935028, |
|
"learning_rate": 0.00019166668253626682, |
|
"loss": 1.6609, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.12585281844074983, |
|
"grad_norm": 0.19881877303123474, |
|
"learning_rate": 0.00019161148392711722, |
|
"loss": 1.6446, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.12668079750943897, |
|
"grad_norm": 0.23909015953540802, |
|
"learning_rate": 0.0001915568373040591, |
|
"loss": 1.6429, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.1275087765781281, |
|
"grad_norm": 0.2043364942073822, |
|
"learning_rate": 0.0001915016386949095, |
|
"loss": 1.6618, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.12833675564681724, |
|
"grad_norm": 0.2276351898908615, |
|
"learning_rate": 0.0001914464400857599, |
|
"loss": 1.6341, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.12916473471550638, |
|
"grad_norm": 0.23034417629241943, |
|
"learning_rate": 0.00019139124147661028, |
|
"loss": 1.6323, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.12999271378419552, |
|
"grad_norm": 0.24165049195289612, |
|
"learning_rate": 0.00019133604286746068, |
|
"loss": 1.6327, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.13082069285288467, |
|
"grad_norm": 0.2675997018814087, |
|
"learning_rate": 0.00019128084425831107, |
|
"loss": 1.6315, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.1316486719215738, |
|
"grad_norm": 0.228688582777977, |
|
"learning_rate": 0.00019122564564916147, |
|
"loss": 1.6417, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.13247665099026296, |
|
"grad_norm": 0.23833218216896057, |
|
"learning_rate": 0.00019117044704001186, |
|
"loss": 1.6341, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.1333046300589521, |
|
"grad_norm": 0.26954251527786255, |
|
"learning_rate": 0.00019111580041695375, |
|
"loss": 1.6444, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.13413260912764124, |
|
"grad_norm": 0.2615845203399658, |
|
"learning_rate": 0.00019106060180780417, |
|
"loss": 1.6373, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.1349605881963304, |
|
"grad_norm": 0.24307669699192047, |
|
"learning_rate": 0.00019100540319865454, |
|
"loss": 1.6249, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.13578856726501953, |
|
"grad_norm": 0.2018076479434967, |
|
"learning_rate": 0.00019095020458950496, |
|
"loss": 1.6502, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.13661654633370868, |
|
"grad_norm": 0.21472395956516266, |
|
"learning_rate": 0.00019089500598035535, |
|
"loss": 1.6392, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.13744452540239782, |
|
"grad_norm": 0.24748454988002777, |
|
"learning_rate": 0.00019083980737120572, |
|
"loss": 1.687, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.13827250447108697, |
|
"grad_norm": 0.24712525308132172, |
|
"learning_rate": 0.00019078460876205614, |
|
"loss": 1.6488, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.1391004835397761, |
|
"grad_norm": 0.2053895890712738, |
|
"learning_rate": 0.00019072941015290653, |
|
"loss": 1.6407, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.13992846260846525, |
|
"grad_norm": 0.2149922251701355, |
|
"learning_rate": 0.0001906742115437569, |
|
"loss": 1.6265, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.1407564416771544, |
|
"grad_norm": 0.26773732900619507, |
|
"learning_rate": 0.00019061901293460732, |
|
"loss": 1.6593, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.14158442074584354, |
|
"grad_norm": 0.20975753664970398, |
|
"learning_rate": 0.00019056381432545772, |
|
"loss": 1.6345, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.1424123998145327, |
|
"grad_norm": 0.2390817403793335, |
|
"learning_rate": 0.00019050861571630808, |
|
"loss": 1.6241, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.14324037888322183, |
|
"grad_norm": 0.2248607873916626, |
|
"learning_rate": 0.0001904534171071585, |
|
"loss": 1.6352, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.14406835795191097, |
|
"grad_norm": 0.25773122906684875, |
|
"learning_rate": 0.0001903982184980089, |
|
"loss": 1.6365, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.14489633702060012, |
|
"grad_norm": 0.2824070155620575, |
|
"learning_rate": 0.00019034301988885927, |
|
"loss": 1.6565, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.14572431608928926, |
|
"grad_norm": 0.21992579102516174, |
|
"learning_rate": 0.0001902878212797097, |
|
"loss": 1.6707, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.1465522951579784, |
|
"grad_norm": 0.2076718509197235, |
|
"learning_rate": 0.00019023262267056008, |
|
"loss": 1.6515, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.14738027422666755, |
|
"grad_norm": 0.2828584313392639, |
|
"learning_rate": 0.00019017742406141045, |
|
"loss": 1.6021, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.1482082532953567, |
|
"grad_norm": 0.20166254043579102, |
|
"learning_rate": 0.00019012222545226087, |
|
"loss": 1.6144, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.14903623236404584, |
|
"grad_norm": 0.2705940008163452, |
|
"learning_rate": 0.00019006702684311127, |
|
"loss": 1.6853, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.14986421143273498, |
|
"grad_norm": 0.2661746144294739, |
|
"learning_rate": 0.00019001182823396163, |
|
"loss": 1.6554, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.15069219050142413, |
|
"grad_norm": 0.19586409628391266, |
|
"learning_rate": 0.00018995662962481205, |
|
"loss": 1.6739, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.15152016957011327, |
|
"grad_norm": 0.22720645368099213, |
|
"learning_rate": 0.00018990143101566245, |
|
"loss": 1.6599, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.15234814863880242, |
|
"grad_norm": 0.20618566870689392, |
|
"learning_rate": 0.00018984623240651282, |
|
"loss": 1.6389, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.15317612770749156, |
|
"grad_norm": 0.20746736228466034, |
|
"learning_rate": 0.00018979103379736324, |
|
"loss": 1.6367, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.1540041067761807, |
|
"grad_norm": 0.2366802841424942, |
|
"learning_rate": 0.00018973583518821363, |
|
"loss": 1.6096, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.15483208584486985, |
|
"grad_norm": 0.22609083354473114, |
|
"learning_rate": 0.000189680636579064, |
|
"loss": 1.6324, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.155660064913559, |
|
"grad_norm": 0.27429673075675964, |
|
"learning_rate": 0.00018962543796991442, |
|
"loss": 1.617, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.15648804398224814, |
|
"grad_norm": 0.24921450018882751, |
|
"learning_rate": 0.0001895702393607648, |
|
"loss": 1.6605, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.15731602305093728, |
|
"grad_norm": 0.20069612562656403, |
|
"learning_rate": 0.00018951504075161518, |
|
"loss": 1.6253, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.15814400211962643, |
|
"grad_norm": 0.26492151618003845, |
|
"learning_rate": 0.0001894598421424656, |
|
"loss": 1.6561, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.15897198118831557, |
|
"grad_norm": 0.24309319257736206, |
|
"learning_rate": 0.000189404643533316, |
|
"loss": 1.6432, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.15979996025700471, |
|
"grad_norm": 0.22135823965072632, |
|
"learning_rate": 0.00018934944492416636, |
|
"loss": 1.6476, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.16062793932569386, |
|
"grad_norm": 0.2506055533885956, |
|
"learning_rate": 0.00018929424631501678, |
|
"loss": 1.6391, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.161455918394383, |
|
"grad_norm": 0.24688918888568878, |
|
"learning_rate": 0.00018923904770586718, |
|
"loss": 1.662, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.16228389746307215, |
|
"grad_norm": 0.21143893897533417, |
|
"learning_rate": 0.00018918384909671755, |
|
"loss": 1.6302, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.1631118765317613, |
|
"grad_norm": 0.20364534854888916, |
|
"learning_rate": 0.00018912865048756797, |
|
"loss": 1.6402, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.1639398556004504, |
|
"grad_norm": 0.24464745819568634, |
|
"learning_rate": 0.00018907345187841836, |
|
"loss": 1.6452, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.16476783466913955, |
|
"grad_norm": 0.20947644114494324, |
|
"learning_rate": 0.00018901825326926873, |
|
"loss": 1.624, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.1655958137378287, |
|
"grad_norm": 0.20495034754276276, |
|
"learning_rate": 0.00018896305466011915, |
|
"loss": 1.6585, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.16642379280651784, |
|
"grad_norm": 0.26043763756752014, |
|
"learning_rate": 0.00018890785605096954, |
|
"loss": 1.6746, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.16725177187520698, |
|
"grad_norm": 0.20851582288742065, |
|
"learning_rate": 0.00018885320942791143, |
|
"loss": 1.625, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.16807975094389613, |
|
"grad_norm": 0.2077847421169281, |
|
"learning_rate": 0.00018879801081876182, |
|
"loss": 1.6315, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.16890773001258527, |
|
"grad_norm": 0.21309372782707214, |
|
"learning_rate": 0.0001887433641957037, |
|
"loss": 1.6452, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.16973570908127442, |
|
"grad_norm": 0.205887570977211, |
|
"learning_rate": 0.0001886881655865541, |
|
"loss": 1.637, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.17056368814996356, |
|
"grad_norm": 0.18793857097625732, |
|
"learning_rate": 0.0001886329669774045, |
|
"loss": 1.6381, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.1713916672186527, |
|
"grad_norm": 0.20410719513893127, |
|
"learning_rate": 0.0001885777683682549, |
|
"loss": 1.6282, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.17221964628734185, |
|
"grad_norm": 0.2512223422527313, |
|
"learning_rate": 0.0001885225697591053, |
|
"loss": 1.6311, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.173047625356031, |
|
"grad_norm": 0.2699196934700012, |
|
"learning_rate": 0.00018846737114995568, |
|
"loss": 1.6852, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.17387560442472014, |
|
"grad_norm": 0.24233217537403107, |
|
"learning_rate": 0.00018841217254080608, |
|
"loss": 1.6642, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.17470358349340928, |
|
"grad_norm": 0.23415082693099976, |
|
"learning_rate": 0.00018835697393165647, |
|
"loss": 1.6825, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.17553156256209843, |
|
"grad_norm": 0.20289281010627747, |
|
"learning_rate": 0.00018830177532250686, |
|
"loss": 1.6164, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.17635954163078757, |
|
"grad_norm": 0.2364041954278946, |
|
"learning_rate": 0.00018824657671335726, |
|
"loss": 1.6243, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.1771875206994767, |
|
"grad_norm": 0.21854723989963531, |
|
"learning_rate": 0.00018819137810420765, |
|
"loss": 1.6549, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.17801549976816586, |
|
"grad_norm": 0.2151113748550415, |
|
"learning_rate": 0.00018813617949505805, |
|
"loss": 1.6278, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.178843478836855, |
|
"grad_norm": 0.2160249501466751, |
|
"learning_rate": 0.00018808098088590844, |
|
"loss": 1.6475, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.17967145790554415, |
|
"grad_norm": 0.22971031069755554, |
|
"learning_rate": 0.00018802578227675884, |
|
"loss": 1.6414, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.1804994369742333, |
|
"grad_norm": 0.2136770784854889, |
|
"learning_rate": 0.00018797058366760923, |
|
"loss": 1.6364, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.18132741604292243, |
|
"grad_norm": 0.22959814965724945, |
|
"learning_rate": 0.00018791538505845962, |
|
"loss": 1.639, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.18215539511161158, |
|
"grad_norm": 0.21293580532073975, |
|
"learning_rate": 0.00018786018644931002, |
|
"loss": 1.6419, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.18298337418030072, |
|
"grad_norm": 0.241853266954422, |
|
"learning_rate": 0.0001878049878401604, |
|
"loss": 1.6887, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.18381135324898987, |
|
"grad_norm": 0.23669332265853882, |
|
"learning_rate": 0.0001877497892310108, |
|
"loss": 1.6685, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.184639332317679, |
|
"grad_norm": 0.20697368681430817, |
|
"learning_rate": 0.0001876945906218612, |
|
"loss": 1.6351, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.18546731138636816, |
|
"grad_norm": 0.2443273812532425, |
|
"learning_rate": 0.0001876393920127116, |
|
"loss": 1.6545, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.1862952904550573, |
|
"grad_norm": 0.20272250473499298, |
|
"learning_rate": 0.000187584193403562, |
|
"loss": 1.6325, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.18712326952374644, |
|
"grad_norm": 0.23750340938568115, |
|
"learning_rate": 0.00018752899479441238, |
|
"loss": 1.6248, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.1879512485924356, |
|
"grad_norm": 0.20021654665470123, |
|
"learning_rate": 0.00018747379618526278, |
|
"loss": 1.6383, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.18877922766112473, |
|
"grad_norm": 0.23722049593925476, |
|
"learning_rate": 0.00018741859757611317, |
|
"loss": 1.6377, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.18960720672981388, |
|
"grad_norm": 0.22547928988933563, |
|
"learning_rate": 0.00018736395095305509, |
|
"loss": 1.6223, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.19043518579850302, |
|
"grad_norm": 0.23537902534008026, |
|
"learning_rate": 0.00018730875234390545, |
|
"loss": 1.6527, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.19126316486719216, |
|
"grad_norm": 0.25864705443382263, |
|
"learning_rate": 0.00018725355373475585, |
|
"loss": 1.6138, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.1920911439358813, |
|
"grad_norm": 0.24374592304229736, |
|
"learning_rate": 0.00018719835512560627, |
|
"loss": 1.6443, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.19291912300457045, |
|
"grad_norm": 0.22655454277992249, |
|
"learning_rate": 0.00018714315651645664, |
|
"loss": 1.6472, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.1937471020732596, |
|
"grad_norm": 0.23370279371738434, |
|
"learning_rate": 0.00018708795790730703, |
|
"loss": 1.6491, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.19457508114194874, |
|
"grad_norm": 0.2537304162979126, |
|
"learning_rate": 0.00018703275929815745, |
|
"loss": 1.6426, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.19540306021063789, |
|
"grad_norm": 0.23084019124507904, |
|
"learning_rate": 0.00018697756068900782, |
|
"loss": 1.6487, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.19623103927932703, |
|
"grad_norm": 0.19103647768497467, |
|
"learning_rate": 0.00018692236207985824, |
|
"loss": 1.6103, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.19705901834801617, |
|
"grad_norm": 0.23761576414108276, |
|
"learning_rate": 0.00018686716347070863, |
|
"loss": 1.6402, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.19788699741670532, |
|
"grad_norm": 0.2252289056777954, |
|
"learning_rate": 0.000186811964861559, |
|
"loss": 1.6376, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.19871497648539446, |
|
"grad_norm": 0.25417011976242065, |
|
"learning_rate": 0.00018675676625240942, |
|
"loss": 1.6236, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.1995429555540836, |
|
"grad_norm": 0.2344467043876648, |
|
"learning_rate": 0.00018670156764325982, |
|
"loss": 1.6432, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.20037093462277272, |
|
"grad_norm": 0.22560527920722961, |
|
"learning_rate": 0.00018664636903411018, |
|
"loss": 1.6404, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.20119891369146187, |
|
"grad_norm": 0.24285413324832916, |
|
"learning_rate": 0.0001865911704249606, |
|
"loss": 1.6199, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.202026892760151, |
|
"grad_norm": 0.26057812571525574, |
|
"learning_rate": 0.000186535971815811, |
|
"loss": 1.6302, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.20285487182884016, |
|
"grad_norm": 0.22331953048706055, |
|
"learning_rate": 0.00018648077320666137, |
|
"loss": 1.623, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.2036828508975293, |
|
"grad_norm": 0.24363332986831665, |
|
"learning_rate": 0.0001864255745975118, |
|
"loss": 1.6365, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.20451082996621844, |
|
"grad_norm": 0.22553332149982452, |
|
"learning_rate": 0.00018637037598836218, |
|
"loss": 1.6135, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.2053388090349076, |
|
"grad_norm": 0.23822011053562164, |
|
"learning_rate": 0.00018631517737921255, |
|
"loss": 1.6404, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.20616678810359673, |
|
"grad_norm": 0.22308780252933502, |
|
"learning_rate": 0.00018625997877006297, |
|
"loss": 1.6401, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.20699476717228588, |
|
"grad_norm": 0.232479065656662, |
|
"learning_rate": 0.00018620478016091336, |
|
"loss": 1.6457, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.20782274624097502, |
|
"grad_norm": 0.23883666098117828, |
|
"learning_rate": 0.00018614958155176373, |
|
"loss": 1.6687, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.20865072530966416, |
|
"grad_norm": 0.215969979763031, |
|
"learning_rate": 0.00018609438294261415, |
|
"loss": 1.6314, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.2094787043783533, |
|
"grad_norm": 0.2068208009004593, |
|
"learning_rate": 0.00018603918433346455, |
|
"loss": 1.6458, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.21030668344704245, |
|
"grad_norm": 0.2438194751739502, |
|
"learning_rate": 0.00018598398572431491, |
|
"loss": 1.6519, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.2111346625157316, |
|
"grad_norm": 0.26404204964637756, |
|
"learning_rate": 0.00018592878711516534, |
|
"loss": 1.6276, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.21196264158442074, |
|
"grad_norm": 0.2229745090007782, |
|
"learning_rate": 0.0001858741404921072, |
|
"loss": 1.6585, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.21279062065310989, |
|
"grad_norm": 0.2071082442998886, |
|
"learning_rate": 0.00018581894188295762, |
|
"loss": 1.6187, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.21361859972179903, |
|
"grad_norm": 0.41804060339927673, |
|
"learning_rate": 0.000185763743273808, |
|
"loss": 1.6219, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.21444657879048817, |
|
"grad_norm": 0.22351470589637756, |
|
"learning_rate": 0.00018570854466465838, |
|
"loss": 1.6616, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.21527455785917732, |
|
"grad_norm": 0.219690203666687, |
|
"learning_rate": 0.0001856533460555088, |
|
"loss": 1.6551, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.21610253692786646, |
|
"grad_norm": 0.25187328457832336, |
|
"learning_rate": 0.0001855981474463592, |
|
"loss": 1.6272, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.2169305159965556, |
|
"grad_norm": 0.22479000687599182, |
|
"learning_rate": 0.00018554294883720956, |
|
"loss": 1.6096, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.21775849506524475, |
|
"grad_norm": 0.2545852065086365, |
|
"learning_rate": 0.00018548775022805998, |
|
"loss": 1.6099, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.2185864741339339, |
|
"grad_norm": 0.24930129945278168, |
|
"learning_rate": 0.00018543255161891038, |
|
"loss": 1.6247, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.21941445320262304, |
|
"grad_norm": 0.3432662785053253, |
|
"learning_rate": 0.00018537735300976077, |
|
"loss": 1.6523, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.22024243227131218, |
|
"grad_norm": 0.19649013876914978, |
|
"learning_rate": 0.00018532215440061116, |
|
"loss": 1.6761, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.22107041134000133, |
|
"grad_norm": 0.22068659961223602, |
|
"learning_rate": 0.00018526695579146156, |
|
"loss": 1.655, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.22189839040869047, |
|
"grad_norm": 0.2267031967639923, |
|
"learning_rate": 0.00018521175718231195, |
|
"loss": 1.5902, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.22272636947737962, |
|
"grad_norm": 0.2132362276315689, |
|
"learning_rate": 0.00018515655857316235, |
|
"loss": 1.6265, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.22355434854606876, |
|
"grad_norm": 0.21281544864177704, |
|
"learning_rate": 0.00018510135996401274, |
|
"loss": 1.6584, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.2243823276147579, |
|
"grad_norm": 0.22479993104934692, |
|
"learning_rate": 0.00018504616135486314, |
|
"loss": 1.6518, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.22521030668344705, |
|
"grad_norm": 0.27082720398902893, |
|
"learning_rate": 0.00018499096274571353, |
|
"loss": 1.6251, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.2260382857521362, |
|
"grad_norm": 0.22913210093975067, |
|
"learning_rate": 0.00018493576413656392, |
|
"loss": 1.6431, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.22686626482082534, |
|
"grad_norm": 0.2798856496810913, |
|
"learning_rate": 0.00018488056552741432, |
|
"loss": 1.6707, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.22769424388951448, |
|
"grad_norm": 0.2440473437309265, |
|
"learning_rate": 0.0001848253669182647, |
|
"loss": 1.6457, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.22852222295820362, |
|
"grad_norm": 0.22927863895893097, |
|
"learning_rate": 0.0001847701683091151, |
|
"loss": 1.6066, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.22935020202689277, |
|
"grad_norm": 0.2303207516670227, |
|
"learning_rate": 0.0001847149696999655, |
|
"loss": 1.6533, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.2301781810955819, |
|
"grad_norm": 0.1961013227701187, |
|
"learning_rate": 0.0001846597710908159, |
|
"loss": 1.6287, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.23100616016427106, |
|
"grad_norm": 0.29377955198287964, |
|
"learning_rate": 0.0001846045724816663, |
|
"loss": 1.6367, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.2318341392329602, |
|
"grad_norm": 0.22385641932487488, |
|
"learning_rate": 0.00018454937387251668, |
|
"loss": 1.6553, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.23266211830164935, |
|
"grad_norm": 0.23252160847187042, |
|
"learning_rate": 0.00018449417526336708, |
|
"loss": 1.6148, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.2334900973703385, |
|
"grad_norm": 0.22680921852588654, |
|
"learning_rate": 0.00018443897665421747, |
|
"loss": 1.6208, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.23431807643902763, |
|
"grad_norm": 0.2204786092042923, |
|
"learning_rate": 0.00018438433003115936, |
|
"loss": 1.6322, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.23514605550771678, |
|
"grad_norm": 0.2748723030090332, |
|
"learning_rate": 0.00018432913142200975, |
|
"loss": 1.6382, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.23597403457640592, |
|
"grad_norm": 0.21922816336154938, |
|
"learning_rate": 0.00018427393281286017, |
|
"loss": 1.6371, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.23680201364509504, |
|
"grad_norm": 0.2469870150089264, |
|
"learning_rate": 0.00018421873420371054, |
|
"loss": 1.5986, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.23762999271378418, |
|
"grad_norm": 0.24729487299919128, |
|
"learning_rate": 0.00018416353559456094, |
|
"loss": 1.6186, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.23845797178247333, |
|
"grad_norm": 0.2277776002883911, |
|
"learning_rate": 0.00018410833698541136, |
|
"loss": 1.617, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.23928595085116247, |
|
"grad_norm": 0.23853392899036407, |
|
"learning_rate": 0.00018405313837626172, |
|
"loss": 1.6358, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.24011392991985162, |
|
"grad_norm": 0.22271224856376648, |
|
"learning_rate": 0.00018399793976711212, |
|
"loss": 1.6349, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.24094190898854076, |
|
"grad_norm": 0.2435714304447174, |
|
"learning_rate": 0.00018394274115796254, |
|
"loss": 1.6673, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.2417698880572299, |
|
"grad_norm": 0.2396841198205948, |
|
"learning_rate": 0.0001838875425488129, |
|
"loss": 1.641, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.24259786712591905, |
|
"grad_norm": 0.26187509298324585, |
|
"learning_rate": 0.0001838323439396633, |
|
"loss": 1.6256, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.2434258461946082, |
|
"grad_norm": 0.21034131944179535, |
|
"learning_rate": 0.00018377714533051372, |
|
"loss": 1.5857, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.24425382526329734, |
|
"grad_norm": 0.23442912101745605, |
|
"learning_rate": 0.0001837219467213641, |
|
"loss": 1.5935, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.24508180433198648, |
|
"grad_norm": 0.21707892417907715, |
|
"learning_rate": 0.00018366674811221448, |
|
"loss": 1.6391, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.24590978340067562, |
|
"grad_norm": 0.2780430316925049, |
|
"learning_rate": 0.0001836115495030649, |
|
"loss": 1.6243, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.24673776246936477, |
|
"grad_norm": 0.2786915898323059, |
|
"learning_rate": 0.00018355635089391527, |
|
"loss": 1.6023, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.2475657415380539, |
|
"grad_norm": 0.23516952991485596, |
|
"learning_rate": 0.00018350115228476567, |
|
"loss": 1.6745, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.24839372060674306, |
|
"grad_norm": 0.22414027154445648, |
|
"learning_rate": 0.0001834459536756161, |
|
"loss": 1.6293, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.2492216996754322, |
|
"grad_norm": 0.2355353832244873, |
|
"learning_rate": 0.00018339075506646645, |
|
"loss": 1.6405, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.2500496787441214, |
|
"grad_norm": 0.284574031829834, |
|
"learning_rate": 0.00018333555645731685, |
|
"loss": 1.6156, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.2508776578128105, |
|
"grad_norm": 0.22318606078624725, |
|
"learning_rate": 0.00018328035784816727, |
|
"loss": 1.6259, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.25170563688149966, |
|
"grad_norm": 0.18916834890842438, |
|
"learning_rate": 0.00018322515923901764, |
|
"loss": 1.6371, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.2525336159501888, |
|
"grad_norm": 0.21935884654521942, |
|
"learning_rate": 0.00018316996062986803, |
|
"loss": 1.608, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.25336159501887795, |
|
"grad_norm": 0.22668689489364624, |
|
"learning_rate": 0.00018311476202071845, |
|
"loss": 1.6319, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.2541895740875671, |
|
"grad_norm": 0.2440369874238968, |
|
"learning_rate": 0.00018305956341156882, |
|
"loss": 1.6317, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.2550175531562562, |
|
"grad_norm": 0.24555020034313202, |
|
"learning_rate": 0.00018300436480241921, |
|
"loss": 1.6545, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.2558455322249453, |
|
"grad_norm": 0.24021874368190765, |
|
"learning_rate": 0.00018294916619326964, |
|
"loss": 1.6426, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.25667351129363447, |
|
"grad_norm": 0.24666373431682587, |
|
"learning_rate": 0.00018289451957021152, |
|
"loss": 1.6185, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.2575014903623236, |
|
"grad_norm": 0.33455362915992737, |
|
"learning_rate": 0.00018283932096106192, |
|
"loss": 1.63, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.25832946943101276, |
|
"grad_norm": 0.22457076609134674, |
|
"learning_rate": 0.00018278412235191228, |
|
"loss": 1.6384, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.2591574484997019, |
|
"grad_norm": 0.23992955684661865, |
|
"learning_rate": 0.0001827289237427627, |
|
"loss": 1.6101, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.25998542756839105, |
|
"grad_norm": 0.23438423871994019, |
|
"learning_rate": 0.0001826737251336131, |
|
"loss": 1.6541, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.2608134066370802, |
|
"grad_norm": 0.2723533511161804, |
|
"learning_rate": 0.00018261852652446347, |
|
"loss": 1.6466, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.26164138570576934, |
|
"grad_norm": 0.284927099943161, |
|
"learning_rate": 0.0001825633279153139, |
|
"loss": 1.6252, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.2624693647744585, |
|
"grad_norm": 0.2600990831851959, |
|
"learning_rate": 0.00018250812930616428, |
|
"loss": 1.6156, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.2632973438431476, |
|
"grad_norm": 0.2504718005657196, |
|
"learning_rate": 0.00018245293069701465, |
|
"loss": 1.6356, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.26412532291183677, |
|
"grad_norm": 0.2585621476173401, |
|
"learning_rate": 0.00018239773208786507, |
|
"loss": 1.6444, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.2649533019805259, |
|
"grad_norm": 0.2442982792854309, |
|
"learning_rate": 0.00018234253347871546, |
|
"loss": 1.6647, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.26578128104921506, |
|
"grad_norm": 0.23902744054794312, |
|
"learning_rate": 0.00018228733486956583, |
|
"loss": 1.6553, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.2666092601179042, |
|
"grad_norm": 0.2639334499835968, |
|
"learning_rate": 0.00018223213626041625, |
|
"loss": 1.6324, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.26743723918659335, |
|
"grad_norm": 0.21691977977752686, |
|
"learning_rate": 0.00018217693765126665, |
|
"loss": 1.6354, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.2682652182552825, |
|
"grad_norm": 0.22446656227111816, |
|
"learning_rate": 0.00018212173904211701, |
|
"loss": 1.6549, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.26909319732397163, |
|
"grad_norm": 0.23378610610961914, |
|
"learning_rate": 0.00018206654043296744, |
|
"loss": 1.6331, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.2699211763926608, |
|
"grad_norm": 0.31450459361076355, |
|
"learning_rate": 0.00018201134182381783, |
|
"loss": 1.6313, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.2707491554613499, |
|
"grad_norm": 0.22139056026935577, |
|
"learning_rate": 0.0001819561432146682, |
|
"loss": 1.6464, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.27157713453003907, |
|
"grad_norm": 0.2225482016801834, |
|
"learning_rate": 0.00018190094460551862, |
|
"loss": 1.648, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.2724051135987282, |
|
"grad_norm": 0.26663103699684143, |
|
"learning_rate": 0.000181845745996369, |
|
"loss": 1.6397, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.27323309266741735, |
|
"grad_norm": 0.2755584716796875, |
|
"learning_rate": 0.0001817910993733109, |
|
"loss": 1.6449, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.2740610717361065, |
|
"grad_norm": 0.25144413113594055, |
|
"learning_rate": 0.0001817359007641613, |
|
"loss": 1.6593, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.27488905080479564, |
|
"grad_norm": 0.23322436213493347, |
|
"learning_rate": 0.00018168070215501166, |
|
"loss": 1.6178, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.2757170298734848, |
|
"grad_norm": 0.23854686319828033, |
|
"learning_rate": 0.00018162550354586208, |
|
"loss": 1.6349, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.27654500894217393, |
|
"grad_norm": 0.27168381214141846, |
|
"learning_rate": 0.00018157030493671248, |
|
"loss": 1.6077, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.2773729880108631, |
|
"grad_norm": 0.2243831604719162, |
|
"learning_rate": 0.00018151510632756284, |
|
"loss": 1.6365, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.2782009670795522, |
|
"grad_norm": 0.2275819331407547, |
|
"learning_rate": 0.00018145990771841326, |
|
"loss": 1.6231, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.27902894614824136, |
|
"grad_norm": 0.2547686994075775, |
|
"learning_rate": 0.00018140470910926366, |
|
"loss": 1.6246, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.2798569252169305, |
|
"grad_norm": 0.299843430519104, |
|
"learning_rate": 0.00018134951050011405, |
|
"loss": 1.6258, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.28068490428561965, |
|
"grad_norm": 0.21927513182163239, |
|
"learning_rate": 0.00018129431189096445, |
|
"loss": 1.6332, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.2815128833543088, |
|
"grad_norm": 0.25713977217674255, |
|
"learning_rate": 0.00018123911328181484, |
|
"loss": 1.6369, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.28234086242299794, |
|
"grad_norm": 0.20918399095535278, |
|
"learning_rate": 0.00018118391467266524, |
|
"loss": 1.6009, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.2831688414916871, |
|
"grad_norm": 0.2372296303510666, |
|
"learning_rate": 0.00018112871606351563, |
|
"loss": 1.6264, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.28399682056037623, |
|
"grad_norm": 0.20098775625228882, |
|
"learning_rate": 0.00018107351745436602, |
|
"loss": 1.6386, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.2848247996290654, |
|
"grad_norm": 0.26937851309776306, |
|
"learning_rate": 0.00018101831884521642, |
|
"loss": 1.6269, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.2856527786977545, |
|
"grad_norm": 0.2214965969324112, |
|
"learning_rate": 0.0001809631202360668, |
|
"loss": 1.6395, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.28648075776644366, |
|
"grad_norm": 0.21897448599338531, |
|
"learning_rate": 0.0001809079216269172, |
|
"loss": 1.5864, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.2873087368351328, |
|
"grad_norm": 0.21076589822769165, |
|
"learning_rate": 0.0001808527230177676, |
|
"loss": 1.598, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.28813671590382195, |
|
"grad_norm": 0.2242691069841385, |
|
"learning_rate": 0.000180797524408618, |
|
"loss": 1.6202, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.2889646949725111, |
|
"grad_norm": 0.21810419857501984, |
|
"learning_rate": 0.0001807423257994684, |
|
"loss": 1.6345, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.28979267404120024, |
|
"grad_norm": 0.274824857711792, |
|
"learning_rate": 0.00018068712719031878, |
|
"loss": 1.626, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.2906206531098894, |
|
"grad_norm": 0.23478242754936218, |
|
"learning_rate": 0.00018063192858116918, |
|
"loss": 1.6346, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.2914486321785785, |
|
"grad_norm": 0.24421899020671844, |
|
"learning_rate": 0.00018057672997201957, |
|
"loss": 1.6243, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.29227661124726767, |
|
"grad_norm": 0.28101858496665955, |
|
"learning_rate": 0.00018052153136286997, |
|
"loss": 1.619, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.2931045903159568, |
|
"grad_norm": 0.24983233213424683, |
|
"learning_rate": 0.00018046633275372036, |
|
"loss": 1.621, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.29393256938464596, |
|
"grad_norm": 0.3039966821670532, |
|
"learning_rate": 0.00018041113414457075, |
|
"loss": 1.5755, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.2947605484533351, |
|
"grad_norm": 0.23376306891441345, |
|
"learning_rate": 0.00018035593553542115, |
|
"loss": 1.6419, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.29558852752202425, |
|
"grad_norm": 0.2499234825372696, |
|
"learning_rate": 0.00018030073692627154, |
|
"loss": 1.634, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.2964165065907134, |
|
"grad_norm": 0.24096137285232544, |
|
"learning_rate": 0.00018024553831712194, |
|
"loss": 1.621, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.29724448565940254, |
|
"grad_norm": 0.19286566972732544, |
|
"learning_rate": 0.00018019033970797233, |
|
"loss": 1.6477, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.2980724647280917, |
|
"grad_norm": 0.22015613317489624, |
|
"learning_rate": 0.00018013514109882273, |
|
"loss": 1.6364, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.2989004437967808, |
|
"grad_norm": 0.2441215068101883, |
|
"learning_rate": 0.00018008049447576464, |
|
"loss": 1.6261, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.29972842286546997, |
|
"grad_norm": 0.24828499555587769, |
|
"learning_rate": 0.000180025295866615, |
|
"loss": 1.6095, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.3005564019341591, |
|
"grad_norm": 0.27119019627571106, |
|
"learning_rate": 0.0001799700972574654, |
|
"loss": 1.617, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.30138438100284826, |
|
"grad_norm": 0.33090922236442566, |
|
"learning_rate": 0.00017991489864831582, |
|
"loss": 1.6395, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.3022123600715374, |
|
"grad_norm": 0.25472140312194824, |
|
"learning_rate": 0.0001798597000391662, |
|
"loss": 1.6586, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.30304033914022654, |
|
"grad_norm": 0.24048671126365662, |
|
"learning_rate": 0.00017980450143001658, |
|
"loss": 1.6378, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.3038683182089157, |
|
"grad_norm": 0.25020936131477356, |
|
"learning_rate": 0.000179749302820867, |
|
"loss": 1.6086, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.30469629727760483, |
|
"grad_norm": 0.2425384819507599, |
|
"learning_rate": 0.00017969410421171737, |
|
"loss": 1.6332, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.305524276346294, |
|
"grad_norm": 0.24413828551769257, |
|
"learning_rate": 0.00017963890560256777, |
|
"loss": 1.637, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.3063522554149831, |
|
"grad_norm": 0.24942953884601593, |
|
"learning_rate": 0.0001795837069934182, |
|
"loss": 1.6016, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.30718023448367227, |
|
"grad_norm": 0.2701806426048279, |
|
"learning_rate": 0.00017952850838426855, |
|
"loss": 1.6413, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.3080082135523614, |
|
"grad_norm": 0.23709261417388916, |
|
"learning_rate": 0.00017947330977511895, |
|
"loss": 1.5924, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.30883619262105055, |
|
"grad_norm": 0.252654105424881, |
|
"learning_rate": 0.00017941811116596937, |
|
"loss": 1.6229, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.3096641716897397, |
|
"grad_norm": 0.27790337800979614, |
|
"learning_rate": 0.00017936291255681974, |
|
"loss": 1.62, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.31049215075842884, |
|
"grad_norm": 0.25286659598350525, |
|
"learning_rate": 0.00017930771394767013, |
|
"loss": 1.6305, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.311320129827118, |
|
"grad_norm": 0.24629120528697968, |
|
"learning_rate": 0.00017925251533852055, |
|
"loss": 1.637, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.31214810889580713, |
|
"grad_norm": 0.26566171646118164, |
|
"learning_rate": 0.00017919731672937092, |
|
"loss": 1.6333, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.3129760879644963, |
|
"grad_norm": 0.22510451078414917, |
|
"learning_rate": 0.00017914211812022131, |
|
"loss": 1.6408, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.3138040670331854, |
|
"grad_norm": 0.27187344431877136, |
|
"learning_rate": 0.00017908691951107174, |
|
"loss": 1.633, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.31463204610187456, |
|
"grad_norm": 0.29819074273109436, |
|
"learning_rate": 0.0001790317209019221, |
|
"loss": 1.6448, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.3154600251705637, |
|
"grad_norm": 0.33122682571411133, |
|
"learning_rate": 0.0001789765222927725, |
|
"loss": 1.583, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.31628800423925285, |
|
"grad_norm": 0.24350272119045258, |
|
"learning_rate": 0.00017892132368362292, |
|
"loss": 1.5941, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.317115983307942, |
|
"grad_norm": 0.2509153485298157, |
|
"learning_rate": 0.00017886612507447328, |
|
"loss": 1.6332, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.31794396237663114, |
|
"grad_norm": 0.25258669257164, |
|
"learning_rate": 0.00017881092646532368, |
|
"loss": 1.6157, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.3187719414453203, |
|
"grad_norm": 0.31328025460243225, |
|
"learning_rate": 0.0001787557278561741, |
|
"loss": 1.619, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.31959992051400943, |
|
"grad_norm": 0.24107754230499268, |
|
"learning_rate": 0.00017870052924702447, |
|
"loss": 1.666, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.32042789958269857, |
|
"grad_norm": 0.19994518160820007, |
|
"learning_rate": 0.00017864533063787486, |
|
"loss": 1.5895, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.3212558786513877, |
|
"grad_norm": 0.25987759232521057, |
|
"learning_rate": 0.00017859013202872528, |
|
"loss": 1.6075, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.32208385772007686, |
|
"grad_norm": 0.23311209678649902, |
|
"learning_rate": 0.00017853493341957565, |
|
"loss": 1.5872, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.322911836788766, |
|
"grad_norm": 0.2703259289264679, |
|
"learning_rate": 0.00017847973481042604, |
|
"loss": 1.6011, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.32373981585745515, |
|
"grad_norm": 0.21432837843894958, |
|
"learning_rate": 0.00017842453620127647, |
|
"loss": 1.6239, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.3245677949261443, |
|
"grad_norm": 0.2730528712272644, |
|
"learning_rate": 0.00017836988957821835, |
|
"loss": 1.6453, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.32539577399483344, |
|
"grad_norm": 0.25443777441978455, |
|
"learning_rate": 0.00017831469096906875, |
|
"loss": 1.5818, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.3262237530635226, |
|
"grad_norm": 0.23115037381649017, |
|
"learning_rate": 0.0001782594923599191, |
|
"loss": 1.6373, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.32705173213221167, |
|
"grad_norm": 0.232584148645401, |
|
"learning_rate": 0.00017820429375076953, |
|
"loss": 1.6154, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.3278797112009008, |
|
"grad_norm": 0.2370355725288391, |
|
"learning_rate": 0.00017814909514161993, |
|
"loss": 1.6214, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.32870769026958996, |
|
"grad_norm": 0.2408798784017563, |
|
"learning_rate": 0.0001780938965324703, |
|
"loss": 1.6338, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.3295356693382791, |
|
"grad_norm": 0.2674672603607178, |
|
"learning_rate": 0.00017803869792332072, |
|
"loss": 1.6144, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.33036364840696825, |
|
"grad_norm": 0.2323506474494934, |
|
"learning_rate": 0.0001779834993141711, |
|
"loss": 1.642, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.3311916274756574, |
|
"grad_norm": 0.22446438670158386, |
|
"learning_rate": 0.00017792830070502148, |
|
"loss": 1.6089, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.33201960654434653, |
|
"grad_norm": 0.22976459562778473, |
|
"learning_rate": 0.0001778731020958719, |
|
"loss": 1.6249, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.3328475856130357, |
|
"grad_norm": 0.2459111511707306, |
|
"learning_rate": 0.0001778179034867223, |
|
"loss": 1.6212, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.3336755646817248, |
|
"grad_norm": 0.2351800501346588, |
|
"learning_rate": 0.00017776270487757266, |
|
"loss": 1.645, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.33450354375041397, |
|
"grad_norm": 0.1990719586610794, |
|
"learning_rate": 0.00017770750626842308, |
|
"loss": 1.6108, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.3353315228191031, |
|
"grad_norm": 0.26577112078666687, |
|
"learning_rate": 0.00017765230765927348, |
|
"loss": 1.6433, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.33615950188779226, |
|
"grad_norm": 0.24102246761322021, |
|
"learning_rate": 0.00017759710905012384, |
|
"loss": 1.6177, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.3369874809564814, |
|
"grad_norm": 0.22635315358638763, |
|
"learning_rate": 0.00017754191044097427, |
|
"loss": 1.6226, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.33781546002517054, |
|
"grad_norm": 0.24064379930496216, |
|
"learning_rate": 0.00017748671183182466, |
|
"loss": 1.6408, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.3386434390938597, |
|
"grad_norm": 0.2781156897544861, |
|
"learning_rate": 0.00017743151322267503, |
|
"loss": 1.6081, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.33947141816254883, |
|
"grad_norm": 0.21876955032348633, |
|
"learning_rate": 0.00017737631461352545, |
|
"loss": 1.6409, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.340299397231238, |
|
"grad_norm": 0.2646994888782501, |
|
"learning_rate": 0.00017732111600437584, |
|
"loss": 1.6566, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.3411273762999271, |
|
"grad_norm": 0.22703330218791962, |
|
"learning_rate": 0.0001772659173952262, |
|
"loss": 1.6322, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.34195535536861627, |
|
"grad_norm": 0.22421209514141083, |
|
"learning_rate": 0.00017721071878607663, |
|
"loss": 1.62, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.3427833344373054, |
|
"grad_norm": 0.6235564947128296, |
|
"learning_rate": 0.00017715552017692703, |
|
"loss": 1.6388, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.34361131350599455, |
|
"grad_norm": 0.2891361117362976, |
|
"learning_rate": 0.0001771003215677774, |
|
"loss": 1.6348, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.3444392925746837, |
|
"grad_norm": 0.2454879879951477, |
|
"learning_rate": 0.00017704512295862781, |
|
"loss": 1.6338, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.34526727164337284, |
|
"grad_norm": 0.39638543128967285, |
|
"learning_rate": 0.0001769899243494782, |
|
"loss": 1.6329, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 0.346095250712062, |
|
"grad_norm": 0.24618402123451233, |
|
"learning_rate": 0.00017693472574032858, |
|
"loss": 1.6304, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 0.34692322978075113, |
|
"grad_norm": 0.22741757333278656, |
|
"learning_rate": 0.000176879527131179, |
|
"loss": 1.6514, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 0.3477512088494403, |
|
"grad_norm": 0.2486487478017807, |
|
"learning_rate": 0.00017682488050812088, |
|
"loss": 1.6365, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.3485791879181294, |
|
"grad_norm": 0.26512429118156433, |
|
"learning_rate": 0.00017676968189897128, |
|
"loss": 1.66, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 0.34940716698681856, |
|
"grad_norm": 0.24919961392879486, |
|
"learning_rate": 0.00017671448328982167, |
|
"loss": 1.6216, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 0.3502351460555077, |
|
"grad_norm": 0.24012117087841034, |
|
"learning_rate": 0.00017665928468067207, |
|
"loss": 1.6522, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 0.35106312512419685, |
|
"grad_norm": 0.231460839509964, |
|
"learning_rate": 0.00017660408607152246, |
|
"loss": 1.6249, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 0.351891104192886, |
|
"grad_norm": 0.28340089321136475, |
|
"learning_rate": 0.00017654888746237285, |
|
"loss": 1.6198, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.35271908326157514, |
|
"grad_norm": 0.2706835865974426, |
|
"learning_rate": 0.00017649368885322325, |
|
"loss": 1.6292, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 0.3535470623302643, |
|
"grad_norm": 0.22307871282100677, |
|
"learning_rate": 0.00017643849024407364, |
|
"loss": 1.6236, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 0.3543750413989534, |
|
"grad_norm": 0.2504919767379761, |
|
"learning_rate": 0.00017638384362101556, |
|
"loss": 1.625, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 0.35520302046764257, |
|
"grad_norm": 0.2657232880592346, |
|
"learning_rate": 0.00017632864501186592, |
|
"loss": 1.6394, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 0.3560309995363317, |
|
"grad_norm": 0.2763049304485321, |
|
"learning_rate": 0.00017627344640271632, |
|
"loss": 1.6491, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.35685897860502086, |
|
"grad_norm": 0.2219325453042984, |
|
"learning_rate": 0.00017621824779356674, |
|
"loss": 1.6078, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 0.35768695767371, |
|
"grad_norm": 0.24751123785972595, |
|
"learning_rate": 0.0001761630491844171, |
|
"loss": 1.6286, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 0.35851493674239915, |
|
"grad_norm": 0.2798210382461548, |
|
"learning_rate": 0.0001761078505752675, |
|
"loss": 1.6157, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 0.3593429158110883, |
|
"grad_norm": 0.2589050829410553, |
|
"learning_rate": 0.00017605265196611792, |
|
"loss": 1.6419, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 0.36017089487977744, |
|
"grad_norm": 0.24586708843708038, |
|
"learning_rate": 0.0001759974533569683, |
|
"loss": 1.6156, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.3609988739484666, |
|
"grad_norm": 0.2211756557226181, |
|
"learning_rate": 0.00017594225474781868, |
|
"loss": 1.6491, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 0.3618268530171557, |
|
"grad_norm": 0.47307145595550537, |
|
"learning_rate": 0.0001758870561386691, |
|
"loss": 1.66, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 0.36265483208584487, |
|
"grad_norm": 0.260375440120697, |
|
"learning_rate": 0.00017583185752951947, |
|
"loss": 1.6352, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 0.363482811154534, |
|
"grad_norm": 0.28375470638275146, |
|
"learning_rate": 0.00017577665892036987, |
|
"loss": 1.6187, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 0.36431079022322316, |
|
"grad_norm": 0.27511486411094666, |
|
"learning_rate": 0.00017572146031122029, |
|
"loss": 1.6423, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.3651387692919123, |
|
"grad_norm": 0.32292941212654114, |
|
"learning_rate": 0.00017566626170207065, |
|
"loss": 1.6655, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 0.36596674836060145, |
|
"grad_norm": 0.2551015019416809, |
|
"learning_rate": 0.00017561106309292105, |
|
"loss": 1.6541, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 0.3667947274292906, |
|
"grad_norm": 0.24115487933158875, |
|
"learning_rate": 0.00017555586448377147, |
|
"loss": 1.6233, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 0.36762270649797973, |
|
"grad_norm": 0.2830246388912201, |
|
"learning_rate": 0.00017550066587462184, |
|
"loss": 1.6267, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 0.3684506855666689, |
|
"grad_norm": 0.20589886605739594, |
|
"learning_rate": 0.00017544546726547223, |
|
"loss": 1.6542, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.369278664635358, |
|
"grad_norm": 0.24481356143951416, |
|
"learning_rate": 0.00017539026865632265, |
|
"loss": 1.625, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 0.37010664370404717, |
|
"grad_norm": 0.31838709115982056, |
|
"learning_rate": 0.00017533507004717302, |
|
"loss": 1.593, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 0.3709346227727363, |
|
"grad_norm": 0.23912674188613892, |
|
"learning_rate": 0.0001752798714380234, |
|
"loss": 1.6169, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 0.37176260184142546, |
|
"grad_norm": 0.22365044057369232, |
|
"learning_rate": 0.00017522467282887383, |
|
"loss": 1.6472, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 0.3725905809101146, |
|
"grad_norm": 0.26311957836151123, |
|
"learning_rate": 0.0001751694742197242, |
|
"loss": 1.6257, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.37341855997880374, |
|
"grad_norm": 0.25842225551605225, |
|
"learning_rate": 0.0001751142756105746, |
|
"loss": 1.6422, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 0.3742465390474929, |
|
"grad_norm": 0.233975350856781, |
|
"learning_rate": 0.00017505907700142502, |
|
"loss": 1.6528, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 0.37507451811618203, |
|
"grad_norm": 0.2946456968784332, |
|
"learning_rate": 0.00017500387839227538, |
|
"loss": 1.6038, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 0.3759024971848712, |
|
"grad_norm": 0.28840962052345276, |
|
"learning_rate": 0.00017494867978312578, |
|
"loss": 1.6088, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 0.3767304762535603, |
|
"grad_norm": 0.2456226348876953, |
|
"learning_rate": 0.0001748934811739762, |
|
"loss": 1.6323, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.37755845532224946, |
|
"grad_norm": 0.22499750554561615, |
|
"learning_rate": 0.00017483828256482657, |
|
"loss": 1.6261, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 0.3783864343909386, |
|
"grad_norm": 0.2697817087173462, |
|
"learning_rate": 0.00017478308395567696, |
|
"loss": 1.6062, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 0.37921441345962775, |
|
"grad_norm": 0.21315142512321472, |
|
"learning_rate": 0.00017472788534652738, |
|
"loss": 1.6245, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 0.3800423925283169, |
|
"grad_norm": 0.20391778647899628, |
|
"learning_rate": 0.00017467268673737775, |
|
"loss": 1.6293, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 0.38087037159700604, |
|
"grad_norm": 0.30484962463378906, |
|
"learning_rate": 0.00017461748812822814, |
|
"loss": 1.6034, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.3816983506656952, |
|
"grad_norm": 0.2746950089931488, |
|
"learning_rate": 0.00017456228951907857, |
|
"loss": 1.6382, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 0.38252632973438433, |
|
"grad_norm": 0.2682898938655853, |
|
"learning_rate": 0.00017450709090992893, |
|
"loss": 1.6189, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 0.3833543088030735, |
|
"grad_norm": 0.23377636075019836, |
|
"learning_rate": 0.00017445189230077935, |
|
"loss": 1.5986, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 0.3841822878717626, |
|
"grad_norm": 0.285535603761673, |
|
"learning_rate": 0.00017439669369162975, |
|
"loss": 1.6005, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 0.38501026694045176, |
|
"grad_norm": 0.25021353363990784, |
|
"learning_rate": 0.00017434149508248012, |
|
"loss": 1.6028, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.3858382460091409, |
|
"grad_norm": 0.23996229469776154, |
|
"learning_rate": 0.00017428629647333054, |
|
"loss": 1.6265, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 0.38666622507783005, |
|
"grad_norm": 0.22236377000808716, |
|
"learning_rate": 0.00017423109786418093, |
|
"loss": 1.6285, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 0.3874942041465192, |
|
"grad_norm": 0.3048860728740692, |
|
"learning_rate": 0.0001741758992550313, |
|
"loss": 1.6503, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 0.38832218321520834, |
|
"grad_norm": 0.2737395167350769, |
|
"learning_rate": 0.00017412070064588172, |
|
"loss": 1.6136, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 0.3891501622838975, |
|
"grad_norm": 0.26264122128486633, |
|
"learning_rate": 0.0001740655020367321, |
|
"loss": 1.6407, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.3899781413525866, |
|
"grad_norm": 0.2572570741176605, |
|
"learning_rate": 0.00017401030342758248, |
|
"loss": 1.659, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 0.39080612042127577, |
|
"grad_norm": 0.23144544661045074, |
|
"learning_rate": 0.0001739556568045244, |
|
"loss": 1.6108, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 0.3916340994899649, |
|
"grad_norm": 0.2429717630147934, |
|
"learning_rate": 0.00017390045819537476, |
|
"loss": 1.6062, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 0.39246207855865406, |
|
"grad_norm": 0.23754027485847473, |
|
"learning_rate": 0.00017384525958622518, |
|
"loss": 1.6677, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 0.3932900576273432, |
|
"grad_norm": 0.23894472420215607, |
|
"learning_rate": 0.00017379006097707558, |
|
"loss": 1.5879, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.39411803669603235, |
|
"grad_norm": 0.2626464366912842, |
|
"learning_rate": 0.00017373486236792594, |
|
"loss": 1.6149, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 0.3949460157647215, |
|
"grad_norm": 0.22693473100662231, |
|
"learning_rate": 0.00017367966375877637, |
|
"loss": 1.6335, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 0.39577399483341064, |
|
"grad_norm": 0.250009149312973, |
|
"learning_rate": 0.00017362446514962676, |
|
"loss": 1.61, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 0.3966019739020998, |
|
"grad_norm": 0.2577686309814453, |
|
"learning_rate": 0.00017356926654047713, |
|
"loss": 1.6171, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 0.3974299529707889, |
|
"grad_norm": 0.2702758312225342, |
|
"learning_rate": 0.00017351406793132755, |
|
"loss": 1.6016, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.39825793203947807, |
|
"grad_norm": 0.24689382314682007, |
|
"learning_rate": 0.00017345886932217794, |
|
"loss": 1.6545, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 0.3990859111081672, |
|
"grad_norm": 0.29933419823646545, |
|
"learning_rate": 0.0001734036707130283, |
|
"loss": 1.612, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 0.3999138901768563, |
|
"grad_norm": 0.2971803545951843, |
|
"learning_rate": 0.00017334847210387873, |
|
"loss": 1.6364, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 0.40074186924554545, |
|
"grad_norm": 0.2572464048862457, |
|
"learning_rate": 0.00017329327349472912, |
|
"loss": 1.6386, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 0.4015698483142346, |
|
"grad_norm": 0.23949342966079712, |
|
"learning_rate": 0.0001732380748855795, |
|
"loss": 1.6038, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.40239782738292373, |
|
"grad_norm": 0.3574795424938202, |
|
"learning_rate": 0.0001731828762764299, |
|
"loss": 1.6472, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 0.4032258064516129, |
|
"grad_norm": 0.21243545413017273, |
|
"learning_rate": 0.0001731276776672803, |
|
"loss": 1.6144, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 0.404053785520302, |
|
"grad_norm": 0.237880140542984, |
|
"learning_rate": 0.00017307247905813067, |
|
"loss": 1.6422, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 0.40488176458899117, |
|
"grad_norm": 0.2700289189815521, |
|
"learning_rate": 0.0001730172804489811, |
|
"loss": 1.6329, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 0.4057097436576803, |
|
"grad_norm": 0.24220387637615204, |
|
"learning_rate": 0.0001729620818398315, |
|
"loss": 1.6129, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.40653772272636945, |
|
"grad_norm": 0.2865298390388489, |
|
"learning_rate": 0.00017290688323068188, |
|
"loss": 1.6253, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 0.4073657017950586, |
|
"grad_norm": 0.24625296890735626, |
|
"learning_rate": 0.00017285168462153228, |
|
"loss": 1.6206, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 0.40819368086374774, |
|
"grad_norm": 0.24253369867801666, |
|
"learning_rate": 0.00017279648601238267, |
|
"loss": 1.6423, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 0.4090216599324369, |
|
"grad_norm": 0.2467760145664215, |
|
"learning_rate": 0.00017274183938932456, |
|
"loss": 1.655, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 0.40984963900112603, |
|
"grad_norm": 0.2473566234111786, |
|
"learning_rate": 0.00017268664078017495, |
|
"loss": 1.6121, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.4106776180698152, |
|
"grad_norm": 0.2393588274717331, |
|
"learning_rate": 0.00017263144217102535, |
|
"loss": 1.6231, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 0.4115055971385043, |
|
"grad_norm": 0.28404486179351807, |
|
"learning_rate": 0.00017257624356187574, |
|
"loss": 1.6423, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 0.41233357620719346, |
|
"grad_norm": 0.22529253363609314, |
|
"learning_rate": 0.00017252104495272614, |
|
"loss": 1.6341, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 0.4131615552758826, |
|
"grad_norm": 0.27375975251197815, |
|
"learning_rate": 0.00017246584634357653, |
|
"loss": 1.5997, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 0.41398953434457175, |
|
"grad_norm": 0.2889430820941925, |
|
"learning_rate": 0.00017241064773442692, |
|
"loss": 1.5884, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.4148175134132609, |
|
"grad_norm": 0.2392152100801468, |
|
"learning_rate": 0.00017235544912527732, |
|
"loss": 1.6298, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 0.41564549248195004, |
|
"grad_norm": 0.22931554913520813, |
|
"learning_rate": 0.0001723002505161277, |
|
"loss": 1.6139, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 0.4164734715506392, |
|
"grad_norm": 0.24450622498989105, |
|
"learning_rate": 0.0001722450519069781, |
|
"loss": 1.6112, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 0.41730145061932833, |
|
"grad_norm": 0.25789907574653625, |
|
"learning_rate": 0.00017219040528392002, |
|
"loss": 1.6098, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 0.4181294296880175, |
|
"grad_norm": 0.21035891771316528, |
|
"learning_rate": 0.0001721352066747704, |
|
"loss": 1.6414, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.4189574087567066, |
|
"grad_norm": 0.21786290407180786, |
|
"learning_rate": 0.00017208000806562078, |
|
"loss": 1.6782, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 0.41978538782539576, |
|
"grad_norm": 0.25614434480667114, |
|
"learning_rate": 0.0001720248094564712, |
|
"loss": 1.6238, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 0.4206133668940849, |
|
"grad_norm": 0.3049182891845703, |
|
"learning_rate": 0.00017196961084732157, |
|
"loss": 1.6096, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 0.42144134596277405, |
|
"grad_norm": 0.2816081643104553, |
|
"learning_rate": 0.00017191441223817196, |
|
"loss": 1.6256, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 0.4222693250314632, |
|
"grad_norm": 0.23279207944869995, |
|
"learning_rate": 0.00017185921362902239, |
|
"loss": 1.6243, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.42309730410015234, |
|
"grad_norm": 0.23084907233715057, |
|
"learning_rate": 0.00017180401501987275, |
|
"loss": 1.5797, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 0.4239252831688415, |
|
"grad_norm": 0.26271852850914, |
|
"learning_rate": 0.00017174881641072315, |
|
"loss": 1.6228, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 0.4247532622375306, |
|
"grad_norm": 0.22446395456790924, |
|
"learning_rate": 0.00017169361780157357, |
|
"loss": 1.632, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 0.42558124130621977, |
|
"grad_norm": 0.2501899302005768, |
|
"learning_rate": 0.00017163841919242394, |
|
"loss": 1.6155, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 0.4264092203749089, |
|
"grad_norm": 0.24416933953762054, |
|
"learning_rate": 0.00017158322058327433, |
|
"loss": 1.6104, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.42723719944359806, |
|
"grad_norm": 0.24799175560474396, |
|
"learning_rate": 0.00017152802197412475, |
|
"loss": 1.629, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 0.4280651785122872, |
|
"grad_norm": 0.2706332504749298, |
|
"learning_rate": 0.00017147282336497512, |
|
"loss": 1.5854, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 0.42889315758097635, |
|
"grad_norm": 0.2325417846441269, |
|
"learning_rate": 0.0001714176247558255, |
|
"loss": 1.6373, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 0.4297211366496655, |
|
"grad_norm": 0.3152548372745514, |
|
"learning_rate": 0.00017136242614667593, |
|
"loss": 1.6051, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 0.43054911571835464, |
|
"grad_norm": 0.22651983797550201, |
|
"learning_rate": 0.0001713072275375263, |
|
"loss": 1.588, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.4313770947870438, |
|
"grad_norm": 0.22875835001468658, |
|
"learning_rate": 0.0001712520289283767, |
|
"loss": 1.6193, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 0.4322050738557329, |
|
"grad_norm": 0.244579017162323, |
|
"learning_rate": 0.00017119683031922712, |
|
"loss": 1.6165, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 0.43303305292442207, |
|
"grad_norm": 0.2727806270122528, |
|
"learning_rate": 0.00017114163171007748, |
|
"loss": 1.6106, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 0.4338610319931112, |
|
"grad_norm": 0.2944915294647217, |
|
"learning_rate": 0.00017108643310092788, |
|
"loss": 1.6379, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 0.43468901106180036, |
|
"grad_norm": 0.32742735743522644, |
|
"learning_rate": 0.0001710312344917783, |
|
"loss": 1.5989, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.4355169901304895, |
|
"grad_norm": 0.2617054879665375, |
|
"learning_rate": 0.00017097603588262867, |
|
"loss": 1.6405, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 0.43634496919917864, |
|
"grad_norm": 0.2704140841960907, |
|
"learning_rate": 0.00017092083727347906, |
|
"loss": 1.654, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 0.4371729482678678, |
|
"grad_norm": 0.22568093240261078, |
|
"learning_rate": 0.00017086563866432948, |
|
"loss": 1.6086, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 0.43800092733655693, |
|
"grad_norm": 0.2364298552274704, |
|
"learning_rate": 0.00017081044005517985, |
|
"loss": 1.637, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 0.4388289064052461, |
|
"grad_norm": 0.24392692744731903, |
|
"learning_rate": 0.00017075524144603024, |
|
"loss": 1.5939, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.4396568854739352, |
|
"grad_norm": 0.2571870684623718, |
|
"learning_rate": 0.00017070004283688066, |
|
"loss": 1.6708, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 0.44048486454262437, |
|
"grad_norm": 0.23539692163467407, |
|
"learning_rate": 0.00017064484422773103, |
|
"loss": 1.6085, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 0.4413128436113135, |
|
"grad_norm": 0.25460028648376465, |
|
"learning_rate": 0.00017058964561858143, |
|
"loss": 1.6051, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 0.44214082268000265, |
|
"grad_norm": 0.3254430294036865, |
|
"learning_rate": 0.00017053444700943185, |
|
"loss": 1.5707, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 0.4429688017486918, |
|
"grad_norm": 0.2832888960838318, |
|
"learning_rate": 0.00017047924840028221, |
|
"loss": 1.6606, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.44379678081738094, |
|
"grad_norm": 0.2448771893978119, |
|
"learning_rate": 0.00017042404979113264, |
|
"loss": 1.6052, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 0.4446247598860701, |
|
"grad_norm": 0.28213441371917725, |
|
"learning_rate": 0.00017036885118198303, |
|
"loss": 1.6286, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 0.44545273895475923, |
|
"grad_norm": 0.24302446842193604, |
|
"learning_rate": 0.0001703136525728334, |
|
"loss": 1.5793, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 0.4462807180234484, |
|
"grad_norm": 0.25916993618011475, |
|
"learning_rate": 0.0001702590059497753, |
|
"loss": 1.6339, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 0.4471086970921375, |
|
"grad_norm": 0.24786342680454254, |
|
"learning_rate": 0.00017020380734062568, |
|
"loss": 1.5977, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.44793667616082666, |
|
"grad_norm": 0.22815145552158356, |
|
"learning_rate": 0.0001701486087314761, |
|
"loss": 1.6541, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 0.4487646552295158, |
|
"grad_norm": 0.26695653796195984, |
|
"learning_rate": 0.0001700934101223265, |
|
"loss": 1.6313, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 0.44959263429820495, |
|
"grad_norm": 0.2767154276371002, |
|
"learning_rate": 0.00017003821151317686, |
|
"loss": 1.5868, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 0.4504206133668941, |
|
"grad_norm": 0.23107494413852692, |
|
"learning_rate": 0.00016998301290402728, |
|
"loss": 1.6353, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 0.45124859243558324, |
|
"grad_norm": 0.24376466870307922, |
|
"learning_rate": 0.00016992781429487768, |
|
"loss": 1.6075, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.4520765715042724, |
|
"grad_norm": 0.3318908214569092, |
|
"learning_rate": 0.00016987261568572804, |
|
"loss": 1.6337, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 0.45290455057296153, |
|
"grad_norm": 0.2159205824136734, |
|
"learning_rate": 0.00016981741707657846, |
|
"loss": 1.6237, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 0.4537325296416507, |
|
"grad_norm": 0.24641750752925873, |
|
"learning_rate": 0.00016976221846742886, |
|
"loss": 1.5984, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 0.4545605087103398, |
|
"grad_norm": 0.25615862011909485, |
|
"learning_rate": 0.00016970701985827925, |
|
"loss": 1.5901, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 0.45538848777902896, |
|
"grad_norm": 0.27317163348197937, |
|
"learning_rate": 0.00016965182124912965, |
|
"loss": 1.651, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.4562164668477181, |
|
"grad_norm": 0.2555563747882843, |
|
"learning_rate": 0.00016959662263998004, |
|
"loss": 1.6497, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 0.45704444591640725, |
|
"grad_norm": 0.2796775698661804, |
|
"learning_rate": 0.00016954142403083044, |
|
"loss": 1.6579, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 0.4578724249850964, |
|
"grad_norm": 0.26606446504592896, |
|
"learning_rate": 0.00016948622542168083, |
|
"loss": 1.6271, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 0.45870040405378554, |
|
"grad_norm": 0.2654423117637634, |
|
"learning_rate": 0.00016943102681253122, |
|
"loss": 1.6223, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 0.4595283831224747, |
|
"grad_norm": 0.24428655207157135, |
|
"learning_rate": 0.00016937582820338162, |
|
"loss": 1.6188, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.4603563621911638, |
|
"grad_norm": 0.28939154744148254, |
|
"learning_rate": 0.000169320629594232, |
|
"loss": 1.619, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 0.46118434125985297, |
|
"grad_norm": 0.28437426686286926, |
|
"learning_rate": 0.0001692654309850824, |
|
"loss": 1.6357, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 0.4620123203285421, |
|
"grad_norm": 0.2547878921031952, |
|
"learning_rate": 0.0001692102323759328, |
|
"loss": 1.6044, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 0.46284029939723126, |
|
"grad_norm": 0.25692984461784363, |
|
"learning_rate": 0.0001691550337667832, |
|
"loss": 1.6059, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 0.4636682784659204, |
|
"grad_norm": 0.3365028500556946, |
|
"learning_rate": 0.0001690998351576336, |
|
"loss": 1.6484, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.46449625753460955, |
|
"grad_norm": 0.2529739737510681, |
|
"learning_rate": 0.00016904463654848398, |
|
"loss": 1.6212, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 0.4653242366032987, |
|
"grad_norm": 0.23951299488544464, |
|
"learning_rate": 0.00016898943793933438, |
|
"loss": 1.6151, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 0.46615221567198784, |
|
"grad_norm": 0.3531137704849243, |
|
"learning_rate": 0.00016893423933018477, |
|
"loss": 1.6013, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 0.466980194740677, |
|
"grad_norm": 0.30527693033218384, |
|
"learning_rate": 0.00016887904072103517, |
|
"loss": 1.6112, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 0.4678081738093661, |
|
"grad_norm": 0.25314369797706604, |
|
"learning_rate": 0.00016882384211188556, |
|
"loss": 1.6281, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.46863615287805527, |
|
"grad_norm": 0.25864383578300476, |
|
"learning_rate": 0.00016876864350273595, |
|
"loss": 1.6376, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 0.4694641319467444, |
|
"grad_norm": 0.27216991782188416, |
|
"learning_rate": 0.00016871344489358635, |
|
"loss": 1.5999, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 0.47029211101543356, |
|
"grad_norm": 0.23203931748867035, |
|
"learning_rate": 0.00016865824628443674, |
|
"loss": 1.6304, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 0.4711200900841227, |
|
"grad_norm": 0.3125530183315277, |
|
"learning_rate": 0.00016860304767528714, |
|
"loss": 1.6264, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 0.47194806915281184, |
|
"grad_norm": 0.22787316143512726, |
|
"learning_rate": 0.00016854784906613753, |
|
"loss": 1.6044, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.47277604822150093, |
|
"grad_norm": 0.24813027679920197, |
|
"learning_rate": 0.00016849265045698793, |
|
"loss": 1.6347, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 0.4736040272901901, |
|
"grad_norm": 0.2771335542201996, |
|
"learning_rate": 0.00016843745184783832, |
|
"loss": 1.6186, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 0.4744320063588792, |
|
"grad_norm": 0.2221546620130539, |
|
"learning_rate": 0.00016838225323868871, |
|
"loss": 1.6019, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 0.47525998542756837, |
|
"grad_norm": 0.24322104454040527, |
|
"learning_rate": 0.0001683270546295391, |
|
"loss": 1.5865, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 0.4760879644962575, |
|
"grad_norm": 0.26411768794059753, |
|
"learning_rate": 0.0001682718560203895, |
|
"loss": 1.6504, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.47691594356494665, |
|
"grad_norm": 0.319624662399292, |
|
"learning_rate": 0.0001682166574112399, |
|
"loss": 1.6068, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 0.4777439226336358, |
|
"grad_norm": 0.2550843358039856, |
|
"learning_rate": 0.0001681614588020903, |
|
"loss": 1.6177, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 0.47857190170232494, |
|
"grad_norm": 0.28622475266456604, |
|
"learning_rate": 0.00016810626019294069, |
|
"loss": 1.619, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 0.4793998807710141, |
|
"grad_norm": 0.2016662210226059, |
|
"learning_rate": 0.00016805106158379108, |
|
"loss": 1.6413, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 0.48022785983970323, |
|
"grad_norm": 0.2323019951581955, |
|
"learning_rate": 0.00016799586297464147, |
|
"loss": 1.6104, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.4810558389083924, |
|
"grad_norm": 0.2892739176750183, |
|
"learning_rate": 0.00016794066436549187, |
|
"loss": 1.5928, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 0.4818838179770815, |
|
"grad_norm": 0.2968466877937317, |
|
"learning_rate": 0.00016788546575634226, |
|
"loss": 1.5965, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 0.48271179704577066, |
|
"grad_norm": 0.29501309990882874, |
|
"learning_rate": 0.00016783026714719266, |
|
"loss": 1.6309, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 0.4835397761144598, |
|
"grad_norm": 0.3167082369327545, |
|
"learning_rate": 0.00016777506853804305, |
|
"loss": 1.622, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 0.48436775518314895, |
|
"grad_norm": 0.2594543397426605, |
|
"learning_rate": 0.00016771986992889345, |
|
"loss": 1.6183, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.4851957342518381, |
|
"grad_norm": 0.23582147061824799, |
|
"learning_rate": 0.00016766467131974384, |
|
"loss": 1.6039, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 0.48602371332052724, |
|
"grad_norm": 0.24872687458992004, |
|
"learning_rate": 0.00016760947271059423, |
|
"loss": 1.6092, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 0.4868516923892164, |
|
"grad_norm": 0.21512049436569214, |
|
"learning_rate": 0.00016755427410144463, |
|
"loss": 1.6323, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 0.48767967145790553, |
|
"grad_norm": 0.25735777616500854, |
|
"learning_rate": 0.00016749907549229502, |
|
"loss": 1.6367, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 0.4885076505265947, |
|
"grad_norm": 0.23260653018951416, |
|
"learning_rate": 0.00016744387688314542, |
|
"loss": 1.6497, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.4893356295952838, |
|
"grad_norm": 0.31416621804237366, |
|
"learning_rate": 0.00016738867827399584, |
|
"loss": 1.6128, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 0.49016360866397296, |
|
"grad_norm": 0.2689904570579529, |
|
"learning_rate": 0.00016733458363702922, |
|
"loss": 1.6084, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 0.4909915877326621, |
|
"grad_norm": 0.26795658469200134, |
|
"learning_rate": 0.00016727938502787958, |
|
"loss": 1.6342, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 0.49181956680135125, |
|
"grad_norm": 0.28816089034080505, |
|
"learning_rate": 0.00016722418641872998, |
|
"loss": 1.5939, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 0.4926475458700404, |
|
"grad_norm": 0.3193409740924835, |
|
"learning_rate": 0.0001671689878095804, |
|
"loss": 1.6022, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.49347552493872954, |
|
"grad_norm": 0.2833710014820099, |
|
"learning_rate": 0.00016711378920043077, |
|
"loss": 1.6164, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 0.4943035040074187, |
|
"grad_norm": 0.2676531672477722, |
|
"learning_rate": 0.00016705859059128116, |
|
"loss": 1.6019, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 0.4951314830761078, |
|
"grad_norm": 0.29554829001426697, |
|
"learning_rate": 0.00016700339198213158, |
|
"loss": 1.6028, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 0.49595946214479697, |
|
"grad_norm": 0.24950549006462097, |
|
"learning_rate": 0.00016694819337298195, |
|
"loss": 1.5983, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 0.4967874412134861, |
|
"grad_norm": 0.31418663263320923, |
|
"learning_rate": 0.00016689299476383234, |
|
"loss": 1.6168, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.49761542028217526, |
|
"grad_norm": 0.2942350506782532, |
|
"learning_rate": 0.00016683779615468276, |
|
"loss": 1.6117, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 0.4984433993508644, |
|
"grad_norm": 0.2196948081254959, |
|
"learning_rate": 0.00016678259754553313, |
|
"loss": 1.6221, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 0.49927137841955355, |
|
"grad_norm": 0.25782310962677, |
|
"learning_rate": 0.00016672739893638353, |
|
"loss": 1.6207, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 0.5000993574882427, |
|
"grad_norm": 0.24130311608314514, |
|
"learning_rate": 0.00016667220032723395, |
|
"loss": 1.6244, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 0.5009273365569319, |
|
"grad_norm": 0.26923075318336487, |
|
"learning_rate": 0.00016661700171808431, |
|
"loss": 1.592, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.501755315625621, |
|
"grad_norm": 0.23227745294570923, |
|
"learning_rate": 0.00016656180310893474, |
|
"loss": 1.5768, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 0.5025832946943102, |
|
"grad_norm": 0.2719257175922394, |
|
"learning_rate": 0.00016650660449978513, |
|
"loss": 1.6187, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 0.5034112737629993, |
|
"grad_norm": 0.26879358291625977, |
|
"learning_rate": 0.0001664514058906355, |
|
"loss": 1.6194, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 0.5042392528316885, |
|
"grad_norm": 0.24769945442676544, |
|
"learning_rate": 0.00016639620728148592, |
|
"loss": 1.5787, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 0.5050672319003776, |
|
"grad_norm": 0.26854538917541504, |
|
"learning_rate": 0.0001663410086723363, |
|
"loss": 1.6254, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.5058952109690668, |
|
"grad_norm": 0.300074964761734, |
|
"learning_rate": 0.00016628581006318668, |
|
"loss": 1.6232, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 0.5067231900377559, |
|
"grad_norm": 0.3866933584213257, |
|
"learning_rate": 0.0001662306114540371, |
|
"loss": 1.596, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 0.507551169106445, |
|
"grad_norm": 0.23814737796783447, |
|
"learning_rate": 0.0001661754128448875, |
|
"loss": 1.6291, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 0.5083791481751342, |
|
"grad_norm": 0.26245006918907166, |
|
"learning_rate": 0.00016612021423573786, |
|
"loss": 1.6209, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 0.5092071272438232, |
|
"grad_norm": 0.24793724715709686, |
|
"learning_rate": 0.00016606501562658828, |
|
"loss": 1.5819, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.5100351063125124, |
|
"grad_norm": 0.2816125452518463, |
|
"learning_rate": 0.00016600981701743868, |
|
"loss": 1.6488, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 0.5108630853812015, |
|
"grad_norm": 0.2552174925804138, |
|
"learning_rate": 0.00016595461840828904, |
|
"loss": 1.6068, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 0.5116910644498907, |
|
"grad_norm": 0.34865882992744446, |
|
"learning_rate": 0.00016589941979913947, |
|
"loss": 1.6184, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 0.5125190435185798, |
|
"grad_norm": 0.238135427236557, |
|
"learning_rate": 0.00016584422118998986, |
|
"loss": 1.6167, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 0.5133470225872689, |
|
"grad_norm": 0.26624831557273865, |
|
"learning_rate": 0.00016578902258084023, |
|
"loss": 1.6137, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.5141750016559581, |
|
"grad_norm": 0.2653828561306, |
|
"learning_rate": 0.00016573382397169065, |
|
"loss": 1.63, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 0.5150029807246472, |
|
"grad_norm": 0.25800028443336487, |
|
"learning_rate": 0.00016567862536254104, |
|
"loss": 1.6301, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 0.5158309597933364, |
|
"grad_norm": 0.2781680226325989, |
|
"learning_rate": 0.0001656234267533914, |
|
"loss": 1.6505, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 0.5166589388620255, |
|
"grad_norm": 0.2765764594078064, |
|
"learning_rate": 0.00016556822814424183, |
|
"loss": 1.6243, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 0.5174869179307147, |
|
"grad_norm": 0.24544250965118408, |
|
"learning_rate": 0.00016551302953509223, |
|
"loss": 1.6221, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.5183148969994038, |
|
"grad_norm": 0.3250795304775238, |
|
"learning_rate": 0.0001654578309259426, |
|
"loss": 1.6504, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 0.519142876068093, |
|
"grad_norm": 0.23859694600105286, |
|
"learning_rate": 0.00016540263231679301, |
|
"loss": 1.6017, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 0.5199708551367821, |
|
"grad_norm": 0.27489158511161804, |
|
"learning_rate": 0.0001653474337076434, |
|
"loss": 1.5901, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 0.5207988342054712, |
|
"grad_norm": 0.25401830673217773, |
|
"learning_rate": 0.00016529223509849378, |
|
"loss": 1.6186, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 0.5216268132741604, |
|
"grad_norm": 0.26960933208465576, |
|
"learning_rate": 0.0001652370364893442, |
|
"loss": 1.6156, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.5224547923428495, |
|
"grad_norm": 0.256863534450531, |
|
"learning_rate": 0.0001651818378801946, |
|
"loss": 1.5954, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 0.5232827714115387, |
|
"grad_norm": 0.2537617087364197, |
|
"learning_rate": 0.00016512663927104496, |
|
"loss": 1.5872, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 0.5241107504802278, |
|
"grad_norm": 0.21423785388469696, |
|
"learning_rate": 0.00016507144066189538, |
|
"loss": 1.5982, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 0.524938729548917, |
|
"grad_norm": 0.3197900354862213, |
|
"learning_rate": 0.00016501624205274577, |
|
"loss": 1.6362, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 0.5257667086176061, |
|
"grad_norm": 0.2450074404478073, |
|
"learning_rate": 0.00016496104344359614, |
|
"loss": 1.6076, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.5265946876862952, |
|
"grad_norm": 0.251626193523407, |
|
"learning_rate": 0.00016490584483444656, |
|
"loss": 1.6395, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 0.5274226667549844, |
|
"grad_norm": 0.3137412965297699, |
|
"learning_rate": 0.00016485064622529696, |
|
"loss": 1.6047, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 0.5282506458236735, |
|
"grad_norm": 0.28337448835372925, |
|
"learning_rate": 0.00016479599960223884, |
|
"loss": 1.621, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 0.5290786248923627, |
|
"grad_norm": 0.22410038113594055, |
|
"learning_rate": 0.00016474080099308924, |
|
"loss": 1.5938, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 0.5299066039610518, |
|
"grad_norm": 0.24572992324829102, |
|
"learning_rate": 0.00016468560238393963, |
|
"loss": 1.6213, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.530734583029741, |
|
"grad_norm": 0.2508053481578827, |
|
"learning_rate": 0.00016463040377479003, |
|
"loss": 1.6016, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 0.5315625620984301, |
|
"grad_norm": 0.26450827717781067, |
|
"learning_rate": 0.00016457520516564042, |
|
"loss": 1.6326, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 0.5323905411671193, |
|
"grad_norm": 0.259705126285553, |
|
"learning_rate": 0.00016452000655649081, |
|
"loss": 1.622, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 0.5332185202358084, |
|
"grad_norm": 0.3122607469558716, |
|
"learning_rate": 0.0001644648079473412, |
|
"loss": 1.6068, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 0.5340464993044975, |
|
"grad_norm": 0.264130562543869, |
|
"learning_rate": 0.0001644096093381916, |
|
"loss": 1.6234, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.5348744783731867, |
|
"grad_norm": 0.2097257822751999, |
|
"learning_rate": 0.000164354410729042, |
|
"loss": 1.6296, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 0.5357024574418758, |
|
"grad_norm": 0.2408805638551712, |
|
"learning_rate": 0.0001642992121198924, |
|
"loss": 1.606, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 0.536530436510565, |
|
"grad_norm": 0.25239992141723633, |
|
"learning_rate": 0.00016424401351074279, |
|
"loss": 1.5934, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 0.5373584155792541, |
|
"grad_norm": 0.23828302323818207, |
|
"learning_rate": 0.00016418881490159318, |
|
"loss": 1.6142, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 0.5381863946479433, |
|
"grad_norm": 0.24627448618412018, |
|
"learning_rate": 0.00016413361629244357, |
|
"loss": 1.6397, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.5390143737166324, |
|
"grad_norm": 0.2534656226634979, |
|
"learning_rate": 0.00016407841768329397, |
|
"loss": 1.6293, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 0.5398423527853216, |
|
"grad_norm": 0.31040382385253906, |
|
"learning_rate": 0.00016402321907414436, |
|
"loss": 1.61, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 0.5406703318540107, |
|
"grad_norm": 0.23957033455371857, |
|
"learning_rate": 0.00016396802046499476, |
|
"loss": 1.6428, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 0.5414983109226998, |
|
"grad_norm": 0.21376514434814453, |
|
"learning_rate": 0.00016391282185584515, |
|
"loss": 1.6473, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 0.542326289991389, |
|
"grad_norm": 0.24178186058998108, |
|
"learning_rate": 0.00016385762324669554, |
|
"loss": 1.6311, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.5431542690600781, |
|
"grad_norm": 0.28505873680114746, |
|
"learning_rate": 0.00016380242463754594, |
|
"loss": 1.6206, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 0.5439822481287673, |
|
"grad_norm": 0.22037836909294128, |
|
"learning_rate": 0.00016374722602839633, |
|
"loss": 1.618, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 0.5448102271974564, |
|
"grad_norm": 0.2399994432926178, |
|
"learning_rate": 0.00016369202741924673, |
|
"loss": 1.6268, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 0.5456382062661456, |
|
"grad_norm": 0.24552536010742188, |
|
"learning_rate": 0.00016363682881009712, |
|
"loss": 1.6082, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 0.5464661853348347, |
|
"grad_norm": 0.24302148818969727, |
|
"learning_rate": 0.00016358163020094752, |
|
"loss": 1.63, |
|
"step": 66000 |
|
    },
    {
      "epoch": 0.5472941644035239,
      "grad_norm": 0.2581181228160858,
      "learning_rate": 0.00016352643159179794,
      "loss": 1.6249,
      "step": 66100
    },
    {
      "epoch": 0.548122143472213,
      "grad_norm": 0.3073265552520752,
      "learning_rate": 0.0001634712329826483,
      "loss": 1.6163,
      "step": 66200
    },
    {
      "epoch": 0.5489501225409021,
      "grad_norm": 0.24049556255340576,
      "learning_rate": 0.0001634160343734987,
      "loss": 1.63,
      "step": 66300
    },
    {
      "epoch": 0.5497781016095913,
      "grad_norm": 0.2216363102197647,
      "learning_rate": 0.00016336083576434912,
      "loss": 1.6114,
      "step": 66400
    },
    {
      "epoch": 0.5506060806782804,
      "grad_norm": 0.35404762625694275,
      "learning_rate": 0.00016330618914129098,
      "loss": 1.6514,
      "step": 66500
    },
    {
      "epoch": 0.5514340597469696,
      "grad_norm": 0.2302398830652237,
      "learning_rate": 0.0001632509905321414,
      "loss": 1.6084,
      "step": 66600
    },
    {
      "epoch": 0.5522620388156587,
      "grad_norm": 0.35909637808799744,
      "learning_rate": 0.00016319579192299177,
      "loss": 1.6509,
      "step": 66700
    },
    {
      "epoch": 0.5530900178843479,
      "grad_norm": 0.2828253507614136,
      "learning_rate": 0.00016314059331384216,
      "loss": 1.612,
      "step": 66800
    },
    {
      "epoch": 0.553917996953037,
      "grad_norm": 0.25532275438308716,
      "learning_rate": 0.00016308539470469258,
      "loss": 1.6508,
      "step": 66900
    },
    {
      "epoch": 0.5547459760217262,
      "grad_norm": 0.2709483504295349,
      "learning_rate": 0.00016303019609554295,
      "loss": 1.6227,
      "step": 67000
    },
    {
      "epoch": 0.5555739550904153,
      "grad_norm": 0.23612917959690094,
      "learning_rate": 0.00016297499748639334,
      "loss": 1.66,
      "step": 67100
    },
    {
      "epoch": 0.5564019341591044,
      "grad_norm": 0.29938626289367676,
      "learning_rate": 0.00016291979887724377,
      "loss": 1.601,
      "step": 67200
    },
    {
      "epoch": 0.5572299132277936,
      "grad_norm": 0.3122688829898834,
      "learning_rate": 0.00016286460026809413,
      "loss": 1.6137,
      "step": 67300
    },
    {
      "epoch": 0.5580578922964827,
      "grad_norm": 0.2629024088382721,
      "learning_rate": 0.00016280940165894453,
      "loss": 1.6272,
      "step": 67400
    },
    {
      "epoch": 0.5588858713651719,
      "grad_norm": 0.26424530148506165,
      "learning_rate": 0.00016275420304979495,
      "loss": 1.6248,
      "step": 67500
    },
    {
      "epoch": 0.559713850433861,
      "grad_norm": 0.2512044608592987,
      "learning_rate": 0.00016269900444064532,
      "loss": 1.6147,
      "step": 67600
    },
    {
      "epoch": 0.5605418295025502,
      "grad_norm": 0.22548918426036835,
      "learning_rate": 0.0001626438058314957,
      "loss": 1.5857,
      "step": 67700
    },
    {
      "epoch": 0.5613698085712393,
      "grad_norm": 0.23559758067131042,
      "learning_rate": 0.00016258860722234613,
      "loss": 1.6165,
      "step": 67800
    },
    {
      "epoch": 0.5621977876399284,
      "grad_norm": 0.28142249584198,
      "learning_rate": 0.0001625334086131965,
      "loss": 1.6195,
      "step": 67900
    },
    {
      "epoch": 0.5630257667086176,
      "grad_norm": 0.2530786991119385,
      "learning_rate": 0.0001624782100040469,
      "loss": 1.6198,
      "step": 68000
    },
    {
      "epoch": 0.5638537457773067,
      "grad_norm": 0.239883691072464,
      "learning_rate": 0.00016242301139489731,
      "loss": 1.6385,
      "step": 68100
    },
    {
      "epoch": 0.5646817248459959,
      "grad_norm": 0.2762130796909332,
      "learning_rate": 0.00016236781278574768,
      "loss": 1.6237,
      "step": 68200
    },
    {
      "epoch": 0.565509703914685,
      "grad_norm": 0.23887832462787628,
      "learning_rate": 0.00016231261417659808,
      "loss": 1.6299,
      "step": 68300
    },
    {
      "epoch": 0.5663376829833742,
      "grad_norm": 0.23560328781604767,
      "learning_rate": 0.0001622574155674485,
      "loss": 1.6526,
      "step": 68400
    },
    {
      "epoch": 0.5671656620520633,
      "grad_norm": 0.23129230737686157,
      "learning_rate": 0.00016220221695829886,
      "loss": 1.6188,
      "step": 68500
    },
    {
      "epoch": 0.5679936411207525,
      "grad_norm": 0.22737343609333038,
      "learning_rate": 0.00016214701834914926,
      "loss": 1.6108,
      "step": 68600
    },
    {
      "epoch": 0.5688216201894416,
      "grad_norm": 0.26043862104415894,
      "learning_rate": 0.00016209181973999968,
      "loss": 1.6087,
      "step": 68700
    },
    {
      "epoch": 0.5696495992581307,
      "grad_norm": 0.30376845598220825,
      "learning_rate": 0.00016203662113085005,
      "loss": 1.6184,
      "step": 68800
    },
    {
      "epoch": 0.5704775783268199,
      "grad_norm": 0.2718486487865448,
      "learning_rate": 0.00016198142252170047,
      "loss": 1.5929,
      "step": 68900
    },
    {
      "epoch": 0.571305557395509,
      "grad_norm": 0.24742506444454193,
      "learning_rate": 0.00016192622391255086,
      "loss": 1.6409,
      "step": 69000
    },
    {
      "epoch": 0.5721335364641982,
      "grad_norm": 0.2923363149166107,
      "learning_rate": 0.00016187102530340123,
      "loss": 1.6406,
      "step": 69100
    },
    {
      "epoch": 0.5729615155328873,
      "grad_norm": 0.2717527151107788,
      "learning_rate": 0.00016181582669425165,
      "loss": 1.6028,
      "step": 69200
    },
    {
      "epoch": 0.5737894946015765,
      "grad_norm": 0.24801529943943024,
      "learning_rate": 0.00016176118007119354,
      "loss": 1.6032,
      "step": 69300
    },
    {
      "epoch": 0.5746174736702656,
      "grad_norm": 0.23781093955039978,
      "learning_rate": 0.00016170598146204393,
      "loss": 1.5806,
      "step": 69400
    },
    {
      "epoch": 0.5754454527389548,
      "grad_norm": 0.2343488186597824,
      "learning_rate": 0.00016165078285289433,
      "loss": 1.6181,
      "step": 69500
    },
    {
      "epoch": 0.5762734318076439,
      "grad_norm": 0.2726878225803375,
      "learning_rate": 0.00016159558424374472,
      "loss": 1.5645,
      "step": 69600
    },
    {
      "epoch": 0.577101410876333,
      "grad_norm": 0.24465055763721466,
      "learning_rate": 0.00016154038563459511,
      "loss": 1.6262,
      "step": 69700
    },
    {
      "epoch": 0.5779293899450222,
      "grad_norm": 0.23371031880378723,
      "learning_rate": 0.0001614851870254455,
      "loss": 1.6486,
      "step": 69800
    },
    {
      "epoch": 0.5787573690137113,
      "grad_norm": 0.2924565076828003,
      "learning_rate": 0.0001614299884162959,
      "loss": 1.5835,
      "step": 69900
    },
    {
      "epoch": 0.5795853480824005,
      "grad_norm": 0.25226888060569763,
      "learning_rate": 0.0001613747898071463,
      "loss": 1.595,
      "step": 70000
    },
    {
      "epoch": 0.5804133271510896,
      "grad_norm": 0.24092262983322144,
      "learning_rate": 0.0001613195911979967,
      "loss": 1.6115,
      "step": 70100
    },
    {
      "epoch": 0.5812413062197788,
      "grad_norm": 0.26227688789367676,
      "learning_rate": 0.00016126439258884708,
      "loss": 1.6405,
      "step": 70200
    },
    {
      "epoch": 0.5820692852884679,
      "grad_norm": 0.24400807917118073,
      "learning_rate": 0.00016120919397969748,
      "loss": 1.6298,
      "step": 70300
    },
    {
      "epoch": 0.582897264357157,
      "grad_norm": 0.24254167079925537,
      "learning_rate": 0.00016115399537054787,
      "loss": 1.5931,
      "step": 70400
    },
    {
      "epoch": 0.5837252434258462,
      "grad_norm": 0.2771003246307373,
      "learning_rate": 0.00016109879676139827,
      "loss": 1.6287,
      "step": 70500
    },
    {
      "epoch": 0.5845532224945353,
      "grad_norm": 0.2865670323371887,
      "learning_rate": 0.00016104359815224866,
      "loss": 1.6418,
      "step": 70600
    },
    {
      "epoch": 0.5853812015632245,
      "grad_norm": 0.24516509473323822,
      "learning_rate": 0.00016098839954309906,
      "loss": 1.6244,
      "step": 70700
    },
    {
      "epoch": 0.5862091806319136,
      "grad_norm": 0.28212985396385193,
      "learning_rate": 0.00016093320093394945,
      "loss": 1.6139,
      "step": 70800
    },
    {
      "epoch": 0.5870371597006028,
      "grad_norm": 0.2579030990600586,
      "learning_rate": 0.00016087800232479984,
      "loss": 1.5961,
      "step": 70900
    },
    {
      "epoch": 0.5878651387692919,
      "grad_norm": 0.22696354985237122,
      "learning_rate": 0.00016082280371565024,
      "loss": 1.6209,
      "step": 71000
    },
    {
      "epoch": 0.5886931178379811,
      "grad_norm": 0.2646586298942566,
      "learning_rate": 0.00016076760510650063,
      "loss": 1.6111,
      "step": 71100
    },
    {
      "epoch": 0.5895210969066702,
      "grad_norm": 0.25043609738349915,
      "learning_rate": 0.00016071240649735103,
      "loss": 1.5784,
      "step": 71200
    },
    {
      "epoch": 0.5903490759753593,
      "grad_norm": 0.25381940603256226,
      "learning_rate": 0.00016065720788820142,
      "loss": 1.5925,
      "step": 71300
    },
    {
      "epoch": 0.5911770550440485,
      "grad_norm": 0.2380959391593933,
      "learning_rate": 0.00016060200927905182,
      "loss": 1.6379,
      "step": 71400
    },
    {
      "epoch": 0.5920050341127376,
      "grad_norm": 0.2419148087501526,
      "learning_rate": 0.0001605479146420852,
      "loss": 1.6047,
      "step": 71500
    },
    {
      "epoch": 0.5928330131814268,
      "grad_norm": 0.2510753273963928,
      "learning_rate": 0.0001604927160329356,
      "loss": 1.6109,
      "step": 71600
    },
    {
      "epoch": 0.5936609922501159,
      "grad_norm": 0.2659463584423065,
      "learning_rate": 0.00016043751742378598,
      "loss": 1.587,
      "step": 71700
    },
    {
      "epoch": 0.5944889713188051,
      "grad_norm": 0.28224682807922363,
      "learning_rate": 0.00016038231881463638,
      "loss": 1.6516,
      "step": 71800
    },
    {
      "epoch": 0.5953169503874942,
      "grad_norm": 0.24091459810733795,
      "learning_rate": 0.00016032712020548677,
      "loss": 1.6471,
      "step": 71900
    },
    {
      "epoch": 0.5961449294561834,
      "grad_norm": 0.258425772190094,
      "learning_rate": 0.00016027192159633717,
      "loss": 1.5975,
      "step": 72000
    },
    {
      "epoch": 0.5969729085248725,
      "grad_norm": 0.2684045135974884,
      "learning_rate": 0.0001602167229871876,
      "loss": 1.5932,
      "step": 72100
    },
    {
      "epoch": 0.5978008875935616,
      "grad_norm": 0.19851814210414886,
      "learning_rate": 0.00016016152437803795,
      "loss": 1.6163,
      "step": 72200
    },
    {
      "epoch": 0.5986288666622508,
      "grad_norm": 0.28404444456100464,
      "learning_rate": 0.00016010632576888835,
      "loss": 1.5884,
      "step": 72300
    },
    {
      "epoch": 0.5994568457309399,
      "grad_norm": 0.24942117929458618,
      "learning_rate": 0.00016005112715973877,
      "loss": 1.6062,
      "step": 72400
    },
    {
      "epoch": 0.6002848247996291,
      "grad_norm": 0.319736123085022,
      "learning_rate": 0.00015999592855058914,
      "loss": 1.6107,
      "step": 72500
    },
    {
      "epoch": 0.6011128038683182,
      "grad_norm": 0.297432541847229,
      "learning_rate": 0.00015994072994143953,
      "loss": 1.6253,
      "step": 72600
    },
    {
      "epoch": 0.6019407829370074,
      "grad_norm": 0.283583402633667,
      "learning_rate": 0.00015988553133228995,
      "loss": 1.6102,
      "step": 72700
    },
    {
      "epoch": 0.6027687620056965,
      "grad_norm": 0.27938199043273926,
      "learning_rate": 0.00015983033272314032,
      "loss": 1.6206,
      "step": 72800
    },
    {
      "epoch": 0.6035967410743857,
      "grad_norm": 0.2605910301208496,
      "learning_rate": 0.0001597751341139907,
      "loss": 1.6037,
      "step": 72900
    },
    {
      "epoch": 0.6044247201430748,
      "grad_norm": 0.28445005416870117,
      "learning_rate": 0.00015971993550484113,
      "loss": 1.646,
      "step": 73000
    },
    {
      "epoch": 0.605252699211764,
      "grad_norm": 0.26064708828926086,
      "learning_rate": 0.0001596647368956915,
      "loss": 1.614,
      "step": 73100
    },
    {
      "epoch": 0.6060806782804531,
      "grad_norm": 0.2497030645608902,
      "learning_rate": 0.0001596095382865419,
      "loss": 1.6076,
      "step": 73200
    },
    {
      "epoch": 0.6069086573491422,
      "grad_norm": 0.3160170614719391,
      "learning_rate": 0.00015955433967739232,
      "loss": 1.6347,
      "step": 73300
    },
    {
      "epoch": 0.6077366364178314,
      "grad_norm": 0.2508627772331238,
      "learning_rate": 0.00015949914106824268,
      "loss": 1.6461,
      "step": 73400
    },
    {
      "epoch": 0.6085646154865205,
      "grad_norm": 0.29909202456474304,
      "learning_rate": 0.0001594444944451846,
      "loss": 1.6179,
      "step": 73500
    },
    {
      "epoch": 0.6093925945552097,
      "grad_norm": 0.22416061162948608,
      "learning_rate": 0.000159389295836035,
      "loss": 1.6013,
      "step": 73600
    },
    {
      "epoch": 0.6102205736238988,
      "grad_norm": 0.3153701424598694,
      "learning_rate": 0.00015933409722688536,
      "loss": 1.6494,
      "step": 73700
    },
    {
      "epoch": 0.611048552692588,
      "grad_norm": 0.2792322635650635,
      "learning_rate": 0.00015927889861773578,
      "loss": 1.6055,
      "step": 73800
    },
    {
      "epoch": 0.6118765317612771,
      "grad_norm": 0.3239234685897827,
      "learning_rate": 0.00015922370000858617,
      "loss": 1.5841,
      "step": 73900
    },
    {
      "epoch": 0.6127045108299662,
      "grad_norm": 0.4340994656085968,
      "learning_rate": 0.00015916850139943654,
      "loss": 1.6187,
      "step": 74000
    },
    {
      "epoch": 0.6135324898986554,
      "grad_norm": 0.31595396995544434,
      "learning_rate": 0.00015911330279028696,
      "loss": 1.6438,
      "step": 74100
    },
    {
      "epoch": 0.6143604689673445,
      "grad_norm": 0.22218069434165955,
      "learning_rate": 0.00015905810418113736,
      "loss": 1.6298,
      "step": 74200
    },
    {
      "epoch": 0.6151884480360337,
      "grad_norm": 0.22682014107704163,
      "learning_rate": 0.00015900290557198772,
      "loss": 1.5793,
      "step": 74300
    },
    {
      "epoch": 0.6160164271047228,
      "grad_norm": 0.23543739318847656,
      "learning_rate": 0.00015894770696283815,
      "loss": 1.6292,
      "step": 74400
    },
    {
      "epoch": 0.616844406173412,
      "grad_norm": 0.2355438470840454,
      "learning_rate": 0.00015889250835368854,
      "loss": 1.636,
      "step": 74500
    },
    {
      "epoch": 0.6176723852421011,
      "grad_norm": 0.2809857428073883,
      "learning_rate": 0.0001588373097445389,
      "loss": 1.6081,
      "step": 74600
    },
    {
      "epoch": 0.6185003643107903,
      "grad_norm": 0.2578040361404419,
      "learning_rate": 0.00015878211113538933,
      "loss": 1.6238,
      "step": 74700
    },
    {
      "epoch": 0.6193283433794794,
      "grad_norm": 0.23350849747657776,
      "learning_rate": 0.00015872691252623972,
      "loss": 1.627,
      "step": 74800
    },
    {
      "epoch": 0.6201563224481685,
      "grad_norm": 0.28163543343544006,
      "learning_rate": 0.00015867171391709012,
      "loss": 1.6375,
      "step": 74900
    },
    {
      "epoch": 0.6209843015168577,
      "grad_norm": 0.2725831866264343,
      "learning_rate": 0.0001586165153079405,
      "loss": 1.6054,
      "step": 75000
    },
    {
      "epoch": 0.6218122805855468,
      "grad_norm": 0.294609397649765,
      "learning_rate": 0.0001585613166987909,
      "loss": 1.6532,
      "step": 75100
    },
    {
      "epoch": 0.622640259654236,
      "grad_norm": 0.2077389657497406,
      "learning_rate": 0.0001585061180896413,
      "loss": 1.6033,
      "step": 75200
    },
    {
      "epoch": 0.6234682387229251,
      "grad_norm": 0.25486066937446594,
      "learning_rate": 0.0001584509194804917,
      "loss": 1.6053,
      "step": 75300
    },
    {
      "epoch": 0.6242962177916143,
      "grad_norm": 0.27974551916122437,
      "learning_rate": 0.0001583957208713421,
      "loss": 1.6003,
      "step": 75400
    },
    {
      "epoch": 0.6251241968603034,
      "grad_norm": 0.22559039294719696,
      "learning_rate": 0.00015834052226219248,
      "loss": 1.6042,
      "step": 75500
    },
    {
      "epoch": 0.6259521759289925,
      "grad_norm": 0.2382129728794098,
      "learning_rate": 0.00015828532365304288,
      "loss": 1.6016,
      "step": 75600
    },
    {
      "epoch": 0.6267801549976817,
      "grad_norm": 0.22553443908691406,
      "learning_rate": 0.00015823012504389327,
      "loss": 1.5928,
      "step": 75700
    },
    {
      "epoch": 0.6276081340663708,
      "grad_norm": 0.2299722135066986,
      "learning_rate": 0.00015817492643474367,
      "loss": 1.5972,
      "step": 75800
    },
    {
      "epoch": 0.62843611313506,
      "grad_norm": 0.2587108910083771,
      "learning_rate": 0.00015811972782559406,
      "loss": 1.5785,
      "step": 75900
    },
    {
      "epoch": 0.6292640922037491,
      "grad_norm": 0.23947428166866302,
      "learning_rate": 0.00015806452921644445,
      "loss": 1.6201,
      "step": 76000
    },
    {
      "epoch": 0.6300920712724383,
      "grad_norm": 0.24543558061122894,
      "learning_rate": 0.00015800933060729485,
      "loss": 1.595,
      "step": 76100
    },
    {
      "epoch": 0.6309200503411274,
      "grad_norm": 0.2749697268009186,
      "learning_rate": 0.00015795413199814524,
      "loss": 1.6041,
      "step": 76200
    },
    {
      "epoch": 0.6317480294098166,
      "grad_norm": 0.2621932923793793,
      "learning_rate": 0.00015789893338899564,
      "loss": 1.6325,
      "step": 76300
    },
    {
      "epoch": 0.6325760084785057,
      "grad_norm": 0.23789739608764648,
      "learning_rate": 0.00015784373477984603,
      "loss": 1.6411,
      "step": 76400
    },
    {
      "epoch": 0.6334039875471948,
      "grad_norm": 0.3237096071243286,
      "learning_rate": 0.00015778853617069642,
      "loss": 1.6147,
      "step": 76500
    },
    {
      "epoch": 0.634231966615884,
      "grad_norm": 0.28203284740448,
      "learning_rate": 0.00015773333756154682,
      "loss": 1.6395,
      "step": 76600
    },
    {
      "epoch": 0.6350599456845731,
      "grad_norm": 0.25538840889930725,
      "learning_rate": 0.0001576781389523972,
      "loss": 1.5822,
      "step": 76700
    },
    {
      "epoch": 0.6358879247532623,
      "grad_norm": 0.23973847925662994,
      "learning_rate": 0.0001576229403432476,
      "loss": 1.6125,
      "step": 76800
    },
    {
      "epoch": 0.6367159038219514,
      "grad_norm": 0.22504395246505737,
      "learning_rate": 0.000157567741734098,
      "loss": 1.5907,
      "step": 76900
    },
    {
      "epoch": 0.6375438828906406,
      "grad_norm": 0.21240180730819702,
      "learning_rate": 0.0001575125431249484,
      "loss": 1.5818,
      "step": 77000
    },
    {
      "epoch": 0.6383718619593297,
      "grad_norm": 0.23655393719673157,
      "learning_rate": 0.0001574573445157988,
      "loss": 1.6278,
      "step": 77100
    },
    {
      "epoch": 0.6391998410280189,
      "grad_norm": 0.24175895750522614,
      "learning_rate": 0.00015740214590664918,
      "loss": 1.6079,
      "step": 77200
    },
    {
      "epoch": 0.640027820096708,
      "grad_norm": 0.2233435958623886,
      "learning_rate": 0.00015734694729749958,
      "loss": 1.6276,
      "step": 77300
    },
    {
      "epoch": 0.6408557991653971,
      "grad_norm": 0.2756839990615845,
      "learning_rate": 0.00015729174868834997,
      "loss": 1.6015,
      "step": 77400
    },
    {
      "epoch": 0.6416837782340863,
      "grad_norm": 0.24636799097061157,
      "learning_rate": 0.00015723655007920037,
      "loss": 1.5968,
      "step": 77500
    },
    {
      "epoch": 0.6425117573027754,
      "grad_norm": 0.28196850419044495,
      "learning_rate": 0.00015718135147005076,
      "loss": 1.6366,
      "step": 77600
    },
    {
      "epoch": 0.6433397363714646,
      "grad_norm": 0.2543933093547821,
      "learning_rate": 0.00015712670484699265,
      "loss": 1.6481,
      "step": 77700
    },
    {
      "epoch": 0.6441677154401537,
      "grad_norm": 0.28510308265686035,
      "learning_rate": 0.00015707150623784304,
      "loss": 1.5877,
      "step": 77800
    },
    {
      "epoch": 0.6449956945088429,
      "grad_norm": 0.22589468955993652,
      "learning_rate": 0.00015701630762869344,
      "loss": 1.5924,
      "step": 77900
    },
    {
      "epoch": 0.645823673577532,
      "grad_norm": 0.33463847637176514,
      "learning_rate": 0.00015696110901954383,
      "loss": 1.6093,
      "step": 78000
    },
    {
      "epoch": 0.6466516526462212,
      "grad_norm": 0.23685209453105927,
      "learning_rate": 0.00015690591041039422,
      "loss": 1.5936,
      "step": 78100
    },
    {
      "epoch": 0.6474796317149103,
      "grad_norm": 0.2393782138824463,
      "learning_rate": 0.00015685071180124462,
      "loss": 1.5973,
      "step": 78200
    },
    {
      "epoch": 0.6483076107835994,
      "grad_norm": 0.2803311049938202,
      "learning_rate": 0.000156795513192095,
      "loss": 1.6438,
      "step": 78300
    },
    {
      "epoch": 0.6491355898522886,
      "grad_norm": 0.24263189733028412,
      "learning_rate": 0.0001567403145829454,
      "loss": 1.6008,
      "step": 78400
    },
    {
      "epoch": 0.6499635689209777,
      "grad_norm": 0.2571956217288971,
      "learning_rate": 0.0001566851159737958,
      "loss": 1.6195,
      "step": 78500
    },
    {
      "epoch": 0.6507915479896669,
      "grad_norm": 0.2744641900062561,
      "learning_rate": 0.0001566299173646462,
      "loss": 1.6018,
      "step": 78600
    },
    {
      "epoch": 0.651619527058356,
      "grad_norm": 0.26064786314964294,
      "learning_rate": 0.0001565747187554966,
      "loss": 1.6229,
      "step": 78700
    },
    {
      "epoch": 0.6524475061270452,
      "grad_norm": 0.25279179215431213,
      "learning_rate": 0.00015651952014634698,
      "loss": 1.6232,
      "step": 78800
    },
    {
      "epoch": 0.6532754851957343,
      "grad_norm": 0.2717379927635193,
      "learning_rate": 0.00015646432153719738,
      "loss": 1.5906,
      "step": 78900
    },
    {
      "epoch": 0.6541034642644233,
      "grad_norm": 0.25891339778900146,
      "learning_rate": 0.00015640912292804777,
      "loss": 1.6343,
      "step": 79000
    },
    {
      "epoch": 0.6549314433331125,
      "grad_norm": 0.27862370014190674,
      "learning_rate": 0.00015635392431889817,
      "loss": 1.643,
      "step": 79100
    },
    {
      "epoch": 0.6557594224018016,
      "grad_norm": 0.27120205760002136,
      "learning_rate": 0.00015629872570974856,
      "loss": 1.6838,
      "step": 79200
    },
    {
      "epoch": 0.6565874014704908,
      "grad_norm": 0.2255089432001114,
      "learning_rate": 0.00015624352710059896,
      "loss": 1.6496,
      "step": 79300
    },
    {
      "epoch": 0.6574153805391799,
      "grad_norm": 0.2028416097164154,
      "learning_rate": 0.00015618832849144935,
      "loss": 1.5873,
      "step": 79400
    },
    {
      "epoch": 0.6582433596078691,
      "grad_norm": 0.2558017075061798,
      "learning_rate": 0.00015613312988229974,
      "loss": 1.612,
      "step": 79500
    },
    {
      "epoch": 0.6590713386765582,
      "grad_norm": 0.32027551531791687,
      "learning_rate": 0.00015607793127315014,
      "loss": 1.6218,
      "step": 79600
    },
    {
      "epoch": 0.6598993177452473,
      "grad_norm": 0.2677933871746063,
      "learning_rate": 0.00015602273266400053,
      "loss": 1.5858,
      "step": 79700
    },
    {
      "epoch": 0.6607272968139365,
      "grad_norm": 0.25464698672294617,
      "learning_rate": 0.00015596753405485093,
      "loss": 1.611,
      "step": 79800
    },
    {
      "epoch": 0.6615552758826256,
      "grad_norm": 0.2716948091983795,
      "learning_rate": 0.00015591233544570132,
      "loss": 1.6245,
      "step": 79900
    },
    {
      "epoch": 0.6623832549513148,
      "grad_norm": 0.24213983118534088,
      "learning_rate": 0.00015585713683655171,
      "loss": 1.6145,
      "step": 80000
    }
  ],
  "logging_steps": 100,
  "max_steps": 362328,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 1600,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.7660211147080745e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}